blob: ead59164e49d02873e4813112ba894fe4bb248b2 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * strings.c: string manipulation functions
 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
154 int l;
155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
272 if (do_special && find_cmdline_var(p, &l) >= 0)
273 {
274 *d++ = '\\'; // insert backslash
275 while (--l >= 0) // copy the var
276 *d++ = *p++;
277 continue;
278 }
Jason Cox6e823512021-08-29 12:36:49 +0200279 if (*p == '\\' && fish_like)
280 {
281 *d++ = '\\';
282 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200283 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200284 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200285
286 MB_COPY_CHAR(p, d);
287 }
288
289 // add terminating quote and finish with a NUL
290# ifdef MSWIN
291 if (double_quotes)
292 *d++ = '"';
293 else
294# endif
295 *d++ = '\'';
296 *d = NUL;
297 }
298
299 return escaped_string;
300}
301
302/*
303 * Like vim_strsave(), but make all characters uppercase.
304 * This uses ASCII lower-to-upper case translation, language independent.
305 */
306 char_u *
307vim_strsave_up(char_u *string)
308{
309 char_u *p1;
310
311 p1 = vim_strsave(string);
312 vim_strup(p1);
313 return p1;
314}
315
316/*
317 * Like vim_strnsave(), but make all characters uppercase.
318 * This uses ASCII lower-to-upper case translation, language independent.
319 */
320 char_u *
321vim_strnsave_up(char_u *string, size_t len)
322{
323 char_u *p1;
324
325 p1 = vim_strnsave(string, len);
326 vim_strup(p1);
327 return p1;
328}
329
330/*
331 * ASCII lower-to-upper case translation, language independent.
332 */
333 void
334vim_strup(
335 char_u *p)
336{
337 char_u *p2;
338 int c;
339
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000340 if (p == NULL)
341 return;
342
343 p2 = p;
344 while ((c = *p2) != NUL)
345 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200346}
347
348#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
349/*
350 * Make string "s" all upper-case and return it in allocated memory.
351 * Handles multi-byte characters as well as possible.
352 * Returns NULL when out of memory.
353 */
354 static char_u *
355strup_save(char_u *orig)
356{
357 char_u *p;
358 char_u *res;
359
360 res = p = vim_strsave(orig);
361
362 if (res != NULL)
363 while (*p != NUL)
364 {
365 int l;
366
367 if (enc_utf8)
368 {
369 int c, uc;
370 int newl;
371 char_u *s;
372
373 c = utf_ptr2char(p);
374 l = utf_ptr2len(p);
375 if (c == 0)
376 {
377 // overlong sequence, use only the first byte
378 c = *p;
379 l = 1;
380 }
381 uc = utf_toupper(c);
382
383 // Reallocate string when byte count changes. This is rare,
384 // thus it's OK to do another malloc()/free().
385 newl = utf_char2len(uc);
386 if (newl != l)
387 {
388 s = alloc(STRLEN(res) + 1 + newl - l);
389 if (s == NULL)
390 {
391 vim_free(res);
392 return NULL;
393 }
394 mch_memmove(s, res, p - res);
395 STRCPY(s + (p - res) + newl, p + l);
396 p = s + (p - res);
397 vim_free(res);
398 res = s;
399 }
400
401 utf_char2bytes(uc, p);
402 p += newl;
403 }
404 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
405 p += l; // skip multi-byte character
406 else
407 {
408 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
409 p++;
410 }
411 }
412
413 return res;
414}
415
416/*
417 * Make string "s" all lower-case and return it in allocated memory.
418 * Handles multi-byte characters as well as possible.
419 * Returns NULL when out of memory.
420 */
421 char_u *
422strlow_save(char_u *orig)
423{
424 char_u *p;
425 char_u *res;
426
427 res = p = vim_strsave(orig);
428
429 if (res != NULL)
430 while (*p != NUL)
431 {
432 int l;
433
434 if (enc_utf8)
435 {
436 int c, lc;
437 int newl;
438 char_u *s;
439
440 c = utf_ptr2char(p);
441 l = utf_ptr2len(p);
442 if (c == 0)
443 {
444 // overlong sequence, use only the first byte
445 c = *p;
446 l = 1;
447 }
448 lc = utf_tolower(c);
449
450 // Reallocate string when byte count changes. This is rare,
451 // thus it's OK to do another malloc()/free().
452 newl = utf_char2len(lc);
453 if (newl != l)
454 {
455 s = alloc(STRLEN(res) + 1 + newl - l);
456 if (s == NULL)
457 {
458 vim_free(res);
459 return NULL;
460 }
461 mch_memmove(s, res, p - res);
462 STRCPY(s + (p - res) + newl, p + l);
463 p = s + (p - res);
464 vim_free(res);
465 res = s;
466 }
467
468 utf_char2bytes(lc, p);
469 p += newl;
470 }
471 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
472 p += l; // skip multi-byte character
473 else
474 {
475 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
476 p++;
477 }
478 }
479
480 return res;
481}
482#endif
483
484/*
485 * delete spaces at the end of a string
486 */
487 void
488del_trailing_spaces(char_u *ptr)
489{
490 char_u *q;
491
492 q = ptr + STRLEN(ptr);
493 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
494 *q = NUL;
495}
496
497/*
498 * Like strncpy(), but always terminate the result with one NUL.
499 * "to" must be "len + 1" long!
500 */
501 void
502vim_strncpy(char_u *to, char_u *from, size_t len)
503{
504 STRNCPY(to, from, len);
505 to[len] = NUL;
506}
507
508/*
509 * Like strcat(), but make sure the result fits in "tosize" bytes and is
510 * always NUL terminated. "from" and "to" may overlap.
511 */
512 void
513vim_strcat(char_u *to, char_u *from, size_t tosize)
514{
515 size_t tolen = STRLEN(to);
516 size_t fromlen = STRLEN(from);
517
518 if (tolen + fromlen + 1 > tosize)
519 {
520 mch_memmove(to + tolen, from, tosize - tolen - 1);
521 to[tosize - 1] = NUL;
522 }
523 else
524 mch_memmove(to + tolen, from, fromlen + 1);
525}
526
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000527/*
528 * A version of strlen() that has a maximum length.
529 */
530 size_t
531vim_strlen_maxlen(char *s, size_t maxlen)
532{
533 size_t i;
534 for (i = 0; i < maxlen; ++i)
535 if (s[i] == NUL)
536 break;
537 return i;
538}
539
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200540#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
541/*
542 * Compare two strings, ignoring case, using current locale.
543 * Doesn't work for multi-byte characters.
544 * return 0 for match, < 0 for smaller, > 0 for bigger
545 */
546 int
547vim_stricmp(char *s1, char *s2)
548{
549 int i;
550
551 for (;;)
552 {
553 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
554 if (i != 0)
555 return i; // this character different
556 if (*s1 == NUL)
557 break; // strings match until NUL
558 ++s1;
559 ++s2;
560 }
561 return 0; // strings match
562}
563#endif
564
565#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
566/*
567 * Compare two strings, for length "len", ignoring case, using current locale.
568 * Doesn't work for multi-byte characters.
569 * return 0 for match, < 0 for smaller, > 0 for bigger
570 */
571 int
572vim_strnicmp(char *s1, char *s2, size_t len)
573{
574 int i;
575
576 while (len > 0)
577 {
578 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
579 if (i != 0)
580 return i; // this character different
581 if (*s1 == NUL)
582 break; // strings match until NUL
583 ++s1;
584 ++s2;
585 --len;
586 }
587 return 0; // strings match
588}
589#endif
590
591/*
592 * Search for first occurrence of "c" in "string".
593 * Version of strchr() that handles unsigned char strings with characters from
594 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
595 * end of the string.
596 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000597 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200598vim_strchr(char_u *string, int c)
599{
600 char_u *p;
601 int b;
602
603 p = string;
604 if (enc_utf8 && c >= 0x80)
605 {
606 while (*p != NUL)
607 {
608 int l = utfc_ptr2len(p);
609
610 // Avoid matching an illegal byte here.
611 if (utf_ptr2char(p) == c && l > 1)
612 return p;
613 p += l;
614 }
615 return NULL;
616 }
617 if (enc_dbcs != 0 && c > 255)
618 {
619 int n2 = c & 0xff;
620
621 c = ((unsigned)c >> 8) & 0xff;
622 while ((b = *p) != NUL)
623 {
624 if (b == c && p[1] == n2)
625 return p;
626 p += (*mb_ptr2len)(p);
627 }
628 return NULL;
629 }
630 if (has_mbyte)
631 {
632 while ((b = *p) != NUL)
633 {
634 if (b == c)
635 return p;
636 p += (*mb_ptr2len)(p);
637 }
638 return NULL;
639 }
640 while ((b = *p) != NUL)
641 {
642 if (b == c)
643 return p;
644 ++p;
645 }
646 return NULL;
647}
648
649/*
650 * Version of strchr() that only works for bytes and handles unsigned char
651 * strings with characters above 128 correctly. It also doesn't return a
652 * pointer to the NUL at the end of the string.
653 */
654 char_u *
655vim_strbyte(char_u *string, int c)
656{
657 char_u *p = string;
658
659 while (*p != NUL)
660 {
661 if (*p == c)
662 return p;
663 ++p;
664 }
665 return NULL;
666}
667
668/*
669 * Search for last occurrence of "c" in "string".
670 * Version of strrchr() that handles unsigned char strings with characters from
671 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
672 * end of the string.
673 * Return NULL if not found.
674 * Does not handle multi-byte char for "c"!
675 */
676 char_u *
677vim_strrchr(char_u *string, int c)
678{
679 char_u *retval = NULL;
680 char_u *p = string;
681
682 while (*p)
683 {
684 if (*p == c)
685 retval = p;
686 MB_PTR_ADV(p);
687 }
688 return retval;
689}
690
691/*
692 * Vim's version of strpbrk(), in case it's missing.
693 * Don't generate a prototype for this, causes problems when it's not used.
694 */
695#ifndef PROTO
696# ifndef HAVE_STRPBRK
697# ifdef vim_strpbrk
698# undef vim_strpbrk
699# endif
700 char_u *
701vim_strpbrk(char_u *s, char_u *charset)
702{
703 while (*s)
704 {
705 if (vim_strchr(charset, *s) != NULL)
706 return s;
707 MB_PTR_ADV(s);
708 }
709 return NULL;
710}
711# endif
712#endif
713
714/*
715 * Sort an array of strings.
716 */
static int sort_compare(const void *s1, const void *s2);

/*
 * Comparison callback for qsort(): "s1" and "s2" each point to a string
 * pointer; the strings are compared with STRCMP().
 */
    static int
sort_compare(const void *s1, const void *s2)
{
    char *left = *(char **)s1;
    char *right = *(char **)s2;

    return STRCMP(left, right);
}
724
725 void
726sort_strings(
727 char_u **files,
728 int count)
729{
730 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
731}
732
733#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
734/*
735 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
736 * When "s" is NULL FALSE is returned.
737 */
738 int
739has_non_ascii(char_u *s)
740{
741 char_u *p;
742
743 if (s != NULL)
744 for (p = s; *p != NUL; ++p)
745 if (*p >= 128)
746 return TRUE;
747 return FALSE;
748}
749#endif
750
751/*
752 * Concatenate two strings and return the result in allocated memory.
753 * Returns NULL when out of memory.
754 */
755 char_u *
756concat_str(char_u *str1, char_u *str2)
757{
758 char_u *dest;
759 size_t l = str1 == NULL ? 0 : STRLEN(str1);
760
761 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000762 if (dest == NULL)
763 return NULL;
764 if (str1 == NULL)
765 *dest = NUL;
766 else
767 STRCPY(dest, str1);
768 if (str2 != NULL)
769 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200770 return dest;
771}
772
773#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200774/*
775 * Return string "str" in ' quotes, doubling ' characters.
776 * If "str" is NULL an empty string is assumed.
777 * If "function" is TRUE make it function('string').
778 */
779 char_u *
780string_quote(char_u *str, int function)
781{
782 unsigned len;
783 char_u *p, *r, *s;
784
785 len = (function ? 13 : 3);
786 if (str != NULL)
787 {
788 len += (unsigned)STRLEN(str);
789 for (p = str; *p != NUL; MB_PTR_ADV(p))
790 if (*p == '\'')
791 ++len;
792 }
793 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000794 if (r == NULL)
795 return NULL;
796
797 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200798 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000799 STRCPY(r, "function('");
800 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200801 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000802 else
803 *r++ = '\'';
804 if (str != NULL)
805 for (p = str; *p != NUL; )
806 {
807 if (*p == '\'')
808 *r++ = '\'';
809 MB_COPY_CHAR(p, r);
810 }
811 *r++ = '\'';
812 if (function)
813 *r++ = ')';
814 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200815 return s;
816}
817
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000818/*
819 * Count the number of times "needle" occurs in string "haystack". Case is
820 * ignored if "ic" is TRUE.
821 */
822 long
823string_count(char_u *haystack, char_u *needle, int ic)
824{
825 long n = 0;
826 char_u *p = haystack;
827 char_u *next;
828
829 if (p == NULL || needle == NULL || *needle == NUL)
830 return 0;
831
832 if (ic)
833 {
834 size_t len = STRLEN(needle);
835
836 while (*p != NUL)
837 {
838 if (MB_STRNICMP(p, needle, len) == 0)
839 {
840 ++n;
841 p += len;
842 }
843 else
844 MB_PTR_ADV(p);
845 }
846 }
847 else
848 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
849 {
850 ++n;
851 p = next + STRLEN(needle);
852 }
853
854 return n;
855}
856
857/*
Yegappan Lakshmanan03ff1c22023-05-06 14:08:21 +0100858 * Reverse the string in 'str' and set the result in 'rettv'.
859 */
860 void
861string_reverse(char_u *str, typval_T *rettv)
862{
863 rettv->v_type = VAR_STRING;
864 rettv->vval.v_string = NULL;
865 if (str == NULL)
866 return;
867
868 char_u *rstr = vim_strsave(str);
869 rettv->vval.v_string = rstr;
870 if (rstr == NULL || *str == NUL)
871 return;
872
873 size_t len = STRLEN(rstr);
874 if (has_mbyte)
875 {
876 char_u *src = str;
877 char_u *dest = rstr + len;
878
879 while (src < str + len)
880 {
881 int clen = mb_ptr2len(src);
882 dest -= clen;
883 mch_memmove(dest, src, (size_t)clen);
884 src += clen;
885 }
886 }
887 else
888 {
889 for (size_t i = 0; i < len / 2; i++)
890 {
891 char tmp = rstr[len - i - 1];
892 rstr[len - i - 1] = rstr[i];
893 rstr[i] = tmp;
894 }
895 }
896}
897
898/*
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000899 * Make a typval_T of the first character of "input" and store it in "output".
900 * Return OK or FAIL.
901 */
902 static int
903copy_first_char_to_tv(char_u *input, typval_T *output)
904{
905 char_u buf[MB_MAXBYTES + 1];
906 int len;
907
908 if (input == NULL || output == NULL)
909 return FAIL;
910
911 len = has_mbyte ? mb_ptr2len(input) : 1;
912 STRNCPY(buf, input, len);
913 buf[len] = NUL;
914 output->v_type = VAR_STRING;
915 output->vval.v_string = vim_strsave(buf);
916
917 return output->vval.v_string == NULL ? FAIL : OK;
918}
919
920/*
921 * Implementation of map() and filter() for a String. Apply "expr" to every
922 * character in string "str" and return the result in "rettv".
923 */
924 void
925string_filter_map(
926 char_u *str,
927 filtermap_T filtermap,
928 typval_T *expr,
929 typval_T *rettv)
930{
931 char_u *p;
932 typval_T tv;
933 garray_T ga;
934 int len = 0;
935 int idx = 0;
936 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100937 typval_T newtv;
938 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000939
940 rettv->v_type = VAR_STRING;
941 rettv->vval.v_string = NULL;
942
943 // set_vim_var_nr() doesn't set the type
944 set_vim_var_type(VV_KEY, VAR_NUMBER);
945
zeertzjqe7d49462023-04-16 20:53:55 +0100946 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100947 fc = eval_expr_get_funccal(expr, &newtv);
948
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000949 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000950 for (p = str; *p != NUL; p += len)
951 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000952 if (copy_first_char_to_tv(p, &tv) == FAIL)
953 break;
954 len = (int)STRLEN(tv.vval.v_string);
955
Bram Moolenaardd7eff02022-05-06 11:02:05 +0100956 newtv.v_type = VAR_UNKNOWN;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000957 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100958 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000959 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000960 {
961 clear_tv(&newtv);
962 clear_tv(&tv);
963 break;
964 }
965 else if (filtermap != FILTERMAP_FILTER)
966 {
967 if (newtv.v_type != VAR_STRING)
968 {
969 clear_tv(&newtv);
970 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000971 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000972 break;
973 }
974 else
975 ga_concat(&ga, newtv.vval.v_string);
976 }
977 else if (!rem)
978 ga_concat(&ga, tv.vval.v_string);
979
980 clear_tv(&newtv);
981 clear_tv(&tv);
982
983 ++idx;
984 }
985 ga_append(&ga, NUL);
986 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +0100987 if (fc != NULL)
988 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000989}
990
991/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100992 * Implementation of reduce() for String "argvars[0]" using the function "expr"
993 * starting with the optional initial value "argvars[2]" and return the result
994 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000995 */
996 void
997string_reduce(
998 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100999 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001000 typval_T *rettv)
1001{
1002 char_u *p = tv_get_string(&argvars[0]);
1003 int len;
1004 typval_T argv[3];
1005 int r;
1006 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +01001007 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001008
1009 if (argvars[2].v_type == VAR_UNKNOWN)
1010 {
1011 if (*p == NUL)
1012 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001013 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001014 return;
1015 }
1016 if (copy_first_char_to_tv(p, rettv) == FAIL)
1017 return;
1018 p += STRLEN(rettv->vval.v_string);
1019 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001020 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001021 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001022 else
1023 copy_tv(&argvars[2], rettv);
1024
zeertzjqe7d49462023-04-16 20:53:55 +01001025 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001026 fc = eval_expr_get_funccal(expr, rettv);
1027
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001028 for ( ; *p != NUL; p += len)
1029 {
1030 argv[0] = *rettv;
1031 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1032 break;
1033 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001034
Bram Moolenaar82418262022-09-28 16:16:15 +01001035 r = eval_expr_typval(expr, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001036
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001037 clear_tv(&argv[0]);
1038 clear_tv(&argv[1]);
1039 if (r == FAIL || called_emsg != called_emsg_start)
1040 return;
1041 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001042
1043 if (fc != NULL)
1044 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001045}
1046
Bram Moolenaare4098452023-05-07 18:53:49 +01001047/*
1048 * Implementation of "byteidx()" and "byteidxcomp()" functions
1049 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001050 static void
Bram Moolenaare4098452023-05-07 18:53:49 +01001051byteidx_common(typval_T *argvars, typval_T *rettv, int comp UNUSED)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001052{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001053 rettv->vval.v_number = -1;
1054
1055 if (in_vim9script()
1056 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001057 || check_for_number_arg(argvars, 1) == FAIL
1058 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001059 return;
1060
Christian Brabandt67672ef2023-04-24 21:09:54 +01001061 char_u *str = tv_get_string_chk(&argvars[0]);
1062 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001063 if (str == NULL || idx < 0)
1064 return;
1065
Christian Brabandt67672ef2023-04-24 21:09:54 +01001066 varnumber_T utf16idx = FALSE;
1067 if (argvars[2].v_type != VAR_UNKNOWN)
1068 {
zeertzjq8cf51372023-05-08 15:31:38 +01001069 int error = FALSE;
1070 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1071 if (error)
1072 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001073 if (utf16idx < 0 || utf16idx > 1)
1074 {
zeertzjq8cf51372023-05-08 15:31:38 +01001075 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001076 return;
1077 }
1078 }
1079
1080 int (*ptr2len)(char_u *);
1081 if (enc_utf8 && comp)
1082 ptr2len = utf_ptr2len;
1083 else
1084 ptr2len = mb_ptr2len;
1085
1086 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001087 for ( ; idx > 0; idx--)
1088 {
1089 if (*t == NUL) // EOL reached
1090 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001091 if (utf16idx)
1092 {
1093 int clen = ptr2len(t);
1094 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1095 if (c > 0xFFFF)
1096 idx--;
1097 }
1098 if (idx > 0)
1099 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001100 }
1101 rettv->vval.v_number = (varnumber_T)(t - str);
1102}
1103
1104/*
1105 * "byteidx()" function
1106 */
1107 void
1108f_byteidx(typval_T *argvars, typval_T *rettv)
1109{
Bram Moolenaare4098452023-05-07 18:53:49 +01001110 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001111}
1112
1113/*
1114 * "byteidxcomp()" function
1115 */
1116 void
1117f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1118{
Bram Moolenaare4098452023-05-07 18:53:49 +01001119 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001120}
1121
1122/*
1123 * "charidx()" function
1124 */
1125 void
1126f_charidx(typval_T *argvars, typval_T *rettv)
1127{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001128 rettv->vval.v_number = -1;
1129
Christian Brabandt67672ef2023-04-24 21:09:54 +01001130 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001131 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001132 || check_for_opt_bool_arg(argvars, 2) == FAIL
1133 || (argvars[2].v_type != VAR_UNKNOWN
1134 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001135 return;
1136
Christian Brabandt67672ef2023-04-24 21:09:54 +01001137 char_u *str = tv_get_string_chk(&argvars[0]);
1138 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001139 if (str == NULL || idx < 0)
1140 return;
1141
Christian Brabandt67672ef2023-04-24 21:09:54 +01001142 varnumber_T countcc = FALSE;
1143 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001144 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001145 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001146 countcc = tv_get_bool(&argvars[2]);
1147 if (argvars[3].v_type != VAR_UNKNOWN)
1148 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001149 }
1150
Christian Brabandt67672ef2023-04-24 21:09:54 +01001151 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001152 if (enc_utf8 && countcc)
1153 ptr2len = utf_ptr2len;
1154 else
1155 ptr2len = mb_ptr2len;
1156
Christian Brabandt67672ef2023-04-24 21:09:54 +01001157 char_u *p;
1158 int len;
1159 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001160 {
1161 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001162 {
1163 // If the index is exactly the number of bytes or utf-16 code units
1164 // in the string then return the length of the string in
1165 // characters.
1166 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1167 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001168 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001169 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001170 if (utf16idx)
1171 {
1172 idx--;
1173 int clen = ptr2len(p);
1174 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1175 if (c > 0xFFFF)
1176 idx--;
1177 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001178 p += ptr2len(p);
1179 }
1180
1181 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1182}
1183
1184/*
1185 * "str2list()" function
1186 */
1187 void
1188f_str2list(typval_T *argvars, typval_T *rettv)
1189{
1190 char_u *p;
1191 int utf8 = FALSE;
1192
1193 if (rettv_list_alloc(rettv) == FAIL)
1194 return;
1195
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001196 if (in_vim9script()
1197 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001198 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001199 return;
1200
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001201 if (argvars[1].v_type != VAR_UNKNOWN)
1202 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1203
1204 p = tv_get_string(&argvars[0]);
1205
1206 if (has_mbyte || utf8)
1207 {
1208 int (*ptr2len)(char_u *);
1209 int (*ptr2char)(char_u *);
1210
1211 if (utf8 || enc_utf8)
1212 {
1213 ptr2len = utf_ptr2len;
1214 ptr2char = utf_ptr2char;
1215 }
1216 else
1217 {
1218 ptr2len = mb_ptr2len;
1219 ptr2char = mb_ptr2char;
1220 }
1221
1222 for ( ; *p != NUL; p += (*ptr2len)(p))
1223 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1224 }
1225 else
1226 for ( ; *p != NUL; ++p)
1227 list_append_number(rettv->vval.v_list, *p);
1228}
1229
1230/*
1231 * "str2nr()" function
1232 */
1233 void
1234f_str2nr(typval_T *argvars, typval_T *rettv)
1235{
1236 int base = 10;
1237 char_u *p;
1238 varnumber_T n;
1239 int what = 0;
1240 int isneg;
1241
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001242 if (in_vim9script()
1243 && (check_for_string_arg(argvars, 0) == FAIL
1244 || check_for_opt_number_arg(argvars, 1) == FAIL
1245 || (argvars[1].v_type != VAR_UNKNOWN
1246 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1247 return;
1248
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001249 if (argvars[1].v_type != VAR_UNKNOWN)
1250 {
1251 base = (int)tv_get_number(&argvars[1]);
1252 if (base != 2 && base != 8 && base != 10 && base != 16)
1253 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001254 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001255 return;
1256 }
1257 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1258 what |= STR2NR_QUOTE;
1259 }
1260
1261 p = skipwhite(tv_get_string_strict(&argvars[0]));
1262 isneg = (*p == '-');
1263 if (*p == '+' || *p == '-')
1264 p = skipwhite(p + 1);
1265 switch (base)
1266 {
1267 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1268 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1269 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1270 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001271 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001272 // Text after the number is silently ignored.
1273 if (isneg)
1274 rettv->vval.v_number = -n;
1275 else
1276 rettv->vval.v_number = n;
1277
1278}
1279
1280/*
1281 * "strgetchar()" function
1282 */
1283 void
1284f_strgetchar(typval_T *argvars, typval_T *rettv)
1285{
1286 char_u *str;
1287 int len;
1288 int error = FALSE;
1289 int charidx;
1290 int byteidx = 0;
1291
1292 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001293
1294 if (in_vim9script()
1295 && (check_for_string_arg(argvars, 0) == FAIL
1296 || check_for_number_arg(argvars, 1) == FAIL))
1297 return;
1298
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001299 str = tv_get_string_chk(&argvars[0]);
1300 if (str == NULL)
1301 return;
1302 len = (int)STRLEN(str);
1303 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1304 if (error)
1305 return;
1306
1307 while (charidx >= 0 && byteidx < len)
1308 {
1309 if (charidx == 0)
1310 {
1311 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1312 break;
1313 }
1314 --charidx;
1315 byteidx += MB_CPTR2LEN(str + byteidx);
1316 }
1317}
1318
1319/*
1320 * "stridx()" function
1321 */
1322 void
1323f_stridx(typval_T *argvars, typval_T *rettv)
1324{
1325 char_u buf[NUMBUFLEN];
1326 char_u *needle;
1327 char_u *haystack;
1328 char_u *save_haystack;
1329 char_u *pos;
1330 int start_idx;
1331
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001332 if (in_vim9script()
1333 && (check_for_string_arg(argvars, 0) == FAIL
1334 || check_for_string_arg(argvars, 1) == FAIL
1335 || check_for_opt_number_arg(argvars, 2) == FAIL))
1336 return;
1337
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001338 needle = tv_get_string_chk(&argvars[1]);
1339 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1340 rettv->vval.v_number = -1;
1341 if (needle == NULL || haystack == NULL)
1342 return; // type error; errmsg already given
1343
1344 if (argvars[2].v_type != VAR_UNKNOWN)
1345 {
1346 int error = FALSE;
1347
1348 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1349 if (error || start_idx >= (int)STRLEN(haystack))
1350 return;
1351 if (start_idx >= 0)
1352 haystack += start_idx;
1353 }
1354
1355 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1356 if (pos != NULL)
1357 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1358}
1359
1360/*
1361 * "string()" function
1362 */
1363 void
1364f_string(typval_T *argvars, typval_T *rettv)
1365{
1366 char_u *tofree;
1367 char_u numbuf[NUMBUFLEN];
1368
1369 rettv->v_type = VAR_STRING;
1370 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1371 get_copyID());
1372 // Make a copy if we have a value but it's not in allocated memory.
1373 if (rettv->vval.v_string != NULL && tofree == NULL)
1374 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1375}
1376
1377/*
1378 * "strlen()" function
1379 */
1380 void
1381f_strlen(typval_T *argvars, typval_T *rettv)
1382{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001383 if (in_vim9script()
1384 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1385 return;
1386
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001387 rettv->vval.v_number = (varnumber_T)(STRLEN(
1388 tv_get_string(&argvars[0])));
1389}
1390
1391 static void
1392strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1393{
1394 char_u *s = tv_get_string(&argvars[0]);
1395 varnumber_T len = 0;
1396 int (*func_mb_ptr2char_adv)(char_u **pp);
1397
1398 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1399 while (*s != NUL)
1400 {
1401 func_mb_ptr2char_adv(&s);
1402 ++len;
1403 }
1404 rettv->vval.v_number = len;
1405}
1406
1407/*
1408 * "strcharlen()" function
1409 */
1410 void
1411f_strcharlen(typval_T *argvars, typval_T *rettv)
1412{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001413 if (in_vim9script()
1414 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1415 return;
1416
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001417 strchar_common(argvars, rettv, TRUE);
1418}
1419
1420/*
1421 * "strchars()" function
1422 */
1423 void
1424f_strchars(typval_T *argvars, typval_T *rettv)
1425{
1426 varnumber_T skipcc = FALSE;
1427
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001428 if (in_vim9script()
1429 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001430 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001431 return;
1432
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001433 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001434 {
zeertzjq8cf51372023-05-08 15:31:38 +01001435 int error = FALSE;
1436 skipcc = tv_get_bool_chk(&argvars[1], &error);
1437 if (error)
1438 return;
1439 if (skipcc < 0 || skipcc > 1)
1440 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001441 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001442 return;
1443 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001444 }
zeertzjq8cf51372023-05-08 15:31:38 +01001445
1446 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001447}
1448
1449/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001450 * "strutf16len()" function
1451 */
1452 void
1453f_strutf16len(typval_T *argvars, typval_T *rettv)
1454{
1455 rettv->vval.v_number = -1;
1456
1457 if (check_for_string_arg(argvars, 0) == FAIL
1458 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1459 return;
1460
1461 varnumber_T countcc = FALSE;
1462 if (argvars[1].v_type != VAR_UNKNOWN)
1463 countcc = tv_get_bool(&argvars[1]);
1464
1465 char_u *s = tv_get_string(&argvars[0]);
1466 varnumber_T len = 0;
1467 int (*func_mb_ptr2char_adv)(char_u **pp);
1468 int ch;
1469
1470 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1471 while (*s != NUL)
1472 {
1473 ch = func_mb_ptr2char_adv(&s);
1474 if (ch > 0xFFFF)
1475 ++len;
1476 ++len;
1477 }
1478 rettv->vval.v_number = len;
1479}
1480
1481/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001482 * "strdisplaywidth()" function
1483 */
1484 void
1485f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1486{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001487 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001488 int col = 0;
1489
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001490 rettv->vval.v_number = -1;
1491
1492 if (in_vim9script()
1493 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001494 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001495 return;
1496
1497 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001498 if (argvars[1].v_type != VAR_UNKNOWN)
1499 col = (int)tv_get_number(&argvars[1]);
1500
1501 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1502}
1503
1504/*
1505 * "strwidth()" function
1506 */
1507 void
1508f_strwidth(typval_T *argvars, typval_T *rettv)
1509{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001510 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001511
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001512 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1513 return;
1514
1515 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001516 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1517}
1518
1519/*
1520 * "strcharpart()" function
1521 */
1522 void
1523f_strcharpart(typval_T *argvars, typval_T *rettv)
1524{
1525 char_u *p;
1526 int nchar;
1527 int nbyte = 0;
1528 int charlen;
1529 int skipcc = FALSE;
1530 int len = 0;
1531 int slen;
1532 int error = FALSE;
1533
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001534 if (in_vim9script()
1535 && (check_for_string_arg(argvars, 0) == FAIL
1536 || check_for_number_arg(argvars, 1) == FAIL
1537 || check_for_opt_number_arg(argvars, 2) == FAIL
1538 || (argvars[2].v_type != VAR_UNKNOWN
1539 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1540 return;
1541
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001542 p = tv_get_string(&argvars[0]);
1543 slen = (int)STRLEN(p);
1544
1545 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1546 if (!error)
1547 {
1548 if (argvars[2].v_type != VAR_UNKNOWN
1549 && argvars[3].v_type != VAR_UNKNOWN)
1550 {
zeertzjq8cf51372023-05-08 15:31:38 +01001551 skipcc = tv_get_bool_chk(&argvars[3], &error);
1552 if (error)
1553 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001554 if (skipcc < 0 || skipcc > 1)
1555 {
zeertzjq8cf51372023-05-08 15:31:38 +01001556 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001557 return;
1558 }
1559 }
1560
1561 if (nchar > 0)
1562 while (nchar > 0 && nbyte < slen)
1563 {
1564 if (skipcc)
1565 nbyte += mb_ptr2len(p + nbyte);
1566 else
1567 nbyte += MB_CPTR2LEN(p + nbyte);
1568 --nchar;
1569 }
1570 else
1571 nbyte = nchar;
1572 if (argvars[2].v_type != VAR_UNKNOWN)
1573 {
1574 charlen = (int)tv_get_number(&argvars[2]);
1575 while (charlen > 0 && nbyte + len < slen)
1576 {
1577 int off = nbyte + len;
1578
1579 if (off < 0)
1580 len += 1;
1581 else
1582 {
1583 if (skipcc)
1584 len += mb_ptr2len(p + off);
1585 else
1586 len += MB_CPTR2LEN(p + off);
1587 }
1588 --charlen;
1589 }
1590 }
1591 else
1592 len = slen - nbyte; // default: all bytes that are available.
1593 }
1594
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001595 // Only return the overlap between the specified part and the actual
1596 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001597 if (nbyte < 0)
1598 {
1599 len += nbyte;
1600 nbyte = 0;
1601 }
1602 else if (nbyte > slen)
1603 nbyte = slen;
1604 if (len < 0)
1605 len = 0;
1606 else if (nbyte + len > slen)
1607 len = slen - nbyte;
1608
1609 rettv->v_type = VAR_STRING;
1610 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1611}
1612
1613/*
1614 * "strpart()" function
1615 */
1616 void
1617f_strpart(typval_T *argvars, typval_T *rettv)
1618{
1619 char_u *p;
1620 int n;
1621 int len;
1622 int slen;
1623 int error = FALSE;
1624
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001625 if (in_vim9script()
1626 && (check_for_string_arg(argvars, 0) == FAIL
1627 || check_for_number_arg(argvars, 1) == FAIL
1628 || check_for_opt_number_arg(argvars, 2) == FAIL
1629 || (argvars[2].v_type != VAR_UNKNOWN
1630 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1631 return;
1632
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001633 p = tv_get_string(&argvars[0]);
1634 slen = (int)STRLEN(p);
1635
1636 n = (int)tv_get_number_chk(&argvars[1], &error);
1637 if (error)
1638 len = 0;
1639 else if (argvars[2].v_type != VAR_UNKNOWN)
1640 len = (int)tv_get_number(&argvars[2]);
1641 else
1642 len = slen - n; // default len: all bytes that are available.
1643
1644 // Only return the overlap between the specified part and the actual
1645 // string.
1646 if (n < 0)
1647 {
1648 len += n;
1649 n = 0;
1650 }
1651 else if (n > slen)
1652 n = slen;
1653 if (len < 0)
1654 len = 0;
1655 else if (n + len > slen)
1656 len = slen - n;
1657
1658 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1659 {
1660 int off;
1661
1662 // length in characters
1663 for (off = n; off < slen && len > 0; --len)
1664 off += mb_ptr2len(p + off);
1665 len = off - n;
1666 }
1667
1668 rettv->v_type = VAR_STRING;
1669 rettv->vval.v_string = vim_strnsave(p + n, len);
1670}
1671
1672/*
1673 * "strridx()" function
1674 */
1675 void
1676f_strridx(typval_T *argvars, typval_T *rettv)
1677{
1678 char_u buf[NUMBUFLEN];
1679 char_u *needle;
1680 char_u *haystack;
1681 char_u *rest;
1682 char_u *lastmatch = NULL;
1683 int haystack_len, end_idx;
1684
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001685 if (in_vim9script()
1686 && (check_for_string_arg(argvars, 0) == FAIL
1687 || check_for_string_arg(argvars, 1) == FAIL
1688 || check_for_opt_number_arg(argvars, 2) == FAIL))
1689 return;
1690
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001691 needle = tv_get_string_chk(&argvars[1]);
1692 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1693
1694 rettv->vval.v_number = -1;
1695 if (needle == NULL || haystack == NULL)
1696 return; // type error; errmsg already given
1697
1698 haystack_len = (int)STRLEN(haystack);
1699 if (argvars[2].v_type != VAR_UNKNOWN)
1700 {
1701 // Third argument: upper limit for index
1702 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1703 if (end_idx < 0)
1704 return; // can never find a match
1705 }
1706 else
1707 end_idx = haystack_len;
1708
1709 if (*needle == NUL)
1710 {
1711 // Empty string matches past the end.
1712 lastmatch = haystack + end_idx;
1713 }
1714 else
1715 {
1716 for (rest = haystack; *rest != '\0'; ++rest)
1717 {
1718 rest = (char_u *)strstr((char *)rest, (char *)needle);
1719 if (rest == NULL || rest > haystack + end_idx)
1720 break;
1721 lastmatch = rest;
1722 }
1723 }
1724
1725 if (lastmatch == NULL)
1726 rettv->vval.v_number = -1;
1727 else
1728 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1729}
1730
1731/*
1732 * "strtrans()" function
1733 */
1734 void
1735f_strtrans(typval_T *argvars, typval_T *rettv)
1736{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001737 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1738 return;
1739
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001740 rettv->v_type = VAR_STRING;
1741 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1742}
1743
Christian Brabandt67672ef2023-04-24 21:09:54 +01001744
1745/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001746 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001747 *
1748 * Converts a byte or character offset in a string to the corresponding UTF-16
1749 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01001750 */
1751 void
1752f_utf16idx(typval_T *argvars, typval_T *rettv)
1753{
1754 rettv->vval.v_number = -1;
1755
1756 if (check_for_string_arg(argvars, 0) == FAIL
1757 || check_for_opt_number_arg(argvars, 1) == FAIL
1758 || check_for_opt_bool_arg(argvars, 2) == FAIL
1759 || (argvars[2].v_type != VAR_UNKNOWN
1760 && check_for_opt_bool_arg(argvars, 3) == FAIL))
1761 return;
1762
1763 char_u *str = tv_get_string_chk(&argvars[0]);
1764 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
1765 if (str == NULL || idx < 0)
1766 return;
1767
1768 varnumber_T countcc = FALSE;
1769 varnumber_T charidx = FALSE;
1770 if (argvars[2].v_type != VAR_UNKNOWN)
1771 {
1772 countcc = tv_get_bool(&argvars[2]);
1773 if (argvars[3].v_type != VAR_UNKNOWN)
1774 charidx = tv_get_bool(&argvars[3]);
1775 }
1776
1777 int (*ptr2len)(char_u *);
1778 if (enc_utf8 && countcc)
1779 ptr2len = utf_ptr2len;
1780 else
1781 ptr2len = mb_ptr2len;
1782
1783 char_u *p;
1784 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001785 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001786 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
1787 {
1788 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001789 {
1790 // If the index is exactly the number of bytes or characters in the
1791 // string then return the length of the string in utf-16 code
1792 // units.
1793 if (charidx ? (idx == 0) : (p == (str + idx)))
1794 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001795 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001796 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001797 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001798 int clen = ptr2len(p);
1799 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1800 if (c > 0xFFFF)
1801 len++;
1802 p += ptr2len(p);
1803 if (charidx)
1804 idx--;
1805 }
1806
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001807 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001808}
1809
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001810/*
1811 * "tolower(string)" function
1812 */
1813 void
1814f_tolower(typval_T *argvars, typval_T *rettv)
1815{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001816 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1817 return;
1818
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001819 rettv->v_type = VAR_STRING;
1820 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1821}
1822
1823/*
1824 * "toupper(string)" function
1825 */
1826 void
1827f_toupper(typval_T *argvars, typval_T *rettv)
1828{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001829 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1830 return;
1831
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001832 rettv->v_type = VAR_STRING;
1833 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1834}
1835
1836/*
1837 * "tr(string, fromstr, tostr)" function
1838 */
1839 void
1840f_tr(typval_T *argvars, typval_T *rettv)
1841{
1842 char_u *in_str;
1843 char_u *fromstr;
1844 char_u *tostr;
1845 char_u *p;
1846 int inlen;
1847 int fromlen;
1848 int tolen;
1849 int idx;
1850 char_u *cpstr;
1851 int cplen;
1852 int first = TRUE;
1853 char_u buf[NUMBUFLEN];
1854 char_u buf2[NUMBUFLEN];
1855 garray_T ga;
1856
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001857 if (in_vim9script()
1858 && (check_for_string_arg(argvars, 0) == FAIL
1859 || check_for_string_arg(argvars, 1) == FAIL
1860 || check_for_string_arg(argvars, 2) == FAIL))
1861 return;
1862
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001863 in_str = tv_get_string(&argvars[0]);
1864 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1865 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1866
1867 // Default return value: empty string.
1868 rettv->v_type = VAR_STRING;
1869 rettv->vval.v_string = NULL;
1870 if (fromstr == NULL || tostr == NULL)
1871 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00001872 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001873
1874 if (!has_mbyte)
1875 // not multi-byte: fromstr and tostr must be the same length
1876 if (STRLEN(fromstr) != STRLEN(tostr))
1877 {
1878error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001879 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001880 ga_clear(&ga);
1881 return;
1882 }
1883
1884 // fromstr and tostr have to contain the same number of chars
1885 while (*in_str != NUL)
1886 {
1887 if (has_mbyte)
1888 {
1889 inlen = (*mb_ptr2len)(in_str);
1890 cpstr = in_str;
1891 cplen = inlen;
1892 idx = 0;
1893 for (p = fromstr; *p != NUL; p += fromlen)
1894 {
1895 fromlen = (*mb_ptr2len)(p);
1896 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1897 {
1898 for (p = tostr; *p != NUL; p += tolen)
1899 {
1900 tolen = (*mb_ptr2len)(p);
1901 if (idx-- == 0)
1902 {
1903 cplen = tolen;
1904 cpstr = p;
1905 break;
1906 }
1907 }
1908 if (*p == NUL) // tostr is shorter than fromstr
1909 goto error;
1910 break;
1911 }
1912 ++idx;
1913 }
1914
1915 if (first && cpstr == in_str)
1916 {
1917 // Check that fromstr and tostr have the same number of
1918 // (multi-byte) characters. Done only once when a character
1919 // of in_str doesn't appear in fromstr.
1920 first = FALSE;
1921 for (p = tostr; *p != NUL; p += tolen)
1922 {
1923 tolen = (*mb_ptr2len)(p);
1924 --idx;
1925 }
1926 if (idx != 0)
1927 goto error;
1928 }
1929
1930 (void)ga_grow(&ga, cplen);
1931 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1932 ga.ga_len += cplen;
1933
1934 in_str += inlen;
1935 }
1936 else
1937 {
1938 // When not using multi-byte chars we can do it faster.
1939 p = vim_strchr(fromstr, *in_str);
1940 if (p != NULL)
1941 ga_append(&ga, tostr[p - fromstr]);
1942 else
1943 ga_append(&ga, *in_str);
1944 ++in_str;
1945 }
1946 }
1947
1948 // add a terminating NUL
1949 (void)ga_grow(&ga, 1);
1950 ga_append(&ga, NUL);
1951
1952 rettv->vval.v_string = ga.ga_data;
1953}
1954
1955/*
1956 * "trim({expr})" function
1957 */
1958 void
1959f_trim(typval_T *argvars, typval_T *rettv)
1960{
1961 char_u buf1[NUMBUFLEN];
1962 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001963 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001964 char_u *mask = NULL;
1965 char_u *tail;
1966 char_u *prev;
1967 char_u *p;
1968 int c1;
1969 int dir = 0;
1970
1971 rettv->v_type = VAR_STRING;
1972 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001973
1974 if (in_vim9script()
1975 && (check_for_string_arg(argvars, 0) == FAIL
1976 || check_for_opt_string_arg(argvars, 1) == FAIL
1977 || (argvars[1].v_type != VAR_UNKNOWN
1978 && check_for_opt_number_arg(argvars, 2) == FAIL)))
1979 return;
1980
1981 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001982 if (head == NULL)
1983 return;
1984
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001985 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001986 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001987
1988 if (argvars[1].v_type == VAR_STRING)
1989 {
1990 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1991
1992 if (argvars[2].v_type != VAR_UNKNOWN)
1993 {
1994 int error = 0;
1995
1996 // leading or trailing characters to trim
1997 dir = (int)tv_get_number_chk(&argvars[2], &error);
1998 if (error)
1999 return;
2000 if (dir < 0 || dir > 2)
2001 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00002002 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002003 return;
2004 }
2005 }
2006 }
2007
2008 if (dir == 0 || dir == 1)
2009 {
2010 // Trim leading characters
2011 while (*head != NUL)
2012 {
2013 c1 = PTR2CHAR(head);
2014 if (mask == NULL)
2015 {
2016 if (c1 > ' ' && c1 != 0xa0)
2017 break;
2018 }
2019 else
2020 {
2021 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2022 if (c1 == PTR2CHAR(p))
2023 break;
2024 if (*p == NUL)
2025 break;
2026 }
2027 MB_PTR_ADV(head);
2028 }
2029 }
2030
2031 tail = head + STRLEN(head);
2032 if (dir == 0 || dir == 2)
2033 {
2034 // Trim trailing characters
2035 for (; tail > head; tail = prev)
2036 {
2037 prev = tail;
2038 MB_PTR_BACK(head, prev);
2039 c1 = PTR2CHAR(prev);
2040 if (mask == NULL)
2041 {
2042 if (c1 > ' ' && c1 != 0xa0)
2043 break;
2044 }
2045 else
2046 {
2047 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2048 if (c1 == PTR2CHAR(p))
2049 break;
2050 if (*p == NUL)
2051 break;
2052 }
2053 }
2054 }
2055 rettv->vval.v_string = vim_strnsave(head, tail - head);
2056}
2057
Bram Moolenaar677658a2022-01-05 16:09:06 +00002058static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002059
2060/*
2061 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2062 */
2063 static varnumber_T
2064tv_nr(typval_T *tvs, int *idxp)
2065{
2066 int idx = *idxp - 1;
2067 varnumber_T n = 0;
2068 int err = FALSE;
2069
2070 if (tvs[idx].v_type == VAR_UNKNOWN)
2071 emsg(_(e_printf));
2072 else
2073 {
2074 ++*idxp;
2075 n = tv_get_number_chk(&tvs[idx], &err);
2076 if (err)
2077 n = 0;
2078 }
2079 return n;
2080}
2081
2082/*
2083 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2084 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2085 * are not converted to a string.
2086 * If "tofree" is not NULL echo_string() is used. All types are converted to
2087 * a string with the same format as ":echo". The caller must free "*tofree".
2088 * Returns NULL for an error.
2089 */
2090 static char *
2091tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2092{
2093 int idx = *idxp - 1;
2094 char *s = NULL;
2095 static char_u numbuf[NUMBUFLEN];
2096
2097 if (tvs[idx].v_type == VAR_UNKNOWN)
2098 emsg(_(e_printf));
2099 else
2100 {
2101 ++*idxp;
2102 if (tofree != NULL)
2103 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2104 else
2105 s = (char *)tv_get_string_chk(&tvs[idx]);
2106 }
2107 return s;
2108}
2109
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002110/*
2111 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2112 */
2113 static double
2114tv_float(typval_T *tvs, int *idxp)
2115{
2116 int idx = *idxp - 1;
2117 double f = 0;
2118
2119 if (tvs[idx].v_type == VAR_UNKNOWN)
2120 emsg(_(e_printf));
2121 else
2122 {
2123 ++*idxp;
2124 if (tvs[idx].v_type == VAR_FLOAT)
2125 f = tvs[idx].vval.v_float;
2126 else if (tvs[idx].v_type == VAR_NUMBER)
2127 f = (double)tvs[idx].vval.v_number;
2128 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002129 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002130 }
2131 return f;
2132}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002133
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002134#endif
2135
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 *
 * "positive", "force_sign" and "space_for_positive" select one of the four
 * forms, an upper case "fmt_spec" selects the upper case variant.
 */
    static const char *
infinity_str(int positive,
             char fmt_spec,
             int force_sign,
             int space_for_positive)
{
    // The first row is used for a lower case "fmt_spec", the second row for
    // an upper case one.
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // Column 0 when not positive, otherwise 1, 2 or 3.
    int column = positive * (1 + force_sign * (1 + space_for_positive));

    return table[ASCII_ISUPPER(fmt_spec) ? column + 4 : column];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002157
2158/*
2159 * This code was included to provide a portable vsnprintf() and snprintf().
2160 * Some systems may provide their own, but we always use this one for
2161 * consistency.
2162 *
2163 * This code is based on snprintf.c - a portable implementation of snprintf
2164 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2165 * Included with permission. It was heavily modified to fit in Vim.
2166 * The original code, including useful comments, can be found here:
2167 * http://www.ijs.si/software/snprintf/
2168 *
2169 * This snprintf() only supports the following conversion specifiers:
2170 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2171 * with flags: '-', '+', ' ', '0' and '#'.
2172 * An asterisk is supported for field width as well as precision.
2173 *
2174 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2175 *
2176 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2177 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2178 *
2179 * The locale is not used, the string is used as a byte string. This is only
2180 * relevant for double-byte encodings where the second byte may be '%'.
2181 *
2182 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2183 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2184 *
2185 * The return value is the number of characters which would be generated
2186 * for the given input, excluding the trailing NUL. If this value
2187 * is greater or equal to "str_m", not all characters from the result
2188 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
2189 * are discarded. If "str_m" is greater than zero it is guaranteed
2190 * the resulting string will be NUL-terminated.
2191 */
2192
2193/*
2194 * When va_list is not supported we only define vim_snprintf().
2195 *
2196 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2197 * "typval_T". When the latter is not used it must be NULL.
2198 */
2199
2200// When generating prototypes all of this is skipped, cproto doesn't
2201// understand this.
2202#ifndef PROTO
2203
// Like vim_snprintf() but append to the string: the formatted text is written
// after the existing contents of "str".  "str_m" is the size of the whole
// buffer; when the existing text already fills it no space is passed on.
// Returns what vim_vsnprintf() returns for the appended part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     ap;
    int         result;
    size_t      used = STRLEN(str);
    size_t      avail = (str_m > used) ? str_m - used : 0;

    va_start(ap, fmt);
    result = vim_vsnprintf(str + used, avail, fmt, ap);
    va_end(ap);
    return result;
}
2222
// Format "fmt" with the following arguments into "str", which has room for
// "str_m" bytes.  Collects the variable arguments and passes them on to
// vim_vsnprintf(), returning its result.
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     args;
    int         result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2234
// Format "fmt" with the arguments in "ap" into "str", which has room for
// "str_m" bytes.  Calls vim_vsnprintf_typval() without a list of typval_T.
    int
vim_vsnprintf(
    char        *str,
    size_t      str_m,
    const char  *fmt,
    va_list     ap)
{
    int result = vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);

    return result;
}
2244
// Argument types for the conversion specifiers of a format string, as
// returned by format_typeof().
enum
{
    TYPE_UNKNOWN = -1,
    TYPE_INT = 0,
    TYPE_LONGINT,
    TYPE_LONGLONGINT,
    TYPE_UNSIGNEDINT,
    TYPE_UNSIGNEDLONGINT,
    TYPE_UNSIGNEDLONGLONGINT,
    TYPE_POINTER,               // "%p"
    TYPE_PERCENT,               // "%%"
    TYPE_CHAR,                  // "%c"
    TYPE_STRING,                // "%s" and "%S"
    TYPE_FLOAT
};
2260
2261/* Types that can be used in a format string
2262 */
2263 int
2264format_typeof(
2265 const char *type,
2266 int usetvs UNUSED)
2267{
2268 // allowed values: \0, h, l, L
2269 char length_modifier = '\0';
2270
2271 // current conversion specifier character
2272 char fmt_spec = '\0';
2273
2274 // parse 'h', 'l' and 'll' length modifiers
2275 if (*type == 'h' || *type == 'l')
2276 {
2277 length_modifier = *type;
2278 type++;
2279 if (length_modifier == 'l' && *type == 'l')
2280 {
2281 // double l = __int64 / varnumber_T
2282 length_modifier = 'L';
2283 type++;
2284 }
2285 }
2286 fmt_spec = *type;
2287
2288 // common synonyms:
2289 switch (fmt_spec)
2290 {
2291 case 'i': fmt_spec = 'd'; break;
2292 case '*': fmt_spec = 'd'; length_modifier = 'h'; break;
2293 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
2294 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
2295 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
2296 default: break;
2297 }
2298
2299# if defined(FEAT_EVAL)
2300 if (usetvs)
2301 {
2302 switch (fmt_spec)
2303 {
2304 case 'd': case 'u': case 'o': case 'x': case 'X':
2305 if (length_modifier == '\0')
2306 length_modifier = 'L';
2307 }
2308 }
2309# endif
2310
2311 // get parameter value, do initial processing
2312 switch (fmt_spec)
2313 {
2314 // '%' and 'c' behave similar to 's' regarding flags and field
2315 // widths
2316 case '%':
2317 return TYPE_PERCENT;
2318
2319 case 'c':
2320 return TYPE_CHAR;
2321
2322 case 's':
2323 case 'S':
2324 return TYPE_STRING;
2325
2326 case 'd': case 'u':
2327 case 'b': case 'B':
2328 case 'o':
2329 case 'x': case 'X':
2330 case 'p':
2331 {
2332 // NOTE: the u, b, o, x, X and p conversion specifiers
2333 // imply the value is unsigned; d implies a signed
2334 // value
2335
2336 // 0 if numeric argument is zero (or if pointer is
2337 // NULL for 'p'), +1 if greater than zero (or nonzero
2338 // for unsigned arguments), -1 if negative (unsigned
2339 // argument is never negative)
2340
2341 if (fmt_spec == 'p')
2342 return TYPE_POINTER;
2343 else if (fmt_spec == 'b' || fmt_spec == 'B')
2344 return TYPE_UNSIGNEDINT;
2345 else if (fmt_spec == 'd')
2346 {
2347 // signed
2348 switch (length_modifier)
2349 {
2350 case '\0':
2351 case 'h':
2352 // char and short arguments are passed as int.
2353 return TYPE_INT;
2354 case 'l':
2355 return TYPE_LONGINT;
2356 case 'L':
2357 return TYPE_LONGLONGINT;
2358 }
2359 }
2360 else
2361 {
2362 // unsigned
2363 switch (length_modifier)
2364 {
2365 case '\0':
2366 case 'h':
2367 return TYPE_UNSIGNEDINT;
2368 case 'l':
2369 return TYPE_UNSIGNEDLONGINT;
2370 case 'L':
2371 return TYPE_UNSIGNEDLONGLONGINT;
2372 }
2373 }
2374 }
2375 break;
2376
2377 case 'f':
2378 case 'F':
2379 case 'e':
2380 case 'E':
2381 case 'g':
2382 case 'G':
2383 return TYPE_FLOAT;
2384 }
2385
2386 return TYPE_UNKNOWN;
2387}
2388
2389 char *
2390format_typename(
2391 const char *type)
2392{
2393 switch (format_typeof(type, FALSE))
2394 {
2395 case TYPE_INT:
2396 return _(typename_int);
2397
2398 case TYPE_LONGINT:
2399 return _(typename_longint);
2400
2401 case TYPE_LONGLONGINT:
2402 return _(typename_longlongint);
2403
2404 case TYPE_UNSIGNEDINT:
2405 return _(typename_unsignedint);
2406
2407 case TYPE_UNSIGNEDLONGINT:
2408 return _(typename_unsignedlongint);
2409
2410 case TYPE_UNSIGNEDLONGLONGINT:
2411 return _(typename_unsignedlonglongint);
2412
2413 case TYPE_POINTER:
2414 return _(typename_pointer);
2415
2416 case TYPE_PERCENT:
2417 return _(typename_percent);
2418
2419 case TYPE_CHAR:
2420 return _(typename_char);
2421
2422 case TYPE_STRING:
2423 return _(typename_string);
2424
2425 case TYPE_FLOAT:
2426 return _(typename_float);
2427 }
2428
2429 return _(typename_unknown);
2430}
2431
2432 int
2433adjust_types(
2434 const char ***ap_types,
2435 int arg,
2436 int *num_posarg,
2437 const char *type)
2438{
2439 if (*ap_types == NULL || *num_posarg < arg)
2440 {
2441 int idx;
2442 const char **new_types;
2443
2444 if (*ap_types == NULL)
2445 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2446 else
2447 new_types = vim_realloc(*ap_types, arg * sizeof(const char *));
2448
2449 if (new_types == NULL)
2450 return FAIL;
2451
2452 for (idx = *num_posarg; idx < arg; ++idx)
2453 new_types[idx] = NULL;
2454
2455 *ap_types = new_types;
2456 *num_posarg = arg;
2457 }
2458
2459 if ((*ap_types)[arg - 1] != NULL)
2460 {
2461 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2462 {
2463 const char *pt = type;
2464 if (pt[0] == '*')
2465 pt = (*ap_types)[arg - 1];
2466
2467 if (pt[0] != '*')
2468 {
2469 switch (pt[0])
2470 {
2471 case 'd': case 'i': break;
2472 default:
2473 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2474 return FAIL;
2475 }
2476 }
2477 }
2478 else
2479 {
2480 if (format_typeof(type, FALSE) != format_typeof((*ap_types)[arg - 1], FALSE))
2481 {
2482 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2483 return FAIL;
2484 }
2485 }
2486 }
2487
2488 (*ap_types)[arg - 1] = type;
2489
2490 return OK;
2491}
2492
2493 int
2494parse_fmt_types(
2495 const char ***ap_types,
2496 int *num_posarg,
2497 const char *fmt,
2498 typval_T *tvs UNUSED
2499 )
2500{
2501 const char *p = fmt;
2502 const char *arg = NULL;
2503
2504 int any_pos = 0;
2505 int any_arg = 0;
2506 int arg_idx;
2507
2508#define CHECK_POS_ARG do { \
2509 if (any_pos && any_arg) \
2510 { \
2511 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2512 goto error; \
2513 } \
2514} while (0);
2515
2516 if (p == NULL)
2517 return OK;
2518
2519 while (*p != NUL)
2520 {
2521 if (*p != '%')
2522 {
2523 char *q = strchr(p + 1, '%');
2524 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2525
2526 p += n;
2527 }
2528 else
2529 {
2530 // allowed values: \0, h, l, L
2531 char length_modifier = '\0';
2532
2533 // variable for positional arg
2534 int pos_arg = -1;
2535 const char *ptype = NULL;
2536
2537 p++; // skip '%'
2538
2539 // First check to see if we find a positional
2540 // argument specifier
2541 ptype = p;
2542
2543 while (VIM_ISDIGIT(*ptype))
2544 ++ptype;
2545
2546 if (*ptype == '$')
2547 {
2548 if (*p == '0')
2549 {
2550 // 0 flag at the wrong place
2551 semsg(_( e_invalid_format_specifier_str), fmt);
2552 goto error;
2553 }
2554
2555 // Positional argument
2556 unsigned int uj = *p++ - '0';
2557
2558 while (VIM_ISDIGIT((int)(*p)))
2559 uj = 10 * uj + (unsigned int)(*p++ - '0');
2560 pos_arg = uj;
2561
2562 any_pos = 1;
2563 CHECK_POS_ARG;
2564
2565 ++p;
2566 }
2567
2568 // parse flags
2569 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2570 || *p == '#' || *p == '\'')
2571 {
2572 switch (*p)
2573 {
2574 case '0': break;
2575 case '-': break;
2576 case '+': break;
2577 case ' ': // If both the ' ' and '+' flags appear, the ' '
2578 // flag should be ignored
2579 break;
2580 case '#': break;
2581 case '\'': break;
2582 }
2583 p++;
2584 }
2585 // If the '0' and '-' flags both appear, the '0' flag should be
2586 // ignored.
2587
2588 // parse field width
2589 if (*(arg = p) == '*')
2590 {
2591 p++;
2592
2593 if (VIM_ISDIGIT((int)(*p)))
2594 {
2595 // Positional argument field width
2596 unsigned int uj = *p++ - '0';
2597
2598 while (VIM_ISDIGIT((int)(*p)))
2599 uj = 10 * uj + (unsigned int)(*p++ - '0');
2600
2601 if (*p != '$')
2602 {
2603 semsg(_( e_invalid_format_specifier_str), fmt);
2604 goto error;
2605 }
2606 else
2607 {
2608 ++p;
2609 any_pos = 1;
2610 CHECK_POS_ARG;
2611
2612 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2613 goto error;
2614 }
2615 }
2616 else
2617 {
2618 any_arg = 1;
2619 CHECK_POS_ARG;
2620 }
2621 }
2622 else if (VIM_ISDIGIT((int)(*(arg = p))))
2623 {
2624 // size_t could be wider than unsigned int; make sure we treat
2625 // argument like common implementations do
2626 unsigned int uj = *p++ - '0';
2627
2628 while (VIM_ISDIGIT((int)(*p)))
2629 uj = 10 * uj + (unsigned int)(*p++ - '0');
2630
2631 if (*p == '$')
2632 {
2633 semsg(_( e_invalid_format_specifier_str), fmt);
2634 goto error;
2635 }
2636 }
2637
2638 // parse precision
2639 if (*p == '.')
2640 {
2641 p++;
2642
2643 if (*(arg = p) == '*')
2644 {
2645 p++;
2646
2647 if (VIM_ISDIGIT((int)(*p)))
2648 {
2649 // Parse precision
2650 unsigned int uj = *p++ - '0';
2651
2652 while (VIM_ISDIGIT((int)(*p)))
2653 uj = 10 * uj + (unsigned int)(*p++ - '0');
2654
2655 if (*p == '$')
2656 {
2657 any_pos = 1;
2658 CHECK_POS_ARG;
2659
2660 ++p;
2661
2662 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2663 goto error;
2664 }
2665 else
2666 {
2667 semsg(_( e_invalid_format_specifier_str), fmt);
2668 goto error;
2669 }
2670 }
2671 else
2672 {
2673 any_arg = 1;
2674 CHECK_POS_ARG;
2675 }
2676 }
2677 else if (VIM_ISDIGIT((int)(*(arg = p))))
2678 {
2679 // size_t could be wider than unsigned int; make sure we
2680 // treat argument like common implementations do
2681 unsigned int uj = *p++ - '0';
2682
2683 while (VIM_ISDIGIT((int)(*p)))
2684 uj = 10 * uj + (unsigned int)(*p++ - '0');
2685
2686 if (*p == '$')
2687 {
2688 semsg(_( e_invalid_format_specifier_str), fmt);
2689 goto error;
2690 }
2691 }
2692 }
2693
2694 if (pos_arg != -1)
2695 {
2696 any_pos = 1;
2697 CHECK_POS_ARG;
2698
2699 ptype = p;
2700 }
2701
2702 // parse 'h', 'l' and 'll' length modifiers
2703 if (*p == 'h' || *p == 'l')
2704 {
2705 length_modifier = *p;
2706 p++;
2707 if (length_modifier == 'l' && *p == 'l')
2708 {
2709 // double l = __int64 / varnumber_T
2710 length_modifier = 'L';
2711 p++;
2712 }
2713 }
2714
2715 switch (*p)
2716 {
2717 // Check for known format specifiers. % is special!
2718 case 'i':
2719 case '*':
2720 case 'd':
2721 case 'u':
2722 case 'o':
2723 case 'D':
2724 case 'U':
2725 case 'O':
2726 case 'x':
2727 case 'X':
2728 case 'b':
2729 case 'B':
2730 case 'c':
2731 case 's':
2732 case 'S':
2733 case 'p':
2734 case 'f':
2735 case 'F':
2736 case 'e':
2737 case 'E':
2738 case 'g':
2739 case 'G':
2740 if (pos_arg != -1)
2741 {
2742 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
2743 goto error;
2744 }
2745 else
2746 {
2747 any_arg = 1;
2748 CHECK_POS_ARG;
2749 }
2750 break;
2751
2752 default:
2753 if (pos_arg != -1)
2754 {
2755 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
2756 goto error;
2757 }
2758 }
2759
2760 if (*p != NUL)
2761 p++; // step over the just processed conversion specifier
2762 }
2763 }
2764
2765 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
2766 {
2767 if ((*ap_types)[arg_idx] == NULL)
2768 {
2769 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
2770 goto error;
2771 }
2772
2773# if defined(FEAT_EVAL)
2774 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
2775 {
2776 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
2777 goto error;
2778 }
2779# endif
2780 }
2781
2782 return OK;
2783
2784error:
2785 vim_free(*ap_types);
2786 *ap_types = NULL;
2787 *num_posarg = 0;
2788 return FAIL;
2789}
2790
2791 void
2792skip_to_arg(
2793 const char **ap_types,
2794 va_list ap_start,
2795 va_list *ap,
2796 int *arg_idx,
2797 int *arg_cur)
2798{
2799 int arg_min = 0;
2800
2801 if (*arg_cur + 1 == *arg_idx)
2802 {
2803 ++*arg_cur;
2804 ++*arg_idx;
2805 return;
2806 }
2807
2808 if (*arg_cur >= *arg_idx)
2809 {
2810 // Reset ap to ap_start and skip arg_idx - 1 types
2811 va_end(*ap);
2812 va_copy(*ap, ap_start);
2813 }
2814 else
2815 {
2816 // Skip over any we should skip
2817 arg_min = *arg_cur;
2818 }
2819
2820 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
2821 {
2822 const char *p = ap_types[*arg_cur];
2823
2824 int fmt_type = format_typeof(p, TRUE);
2825
2826 // get parameter value, do initial processing
2827 switch (fmt_type)
2828 {
2829 case TYPE_PERCENT:
2830 case TYPE_UNKNOWN:
2831 break;
2832
2833 case TYPE_CHAR:
2834 va_arg(*ap, int);
2835 break;
2836
2837 case TYPE_STRING:
2838 va_arg(*ap, char *);
2839 break;
2840
2841 case TYPE_POINTER:
2842 va_arg(*ap, void *);
2843 break;
2844
2845 case TYPE_INT:
2846 va_arg(*ap, int);
2847 break;
2848
2849 case TYPE_LONGINT:
2850 va_arg(*ap, long int);
2851 break;
2852
2853 case TYPE_LONGLONGINT:
2854 va_arg(*ap, varnumber_T);
2855 break;
2856
2857 case TYPE_UNSIGNEDINT:
2858 va_arg(*ap, unsigned int);
2859 break;
2860
2861 case TYPE_UNSIGNEDLONGINT:
2862 va_arg(*ap, unsigned long int);
2863 break;
2864
2865 case TYPE_UNSIGNEDLONGLONGINT:
2866 va_arg(*ap, uvarnumber_T);
2867 break;
2868
2869 case TYPE_FLOAT:
2870 va_arg(*ap, double);
2871 break;
2872 }
2873 }
2874
2875 // Because we know that after we return from this call,
2876 // a va_arg() call is made, we can pre-emptively
2877 // increment the current argument index.
2878 ++*arg_cur;
2879 ++*arg_idx;
2880
2881 return;
2882}
2883
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002884 int
2885vim_vsnprintf_typval(
2886 char *str,
2887 size_t str_m,
2888 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002889 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002890 typval_T *tvs)
2891{
2892 size_t str_l = 0;
2893 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002894 int arg_cur = 0;
2895 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002896 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002897 va_list ap;
2898 const char **ap_types = NULL;
2899
2900 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
2901 return 0;
2902
2903 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002904
2905 if (p == NULL)
2906 p = "";
2907 while (*p != NUL)
2908 {
2909 if (*p != '%')
2910 {
2911 char *q = strchr(p + 1, '%');
2912 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2913
2914 // Copy up to the next '%' or NUL without any changes.
2915 if (str_l < str_m)
2916 {
2917 size_t avail = str_m - str_l;
2918
2919 mch_memmove(str + str_l, p, n > avail ? avail : n);
2920 }
2921 p += n;
2922 str_l += n;
2923 }
2924 else
2925 {
2926 size_t min_field_width = 0, precision = 0;
2927 int zero_padding = 0, precision_specified = 0, justify_left = 0;
2928 int alternate_form = 0, force_sign = 0;
2929
2930 // If both the ' ' and '+' flags appear, the ' ' flag should be
2931 // ignored.
2932 int space_for_positive = 1;
2933
2934 // allowed values: \0, h, l, L
2935 char length_modifier = '\0';
2936
2937 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002938# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002939 // That sounds reasonable to use as the maximum
2940 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002941 char tmp[TMP_LEN];
2942
2943 // string address in case of string argument
2944 const char *str_arg = NULL;
2945
2946 // natural field width of arg without padding and sign
2947 size_t str_arg_l;
2948
2949 // unsigned char argument value - only defined for c conversion.
2950 // N.B. standard explicitly states the char argument for the c
2951 // conversion is unsigned
2952 unsigned char uchar_arg;
2953
2954 // number of zeros to be inserted for numeric conversions as
2955 // required by the precision or minimal field width
2956 size_t number_of_zeros_to_pad = 0;
2957
2958 // index into tmp where zero padding is to be inserted
2959 size_t zero_padding_insertion_ind = 0;
2960
2961 // current conversion specifier character
2962 char fmt_spec = '\0';
2963
2964 // buffer for 's' and 'S' specs
2965 char_u *tofree = NULL;
2966
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002967 // variables for positional arg
2968 int pos_arg = -1;
2969 const char *ptype;
2970
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002971
2972 p++; // skip '%'
2973
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002974 // First check to see if we find a positional
2975 // argument specifier
2976 ptype = p;
2977
2978 while (VIM_ISDIGIT(*ptype))
2979 ++ptype;
2980
2981 if (*ptype == '$')
2982 {
2983 // Positional argument
2984 unsigned int uj = *p++ - '0';
2985
2986 while (VIM_ISDIGIT((int)(*p)))
2987 uj = 10 * uj + (unsigned int)(*p++ - '0');
2988 pos_arg = uj;
2989
2990 ++p;
2991 }
2992
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002993 // parse flags
2994 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2995 || *p == '#' || *p == '\'')
2996 {
2997 switch (*p)
2998 {
2999 case '0': zero_padding = 1; break;
3000 case '-': justify_left = 1; break;
3001 case '+': force_sign = 1; space_for_positive = 0; break;
3002 case ' ': force_sign = 1;
3003 // If both the ' ' and '+' flags appear, the ' '
3004 // flag should be ignored
3005 break;
3006 case '#': alternate_form = 1; break;
3007 case '\'': break;
3008 }
3009 p++;
3010 }
3011 // If the '0' and '-' flags both appear, the '0' flag should be
3012 // ignored.
3013
3014 // parse field width
3015 if (*p == '*')
3016 {
3017 int j;
3018
3019 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003020
3021 if (VIM_ISDIGIT((int)(*p)))
3022 {
3023 // Positional argument field width
3024 unsigned int uj = *p++ - '0';
3025
3026 while (VIM_ISDIGIT((int)(*p)))
3027 uj = 10 * uj + (unsigned int)(*p++ - '0');
3028 arg_idx = uj;
3029
3030 ++p;
3031 }
3032
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003033 j =
3034# if defined(FEAT_EVAL)
3035 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3036# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003037 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3038 va_arg(ap, int));
3039
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003040 if (j >= 0)
3041 min_field_width = j;
3042 else
3043 {
3044 min_field_width = -j;
3045 justify_left = 1;
3046 }
3047 }
3048 else if (VIM_ISDIGIT((int)(*p)))
3049 {
3050 // size_t could be wider than unsigned int; make sure we treat
3051 // argument like common implementations do
3052 unsigned int uj = *p++ - '0';
3053
3054 while (VIM_ISDIGIT((int)(*p)))
3055 uj = 10 * uj + (unsigned int)(*p++ - '0');
3056 min_field_width = uj;
3057 }
3058
3059 // parse precision
3060 if (*p == '.')
3061 {
3062 p++;
3063 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003064
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003065 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003066 {
3067 // size_t could be wider than unsigned int; make sure we
3068 // treat argument like common implementations do
3069 unsigned int uj = *p++ - '0';
3070
3071 while (VIM_ISDIGIT((int)(*p)))
3072 uj = 10 * uj + (unsigned int)(*p++ - '0');
3073 precision = uj;
3074 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003075 else if (*p == '*')
3076 {
3077 int j;
3078
3079 p++;
3080
3081 if (VIM_ISDIGIT((int)(*p)))
3082 {
3083 // positional argument
3084 unsigned int uj = *p++ - '0';
3085
3086 while (VIM_ISDIGIT((int)(*p)))
3087 uj = 10 * uj + (unsigned int)(*p++ - '0');
3088 arg_idx = uj;
3089
3090 ++p;
3091 }
3092
3093 j =
3094# if defined(FEAT_EVAL)
3095 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3096# endif
3097 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3098 va_arg(ap, int));
3099
3100 if (j >= 0)
3101 precision = j;
3102 else
3103 {
3104 precision_specified = 0;
3105 precision = 0;
3106 }
3107 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003108 }
3109
3110 // parse 'h', 'l' and 'll' length modifiers
3111 if (*p == 'h' || *p == 'l')
3112 {
3113 length_modifier = *p;
3114 p++;
3115 if (length_modifier == 'l' && *p == 'l')
3116 {
3117 // double l = __int64 / varnumber_T
3118 length_modifier = 'L';
3119 p++;
3120 }
3121 }
3122 fmt_spec = *p;
3123
3124 // common synonyms:
3125 switch (fmt_spec)
3126 {
3127 case 'i': fmt_spec = 'd'; break;
3128 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3129 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3130 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3131 default: break;
3132 }
3133
3134# if defined(FEAT_EVAL)
3135 switch (fmt_spec)
3136 {
3137 case 'd': case 'u': case 'o': case 'x': case 'X':
3138 if (tvs != NULL && length_modifier == '\0')
3139 length_modifier = 'L';
3140 }
3141# endif
3142
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003143 if (pos_arg != -1)
3144 arg_idx = pos_arg;
3145
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003146 // get parameter value, do initial processing
3147 switch (fmt_spec)
3148 {
3149 // '%' and 'c' behave similar to 's' regarding flags and field
3150 // widths
3151 case '%':
3152 case 'c':
3153 case 's':
3154 case 'S':
3155 str_arg_l = 1;
3156 switch (fmt_spec)
3157 {
3158 case '%':
3159 str_arg = p;
3160 break;
3161
3162 case 'c':
3163 {
3164 int j;
3165
3166 j =
3167# if defined(FEAT_EVAL)
3168 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3169# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003170 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3171 va_arg(ap, int));
3172
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003173 // standard demands unsigned char
3174 uchar_arg = (unsigned char)j;
3175 str_arg = (char *)&uchar_arg;
3176 break;
3177 }
3178
3179 case 's':
3180 case 'S':
3181 str_arg =
3182# if defined(FEAT_EVAL)
3183 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3184# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003185 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3186 va_arg(ap, char *));
3187
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003188 if (str_arg == NULL)
3189 {
3190 str_arg = "[NULL]";
3191 str_arg_l = 6;
3192 }
3193 // make sure not to address string beyond the specified
3194 // precision !!!
3195 else if (!precision_specified)
3196 str_arg_l = strlen(str_arg);
3197 // truncate string if necessary as requested by precision
3198 else if (precision == 0)
3199 str_arg_l = 0;
3200 else
3201 {
3202 // Don't put the #if inside memchr(), it can be a
3203 // macro.
3204 // memchr on HP does not like n > 2^31 !!!
3205 char *q = memchr(str_arg, '\0',
3206 precision <= (size_t)0x7fffffffL ? precision
3207 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003208
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003209 str_arg_l = (q == NULL) ? precision
3210 : (size_t)(q - str_arg);
3211 }
3212 if (fmt_spec == 'S')
3213 {
presuku1f2453f2021-11-24 15:32:57 +00003214 char_u *p1;
3215 size_t i;
3216 int cell;
presukud85fccd2021-11-20 19:38:31 +00003217
presuku1f2453f2021-11-24 15:32:57 +00003218 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003219 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003220 {
3221 cell = mb_ptr2cells(p1);
3222 if (precision_specified && i + cell > precision)
3223 break;
3224 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003225 }
presuku1f2453f2021-11-24 15:32:57 +00003226
3227 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003228 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003229 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003230 }
3231 break;
3232
3233 default:
3234 break;
3235 }
3236 break;
3237
3238 case 'd': case 'u':
3239 case 'b': case 'B':
3240 case 'o':
3241 case 'x': case 'X':
3242 case 'p':
3243 {
3244 // NOTE: the u, b, o, x, X and p conversion specifiers
3245 // imply the value is unsigned; d implies a signed
3246 // value
3247
3248 // 0 if numeric argument is zero (or if pointer is
3249 // NULL for 'p'), +1 if greater than zero (or nonzero
3250 // for unsigned arguments), -1 if negative (unsigned
3251 // argument is never negative)
3252 int arg_sign = 0;
3253
3254 // only set for length modifier h, or for no length
3255 // modifiers
3256 int int_arg = 0;
3257 unsigned int uint_arg = 0;
3258
3259 // only set for length modifier l
3260 long int long_arg = 0;
3261 unsigned long int ulong_arg = 0;
3262
3263 // only set for length modifier ll
3264 varnumber_T llong_arg = 0;
3265 uvarnumber_T ullong_arg = 0;
3266
3267 // only set for b conversion
3268 uvarnumber_T bin_arg = 0;
3269
3270 // pointer argument value -only defined for p
3271 // conversion
3272 void *ptr_arg = NULL;
3273
3274 if (fmt_spec == 'p')
3275 {
3276 length_modifier = '\0';
3277 ptr_arg =
3278# if defined(FEAT_EVAL)
3279 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3280 NULL) :
3281# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003282 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3283 va_arg(ap, void *));
3284
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003285 if (ptr_arg != NULL)
3286 arg_sign = 1;
3287 }
3288 else if (fmt_spec == 'b' || fmt_spec == 'B')
3289 {
3290 bin_arg =
3291# if defined(FEAT_EVAL)
3292 tvs != NULL ?
3293 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3294# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003295 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3296 va_arg(ap, uvarnumber_T));
3297
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003298 if (bin_arg != 0)
3299 arg_sign = 1;
3300 }
3301 else if (fmt_spec == 'd')
3302 {
3303 // signed
3304 switch (length_modifier)
3305 {
3306 case '\0':
3307 case 'h':
3308 // char and short arguments are passed as int.
3309 int_arg =
3310# if defined(FEAT_EVAL)
3311 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3312# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003313 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3314 va_arg(ap, int));
3315
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003316 if (int_arg > 0)
3317 arg_sign = 1;
3318 else if (int_arg < 0)
3319 arg_sign = -1;
3320 break;
3321 case 'l':
3322 long_arg =
3323# if defined(FEAT_EVAL)
3324 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3325# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003326 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3327 va_arg(ap, long int));
3328
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003329 if (long_arg > 0)
3330 arg_sign = 1;
3331 else if (long_arg < 0)
3332 arg_sign = -1;
3333 break;
3334 case 'L':
3335 llong_arg =
3336# if defined(FEAT_EVAL)
3337 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3338# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003339 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3340 va_arg(ap, varnumber_T));
3341
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003342 if (llong_arg > 0)
3343 arg_sign = 1;
3344 else if (llong_arg < 0)
3345 arg_sign = -1;
3346 break;
3347 }
3348 }
3349 else
3350 {
3351 // unsigned
3352 switch (length_modifier)
3353 {
3354 case '\0':
3355 case 'h':
3356 uint_arg =
3357# if defined(FEAT_EVAL)
3358 tvs != NULL ? (unsigned)
3359 tv_nr(tvs, &arg_idx) :
3360# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003361 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3362 va_arg(ap, unsigned int));
3363
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003364 if (uint_arg != 0)
3365 arg_sign = 1;
3366 break;
3367 case 'l':
3368 ulong_arg =
3369# if defined(FEAT_EVAL)
3370 tvs != NULL ? (unsigned long)
3371 tv_nr(tvs, &arg_idx) :
3372# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003373 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3374 va_arg(ap, unsigned long int));
3375
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003376 if (ulong_arg != 0)
3377 arg_sign = 1;
3378 break;
3379 case 'L':
3380 ullong_arg =
3381# if defined(FEAT_EVAL)
3382 tvs != NULL ? (uvarnumber_T)
3383 tv_nr(tvs, &arg_idx) :
3384# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003385 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3386 va_arg(ap, uvarnumber_T));
3387
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003388 if (ullong_arg != 0)
3389 arg_sign = 1;
3390 break;
3391 }
3392 }
3393
3394 str_arg = tmp;
3395 str_arg_l = 0;
3396
3397 // NOTE:
3398 // For d, i, u, o, x, and X conversions, if precision is
3399 // specified, the '0' flag should be ignored. This is so
3400 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3401 // FreeBSD, NetBSD; but not with Perl.
3402 if (precision_specified)
3403 zero_padding = 0;
3404 if (fmt_spec == 'd')
3405 {
3406 if (force_sign && arg_sign >= 0)
3407 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3408 // leave negative numbers for sprintf to handle, to
3409 // avoid handling tricky cases like (short int)-32768
3410 }
3411 else if (alternate_form)
3412 {
3413 if (arg_sign != 0
3414 && (fmt_spec == 'b' || fmt_spec == 'B'
3415 || fmt_spec == 'x' || fmt_spec == 'X') )
3416 {
3417 tmp[str_arg_l++] = '0';
3418 tmp[str_arg_l++] = fmt_spec;
3419 }
3420 // alternate form should have no effect for p
3421 // conversion, but ...
3422 }
3423
3424 zero_padding_insertion_ind = str_arg_l;
3425 if (!precision_specified)
3426 precision = 1; // default precision is 1
3427 if (precision == 0 && arg_sign == 0)
3428 {
3429 // When zero value is formatted with an explicit
3430 // precision 0, the resulting formatted string is
3431 // empty (d, i, u, b, B, o, x, X, p).
3432 }
3433 else
3434 {
3435 char f[6];
3436 int f_l = 0;
3437
3438 // construct a simple format string for sprintf
3439 f[f_l++] = '%';
3440 if (!length_modifier)
3441 ;
3442 else if (length_modifier == 'L')
3443 {
3444# ifdef MSWIN
3445 f[f_l++] = 'I';
3446 f[f_l++] = '6';
3447 f[f_l++] = '4';
3448# else
3449 f[f_l++] = 'l';
3450 f[f_l++] = 'l';
3451# endif
3452 }
3453 else
3454 f[f_l++] = length_modifier;
3455 f[f_l++] = fmt_spec;
3456 f[f_l++] = '\0';
3457
3458 if (fmt_spec == 'p')
3459 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3460 else if (fmt_spec == 'b' || fmt_spec == 'B')
3461 {
3462 char b[8 * sizeof(uvarnumber_T)];
3463 size_t b_l = 0;
3464 uvarnumber_T bn = bin_arg;
3465
3466 do
3467 {
3468 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3469 bn >>= 1;
3470 }
3471 while (bn != 0);
3472
3473 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3474 str_arg_l += b_l;
3475 }
3476 else if (fmt_spec == 'd')
3477 {
3478 // signed
3479 switch (length_modifier)
3480 {
3481 case '\0': str_arg_l += sprintf(
3482 tmp + str_arg_l, f,
3483 int_arg);
3484 break;
3485 case 'h': str_arg_l += sprintf(
3486 tmp + str_arg_l, f,
3487 (short)int_arg);
3488 break;
3489 case 'l': str_arg_l += sprintf(
3490 tmp + str_arg_l, f, long_arg);
3491 break;
3492 case 'L': str_arg_l += sprintf(
3493 tmp + str_arg_l, f, llong_arg);
3494 break;
3495 }
3496 }
3497 else
3498 {
3499 // unsigned
3500 switch (length_modifier)
3501 {
3502 case '\0': str_arg_l += sprintf(
3503 tmp + str_arg_l, f,
3504 uint_arg);
3505 break;
3506 case 'h': str_arg_l += sprintf(
3507 tmp + str_arg_l, f,
3508 (unsigned short)uint_arg);
3509 break;
3510 case 'l': str_arg_l += sprintf(
3511 tmp + str_arg_l, f, ulong_arg);
3512 break;
3513 case 'L': str_arg_l += sprintf(
3514 tmp + str_arg_l, f, ullong_arg);
3515 break;
3516 }
3517 }
3518
3519 // include the optional minus sign and possible
3520 // "0x" in the region before the zero padding
3521 // insertion point
3522 if (zero_padding_insertion_ind < str_arg_l
3523 && tmp[zero_padding_insertion_ind] == '-')
3524 zero_padding_insertion_ind++;
3525 if (zero_padding_insertion_ind + 1 < str_arg_l
3526 && tmp[zero_padding_insertion_ind] == '0'
3527 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3528 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3529 zero_padding_insertion_ind += 2;
3530 }
3531
3532 {
3533 size_t num_of_digits = str_arg_l
3534 - zero_padding_insertion_ind;
3535
3536 if (alternate_form && fmt_spec == 'o'
3537 // unless zero is already the first
3538 // character
3539 && !(zero_padding_insertion_ind < str_arg_l
3540 && tmp[zero_padding_insertion_ind] == '0'))
3541 {
3542 // assure leading zero for alternate-form
3543 // octal numbers
3544 if (!precision_specified
3545 || precision < num_of_digits + 1)
3546 {
3547 // precision is increased to force the
3548 // first character to be zero, except if a
3549 // zero value is formatted with an
3550 // explicit precision of zero
3551 precision = num_of_digits + 1;
3552 }
3553 }
3554 // zero padding to specified precision?
3555 if (num_of_digits < precision)
3556 number_of_zeros_to_pad = precision - num_of_digits;
3557 }
3558 // zero padding to specified minimal field width?
3559 if (!justify_left && zero_padding)
3560 {
3561 int n = (int)(min_field_width - (str_arg_l
3562 + number_of_zeros_to_pad));
3563 if (n > 0)
3564 number_of_zeros_to_pad += n;
3565 }
3566 break;
3567 }
3568
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003569 case 'f':
3570 case 'F':
3571 case 'e':
3572 case 'E':
3573 case 'g':
3574 case 'G':
3575 {
3576 // Floating point.
3577 double f;
3578 double abs_f;
3579 char format[40];
3580 int l;
3581 int remove_trailing_zeroes = FALSE;
3582
3583 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003584# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003585 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003586# endif
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003587 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx, &arg_cur),
3588 va_arg(ap, double));
3589
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003590 abs_f = f < 0 ? -f : f;
3591
3592 if (fmt_spec == 'g' || fmt_spec == 'G')
3593 {
3594 // Would be nice to use %g directly, but it prints
3595 // "1.0" as "1", we don't want that.
3596 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3597 || abs_f == 0.0)
3598 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3599 else
3600 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3601 remove_trailing_zeroes = TRUE;
3602 }
3603
3604 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003605# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003606 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003607# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003608 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003609# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003610 )
3611 {
3612 // Avoid a buffer overflow
3613 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3614 force_sign, space_for_positive));
3615 str_arg_l = STRLEN(tmp);
3616 zero_padding = 0;
3617 }
3618 else
3619 {
3620 if (isnan(f))
3621 {
3622 // Not a number: nan or NAN
3623 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3624 : "nan");
3625 str_arg_l = 3;
3626 zero_padding = 0;
3627 }
3628 else if (isinf(f))
3629 {
3630 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3631 force_sign, space_for_positive));
3632 str_arg_l = STRLEN(tmp);
3633 zero_padding = 0;
3634 }
3635 else
3636 {
3637 // Regular float number
3638 format[0] = '%';
3639 l = 1;
3640 if (force_sign)
3641 format[l++] = space_for_positive ? ' ' : '+';
3642 if (precision_specified)
3643 {
3644 size_t max_prec = TMP_LEN - 10;
3645
3646 // Make sure we don't get more digits than we
3647 // have room for.
3648 if ((fmt_spec == 'f' || fmt_spec == 'F')
3649 && abs_f > 1.0)
3650 max_prec -= (size_t)log10(abs_f);
3651 if (precision > max_prec)
3652 precision = max_prec;
3653 l += sprintf(format + l, ".%d", (int)precision);
3654 }
3655 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
3656 format[l + 1] = NUL;
3657
3658 str_arg_l = sprintf(tmp, format, f);
3659 }
3660
3661 if (remove_trailing_zeroes)
3662 {
3663 int i;
3664 char *tp;
3665
3666 // Using %g or %G: remove superfluous zeroes.
3667 if (fmt_spec == 'f' || fmt_spec == 'F')
3668 tp = tmp + str_arg_l - 1;
3669 else
3670 {
3671 tp = (char *)vim_strchr((char_u *)tmp,
3672 fmt_spec == 'e' ? 'e' : 'E');
3673 if (tp != NULL)
3674 {
3675 // Remove superfluous '+' and leading
3676 // zeroes from the exponent.
3677 if (tp[1] == '+')
3678 {
3679 // Change "1.0e+07" to "1.0e07"
3680 STRMOVE(tp + 1, tp + 2);
3681 --str_arg_l;
3682 }
3683 i = (tp[1] == '-') ? 2 : 1;
3684 while (tp[i] == '0')
3685 {
3686 // Change "1.0e07" to "1.0e7"
3687 STRMOVE(tp + i, tp + i + 1);
3688 --str_arg_l;
3689 }
3690 --tp;
3691 }
3692 }
3693
3694 if (tp != NULL && !precision_specified)
3695 // Remove trailing zeroes, but keep the one
3696 // just after a dot.
3697 while (tp > tmp + 2 && *tp == '0'
3698 && tp[-1] != '.')
3699 {
3700 STRMOVE(tp, tp + 1);
3701 --tp;
3702 --str_arg_l;
3703 }
3704 }
3705 else
3706 {
3707 char *tp;
3708
3709 // Be consistent: some printf("%e") use 1.0e+12
3710 // and some 1.0e+012. Remove one zero in the last
3711 // case.
3712 tp = (char *)vim_strchr((char_u *)tmp,
3713 fmt_spec == 'e' ? 'e' : 'E');
3714 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
3715 && tp[2] == '0'
3716 && vim_isdigit(tp[3])
3717 && vim_isdigit(tp[4]))
3718 {
3719 STRMOVE(tp + 2, tp + 3);
3720 --str_arg_l;
3721 }
3722 }
3723 }
3724 if (zero_padding && min_field_width > str_arg_l
3725 && (tmp[0] == '-' || force_sign))
3726 {
3727 // padding 0's should be inserted after the sign
3728 number_of_zeros_to_pad = min_field_width - str_arg_l;
3729 zero_padding_insertion_ind = 1;
3730 }
3731 str_arg = tmp;
3732 break;
3733 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003734
3735 default:
3736 // unrecognized conversion specifier, keep format string
3737 // as-is
3738 zero_padding = 0; // turn zero padding off for non-numeric
3739 // conversion
3740 justify_left = 1;
3741 min_field_width = 0; // reset flags
3742
3743 // discard the unrecognized conversion, just keep *
3744 // the unrecognized conversion character
3745 str_arg = p;
3746 str_arg_l = 0;
3747 if (*p != NUL)
3748 str_arg_l++; // include invalid conversion specifier
3749 // unchanged if not at end-of-string
3750 break;
3751 }
3752
3753 if (*p != NUL)
3754 p++; // step over the just processed conversion specifier
3755
3756 // insert padding to the left as requested by min_field_width;
3757 // this does not include the zero padding in case of numerical
3758 // conversions
3759 if (!justify_left)
3760 {
3761 // left padding with blank or zero
3762 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
3763
3764 if (pn > 0)
3765 {
3766 if (str_l < str_m)
3767 {
3768 size_t avail = str_m - str_l;
3769
3770 vim_memset(str + str_l, zero_padding ? '0' : ' ',
3771 (size_t)pn > avail ? avail
3772 : (size_t)pn);
3773 }
3774 str_l += pn;
3775 }
3776 }
3777
3778 // zero padding as requested by the precision or by the minimal
3779 // field width for numeric conversions required?
3780 if (number_of_zeros_to_pad == 0)
3781 {
3782 // will not copy first part of numeric right now, *
3783 // force it to be copied later in its entirety
3784 zero_padding_insertion_ind = 0;
3785 }
3786 else
3787 {
3788 // insert first part of numerics (sign or '0x') before zero
3789 // padding
3790 int zn = (int)zero_padding_insertion_ind;
3791
3792 if (zn > 0)
3793 {
3794 if (str_l < str_m)
3795 {
3796 size_t avail = str_m - str_l;
3797
3798 mch_memmove(str + str_l, str_arg,
3799 (size_t)zn > avail ? avail
3800 : (size_t)zn);
3801 }
3802 str_l += zn;
3803 }
3804
3805 // insert zero padding as requested by the precision or min
3806 // field width
3807 zn = (int)number_of_zeros_to_pad;
3808 if (zn > 0)
3809 {
3810 if (str_l < str_m)
3811 {
3812 size_t avail = str_m - str_l;
3813
3814 vim_memset(str + str_l, '0',
3815 (size_t)zn > avail ? avail
3816 : (size_t)zn);
3817 }
3818 str_l += zn;
3819 }
3820 }
3821
3822 // insert formatted string
3823 // (or as-is conversion specifier for unknown conversions)
3824 {
3825 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
3826
3827 if (sn > 0)
3828 {
3829 if (str_l < str_m)
3830 {
3831 size_t avail = str_m - str_l;
3832
3833 mch_memmove(str + str_l,
3834 str_arg + zero_padding_insertion_ind,
3835 (size_t)sn > avail ? avail : (size_t)sn);
3836 }
3837 str_l += sn;
3838 }
3839 }
3840
3841 // insert right padding
3842 if (justify_left)
3843 {
3844 // right blank padding to the field width
3845 int pn = (int)(min_field_width
3846 - (str_arg_l + number_of_zeros_to_pad));
3847
3848 if (pn > 0)
3849 {
3850 if (str_l < str_m)
3851 {
3852 size_t avail = str_m - str_l;
3853
3854 vim_memset(str + str_l, ' ',
3855 (size_t)pn > avail ? avail
3856 : (size_t)pn);
3857 }
3858 str_l += pn;
3859 }
3860 }
3861 vim_free(tofree);
3862 }
3863 }
3864
3865 if (str_m > 0)
3866 {
3867 // make sure the string is nul-terminated even at the expense of
3868 // overwriting the last character (shouldn't happen, but just in case)
3869 //
3870 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
3871 }
3872
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003873 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00003874 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003875
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003876 vim_free(ap_types);
3877 va_end(ap);
3878
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003879 // Return the number of characters formatted (excluding trailing nul
3880 // character), that is, the number of characters that would have been
3881 // written to the buffer if it were large enough.
3882 return (int)str_l;
3883}
3884
3885#endif // PROTO