blob: b38c3d0bd7ef89bd1a82a57bf2cde89a200198b7 [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
154 int l;
155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
272 if (do_special && find_cmdline_var(p, &l) >= 0)
273 {
274 *d++ = '\\'; // insert backslash
275 while (--l >= 0) // copy the var
276 *d++ = *p++;
277 continue;
278 }
Jason Cox6e823512021-08-29 12:36:49 +0200279 if (*p == '\\' && fish_like)
280 {
281 *d++ = '\\';
282 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200283 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200284 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200285
286 MB_COPY_CHAR(p, d);
287 }
288
289 // add terminating quote and finish with a NUL
290# ifdef MSWIN
291 if (double_quotes)
292 *d++ = '"';
293 else
294# endif
295 *d++ = '\'';
296 *d = NUL;
297 }
298
299 return escaped_string;
300}
301
302/*
303 * Like vim_strsave(), but make all characters uppercase.
304 * This uses ASCII lower-to-upper case translation, language independent.
305 */
306 char_u *
307vim_strsave_up(char_u *string)
308{
309 char_u *p1;
310
311 p1 = vim_strsave(string);
312 vim_strup(p1);
313 return p1;
314}
315
316/*
317 * Like vim_strnsave(), but make all characters uppercase.
318 * This uses ASCII lower-to-upper case translation, language independent.
319 */
320 char_u *
321vim_strnsave_up(char_u *string, size_t len)
322{
323 char_u *p1;
324
325 p1 = vim_strnsave(string, len);
326 vim_strup(p1);
327 return p1;
328}
329
330/*
331 * ASCII lower-to-upper case translation, language independent.
332 */
333 void
334vim_strup(
335 char_u *p)
336{
337 char_u *p2;
338 int c;
339
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000340 if (p == NULL)
341 return;
342
343 p2 = p;
344 while ((c = *p2) != NUL)
345 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200346}
347
348#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Make string "orig" all upper-case and return it in allocated memory.
 * Handles multi-byte characters as well as possible.
 * "orig" itself is not changed, the work is done on a copy.
 * Returns NULL when out of memory.
 */
    static char_u *
strup_save(char_u *orig)
{
    char_u	*p;
    char_u	*res;

    // "res" is the start of the result, "p" walks through it.
    res = p = vim_strsave(orig);

    if (res != NULL)
	while (*p != NUL)
	{
	    int		l;

	    if (enc_utf8)
	    {
		int	c, uc;
		int	newl;
		char_u	*s;

		c = utf_ptr2char(p);
		l = utf_ptr2len(p);
		if (c == 0)
		{
		    // overlong sequence, use only the first byte
		    c = *p;
		    l = 1;
		}
		uc = utf_toupper(c);

		// Reallocate string when byte count changes.  This is rare,
		// thus it's OK to do another malloc()/free().
		newl = utf_char2len(uc);
		if (newl != l)
		{
		    s = alloc(STRLEN(res) + 1 + newl - l);
		    if (s == NULL)
		    {
			vim_free(res);
			return NULL;
		    }
		    // Copy the part before "p", leave "newl" bytes open for
		    // the converted character and copy what follows the
		    // original character.
		    mch_memmove(s, res, p - res);
		    STRCPY(s + (p - res) + newl, p + l);
		    // Let "p" point to the same offset in the new string.
		    p = s + (p - res);
		    vim_free(res);
		    res = s;
		}

		utf_char2bytes(uc, p);
		p += newl;
	    }
	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
		p += l;		// skip multi-byte character
	    else
	    {
		*p = TOUPPER_LOC(*p); // note that toupper() can be a macro
		p++;
	    }
	}

    return res;
}
415
/*
 * Make string "orig" all lower-case and return it in allocated memory.
 * Handles multi-byte characters as well as possible.
 * "orig" itself is not changed, the work is done on a copy.
 * Returns NULL when out of memory.
 */
    char_u *
strlow_save(char_u *orig)
{
    char_u	*p;
    char_u	*res;

    // "res" is the start of the result, "p" walks through it.
    res = p = vim_strsave(orig);

    if (res != NULL)
	while (*p != NUL)
	{
	    int		l;

	    if (enc_utf8)
	    {
		int	c, lc;
		int	newl;
		char_u	*s;

		c = utf_ptr2char(p);
		l = utf_ptr2len(p);
		if (c == 0)
		{
		    // overlong sequence, use only the first byte
		    c = *p;
		    l = 1;
		}
		lc = utf_tolower(c);

		// Reallocate string when byte count changes.  This is rare,
		// thus it's OK to do another malloc()/free().
		newl = utf_char2len(lc);
		if (newl != l)
		{
		    s = alloc(STRLEN(res) + 1 + newl - l);
		    if (s == NULL)
		    {
			vim_free(res);
			return NULL;
		    }
		    // Copy the part before "p", leave "newl" bytes open for
		    // the converted character and copy what follows the
		    // original character.
		    mch_memmove(s, res, p - res);
		    STRCPY(s + (p - res) + newl, p + l);
		    // Let "p" point to the same offset in the new string.
		    p = s + (p - res);
		    vim_free(res);
		    res = s;
		}

		utf_char2bytes(lc, p);
		p += newl;
	    }
	    else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
		p += l;		// skip multi-byte character
	    else
	    {
		*p = TOLOWER_LOC(*p); // note that tolower() can be a macro
		p++;
	    }
	}

    return res;
}
482#endif
483
484/*
485 * delete spaces at the end of a string
486 */
487 void
488del_trailing_spaces(char_u *ptr)
489{
490 char_u *q;
491
492 q = ptr + STRLEN(ptr);
493 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
494 *q = NUL;
495}
496
497/*
498 * Like strncpy(), but always terminate the result with one NUL.
499 * "to" must be "len + 1" long!
500 */
501 void
502vim_strncpy(char_u *to, char_u *from, size_t len)
503{
504 STRNCPY(to, from, len);
505 to[len] = NUL;
506}
507
508/*
509 * Like strcat(), but make sure the result fits in "tosize" bytes and is
510 * always NUL terminated. "from" and "to" may overlap.
511 */
512 void
513vim_strcat(char_u *to, char_u *from, size_t tosize)
514{
515 size_t tolen = STRLEN(to);
516 size_t fromlen = STRLEN(from);
517
518 if (tolen + fromlen + 1 > tosize)
519 {
520 mch_memmove(to + tolen, from, tosize - tolen - 1);
521 to[tosize - 1] = NUL;
522 }
523 else
524 mch_memmove(to + tolen, from, fromlen + 1);
525}
526
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000527/*
528 * A version of strlen() that has a maximum length.
529 */
530 size_t
531vim_strlen_maxlen(char *s, size_t maxlen)
532{
533 size_t i;
534 for (i = 0; i < maxlen; ++i)
535 if (s[i] == NUL)
536 break;
537 return i;
538}
539
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200540#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
541/*
542 * Compare two strings, ignoring case, using current locale.
543 * Doesn't work for multi-byte characters.
544 * return 0 for match, < 0 for smaller, > 0 for bigger
545 */
546 int
547vim_stricmp(char *s1, char *s2)
548{
549 int i;
550
551 for (;;)
552 {
553 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
554 if (i != 0)
555 return i; // this character different
556 if (*s1 == NUL)
557 break; // strings match until NUL
558 ++s1;
559 ++s2;
560 }
561 return 0; // strings match
562}
563#endif
564
565#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
566/*
567 * Compare two strings, for length "len", ignoring case, using current locale.
568 * Doesn't work for multi-byte characters.
569 * return 0 for match, < 0 for smaller, > 0 for bigger
570 */
571 int
572vim_strnicmp(char *s1, char *s2, size_t len)
573{
574 int i;
575
576 while (len > 0)
577 {
578 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
579 if (i != 0)
580 return i; // this character different
581 if (*s1 == NUL)
582 break; // strings match until NUL
583 ++s1;
584 ++s2;
585 --len;
586 }
587 return 0; // strings match
588}
589#endif
590
591/*
592 * Search for first occurrence of "c" in "string".
593 * Version of strchr() that handles unsigned char strings with characters from
594 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
595 * end of the string.
596 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000597 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200598vim_strchr(char_u *string, int c)
599{
600 char_u *p;
601 int b;
602
603 p = string;
604 if (enc_utf8 && c >= 0x80)
605 {
606 while (*p != NUL)
607 {
608 int l = utfc_ptr2len(p);
609
610 // Avoid matching an illegal byte here.
611 if (utf_ptr2char(p) == c && l > 1)
612 return p;
613 p += l;
614 }
615 return NULL;
616 }
617 if (enc_dbcs != 0 && c > 255)
618 {
619 int n2 = c & 0xff;
620
621 c = ((unsigned)c >> 8) & 0xff;
622 while ((b = *p) != NUL)
623 {
624 if (b == c && p[1] == n2)
625 return p;
626 p += (*mb_ptr2len)(p);
627 }
628 return NULL;
629 }
630 if (has_mbyte)
631 {
632 while ((b = *p) != NUL)
633 {
634 if (b == c)
635 return p;
636 p += (*mb_ptr2len)(p);
637 }
638 return NULL;
639 }
640 while ((b = *p) != NUL)
641 {
642 if (b == c)
643 return p;
644 ++p;
645 }
646 return NULL;
647}
648
649/*
650 * Version of strchr() that only works for bytes and handles unsigned char
651 * strings with characters above 128 correctly. It also doesn't return a
652 * pointer to the NUL at the end of the string.
653 */
654 char_u *
655vim_strbyte(char_u *string, int c)
656{
657 char_u *p = string;
658
659 while (*p != NUL)
660 {
661 if (*p == c)
662 return p;
663 ++p;
664 }
665 return NULL;
666}
667
668/*
669 * Search for last occurrence of "c" in "string".
670 * Version of strrchr() that handles unsigned char strings with characters from
671 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
672 * end of the string.
673 * Return NULL if not found.
674 * Does not handle multi-byte char for "c"!
675 */
676 char_u *
677vim_strrchr(char_u *string, int c)
678{
679 char_u *retval = NULL;
680 char_u *p = string;
681
682 while (*p)
683 {
684 if (*p == c)
685 retval = p;
686 MB_PTR_ADV(p);
687 }
688 return retval;
689}
690
691/*
692 * Vim's version of strpbrk(), in case it's missing.
693 * Don't generate a prototype for this, causes problems when it's not used.
694 */
695#ifndef PROTO
696# ifndef HAVE_STRPBRK
697# ifdef vim_strpbrk
698# undef vim_strpbrk
699# endif
700 char_u *
701vim_strpbrk(char_u *s, char_u *charset)
702{
703 while (*s)
704 {
705 if (vim_strchr(charset, *s) != NULL)
706 return s;
707 MB_PTR_ADV(s);
708 }
709 return NULL;
710}
711# endif
712#endif
713
714/*
715 * Sort an array of strings.
716 */
717static int sort_compare(const void *s1, const void *s2);
718
/*
 * Comparison function for qsort(): "s1" and "s2" each point to a string
 * pointer, the strings are compared with STRCMP().
 */
    static int
sort_compare(const void *s1, const void *s2)
{
    char	*left = *(char **)s1;
    char	*right = *(char **)s2;

    return STRCMP(left, right);
}
724
725 void
726sort_strings(
727 char_u **files,
728 int count)
729{
730 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
731}
732
733#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
734/*
735 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
736 * When "s" is NULL FALSE is returned.
737 */
738 int
739has_non_ascii(char_u *s)
740{
741 char_u *p;
742
743 if (s != NULL)
744 for (p = s; *p != NUL; ++p)
745 if (*p >= 128)
746 return TRUE;
747 return FALSE;
748}
749#endif
750
751/*
752 * Concatenate two strings and return the result in allocated memory.
753 * Returns NULL when out of memory.
754 */
755 char_u *
756concat_str(char_u *str1, char_u *str2)
757{
758 char_u *dest;
759 size_t l = str1 == NULL ? 0 : STRLEN(str1);
760
761 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000762 if (dest == NULL)
763 return NULL;
764 if (str1 == NULL)
765 *dest = NUL;
766 else
767 STRCPY(dest, str1);
768 if (str2 != NULL)
769 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200770 return dest;
771}
772
zeertzjq4dd266c2023-08-19 11:35:03 +0200773#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
774/*
775 * Reverse text into allocated memory.
776 * Returns the allocated string, NULL when out of memory.
777 */
778 char_u *
779reverse_text(char_u *s)
780{
781 size_t len = STRLEN(s);
782 char_u *rev = alloc(len + 1);
783 if (rev == NULL)
784 return NULL;
785
786 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
787 {
788 if (has_mbyte)
789 {
790 int mb_len = (*mb_ptr2len)(s + s_i);
791 rev_i -= mb_len;
792 mch_memmove(rev + rev_i, s + s_i, mb_len);
793 s_i += mb_len - 1;
794 }
795 else
796 rev[--rev_i] = s[s_i];
797 }
798 rev[len] = NUL;
799 return rev;
800}
801#endif
802
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200803#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200804/*
805 * Return string "str" in ' quotes, doubling ' characters.
806 * If "str" is NULL an empty string is assumed.
807 * If "function" is TRUE make it function('string').
808 */
809 char_u *
810string_quote(char_u *str, int function)
811{
812 unsigned len;
813 char_u *p, *r, *s;
814
815 len = (function ? 13 : 3);
816 if (str != NULL)
817 {
818 len += (unsigned)STRLEN(str);
819 for (p = str; *p != NUL; MB_PTR_ADV(p))
820 if (*p == '\'')
821 ++len;
822 }
823 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000824 if (r == NULL)
825 return NULL;
826
827 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200828 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000829 STRCPY(r, "function('");
830 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200831 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000832 else
833 *r++ = '\'';
834 if (str != NULL)
835 for (p = str; *p != NUL; )
836 {
837 if (*p == '\'')
838 *r++ = '\'';
839 MB_COPY_CHAR(p, r);
840 }
841 *r++ = '\'';
842 if (function)
843 *r++ = ')';
844 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200845 return s;
846}
847
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000848/*
849 * Count the number of times "needle" occurs in string "haystack". Case is
850 * ignored if "ic" is TRUE.
851 */
852 long
853string_count(char_u *haystack, char_u *needle, int ic)
854{
855 long n = 0;
856 char_u *p = haystack;
857 char_u *next;
858
859 if (p == NULL || needle == NULL || *needle == NUL)
860 return 0;
861
862 if (ic)
863 {
864 size_t len = STRLEN(needle);
865
866 while (*p != NUL)
867 {
868 if (MB_STRNICMP(p, needle, len) == 0)
869 {
870 ++n;
871 p += len;
872 }
873 else
874 MB_PTR_ADV(p);
875 }
876 }
877 else
878 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
879 {
880 ++n;
881 p = next + STRLEN(needle);
882 }
883
884 return n;
885}
886
887/*
888 * Make a typval_T of the first character of "input" and store it in "output".
889 * Return OK or FAIL.
890 */
891 static int
892copy_first_char_to_tv(char_u *input, typval_T *output)
893{
894 char_u buf[MB_MAXBYTES + 1];
895 int len;
896
897 if (input == NULL || output == NULL)
898 return FAIL;
899
900 len = has_mbyte ? mb_ptr2len(input) : 1;
901 STRNCPY(buf, input, len);
902 buf[len] = NUL;
903 output->v_type = VAR_STRING;
904 output->vval.v_string = vim_strsave(buf);
905
906 return output->vval.v_string == NULL ? FAIL : OK;
907}
908
/*
 * Implementation of map() and filter() for a String.  Apply "expr" to every
 * character in string "str" and return the result in "rettv".
 * "filtermap" selects what is done with each character:
 * - FILTERMAP_MAP and FILTERMAP_MAPNEW: the string that "expr" evaluates to
 *   is appended to the result; an error is given when it is not a string.
 * - FILTERMAP_FOREACH: the character itself is always appended.
 * - otherwise: the character is appended unless filter_map_one() sets "rem".
 * v:key is set to the index of the character while "expr" is evaluated.
 * When an error occurs the text collected so far is returned.
 */
    void
string_filter_map(
	char_u		*str,
	filtermap_T	filtermap,
	typval_T	*expr,
	typval_T	*rettv)
{
    char_u	*p;
    typval_T	tv;		// the current character as a string
    garray_T	ga;		// collects the resulting text
    int		len = 0;	// byte length of the current character
    int		idx = 0;	// index of the current character, for v:key
    int		rem;		// set by filter_map_one()
    typval_T	newtv;		// result of evaluating "expr"
    funccall_T	*fc;

    rettv->v_type = VAR_STRING;
    rettv->vval.v_string = NULL;

    // set_vim_var_nr() doesn't set the type
    set_vim_var_type(VV_KEY, VAR_NUMBER);

    // Create one funccall_T for all eval_expr_typval() calls.
    fc = eval_expr_get_funccal(expr, &newtv);

    ga_init2(&ga, sizeof(char), 80);
    for (p = str; *p != NUL; p += len)
    {
	if (copy_first_char_to_tv(p, &tv) == FAIL)
	    break;
	len = (int)STRLEN(tv.vval.v_string);

	set_vim_var_nr(VV_KEY, idx);
	if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
		|| did_emsg)
	{
	    clear_tv(&newtv);
	    clear_tv(&tv);
	    break;
	}
	if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
	{
	    if (newtv.v_type != VAR_STRING)
	    {
		clear_tv(&newtv);
		clear_tv(&tv);
		emsg(_(e_string_required));
		break;
	    }
	    else
		ga_concat(&ga, newtv.vval.v_string);
	}
	else if (filtermap == FILTERMAP_FOREACH || !rem)
	    ga_concat(&ga, tv.vval.v_string);

	clear_tv(&newtv);
	clear_tv(&tv);

	++idx;
    }
    // Terminate the collected text and hand it over to "rettv".
    ga_append(&ga, NUL);
    rettv->vval.v_string = ga.ga_data;
    if (fc != NULL)
	remove_funccal();
}
978
979/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100980 * Implementation of reduce() for String "argvars[0]" using the function "expr"
981 * starting with the optional initial value "argvars[2]" and return the result
982 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000983 */
984 void
985string_reduce(
986 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100987 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000988 typval_T *rettv)
989{
990 char_u *p = tv_get_string(&argvars[0]);
991 int len;
992 typval_T argv[3];
993 int r;
994 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +0100995 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000996
997 if (argvars[2].v_type == VAR_UNKNOWN)
998 {
999 if (*p == NUL)
1000 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001001 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001002 return;
1003 }
1004 if (copy_first_char_to_tv(p, rettv) == FAIL)
1005 return;
1006 p += STRLEN(rettv->vval.v_string);
1007 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001008 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001009 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001010 else
1011 copy_tv(&argvars[2], rettv);
1012
zeertzjqe7d49462023-04-16 20:53:55 +01001013 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001014 fc = eval_expr_get_funccal(expr, rettv);
1015
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001016 for ( ; *p != NUL; p += len)
1017 {
1018 argv[0] = *rettv;
1019 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1020 break;
1021 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001022
zeertzjqad0c4422023-08-17 22:15:47 +02001023 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001024
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001025 clear_tv(&argv[0]);
1026 clear_tv(&argv[1]);
1027 if (r == FAIL || called_emsg != called_emsg_start)
1028 return;
1029 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001030
1031 if (fc != NULL)
1032 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001033}
1034
/*
 * Implementation of "byteidx()" and "byteidxcomp()" functions
 * "argvars[0]" is the string, "argvars[1]" the index to convert and the
 * optional "argvars[2]" a bool; when it is TRUE a character above 0xFFFF
 * counts for two index positions.
 * When "comp" is TRUE and the encoding is UTF-8 characters are stepped over
 * with utf_ptr2len(), otherwise with mb_ptr2len().
 * The byte offset is stored in "rettv"; it is -1 for an invalid argument or
 * when the index is beyond the end of the string.
 */
    static void
byteidx_common(typval_T *argvars, typval_T *rettv, int comp UNUSED)
{
    rettv->vval.v_number = -1;

    if (in_vim9script()
	    && (check_for_string_arg(argvars, 0) == FAIL
		|| check_for_number_arg(argvars, 1) == FAIL
		|| check_for_opt_bool_arg(argvars, 2) == FAIL))
	return;

    char_u *str = tv_get_string_chk(&argvars[0]);
    varnumber_T	idx = tv_get_number_chk(&argvars[1], NULL);
    if (str == NULL || idx < 0)
	return;

    varnumber_T	utf16idx = FALSE;
    if (argvars[2].v_type != VAR_UNKNOWN)
    {
	int error = FALSE;
	utf16idx = tv_get_bool_chk(&argvars[2], &error);
	if (error)
	    return;
	if (utf16idx < 0 || utf16idx > 1)
	{
	    semsg(_(e_using_number_as_bool_nr), utf16idx);
	    return;
	}
    }

    // Select the function used to get the byte length of a character.
    int (*ptr2len)(char_u *);
    if (enc_utf8 && comp)
	ptr2len = utf_ptr2len;
    else
	ptr2len = mb_ptr2len;

    // Step over one character for every index position.
    char_u *t = str;
    for ( ; idx > 0; idx--)
    {
	if (*t == NUL)		// EOL reached
	    return;
	if (utf16idx)
	{
	    int clen = ptr2len(t);
	    int c = (clen > 1) ? utf_ptr2char(t) : *t;
	    // A character above 0xFFFF uses up one more index position.
	    if (c > 0xFFFF)
		idx--;
	}
	// When the extra decrement above made "idx" zero the index is inside
	// this character: do not step over it, so that the offset of the
	// character itself is returned.
	if (idx > 0)
	    t += ptr2len(t);
    }
    rettv->vval.v_number = (varnumber_T)(t - str);
}
1091
1092/*
1093 * "byteidx()" function
1094 */
1095 void
1096f_byteidx(typval_T *argvars, typval_T *rettv)
1097{
Bram Moolenaare4098452023-05-07 18:53:49 +01001098 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001099}
1100
1101/*
1102 * "byteidxcomp()" function
1103 */
1104 void
1105f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1106{
Bram Moolenaare4098452023-05-07 18:53:49 +01001107 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001108}
1109
/*
 * "charidx()" function
 * "argvars[0]" is the string and "argvars[1]" the index to convert.
 * When the optional bool "argvars[2]" is TRUE and the encoding is UTF-8
 * characters are stepped over with utf_ptr2len(), otherwise with
 * mb_ptr2len().
 * When the optional bool "argvars[3]" is TRUE the index counts characters,
 * where a character above 0xFFFF counts for two, otherwise it is a byte
 * index.
 * The character index is stored in "rettv"; it is -1 for an invalid argument
 * or when the index is beyond the end of the string.
 */
    void
f_charidx(typval_T *argvars, typval_T *rettv)
{
    rettv->vval.v_number = -1;

    if (check_for_string_arg(argvars, 0) == FAIL
	    || check_for_number_arg(argvars, 1) == FAIL
	    || check_for_opt_bool_arg(argvars, 2) == FAIL
	    || (argvars[2].v_type != VAR_UNKNOWN
		&& check_for_opt_bool_arg(argvars, 3) == FAIL))
	return;

    char_u *str = tv_get_string_chk(&argvars[0]);
    varnumber_T	idx = tv_get_number_chk(&argvars[1], NULL);
    if (str == NULL || idx < 0)
	return;

    varnumber_T	countcc = FALSE;
    varnumber_T	utf16idx = FALSE;
    if (argvars[2].v_type != VAR_UNKNOWN)
    {
	countcc = tv_get_bool(&argvars[2]);
	if (argvars[3].v_type != VAR_UNKNOWN)
	    utf16idx = tv_get_bool(&argvars[3]);
    }

    // Select the function used to get the byte length of a character.
    int (*ptr2len)(char_u *);
    if (enc_utf8 && countcc)
	ptr2len = utf_ptr2len;
    else
	ptr2len = mb_ptr2len;

    // Count characters in "len" until the index has been passed: with
    // "utf16idx" by counting "idx" down, otherwise by comparing the position
    // "p" with the byte offset "idx".
    char_u	*p;
    int		len;
    for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
    {
	if (*p == NUL)
	{
	    // If the index is exactly the number of bytes or utf-16 code units
	    // in the string then return the length of the string in
	    // characters.
	    if (utf16idx ? (idx == 0) : (p == (str + idx)))
		rettv->vval.v_number = len;
	    return;
	}
	if (utf16idx)
	{
	    idx--;
	    int clen = ptr2len(p);
	    int c = (clen > 1) ? utf_ptr2char(p) : *p;
	    // A character above 0xFFFF uses up one more index position.
	    if (c > 0xFFFF)
		idx--;
	}
	p += ptr2len(p);
    }

    // The loop counted one character past the one that contains the index.
    rettv->vval.v_number = len > 0 ? len - 1 : 0;
}
1171
1172/*
1173 * "str2list()" function
1174 */
1175 void
1176f_str2list(typval_T *argvars, typval_T *rettv)
1177{
1178 char_u *p;
1179 int utf8 = FALSE;
1180
1181 if (rettv_list_alloc(rettv) == FAIL)
1182 return;
1183
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001184 if (in_vim9script()
1185 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001186 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001187 return;
1188
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001189 if (argvars[1].v_type != VAR_UNKNOWN)
1190 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1191
1192 p = tv_get_string(&argvars[0]);
1193
1194 if (has_mbyte || utf8)
1195 {
1196 int (*ptr2len)(char_u *);
1197 int (*ptr2char)(char_u *);
1198
1199 if (utf8 || enc_utf8)
1200 {
1201 ptr2len = utf_ptr2len;
1202 ptr2char = utf_ptr2char;
1203 }
1204 else
1205 {
1206 ptr2len = mb_ptr2len;
1207 ptr2char = mb_ptr2char;
1208 }
1209
1210 for ( ; *p != NUL; p += (*ptr2len)(p))
1211 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1212 }
1213 else
1214 for ( ; *p != NUL; ++p)
1215 list_append_number(rettv->vval.v_list, *p);
1216}
1217
1218/*
1219 * "str2nr()" function
1220 */
1221 void
1222f_str2nr(typval_T *argvars, typval_T *rettv)
1223{
1224 int base = 10;
1225 char_u *p;
1226 varnumber_T n;
1227 int what = 0;
1228 int isneg;
1229
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001230 if (in_vim9script()
1231 && (check_for_string_arg(argvars, 0) == FAIL
1232 || check_for_opt_number_arg(argvars, 1) == FAIL
1233 || (argvars[1].v_type != VAR_UNKNOWN
1234 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1235 return;
1236
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001237 if (argvars[1].v_type != VAR_UNKNOWN)
1238 {
1239 base = (int)tv_get_number(&argvars[1]);
1240 if (base != 2 && base != 8 && base != 10 && base != 16)
1241 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001242 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001243 return;
1244 }
1245 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1246 what |= STR2NR_QUOTE;
1247 }
1248
1249 p = skipwhite(tv_get_string_strict(&argvars[0]));
1250 isneg = (*p == '-');
1251 if (*p == '+' || *p == '-')
1252 p = skipwhite(p + 1);
1253 switch (base)
1254 {
1255 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1256 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1257 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1258 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001259 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001260 // Text after the number is silently ignored.
1261 if (isneg)
1262 rettv->vval.v_number = -n;
1263 else
1264 rettv->vval.v_number = n;
1265
1266}
1267
1268/*
1269 * "strgetchar()" function
1270 */
1271 void
1272f_strgetchar(typval_T *argvars, typval_T *rettv)
1273{
1274 char_u *str;
1275 int len;
1276 int error = FALSE;
1277 int charidx;
1278 int byteidx = 0;
1279
1280 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001281
1282 if (in_vim9script()
1283 && (check_for_string_arg(argvars, 0) == FAIL
1284 || check_for_number_arg(argvars, 1) == FAIL))
1285 return;
1286
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001287 str = tv_get_string_chk(&argvars[0]);
1288 if (str == NULL)
1289 return;
1290 len = (int)STRLEN(str);
1291 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1292 if (error)
1293 return;
1294
1295 while (charidx >= 0 && byteidx < len)
1296 {
1297 if (charidx == 0)
1298 {
1299 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1300 break;
1301 }
1302 --charidx;
1303 byteidx += MB_CPTR2LEN(str + byteidx);
1304 }
1305}
1306
1307/*
1308 * "stridx()" function
1309 */
1310 void
1311f_stridx(typval_T *argvars, typval_T *rettv)
1312{
1313 char_u buf[NUMBUFLEN];
1314 char_u *needle;
1315 char_u *haystack;
1316 char_u *save_haystack;
1317 char_u *pos;
1318 int start_idx;
1319
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001320 if (in_vim9script()
1321 && (check_for_string_arg(argvars, 0) == FAIL
1322 || check_for_string_arg(argvars, 1) == FAIL
1323 || check_for_opt_number_arg(argvars, 2) == FAIL))
1324 return;
1325
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001326 needle = tv_get_string_chk(&argvars[1]);
1327 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1328 rettv->vval.v_number = -1;
1329 if (needle == NULL || haystack == NULL)
1330 return; // type error; errmsg already given
1331
1332 if (argvars[2].v_type != VAR_UNKNOWN)
1333 {
1334 int error = FALSE;
1335
1336 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1337 if (error || start_idx >= (int)STRLEN(haystack))
1338 return;
1339 if (start_idx >= 0)
1340 haystack += start_idx;
1341 }
1342
1343 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1344 if (pos != NULL)
1345 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1346}
1347
1348/*
1349 * "string()" function
1350 */
1351 void
1352f_string(typval_T *argvars, typval_T *rettv)
1353{
1354 char_u *tofree;
1355 char_u numbuf[NUMBUFLEN];
1356
1357 rettv->v_type = VAR_STRING;
1358 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1359 get_copyID());
1360 // Make a copy if we have a value but it's not in allocated memory.
1361 if (rettv->vval.v_string != NULL && tofree == NULL)
1362 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1363}
1364
1365/*
1366 * "strlen()" function
1367 */
1368 void
1369f_strlen(typval_T *argvars, typval_T *rettv)
1370{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001371 if (in_vim9script()
1372 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1373 return;
1374
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001375 rettv->vval.v_number = (varnumber_T)(STRLEN(
1376 tv_get_string(&argvars[0])));
1377}
1378
1379 static void
1380strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1381{
1382 char_u *s = tv_get_string(&argvars[0]);
1383 varnumber_T len = 0;
1384 int (*func_mb_ptr2char_adv)(char_u **pp);
1385
1386 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1387 while (*s != NUL)
1388 {
1389 func_mb_ptr2char_adv(&s);
1390 ++len;
1391 }
1392 rettv->vval.v_number = len;
1393}
1394
1395/*
1396 * "strcharlen()" function
1397 */
1398 void
1399f_strcharlen(typval_T *argvars, typval_T *rettv)
1400{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001401 if (in_vim9script()
1402 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1403 return;
1404
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001405 strchar_common(argvars, rettv, TRUE);
1406}
1407
1408/*
1409 * "strchars()" function
1410 */
1411 void
1412f_strchars(typval_T *argvars, typval_T *rettv)
1413{
1414 varnumber_T skipcc = FALSE;
1415
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001416 if (in_vim9script()
1417 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001418 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001419 return;
1420
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001421 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001422 {
zeertzjq8cf51372023-05-08 15:31:38 +01001423 int error = FALSE;
1424 skipcc = tv_get_bool_chk(&argvars[1], &error);
1425 if (error)
1426 return;
1427 if (skipcc < 0 || skipcc > 1)
1428 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001429 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001430 return;
1431 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001432 }
zeertzjq8cf51372023-05-08 15:31:38 +01001433
1434 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001435}
1436
1437/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001438 * "strutf16len()" function
1439 */
1440 void
1441f_strutf16len(typval_T *argvars, typval_T *rettv)
1442{
1443 rettv->vval.v_number = -1;
1444
1445 if (check_for_string_arg(argvars, 0) == FAIL
1446 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1447 return;
1448
1449 varnumber_T countcc = FALSE;
1450 if (argvars[1].v_type != VAR_UNKNOWN)
1451 countcc = tv_get_bool(&argvars[1]);
1452
1453 char_u *s = tv_get_string(&argvars[0]);
1454 varnumber_T len = 0;
1455 int (*func_mb_ptr2char_adv)(char_u **pp);
1456 int ch;
1457
1458 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1459 while (*s != NUL)
1460 {
1461 ch = func_mb_ptr2char_adv(&s);
1462 if (ch > 0xFFFF)
1463 ++len;
1464 ++len;
1465 }
1466 rettv->vval.v_number = len;
1467}
1468
1469/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001470 * "strdisplaywidth()" function
1471 */
1472 void
1473f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1474{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001475 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001476 int col = 0;
1477
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001478 rettv->vval.v_number = -1;
1479
1480 if (in_vim9script()
1481 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001482 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001483 return;
1484
1485 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001486 if (argvars[1].v_type != VAR_UNKNOWN)
1487 col = (int)tv_get_number(&argvars[1]);
1488
1489 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1490}
1491
1492/*
1493 * "strwidth()" function
1494 */
1495 void
1496f_strwidth(typval_T *argvars, typval_T *rettv)
1497{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001498 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001499
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001500 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1501 return;
1502
1503 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001504 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1505}
1506
1507/*
1508 * "strcharpart()" function
1509 */
1510 void
1511f_strcharpart(typval_T *argvars, typval_T *rettv)
1512{
1513 char_u *p;
1514 int nchar;
1515 int nbyte = 0;
1516 int charlen;
1517 int skipcc = FALSE;
1518 int len = 0;
1519 int slen;
1520 int error = FALSE;
1521
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001522 if (in_vim9script()
1523 && (check_for_string_arg(argvars, 0) == FAIL
1524 || check_for_number_arg(argvars, 1) == FAIL
1525 || check_for_opt_number_arg(argvars, 2) == FAIL
1526 || (argvars[2].v_type != VAR_UNKNOWN
1527 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1528 return;
1529
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001530 p = tv_get_string(&argvars[0]);
1531 slen = (int)STRLEN(p);
1532
1533 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1534 if (!error)
1535 {
1536 if (argvars[2].v_type != VAR_UNKNOWN
1537 && argvars[3].v_type != VAR_UNKNOWN)
1538 {
zeertzjq8cf51372023-05-08 15:31:38 +01001539 skipcc = tv_get_bool_chk(&argvars[3], &error);
1540 if (error)
1541 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001542 if (skipcc < 0 || skipcc > 1)
1543 {
zeertzjq8cf51372023-05-08 15:31:38 +01001544 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001545 return;
1546 }
1547 }
1548
1549 if (nchar > 0)
1550 while (nchar > 0 && nbyte < slen)
1551 {
1552 if (skipcc)
1553 nbyte += mb_ptr2len(p + nbyte);
1554 else
1555 nbyte += MB_CPTR2LEN(p + nbyte);
1556 --nchar;
1557 }
1558 else
1559 nbyte = nchar;
1560 if (argvars[2].v_type != VAR_UNKNOWN)
1561 {
1562 charlen = (int)tv_get_number(&argvars[2]);
1563 while (charlen > 0 && nbyte + len < slen)
1564 {
1565 int off = nbyte + len;
1566
1567 if (off < 0)
1568 len += 1;
1569 else
1570 {
1571 if (skipcc)
1572 len += mb_ptr2len(p + off);
1573 else
1574 len += MB_CPTR2LEN(p + off);
1575 }
1576 --charlen;
1577 }
1578 }
1579 else
1580 len = slen - nbyte; // default: all bytes that are available.
1581 }
1582
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001583 // Only return the overlap between the specified part and the actual
1584 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001585 if (nbyte < 0)
1586 {
1587 len += nbyte;
1588 nbyte = 0;
1589 }
1590 else if (nbyte > slen)
1591 nbyte = slen;
1592 if (len < 0)
1593 len = 0;
1594 else if (nbyte + len > slen)
1595 len = slen - nbyte;
1596
1597 rettv->v_type = VAR_STRING;
1598 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1599}
1600
1601/*
1602 * "strpart()" function
1603 */
1604 void
1605f_strpart(typval_T *argvars, typval_T *rettv)
1606{
1607 char_u *p;
1608 int n;
1609 int len;
1610 int slen;
1611 int error = FALSE;
1612
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001613 if (in_vim9script()
1614 && (check_for_string_arg(argvars, 0) == FAIL
1615 || check_for_number_arg(argvars, 1) == FAIL
1616 || check_for_opt_number_arg(argvars, 2) == FAIL
1617 || (argvars[2].v_type != VAR_UNKNOWN
1618 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1619 return;
1620
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001621 p = tv_get_string(&argvars[0]);
1622 slen = (int)STRLEN(p);
1623
1624 n = (int)tv_get_number_chk(&argvars[1], &error);
1625 if (error)
1626 len = 0;
1627 else if (argvars[2].v_type != VAR_UNKNOWN)
1628 len = (int)tv_get_number(&argvars[2]);
1629 else
1630 len = slen - n; // default len: all bytes that are available.
1631
1632 // Only return the overlap between the specified part and the actual
1633 // string.
1634 if (n < 0)
1635 {
1636 len += n;
1637 n = 0;
1638 }
1639 else if (n > slen)
1640 n = slen;
1641 if (len < 0)
1642 len = 0;
1643 else if (n + len > slen)
1644 len = slen - n;
1645
1646 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1647 {
1648 int off;
1649
1650 // length in characters
1651 for (off = n; off < slen && len > 0; --len)
1652 off += mb_ptr2len(p + off);
1653 len = off - n;
1654 }
1655
1656 rettv->v_type = VAR_STRING;
1657 rettv->vval.v_string = vim_strnsave(p + n, len);
1658}
1659
1660/*
1661 * "strridx()" function
1662 */
1663 void
1664f_strridx(typval_T *argvars, typval_T *rettv)
1665{
1666 char_u buf[NUMBUFLEN];
1667 char_u *needle;
1668 char_u *haystack;
1669 char_u *rest;
1670 char_u *lastmatch = NULL;
1671 int haystack_len, end_idx;
1672
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001673 if (in_vim9script()
1674 && (check_for_string_arg(argvars, 0) == FAIL
1675 || check_for_string_arg(argvars, 1) == FAIL
1676 || check_for_opt_number_arg(argvars, 2) == FAIL))
1677 return;
1678
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001679 needle = tv_get_string_chk(&argvars[1]);
1680 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1681
1682 rettv->vval.v_number = -1;
1683 if (needle == NULL || haystack == NULL)
1684 return; // type error; errmsg already given
1685
1686 haystack_len = (int)STRLEN(haystack);
1687 if (argvars[2].v_type != VAR_UNKNOWN)
1688 {
1689 // Third argument: upper limit for index
1690 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1691 if (end_idx < 0)
1692 return; // can never find a match
1693 }
1694 else
1695 end_idx = haystack_len;
1696
1697 if (*needle == NUL)
1698 {
1699 // Empty string matches past the end.
1700 lastmatch = haystack + end_idx;
1701 }
1702 else
1703 {
1704 for (rest = haystack; *rest != '\0'; ++rest)
1705 {
1706 rest = (char_u *)strstr((char *)rest, (char *)needle);
1707 if (rest == NULL || rest > haystack + end_idx)
1708 break;
1709 lastmatch = rest;
1710 }
1711 }
1712
1713 if (lastmatch == NULL)
1714 rettv->vval.v_number = -1;
1715 else
1716 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1717}
1718
1719/*
1720 * "strtrans()" function
1721 */
1722 void
1723f_strtrans(typval_T *argvars, typval_T *rettv)
1724{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001725 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1726 return;
1727
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001728 rettv->v_type = VAR_STRING;
1729 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1730}
1731
Christian Brabandt67672ef2023-04-24 21:09:54 +01001732
1733/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001734 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001735 *
1736 * Converts a byte or character offset in a string to the corresponding UTF-16
1737 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01001738 */
1739 void
1740f_utf16idx(typval_T *argvars, typval_T *rettv)
1741{
1742 rettv->vval.v_number = -1;
1743
1744 if (check_for_string_arg(argvars, 0) == FAIL
1745 || check_for_opt_number_arg(argvars, 1) == FAIL
1746 || check_for_opt_bool_arg(argvars, 2) == FAIL
1747 || (argvars[2].v_type != VAR_UNKNOWN
1748 && check_for_opt_bool_arg(argvars, 3) == FAIL))
1749 return;
1750
1751 char_u *str = tv_get_string_chk(&argvars[0]);
1752 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
1753 if (str == NULL || idx < 0)
1754 return;
1755
1756 varnumber_T countcc = FALSE;
1757 varnumber_T charidx = FALSE;
1758 if (argvars[2].v_type != VAR_UNKNOWN)
1759 {
1760 countcc = tv_get_bool(&argvars[2]);
1761 if (argvars[3].v_type != VAR_UNKNOWN)
1762 charidx = tv_get_bool(&argvars[3]);
1763 }
1764
1765 int (*ptr2len)(char_u *);
1766 if (enc_utf8 && countcc)
1767 ptr2len = utf_ptr2len;
1768 else
1769 ptr2len = mb_ptr2len;
1770
1771 char_u *p;
1772 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001773 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001774 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
1775 {
1776 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001777 {
1778 // If the index is exactly the number of bytes or characters in the
1779 // string then return the length of the string in utf-16 code
1780 // units.
1781 if (charidx ? (idx == 0) : (p == (str + idx)))
1782 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001783 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001784 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001785 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001786 int clen = ptr2len(p);
1787 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1788 if (c > 0xFFFF)
1789 len++;
1790 p += ptr2len(p);
1791 if (charidx)
1792 idx--;
1793 }
1794
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001795 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001796}
1797
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001798/*
1799 * "tolower(string)" function
1800 */
1801 void
1802f_tolower(typval_T *argvars, typval_T *rettv)
1803{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001804 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1805 return;
1806
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001807 rettv->v_type = VAR_STRING;
1808 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1809}
1810
1811/*
1812 * "toupper(string)" function
1813 */
1814 void
1815f_toupper(typval_T *argvars, typval_T *rettv)
1816{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001817 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1818 return;
1819
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001820 rettv->v_type = VAR_STRING;
1821 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1822}
1823
1824/*
1825 * "tr(string, fromstr, tostr)" function
1826 */
1827 void
1828f_tr(typval_T *argvars, typval_T *rettv)
1829{
1830 char_u *in_str;
1831 char_u *fromstr;
1832 char_u *tostr;
1833 char_u *p;
1834 int inlen;
1835 int fromlen;
1836 int tolen;
1837 int idx;
1838 char_u *cpstr;
1839 int cplen;
1840 int first = TRUE;
1841 char_u buf[NUMBUFLEN];
1842 char_u buf2[NUMBUFLEN];
1843 garray_T ga;
1844
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001845 if (in_vim9script()
1846 && (check_for_string_arg(argvars, 0) == FAIL
1847 || check_for_string_arg(argvars, 1) == FAIL
1848 || check_for_string_arg(argvars, 2) == FAIL))
1849 return;
1850
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001851 in_str = tv_get_string(&argvars[0]);
1852 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1853 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1854
1855 // Default return value: empty string.
1856 rettv->v_type = VAR_STRING;
1857 rettv->vval.v_string = NULL;
1858 if (fromstr == NULL || tostr == NULL)
1859 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00001860 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001861
1862 if (!has_mbyte)
1863 // not multi-byte: fromstr and tostr must be the same length
1864 if (STRLEN(fromstr) != STRLEN(tostr))
1865 {
1866error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001867 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001868 ga_clear(&ga);
1869 return;
1870 }
1871
1872 // fromstr and tostr have to contain the same number of chars
1873 while (*in_str != NUL)
1874 {
1875 if (has_mbyte)
1876 {
1877 inlen = (*mb_ptr2len)(in_str);
1878 cpstr = in_str;
1879 cplen = inlen;
1880 idx = 0;
1881 for (p = fromstr; *p != NUL; p += fromlen)
1882 {
1883 fromlen = (*mb_ptr2len)(p);
1884 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1885 {
1886 for (p = tostr; *p != NUL; p += tolen)
1887 {
1888 tolen = (*mb_ptr2len)(p);
1889 if (idx-- == 0)
1890 {
1891 cplen = tolen;
1892 cpstr = p;
1893 break;
1894 }
1895 }
1896 if (*p == NUL) // tostr is shorter than fromstr
1897 goto error;
1898 break;
1899 }
1900 ++idx;
1901 }
1902
1903 if (first && cpstr == in_str)
1904 {
1905 // Check that fromstr and tostr have the same number of
1906 // (multi-byte) characters. Done only once when a character
1907 // of in_str doesn't appear in fromstr.
1908 first = FALSE;
1909 for (p = tostr; *p != NUL; p += tolen)
1910 {
1911 tolen = (*mb_ptr2len)(p);
1912 --idx;
1913 }
1914 if (idx != 0)
1915 goto error;
1916 }
1917
1918 (void)ga_grow(&ga, cplen);
1919 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1920 ga.ga_len += cplen;
1921
1922 in_str += inlen;
1923 }
1924 else
1925 {
1926 // When not using multi-byte chars we can do it faster.
1927 p = vim_strchr(fromstr, *in_str);
1928 if (p != NULL)
1929 ga_append(&ga, tostr[p - fromstr]);
1930 else
1931 ga_append(&ga, *in_str);
1932 ++in_str;
1933 }
1934 }
1935
1936 // add a terminating NUL
1937 (void)ga_grow(&ga, 1);
1938 ga_append(&ga, NUL);
1939
1940 rettv->vval.v_string = ga.ga_data;
1941}
1942
1943/*
1944 * "trim({expr})" function
1945 */
1946 void
1947f_trim(typval_T *argvars, typval_T *rettv)
1948{
1949 char_u buf1[NUMBUFLEN];
1950 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001951 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001952 char_u *mask = NULL;
1953 char_u *tail;
1954 char_u *prev;
1955 char_u *p;
1956 int c1;
1957 int dir = 0;
1958
1959 rettv->v_type = VAR_STRING;
1960 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001961
1962 if (in_vim9script()
1963 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02001964 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001965 || (argvars[1].v_type != VAR_UNKNOWN
1966 && check_for_opt_number_arg(argvars, 2) == FAIL)))
1967 return;
1968
1969 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001970 if (head == NULL)
1971 return;
1972
Illia Bobyr80799172023-10-17 18:00:50 +02001973 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001974 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001975
1976 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02001977 {
Illia Bobyr80799172023-10-17 18:00:50 +02001978 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1979 if (*mask == NUL)
1980 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001981
Illia Bobyr80799172023-10-17 18:00:50 +02001982 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02001983 {
Illia Bobyr80799172023-10-17 18:00:50 +02001984 int error = 0;
1985
1986 // leading or trailing characters to trim
1987 dir = (int)tv_get_number_chk(&argvars[2], &error);
1988 if (error)
1989 return;
1990 if (dir < 0 || dir > 2)
1991 {
1992 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
1993 return;
1994 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001995 }
1996 }
1997
1998 if (dir == 0 || dir == 1)
1999 {
2000 // Trim leading characters
2001 while (*head != NUL)
2002 {
2003 c1 = PTR2CHAR(head);
2004 if (mask == NULL)
2005 {
2006 if (c1 > ' ' && c1 != 0xa0)
2007 break;
2008 }
2009 else
2010 {
2011 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2012 if (c1 == PTR2CHAR(p))
2013 break;
2014 if (*p == NUL)
2015 break;
2016 }
2017 MB_PTR_ADV(head);
2018 }
2019 }
2020
2021 tail = head + STRLEN(head);
2022 if (dir == 0 || dir == 2)
2023 {
2024 // Trim trailing characters
2025 for (; tail > head; tail = prev)
2026 {
2027 prev = tail;
2028 MB_PTR_BACK(head, prev);
2029 c1 = PTR2CHAR(prev);
2030 if (mask == NULL)
2031 {
2032 if (c1 > ' ' && c1 != 0xa0)
2033 break;
2034 }
2035 else
2036 {
2037 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2038 if (c1 == PTR2CHAR(p))
2039 break;
2040 if (*p == NUL)
2041 break;
2042 }
2043 }
2044 }
2045 rettv->vval.v_string = vim_strnsave(head, tail - head);
2046}
2047
Bram Moolenaar677658a2022-01-05 16:09:06 +00002048static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002049
2050/*
2051 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2052 */
2053 static varnumber_T
2054tv_nr(typval_T *tvs, int *idxp)
2055{
2056 int idx = *idxp - 1;
2057 varnumber_T n = 0;
2058 int err = FALSE;
2059
2060 if (tvs[idx].v_type == VAR_UNKNOWN)
2061 emsg(_(e_printf));
2062 else
2063 {
2064 ++*idxp;
2065 n = tv_get_number_chk(&tvs[idx], &err);
2066 if (err)
2067 n = 0;
2068 }
2069 return n;
2070}
2071
2072/*
2073 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2074 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2075 * are not converted to a string.
2076 * If "tofree" is not NULL echo_string() is used. All types are converted to
2077 * a string with the same format as ":echo". The caller must free "*tofree".
2078 * Returns NULL for an error.
2079 */
2080 static char *
2081tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2082{
2083 int idx = *idxp - 1;
2084 char *s = NULL;
2085 static char_u numbuf[NUMBUFLEN];
2086
2087 if (tvs[idx].v_type == VAR_UNKNOWN)
2088 emsg(_(e_printf));
2089 else
2090 {
2091 ++*idxp;
2092 if (tofree != NULL)
2093 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2094 else
2095 s = (char *)tv_get_string_chk(&tvs[idx]);
2096 }
2097 return s;
2098}
2099
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002100/*
2101 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2102 */
2103 static double
2104tv_float(typval_T *tvs, int *idxp)
2105{
2106 int idx = *idxp - 1;
2107 double f = 0;
2108
2109 if (tvs[idx].v_type == VAR_UNKNOWN)
2110 emsg(_(e_printf));
2111 else
2112 {
2113 ++*idxp;
2114 if (tvs[idx].v_type == VAR_FLOAT)
2115 f = tvs[idx].vval.v_float;
2116 else if (tvs[idx].v_type == VAR_NUMBER)
2117 f = (double)tvs[idx].vval.v_number;
2118 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002119 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002120 }
2121 return f;
2122}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002123
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002124#endif
2125
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 *
 * "positive", "force_sign" and "space_for_positive" are combined
 * arithmetically, thus they are expected to be 0 or 1.  The upper case
 * variants are used when "fmt_spec" is an upper case ASCII letter.
 */
    static const char *
infinity_str(int positive,
             char fmt_spec,
             int force_sign,
             int space_for_positive)
{
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // 0: negative, 1: positive without sign, 2: with '+', 3: with a space
    int sign_idx = positive * (1 + force_sign * (1 + space_for_positive));
    // offset of the upper case variants in the table
    int case_off = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[sign_idx + case_off];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002147
2148/*
2149 * This code was included to provide a portable vsnprintf() and snprintf().
2150 * Some systems may provide their own, but we always use this one for
2151 * consistency.
2152 *
2153 * This code is based on snprintf.c - a portable implementation of snprintf
2154 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2155 * Included with permission. It was heavily modified to fit in Vim.
2156 * The original code, including useful comments, can be found here:
2157 * http://www.ijs.si/software/snprintf/
2158 *
2159 * This snprintf() only supports the following conversion specifiers:
2160 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2161 * with flags: '-', '+', ' ', '0' and '#'.
2162 * An asterisk is supported for field width as well as precision.
2163 *
2164 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2165 *
2166 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2167 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2168 *
2169 * The locale is not used, the string is used as a byte string. This is only
2170 * relevant for double-byte encodings where the second byte may be '%'.
2171 *
2172 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2173 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2174 *
 * The return value is the number of characters which would be generated
 * for the given input, excluding the trailing NUL.  If this value
 * is greater than or equal to "str_m", not all characters from the result
 * have been stored in str; output bytes beyond the ("str_m"-1)-th character
 * are discarded.  If "str_m" is greater than zero it is guaranteed
 * the resulting string will be NUL-terminated.
2181 */
2182
2183/*
2184 * When va_list is not supported we only define vim_snprintf().
2185 *
2186 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2187 * "typval_T". When the latter is not used it must be NULL.
2188 */
2189
2190// When generating prototypes all of this is skipped, cproto doesn't
2191// understand this.
2192#ifndef PROTO
2193
// Like vim_vsnprintf() but append to the string.
// "str_m" is the size of the whole buffer "str"; only the part after the
// text that is already in "str" is available for the new text.
// Returns what vim_vsnprintf() returns for the appended part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     ap;
    int         result;
    size_t      used = STRLEN(str);
    // Zero when the existing text already fills the buffer.
    size_t      avail = (str_m > used) ? str_m - used : 0;

    va_start(ap, fmt);
    result = vim_vsnprintf(str + used, avail, fmt, ap);
    va_end(ap);

    return result;
}
2212
// Format "fmt" with the variable arguments into "str", which has room for
// "str_m" bytes.  Returns what vim_vsnprintf() returns.
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     args;
    int         result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2224
// Like vim_snprintf() but the arguments are passed as a "va_list".
// Calls vim_vsnprintf_typval() with a NULL typval list, so that the
// arguments are taken from "ap".
    int
vim_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap)
{
    int result = vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);

    return result;
}
2234
// Argument type classes for a conversion in a printf() format string.
// The values are consecutive, starting at -1 for an unknown type.
enum
{
    TYPE_UNKNOWN = -1,
    TYPE_INT = 0,
    TYPE_LONGINT = 1,
    TYPE_LONGLONGINT = 2,
    TYPE_UNSIGNEDINT = 3,
    TYPE_UNSIGNEDLONGINT = 4,
    TYPE_UNSIGNEDLONGLONGINT = 5,
    TYPE_POINTER = 6,
    TYPE_PERCENT = 7,
    TYPE_CHAR = 8,
    TYPE_STRING = 9,
    TYPE_FLOAT = 10
};
2250
2251/* Types that can be used in a format string
2252 */
zeertzjq7772c932023-08-15 22:48:40 +02002253 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002254format_typeof(
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002255 const char *type)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002256{
2257 // allowed values: \0, h, l, L
2258 char length_modifier = '\0';
2259
2260 // current conversion specifier character
2261 char fmt_spec = '\0';
2262
2263 // parse 'h', 'l' and 'll' length modifiers
2264 if (*type == 'h' || *type == 'l')
2265 {
2266 length_modifier = *type;
2267 type++;
2268 if (length_modifier == 'l' && *type == 'l')
2269 {
2270 // double l = __int64 / varnumber_T
2271 length_modifier = 'L';
2272 type++;
2273 }
2274 }
2275 fmt_spec = *type;
2276
2277 // common synonyms:
2278 switch (fmt_spec)
2279 {
2280 case 'i': fmt_spec = 'd'; break;
2281 case '*': fmt_spec = 'd'; length_modifier = 'h'; break;
2282 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
2283 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
2284 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
2285 default: break;
2286 }
2287
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002288 // get parameter value, do initial processing
2289 switch (fmt_spec)
2290 {
2291 // '%' and 'c' behave similar to 's' regarding flags and field
2292 // widths
2293 case '%':
2294 return TYPE_PERCENT;
2295
2296 case 'c':
2297 return TYPE_CHAR;
2298
2299 case 's':
2300 case 'S':
2301 return TYPE_STRING;
2302
2303 case 'd': case 'u':
2304 case 'b': case 'B':
2305 case 'o':
2306 case 'x': case 'X':
2307 case 'p':
2308 {
2309 // NOTE: the u, b, o, x, X and p conversion specifiers
2310 // imply the value is unsigned; d implies a signed
2311 // value
2312
2313 // 0 if numeric argument is zero (or if pointer is
2314 // NULL for 'p'), +1 if greater than zero (or nonzero
2315 // for unsigned arguments), -1 if negative (unsigned
2316 // argument is never negative)
2317
2318 if (fmt_spec == 'p')
2319 return TYPE_POINTER;
2320 else if (fmt_spec == 'b' || fmt_spec == 'B')
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002321 return TYPE_UNSIGNEDLONGLONGINT;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002322 else if (fmt_spec == 'd')
2323 {
2324 // signed
2325 switch (length_modifier)
2326 {
2327 case '\0':
2328 case 'h':
2329 // char and short arguments are passed as int.
2330 return TYPE_INT;
2331 case 'l':
2332 return TYPE_LONGINT;
2333 case 'L':
2334 return TYPE_LONGLONGINT;
2335 }
2336 }
2337 else
2338 {
2339 // unsigned
2340 switch (length_modifier)
2341 {
2342 case '\0':
2343 case 'h':
2344 return TYPE_UNSIGNEDINT;
2345 case 'l':
2346 return TYPE_UNSIGNEDLONGINT;
2347 case 'L':
2348 return TYPE_UNSIGNEDLONGLONGINT;
2349 }
2350 }
2351 }
2352 break;
2353
2354 case 'f':
2355 case 'F':
2356 case 'e':
2357 case 'E':
2358 case 'g':
2359 case 'G':
2360 return TYPE_FLOAT;
2361 }
2362
2363 return TYPE_UNKNOWN;
2364}
2365
zeertzjq7772c932023-08-15 22:48:40 +02002366 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002367format_typename(
2368 const char *type)
2369{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002370 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002371 {
2372 case TYPE_INT:
2373 return _(typename_int);
2374
2375 case TYPE_LONGINT:
2376 return _(typename_longint);
2377
2378 case TYPE_LONGLONGINT:
2379 return _(typename_longlongint);
2380
2381 case TYPE_UNSIGNEDINT:
2382 return _(typename_unsignedint);
2383
2384 case TYPE_UNSIGNEDLONGINT:
2385 return _(typename_unsignedlongint);
2386
2387 case TYPE_UNSIGNEDLONGLONGINT:
2388 return _(typename_unsignedlonglongint);
2389
2390 case TYPE_POINTER:
2391 return _(typename_pointer);
2392
2393 case TYPE_PERCENT:
2394 return _(typename_percent);
2395
2396 case TYPE_CHAR:
2397 return _(typename_char);
2398
2399 case TYPE_STRING:
2400 return _(typename_string);
2401
2402 case TYPE_FLOAT:
2403 return _(typename_float);
2404 }
2405
2406 return _(typename_unknown);
2407}
2408
zeertzjq7772c932023-08-15 22:48:40 +02002409 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002410adjust_types(
2411 const char ***ap_types,
2412 int arg,
2413 int *num_posarg,
2414 const char *type)
2415{
2416 if (*ap_types == NULL || *num_posarg < arg)
2417 {
2418 int idx;
2419 const char **new_types;
2420
2421 if (*ap_types == NULL)
2422 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2423 else
K.Takata4c215ec2023-08-26 18:05:08 +02002424 new_types = vim_realloc((char **)*ap_types,
2425 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002426
2427 if (new_types == NULL)
2428 return FAIL;
2429
2430 for (idx = *num_posarg; idx < arg; ++idx)
2431 new_types[idx] = NULL;
2432
2433 *ap_types = new_types;
2434 *num_posarg = arg;
2435 }
2436
2437 if ((*ap_types)[arg - 1] != NULL)
2438 {
2439 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2440 {
2441 const char *pt = type;
2442 if (pt[0] == '*')
2443 pt = (*ap_types)[arg - 1];
2444
2445 if (pt[0] != '*')
2446 {
2447 switch (pt[0])
2448 {
2449 case 'd': case 'i': break;
2450 default:
2451 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2452 return FAIL;
2453 }
2454 }
2455 }
2456 else
2457 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002458 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002459 {
2460 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2461 return FAIL;
2462 }
2463 }
2464 }
2465
2466 (*ap_types)[arg - 1] = type;
2467
2468 return OK;
2469}
2470
zeertzjq7772c932023-08-15 22:48:40 +02002471 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002472parse_fmt_types(
2473 const char ***ap_types,
2474 int *num_posarg,
2475 const char *fmt,
2476 typval_T *tvs UNUSED
2477 )
2478{
2479 const char *p = fmt;
2480 const char *arg = NULL;
2481
2482 int any_pos = 0;
2483 int any_arg = 0;
2484 int arg_idx;
2485
2486#define CHECK_POS_ARG do { \
2487 if (any_pos && any_arg) \
2488 { \
2489 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2490 goto error; \
2491 } \
2492} while (0);
2493
2494 if (p == NULL)
2495 return OK;
2496
2497 while (*p != NUL)
2498 {
2499 if (*p != '%')
2500 {
2501 char *q = strchr(p + 1, '%');
2502 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2503
2504 p += n;
2505 }
2506 else
2507 {
2508 // allowed values: \0, h, l, L
2509 char length_modifier = '\0';
2510
2511 // variable for positional arg
2512 int pos_arg = -1;
2513 const char *ptype = NULL;
2514
2515 p++; // skip '%'
2516
2517 // First check to see if we find a positional
2518 // argument specifier
2519 ptype = p;
2520
2521 while (VIM_ISDIGIT(*ptype))
2522 ++ptype;
2523
2524 if (*ptype == '$')
2525 {
2526 if (*p == '0')
2527 {
2528 // 0 flag at the wrong place
2529 semsg(_( e_invalid_format_specifier_str), fmt);
2530 goto error;
2531 }
2532
2533 // Positional argument
2534 unsigned int uj = *p++ - '0';
2535
2536 while (VIM_ISDIGIT((int)(*p)))
2537 uj = 10 * uj + (unsigned int)(*p++ - '0');
2538 pos_arg = uj;
2539
2540 any_pos = 1;
2541 CHECK_POS_ARG;
2542
2543 ++p;
2544 }
2545
2546 // parse flags
2547 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2548 || *p == '#' || *p == '\'')
2549 {
2550 switch (*p)
2551 {
2552 case '0': break;
2553 case '-': break;
2554 case '+': break;
2555 case ' ': // If both the ' ' and '+' flags appear, the ' '
2556 // flag should be ignored
2557 break;
2558 case '#': break;
2559 case '\'': break;
2560 }
2561 p++;
2562 }
2563 // If the '0' and '-' flags both appear, the '0' flag should be
2564 // ignored.
2565
2566 // parse field width
2567 if (*(arg = p) == '*')
2568 {
2569 p++;
2570
2571 if (VIM_ISDIGIT((int)(*p)))
2572 {
2573 // Positional argument field width
2574 unsigned int uj = *p++ - '0';
2575
2576 while (VIM_ISDIGIT((int)(*p)))
2577 uj = 10 * uj + (unsigned int)(*p++ - '0');
2578
2579 if (*p != '$')
2580 {
2581 semsg(_( e_invalid_format_specifier_str), fmt);
2582 goto error;
2583 }
2584 else
2585 {
2586 ++p;
2587 any_pos = 1;
2588 CHECK_POS_ARG;
2589
2590 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2591 goto error;
2592 }
2593 }
2594 else
2595 {
2596 any_arg = 1;
2597 CHECK_POS_ARG;
2598 }
2599 }
dundargoc580c1fc2023-10-06 19:41:14 +02002600 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002601 {
2602 // size_t could be wider than unsigned int; make sure we treat
2603 // argument like common implementations do
2604 unsigned int uj = *p++ - '0';
2605
2606 while (VIM_ISDIGIT((int)(*p)))
2607 uj = 10 * uj + (unsigned int)(*p++ - '0');
2608
2609 if (*p == '$')
2610 {
2611 semsg(_( e_invalid_format_specifier_str), fmt);
2612 goto error;
2613 }
2614 }
2615
2616 // parse precision
2617 if (*p == '.')
2618 {
2619 p++;
2620
2621 if (*(arg = p) == '*')
2622 {
2623 p++;
2624
2625 if (VIM_ISDIGIT((int)(*p)))
2626 {
2627 // Parse precision
2628 unsigned int uj = *p++ - '0';
2629
2630 while (VIM_ISDIGIT((int)(*p)))
2631 uj = 10 * uj + (unsigned int)(*p++ - '0');
2632
2633 if (*p == '$')
2634 {
2635 any_pos = 1;
2636 CHECK_POS_ARG;
2637
2638 ++p;
2639
2640 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2641 goto error;
2642 }
2643 else
2644 {
2645 semsg(_( e_invalid_format_specifier_str), fmt);
2646 goto error;
2647 }
2648 }
2649 else
2650 {
2651 any_arg = 1;
2652 CHECK_POS_ARG;
2653 }
2654 }
dundargoc580c1fc2023-10-06 19:41:14 +02002655 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002656 {
2657 // size_t could be wider than unsigned int; make sure we
2658 // treat argument like common implementations do
2659 unsigned int uj = *p++ - '0';
2660
2661 while (VIM_ISDIGIT((int)(*p)))
2662 uj = 10 * uj + (unsigned int)(*p++ - '0');
2663
2664 if (*p == '$')
2665 {
2666 semsg(_( e_invalid_format_specifier_str), fmt);
2667 goto error;
2668 }
2669 }
2670 }
2671
2672 if (pos_arg != -1)
2673 {
2674 any_pos = 1;
2675 CHECK_POS_ARG;
2676
2677 ptype = p;
2678 }
2679
2680 // parse 'h', 'l' and 'll' length modifiers
2681 if (*p == 'h' || *p == 'l')
2682 {
2683 length_modifier = *p;
2684 p++;
2685 if (length_modifier == 'l' && *p == 'l')
2686 {
2687 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02002688 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002689 p++;
2690 }
2691 }
2692
2693 switch (*p)
2694 {
2695 // Check for known format specifiers. % is special!
2696 case 'i':
2697 case '*':
2698 case 'd':
2699 case 'u':
2700 case 'o':
2701 case 'D':
2702 case 'U':
2703 case 'O':
2704 case 'x':
2705 case 'X':
2706 case 'b':
2707 case 'B':
2708 case 'c':
2709 case 's':
2710 case 'S':
2711 case 'p':
2712 case 'f':
2713 case 'F':
2714 case 'e':
2715 case 'E':
2716 case 'g':
2717 case 'G':
2718 if (pos_arg != -1)
2719 {
2720 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
2721 goto error;
2722 }
2723 else
2724 {
2725 any_arg = 1;
2726 CHECK_POS_ARG;
2727 }
2728 break;
2729
2730 default:
2731 if (pos_arg != -1)
2732 {
2733 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
2734 goto error;
2735 }
2736 }
2737
2738 if (*p != NUL)
2739 p++; // step over the just processed conversion specifier
2740 }
2741 }
2742
2743 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
2744 {
2745 if ((*ap_types)[arg_idx] == NULL)
2746 {
2747 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
2748 goto error;
2749 }
2750
2751# if defined(FEAT_EVAL)
2752 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
2753 {
2754 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
2755 goto error;
2756 }
2757# endif
2758 }
2759
2760 return OK;
2761
2762error:
K.Takata4c215ec2023-08-26 18:05:08 +02002763 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002764 *ap_types = NULL;
2765 *num_posarg = 0;
2766 return FAIL;
2767}
2768
zeertzjq7772c932023-08-15 22:48:40 +02002769 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002770skip_to_arg(
2771 const char **ap_types,
2772 va_list ap_start,
2773 va_list *ap,
2774 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002775 int *arg_cur,
2776 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002777{
2778 int arg_min = 0;
2779
2780 if (*arg_cur + 1 == *arg_idx)
2781 {
2782 ++*arg_cur;
2783 ++*arg_idx;
2784 return;
2785 }
2786
2787 if (*arg_cur >= *arg_idx)
2788 {
2789 // Reset ap to ap_start and skip arg_idx - 1 types
2790 va_end(*ap);
2791 va_copy(*ap, ap_start);
2792 }
2793 else
2794 {
2795 // Skip over any we should skip
2796 arg_min = *arg_cur;
2797 }
2798
2799 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
2800 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002801 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002802
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002803 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
2804 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02002805 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002806 return;
2807 }
2808
2809 p = ap_types[*arg_cur];
2810
2811 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002812
2813 // get parameter value, do initial processing
2814 switch (fmt_type)
2815 {
2816 case TYPE_PERCENT:
2817 case TYPE_UNKNOWN:
2818 break;
2819
2820 case TYPE_CHAR:
2821 va_arg(*ap, int);
2822 break;
2823
2824 case TYPE_STRING:
2825 va_arg(*ap, char *);
2826 break;
2827
2828 case TYPE_POINTER:
2829 va_arg(*ap, void *);
2830 break;
2831
2832 case TYPE_INT:
2833 va_arg(*ap, int);
2834 break;
2835
2836 case TYPE_LONGINT:
2837 va_arg(*ap, long int);
2838 break;
2839
2840 case TYPE_LONGLONGINT:
2841 va_arg(*ap, varnumber_T);
2842 break;
2843
2844 case TYPE_UNSIGNEDINT:
2845 va_arg(*ap, unsigned int);
2846 break;
2847
2848 case TYPE_UNSIGNEDLONGINT:
2849 va_arg(*ap, unsigned long int);
2850 break;
2851
2852 case TYPE_UNSIGNEDLONGLONGINT:
2853 va_arg(*ap, uvarnumber_T);
2854 break;
2855
2856 case TYPE_FLOAT:
2857 va_arg(*ap, double);
2858 break;
2859 }
2860 }
2861
2862 // Because we know that after we return from this call,
2863 // a va_arg() call is made, we can pre-emptively
2864 // increment the current argument index.
2865 ++*arg_cur;
2866 ++*arg_idx;
2867
2868 return;
2869}
2870
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002871 int
2872vim_vsnprintf_typval(
2873 char *str,
2874 size_t str_m,
2875 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002876 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002877 typval_T *tvs)
2878{
2879 size_t str_l = 0;
2880 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002881 int arg_cur = 0;
2882 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002883 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002884 va_list ap;
2885 const char **ap_types = NULL;
2886
2887 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
2888 return 0;
2889
2890 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002891
2892 if (p == NULL)
2893 p = "";
2894 while (*p != NUL)
2895 {
2896 if (*p != '%')
2897 {
2898 char *q = strchr(p + 1, '%');
2899 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2900
2901 // Copy up to the next '%' or NUL without any changes.
2902 if (str_l < str_m)
2903 {
2904 size_t avail = str_m - str_l;
2905
2906 mch_memmove(str + str_l, p, n > avail ? avail : n);
2907 }
2908 p += n;
2909 str_l += n;
2910 }
2911 else
2912 {
2913 size_t min_field_width = 0, precision = 0;
2914 int zero_padding = 0, precision_specified = 0, justify_left = 0;
2915 int alternate_form = 0, force_sign = 0;
2916
2917 // If both the ' ' and '+' flags appear, the ' ' flag should be
2918 // ignored.
2919 int space_for_positive = 1;
2920
2921 // allowed values: \0, h, l, L
2922 char length_modifier = '\0';
2923
2924 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002925# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002926 // That sounds reasonable to use as the maximum
2927 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002928 char tmp[TMP_LEN];
2929
2930 // string address in case of string argument
2931 const char *str_arg = NULL;
2932
2933 // natural field width of arg without padding and sign
2934 size_t str_arg_l;
2935
2936 // unsigned char argument value - only defined for c conversion.
2937 // N.B. standard explicitly states the char argument for the c
2938 // conversion is unsigned
2939 unsigned char uchar_arg;
2940
2941 // number of zeros to be inserted for numeric conversions as
2942 // required by the precision or minimal field width
2943 size_t number_of_zeros_to_pad = 0;
2944
2945 // index into tmp where zero padding is to be inserted
2946 size_t zero_padding_insertion_ind = 0;
2947
2948 // current conversion specifier character
2949 char fmt_spec = '\0';
2950
2951 // buffer for 's' and 'S' specs
2952 char_u *tofree = NULL;
2953
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002954 // variables for positional arg
2955 int pos_arg = -1;
2956 const char *ptype;
2957
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002958
2959 p++; // skip '%'
2960
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002961 // First check to see if we find a positional
2962 // argument specifier
2963 ptype = p;
2964
2965 while (VIM_ISDIGIT(*ptype))
2966 ++ptype;
2967
2968 if (*ptype == '$')
2969 {
2970 // Positional argument
2971 unsigned int uj = *p++ - '0';
2972
2973 while (VIM_ISDIGIT((int)(*p)))
2974 uj = 10 * uj + (unsigned int)(*p++ - '0');
2975 pos_arg = uj;
2976
2977 ++p;
2978 }
2979
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002980 // parse flags
2981 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2982 || *p == '#' || *p == '\'')
2983 {
2984 switch (*p)
2985 {
2986 case '0': zero_padding = 1; break;
2987 case '-': justify_left = 1; break;
2988 case '+': force_sign = 1; space_for_positive = 0; break;
2989 case ' ': force_sign = 1;
2990 // If both the ' ' and '+' flags appear, the ' '
2991 // flag should be ignored
2992 break;
2993 case '#': alternate_form = 1; break;
2994 case '\'': break;
2995 }
2996 p++;
2997 }
2998 // If the '0' and '-' flags both appear, the '0' flag should be
2999 // ignored.
3000
3001 // parse field width
3002 if (*p == '*')
3003 {
3004 int j;
3005
3006 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003007
3008 if (VIM_ISDIGIT((int)(*p)))
3009 {
3010 // Positional argument field width
3011 unsigned int uj = *p++ - '0';
3012
3013 while (VIM_ISDIGIT((int)(*p)))
3014 uj = 10 * uj + (unsigned int)(*p++ - '0');
3015 arg_idx = uj;
3016
3017 ++p;
3018 }
3019
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003020 j =
3021# if defined(FEAT_EVAL)
3022 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3023# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003024 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3025 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003026 va_arg(ap, int));
3027
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003028 if (j >= 0)
3029 min_field_width = j;
3030 else
3031 {
3032 min_field_width = -j;
3033 justify_left = 1;
3034 }
3035 }
3036 else if (VIM_ISDIGIT((int)(*p)))
3037 {
3038 // size_t could be wider than unsigned int; make sure we treat
3039 // argument like common implementations do
3040 unsigned int uj = *p++ - '0';
3041
3042 while (VIM_ISDIGIT((int)(*p)))
3043 uj = 10 * uj + (unsigned int)(*p++ - '0');
3044 min_field_width = uj;
3045 }
3046
3047 // parse precision
3048 if (*p == '.')
3049 {
3050 p++;
3051 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003052
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003053 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003054 {
3055 // size_t could be wider than unsigned int; make sure we
3056 // treat argument like common implementations do
3057 unsigned int uj = *p++ - '0';
3058
3059 while (VIM_ISDIGIT((int)(*p)))
3060 uj = 10 * uj + (unsigned int)(*p++ - '0');
3061 precision = uj;
3062 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003063 else if (*p == '*')
3064 {
3065 int j;
3066
3067 p++;
3068
3069 if (VIM_ISDIGIT((int)(*p)))
3070 {
3071 // positional argument
3072 unsigned int uj = *p++ - '0';
3073
3074 while (VIM_ISDIGIT((int)(*p)))
3075 uj = 10 * uj + (unsigned int)(*p++ - '0');
3076 arg_idx = uj;
3077
3078 ++p;
3079 }
3080
3081 j =
3082# if defined(FEAT_EVAL)
3083 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3084# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003085 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3086 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003087 va_arg(ap, int));
3088
3089 if (j >= 0)
3090 precision = j;
3091 else
3092 {
3093 precision_specified = 0;
3094 precision = 0;
3095 }
3096 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003097 }
3098
3099 // parse 'h', 'l' and 'll' length modifiers
3100 if (*p == 'h' || *p == 'l')
3101 {
3102 length_modifier = *p;
3103 p++;
3104 if (length_modifier == 'l' && *p == 'l')
3105 {
3106 // double l = __int64 / varnumber_T
3107 length_modifier = 'L';
3108 p++;
3109 }
3110 }
3111 fmt_spec = *p;
3112
3113 // common synonyms:
3114 switch (fmt_spec)
3115 {
3116 case 'i': fmt_spec = 'd'; break;
3117 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3118 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3119 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3120 default: break;
3121 }
3122
3123# if defined(FEAT_EVAL)
3124 switch (fmt_spec)
3125 {
3126 case 'd': case 'u': case 'o': case 'x': case 'X':
3127 if (tvs != NULL && length_modifier == '\0')
3128 length_modifier = 'L';
3129 }
3130# endif
3131
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003132 if (pos_arg != -1)
3133 arg_idx = pos_arg;
3134
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003135 // get parameter value, do initial processing
3136 switch (fmt_spec)
3137 {
3138 // '%' and 'c' behave similar to 's' regarding flags and field
3139 // widths
3140 case '%':
3141 case 'c':
3142 case 's':
3143 case 'S':
3144 str_arg_l = 1;
3145 switch (fmt_spec)
3146 {
3147 case '%':
3148 str_arg = p;
3149 break;
3150
3151 case 'c':
3152 {
3153 int j;
3154
3155 j =
3156# if defined(FEAT_EVAL)
3157 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3158# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003159 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3160 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003161 va_arg(ap, int));
3162
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003163 // standard demands unsigned char
3164 uchar_arg = (unsigned char)j;
3165 str_arg = (char *)&uchar_arg;
3166 break;
3167 }
3168
3169 case 's':
3170 case 'S':
3171 str_arg =
3172# if defined(FEAT_EVAL)
3173 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3174# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003175 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3176 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003177 va_arg(ap, char *));
3178
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003179 if (str_arg == NULL)
3180 {
3181 str_arg = "[NULL]";
3182 str_arg_l = 6;
3183 }
3184 // make sure not to address string beyond the specified
3185 // precision !!!
3186 else if (!precision_specified)
3187 str_arg_l = strlen(str_arg);
3188 // truncate string if necessary as requested by precision
3189 else if (precision == 0)
3190 str_arg_l = 0;
3191 else
3192 {
3193 // Don't put the #if inside memchr(), it can be a
3194 // macro.
3195 // memchr on HP does not like n > 2^31 !!!
3196 char *q = memchr(str_arg, '\0',
3197 precision <= (size_t)0x7fffffffL ? precision
3198 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003199
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003200 str_arg_l = (q == NULL) ? precision
3201 : (size_t)(q - str_arg);
3202 }
3203 if (fmt_spec == 'S')
3204 {
presuku1f2453f2021-11-24 15:32:57 +00003205 char_u *p1;
3206 size_t i;
3207 int cell;
presukud85fccd2021-11-20 19:38:31 +00003208
presuku1f2453f2021-11-24 15:32:57 +00003209 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003210 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003211 {
3212 cell = mb_ptr2cells(p1);
3213 if (precision_specified && i + cell > precision)
3214 break;
3215 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003216 }
presuku1f2453f2021-11-24 15:32:57 +00003217
3218 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003219 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003220 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003221 }
3222 break;
3223
3224 default:
3225 break;
3226 }
3227 break;
3228
3229 case 'd': case 'u':
3230 case 'b': case 'B':
3231 case 'o':
3232 case 'x': case 'X':
3233 case 'p':
3234 {
3235 // NOTE: the u, b, o, x, X and p conversion specifiers
3236 // imply the value is unsigned; d implies a signed
3237 // value
3238
3239 // 0 if numeric argument is zero (or if pointer is
3240 // NULL for 'p'), +1 if greater than zero (or nonzero
3241 // for unsigned arguments), -1 if negative (unsigned
3242 // argument is never negative)
3243 int arg_sign = 0;
3244
3245 // only set for length modifier h, or for no length
3246 // modifiers
3247 int int_arg = 0;
3248 unsigned int uint_arg = 0;
3249
3250 // only set for length modifier l
3251 long int long_arg = 0;
3252 unsigned long int ulong_arg = 0;
3253
3254 // only set for length modifier ll
3255 varnumber_T llong_arg = 0;
3256 uvarnumber_T ullong_arg = 0;
3257
3258 // only set for b conversion
3259 uvarnumber_T bin_arg = 0;
3260
3261 // pointer argument value -only defined for p
3262 // conversion
3263 void *ptr_arg = NULL;
3264
3265 if (fmt_spec == 'p')
3266 {
3267 length_modifier = '\0';
3268 ptr_arg =
3269# if defined(FEAT_EVAL)
3270 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3271 NULL) :
3272# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003273 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3274 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003275 va_arg(ap, void *));
3276
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003277 if (ptr_arg != NULL)
3278 arg_sign = 1;
3279 }
3280 else if (fmt_spec == 'b' || fmt_spec == 'B')
3281 {
3282 bin_arg =
3283# if defined(FEAT_EVAL)
3284 tvs != NULL ?
3285 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3286# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003287 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3288 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003289 va_arg(ap, uvarnumber_T));
3290
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003291 if (bin_arg != 0)
3292 arg_sign = 1;
3293 }
3294 else if (fmt_spec == 'd')
3295 {
3296 // signed
3297 switch (length_modifier)
3298 {
3299 case '\0':
3300 case 'h':
3301 // char and short arguments are passed as int.
3302 int_arg =
3303# if defined(FEAT_EVAL)
3304 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3305# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003306 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3307 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003308 va_arg(ap, int));
3309
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003310 if (int_arg > 0)
3311 arg_sign = 1;
3312 else if (int_arg < 0)
3313 arg_sign = -1;
3314 break;
3315 case 'l':
3316 long_arg =
3317# if defined(FEAT_EVAL)
3318 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3319# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003320 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3321 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003322 va_arg(ap, long int));
3323
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003324 if (long_arg > 0)
3325 arg_sign = 1;
3326 else if (long_arg < 0)
3327 arg_sign = -1;
3328 break;
3329 case 'L':
3330 llong_arg =
3331# if defined(FEAT_EVAL)
3332 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3333# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003334 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3335 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003336 va_arg(ap, varnumber_T));
3337
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003338 if (llong_arg > 0)
3339 arg_sign = 1;
3340 else if (llong_arg < 0)
3341 arg_sign = -1;
3342 break;
3343 }
3344 }
3345 else
3346 {
3347 // unsigned
3348 switch (length_modifier)
3349 {
3350 case '\0':
3351 case 'h':
3352 uint_arg =
3353# if defined(FEAT_EVAL)
3354 tvs != NULL ? (unsigned)
3355 tv_nr(tvs, &arg_idx) :
3356# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003357 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3358 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003359 va_arg(ap, unsigned int));
3360
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003361 if (uint_arg != 0)
3362 arg_sign = 1;
3363 break;
3364 case 'l':
3365 ulong_arg =
3366# if defined(FEAT_EVAL)
3367 tvs != NULL ? (unsigned long)
3368 tv_nr(tvs, &arg_idx) :
3369# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003370 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3371 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003372 va_arg(ap, unsigned long int));
3373
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003374 if (ulong_arg != 0)
3375 arg_sign = 1;
3376 break;
3377 case 'L':
3378 ullong_arg =
3379# if defined(FEAT_EVAL)
3380 tvs != NULL ? (uvarnumber_T)
3381 tv_nr(tvs, &arg_idx) :
3382# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003383 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3384 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003385 va_arg(ap, uvarnumber_T));
3386
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003387 if (ullong_arg != 0)
3388 arg_sign = 1;
3389 break;
3390 }
3391 }
3392
3393 str_arg = tmp;
3394 str_arg_l = 0;
3395
3396 // NOTE:
3397 // For d, i, u, o, x, and X conversions, if precision is
3398 // specified, the '0' flag should be ignored. This is so
3399 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3400 // FreeBSD, NetBSD; but not with Perl.
3401 if (precision_specified)
3402 zero_padding = 0;
3403 if (fmt_spec == 'd')
3404 {
3405 if (force_sign && arg_sign >= 0)
3406 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3407 // leave negative numbers for sprintf to handle, to
3408 // avoid handling tricky cases like (short int)-32768
3409 }
3410 else if (alternate_form)
3411 {
3412 if (arg_sign != 0
3413 && (fmt_spec == 'b' || fmt_spec == 'B'
3414 || fmt_spec == 'x' || fmt_spec == 'X') )
3415 {
3416 tmp[str_arg_l++] = '0';
3417 tmp[str_arg_l++] = fmt_spec;
3418 }
3419 // alternate form should have no effect for p
3420 // conversion, but ...
3421 }
3422
3423 zero_padding_insertion_ind = str_arg_l;
3424 if (!precision_specified)
3425 precision = 1; // default precision is 1
3426 if (precision == 0 && arg_sign == 0)
3427 {
3428 // When zero value is formatted with an explicit
3429 // precision 0, the resulting formatted string is
3430 // empty (d, i, u, b, B, o, x, X, p).
3431 }
3432 else
3433 {
3434 char f[6];
3435 int f_l = 0;
3436
3437 // construct a simple format string for sprintf
3438 f[f_l++] = '%';
3439 if (!length_modifier)
3440 ;
3441 else if (length_modifier == 'L')
3442 {
3443# ifdef MSWIN
3444 f[f_l++] = 'I';
3445 f[f_l++] = '6';
3446 f[f_l++] = '4';
3447# else
3448 f[f_l++] = 'l';
3449 f[f_l++] = 'l';
3450# endif
3451 }
3452 else
3453 f[f_l++] = length_modifier;
3454 f[f_l++] = fmt_spec;
3455 f[f_l++] = '\0';
3456
3457 if (fmt_spec == 'p')
3458 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3459 else if (fmt_spec == 'b' || fmt_spec == 'B')
3460 {
3461 char b[8 * sizeof(uvarnumber_T)];
3462 size_t b_l = 0;
3463 uvarnumber_T bn = bin_arg;
3464
3465 do
3466 {
3467 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3468 bn >>= 1;
3469 }
3470 while (bn != 0);
3471
3472 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3473 str_arg_l += b_l;
3474 }
3475 else if (fmt_spec == 'd')
3476 {
3477 // signed
3478 switch (length_modifier)
3479 {
3480 case '\0': str_arg_l += sprintf(
3481 tmp + str_arg_l, f,
3482 int_arg);
3483 break;
3484 case 'h': str_arg_l += sprintf(
3485 tmp + str_arg_l, f,
3486 (short)int_arg);
3487 break;
3488 case 'l': str_arg_l += sprintf(
3489 tmp + str_arg_l, f, long_arg);
3490 break;
3491 case 'L': str_arg_l += sprintf(
3492 tmp + str_arg_l, f, llong_arg);
3493 break;
3494 }
3495 }
3496 else
3497 {
3498 // unsigned
3499 switch (length_modifier)
3500 {
3501 case '\0': str_arg_l += sprintf(
3502 tmp + str_arg_l, f,
3503 uint_arg);
3504 break;
3505 case 'h': str_arg_l += sprintf(
3506 tmp + str_arg_l, f,
3507 (unsigned short)uint_arg);
3508 break;
3509 case 'l': str_arg_l += sprintf(
3510 tmp + str_arg_l, f, ulong_arg);
3511 break;
3512 case 'L': str_arg_l += sprintf(
3513 tmp + str_arg_l, f, ullong_arg);
3514 break;
3515 }
3516 }
3517
3518 // include the optional minus sign and possible
3519 // "0x" in the region before the zero padding
3520 // insertion point
3521 if (zero_padding_insertion_ind < str_arg_l
3522 && tmp[zero_padding_insertion_ind] == '-')
3523 zero_padding_insertion_ind++;
3524 if (zero_padding_insertion_ind + 1 < str_arg_l
3525 && tmp[zero_padding_insertion_ind] == '0'
3526 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3527 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3528 zero_padding_insertion_ind += 2;
3529 }
3530
3531 {
3532 size_t num_of_digits = str_arg_l
3533 - zero_padding_insertion_ind;
3534
3535 if (alternate_form && fmt_spec == 'o'
3536 // unless zero is already the first
3537 // character
3538 && !(zero_padding_insertion_ind < str_arg_l
3539 && tmp[zero_padding_insertion_ind] == '0'))
3540 {
3541 // assure leading zero for alternate-form
3542 // octal numbers
3543 if (!precision_specified
3544 || precision < num_of_digits + 1)
3545 {
3546 // precision is increased to force the
3547 // first character to be zero, except if a
3548 // zero value is formatted with an
3549 // explicit precision of zero
3550 precision = num_of_digits + 1;
3551 }
3552 }
3553 // zero padding to specified precision?
3554 if (num_of_digits < precision)
3555 number_of_zeros_to_pad = precision - num_of_digits;
3556 }
3557 // zero padding to specified minimal field width?
3558 if (!justify_left && zero_padding)
3559 {
3560 int n = (int)(min_field_width - (str_arg_l
3561 + number_of_zeros_to_pad));
3562 if (n > 0)
3563 number_of_zeros_to_pad += n;
3564 }
3565 break;
3566 }
3567
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003568 case 'f':
3569 case 'F':
3570 case 'e':
3571 case 'E':
3572 case 'g':
3573 case 'G':
3574 {
3575 // Floating point.
3576 double f;
3577 double abs_f;
3578 char format[40];
3579 int l;
3580 int remove_trailing_zeroes = FALSE;
3581
3582 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003583# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003584 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003585# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003586 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3587 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003588 va_arg(ap, double));
3589
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003590 abs_f = f < 0 ? -f : f;
3591
3592 if (fmt_spec == 'g' || fmt_spec == 'G')
3593 {
3594 // Would be nice to use %g directly, but it prints
3595 // "1.0" as "1", we don't want that.
3596 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3597 || abs_f == 0.0)
3598 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3599 else
3600 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3601 remove_trailing_zeroes = TRUE;
3602 }
3603
3604 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003605# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003606 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003607# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003608 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003609# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003610 )
3611 {
3612 // Avoid a buffer overflow
3613 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3614 force_sign, space_for_positive));
3615 str_arg_l = STRLEN(tmp);
3616 zero_padding = 0;
3617 }
3618 else
3619 {
3620 if (isnan(f))
3621 {
3622 // Not a number: nan or NAN
3623 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3624 : "nan");
3625 str_arg_l = 3;
3626 zero_padding = 0;
3627 }
3628 else if (isinf(f))
3629 {
3630 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3631 force_sign, space_for_positive));
3632 str_arg_l = STRLEN(tmp);
3633 zero_padding = 0;
3634 }
3635 else
3636 {
3637 // Regular float number
3638 format[0] = '%';
3639 l = 1;
3640 if (force_sign)
3641 format[l++] = space_for_positive ? ' ' : '+';
3642 if (precision_specified)
3643 {
3644 size_t max_prec = TMP_LEN - 10;
3645
3646 // Make sure we don't get more digits than we
3647 // have room for.
3648 if ((fmt_spec == 'f' || fmt_spec == 'F')
3649 && abs_f > 1.0)
3650 max_prec -= (size_t)log10(abs_f);
3651 if (precision > max_prec)
3652 precision = max_prec;
3653 l += sprintf(format + l, ".%d", (int)precision);
3654 }
3655 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
3656 format[l + 1] = NUL;
3657
3658 str_arg_l = sprintf(tmp, format, f);
3659 }
3660
3661 if (remove_trailing_zeroes)
3662 {
3663 int i;
3664 char *tp;
3665
3666 // Using %g or %G: remove superfluous zeroes.
3667 if (fmt_spec == 'f' || fmt_spec == 'F')
3668 tp = tmp + str_arg_l - 1;
3669 else
3670 {
3671 tp = (char *)vim_strchr((char_u *)tmp,
3672 fmt_spec == 'e' ? 'e' : 'E');
3673 if (tp != NULL)
3674 {
3675 // Remove superfluous '+' and leading
3676 // zeroes from the exponent.
3677 if (tp[1] == '+')
3678 {
3679 // Change "1.0e+07" to "1.0e07"
3680 STRMOVE(tp + 1, tp + 2);
3681 --str_arg_l;
3682 }
3683 i = (tp[1] == '-') ? 2 : 1;
3684 while (tp[i] == '0')
3685 {
3686 // Change "1.0e07" to "1.0e7"
3687 STRMOVE(tp + i, tp + i + 1);
3688 --str_arg_l;
3689 }
3690 --tp;
3691 }
3692 }
3693
3694 if (tp != NULL && !precision_specified)
3695 // Remove trailing zeroes, but keep the one
3696 // just after a dot.
3697 while (tp > tmp + 2 && *tp == '0'
3698 && tp[-1] != '.')
3699 {
3700 STRMOVE(tp, tp + 1);
3701 --tp;
3702 --str_arg_l;
3703 }
3704 }
3705 else
3706 {
3707 char *tp;
3708
3709 // Be consistent: some printf("%e") use 1.0e+12
3710 // and some 1.0e+012. Remove one zero in the last
3711 // case.
3712 tp = (char *)vim_strchr((char_u *)tmp,
3713 fmt_spec == 'e' ? 'e' : 'E');
3714 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
3715 && tp[2] == '0'
3716 && vim_isdigit(tp[3])
3717 && vim_isdigit(tp[4]))
3718 {
3719 STRMOVE(tp + 2, tp + 3);
3720 --str_arg_l;
3721 }
3722 }
3723 }
3724 if (zero_padding && min_field_width > str_arg_l
3725 && (tmp[0] == '-' || force_sign))
3726 {
3727 // padding 0's should be inserted after the sign
3728 number_of_zeros_to_pad = min_field_width - str_arg_l;
3729 zero_padding_insertion_ind = 1;
3730 }
3731 str_arg = tmp;
3732 break;
3733 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003734
3735 default:
3736 // unrecognized conversion specifier, keep format string
3737 // as-is
3738 zero_padding = 0; // turn zero padding off for non-numeric
3739 // conversion
3740 justify_left = 1;
3741 min_field_width = 0; // reset flags
3742
3743 // discard the unrecognized conversion, just keep *
3744 // the unrecognized conversion character
3745 str_arg = p;
3746 str_arg_l = 0;
3747 if (*p != NUL)
3748 str_arg_l++; // include invalid conversion specifier
3749 // unchanged if not at end-of-string
3750 break;
3751 }
3752
3753 if (*p != NUL)
3754 p++; // step over the just processed conversion specifier
3755
3756 // insert padding to the left as requested by min_field_width;
3757 // this does not include the zero padding in case of numerical
3758 // conversions
3759 if (!justify_left)
3760 {
3761 // left padding with blank or zero
3762 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
3763
3764 if (pn > 0)
3765 {
3766 if (str_l < str_m)
3767 {
3768 size_t avail = str_m - str_l;
3769
3770 vim_memset(str + str_l, zero_padding ? '0' : ' ',
3771 (size_t)pn > avail ? avail
3772 : (size_t)pn);
3773 }
3774 str_l += pn;
3775 }
3776 }
3777
3778 // zero padding as requested by the precision or by the minimal
3779 // field width for numeric conversions required?
3780 if (number_of_zeros_to_pad == 0)
3781 {
3782 // will not copy first part of numeric right now, *
3783 // force it to be copied later in its entirety
3784 zero_padding_insertion_ind = 0;
3785 }
3786 else
3787 {
3788 // insert first part of numerics (sign or '0x') before zero
3789 // padding
3790 int zn = (int)zero_padding_insertion_ind;
3791
3792 if (zn > 0)
3793 {
3794 if (str_l < str_m)
3795 {
3796 size_t avail = str_m - str_l;
3797
3798 mch_memmove(str + str_l, str_arg,
3799 (size_t)zn > avail ? avail
3800 : (size_t)zn);
3801 }
3802 str_l += zn;
3803 }
3804
3805 // insert zero padding as requested by the precision or min
3806 // field width
3807 zn = (int)number_of_zeros_to_pad;
3808 if (zn > 0)
3809 {
3810 if (str_l < str_m)
3811 {
3812 size_t avail = str_m - str_l;
3813
3814 vim_memset(str + str_l, '0',
3815 (size_t)zn > avail ? avail
3816 : (size_t)zn);
3817 }
3818 str_l += zn;
3819 }
3820 }
3821
3822 // insert formatted string
3823 // (or as-is conversion specifier for unknown conversions)
3824 {
3825 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
3826
3827 if (sn > 0)
3828 {
3829 if (str_l < str_m)
3830 {
3831 size_t avail = str_m - str_l;
3832
3833 mch_memmove(str + str_l,
3834 str_arg + zero_padding_insertion_ind,
3835 (size_t)sn > avail ? avail : (size_t)sn);
3836 }
3837 str_l += sn;
3838 }
3839 }
3840
3841 // insert right padding
3842 if (justify_left)
3843 {
3844 // right blank padding to the field width
3845 int pn = (int)(min_field_width
3846 - (str_arg_l + number_of_zeros_to_pad));
3847
3848 if (pn > 0)
3849 {
3850 if (str_l < str_m)
3851 {
3852 size_t avail = str_m - str_l;
3853
3854 vim_memset(str + str_l, ' ',
3855 (size_t)pn > avail ? avail
3856 : (size_t)pn);
3857 }
3858 str_l += pn;
3859 }
3860 }
3861 vim_free(tofree);
3862 }
3863 }
3864
3865 if (str_m > 0)
3866 {
3867 // make sure the string is nul-terminated even at the expense of
3868 // overwriting the last character (shouldn't happen, but just in case)
3869 //
3870 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
3871 }
3872
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003873 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00003874 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003875
K.Takata4c215ec2023-08-26 18:05:08 +02003876 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003877 va_end(ap);
3878
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003879 // Return the number of characters formatted (excluding trailing nul
3880 // character), that is, the number of characters that would have been
3881 // written to the buffer if it were large enough.
3882 return (int)str_l;
3883}
3884
3885#endif // PROTO