blob: c26914d0d049317413dfa196c8833d543e6d3509 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved by Bram Moolenaar
 *
 * Do ":help uganda" in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * strings.c: string manipulation functions
 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
Mike Williams51024bb2024-05-30 07:46:30 +0200154 size_t l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
zeertzjq88c8c542024-05-30 19:27:25 +0200272 if (do_special && find_cmdline_var(p, &l) >= 0)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200273 {
274 *d++ = '\\'; // insert backslash
zeertzjq88c8c542024-05-30 19:27:25 +0200275 memcpy(d, p, l); // copy the var
276 d += l;
277 p += l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000341 if (p == NULL)
342 return;
343
344 p2 = p;
345 while ((c = *p2) != NUL)
346 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000528/*
529 * A version of strlen() that has a maximum length.
530 */
531 size_t
532vim_strlen_maxlen(char *s, size_t maxlen)
533{
534 size_t i;
535 for (i = 0; i < maxlen; ++i)
536 if (s[i] == NUL)
537 break;
538 return i;
539}
540
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200541#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
542/*
543 * Compare two strings, ignoring case, using current locale.
544 * Doesn't work for multi-byte characters.
545 * return 0 for match, < 0 for smaller, > 0 for bigger
546 */
547 int
548vim_stricmp(char *s1, char *s2)
549{
550 int i;
551
552 for (;;)
553 {
554 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
555 if (i != 0)
556 return i; // this character different
557 if (*s1 == NUL)
558 break; // strings match until NUL
559 ++s1;
560 ++s2;
561 }
562 return 0; // strings match
563}
564#endif
565
566#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
567/*
568 * Compare two strings, for length "len", ignoring case, using current locale.
569 * Doesn't work for multi-byte characters.
570 * return 0 for match, < 0 for smaller, > 0 for bigger
571 */
572 int
573vim_strnicmp(char *s1, char *s2, size_t len)
574{
575 int i;
576
577 while (len > 0)
578 {
579 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
580 if (i != 0)
581 return i; // this character different
582 if (*s1 == NUL)
583 break; // strings match until NUL
584 ++s1;
585 ++s2;
586 --len;
587 }
588 return 0; // strings match
589}
590#endif
591
592/*
Christian Brabandt84e31752024-09-02 09:59:18 +0200593 * Compare two ASCII strings, for length "len", ignoring case, ignoring locale
594 * (mostly matters for turkish locale where i I might be different).
595 * return 0 for match, < 0 for smaller, > 0 for bigger
596 */
597 int
598vim_strnicmp_asc(char *s1, char *s2, size_t len)
599{
John Marriottc847c122024-11-24 14:09:40 +0100600 int i = 0;
Christian Brabandt84e31752024-09-02 09:59:18 +0200601 int save_cmp_flags = cmp_flags;
602
603 cmp_flags |= CMP_KEEPASCII; // compare by ASCII value, ignoring locale
604 while (len > 0)
605 {
606 i = vim_tolower(*s1) - vim_tolower(*s2);
607 if (i != 0)
608 break; // this character is different
609 if (*s1 == NUL)
610 break; // strings match until NUL
611 ++s1;
612 ++s2;
613 --len;
614 }
615 cmp_flags = save_cmp_flags;
616 return i;
617}
618
619/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200620 * Search for first occurrence of "c" in "string".
621 * Version of strchr() that handles unsigned char strings with characters from
622 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
623 * end of the string.
624 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000625 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200626vim_strchr(char_u *string, int c)
627{
628 char_u *p;
629 int b;
630
631 p = string;
632 if (enc_utf8 && c >= 0x80)
633 {
634 while (*p != NUL)
635 {
636 int l = utfc_ptr2len(p);
637
638 // Avoid matching an illegal byte here.
639 if (utf_ptr2char(p) == c && l > 1)
640 return p;
641 p += l;
642 }
643 return NULL;
644 }
645 if (enc_dbcs != 0 && c > 255)
646 {
647 int n2 = c & 0xff;
648
649 c = ((unsigned)c >> 8) & 0xff;
650 while ((b = *p) != NUL)
651 {
652 if (b == c && p[1] == n2)
653 return p;
654 p += (*mb_ptr2len)(p);
655 }
656 return NULL;
657 }
658 if (has_mbyte)
659 {
660 while ((b = *p) != NUL)
661 {
662 if (b == c)
663 return p;
664 p += (*mb_ptr2len)(p);
665 }
666 return NULL;
667 }
668 while ((b = *p) != NUL)
669 {
670 if (b == c)
671 return p;
672 ++p;
673 }
674 return NULL;
675}
676
// Sized version of strchr() that can handle embedded NULs.
// Searches the first "*n" bytes of "p" for the byte "c".  Both are compared
// as unsigned char, like memchr() does, so that a byte of 0x80 or higher is
// also found when "char" is signed and "c" is passed as 128..255.
// "*n" is decremented for every byte that is skipped: on a match it is the
// number of bytes left starting at the match, without a match it is zero.
// Returns a pointer to the match or NULL when "c" was not found.
    char *
vim_strnchr(const char *p, size_t *n, int c)
{
    unsigned char   wanted = (unsigned char)c;

    while (*n > 0)
    {
        if ((unsigned char)*p == wanted)
            return (char *)p;
        p++;
        (*n)--;
    }

    return NULL;
}
692
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200693/*
694 * Version of strchr() that only works for bytes and handles unsigned char
695 * strings with characters above 128 correctly. It also doesn't return a
696 * pointer to the NUL at the end of the string.
697 */
698 char_u *
699vim_strbyte(char_u *string, int c)
700{
701 char_u *p = string;
702
703 while (*p != NUL)
704 {
705 if (*p == c)
706 return p;
707 ++p;
708 }
709 return NULL;
710}
711
712/*
713 * Search for last occurrence of "c" in "string".
714 * Version of strrchr() that handles unsigned char strings with characters from
715 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
716 * end of the string.
717 * Return NULL if not found.
718 * Does not handle multi-byte char for "c"!
719 */
720 char_u *
721vim_strrchr(char_u *string, int c)
722{
723 char_u *retval = NULL;
724 char_u *p = string;
725
726 while (*p)
727 {
728 if (*p == c)
729 retval = p;
730 MB_PTR_ADV(p);
731 }
732 return retval;
733}
734
735/*
736 * Vim's version of strpbrk(), in case it's missing.
737 * Don't generate a prototype for this, causes problems when it's not used.
738 */
739#ifndef PROTO
740# ifndef HAVE_STRPBRK
741# ifdef vim_strpbrk
742# undef vim_strpbrk
743# endif
744 char_u *
745vim_strpbrk(char_u *s, char_u *charset)
746{
747 while (*s)
748 {
749 if (vim_strchr(charset, *s) != NULL)
750 return s;
751 MB_PTR_ADV(s);
752 }
753 return NULL;
754}
755# endif
756#endif
757
758/*
759 * Sort an array of strings.
760 */
// qsort() callback: "s1" and "s2" each point to a string pointer; the two
// strings are compared with STRCMP().
static int sort_compare(const void *s1, const void *s2);

    static int
sort_compare(const void *s1, const void *s2)
{
    char    *left = *(char **)s1;
    char    *right = *(char **)s2;

    return STRCMP(left, right);
}
768
769 void
770sort_strings(
771 char_u **files,
772 int count)
773{
774 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
775}
776
777#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
778/*
779 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
780 * When "s" is NULL FALSE is returned.
781 */
782 int
783has_non_ascii(char_u *s)
784{
785 char_u *p;
786
787 if (s != NULL)
788 for (p = s; *p != NUL; ++p)
789 if (*p >= 128)
790 return TRUE;
791 return FALSE;
792}
793#endif
794
795/*
796 * Concatenate two strings and return the result in allocated memory.
797 * Returns NULL when out of memory.
798 */
799 char_u *
800concat_str(char_u *str1, char_u *str2)
801{
802 char_u *dest;
803 size_t l = str1 == NULL ? 0 : STRLEN(str1);
804
805 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000806 if (dest == NULL)
807 return NULL;
808 if (str1 == NULL)
809 *dest = NUL;
810 else
811 STRCPY(dest, str1);
812 if (str2 != NULL)
813 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200814 return dest;
815}
816
zeertzjq4dd266c2023-08-19 11:35:03 +0200817#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
818/*
819 * Reverse text into allocated memory.
820 * Returns the allocated string, NULL when out of memory.
821 */
822 char_u *
823reverse_text(char_u *s)
824{
825 size_t len = STRLEN(s);
826 char_u *rev = alloc(len + 1);
827 if (rev == NULL)
828 return NULL;
829
830 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
831 {
832 if (has_mbyte)
833 {
834 int mb_len = (*mb_ptr2len)(s + s_i);
835 rev_i -= mb_len;
836 mch_memmove(rev + rev_i, s + s_i, mb_len);
837 s_i += mb_len - 1;
838 }
839 else
840 rev[--rev_i] = s[s_i];
841 }
842 rev[len] = NUL;
843 return rev;
844}
845#endif
846
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200847#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200848/*
849 * Return string "str" in ' quotes, doubling ' characters.
850 * If "str" is NULL an empty string is assumed.
851 * If "function" is TRUE make it function('string').
852 */
853 char_u *
854string_quote(char_u *str, int function)
855{
856 unsigned len;
857 char_u *p, *r, *s;
858
859 len = (function ? 13 : 3);
860 if (str != NULL)
861 {
862 len += (unsigned)STRLEN(str);
863 for (p = str; *p != NUL; MB_PTR_ADV(p))
864 if (*p == '\'')
865 ++len;
866 }
867 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000868 if (r == NULL)
869 return NULL;
870
871 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200872 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000873 STRCPY(r, "function('");
874 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200875 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000876 else
877 *r++ = '\'';
878 if (str != NULL)
879 for (p = str; *p != NUL; )
880 {
881 if (*p == '\'')
882 *r++ = '\'';
883 MB_COPY_CHAR(p, r);
884 }
885 *r++ = '\'';
886 if (function)
887 *r++ = ')';
888 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200889 return s;
890}
891
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000892/*
893 * Count the number of times "needle" occurs in string "haystack". Case is
894 * ignored if "ic" is TRUE.
895 */
896 long
897string_count(char_u *haystack, char_u *needle, int ic)
898{
899 long n = 0;
900 char_u *p = haystack;
901 char_u *next;
902
903 if (p == NULL || needle == NULL || *needle == NUL)
904 return 0;
905
906 if (ic)
907 {
908 size_t len = STRLEN(needle);
909
910 while (*p != NUL)
911 {
912 if (MB_STRNICMP(p, needle, len) == 0)
913 {
914 ++n;
915 p += len;
916 }
917 else
918 MB_PTR_ADV(p);
919 }
920 }
921 else
922 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
923 {
924 ++n;
925 p = next + STRLEN(needle);
926 }
927
928 return n;
929}
930
931/*
932 * Make a typval_T of the first character of "input" and store it in "output".
933 * Return OK or FAIL.
934 */
935 static int
936copy_first_char_to_tv(char_u *input, typval_T *output)
937{
938 char_u buf[MB_MAXBYTES + 1];
939 int len;
940
941 if (input == NULL || output == NULL)
942 return FAIL;
943
944 len = has_mbyte ? mb_ptr2len(input) : 1;
945 STRNCPY(buf, input, len);
946 buf[len] = NUL;
947 output->v_type = VAR_STRING;
948 output->vval.v_string = vim_strsave(buf);
949
950 return output->vval.v_string == NULL ? FAIL : OK;
951}
952
953/*
954 * Implementation of map() and filter() for a String. Apply "expr" to every
955 * character in string "str" and return the result in "rettv".
956 */
957 void
958string_filter_map(
959 char_u *str,
960 filtermap_T filtermap,
961 typval_T *expr,
962 typval_T *rettv)
963{
964 char_u *p;
965 typval_T tv;
966 garray_T ga;
967 int len = 0;
968 int idx = 0;
969 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100970 typval_T newtv;
971 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000972
973 rettv->v_type = VAR_STRING;
974 rettv->vval.v_string = NULL;
975
976 // set_vim_var_nr() doesn't set the type
977 set_vim_var_type(VV_KEY, VAR_NUMBER);
978
zeertzjqe7d49462023-04-16 20:53:55 +0100979 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100980 fc = eval_expr_get_funccal(expr, &newtv);
981
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000982 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000983 for (p = str; *p != NUL; p += len)
984 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000985 if (copy_first_char_to_tv(p, &tv) == FAIL)
986 break;
987 len = (int)STRLEN(tv.vval.v_string);
988
989 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100990 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000991 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000992 {
993 clear_tv(&newtv);
994 clear_tv(&tv);
995 break;
996 }
Ernie Raele79e2072024-01-13 11:47:33 +0100997 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000998 {
999 if (newtv.v_type != VAR_STRING)
1000 {
1001 clear_tv(&newtv);
1002 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +00001003 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001004 break;
1005 }
1006 else
1007 ga_concat(&ga, newtv.vval.v_string);
1008 }
Ernie Raele79e2072024-01-13 11:47:33 +01001009 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001010 ga_concat(&ga, tv.vval.v_string);
1011
1012 clear_tv(&newtv);
1013 clear_tv(&tv);
1014
1015 ++idx;
1016 }
1017 ga_append(&ga, NUL);
1018 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +01001019 if (fc != NULL)
1020 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001021}
1022
1023/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001024 * Implementation of reduce() for String "argvars[0]" using the function "expr"
1025 * starting with the optional initial value "argvars[2]" and return the result
1026 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001027 */
1028 void
1029string_reduce(
1030 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001031 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001032 typval_T *rettv)
1033{
1034 char_u *p = tv_get_string(&argvars[0]);
1035 int len;
1036 typval_T argv[3];
1037 int r;
1038 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +01001039 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001040
1041 if (argvars[2].v_type == VAR_UNKNOWN)
1042 {
1043 if (*p == NUL)
1044 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001045 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001046 return;
1047 }
1048 if (copy_first_char_to_tv(p, rettv) == FAIL)
1049 return;
1050 p += STRLEN(rettv->vval.v_string);
1051 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001052 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001053 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001054 else
1055 copy_tv(&argvars[2], rettv);
1056
zeertzjqe7d49462023-04-16 20:53:55 +01001057 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001058 fc = eval_expr_get_funccal(expr, rettv);
1059
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001060 for ( ; *p != NUL; p += len)
1061 {
1062 argv[0] = *rettv;
1063 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1064 break;
1065 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001066
zeertzjqad0c4422023-08-17 22:15:47 +02001067 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001068
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001069 clear_tv(&argv[0]);
1070 clear_tv(&argv[1]);
1071 if (r == FAIL || called_emsg != called_emsg_start)
1072 return;
1073 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001074
1075 if (fc != NULL)
1076 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001077}
1078
Bram Moolenaare4098452023-05-07 18:53:49 +01001079/*
1080 * Implementation of "byteidx()" and "byteidxcomp()" functions
1081 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001082 static void
Dominique Pellé0268ff32024-07-28 21:12:20 +02001083byteidx_common(typval_T *argvars, typval_T *rettv, int comp)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001084{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001085 rettv->vval.v_number = -1;
1086
1087 if (in_vim9script()
1088 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001089 || check_for_number_arg(argvars, 1) == FAIL
1090 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001091 return;
1092
Christian Brabandt67672ef2023-04-24 21:09:54 +01001093 char_u *str = tv_get_string_chk(&argvars[0]);
1094 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001095 if (str == NULL || idx < 0)
1096 return;
1097
Christian Brabandt67672ef2023-04-24 21:09:54 +01001098 varnumber_T utf16idx = FALSE;
1099 if (argvars[2].v_type != VAR_UNKNOWN)
1100 {
zeertzjq8cf51372023-05-08 15:31:38 +01001101 int error = FALSE;
1102 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1103 if (error)
1104 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001105 if (utf16idx < 0 || utf16idx > 1)
1106 {
zeertzjq8cf51372023-05-08 15:31:38 +01001107 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001108 return;
1109 }
1110 }
1111
1112 int (*ptr2len)(char_u *);
1113 if (enc_utf8 && comp)
1114 ptr2len = utf_ptr2len;
1115 else
1116 ptr2len = mb_ptr2len;
1117
1118 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001119 for ( ; idx > 0; idx--)
1120 {
1121 if (*t == NUL) // EOL reached
1122 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001123 if (utf16idx)
1124 {
1125 int clen = ptr2len(t);
1126 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1127 if (c > 0xFFFF)
1128 idx--;
1129 }
1130 if (idx > 0)
1131 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001132 }
1133 rettv->vval.v_number = (varnumber_T)(t - str);
1134}
1135
1136/*
1137 * "byteidx()" function
1138 */
1139 void
1140f_byteidx(typval_T *argvars, typval_T *rettv)
1141{
Bram Moolenaare4098452023-05-07 18:53:49 +01001142 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001143}
1144
1145/*
1146 * "byteidxcomp()" function
1147 */
1148 void
1149f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1150{
Bram Moolenaare4098452023-05-07 18:53:49 +01001151 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001152}
1153
1154/*
1155 * "charidx()" function
1156 */
1157 void
1158f_charidx(typval_T *argvars, typval_T *rettv)
1159{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001160 rettv->vval.v_number = -1;
1161
Christian Brabandt67672ef2023-04-24 21:09:54 +01001162 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001163 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001164 || check_for_opt_bool_arg(argvars, 2) == FAIL
1165 || (argvars[2].v_type != VAR_UNKNOWN
1166 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001167 return;
1168
Christian Brabandt67672ef2023-04-24 21:09:54 +01001169 char_u *str = tv_get_string_chk(&argvars[0]);
1170 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001171 if (str == NULL || idx < 0)
1172 return;
1173
Christian Brabandt67672ef2023-04-24 21:09:54 +01001174 varnumber_T countcc = FALSE;
1175 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001176 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001177 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001178 countcc = tv_get_bool(&argvars[2]);
1179 if (argvars[3].v_type != VAR_UNKNOWN)
1180 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001181 }
1182
Christian Brabandt67672ef2023-04-24 21:09:54 +01001183 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001184 if (enc_utf8 && countcc)
1185 ptr2len = utf_ptr2len;
1186 else
1187 ptr2len = mb_ptr2len;
1188
Christian Brabandt67672ef2023-04-24 21:09:54 +01001189 char_u *p;
1190 int len;
1191 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001192 {
1193 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001194 {
1195 // If the index is exactly the number of bytes or utf-16 code units
1196 // in the string then return the length of the string in
1197 // characters.
1198 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1199 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001200 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001201 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001202 if (utf16idx)
1203 {
1204 idx--;
1205 int clen = ptr2len(p);
1206 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1207 if (c > 0xFFFF)
1208 idx--;
1209 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001210 p += ptr2len(p);
1211 }
1212
1213 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1214}
1215
1216/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001217 * Convert the string "str", from encoding "from" to encoding "to".
1218 */
1219 static char_u *
1220convert_string(char_u *str, char_u *from, char_u *to)
1221{
1222 vimconv_T vimconv;
1223
1224 vimconv.vc_type = CONV_NONE;
1225 if (convert_setup(&vimconv, from, to) == FAIL)
1226 return NULL;
1227 vimconv.vc_fail = TRUE;
1228 if (vimconv.vc_type == CONV_NONE)
1229 str = vim_strsave(str);
1230 else
1231 str = string_convert(&vimconv, str, NULL);
1232 convert_setup(&vimconv, NULL, NULL);
1233
1234 return str;
1235}
1236
1237/*
1238 * "blob2str()" function
1239 * Converts a blob to a string, ensuring valid UTF-8 encoding.
1240 */
1241 void
1242f_blob2str(typval_T *argvars, typval_T *rettv)
1243{
1244 blob_T *blob;
1245 char_u *str;
1246 char_u *p;
1247 int blen;
1248
1249 if (check_for_blob_arg(argvars, 0) == FAIL
1250 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1251 return;
1252
1253 blob = argvars->vval.v_blob;
1254 blen = blob_len(blob);
1255
1256 rettv->v_type = VAR_STRING;
1257
1258 str = alloc(blen + 1);
1259 if (str == NULL)
1260 return;
1261
1262 for (int i = 0; i < blen; i++)
1263 str[i] = (char_u)blob_get(blob, i);
1264 str[blen] = NUL;
1265
1266 p = str;
1267 if (argvars[1].v_type != VAR_UNKNOWN)
1268 {
1269 dict_T *d = argvars[1].vval.v_dict;
1270 if (d != NULL)
1271 {
1272 char_u *enc = dict_get_string(d, "encoding", FALSE);
1273 if (enc != NULL)
1274 {
1275 char_u *from = enc_canonize(enc_skip(enc));
1276 p = convert_string(str, from, p_enc);
1277 vim_free(str);
1278 if (p == NULL)
1279 {
1280 semsg(_(e_str_encoding_failed), "from", from);
1281 vim_free(from);
1282 return;
1283 }
1284 vim_free(from);
1285 }
1286 }
1287 }
1288
1289 if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0)
1290 {
1291 if (!utf_valid_string(p, NULL))
1292 {
1293 semsg(_(e_str_encoding_failed), "from", p_enc);
1294 vim_free(p);
1295 return;
1296 }
1297 }
1298
1299 rettv->vval.v_string = p;
1300}
1301
1302/*
1303 * "str2blob()" function
1304 */
1305 void
1306f_str2blob(typval_T *argvars, typval_T *rettv)
1307{
1308 blob_T *blob;
1309 char_u *p;
1310 size_t len;
1311
1312 if (check_for_string_arg(argvars, 0) == FAIL
1313 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1314 return;
1315
1316 if (rettv_blob_alloc(rettv) == FAIL)
1317 return;
1318
1319 blob = rettv->vval.v_blob;
1320
1321 p = tv_get_string_chk(&argvars[0]);
1322 if (p == NULL)
1323 return;
1324
1325 int free_str = FALSE;
1326 if (argvars[1].v_type != VAR_UNKNOWN)
1327 {
1328 dict_T *d = argvars[1].vval.v_dict;
1329 if (d != NULL)
1330 {
1331 char_u *enc = dict_get_string(d, "encoding", FALSE);
1332 if (enc != NULL)
1333 {
1334 char_u *to = enc_canonize(enc_skip(enc));
1335 p = convert_string(p, p_enc, to);
1336 if (p == NULL)
1337 {
1338 semsg(_(e_str_encoding_failed), "to", to);
1339 vim_free(to);
1340 return;
1341 }
1342 vim_free(to);
1343 free_str = TRUE;
1344 }
1345 }
1346 }
1347
1348 len = STRLEN(p);
1349 for (size_t i = 0; i < len; i++)
1350 ga_append(&blob->bv_ga, (int)p[i]);
1351
1352 if (free_str)
1353 vim_free(p);
1354}
1355
1356/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001357 * "str2list()" function
1358 */
1359 void
1360f_str2list(typval_T *argvars, typval_T *rettv)
1361{
1362 char_u *p;
1363 int utf8 = FALSE;
1364
1365 if (rettv_list_alloc(rettv) == FAIL)
1366 return;
1367
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001368 if (in_vim9script()
1369 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001370 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001371 return;
1372
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001373 if (argvars[1].v_type != VAR_UNKNOWN)
1374 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1375
1376 p = tv_get_string(&argvars[0]);
1377
1378 if (has_mbyte || utf8)
1379 {
1380 int (*ptr2len)(char_u *);
1381 int (*ptr2char)(char_u *);
1382
1383 if (utf8 || enc_utf8)
1384 {
1385 ptr2len = utf_ptr2len;
1386 ptr2char = utf_ptr2char;
1387 }
1388 else
1389 {
1390 ptr2len = mb_ptr2len;
1391 ptr2char = mb_ptr2char;
1392 }
1393
1394 for ( ; *p != NUL; p += (*ptr2len)(p))
1395 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1396 }
1397 else
1398 for ( ; *p != NUL; ++p)
1399 list_append_number(rettv->vval.v_list, *p);
1400}
1401
1402/*
1403 * "str2nr()" function
1404 */
1405 void
1406f_str2nr(typval_T *argvars, typval_T *rettv)
1407{
1408 int base = 10;
1409 char_u *p;
1410 varnumber_T n;
1411 int what = 0;
1412 int isneg;
1413
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001414 if (in_vim9script()
1415 && (check_for_string_arg(argvars, 0) == FAIL
1416 || check_for_opt_number_arg(argvars, 1) == FAIL
1417 || (argvars[1].v_type != VAR_UNKNOWN
1418 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1419 return;
1420
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001421 if (argvars[1].v_type != VAR_UNKNOWN)
1422 {
1423 base = (int)tv_get_number(&argvars[1]);
1424 if (base != 2 && base != 8 && base != 10 && base != 16)
1425 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001426 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001427 return;
1428 }
1429 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1430 what |= STR2NR_QUOTE;
1431 }
1432
1433 p = skipwhite(tv_get_string_strict(&argvars[0]));
1434 isneg = (*p == '-');
1435 if (*p == '+' || *p == '-')
1436 p = skipwhite(p + 1);
1437 switch (base)
1438 {
1439 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1440 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1441 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1442 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001443 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001444 // Text after the number is silently ignored.
1445 if (isneg)
1446 rettv->vval.v_number = -n;
1447 else
1448 rettv->vval.v_number = n;
1449
1450}
1451
1452/*
1453 * "strgetchar()" function
1454 */
1455 void
1456f_strgetchar(typval_T *argvars, typval_T *rettv)
1457{
1458 char_u *str;
1459 int len;
1460 int error = FALSE;
1461 int charidx;
1462 int byteidx = 0;
1463
1464 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001465
1466 if (in_vim9script()
1467 && (check_for_string_arg(argvars, 0) == FAIL
1468 || check_for_number_arg(argvars, 1) == FAIL))
1469 return;
1470
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001471 str = tv_get_string_chk(&argvars[0]);
1472 if (str == NULL)
1473 return;
1474 len = (int)STRLEN(str);
1475 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1476 if (error)
1477 return;
1478
1479 while (charidx >= 0 && byteidx < len)
1480 {
1481 if (charidx == 0)
1482 {
1483 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1484 break;
1485 }
1486 --charidx;
1487 byteidx += MB_CPTR2LEN(str + byteidx);
1488 }
1489}
1490
1491/*
1492 * "stridx()" function
1493 */
1494 void
1495f_stridx(typval_T *argvars, typval_T *rettv)
1496{
1497 char_u buf[NUMBUFLEN];
1498 char_u *needle;
1499 char_u *haystack;
1500 char_u *save_haystack;
1501 char_u *pos;
1502 int start_idx;
1503
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001504 if (in_vim9script()
1505 && (check_for_string_arg(argvars, 0) == FAIL
1506 || check_for_string_arg(argvars, 1) == FAIL
1507 || check_for_opt_number_arg(argvars, 2) == FAIL))
1508 return;
1509
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001510 needle = tv_get_string_chk(&argvars[1]);
1511 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1512 rettv->vval.v_number = -1;
1513 if (needle == NULL || haystack == NULL)
1514 return; // type error; errmsg already given
1515
1516 if (argvars[2].v_type != VAR_UNKNOWN)
1517 {
1518 int error = FALSE;
1519
1520 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1521 if (error || start_idx >= (int)STRLEN(haystack))
1522 return;
1523 if (start_idx >= 0)
1524 haystack += start_idx;
1525 }
1526
1527 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1528 if (pos != NULL)
1529 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1530}
1531
1532/*
1533 * "string()" function
1534 */
1535 void
1536f_string(typval_T *argvars, typval_T *rettv)
1537{
1538 char_u *tofree;
1539 char_u numbuf[NUMBUFLEN];
1540
1541 rettv->v_type = VAR_STRING;
1542 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1543 get_copyID());
1544 // Make a copy if we have a value but it's not in allocated memory.
1545 if (rettv->vval.v_string != NULL && tofree == NULL)
1546 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1547}
1548
1549/*
1550 * "strlen()" function
1551 */
1552 void
1553f_strlen(typval_T *argvars, typval_T *rettv)
1554{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001555 if (in_vim9script()
1556 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1557 return;
1558
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001559 rettv->vval.v_number = (varnumber_T)(STRLEN(
1560 tv_get_string(&argvars[0])));
1561}
1562
1563 static void
1564strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1565{
1566 char_u *s = tv_get_string(&argvars[0]);
1567 varnumber_T len = 0;
1568 int (*func_mb_ptr2char_adv)(char_u **pp);
1569
1570 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1571 while (*s != NUL)
1572 {
1573 func_mb_ptr2char_adv(&s);
1574 ++len;
1575 }
1576 rettv->vval.v_number = len;
1577}
1578
1579/*
1580 * "strcharlen()" function
1581 */
1582 void
1583f_strcharlen(typval_T *argvars, typval_T *rettv)
1584{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001585 if (in_vim9script()
1586 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1587 return;
1588
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001589 strchar_common(argvars, rettv, TRUE);
1590}
1591
1592/*
1593 * "strchars()" function
1594 */
1595 void
1596f_strchars(typval_T *argvars, typval_T *rettv)
1597{
1598 varnumber_T skipcc = FALSE;
1599
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001600 if (in_vim9script()
1601 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001602 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001603 return;
1604
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001605 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001606 {
zeertzjq8cf51372023-05-08 15:31:38 +01001607 int error = FALSE;
1608 skipcc = tv_get_bool_chk(&argvars[1], &error);
1609 if (error)
1610 return;
1611 if (skipcc < 0 || skipcc > 1)
1612 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001613 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001614 return;
1615 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001616 }
zeertzjq8cf51372023-05-08 15:31:38 +01001617
1618 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001619}
1620
1621/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001622 * "strutf16len()" function
1623 */
1624 void
1625f_strutf16len(typval_T *argvars, typval_T *rettv)
1626{
1627 rettv->vval.v_number = -1;
1628
1629 if (check_for_string_arg(argvars, 0) == FAIL
1630 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1631 return;
1632
1633 varnumber_T countcc = FALSE;
1634 if (argvars[1].v_type != VAR_UNKNOWN)
1635 countcc = tv_get_bool(&argvars[1]);
1636
1637 char_u *s = tv_get_string(&argvars[0]);
1638 varnumber_T len = 0;
1639 int (*func_mb_ptr2char_adv)(char_u **pp);
1640 int ch;
1641
1642 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1643 while (*s != NUL)
1644 {
1645 ch = func_mb_ptr2char_adv(&s);
1646 if (ch > 0xFFFF)
1647 ++len;
1648 ++len;
1649 }
1650 rettv->vval.v_number = len;
1651}
1652
1653/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001654 * "strdisplaywidth()" function
1655 */
1656 void
1657f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1658{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001659 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001660 int col = 0;
1661
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001662 rettv->vval.v_number = -1;
1663
1664 if (in_vim9script()
1665 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001666 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001667 return;
1668
1669 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001670 if (argvars[1].v_type != VAR_UNKNOWN)
1671 col = (int)tv_get_number(&argvars[1]);
1672
1673 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1674}
1675
1676/*
1677 * "strwidth()" function
1678 */
1679 void
1680f_strwidth(typval_T *argvars, typval_T *rettv)
1681{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001682 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001683
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001684 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1685 return;
1686
1687 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001688 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1689}
1690
1691/*
1692 * "strcharpart()" function
1693 */
1694 void
1695f_strcharpart(typval_T *argvars, typval_T *rettv)
1696{
1697 char_u *p;
1698 int nchar;
1699 int nbyte = 0;
1700 int charlen;
1701 int skipcc = FALSE;
1702 int len = 0;
1703 int slen;
1704 int error = FALSE;
1705
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001706 if (in_vim9script()
1707 && (check_for_string_arg(argvars, 0) == FAIL
1708 || check_for_number_arg(argvars, 1) == FAIL
1709 || check_for_opt_number_arg(argvars, 2) == FAIL
1710 || (argvars[2].v_type != VAR_UNKNOWN
1711 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1712 return;
1713
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001714 p = tv_get_string(&argvars[0]);
1715 slen = (int)STRLEN(p);
1716
1717 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1718 if (!error)
1719 {
1720 if (argvars[2].v_type != VAR_UNKNOWN
1721 && argvars[3].v_type != VAR_UNKNOWN)
1722 {
zeertzjq8cf51372023-05-08 15:31:38 +01001723 skipcc = tv_get_bool_chk(&argvars[3], &error);
1724 if (error)
1725 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001726 if (skipcc < 0 || skipcc > 1)
1727 {
zeertzjq8cf51372023-05-08 15:31:38 +01001728 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001729 return;
1730 }
1731 }
1732
1733 if (nchar > 0)
1734 while (nchar > 0 && nbyte < slen)
1735 {
1736 if (skipcc)
1737 nbyte += mb_ptr2len(p + nbyte);
1738 else
1739 nbyte += MB_CPTR2LEN(p + nbyte);
1740 --nchar;
1741 }
1742 else
1743 nbyte = nchar;
1744 if (argvars[2].v_type != VAR_UNKNOWN)
1745 {
1746 charlen = (int)tv_get_number(&argvars[2]);
1747 while (charlen > 0 && nbyte + len < slen)
1748 {
1749 int off = nbyte + len;
1750
1751 if (off < 0)
1752 len += 1;
1753 else
1754 {
1755 if (skipcc)
1756 len += mb_ptr2len(p + off);
1757 else
1758 len += MB_CPTR2LEN(p + off);
1759 }
1760 --charlen;
1761 }
1762 }
1763 else
1764 len = slen - nbyte; // default: all bytes that are available.
1765 }
1766
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001767 // Only return the overlap between the specified part and the actual
1768 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001769 if (nbyte < 0)
1770 {
1771 len += nbyte;
1772 nbyte = 0;
1773 }
1774 else if (nbyte > slen)
1775 nbyte = slen;
1776 if (len < 0)
1777 len = 0;
1778 else if (nbyte + len > slen)
1779 len = slen - nbyte;
1780
1781 rettv->v_type = VAR_STRING;
1782 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1783}
1784
1785/*
1786 * "strpart()" function
1787 */
1788 void
1789f_strpart(typval_T *argvars, typval_T *rettv)
1790{
1791 char_u *p;
1792 int n;
1793 int len;
1794 int slen;
1795 int error = FALSE;
1796
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001797 if (in_vim9script()
1798 && (check_for_string_arg(argvars, 0) == FAIL
1799 || check_for_number_arg(argvars, 1) == FAIL
1800 || check_for_opt_number_arg(argvars, 2) == FAIL
1801 || (argvars[2].v_type != VAR_UNKNOWN
1802 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1803 return;
1804
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001805 p = tv_get_string(&argvars[0]);
1806 slen = (int)STRLEN(p);
1807
1808 n = (int)tv_get_number_chk(&argvars[1], &error);
1809 if (error)
1810 len = 0;
1811 else if (argvars[2].v_type != VAR_UNKNOWN)
1812 len = (int)tv_get_number(&argvars[2]);
1813 else
1814 len = slen - n; // default len: all bytes that are available.
1815
1816 // Only return the overlap between the specified part and the actual
1817 // string.
1818 if (n < 0)
1819 {
1820 len += n;
1821 n = 0;
1822 }
1823 else if (n > slen)
1824 n = slen;
1825 if (len < 0)
1826 len = 0;
1827 else if (n + len > slen)
1828 len = slen - n;
1829
1830 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1831 {
1832 int off;
1833
1834 // length in characters
1835 for (off = n; off < slen && len > 0; --len)
1836 off += mb_ptr2len(p + off);
1837 len = off - n;
1838 }
1839
1840 rettv->v_type = VAR_STRING;
1841 rettv->vval.v_string = vim_strnsave(p + n, len);
1842}
1843
1844/*
1845 * "strridx()" function
1846 */
1847 void
1848f_strridx(typval_T *argvars, typval_T *rettv)
1849{
1850 char_u buf[NUMBUFLEN];
1851 char_u *needle;
1852 char_u *haystack;
1853 char_u *rest;
1854 char_u *lastmatch = NULL;
1855 int haystack_len, end_idx;
1856
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001857 if (in_vim9script()
1858 && (check_for_string_arg(argvars, 0) == FAIL
1859 || check_for_string_arg(argvars, 1) == FAIL
1860 || check_for_opt_number_arg(argvars, 2) == FAIL))
1861 return;
1862
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001863 needle = tv_get_string_chk(&argvars[1]);
1864 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1865
1866 rettv->vval.v_number = -1;
1867 if (needle == NULL || haystack == NULL)
1868 return; // type error; errmsg already given
1869
1870 haystack_len = (int)STRLEN(haystack);
1871 if (argvars[2].v_type != VAR_UNKNOWN)
1872 {
1873 // Third argument: upper limit for index
1874 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1875 if (end_idx < 0)
1876 return; // can never find a match
1877 }
1878 else
1879 end_idx = haystack_len;
1880
1881 if (*needle == NUL)
1882 {
1883 // Empty string matches past the end.
1884 lastmatch = haystack + end_idx;
1885 }
1886 else
1887 {
1888 for (rest = haystack; *rest != '\0'; ++rest)
1889 {
1890 rest = (char_u *)strstr((char *)rest, (char *)needle);
1891 if (rest == NULL || rest > haystack + end_idx)
1892 break;
1893 lastmatch = rest;
1894 }
1895 }
1896
1897 if (lastmatch == NULL)
1898 rettv->vval.v_number = -1;
1899 else
1900 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1901}
1902
1903/*
1904 * "strtrans()" function
1905 */
1906 void
1907f_strtrans(typval_T *argvars, typval_T *rettv)
1908{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001909 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1910 return;
1911
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001912 rettv->v_type = VAR_STRING;
1913 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1914}
1915
Christian Brabandt67672ef2023-04-24 21:09:54 +01001916
1917/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001918 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001919 *
1920 * Converts a byte or character offset in a string to the corresponding UTF-16
1921 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01001922 */
1923 void
1924f_utf16idx(typval_T *argvars, typval_T *rettv)
1925{
1926 rettv->vval.v_number = -1;
1927
1928 if (check_for_string_arg(argvars, 0) == FAIL
1929 || check_for_opt_number_arg(argvars, 1) == FAIL
1930 || check_for_opt_bool_arg(argvars, 2) == FAIL
1931 || (argvars[2].v_type != VAR_UNKNOWN
1932 && check_for_opt_bool_arg(argvars, 3) == FAIL))
1933 return;
1934
1935 char_u *str = tv_get_string_chk(&argvars[0]);
1936 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
1937 if (str == NULL || idx < 0)
1938 return;
1939
1940 varnumber_T countcc = FALSE;
1941 varnumber_T charidx = FALSE;
1942 if (argvars[2].v_type != VAR_UNKNOWN)
1943 {
1944 countcc = tv_get_bool(&argvars[2]);
1945 if (argvars[3].v_type != VAR_UNKNOWN)
1946 charidx = tv_get_bool(&argvars[3]);
1947 }
1948
1949 int (*ptr2len)(char_u *);
1950 if (enc_utf8 && countcc)
1951 ptr2len = utf_ptr2len;
1952 else
1953 ptr2len = mb_ptr2len;
1954
1955 char_u *p;
1956 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001957 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001958 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
1959 {
1960 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001961 {
1962 // If the index is exactly the number of bytes or characters in the
1963 // string then return the length of the string in utf-16 code
1964 // units.
1965 if (charidx ? (idx == 0) : (p == (str + idx)))
1966 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001967 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001968 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001969 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001970 int clen = ptr2len(p);
1971 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1972 if (c > 0xFFFF)
1973 len++;
1974 p += ptr2len(p);
1975 if (charidx)
1976 idx--;
1977 }
1978
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001979 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001980}
1981
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001982/*
1983 * "tolower(string)" function
1984 */
1985 void
1986f_tolower(typval_T *argvars, typval_T *rettv)
1987{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001988 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1989 return;
1990
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001991 rettv->v_type = VAR_STRING;
1992 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1993}
1994
1995/*
1996 * "toupper(string)" function
1997 */
1998 void
1999f_toupper(typval_T *argvars, typval_T *rettv)
2000{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002001 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2002 return;
2003
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002004 rettv->v_type = VAR_STRING;
2005 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
2006}
2007
2008/*
2009 * "tr(string, fromstr, tostr)" function
2010 */
2011 void
2012f_tr(typval_T *argvars, typval_T *rettv)
2013{
2014 char_u *in_str;
2015 char_u *fromstr;
2016 char_u *tostr;
2017 char_u *p;
2018 int inlen;
2019 int fromlen;
2020 int tolen;
2021 int idx;
2022 char_u *cpstr;
2023 int cplen;
2024 int first = TRUE;
2025 char_u buf[NUMBUFLEN];
2026 char_u buf2[NUMBUFLEN];
2027 garray_T ga;
2028
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002029 if (in_vim9script()
2030 && (check_for_string_arg(argvars, 0) == FAIL
2031 || check_for_string_arg(argvars, 1) == FAIL
2032 || check_for_string_arg(argvars, 2) == FAIL))
2033 return;
2034
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002035 in_str = tv_get_string(&argvars[0]);
2036 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
2037 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
2038
2039 // Default return value: empty string.
2040 rettv->v_type = VAR_STRING;
2041 rettv->vval.v_string = NULL;
2042 if (fromstr == NULL || tostr == NULL)
2043 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00002044 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002045
2046 if (!has_mbyte)
2047 // not multi-byte: fromstr and tostr must be the same length
2048 if (STRLEN(fromstr) != STRLEN(tostr))
2049 {
2050error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00002051 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002052 ga_clear(&ga);
2053 return;
2054 }
2055
2056 // fromstr and tostr have to contain the same number of chars
2057 while (*in_str != NUL)
2058 {
2059 if (has_mbyte)
2060 {
2061 inlen = (*mb_ptr2len)(in_str);
2062 cpstr = in_str;
2063 cplen = inlen;
2064 idx = 0;
2065 for (p = fromstr; *p != NUL; p += fromlen)
2066 {
2067 fromlen = (*mb_ptr2len)(p);
2068 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
2069 {
2070 for (p = tostr; *p != NUL; p += tolen)
2071 {
2072 tolen = (*mb_ptr2len)(p);
2073 if (idx-- == 0)
2074 {
2075 cplen = tolen;
2076 cpstr = p;
2077 break;
2078 }
2079 }
2080 if (*p == NUL) // tostr is shorter than fromstr
2081 goto error;
2082 break;
2083 }
2084 ++idx;
2085 }
2086
2087 if (first && cpstr == in_str)
2088 {
2089 // Check that fromstr and tostr have the same number of
2090 // (multi-byte) characters. Done only once when a character
2091 // of in_str doesn't appear in fromstr.
2092 first = FALSE;
2093 for (p = tostr; *p != NUL; p += tolen)
2094 {
2095 tolen = (*mb_ptr2len)(p);
2096 --idx;
2097 }
2098 if (idx != 0)
2099 goto error;
2100 }
2101
2102 (void)ga_grow(&ga, cplen);
2103 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
2104 ga.ga_len += cplen;
2105
2106 in_str += inlen;
2107 }
2108 else
2109 {
2110 // When not using multi-byte chars we can do it faster.
2111 p = vim_strchr(fromstr, *in_str);
2112 if (p != NULL)
2113 ga_append(&ga, tostr[p - fromstr]);
2114 else
2115 ga_append(&ga, *in_str);
2116 ++in_str;
2117 }
2118 }
2119
2120 // add a terminating NUL
2121 (void)ga_grow(&ga, 1);
2122 ga_append(&ga, NUL);
2123
2124 rettv->vval.v_string = ga.ga_data;
2125}
2126
2127/*
2128 * "trim({expr})" function
2129 */
2130 void
2131f_trim(typval_T *argvars, typval_T *rettv)
2132{
2133 char_u buf1[NUMBUFLEN];
2134 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002135 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002136 char_u *mask = NULL;
2137 char_u *tail;
2138 char_u *prev;
2139 char_u *p;
2140 int c1;
2141 int dir = 0;
2142
2143 rettv->v_type = VAR_STRING;
2144 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002145
2146 if (in_vim9script()
2147 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02002148 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002149 || (argvars[1].v_type != VAR_UNKNOWN
2150 && check_for_opt_number_arg(argvars, 2) == FAIL)))
2151 return;
2152
2153 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002154 if (head == NULL)
2155 return;
2156
Illia Bobyr80799172023-10-17 18:00:50 +02002157 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002158 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002159
2160 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02002161 {
Illia Bobyr80799172023-10-17 18:00:50 +02002162 mask = tv_get_string_buf_chk(&argvars[1], buf2);
2163 if (*mask == NUL)
2164 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002165
Illia Bobyr80799172023-10-17 18:00:50 +02002166 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02002167 {
Illia Bobyr80799172023-10-17 18:00:50 +02002168 int error = 0;
2169
2170 // leading or trailing characters to trim
2171 dir = (int)tv_get_number_chk(&argvars[2], &error);
2172 if (error)
2173 return;
2174 if (dir < 0 || dir > 2)
2175 {
2176 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
2177 return;
2178 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002179 }
2180 }
2181
2182 if (dir == 0 || dir == 1)
2183 {
2184 // Trim leading characters
2185 while (*head != NUL)
2186 {
2187 c1 = PTR2CHAR(head);
2188 if (mask == NULL)
2189 {
2190 if (c1 > ' ' && c1 != 0xa0)
2191 break;
2192 }
2193 else
2194 {
2195 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2196 if (c1 == PTR2CHAR(p))
2197 break;
2198 if (*p == NUL)
2199 break;
2200 }
2201 MB_PTR_ADV(head);
2202 }
2203 }
2204
2205 tail = head + STRLEN(head);
2206 if (dir == 0 || dir == 2)
2207 {
2208 // Trim trailing characters
2209 for (; tail > head; tail = prev)
2210 {
2211 prev = tail;
2212 MB_PTR_BACK(head, prev);
2213 c1 = PTR2CHAR(prev);
2214 if (mask == NULL)
2215 {
2216 if (c1 > ' ' && c1 != 0xa0)
2217 break;
2218 }
2219 else
2220 {
2221 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2222 if (c1 == PTR2CHAR(p))
2223 break;
2224 if (*p == NUL)
2225 break;
2226 }
2227 }
2228 }
2229 rettv->vval.v_string = vim_strnsave(head, tail - head);
2230}
2231
// Error message given by tv_nr(), tv_str() and tv_float() when the requested
// entry in the argument list has type VAR_UNKNOWN.
static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002233
2234/*
2235 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2236 */
2237 static varnumber_T
2238tv_nr(typval_T *tvs, int *idxp)
2239{
2240 int idx = *idxp - 1;
2241 varnumber_T n = 0;
2242 int err = FALSE;
2243
2244 if (tvs[idx].v_type == VAR_UNKNOWN)
2245 emsg(_(e_printf));
2246 else
2247 {
2248 ++*idxp;
2249 n = tv_get_number_chk(&tvs[idx], &err);
2250 if (err)
2251 n = 0;
2252 }
2253 return n;
2254}
2255
2256/*
2257 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2258 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2259 * are not converted to a string.
2260 * If "tofree" is not NULL echo_string() is used. All types are converted to
2261 * a string with the same format as ":echo". The caller must free "*tofree".
2262 * Returns NULL for an error.
2263 */
2264 static char *
2265tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2266{
2267 int idx = *idxp - 1;
2268 char *s = NULL;
2269 static char_u numbuf[NUMBUFLEN];
2270
2271 if (tvs[idx].v_type == VAR_UNKNOWN)
2272 emsg(_(e_printf));
2273 else
2274 {
2275 ++*idxp;
2276 if (tofree != NULL)
2277 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2278 else
2279 s = (char *)tv_get_string_chk(&tvs[idx]);
2280 }
2281 return s;
2282}
2283
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002284/*
2285 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2286 */
2287 static double
2288tv_float(typval_T *tvs, int *idxp)
2289{
2290 int idx = *idxp - 1;
2291 double f = 0;
2292
2293 if (tvs[idx].v_type == VAR_UNKNOWN)
2294 emsg(_(e_printf));
2295 else
2296 {
2297 ++*idxp;
2298 if (tvs[idx].v_type == VAR_FLOAT)
2299 f = tvs[idx].vval.v_float;
2300 else if (tvs[idx].v_type == VAR_NUMBER)
2301 f = (double)tvs[idx].vval.v_number;
2302 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002303 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002304 }
2305 return f;
2306}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002307
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002308#endif
2309
/*
 * Select the text the printf() function uses for an infinite value:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * "positive" is zero for negative infinity.  For positive infinity
 * "force_sign" asks for a leading sign, which is a space when
 * "space_for_positive" is also set and "+" otherwise.
 * An upper case "fmt_spec" selects the upper case variants.
 */
    static const char *
infinity_str(int positive,
             char fmt_spec,
             int force_sign,
             int space_for_positive)
{
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // Column within a row: 0 "-", 1 no sign, 2 "+", 3 " ".
    int col = positive * (1 + force_sign * (1 + space_for_positive));
    // The second row holds the upper case texts.
    int row_start = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[row_start + col];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002331
2332/*
2333 * This code was included to provide a portable vsnprintf() and snprintf().
2334 * Some systems may provide their own, but we always use this one for
2335 * consistency.
2336 *
2337 * This code is based on snprintf.c - a portable implementation of snprintf
2338 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2339 * Included with permission. It was heavily modified to fit in Vim.
2340 * The original code, including useful comments, can be found here:
2341 * http://www.ijs.si/software/snprintf/
2342 *
2343 * This snprintf() only supports the following conversion specifiers:
2344 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2345 * with flags: '-', '+', ' ', '0' and '#'.
2346 * An asterisk is supported for field width as well as precision.
2347 *
2348 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2349 *
2350 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2351 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2352 *
2353 * The locale is not used, the string is used as a byte string. This is only
2354 * relevant for double-byte encodings where the second byte may be '%'.
2355 *
2356 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2357 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2358 *
2359 * The return value is the number of characters which would be generated
2360 * for the given input, excluding the trailing NUL. If this value
2361 * is greater or equal to "str_m", not all characters from the result
2362 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
2363 * are discarded. If "str_m" is greater than zero it is guaranteed
2364 * the resulting string will be NUL-terminated.
2365 */
2366
2367/*
2368 * When va_list is not supported we only define vim_snprintf().
2369 *
2370 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2371 * "typval_T". When the latter is not used it must be NULL.
2372 */
2373
2374// When generating prototypes all of this is skipped, cproto doesn't
2375// understand this.
2376#ifndef PROTO
2377
/*
 * Like vim_snprintf(), but append the formatted text to the NUL-terminated
 * string that is already in "str".  "str_m" is the size of the whole buffer;
 * when the existing text already fills it no space is passed on.
 * Returns the value of vim_vsnprintf() for the appended part only.
 */
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    size_t      used = STRLEN(str);
    size_t      room = (str_m > used) ? str_m - used : 0;
    va_list     args;
    int         added;

    va_start(args, fmt);
    added = vim_vsnprintf(str + used, room, fmt, args);
    va_end(args);
    return added;
}
2396
/*
 * Format "fmt" with the variable arguments into "str", which has room for
 * "str_m" bytes.  Collects the arguments in a va_list and hands them to
 * vim_vsnprintf(), whose return value is passed on.
 */
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     args;
    int         len;

    va_start(args, fmt);
    len = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);
    return len;
}
2408
2409 int
2410vim_vsnprintf(
2411 char *str,
2412 size_t str_m,
2413 const char *fmt,
2414 va_list ap)
2415{
2416 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2417}
2418
// Kinds of argument a printf() conversion specification can consume.
enum
{
    TYPE_UNKNOWN = -1,
    TYPE_INT,
    TYPE_LONGINT,
    TYPE_LONGLONGINT,
    TYPE_UNSIGNEDINT,
    TYPE_UNSIGNEDLONGINT,
    TYPE_UNSIGNEDLONGLONGINT,
    TYPE_POINTER,
    TYPE_PERCENT,
    TYPE_CHAR,
    TYPE_STRING,
    TYPE_FLOAT
};

/*
 * Classify the conversion that starts at "type": an optional length modifier
 * ('h', 'l' or 'll') followed by the conversion character.  A "*" (field
 * width or precision taken from an argument) counts as an int.
 * Returns one of the TYPE_ values, TYPE_UNKNOWN when the conversion character
 * is not recognized.
 */
    static int
format_typeof(
    const char  *type)
{
    const char  *s = type;
    // '\0' (none), 'h', 'l' or 'L' (the latter stands for "ll")
    char        modifier = '\0';

    if (*s == 'h' || *s == 'l')
    {
        modifier = *s++;
        if (modifier == 'l' && *s == 'l')
        {
            // double l = __int64 / varnumber_T
            modifier = 'L';
            ++s;
        }
    }

    switch (*s)
    {
        case '%':
            return TYPE_PERCENT;

        case 'c':
            return TYPE_CHAR;

        case 's': case 'S':
            return TYPE_STRING;

        case 'p':
            return TYPE_POINTER;

        case 'b': case 'B':
            // binary is always the widest unsigned type
            return TYPE_UNSIGNEDLONGLONGINT;

        case 'f': case 'F':
        case 'e': case 'E':
        case 'g': case 'G':
            return TYPE_FLOAT;

        // Synonyms that imply their own length, whatever modifier was given.
        case '*':
            return TYPE_INT;
        case 'D':
            return TYPE_LONGINT;
        case 'U': case 'O':
            return TYPE_UNSIGNEDLONGINT;

        // signed; char and short arguments are passed as int
        case 'd': case 'i':
            if (modifier == 'l')
                return TYPE_LONGINT;
            if (modifier == 'L')
                return TYPE_LONGLONGINT;
            return TYPE_INT;

        // unsigned
        case 'u': case 'o':
        case 'x': case 'X':
            if (modifier == 'l')
                return TYPE_UNSIGNEDLONGINT;
            if (modifier == 'L')
                return TYPE_UNSIGNEDLONGLONGINT;
            return TYPE_UNSIGNEDINT;

        default:
            return TYPE_UNKNOWN;
    }
}
2549
zeertzjq7772c932023-08-15 22:48:40 +02002550 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002551format_typename(
2552 const char *type)
2553{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002554 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002555 {
2556 case TYPE_INT:
2557 return _(typename_int);
2558
2559 case TYPE_LONGINT:
2560 return _(typename_longint);
2561
2562 case TYPE_LONGLONGINT:
2563 return _(typename_longlongint);
2564
2565 case TYPE_UNSIGNEDINT:
2566 return _(typename_unsignedint);
2567
2568 case TYPE_UNSIGNEDLONGINT:
2569 return _(typename_unsignedlongint);
2570
2571 case TYPE_UNSIGNEDLONGLONGINT:
2572 return _(typename_unsignedlonglongint);
2573
2574 case TYPE_POINTER:
2575 return _(typename_pointer);
2576
2577 case TYPE_PERCENT:
2578 return _(typename_percent);
2579
2580 case TYPE_CHAR:
2581 return _(typename_char);
2582
2583 case TYPE_STRING:
2584 return _(typename_string);
2585
2586 case TYPE_FLOAT:
2587 return _(typename_float);
2588 }
2589
2590 return _(typename_unknown);
2591}
2592
zeertzjq7772c932023-08-15 22:48:40 +02002593 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002594adjust_types(
2595 const char ***ap_types,
2596 int arg,
2597 int *num_posarg,
2598 const char *type)
2599{
2600 if (*ap_types == NULL || *num_posarg < arg)
2601 {
2602 int idx;
2603 const char **new_types;
2604
2605 if (*ap_types == NULL)
2606 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2607 else
K.Takata4c215ec2023-08-26 18:05:08 +02002608 new_types = vim_realloc((char **)*ap_types,
2609 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002610
2611 if (new_types == NULL)
2612 return FAIL;
2613
2614 for (idx = *num_posarg; idx < arg; ++idx)
2615 new_types[idx] = NULL;
2616
2617 *ap_types = new_types;
2618 *num_posarg = arg;
2619 }
2620
2621 if ((*ap_types)[arg - 1] != NULL)
2622 {
2623 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2624 {
2625 const char *pt = type;
2626 if (pt[0] == '*')
2627 pt = (*ap_types)[arg - 1];
2628
2629 if (pt[0] != '*')
2630 {
2631 switch (pt[0])
2632 {
2633 case 'd': case 'i': break;
2634 default:
2635 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2636 return FAIL;
2637 }
2638 }
2639 }
2640 else
2641 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002642 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002643 {
2644 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2645 return FAIL;
2646 }
2647 }
2648 }
2649
2650 (*ap_types)[arg - 1] = type;
2651
2652 return OK;
2653}
2654
Christ van Willegenc35fc032024-03-14 18:30:41 +01002655 static void
2656format_overflow_error(const char *pstart)
2657{
2658 size_t arglen = 0;
2659 char *argcopy = NULL;
2660 const char *p = pstart;
2661
2662 while (VIM_ISDIGIT((int)(*p)))
2663 ++p;
2664
2665 arglen = p - pstart;
2666 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2667 if (argcopy != NULL)
2668 {
2669 strncpy(argcopy, pstart, arglen);
2670 semsg(_( e_val_too_large), argcopy);
2671 free(argcopy);
2672 }
2673 else
2674 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2675}
2676
2677#define MAX_ALLOWED_STRING_WIDTH 6400
2678
2679 static int
2680get_unsigned_int(
2681 const char *pstart,
2682 const char **p,
zeertzjq0dff3152024-07-29 20:28:14 +02002683 unsigned int *uj,
2684 int overflow_err)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002685{
2686 *uj = **p - '0';
2687 ++*p;
2688
2689 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2690 {
2691 *uj = 10 * *uj + (unsigned int)(**p - '0');
2692 ++*p;
2693 }
2694
2695 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2696 {
zeertzjq0dff3152024-07-29 20:28:14 +02002697 if (overflow_err)
2698 {
2699 format_overflow_error(pstart);
2700 return FAIL;
2701 }
2702 else
2703 *uj = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002704 }
2705
2706 return OK;
2707}
2708
2709
zeertzjq7772c932023-08-15 22:48:40 +02002710 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002711parse_fmt_types(
2712 const char ***ap_types,
2713 int *num_posarg,
2714 const char *fmt,
2715 typval_T *tvs UNUSED
2716 )
2717{
2718 const char *p = fmt;
2719 const char *arg = NULL;
2720
2721 int any_pos = 0;
2722 int any_arg = 0;
2723 int arg_idx;
2724
2725#define CHECK_POS_ARG do { \
2726 if (any_pos && any_arg) \
2727 { \
2728 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2729 goto error; \
2730 } \
2731} while (0);
2732
2733 if (p == NULL)
2734 return OK;
2735
2736 while (*p != NUL)
2737 {
2738 if (*p != '%')
2739 {
2740 char *q = strchr(p + 1, '%');
2741 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2742
2743 p += n;
2744 }
2745 else
2746 {
2747 // allowed values: \0, h, l, L
2748 char length_modifier = '\0';
2749
2750 // variable for positional arg
2751 int pos_arg = -1;
2752 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002753 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002754
2755 p++; // skip '%'
2756
2757 // First check to see if we find a positional
2758 // argument specifier
2759 ptype = p;
2760
2761 while (VIM_ISDIGIT(*ptype))
2762 ++ptype;
2763
2764 if (*ptype == '$')
2765 {
2766 if (*p == '0')
2767 {
2768 // 0 flag at the wrong place
2769 semsg(_( e_invalid_format_specifier_str), fmt);
2770 goto error;
2771 }
2772
2773 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002774 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002775
zeertzjq0dff3152024-07-29 20:28:14 +02002776 if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002777 goto error;
2778
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002779 pos_arg = uj;
2780
2781 any_pos = 1;
2782 CHECK_POS_ARG;
2783
2784 ++p;
2785 }
2786
2787 // parse flags
2788 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2789 || *p == '#' || *p == '\'')
2790 {
2791 switch (*p)
2792 {
2793 case '0': break;
2794 case '-': break;
2795 case '+': break;
2796 case ' ': // If both the ' ' and '+' flags appear, the ' '
2797 // flag should be ignored
2798 break;
2799 case '#': break;
2800 case '\'': break;
2801 }
2802 p++;
2803 }
2804 // If the '0' and '-' flags both appear, the '0' flag should be
2805 // ignored.
2806
2807 // parse field width
2808 if (*(arg = p) == '*')
2809 {
2810 p++;
2811
2812 if (VIM_ISDIGIT((int)(*p)))
2813 {
2814 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002815 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002816
zeertzjq0dff3152024-07-29 20:28:14 +02002817 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002818 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002819
2820 if (*p != '$')
2821 {
2822 semsg(_( e_invalid_format_specifier_str), fmt);
2823 goto error;
2824 }
2825 else
2826 {
2827 ++p;
2828 any_pos = 1;
2829 CHECK_POS_ARG;
2830
2831 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2832 goto error;
2833 }
2834 }
2835 else
2836 {
2837 any_arg = 1;
2838 CHECK_POS_ARG;
2839 }
2840 }
dundargoc580c1fc2023-10-06 19:41:14 +02002841 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002842 {
2843 // size_t could be wider than unsigned int; make sure we treat
2844 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002845 const char *digstart = p;
2846 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002847
zeertzjq0dff3152024-07-29 20:28:14 +02002848 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002849 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002850
2851 if (*p == '$')
2852 {
2853 semsg(_( e_invalid_format_specifier_str), fmt);
2854 goto error;
2855 }
2856 }
2857
2858 // parse precision
2859 if (*p == '.')
2860 {
2861 p++;
2862
2863 if (*(arg = p) == '*')
2864 {
2865 p++;
2866
2867 if (VIM_ISDIGIT((int)(*p)))
2868 {
2869 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002870 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002871
zeertzjq0dff3152024-07-29 20:28:14 +02002872 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002873 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002874
2875 if (*p == '$')
2876 {
2877 any_pos = 1;
2878 CHECK_POS_ARG;
2879
2880 ++p;
2881
2882 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2883 goto error;
2884 }
2885 else
2886 {
2887 semsg(_( e_invalid_format_specifier_str), fmt);
2888 goto error;
2889 }
2890 }
2891 else
2892 {
2893 any_arg = 1;
2894 CHECK_POS_ARG;
2895 }
2896 }
dundargoc580c1fc2023-10-06 19:41:14 +02002897 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002898 {
2899 // size_t could be wider than unsigned int; make sure we
2900 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002901 const char *digstart = p;
2902 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002903
zeertzjq0dff3152024-07-29 20:28:14 +02002904 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002905 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002906
2907 if (*p == '$')
2908 {
2909 semsg(_( e_invalid_format_specifier_str), fmt);
2910 goto error;
2911 }
2912 }
2913 }
2914
2915 if (pos_arg != -1)
2916 {
2917 any_pos = 1;
2918 CHECK_POS_ARG;
2919
2920 ptype = p;
2921 }
2922
2923 // parse 'h', 'l' and 'll' length modifiers
2924 if (*p == 'h' || *p == 'l')
2925 {
2926 length_modifier = *p;
2927 p++;
2928 if (length_modifier == 'l' && *p == 'l')
2929 {
2930 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02002931 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002932 p++;
2933 }
2934 }
2935
2936 switch (*p)
2937 {
2938 // Check for known format specifiers. % is special!
2939 case 'i':
2940 case '*':
2941 case 'd':
2942 case 'u':
2943 case 'o':
2944 case 'D':
2945 case 'U':
2946 case 'O':
2947 case 'x':
2948 case 'X':
2949 case 'b':
2950 case 'B':
2951 case 'c':
2952 case 's':
2953 case 'S':
2954 case 'p':
2955 case 'f':
2956 case 'F':
2957 case 'e':
2958 case 'E':
2959 case 'g':
2960 case 'G':
2961 if (pos_arg != -1)
2962 {
2963 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
2964 goto error;
2965 }
2966 else
2967 {
2968 any_arg = 1;
2969 CHECK_POS_ARG;
2970 }
2971 break;
2972
2973 default:
2974 if (pos_arg != -1)
2975 {
2976 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
2977 goto error;
2978 }
2979 }
2980
2981 if (*p != NUL)
2982 p++; // step over the just processed conversion specifier
2983 }
2984 }
2985
2986 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
2987 {
2988 if ((*ap_types)[arg_idx] == NULL)
2989 {
2990 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
2991 goto error;
2992 }
2993
2994# if defined(FEAT_EVAL)
2995 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
2996 {
2997 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
2998 goto error;
2999 }
3000# endif
3001 }
3002
3003 return OK;
3004
3005error:
K.Takata4c215ec2023-08-26 18:05:08 +02003006 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003007 *ap_types = NULL;
3008 *num_posarg = 0;
3009 return FAIL;
3010}
3011
zeertzjq7772c932023-08-15 22:48:40 +02003012 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003013skip_to_arg(
3014 const char **ap_types,
3015 va_list ap_start,
3016 va_list *ap,
3017 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003018 int *arg_cur,
3019 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003020{
3021 int arg_min = 0;
3022
3023 if (*arg_cur + 1 == *arg_idx)
3024 {
3025 ++*arg_cur;
3026 ++*arg_idx;
3027 return;
3028 }
3029
3030 if (*arg_cur >= *arg_idx)
3031 {
3032 // Reset ap to ap_start and skip arg_idx - 1 types
3033 va_end(*ap);
3034 va_copy(*ap, ap_start);
3035 }
3036 else
3037 {
3038 // Skip over any we should skip
3039 arg_min = *arg_cur;
3040 }
3041
3042 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
3043 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003044 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003045
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003046 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
3047 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02003048 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003049 return;
3050 }
3051
3052 p = ap_types[*arg_cur];
3053
3054 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003055
3056 // get parameter value, do initial processing
3057 switch (fmt_type)
3058 {
3059 case TYPE_PERCENT:
3060 case TYPE_UNKNOWN:
3061 break;
3062
3063 case TYPE_CHAR:
3064 va_arg(*ap, int);
3065 break;
3066
3067 case TYPE_STRING:
3068 va_arg(*ap, char *);
3069 break;
3070
3071 case TYPE_POINTER:
3072 va_arg(*ap, void *);
3073 break;
3074
3075 case TYPE_INT:
3076 va_arg(*ap, int);
3077 break;
3078
3079 case TYPE_LONGINT:
3080 va_arg(*ap, long int);
3081 break;
3082
3083 case TYPE_LONGLONGINT:
3084 va_arg(*ap, varnumber_T);
3085 break;
3086
3087 case TYPE_UNSIGNEDINT:
3088 va_arg(*ap, unsigned int);
3089 break;
3090
3091 case TYPE_UNSIGNEDLONGINT:
3092 va_arg(*ap, unsigned long int);
3093 break;
3094
3095 case TYPE_UNSIGNEDLONGLONGINT:
3096 va_arg(*ap, uvarnumber_T);
3097 break;
3098
3099 case TYPE_FLOAT:
3100 va_arg(*ap, double);
3101 break;
3102 }
3103 }
3104
3105 // Because we know that after we return from this call,
3106 // a va_arg() call is made, we can pre-emptively
3107 // increment the current argument index.
3108 ++*arg_cur;
3109 ++*arg_idx;
3110
3111 return;
3112}
3113
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003114 int
3115vim_vsnprintf_typval(
3116 char *str,
3117 size_t str_m,
3118 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003119 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003120 typval_T *tvs)
3121{
3122 size_t str_l = 0;
3123 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003124 int arg_cur = 0;
3125 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003126 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003127 va_list ap;
3128 const char **ap_types = NULL;
3129
3130 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
3131 return 0;
3132
3133 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003134
3135 if (p == NULL)
3136 p = "";
3137 while (*p != NUL)
3138 {
3139 if (*p != '%')
3140 {
3141 char *q = strchr(p + 1, '%');
3142 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
3143
3144 // Copy up to the next '%' or NUL without any changes.
3145 if (str_l < str_m)
3146 {
3147 size_t avail = str_m - str_l;
3148
3149 mch_memmove(str + str_l, p, n > avail ? avail : n);
3150 }
3151 p += n;
3152 str_l += n;
3153 }
3154 else
3155 {
3156 size_t min_field_width = 0, precision = 0;
3157 int zero_padding = 0, precision_specified = 0, justify_left = 0;
3158 int alternate_form = 0, force_sign = 0;
3159
3160 // If both the ' ' and '+' flags appear, the ' ' flag should be
3161 // ignored.
3162 int space_for_positive = 1;
3163
3164 // allowed values: \0, h, l, L
3165 char length_modifier = '\0';
3166
3167 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003168# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003169 // That sounds reasonable to use as the maximum
3170 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003171 char tmp[TMP_LEN];
3172
3173 // string address in case of string argument
3174 const char *str_arg = NULL;
3175
3176 // natural field width of arg without padding and sign
3177 size_t str_arg_l;
3178
3179 // unsigned char argument value - only defined for c conversion.
3180 // N.B. standard explicitly states the char argument for the c
3181 // conversion is unsigned
3182 unsigned char uchar_arg;
3183
3184 // number of zeros to be inserted for numeric conversions as
3185 // required by the precision or minimal field width
3186 size_t number_of_zeros_to_pad = 0;
3187
3188 // index into tmp where zero padding is to be inserted
3189 size_t zero_padding_insertion_ind = 0;
3190
3191 // current conversion specifier character
3192 char fmt_spec = '\0';
3193
3194 // buffer for 's' and 'S' specs
3195 char_u *tofree = NULL;
3196
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003197 // variables for positional arg
3198 int pos_arg = -1;
3199 const char *ptype;
3200
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003201
3202 p++; // skip '%'
3203
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003204 // First check to see if we find a positional
3205 // argument specifier
3206 ptype = p;
3207
3208 while (VIM_ISDIGIT(*ptype))
3209 ++ptype;
3210
3211 if (*ptype == '$')
3212 {
3213 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003214 const char *digstart = p;
3215 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003216
zeertzjq0dff3152024-07-29 20:28:14 +02003217 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003218 goto error;
3219
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003220 pos_arg = uj;
3221
3222 ++p;
3223 }
3224
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003225 // parse flags
3226 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3227 || *p == '#' || *p == '\'')
3228 {
3229 switch (*p)
3230 {
3231 case '0': zero_padding = 1; break;
3232 case '-': justify_left = 1; break;
3233 case '+': force_sign = 1; space_for_positive = 0; break;
3234 case ' ': force_sign = 1;
3235 // If both the ' ' and '+' flags appear, the ' '
3236 // flag should be ignored
3237 break;
3238 case '#': alternate_form = 1; break;
3239 case '\'': break;
3240 }
3241 p++;
3242 }
3243 // If the '0' and '-' flags both appear, the '0' flag should be
3244 // ignored.
3245
3246 // parse field width
3247 if (*p == '*')
3248 {
3249 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003250 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003251
3252 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003253
3254 if (VIM_ISDIGIT((int)(*p)))
3255 {
3256 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003257 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003258
zeertzjq0dff3152024-07-29 20:28:14 +02003259 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003260 goto error;
3261
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003262 arg_idx = uj;
3263
3264 ++p;
3265 }
3266
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003267 j =
3268# if defined(FEAT_EVAL)
3269 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3270# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003271 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3272 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003273 va_arg(ap, int));
3274
Christ van Willegenc35fc032024-03-14 18:30:41 +01003275 if (j > MAX_ALLOWED_STRING_WIDTH)
3276 {
zeertzjq0dff3152024-07-29 20:28:14 +02003277 if (tvs != NULL)
3278 {
3279 format_overflow_error(digstart);
3280 goto error;
3281 }
3282 else
3283 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003284 }
3285
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003286 if (j >= 0)
3287 min_field_width = j;
3288 else
3289 {
3290 min_field_width = -j;
3291 justify_left = 1;
3292 }
3293 }
3294 else if (VIM_ISDIGIT((int)(*p)))
3295 {
3296 // size_t could be wider than unsigned int; make sure we treat
3297 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003298 const char *digstart = p;
3299 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003300
zeertzjq0dff3152024-07-29 20:28:14 +02003301 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003302 goto error;
3303
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003304 min_field_width = uj;
3305 }
3306
3307 // parse precision
3308 if (*p == '.')
3309 {
3310 p++;
3311 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003312
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003313 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003314 {
3315 // size_t could be wider than unsigned int; make sure we
3316 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003317 const char *digstart = p;
3318 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003319
zeertzjq0dff3152024-07-29 20:28:14 +02003320 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003321 goto error;
3322
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003323 precision = uj;
3324 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003325 else if (*p == '*')
3326 {
3327 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003328 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003329
3330 p++;
3331
3332 if (VIM_ISDIGIT((int)(*p)))
3333 {
3334 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003335 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003336
zeertzjq0dff3152024-07-29 20:28:14 +02003337 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003338 goto error;
3339
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003340 arg_idx = uj;
3341
3342 ++p;
3343 }
3344
3345 j =
3346# if defined(FEAT_EVAL)
3347 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3348# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003349 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3350 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003351 va_arg(ap, int));
3352
Christ van Willegenc35fc032024-03-14 18:30:41 +01003353 if (j > MAX_ALLOWED_STRING_WIDTH)
3354 {
zeertzjq0dff3152024-07-29 20:28:14 +02003355 if (tvs != NULL)
3356 {
3357 format_overflow_error(digstart);
3358 goto error;
3359 }
3360 else
3361 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003362 }
3363
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003364 if (j >= 0)
3365 precision = j;
3366 else
3367 {
3368 precision_specified = 0;
3369 precision = 0;
3370 }
3371 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003372 }
3373
3374 // parse 'h', 'l' and 'll' length modifiers
3375 if (*p == 'h' || *p == 'l')
3376 {
3377 length_modifier = *p;
3378 p++;
3379 if (length_modifier == 'l' && *p == 'l')
3380 {
3381 // double l = __int64 / varnumber_T
3382 length_modifier = 'L';
3383 p++;
3384 }
3385 }
3386 fmt_spec = *p;
3387
3388 // common synonyms:
3389 switch (fmt_spec)
3390 {
3391 case 'i': fmt_spec = 'd'; break;
3392 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3393 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3394 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3395 default: break;
3396 }
3397
3398# if defined(FEAT_EVAL)
3399 switch (fmt_spec)
3400 {
3401 case 'd': case 'u': case 'o': case 'x': case 'X':
3402 if (tvs != NULL && length_modifier == '\0')
3403 length_modifier = 'L';
3404 }
3405# endif
3406
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003407 if (pos_arg != -1)
3408 arg_idx = pos_arg;
3409
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003410 // get parameter value, do initial processing
3411 switch (fmt_spec)
3412 {
3413 // '%' and 'c' behave similar to 's' regarding flags and field
3414 // widths
3415 case '%':
3416 case 'c':
3417 case 's':
3418 case 'S':
3419 str_arg_l = 1;
3420 switch (fmt_spec)
3421 {
3422 case '%':
3423 str_arg = p;
3424 break;
3425
3426 case 'c':
3427 {
3428 int j;
3429
3430 j =
3431# if defined(FEAT_EVAL)
3432 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3433# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003434 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3435 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003436 va_arg(ap, int));
3437
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003438 // standard demands unsigned char
3439 uchar_arg = (unsigned char)j;
3440 str_arg = (char *)&uchar_arg;
3441 break;
3442 }
3443
3444 case 's':
3445 case 'S':
3446 str_arg =
3447# if defined(FEAT_EVAL)
3448 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3449# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003450 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3451 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003452 va_arg(ap, char *));
3453
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003454 if (str_arg == NULL)
3455 {
3456 str_arg = "[NULL]";
3457 str_arg_l = 6;
3458 }
3459 // make sure not to address string beyond the specified
3460 // precision !!!
3461 else if (!precision_specified)
3462 str_arg_l = strlen(str_arg);
3463 // truncate string if necessary as requested by precision
3464 else if (precision == 0)
3465 str_arg_l = 0;
3466 else
3467 {
3468 // Don't put the #if inside memchr(), it can be a
3469 // macro.
3470 // memchr on HP does not like n > 2^31 !!!
3471 char *q = memchr(str_arg, '\0',
3472 precision <= (size_t)0x7fffffffL ? precision
3473 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003474
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003475 str_arg_l = (q == NULL) ? precision
3476 : (size_t)(q - str_arg);
3477 }
3478 if (fmt_spec == 'S')
3479 {
presuku1f2453f2021-11-24 15:32:57 +00003480 char_u *p1;
3481 size_t i;
3482 int cell;
presukud85fccd2021-11-20 19:38:31 +00003483
presuku1f2453f2021-11-24 15:32:57 +00003484 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003485 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003486 {
3487 cell = mb_ptr2cells(p1);
3488 if (precision_specified && i + cell > precision)
3489 break;
3490 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003491 }
presuku1f2453f2021-11-24 15:32:57 +00003492
3493 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003494 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003495 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003496 }
3497 break;
3498
3499 default:
3500 break;
3501 }
3502 break;
3503
3504 case 'd': case 'u':
3505 case 'b': case 'B':
3506 case 'o':
3507 case 'x': case 'X':
3508 case 'p':
3509 {
3510 // NOTE: the u, b, o, x, X and p conversion specifiers
3511 // imply the value is unsigned; d implies a signed
3512 // value
3513
3514 // 0 if numeric argument is zero (or if pointer is
3515 // NULL for 'p'), +1 if greater than zero (or nonzero
3516 // for unsigned arguments), -1 if negative (unsigned
3517 // argument is never negative)
3518 int arg_sign = 0;
3519
3520 // only set for length modifier h, or for no length
3521 // modifiers
3522 int int_arg = 0;
3523 unsigned int uint_arg = 0;
3524
3525 // only set for length modifier l
3526 long int long_arg = 0;
3527 unsigned long int ulong_arg = 0;
3528
3529 // only set for length modifier ll
3530 varnumber_T llong_arg = 0;
3531 uvarnumber_T ullong_arg = 0;
3532
3533 // only set for b conversion
3534 uvarnumber_T bin_arg = 0;
3535
3536 // pointer argument value -only defined for p
3537 // conversion
3538 void *ptr_arg = NULL;
3539
3540 if (fmt_spec == 'p')
3541 {
3542 length_modifier = '\0';
3543 ptr_arg =
3544# if defined(FEAT_EVAL)
3545 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3546 NULL) :
3547# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003548 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3549 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003550 va_arg(ap, void *));
3551
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003552 if (ptr_arg != NULL)
3553 arg_sign = 1;
3554 }
3555 else if (fmt_spec == 'b' || fmt_spec == 'B')
3556 {
3557 bin_arg =
3558# if defined(FEAT_EVAL)
3559 tvs != NULL ?
3560 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3561# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003562 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3563 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003564 va_arg(ap, uvarnumber_T));
3565
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003566 if (bin_arg != 0)
3567 arg_sign = 1;
3568 }
3569 else if (fmt_spec == 'd')
3570 {
3571 // signed
3572 switch (length_modifier)
3573 {
3574 case '\0':
3575 case 'h':
3576 // char and short arguments are passed as int.
3577 int_arg =
3578# if defined(FEAT_EVAL)
3579 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3580# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003581 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3582 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003583 va_arg(ap, int));
3584
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003585 if (int_arg > 0)
3586 arg_sign = 1;
3587 else if (int_arg < 0)
3588 arg_sign = -1;
3589 break;
3590 case 'l':
3591 long_arg =
3592# if defined(FEAT_EVAL)
3593 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3594# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003595 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3596 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003597 va_arg(ap, long int));
3598
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003599 if (long_arg > 0)
3600 arg_sign = 1;
3601 else if (long_arg < 0)
3602 arg_sign = -1;
3603 break;
3604 case 'L':
3605 llong_arg =
3606# if defined(FEAT_EVAL)
3607 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3608# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003609 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3610 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003611 va_arg(ap, varnumber_T));
3612
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003613 if (llong_arg > 0)
3614 arg_sign = 1;
3615 else if (llong_arg < 0)
3616 arg_sign = -1;
3617 break;
3618 }
3619 }
3620 else
3621 {
3622 // unsigned
3623 switch (length_modifier)
3624 {
3625 case '\0':
3626 case 'h':
3627 uint_arg =
3628# if defined(FEAT_EVAL)
3629 tvs != NULL ? (unsigned)
3630 tv_nr(tvs, &arg_idx) :
3631# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003632 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3633 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003634 va_arg(ap, unsigned int));
3635
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003636 if (uint_arg != 0)
3637 arg_sign = 1;
3638 break;
3639 case 'l':
3640 ulong_arg =
3641# if defined(FEAT_EVAL)
3642 tvs != NULL ? (unsigned long)
3643 tv_nr(tvs, &arg_idx) :
3644# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003645 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3646 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003647 va_arg(ap, unsigned long int));
3648
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003649 if (ulong_arg != 0)
3650 arg_sign = 1;
3651 break;
3652 case 'L':
3653 ullong_arg =
3654# if defined(FEAT_EVAL)
3655 tvs != NULL ? (uvarnumber_T)
3656 tv_nr(tvs, &arg_idx) :
3657# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003658 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3659 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003660 va_arg(ap, uvarnumber_T));
3661
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003662 if (ullong_arg != 0)
3663 arg_sign = 1;
3664 break;
3665 }
3666 }
3667
3668 str_arg = tmp;
3669 str_arg_l = 0;
3670
3671 // NOTE:
3672 // For d, i, u, o, x, and X conversions, if precision is
3673 // specified, the '0' flag should be ignored. This is so
3674 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3675 // FreeBSD, NetBSD; but not with Perl.
3676 if (precision_specified)
3677 zero_padding = 0;
3678 if (fmt_spec == 'd')
3679 {
3680 if (force_sign && arg_sign >= 0)
3681 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3682 // leave negative numbers for sprintf to handle, to
3683 // avoid handling tricky cases like (short int)-32768
3684 }
3685 else if (alternate_form)
3686 {
3687 if (arg_sign != 0
3688 && (fmt_spec == 'b' || fmt_spec == 'B'
3689 || fmt_spec == 'x' || fmt_spec == 'X') )
3690 {
3691 tmp[str_arg_l++] = '0';
3692 tmp[str_arg_l++] = fmt_spec;
3693 }
3694 // alternate form should have no effect for p
3695 // conversion, but ...
3696 }
3697
3698 zero_padding_insertion_ind = str_arg_l;
3699 if (!precision_specified)
3700 precision = 1; // default precision is 1
3701 if (precision == 0 && arg_sign == 0)
3702 {
3703 // When zero value is formatted with an explicit
3704 // precision 0, the resulting formatted string is
3705 // empty (d, i, u, b, B, o, x, X, p).
3706 }
3707 else
3708 {
3709 char f[6];
3710 int f_l = 0;
3711
3712 // construct a simple format string for sprintf
3713 f[f_l++] = '%';
3714 if (!length_modifier)
3715 ;
3716 else if (length_modifier == 'L')
3717 {
3718# ifdef MSWIN
3719 f[f_l++] = 'I';
3720 f[f_l++] = '6';
3721 f[f_l++] = '4';
3722# else
3723 f[f_l++] = 'l';
3724 f[f_l++] = 'l';
3725# endif
3726 }
3727 else
3728 f[f_l++] = length_modifier;
3729 f[f_l++] = fmt_spec;
3730 f[f_l++] = '\0';
3731
3732 if (fmt_spec == 'p')
3733 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3734 else if (fmt_spec == 'b' || fmt_spec == 'B')
3735 {
3736 char b[8 * sizeof(uvarnumber_T)];
3737 size_t b_l = 0;
3738 uvarnumber_T bn = bin_arg;
3739
3740 do
3741 {
3742 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3743 bn >>= 1;
3744 }
3745 while (bn != 0);
3746
3747 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3748 str_arg_l += b_l;
3749 }
3750 else if (fmt_spec == 'd')
3751 {
3752 // signed
3753 switch (length_modifier)
3754 {
3755 case '\0': str_arg_l += sprintf(
3756 tmp + str_arg_l, f,
3757 int_arg);
3758 break;
3759 case 'h': str_arg_l += sprintf(
3760 tmp + str_arg_l, f,
3761 (short)int_arg);
3762 break;
3763 case 'l': str_arg_l += sprintf(
3764 tmp + str_arg_l, f, long_arg);
3765 break;
3766 case 'L': str_arg_l += sprintf(
3767 tmp + str_arg_l, f, llong_arg);
3768 break;
3769 }
3770 }
3771 else
3772 {
3773 // unsigned
3774 switch (length_modifier)
3775 {
3776 case '\0': str_arg_l += sprintf(
3777 tmp + str_arg_l, f,
3778 uint_arg);
3779 break;
3780 case 'h': str_arg_l += sprintf(
3781 tmp + str_arg_l, f,
3782 (unsigned short)uint_arg);
3783 break;
3784 case 'l': str_arg_l += sprintf(
3785 tmp + str_arg_l, f, ulong_arg);
3786 break;
3787 case 'L': str_arg_l += sprintf(
3788 tmp + str_arg_l, f, ullong_arg);
3789 break;
3790 }
3791 }
3792
3793 // include the optional minus sign and possible
3794 // "0x" in the region before the zero padding
3795 // insertion point
3796 if (zero_padding_insertion_ind < str_arg_l
3797 && tmp[zero_padding_insertion_ind] == '-')
3798 zero_padding_insertion_ind++;
3799 if (zero_padding_insertion_ind + 1 < str_arg_l
3800 && tmp[zero_padding_insertion_ind] == '0'
3801 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3802 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3803 zero_padding_insertion_ind += 2;
3804 }
3805
3806 {
3807 size_t num_of_digits = str_arg_l
3808 - zero_padding_insertion_ind;
3809
3810 if (alternate_form && fmt_spec == 'o'
3811 // unless zero is already the first
3812 // character
3813 && !(zero_padding_insertion_ind < str_arg_l
3814 && tmp[zero_padding_insertion_ind] == '0'))
3815 {
3816 // assure leading zero for alternate-form
3817 // octal numbers
3818 if (!precision_specified
3819 || precision < num_of_digits + 1)
3820 {
3821 // precision is increased to force the
3822 // first character to be zero, except if a
3823 // zero value is formatted with an
3824 // explicit precision of zero
3825 precision = num_of_digits + 1;
3826 }
3827 }
3828 // zero padding to specified precision?
3829 if (num_of_digits < precision)
3830 number_of_zeros_to_pad = precision - num_of_digits;
3831 }
3832 // zero padding to specified minimal field width?
3833 if (!justify_left && zero_padding)
3834 {
3835 int n = (int)(min_field_width - (str_arg_l
3836 + number_of_zeros_to_pad));
3837 if (n > 0)
3838 number_of_zeros_to_pad += n;
3839 }
3840 break;
3841 }
3842
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003843 case 'f':
3844 case 'F':
3845 case 'e':
3846 case 'E':
3847 case 'g':
3848 case 'G':
3849 {
3850 // Floating point.
3851 double f;
3852 double abs_f;
3853 char format[40];
3854 int l;
3855 int remove_trailing_zeroes = FALSE;
3856
3857 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003858# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003859 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003860# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003861 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3862 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003863 va_arg(ap, double));
3864
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003865 abs_f = f < 0 ? -f : f;
3866
3867 if (fmt_spec == 'g' || fmt_spec == 'G')
3868 {
3869 // Would be nice to use %g directly, but it prints
3870 // "1.0" as "1", we don't want that.
3871 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3872 || abs_f == 0.0)
3873 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3874 else
3875 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3876 remove_trailing_zeroes = TRUE;
3877 }
3878
3879 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003880# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003881 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003882# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003883 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003884# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003885 )
3886 {
3887 // Avoid a buffer overflow
3888 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3889 force_sign, space_for_positive));
3890 str_arg_l = STRLEN(tmp);
3891 zero_padding = 0;
3892 }
3893 else
3894 {
3895 if (isnan(f))
3896 {
3897 // Not a number: nan or NAN
3898 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3899 : "nan");
3900 str_arg_l = 3;
3901 zero_padding = 0;
3902 }
3903 else if (isinf(f))
3904 {
3905 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3906 force_sign, space_for_positive));
3907 str_arg_l = STRLEN(tmp);
3908 zero_padding = 0;
3909 }
3910 else
3911 {
3912 // Regular float number
3913 format[0] = '%';
3914 l = 1;
3915 if (force_sign)
3916 format[l++] = space_for_positive ? ' ' : '+';
3917 if (precision_specified)
3918 {
3919 size_t max_prec = TMP_LEN - 10;
3920
3921 // Make sure we don't get more digits than we
3922 // have room for.
3923 if ((fmt_spec == 'f' || fmt_spec == 'F')
3924 && abs_f > 1.0)
3925 max_prec -= (size_t)log10(abs_f);
3926 if (precision > max_prec)
3927 precision = max_prec;
3928 l += sprintf(format + l, ".%d", (int)precision);
3929 }
3930 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
3931 format[l + 1] = NUL;
3932
3933 str_arg_l = sprintf(tmp, format, f);
3934 }
3935
3936 if (remove_trailing_zeroes)
3937 {
3938 int i;
3939 char *tp;
3940
3941 // Using %g or %G: remove superfluous zeroes.
3942 if (fmt_spec == 'f' || fmt_spec == 'F')
3943 tp = tmp + str_arg_l - 1;
3944 else
3945 {
3946 tp = (char *)vim_strchr((char_u *)tmp,
3947 fmt_spec == 'e' ? 'e' : 'E');
3948 if (tp != NULL)
3949 {
3950 // Remove superfluous '+' and leading
3951 // zeroes from the exponent.
3952 if (tp[1] == '+')
3953 {
3954 // Change "1.0e+07" to "1.0e07"
3955 STRMOVE(tp + 1, tp + 2);
3956 --str_arg_l;
3957 }
3958 i = (tp[1] == '-') ? 2 : 1;
3959 while (tp[i] == '0')
3960 {
3961 // Change "1.0e07" to "1.0e7"
3962 STRMOVE(tp + i, tp + i + 1);
3963 --str_arg_l;
3964 }
3965 --tp;
3966 }
3967 }
3968
3969 if (tp != NULL && !precision_specified)
3970 // Remove trailing zeroes, but keep the one
3971 // just after a dot.
3972 while (tp > tmp + 2 && *tp == '0'
3973 && tp[-1] != '.')
3974 {
3975 STRMOVE(tp, tp + 1);
3976 --tp;
3977 --str_arg_l;
3978 }
3979 }
3980 else
3981 {
3982 char *tp;
3983
3984 // Be consistent: some printf("%e") use 1.0e+12
3985 // and some 1.0e+012. Remove one zero in the last
3986 // case.
3987 tp = (char *)vim_strchr((char_u *)tmp,
3988 fmt_spec == 'e' ? 'e' : 'E');
3989 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
3990 && tp[2] == '0'
3991 && vim_isdigit(tp[3])
3992 && vim_isdigit(tp[4]))
3993 {
3994 STRMOVE(tp + 2, tp + 3);
3995 --str_arg_l;
3996 }
3997 }
3998 }
3999 if (zero_padding && min_field_width > str_arg_l
4000 && (tmp[0] == '-' || force_sign))
4001 {
4002 // padding 0's should be inserted after the sign
4003 number_of_zeros_to_pad = min_field_width - str_arg_l;
4004 zero_padding_insertion_ind = 1;
4005 }
4006 str_arg = tmp;
4007 break;
4008 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004009
4010 default:
4011 // unrecognized conversion specifier, keep format string
4012 // as-is
4013 zero_padding = 0; // turn zero padding off for non-numeric
4014 // conversion
4015 justify_left = 1;
4016 min_field_width = 0; // reset flags
4017
4018 // discard the unrecognized conversion, just keep *
4019 // the unrecognized conversion character
4020 str_arg = p;
4021 str_arg_l = 0;
4022 if (*p != NUL)
4023 str_arg_l++; // include invalid conversion specifier
4024 // unchanged if not at end-of-string
4025 break;
4026 }
4027
4028 if (*p != NUL)
4029 p++; // step over the just processed conversion specifier
4030
4031 // insert padding to the left as requested by min_field_width;
4032 // this does not include the zero padding in case of numerical
4033 // conversions
4034 if (!justify_left)
4035 {
4036 // left padding with blank or zero
4037 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
4038
4039 if (pn > 0)
4040 {
4041 if (str_l < str_m)
4042 {
4043 size_t avail = str_m - str_l;
4044
4045 vim_memset(str + str_l, zero_padding ? '0' : ' ',
4046 (size_t)pn > avail ? avail
4047 : (size_t)pn);
4048 }
4049 str_l += pn;
4050 }
4051 }
4052
4053 // zero padding as requested by the precision or by the minimal
4054 // field width for numeric conversions required?
4055 if (number_of_zeros_to_pad == 0)
4056 {
4057 // will not copy first part of numeric right now, *
4058 // force it to be copied later in its entirety
4059 zero_padding_insertion_ind = 0;
4060 }
4061 else
4062 {
4063 // insert first part of numerics (sign or '0x') before zero
4064 // padding
4065 int zn = (int)zero_padding_insertion_ind;
4066
4067 if (zn > 0)
4068 {
4069 if (str_l < str_m)
4070 {
4071 size_t avail = str_m - str_l;
4072
4073 mch_memmove(str + str_l, str_arg,
4074 (size_t)zn > avail ? avail
4075 : (size_t)zn);
4076 }
4077 str_l += zn;
4078 }
4079
4080 // insert zero padding as requested by the precision or min
4081 // field width
4082 zn = (int)number_of_zeros_to_pad;
4083 if (zn > 0)
4084 {
4085 if (str_l < str_m)
4086 {
4087 size_t avail = str_m - str_l;
4088
4089 vim_memset(str + str_l, '0',
4090 (size_t)zn > avail ? avail
4091 : (size_t)zn);
4092 }
4093 str_l += zn;
4094 }
4095 }
4096
4097 // insert formatted string
4098 // (or as-is conversion specifier for unknown conversions)
4099 {
4100 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
4101
4102 if (sn > 0)
4103 {
4104 if (str_l < str_m)
4105 {
4106 size_t avail = str_m - str_l;
4107
4108 mch_memmove(str + str_l,
4109 str_arg + zero_padding_insertion_ind,
4110 (size_t)sn > avail ? avail : (size_t)sn);
4111 }
4112 str_l += sn;
4113 }
4114 }
4115
4116 // insert right padding
4117 if (justify_left)
4118 {
4119 // right blank padding to the field width
4120 int pn = (int)(min_field_width
4121 - (str_arg_l + number_of_zeros_to_pad));
4122
4123 if (pn > 0)
4124 {
4125 if (str_l < str_m)
4126 {
4127 size_t avail = str_m - str_l;
4128
4129 vim_memset(str + str_l, ' ',
4130 (size_t)pn > avail ? avail
4131 : (size_t)pn);
4132 }
4133 str_l += pn;
4134 }
4135 }
4136 vim_free(tofree);
4137 }
4138 }
4139
4140 if (str_m > 0)
4141 {
4142 // make sure the string is nul-terminated even at the expense of
4143 // overwriting the last character (shouldn't happen, but just in case)
4144 //
4145 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
4146 }
4147
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004148 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00004149 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004150
Christ van Willegenc35fc032024-03-14 18:30:41 +01004151error:
K.Takata4c215ec2023-08-26 18:05:08 +02004152 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004153 va_end(ap);
4154
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004155 // Return the number of characters formatted (excluding trailing nul
4156 // character), that is, the number of characters that would have been
4157 // written to the buffer if it were large enough.
4158 return (int)str_l;
4159}
4160
4161#endif // PROTO