blob: 53e0171aacce1a361edcea87ff98eef02ab959ac [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
 * Escape "string" for use as a shell argument with system().
 * This uses single quotes, except when we know we need to use double quotes
 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
 * PowerShell also uses a novel escaping for enclosed single quotes - double
 * them up.
 * Escape a newline, depending on the 'shell' option.
 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
 * with "<" like "<cfile>".
 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
 * Returns the result in allocated memory, NULL if we have run out.
 *
 * The work is done in two passes over "string": the first computes the size
 * of the result, the second fills the allocated buffer.  Both passes must
 * test the same conditions, otherwise the buffer size does not match what is
 * written.
 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
// "l" is filled in by find_cmdline_var(); it is used below as the number of
// bytes of the matched item (presumably its length - confirm in the callee).
Mike Williams51024bb2024-05-30 07:46:30 +0200154 size_t l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
// "pwsh" in the tail of 'shell' is checked on every platform, "powershell"
// only when MSWIN is defined.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
// Without MSWIN this test stands alone; with MSWIN it is the "else" branch of
// the double_quotes test above.
194 if (*p == '\'')
195 {
196 if (powershell)
// NOTE(review): the fill pass writes two quotes for one input quote, i.e. one
// extra byte; counting two only over-allocates.
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
// Move to the last byte of the matched item; MB_PTR_ADV() in the loop header
// then steps past it.
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
// Second pass: every branch that handles the current character advances "p"
// itself and continues, so the loop header has no increment.
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
// Close the quoted part, add an escaped quote and open a new quoted part.
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
zeertzjq88c8c542024-05-30 19:27:25 +0200272 if (do_special && find_cmdline_var(p, &l) >= 0)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200273 {
274 *d++ = '\\'; // insert backslash
zeertzjq88c8c542024-05-30 19:27:25 +0200275 memcpy(d, p, l); // copy the var
276 d += l;
277 p += l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
// Nothing special: copy the character at "p" to "d" and advance both.
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000341 if (p == NULL)
342 return;
343
344 p2 = p;
345 while ((c = *p2) != NUL)
346 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000528/*
529 * A version of strlen() that has a maximum length.
530 */
531 size_t
532vim_strlen_maxlen(char *s, size_t maxlen)
533{
534 size_t i;
535 for (i = 0; i < maxlen; ++i)
536 if (s[i] == NUL)
537 break;
538 return i;
539}
540
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200541#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
542/*
543 * Compare two strings, ignoring case, using current locale.
544 * Doesn't work for multi-byte characters.
545 * return 0 for match, < 0 for smaller, > 0 for bigger
546 */
547 int
548vim_stricmp(char *s1, char *s2)
549{
550 int i;
551
552 for (;;)
553 {
554 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
555 if (i != 0)
556 return i; // this character different
557 if (*s1 == NUL)
558 break; // strings match until NUL
559 ++s1;
560 ++s2;
561 }
562 return 0; // strings match
563}
564#endif
565
566#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
567/*
568 * Compare two strings, for length "len", ignoring case, using current locale.
569 * Doesn't work for multi-byte characters.
570 * return 0 for match, < 0 for smaller, > 0 for bigger
571 */
572 int
573vim_strnicmp(char *s1, char *s2, size_t len)
574{
575 int i;
576
577 while (len > 0)
578 {
579 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
580 if (i != 0)
581 return i; // this character different
582 if (*s1 == NUL)
583 break; // strings match until NUL
584 ++s1;
585 ++s2;
586 --len;
587 }
588 return 0; // strings match
589}
590#endif
591
592/*
Christian Brabandt84e31752024-09-02 09:59:18 +0200593 * Compare two ASCII strings, for length "len", ignoring case, ignoring locale
594 * (mostly matters for turkish locale where i I might be different).
595 * return 0 for match, < 0 for smaller, > 0 for bigger
596 */
597 int
598vim_strnicmp_asc(char *s1, char *s2, size_t len)
599{
John Marriottc847c122024-11-24 14:09:40 +0100600 int i = 0;
Christian Brabandt84e31752024-09-02 09:59:18 +0200601 int save_cmp_flags = cmp_flags;
602
603 cmp_flags |= CMP_KEEPASCII; // compare by ASCII value, ignoring locale
604 while (len > 0)
605 {
606 i = vim_tolower(*s1) - vim_tolower(*s2);
607 if (i != 0)
608 break; // this character is different
609 if (*s1 == NUL)
610 break; // strings match until NUL
611 ++s1;
612 ++s2;
613 --len;
614 }
615 cmp_flags = save_cmp_flags;
616 return i;
617}
618
619/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200620 * Search for first occurrence of "c" in "string".
621 * Version of strchr() that handles unsigned char strings with characters from
622 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
623 * end of the string.
624 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000625 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200626vim_strchr(char_u *string, int c)
627{
628 char_u *p;
629 int b;
630
631 p = string;
632 if (enc_utf8 && c >= 0x80)
633 {
634 while (*p != NUL)
635 {
636 int l = utfc_ptr2len(p);
637
638 // Avoid matching an illegal byte here.
639 if (utf_ptr2char(p) == c && l > 1)
640 return p;
641 p += l;
642 }
643 return NULL;
644 }
645 if (enc_dbcs != 0 && c > 255)
646 {
647 int n2 = c & 0xff;
648
649 c = ((unsigned)c >> 8) & 0xff;
650 while ((b = *p) != NUL)
651 {
652 if (b == c && p[1] == n2)
653 return p;
654 p += (*mb_ptr2len)(p);
655 }
656 return NULL;
657 }
658 if (has_mbyte)
659 {
660 while ((b = *p) != NUL)
661 {
662 if (b == c)
663 return p;
664 p += (*mb_ptr2len)(p);
665 }
666 return NULL;
667 }
668 while ((b = *p) != NUL)
669 {
670 if (b == c)
671 return p;
672 ++p;
673 }
674 return NULL;
675}
676
// Sized version of strchr() that can handle embedded NULs: look for "c" in
// the first "*n" bytes at "p".
// "*n" is decremented for every byte that is skipped, so on a match it holds
// the number of bytes left starting at the returned position.  It is zero
// when NULL is returned.
    char *
vim_strnchr(const char *p, size_t *n, int c)
{
    for ( ; *n > 0; ++p, --*n)
        if (*p == c)
            return (char *)p;

    return NULL;
}
692
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200693/*
694 * Version of strchr() that only works for bytes and handles unsigned char
695 * strings with characters above 128 correctly. It also doesn't return a
696 * pointer to the NUL at the end of the string.
697 */
698 char_u *
699vim_strbyte(char_u *string, int c)
700{
701 char_u *p = string;
702
703 while (*p != NUL)
704 {
705 if (*p == c)
706 return p;
707 ++p;
708 }
709 return NULL;
710}
711
712/*
713 * Search for last occurrence of "c" in "string".
714 * Version of strrchr() that handles unsigned char strings with characters from
715 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
716 * end of the string.
717 * Return NULL if not found.
718 * Does not handle multi-byte char for "c"!
719 */
720 char_u *
721vim_strrchr(char_u *string, int c)
722{
723 char_u *retval = NULL;
724 char_u *p = string;
725
726 while (*p)
727 {
728 if (*p == c)
729 retval = p;
730 MB_PTR_ADV(p);
731 }
732 return retval;
733}
734
735/*
736 * Vim's version of strpbrk(), in case it's missing.
737 * Don't generate a prototype for this, causes problems when it's not used.
738 */
739#ifndef PROTO
740# ifndef HAVE_STRPBRK
741# ifdef vim_strpbrk
742# undef vim_strpbrk
743# endif
744 char_u *
745vim_strpbrk(char_u *s, char_u *charset)
746{
747 while (*s)
748 {
749 if (vim_strchr(charset, *s) != NULL)
750 return s;
751 MB_PTR_ADV(s);
752 }
753 return NULL;
754}
755# endif
756#endif
757
758/*
759 * Sort an array of strings.
760 */
761static int sort_compare(const void *s1, const void *s2);
762
// Comparison callback for qsort(): "s1" and "s2" each point to a string
// pointer; the strings are compared with STRCMP().
    static int
sort_compare(const void *s1, const void *s2)
{
    char    *left = *(char **)s1;
    char    *right = *(char **)s2;

    return STRCMP(left, right);
}
768
769 void
770sort_strings(
771 char_u **files,
772 int count)
773{
774 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
775}
776
777#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
778/*
779 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
780 * When "s" is NULL FALSE is returned.
781 */
782 int
783has_non_ascii(char_u *s)
784{
785 char_u *p;
786
787 if (s != NULL)
788 for (p = s; *p != NUL; ++p)
789 if (*p >= 128)
790 return TRUE;
791 return FALSE;
792}
793#endif
794
795/*
796 * Concatenate two strings and return the result in allocated memory.
797 * Returns NULL when out of memory.
798 */
799 char_u *
800concat_str(char_u *str1, char_u *str2)
801{
802 char_u *dest;
803 size_t l = str1 == NULL ? 0 : STRLEN(str1);
804
805 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000806 if (dest == NULL)
807 return NULL;
808 if (str1 == NULL)
809 *dest = NUL;
810 else
811 STRCPY(dest, str1);
812 if (str2 != NULL)
813 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200814 return dest;
815}
816
zeertzjq4dd266c2023-08-19 11:35:03 +0200817#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
818/*
819 * Reverse text into allocated memory.
820 * Returns the allocated string, NULL when out of memory.
821 */
822 char_u *
823reverse_text(char_u *s)
824{
825 size_t len = STRLEN(s);
826 char_u *rev = alloc(len + 1);
827 if (rev == NULL)
828 return NULL;
829
830 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
831 {
832 if (has_mbyte)
833 {
834 int mb_len = (*mb_ptr2len)(s + s_i);
835 rev_i -= mb_len;
836 mch_memmove(rev + rev_i, s + s_i, mb_len);
837 s_i += mb_len - 1;
838 }
839 else
840 rev[--rev_i] = s[s_i];
841 }
842 rev[len] = NUL;
843 return rev;
844}
845#endif
846
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200847#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200848/*
849 * Return string "str" in ' quotes, doubling ' characters.
850 * If "str" is NULL an empty string is assumed.
851 * If "function" is TRUE make it function('string').
852 */
853 char_u *
854string_quote(char_u *str, int function)
855{
856 unsigned len;
857 char_u *p, *r, *s;
858
859 len = (function ? 13 : 3);
860 if (str != NULL)
861 {
862 len += (unsigned)STRLEN(str);
863 for (p = str; *p != NUL; MB_PTR_ADV(p))
864 if (*p == '\'')
865 ++len;
866 }
867 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000868 if (r == NULL)
869 return NULL;
870
871 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200872 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000873 STRCPY(r, "function('");
874 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200875 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000876 else
877 *r++ = '\'';
878 if (str != NULL)
879 for (p = str; *p != NUL; )
880 {
881 if (*p == '\'')
882 *r++ = '\'';
883 MB_COPY_CHAR(p, r);
884 }
885 *r++ = '\'';
886 if (function)
887 *r++ = ')';
888 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200889 return s;
890}
891
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000892/*
893 * Count the number of times "needle" occurs in string "haystack". Case is
894 * ignored if "ic" is TRUE.
895 */
896 long
897string_count(char_u *haystack, char_u *needle, int ic)
898{
899 long n = 0;
900 char_u *p = haystack;
901 char_u *next;
902
903 if (p == NULL || needle == NULL || *needle == NUL)
904 return 0;
905
906 if (ic)
907 {
908 size_t len = STRLEN(needle);
909
910 while (*p != NUL)
911 {
912 if (MB_STRNICMP(p, needle, len) == 0)
913 {
914 ++n;
915 p += len;
916 }
917 else
918 MB_PTR_ADV(p);
919 }
920 }
921 else
922 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
923 {
924 ++n;
925 p = next + STRLEN(needle);
926 }
927
928 return n;
929}
930
931/*
932 * Make a typval_T of the first character of "input" and store it in "output".
933 * Return OK or FAIL.
934 */
935 static int
936copy_first_char_to_tv(char_u *input, typval_T *output)
937{
938 char_u buf[MB_MAXBYTES + 1];
939 int len;
940
941 if (input == NULL || output == NULL)
942 return FAIL;
943
944 len = has_mbyte ? mb_ptr2len(input) : 1;
945 STRNCPY(buf, input, len);
946 buf[len] = NUL;
947 output->v_type = VAR_STRING;
948 output->vval.v_string = vim_strsave(buf);
949
950 return output->vval.v_string == NULL ? FAIL : OK;
951}
952
953/*
954 * Implementation of map() and filter() for a String. Apply "expr" to every
955 * character in string "str" and return the result in "rettv".
956 */
957 void
958string_filter_map(
959 char_u *str,
960 filtermap_T filtermap,
961 typval_T *expr,
962 typval_T *rettv)
963{
964 char_u *p;
965 typval_T tv;
966 garray_T ga;
967 int len = 0;
968 int idx = 0;
969 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100970 typval_T newtv;
971 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000972
973 rettv->v_type = VAR_STRING;
974 rettv->vval.v_string = NULL;
975
976 // set_vim_var_nr() doesn't set the type
977 set_vim_var_type(VV_KEY, VAR_NUMBER);
978
zeertzjqe7d49462023-04-16 20:53:55 +0100979 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100980 fc = eval_expr_get_funccal(expr, &newtv);
981
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000982 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000983 for (p = str; *p != NUL; p += len)
984 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000985 if (copy_first_char_to_tv(p, &tv) == FAIL)
986 break;
987 len = (int)STRLEN(tv.vval.v_string);
988
989 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100990 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000991 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000992 {
993 clear_tv(&newtv);
994 clear_tv(&tv);
995 break;
996 }
Ernie Raele79e2072024-01-13 11:47:33 +0100997 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000998 {
999 if (newtv.v_type != VAR_STRING)
1000 {
1001 clear_tv(&newtv);
1002 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +00001003 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001004 break;
1005 }
1006 else
1007 ga_concat(&ga, newtv.vval.v_string);
1008 }
Ernie Raele79e2072024-01-13 11:47:33 +01001009 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001010 ga_concat(&ga, tv.vval.v_string);
1011
1012 clear_tv(&newtv);
1013 clear_tv(&tv);
1014
1015 ++idx;
1016 }
1017 ga_append(&ga, NUL);
1018 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +01001019 if (fc != NULL)
1020 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001021}
1022
1023/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001024 * Implementation of reduce() for String "argvars[0]" using the function "expr"
1025 * starting with the optional initial value "argvars[2]" and return the result
1026 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001027 */
1028 void
1029string_reduce(
1030 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001031 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001032 typval_T *rettv)
1033{
1034 char_u *p = tv_get_string(&argvars[0]);
1035 int len;
1036 typval_T argv[3];
1037 int r;
1038 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +01001039 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001040
1041 if (argvars[2].v_type == VAR_UNKNOWN)
1042 {
1043 if (*p == NUL)
1044 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001045 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001046 return;
1047 }
1048 if (copy_first_char_to_tv(p, rettv) == FAIL)
1049 return;
1050 p += STRLEN(rettv->vval.v_string);
1051 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001052 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001053 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001054 else
1055 copy_tv(&argvars[2], rettv);
1056
zeertzjqe7d49462023-04-16 20:53:55 +01001057 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001058 fc = eval_expr_get_funccal(expr, rettv);
1059
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001060 for ( ; *p != NUL; p += len)
1061 {
1062 argv[0] = *rettv;
1063 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1064 break;
1065 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001066
zeertzjqad0c4422023-08-17 22:15:47 +02001067 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001068
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001069 clear_tv(&argv[0]);
1070 clear_tv(&argv[1]);
1071 if (r == FAIL || called_emsg != called_emsg_start)
1072 return;
1073 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001074
1075 if (fc != NULL)
1076 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001077}
1078
Bram Moolenaare4098452023-05-07 18:53:49 +01001079/*
1080 * Implementation of "byteidx()" and "byteidxcomp()" functions
1081 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001082 static void
Dominique Pellé0268ff32024-07-28 21:12:20 +02001083byteidx_common(typval_T *argvars, typval_T *rettv, int comp)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001084{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001085 rettv->vval.v_number = -1;
1086
1087 if (in_vim9script()
1088 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001089 || check_for_number_arg(argvars, 1) == FAIL
1090 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001091 return;
1092
Christian Brabandt67672ef2023-04-24 21:09:54 +01001093 char_u *str = tv_get_string_chk(&argvars[0]);
1094 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001095 if (str == NULL || idx < 0)
1096 return;
1097
Christian Brabandt67672ef2023-04-24 21:09:54 +01001098 varnumber_T utf16idx = FALSE;
1099 if (argvars[2].v_type != VAR_UNKNOWN)
1100 {
zeertzjq8cf51372023-05-08 15:31:38 +01001101 int error = FALSE;
1102 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1103 if (error)
1104 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001105 if (utf16idx < 0 || utf16idx > 1)
1106 {
zeertzjq8cf51372023-05-08 15:31:38 +01001107 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001108 return;
1109 }
1110 }
1111
1112 int (*ptr2len)(char_u *);
1113 if (enc_utf8 && comp)
1114 ptr2len = utf_ptr2len;
1115 else
1116 ptr2len = mb_ptr2len;
1117
1118 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001119 for ( ; idx > 0; idx--)
1120 {
1121 if (*t == NUL) // EOL reached
1122 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001123 if (utf16idx)
1124 {
1125 int clen = ptr2len(t);
1126 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1127 if (c > 0xFFFF)
1128 idx--;
1129 }
1130 if (idx > 0)
1131 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001132 }
1133 rettv->vval.v_number = (varnumber_T)(t - str);
1134}
1135
1136/*
1137 * "byteidx()" function
1138 */
1139 void
1140f_byteidx(typval_T *argvars, typval_T *rettv)
1141{
Bram Moolenaare4098452023-05-07 18:53:49 +01001142 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001143}
1144
1145/*
1146 * "byteidxcomp()" function
1147 */
1148 void
1149f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1150{
Bram Moolenaare4098452023-05-07 18:53:49 +01001151 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001152}
1153
1154/*
1155 * "charidx()" function
1156 */
1157 void
1158f_charidx(typval_T *argvars, typval_T *rettv)
1159{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001160 rettv->vval.v_number = -1;
1161
Christian Brabandt67672ef2023-04-24 21:09:54 +01001162 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001163 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001164 || check_for_opt_bool_arg(argvars, 2) == FAIL
1165 || (argvars[2].v_type != VAR_UNKNOWN
1166 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001167 return;
1168
Christian Brabandt67672ef2023-04-24 21:09:54 +01001169 char_u *str = tv_get_string_chk(&argvars[0]);
1170 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001171 if (str == NULL || idx < 0)
1172 return;
1173
Christian Brabandt67672ef2023-04-24 21:09:54 +01001174 varnumber_T countcc = FALSE;
1175 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001176 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001177 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001178 countcc = tv_get_bool(&argvars[2]);
1179 if (argvars[3].v_type != VAR_UNKNOWN)
1180 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001181 }
1182
Christian Brabandt67672ef2023-04-24 21:09:54 +01001183 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001184 if (enc_utf8 && countcc)
1185 ptr2len = utf_ptr2len;
1186 else
1187 ptr2len = mb_ptr2len;
1188
Christian Brabandt67672ef2023-04-24 21:09:54 +01001189 char_u *p;
1190 int len;
1191 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001192 {
1193 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001194 {
1195 // If the index is exactly the number of bytes or utf-16 code units
1196 // in the string then return the length of the string in
1197 // characters.
1198 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1199 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001200 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001201 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001202 if (utf16idx)
1203 {
1204 idx--;
1205 int clen = ptr2len(p);
1206 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1207 if (c > 0xFFFF)
1208 idx--;
1209 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001210 p += ptr2len(p);
1211 }
1212
1213 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1214}
1215
1216/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001217 * Convert the string "str", from encoding "from" to encoding "to".
1218 */
1219 static char_u *
1220convert_string(char_u *str, char_u *from, char_u *to)
1221{
1222 vimconv_T vimconv;
1223
1224 vimconv.vc_type = CONV_NONE;
1225 if (convert_setup(&vimconv, from, to) == FAIL)
1226 return NULL;
1227 vimconv.vc_fail = TRUE;
1228 if (vimconv.vc_type == CONV_NONE)
1229 str = vim_strsave(str);
1230 else
1231 str = string_convert(&vimconv, str, NULL);
1232 convert_setup(&vimconv, NULL, NULL);
1233
1234 return str;
1235}
1236
1237/*
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001238 * Add the bytes from "str" to "blob".
1239 */
1240 static void
1241blob_from_string(char_u *str, blob_T *blob)
1242{
1243 size_t len = STRLEN(str);
1244
1245 for (size_t i = 0; i < len; i++)
1246 {
1247 int ch = str[i];
1248
1249 if (str[i] == NL)
1250 // Translate newlines in the string to NUL character
1251 ch = NUL;
1252
1253 ga_append(&blob->bv_ga, ch);
1254 }
1255}
1256
1257/*
1258 * Return a string created from the bytes in blob starting at "start_idx".
1259 * A NL character in the blob indicates end of string.
1260 * A NUL character in the blob is translated to a NL.
1261 * On return, "start_idx" points to next byte to process in blob.
1262 */
1263 static char_u *
1264string_from_blob(blob_T *blob, long *start_idx)
1265{
1266 garray_T str_ga;
1267 long blen;
Yegappan Lakshmanan5e9aaed2025-01-18 10:24:25 +01001268 int idx;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001269
1270 ga_init2(&str_ga, sizeof(char), 80);
1271
1272 blen = blob_len(blob);
1273
1274 for (idx = *start_idx; idx < blen; idx++)
1275 {
1276 char_u byte = (char_u)blob_get(blob, idx);
1277 if (byte == NL)
1278 {
1279 idx++;
1280 break;
1281 }
1282
1283 if (byte == NUL)
1284 byte = NL;
1285
1286 ga_append(&str_ga, byte);
1287 }
1288
1289 ga_append(&str_ga, NUL);
1290
1291 char_u *ret_str = vim_strsave(str_ga.ga_data);
1292 *start_idx = idx;
1293
1294 ga_clear(&str_ga);
1295 return ret_str;
1296}
1297
1298/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001299 * "blob2str()" function
1300 * Converts a blob to a string, ensuring valid UTF-8 encoding.
1301 */
1302 void
1303f_blob2str(typval_T *argvars, typval_T *rettv)
1304{
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001305 blob_T *blob;
1306 int blen;
1307 long idx;
1308 int utf8_inuse = FALSE;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001309
1310 if (check_for_blob_arg(argvars, 0) == FAIL
1311 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1312 return;
1313
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001314 if (rettv_list_alloc(rettv) == FAIL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001315 return;
1316
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001317 blob = argvars->vval.v_blob;
1318 if (blob == NULL)
1319 return;
1320 blen = blob_len(blob);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001321
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001322 char_u *from_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001323 if (argvars[1].v_type != VAR_UNKNOWN)
1324 {
1325 dict_T *d = argvars[1].vval.v_dict;
1326 if (d != NULL)
1327 {
1328 char_u *enc = dict_get_string(d, "encoding", FALSE);
1329 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001330 from_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001331 }
1332 }
1333
1334 if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001335 utf8_inuse = TRUE;
1336
1337 idx = 0;
1338 while (idx < blen)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001339 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001340 char_u *str;
1341 char_u *converted_str;
1342
1343 str = string_from_blob(blob, &idx);
1344 if (str == NULL)
1345 break;
1346
1347 converted_str = str;
1348 if (from_encoding != NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001349 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001350 converted_str = convert_string(str, from_encoding, p_enc);
1351 vim_free(str);
1352 if (converted_str == NULL)
1353 {
1354 semsg(_(e_str_encoding_failed), "from", from_encoding);
1355 goto done;
1356 }
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001357 }
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001358
1359 if (utf8_inuse)
1360 {
1361 if (!utf_valid_string(converted_str, NULL))
1362 {
1363 semsg(_(e_str_encoding_failed), "from", p_enc);
1364 vim_free(converted_str);
1365 goto done;
1366 }
1367 }
1368
1369 if (list_append_string(rettv->vval.v_list, converted_str, -1) == FAIL)
1370 break;
1371 vim_free(converted_str);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001372 }
1373
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001374done:
1375 vim_free(from_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001376}
1377
1378/*
1379 * "str2blob()" function
1380 */
1381 void
1382f_str2blob(typval_T *argvars, typval_T *rettv)
1383{
1384 blob_T *blob;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001385 list_T *list;
1386 listitem_T *li;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001387
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001388 if (check_for_list_arg(argvars, 0) == FAIL
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001389 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1390 return;
1391
1392 if (rettv_blob_alloc(rettv) == FAIL)
1393 return;
1394
1395 blob = rettv->vval.v_blob;
1396
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001397 list = argvars[0].vval.v_list;
1398 if (list == NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001399 return;
1400
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001401 char_u *to_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001402 if (argvars[1].v_type != VAR_UNKNOWN)
1403 {
1404 dict_T *d = argvars[1].vval.v_dict;
1405 if (d != NULL)
1406 {
1407 char_u *enc = dict_get_string(d, "encoding", FALSE);
1408 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001409 to_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001410 }
1411 }
1412
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001413 FOR_ALL_LIST_ITEMS(list, li)
1414 {
1415 if (li->li_tv.v_type != VAR_STRING)
1416 continue;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001417
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001418 char_u *str = li->li_tv.vval.v_string;
1419
1420 if (str == NULL)
1421 continue;
1422
1423 if (to_encoding != NULL)
1424 {
1425 str = convert_string(str, p_enc, to_encoding);
1426 if (str == NULL)
1427 {
1428 semsg(_(e_str_encoding_failed), "to", to_encoding);
1429 goto done;
1430 }
1431 }
1432
1433 if (li != list->lv_first)
1434 // Each list string item is separated by a newline in the blob
1435 ga_append(&blob->bv_ga, NL);
1436
1437 blob_from_string(str, blob);
1438
1439 if (to_encoding != NULL)
1440 vim_free(str);
1441 }
1442
1443done:
1444 if (to_encoding != NULL)
1445 vim_free(to_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001446}
1447
1448/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001449 * "str2list()" function
1450 */
1451 void
1452f_str2list(typval_T *argvars, typval_T *rettv)
1453{
1454 char_u *p;
1455 int utf8 = FALSE;
1456
1457 if (rettv_list_alloc(rettv) == FAIL)
1458 return;
1459
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001460 if (in_vim9script()
1461 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001462 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001463 return;
1464
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001465 if (argvars[1].v_type != VAR_UNKNOWN)
1466 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1467
1468 p = tv_get_string(&argvars[0]);
1469
1470 if (has_mbyte || utf8)
1471 {
1472 int (*ptr2len)(char_u *);
1473 int (*ptr2char)(char_u *);
1474
1475 if (utf8 || enc_utf8)
1476 {
1477 ptr2len = utf_ptr2len;
1478 ptr2char = utf_ptr2char;
1479 }
1480 else
1481 {
1482 ptr2len = mb_ptr2len;
1483 ptr2char = mb_ptr2char;
1484 }
1485
1486 for ( ; *p != NUL; p += (*ptr2len)(p))
1487 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1488 }
1489 else
1490 for ( ; *p != NUL; ++p)
1491 list_append_number(rettv->vval.v_list, *p);
1492}
1493
1494/*
1495 * "str2nr()" function
1496 */
1497 void
1498f_str2nr(typval_T *argvars, typval_T *rettv)
1499{
1500 int base = 10;
1501 char_u *p;
1502 varnumber_T n;
1503 int what = 0;
1504 int isneg;
1505
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001506 if (in_vim9script()
1507 && (check_for_string_arg(argvars, 0) == FAIL
1508 || check_for_opt_number_arg(argvars, 1) == FAIL
1509 || (argvars[1].v_type != VAR_UNKNOWN
1510 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1511 return;
1512
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001513 if (argvars[1].v_type != VAR_UNKNOWN)
1514 {
1515 base = (int)tv_get_number(&argvars[1]);
1516 if (base != 2 && base != 8 && base != 10 && base != 16)
1517 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001518 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001519 return;
1520 }
1521 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1522 what |= STR2NR_QUOTE;
1523 }
1524
1525 p = skipwhite(tv_get_string_strict(&argvars[0]));
1526 isneg = (*p == '-');
1527 if (*p == '+' || *p == '-')
1528 p = skipwhite(p + 1);
1529 switch (base)
1530 {
1531 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1532 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1533 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1534 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001535 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001536 // Text after the number is silently ignored.
1537 if (isneg)
1538 rettv->vval.v_number = -n;
1539 else
1540 rettv->vval.v_number = n;
1541
1542}
1543
1544/*
1545 * "strgetchar()" function
1546 */
1547 void
1548f_strgetchar(typval_T *argvars, typval_T *rettv)
1549{
1550 char_u *str;
1551 int len;
1552 int error = FALSE;
1553 int charidx;
1554 int byteidx = 0;
1555
1556 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001557
1558 if (in_vim9script()
1559 && (check_for_string_arg(argvars, 0) == FAIL
1560 || check_for_number_arg(argvars, 1) == FAIL))
1561 return;
1562
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001563 str = tv_get_string_chk(&argvars[0]);
1564 if (str == NULL)
1565 return;
1566 len = (int)STRLEN(str);
1567 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1568 if (error)
1569 return;
1570
1571 while (charidx >= 0 && byteidx < len)
1572 {
1573 if (charidx == 0)
1574 {
1575 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1576 break;
1577 }
1578 --charidx;
1579 byteidx += MB_CPTR2LEN(str + byteidx);
1580 }
1581}
1582
1583/*
1584 * "stridx()" function
1585 */
1586 void
1587f_stridx(typval_T *argvars, typval_T *rettv)
1588{
1589 char_u buf[NUMBUFLEN];
1590 char_u *needle;
1591 char_u *haystack;
1592 char_u *save_haystack;
1593 char_u *pos;
1594 int start_idx;
1595
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001596 if (in_vim9script()
1597 && (check_for_string_arg(argvars, 0) == FAIL
1598 || check_for_string_arg(argvars, 1) == FAIL
1599 || check_for_opt_number_arg(argvars, 2) == FAIL))
1600 return;
1601
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001602 needle = tv_get_string_chk(&argvars[1]);
1603 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1604 rettv->vval.v_number = -1;
1605 if (needle == NULL || haystack == NULL)
1606 return; // type error; errmsg already given
1607
1608 if (argvars[2].v_type != VAR_UNKNOWN)
1609 {
1610 int error = FALSE;
1611
1612 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1613 if (error || start_idx >= (int)STRLEN(haystack))
1614 return;
1615 if (start_idx >= 0)
1616 haystack += start_idx;
1617 }
1618
1619 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1620 if (pos != NULL)
1621 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1622}
1623
1624/*
1625 * "string()" function
1626 */
1627 void
1628f_string(typval_T *argvars, typval_T *rettv)
1629{
1630 char_u *tofree;
1631 char_u numbuf[NUMBUFLEN];
1632
1633 rettv->v_type = VAR_STRING;
1634 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1635 get_copyID());
1636 // Make a copy if we have a value but it's not in allocated memory.
1637 if (rettv->vval.v_string != NULL && tofree == NULL)
1638 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1639}
1640
1641/*
1642 * "strlen()" function
1643 */
1644 void
1645f_strlen(typval_T *argvars, typval_T *rettv)
1646{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001647 if (in_vim9script()
1648 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1649 return;
1650
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001651 rettv->vval.v_number = (varnumber_T)(STRLEN(
1652 tv_get_string(&argvars[0])));
1653}
1654
1655 static void
1656strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1657{
1658 char_u *s = tv_get_string(&argvars[0]);
1659 varnumber_T len = 0;
1660 int (*func_mb_ptr2char_adv)(char_u **pp);
1661
1662 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1663 while (*s != NUL)
1664 {
1665 func_mb_ptr2char_adv(&s);
1666 ++len;
1667 }
1668 rettv->vval.v_number = len;
1669}
1670
1671/*
1672 * "strcharlen()" function
1673 */
1674 void
1675f_strcharlen(typval_T *argvars, typval_T *rettv)
1676{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001677 if (in_vim9script()
1678 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1679 return;
1680
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001681 strchar_common(argvars, rettv, TRUE);
1682}
1683
1684/*
1685 * "strchars()" function
1686 */
1687 void
1688f_strchars(typval_T *argvars, typval_T *rettv)
1689{
1690 varnumber_T skipcc = FALSE;
1691
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001692 if (in_vim9script()
1693 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001694 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001695 return;
1696
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001697 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001698 {
zeertzjq8cf51372023-05-08 15:31:38 +01001699 int error = FALSE;
1700 skipcc = tv_get_bool_chk(&argvars[1], &error);
1701 if (error)
1702 return;
1703 if (skipcc < 0 || skipcc > 1)
1704 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001705 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001706 return;
1707 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001708 }
zeertzjq8cf51372023-05-08 15:31:38 +01001709
1710 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001711}
1712
1713/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001714 * "strutf16len()" function
1715 */
1716 void
1717f_strutf16len(typval_T *argvars, typval_T *rettv)
1718{
1719 rettv->vval.v_number = -1;
1720
1721 if (check_for_string_arg(argvars, 0) == FAIL
1722 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1723 return;
1724
1725 varnumber_T countcc = FALSE;
1726 if (argvars[1].v_type != VAR_UNKNOWN)
1727 countcc = tv_get_bool(&argvars[1]);
1728
1729 char_u *s = tv_get_string(&argvars[0]);
1730 varnumber_T len = 0;
1731 int (*func_mb_ptr2char_adv)(char_u **pp);
1732 int ch;
1733
1734 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1735 while (*s != NUL)
1736 {
1737 ch = func_mb_ptr2char_adv(&s);
1738 if (ch > 0xFFFF)
1739 ++len;
1740 ++len;
1741 }
1742 rettv->vval.v_number = len;
1743}
1744
1745/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001746 * "strdisplaywidth()" function
1747 */
1748 void
1749f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1750{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001751 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001752 int col = 0;
1753
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001754 rettv->vval.v_number = -1;
1755
1756 if (in_vim9script()
1757 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001758 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001759 return;
1760
1761 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001762 if (argvars[1].v_type != VAR_UNKNOWN)
1763 col = (int)tv_get_number(&argvars[1]);
1764
1765 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1766}
1767
1768/*
1769 * "strwidth()" function
1770 */
1771 void
1772f_strwidth(typval_T *argvars, typval_T *rettv)
1773{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001774 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001775
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001776 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1777 return;
1778
1779 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001780 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1781}
1782
1783/*
1784 * "strcharpart()" function
1785 */
1786 void
1787f_strcharpart(typval_T *argvars, typval_T *rettv)
1788{
1789 char_u *p;
1790 int nchar;
1791 int nbyte = 0;
1792 int charlen;
1793 int skipcc = FALSE;
1794 int len = 0;
1795 int slen;
1796 int error = FALSE;
1797
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001798 if (in_vim9script()
1799 && (check_for_string_arg(argvars, 0) == FAIL
1800 || check_for_number_arg(argvars, 1) == FAIL
1801 || check_for_opt_number_arg(argvars, 2) == FAIL
1802 || (argvars[2].v_type != VAR_UNKNOWN
1803 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1804 return;
1805
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001806 p = tv_get_string(&argvars[0]);
1807 slen = (int)STRLEN(p);
1808
1809 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1810 if (!error)
1811 {
1812 if (argvars[2].v_type != VAR_UNKNOWN
1813 && argvars[3].v_type != VAR_UNKNOWN)
1814 {
zeertzjq8cf51372023-05-08 15:31:38 +01001815 skipcc = tv_get_bool_chk(&argvars[3], &error);
1816 if (error)
1817 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001818 if (skipcc < 0 || skipcc > 1)
1819 {
zeertzjq8cf51372023-05-08 15:31:38 +01001820 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001821 return;
1822 }
1823 }
1824
1825 if (nchar > 0)
1826 while (nchar > 0 && nbyte < slen)
1827 {
1828 if (skipcc)
1829 nbyte += mb_ptr2len(p + nbyte);
1830 else
1831 nbyte += MB_CPTR2LEN(p + nbyte);
1832 --nchar;
1833 }
1834 else
1835 nbyte = nchar;
1836 if (argvars[2].v_type != VAR_UNKNOWN)
1837 {
1838 charlen = (int)tv_get_number(&argvars[2]);
1839 while (charlen > 0 && nbyte + len < slen)
1840 {
1841 int off = nbyte + len;
1842
1843 if (off < 0)
1844 len += 1;
1845 else
1846 {
1847 if (skipcc)
1848 len += mb_ptr2len(p + off);
1849 else
1850 len += MB_CPTR2LEN(p + off);
1851 }
1852 --charlen;
1853 }
1854 }
1855 else
1856 len = slen - nbyte; // default: all bytes that are available.
1857 }
1858
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001859 // Only return the overlap between the specified part and the actual
1860 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001861 if (nbyte < 0)
1862 {
1863 len += nbyte;
1864 nbyte = 0;
1865 }
1866 else if (nbyte > slen)
1867 nbyte = slen;
1868 if (len < 0)
1869 len = 0;
1870 else if (nbyte + len > slen)
1871 len = slen - nbyte;
1872
1873 rettv->v_type = VAR_STRING;
1874 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1875}
1876
1877/*
1878 * "strpart()" function
1879 */
1880 void
1881f_strpart(typval_T *argvars, typval_T *rettv)
1882{
1883 char_u *p;
1884 int n;
1885 int len;
1886 int slen;
1887 int error = FALSE;
1888
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001889 if (in_vim9script()
1890 && (check_for_string_arg(argvars, 0) == FAIL
1891 || check_for_number_arg(argvars, 1) == FAIL
1892 || check_for_opt_number_arg(argvars, 2) == FAIL
1893 || (argvars[2].v_type != VAR_UNKNOWN
1894 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1895 return;
1896
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001897 p = tv_get_string(&argvars[0]);
1898 slen = (int)STRLEN(p);
1899
1900 n = (int)tv_get_number_chk(&argvars[1], &error);
1901 if (error)
1902 len = 0;
1903 else if (argvars[2].v_type != VAR_UNKNOWN)
1904 len = (int)tv_get_number(&argvars[2]);
1905 else
1906 len = slen - n; // default len: all bytes that are available.
1907
1908 // Only return the overlap between the specified part and the actual
1909 // string.
1910 if (n < 0)
1911 {
1912 len += n;
1913 n = 0;
1914 }
1915 else if (n > slen)
1916 n = slen;
1917 if (len < 0)
1918 len = 0;
1919 else if (n + len > slen)
1920 len = slen - n;
1921
1922 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1923 {
1924 int off;
1925
1926 // length in characters
1927 for (off = n; off < slen && len > 0; --len)
1928 off += mb_ptr2len(p + off);
1929 len = off - n;
1930 }
1931
1932 rettv->v_type = VAR_STRING;
1933 rettv->vval.v_string = vim_strnsave(p + n, len);
1934}
1935
1936/*
1937 * "strridx()" function
1938 */
1939 void
1940f_strridx(typval_T *argvars, typval_T *rettv)
1941{
1942 char_u buf[NUMBUFLEN];
1943 char_u *needle;
1944 char_u *haystack;
1945 char_u *rest;
1946 char_u *lastmatch = NULL;
1947 int haystack_len, end_idx;
1948
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001949 if (in_vim9script()
1950 && (check_for_string_arg(argvars, 0) == FAIL
1951 || check_for_string_arg(argvars, 1) == FAIL
1952 || check_for_opt_number_arg(argvars, 2) == FAIL))
1953 return;
1954
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001955 needle = tv_get_string_chk(&argvars[1]);
1956 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1957
1958 rettv->vval.v_number = -1;
1959 if (needle == NULL || haystack == NULL)
1960 return; // type error; errmsg already given
1961
1962 haystack_len = (int)STRLEN(haystack);
1963 if (argvars[2].v_type != VAR_UNKNOWN)
1964 {
1965 // Third argument: upper limit for index
1966 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1967 if (end_idx < 0)
1968 return; // can never find a match
1969 }
1970 else
1971 end_idx = haystack_len;
1972
1973 if (*needle == NUL)
1974 {
1975 // Empty string matches past the end.
1976 lastmatch = haystack + end_idx;
1977 }
1978 else
1979 {
1980 for (rest = haystack; *rest != '\0'; ++rest)
1981 {
1982 rest = (char_u *)strstr((char *)rest, (char *)needle);
1983 if (rest == NULL || rest > haystack + end_idx)
1984 break;
1985 lastmatch = rest;
1986 }
1987 }
1988
1989 if (lastmatch == NULL)
1990 rettv->vval.v_number = -1;
1991 else
1992 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1993}
1994
1995/*
1996 * "strtrans()" function
1997 */
1998 void
1999f_strtrans(typval_T *argvars, typval_T *rettv)
2000{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002001 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2002 return;
2003
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002004 rettv->v_type = VAR_STRING;
2005 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
2006}
2007
Christian Brabandt67672ef2023-04-24 21:09:54 +01002008
2009/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01002010 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002011 *
2012 * Converts a byte or character offset in a string to the corresponding UTF-16
2013 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01002014 */
2015 void
2016f_utf16idx(typval_T *argvars, typval_T *rettv)
2017{
2018 rettv->vval.v_number = -1;
2019
2020 if (check_for_string_arg(argvars, 0) == FAIL
2021 || check_for_opt_number_arg(argvars, 1) == FAIL
2022 || check_for_opt_bool_arg(argvars, 2) == FAIL
2023 || (argvars[2].v_type != VAR_UNKNOWN
2024 && check_for_opt_bool_arg(argvars, 3) == FAIL))
2025 return;
2026
2027 char_u *str = tv_get_string_chk(&argvars[0]);
2028 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
2029 if (str == NULL || idx < 0)
2030 return;
2031
2032 varnumber_T countcc = FALSE;
2033 varnumber_T charidx = FALSE;
2034 if (argvars[2].v_type != VAR_UNKNOWN)
2035 {
2036 countcc = tv_get_bool(&argvars[2]);
2037 if (argvars[3].v_type != VAR_UNKNOWN)
2038 charidx = tv_get_bool(&argvars[3]);
2039 }
2040
2041 int (*ptr2len)(char_u *);
2042 if (enc_utf8 && countcc)
2043 ptr2len = utf_ptr2len;
2044 else
2045 ptr2len = mb_ptr2len;
2046
2047 char_u *p;
2048 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002049 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002050 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
2051 {
2052 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002053 {
2054 // If the index is exactly the number of bytes or characters in the
2055 // string then return the length of the string in utf-16 code
2056 // units.
2057 if (charidx ? (idx == 0) : (p == (str + idx)))
2058 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002059 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002060 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002061 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002062 int clen = ptr2len(p);
2063 int c = (clen > 1) ? utf_ptr2char(p) : *p;
2064 if (c > 0xFFFF)
2065 len++;
2066 p += ptr2len(p);
2067 if (charidx)
2068 idx--;
2069 }
2070
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002071 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002072}
2073
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002074/*
2075 * "tolower(string)" function
2076 */
2077 void
2078f_tolower(typval_T *argvars, typval_T *rettv)
2079{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002080 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2081 return;
2082
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002083 rettv->v_type = VAR_STRING;
2084 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
2085}
2086
2087/*
2088 * "toupper(string)" function
2089 */
2090 void
2091f_toupper(typval_T *argvars, typval_T *rettv)
2092{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002093 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2094 return;
2095
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002096 rettv->v_type = VAR_STRING;
2097 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
2098}
2099
2100/*
2101 * "tr(string, fromstr, tostr)" function
2102 */
2103 void
2104f_tr(typval_T *argvars, typval_T *rettv)
2105{
2106 char_u *in_str;
2107 char_u *fromstr;
2108 char_u *tostr;
2109 char_u *p;
2110 int inlen;
2111 int fromlen;
2112 int tolen;
2113 int idx;
2114 char_u *cpstr;
2115 int cplen;
2116 int first = TRUE;
2117 char_u buf[NUMBUFLEN];
2118 char_u buf2[NUMBUFLEN];
2119 garray_T ga;
2120
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002121 if (in_vim9script()
2122 && (check_for_string_arg(argvars, 0) == FAIL
2123 || check_for_string_arg(argvars, 1) == FAIL
2124 || check_for_string_arg(argvars, 2) == FAIL))
2125 return;
2126
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002127 in_str = tv_get_string(&argvars[0]);
2128 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
2129 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
2130
2131 // Default return value: empty string.
2132 rettv->v_type = VAR_STRING;
2133 rettv->vval.v_string = NULL;
2134 if (fromstr == NULL || tostr == NULL)
2135 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00002136 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002137
2138 if (!has_mbyte)
2139 // not multi-byte: fromstr and tostr must be the same length
2140 if (STRLEN(fromstr) != STRLEN(tostr))
2141 {
2142error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00002143 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002144 ga_clear(&ga);
2145 return;
2146 }
2147
2148 // fromstr and tostr have to contain the same number of chars
2149 while (*in_str != NUL)
2150 {
2151 if (has_mbyte)
2152 {
2153 inlen = (*mb_ptr2len)(in_str);
2154 cpstr = in_str;
2155 cplen = inlen;
2156 idx = 0;
2157 for (p = fromstr; *p != NUL; p += fromlen)
2158 {
2159 fromlen = (*mb_ptr2len)(p);
2160 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
2161 {
2162 for (p = tostr; *p != NUL; p += tolen)
2163 {
2164 tolen = (*mb_ptr2len)(p);
2165 if (idx-- == 0)
2166 {
2167 cplen = tolen;
2168 cpstr = p;
2169 break;
2170 }
2171 }
2172 if (*p == NUL) // tostr is shorter than fromstr
2173 goto error;
2174 break;
2175 }
2176 ++idx;
2177 }
2178
2179 if (first && cpstr == in_str)
2180 {
2181 // Check that fromstr and tostr have the same number of
2182 // (multi-byte) characters. Done only once when a character
2183 // of in_str doesn't appear in fromstr.
2184 first = FALSE;
2185 for (p = tostr; *p != NUL; p += tolen)
2186 {
2187 tolen = (*mb_ptr2len)(p);
2188 --idx;
2189 }
2190 if (idx != 0)
2191 goto error;
2192 }
2193
2194 (void)ga_grow(&ga, cplen);
2195 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
2196 ga.ga_len += cplen;
2197
2198 in_str += inlen;
2199 }
2200 else
2201 {
2202 // When not using multi-byte chars we can do it faster.
2203 p = vim_strchr(fromstr, *in_str);
2204 if (p != NULL)
2205 ga_append(&ga, tostr[p - fromstr]);
2206 else
2207 ga_append(&ga, *in_str);
2208 ++in_str;
2209 }
2210 }
2211
2212 // add a terminating NUL
2213 (void)ga_grow(&ga, 1);
2214 ga_append(&ga, NUL);
2215
2216 rettv->vval.v_string = ga.ga_data;
2217}
2218
2219/*
2220 * "trim({expr})" function
2221 */
2222 void
2223f_trim(typval_T *argvars, typval_T *rettv)
2224{
2225 char_u buf1[NUMBUFLEN];
2226 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002227 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002228 char_u *mask = NULL;
2229 char_u *tail;
2230 char_u *prev;
2231 char_u *p;
2232 int c1;
2233 int dir = 0;
2234
2235 rettv->v_type = VAR_STRING;
2236 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002237
2238 if (in_vim9script()
2239 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02002240 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002241 || (argvars[1].v_type != VAR_UNKNOWN
2242 && check_for_opt_number_arg(argvars, 2) == FAIL)))
2243 return;
2244
2245 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002246 if (head == NULL)
2247 return;
2248
Illia Bobyr80799172023-10-17 18:00:50 +02002249 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002250 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002251
2252 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02002253 {
Illia Bobyr80799172023-10-17 18:00:50 +02002254 mask = tv_get_string_buf_chk(&argvars[1], buf2);
2255 if (*mask == NUL)
2256 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002257
Illia Bobyr80799172023-10-17 18:00:50 +02002258 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02002259 {
Illia Bobyr80799172023-10-17 18:00:50 +02002260 int error = 0;
2261
2262 // leading or trailing characters to trim
2263 dir = (int)tv_get_number_chk(&argvars[2], &error);
2264 if (error)
2265 return;
2266 if (dir < 0 || dir > 2)
2267 {
2268 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
2269 return;
2270 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002271 }
2272 }
2273
2274 if (dir == 0 || dir == 1)
2275 {
2276 // Trim leading characters
2277 while (*head != NUL)
2278 {
2279 c1 = PTR2CHAR(head);
2280 if (mask == NULL)
2281 {
2282 if (c1 > ' ' && c1 != 0xa0)
2283 break;
2284 }
2285 else
2286 {
2287 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2288 if (c1 == PTR2CHAR(p))
2289 break;
2290 if (*p == NUL)
2291 break;
2292 }
2293 MB_PTR_ADV(head);
2294 }
2295 }
2296
2297 tail = head + STRLEN(head);
2298 if (dir == 0 || dir == 2)
2299 {
2300 // Trim trailing characters
2301 for (; tail > head; tail = prev)
2302 {
2303 prev = tail;
2304 MB_PTR_BACK(head, prev);
2305 c1 = PTR2CHAR(prev);
2306 if (mask == NULL)
2307 {
2308 if (c1 > ' ' && c1 != 0xa0)
2309 break;
2310 }
2311 else
2312 {
2313 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2314 if (c1 == PTR2CHAR(p))
2315 break;
2316 if (*p == NUL)
2317 break;
2318 }
2319 }
2320 }
2321 rettv->vval.v_string = vim_strnsave(head, tail - head);
2322}
2323
Bram Moolenaar677658a2022-01-05 16:09:06 +00002324static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002325
2326/*
2327 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2328 */
2329 static varnumber_T
2330tv_nr(typval_T *tvs, int *idxp)
2331{
2332 int idx = *idxp - 1;
2333 varnumber_T n = 0;
2334 int err = FALSE;
2335
2336 if (tvs[idx].v_type == VAR_UNKNOWN)
2337 emsg(_(e_printf));
2338 else
2339 {
2340 ++*idxp;
2341 n = tv_get_number_chk(&tvs[idx], &err);
2342 if (err)
2343 n = 0;
2344 }
2345 return n;
2346}
2347
2348/*
2349 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2350 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2351 * are not converted to a string.
2352 * If "tofree" is not NULL echo_string() is used. All types are converted to
2353 * a string with the same format as ":echo". The caller must free "*tofree".
2354 * Returns NULL for an error.
2355 */
2356 static char *
2357tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2358{
2359 int idx = *idxp - 1;
2360 char *s = NULL;
2361 static char_u numbuf[NUMBUFLEN];
2362
2363 if (tvs[idx].v_type == VAR_UNKNOWN)
2364 emsg(_(e_printf));
2365 else
2366 {
2367 ++*idxp;
2368 if (tofree != NULL)
2369 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2370 else
2371 s = (char *)tv_get_string_chk(&tvs[idx]);
2372 }
2373 return s;
2374}
2375
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002376/*
2377 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2378 */
2379 static double
2380tv_float(typval_T *tvs, int *idxp)
2381{
2382 int idx = *idxp - 1;
2383 double f = 0;
2384
2385 if (tvs[idx].v_type == VAR_UNKNOWN)
2386 emsg(_(e_printf));
2387 else
2388 {
2389 ++*idxp;
2390 if (tvs[idx].v_type == VAR_FLOAT)
2391 f = tvs[idx].vval.v_float;
2392 else if (tvs[idx].v_type == VAR_NUMBER)
2393 f = (double)tvs[idx].vval.v_number;
2394 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002395 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002396 }
2397 return f;
2398}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002399
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002400#endif
2401
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * The upper-case variants are used when "fmt_spec" is an upper-case letter.
 * The flag arguments are used as 0 or 1 in the index computation.
 */
    static const char *
infinity_str(int positive,
             char fmt_spec,
             int force_sign,
             int space_for_positive)
{
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // 1: no sign, 2: '+' sign, 3: space instead of a sign
    int sign_idx = 1 + force_sign * (1 + space_for_positive);
    // the second half of the table holds the upper-case names
    int case_off = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[positive * sign_idx + case_off];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002423
2424/*
2425 * This code was included to provide a portable vsnprintf() and snprintf().
2426 * Some systems may provide their own, but we always use this one for
2427 * consistency.
2428 *
2429 * This code is based on snprintf.c - a portable implementation of snprintf
2430 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2431 * Included with permission. It was heavily modified to fit in Vim.
2432 * The original code, including useful comments, can be found here:
2433 * http://www.ijs.si/software/snprintf/
2434 *
2435 * This snprintf() only supports the following conversion specifiers:
2436 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2437 * with flags: '-', '+', ' ', '0' and '#'.
2438 * An asterisk is supported for field width as well as precision.
2439 *
2440 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2441 *
2442 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2443 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2444 *
2445 * The locale is not used, the string is used as a byte string. This is only
2446 * relevant for double-byte encodings where the second byte may be '%'.
2447 *
2448 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2449 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2450 *
2451 * The return value is the number of characters which would be generated
2452 * for the given input, excluding the trailing NUL. If this value
2453 * is greater or equal to "str_m", not all characters from the result
2454 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
2455 * are discarded. If "str_m" is greater than zero it is guaranteed
2456 * the resulting string will be NUL-terminated.
2457 */
2458
2459/*
2460 * When va_list is not supported we only define vim_snprintf().
2461 *
2462 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2463 * "typval_T". When the latter is not used it must be NULL.
2464 */
2465
2466// When generating prototypes all of this is skipped, cproto doesn't
2467// understand this.
2468#ifndef PROTO
2469
// Like vim_vsnprintf() but append to the string.
// "str_m" is the size of the whole buffer "str"; the formatted text is
// written after the text already in "str", using what is left of the buffer
// (nothing when "str" already occupies "str_m" bytes or more).
// Returns what vim_vsnprintf() returns for the appended part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    size_t      used = STRLEN(str);
    size_t      room = (used < str_m) ? str_m - used : 0;
    va_list     args;
    int         result;

    va_start(args, fmt);
    result = vim_vsnprintf(str + used, room, fmt, args);
    va_end(args);
    return result;
}
2488
// Format "fmt" with the following arguments into "str", which has room for
// "str_m" bytes.  Variadic front end for vim_vsnprintf(); returns its result.
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     args;
    int         result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2500
2501 int
2502vim_vsnprintf(
2503 char *str,
2504 size_t str_m,
2505 const char *fmt,
2506 va_list ap)
2507{
2508 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2509}
2510
// Argument types a printf() conversion specification can ask for.
enum
{
    TYPE_UNKNOWN = -1,
    TYPE_INT,
    TYPE_LONGINT,
    TYPE_LONGLONGINT,
    TYPE_UNSIGNEDINT,
    TYPE_UNSIGNEDLONGINT,
    TYPE_UNSIGNEDLONGLONGINT,
    TYPE_POINTER,
    TYPE_PERCENT,
    TYPE_CHAR,
    TYPE_STRING,
    TYPE_FLOAT
};

/*
 * Return the TYPE_ value for the argument of the conversion at "type".
 * "type" points at the optional length modifier ("h", "l" or "ll") followed
 * by the conversion character.  A "*" counts as an int.
 * Returns TYPE_UNKNOWN when the conversion character is not recognized.
 */
    static int
format_typeof(
    const char  *type)
{
    const char  *s = type;
    char        modifier = '\0';    // '\0', 'h', 'l' or 'L' (for "ll")
    char        conv;
    int         is_signed;

    // parse 'h', 'l' and 'll' length modifiers
    if (*s == 'h' || *s == 'l')
    {
        modifier = *s++;
        if (modifier == 'l' && *s == 'l')
        {
            // double l = __int64 / varnumber_T
            modifier = 'L';
            ++s;
        }
    }
    conv = *s;

    // common synonyms, some of which imply a length modifier
    if (conv == 'i')
        conv = 'd';
    else if (conv == '*')
    {
        conv = 'd';
        modifier = 'h';
    }
    else if (conv == 'D')
    {
        conv = 'd';
        modifier = 'l';
    }
    else if (conv == 'U')
    {
        conv = 'u';
        modifier = 'l';
    }
    else if (conv == 'O')
    {
        conv = 'o';
        modifier = 'l';
    }

    switch (conv)
    {
        case '%':
            return TYPE_PERCENT;

        case 'c':
            return TYPE_CHAR;

        case 's': case 'S':
            return TYPE_STRING;

        case 'p':
            return TYPE_POINTER;

        case 'b': case 'B':
            // binary is always the widest unsigned type
            return TYPE_UNSIGNEDLONGLONGINT;

        case 'f': case 'F':
        case 'e': case 'E':
        case 'g': case 'G':
            return TYPE_FLOAT;

        case 'd':
            is_signed = 1;
            break;

        case 'u': case 'o':
        case 'x': case 'X':
            // these conversion specifiers imply the value is unsigned
            is_signed = 0;
            break;

        default:
            return TYPE_UNKNOWN;
    }

    // Integer conversion: the length modifier selects the width.
    switch (modifier)
    {
        case 'l':
            return is_signed ? TYPE_LONGINT : TYPE_UNSIGNEDLONGINT;
        case 'L':
            return is_signed ? TYPE_LONGLONGINT : TYPE_UNSIGNEDLONGLONGINT;
        default:
            // char and short arguments are passed as int.
            return is_signed ? TYPE_INT : TYPE_UNSIGNEDINT;
    }
}
2641
zeertzjq7772c932023-08-15 22:48:40 +02002642 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002643format_typename(
2644 const char *type)
2645{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002646 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002647 {
2648 case TYPE_INT:
2649 return _(typename_int);
2650
2651 case TYPE_LONGINT:
2652 return _(typename_longint);
2653
2654 case TYPE_LONGLONGINT:
2655 return _(typename_longlongint);
2656
2657 case TYPE_UNSIGNEDINT:
2658 return _(typename_unsignedint);
2659
2660 case TYPE_UNSIGNEDLONGINT:
2661 return _(typename_unsignedlongint);
2662
2663 case TYPE_UNSIGNEDLONGLONGINT:
2664 return _(typename_unsignedlonglongint);
2665
2666 case TYPE_POINTER:
2667 return _(typename_pointer);
2668
2669 case TYPE_PERCENT:
2670 return _(typename_percent);
2671
2672 case TYPE_CHAR:
2673 return _(typename_char);
2674
2675 case TYPE_STRING:
2676 return _(typename_string);
2677
2678 case TYPE_FLOAT:
2679 return _(typename_float);
2680 }
2681
2682 return _(typename_unknown);
2683}
2684
zeertzjq7772c932023-08-15 22:48:40 +02002685 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002686adjust_types(
2687 const char ***ap_types,
2688 int arg,
2689 int *num_posarg,
2690 const char *type)
2691{
2692 if (*ap_types == NULL || *num_posarg < arg)
2693 {
2694 int idx;
2695 const char **new_types;
2696
2697 if (*ap_types == NULL)
2698 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2699 else
K.Takata4c215ec2023-08-26 18:05:08 +02002700 new_types = vim_realloc((char **)*ap_types,
2701 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002702
2703 if (new_types == NULL)
2704 return FAIL;
2705
2706 for (idx = *num_posarg; idx < arg; ++idx)
2707 new_types[idx] = NULL;
2708
2709 *ap_types = new_types;
2710 *num_posarg = arg;
2711 }
2712
2713 if ((*ap_types)[arg - 1] != NULL)
2714 {
2715 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2716 {
2717 const char *pt = type;
2718 if (pt[0] == '*')
2719 pt = (*ap_types)[arg - 1];
2720
2721 if (pt[0] != '*')
2722 {
2723 switch (pt[0])
2724 {
2725 case 'd': case 'i': break;
2726 default:
2727 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2728 return FAIL;
2729 }
2730 }
2731 }
2732 else
2733 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002734 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002735 {
2736 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2737 return FAIL;
2738 }
2739 }
2740 }
2741
2742 (*ap_types)[arg - 1] = type;
2743
2744 return OK;
2745}
2746
Christ van Willegenc35fc032024-03-14 18:30:41 +01002747 static void
2748format_overflow_error(const char *pstart)
2749{
2750 size_t arglen = 0;
2751 char *argcopy = NULL;
2752 const char *p = pstart;
2753
2754 while (VIM_ISDIGIT((int)(*p)))
2755 ++p;
2756
2757 arglen = p - pstart;
2758 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2759 if (argcopy != NULL)
2760 {
2761 strncpy(argcopy, pstart, arglen);
2762 semsg(_( e_val_too_large), argcopy);
2763 free(argcopy);
2764 }
2765 else
2766 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2767}
2768
2769#define MAX_ALLOWED_STRING_WIDTH 6400
2770
2771 static int
2772get_unsigned_int(
2773 const char *pstart,
2774 const char **p,
zeertzjq0dff3152024-07-29 20:28:14 +02002775 unsigned int *uj,
2776 int overflow_err)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002777{
2778 *uj = **p - '0';
2779 ++*p;
2780
2781 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2782 {
2783 *uj = 10 * *uj + (unsigned int)(**p - '0');
2784 ++*p;
2785 }
2786
2787 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2788 {
zeertzjq0dff3152024-07-29 20:28:14 +02002789 if (overflow_err)
2790 {
2791 format_overflow_error(pstart);
2792 return FAIL;
2793 }
2794 else
2795 *uj = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002796 }
2797
2798 return OK;
2799}
2800
2801
zeertzjq7772c932023-08-15 22:48:40 +02002802 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002803parse_fmt_types(
2804 const char ***ap_types,
2805 int *num_posarg,
2806 const char *fmt,
2807 typval_T *tvs UNUSED
2808 )
2809{
2810 const char *p = fmt;
2811 const char *arg = NULL;
2812
2813 int any_pos = 0;
2814 int any_arg = 0;
2815 int arg_idx;
2816
2817#define CHECK_POS_ARG do { \
2818 if (any_pos && any_arg) \
2819 { \
2820 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2821 goto error; \
2822 } \
2823} while (0);
2824
2825 if (p == NULL)
2826 return OK;
2827
2828 while (*p != NUL)
2829 {
2830 if (*p != '%')
2831 {
2832 char *q = strchr(p + 1, '%');
2833 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2834
2835 p += n;
2836 }
2837 else
2838 {
2839 // allowed values: \0, h, l, L
2840 char length_modifier = '\0';
2841
2842 // variable for positional arg
2843 int pos_arg = -1;
2844 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002845 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002846
2847 p++; // skip '%'
2848
2849 // First check to see if we find a positional
2850 // argument specifier
2851 ptype = p;
2852
2853 while (VIM_ISDIGIT(*ptype))
2854 ++ptype;
2855
2856 if (*ptype == '$')
2857 {
2858 if (*p == '0')
2859 {
2860 // 0 flag at the wrong place
2861 semsg(_( e_invalid_format_specifier_str), fmt);
2862 goto error;
2863 }
2864
2865 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002866 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002867
zeertzjq0dff3152024-07-29 20:28:14 +02002868 if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002869 goto error;
2870
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002871 pos_arg = uj;
2872
2873 any_pos = 1;
2874 CHECK_POS_ARG;
2875
2876 ++p;
2877 }
2878
2879 // parse flags
2880 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2881 || *p == '#' || *p == '\'')
2882 {
2883 switch (*p)
2884 {
2885 case '0': break;
2886 case '-': break;
2887 case '+': break;
2888 case ' ': // If both the ' ' and '+' flags appear, the ' '
2889 // flag should be ignored
2890 break;
2891 case '#': break;
2892 case '\'': break;
2893 }
2894 p++;
2895 }
2896 // If the '0' and '-' flags both appear, the '0' flag should be
2897 // ignored.
2898
2899 // parse field width
2900 if (*(arg = p) == '*')
2901 {
2902 p++;
2903
2904 if (VIM_ISDIGIT((int)(*p)))
2905 {
2906 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002907 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002908
zeertzjq0dff3152024-07-29 20:28:14 +02002909 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002910 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002911
2912 if (*p != '$')
2913 {
2914 semsg(_( e_invalid_format_specifier_str), fmt);
2915 goto error;
2916 }
2917 else
2918 {
2919 ++p;
2920 any_pos = 1;
2921 CHECK_POS_ARG;
2922
2923 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2924 goto error;
2925 }
2926 }
2927 else
2928 {
2929 any_arg = 1;
2930 CHECK_POS_ARG;
2931 }
2932 }
dundargoc580c1fc2023-10-06 19:41:14 +02002933 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002934 {
2935 // size_t could be wider than unsigned int; make sure we treat
2936 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002937 const char *digstart = p;
2938 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002939
zeertzjq0dff3152024-07-29 20:28:14 +02002940 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002941 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002942
2943 if (*p == '$')
2944 {
2945 semsg(_( e_invalid_format_specifier_str), fmt);
2946 goto error;
2947 }
2948 }
2949
2950 // parse precision
2951 if (*p == '.')
2952 {
2953 p++;
2954
2955 if (*(arg = p) == '*')
2956 {
2957 p++;
2958
2959 if (VIM_ISDIGIT((int)(*p)))
2960 {
2961 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002962 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002963
zeertzjq0dff3152024-07-29 20:28:14 +02002964 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002965 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002966
2967 if (*p == '$')
2968 {
2969 any_pos = 1;
2970 CHECK_POS_ARG;
2971
2972 ++p;
2973
2974 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2975 goto error;
2976 }
2977 else
2978 {
2979 semsg(_( e_invalid_format_specifier_str), fmt);
2980 goto error;
2981 }
2982 }
2983 else
2984 {
2985 any_arg = 1;
2986 CHECK_POS_ARG;
2987 }
2988 }
dundargoc580c1fc2023-10-06 19:41:14 +02002989 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002990 {
2991 // size_t could be wider than unsigned int; make sure we
2992 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002993 const char *digstart = p;
2994 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002995
zeertzjq0dff3152024-07-29 20:28:14 +02002996 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002997 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002998
2999 if (*p == '$')
3000 {
3001 semsg(_( e_invalid_format_specifier_str), fmt);
3002 goto error;
3003 }
3004 }
3005 }
3006
3007 if (pos_arg != -1)
3008 {
3009 any_pos = 1;
3010 CHECK_POS_ARG;
3011
3012 ptype = p;
3013 }
3014
3015 // parse 'h', 'l' and 'll' length modifiers
3016 if (*p == 'h' || *p == 'l')
3017 {
3018 length_modifier = *p;
3019 p++;
3020 if (length_modifier == 'l' && *p == 'l')
3021 {
3022 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02003023 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003024 p++;
3025 }
3026 }
3027
3028 switch (*p)
3029 {
3030 // Check for known format specifiers. % is special!
3031 case 'i':
3032 case '*':
3033 case 'd':
3034 case 'u':
3035 case 'o':
3036 case 'D':
3037 case 'U':
3038 case 'O':
3039 case 'x':
3040 case 'X':
3041 case 'b':
3042 case 'B':
3043 case 'c':
3044 case 's':
3045 case 'S':
3046 case 'p':
3047 case 'f':
3048 case 'F':
3049 case 'e':
3050 case 'E':
3051 case 'g':
3052 case 'G':
3053 if (pos_arg != -1)
3054 {
3055 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
3056 goto error;
3057 }
3058 else
3059 {
3060 any_arg = 1;
3061 CHECK_POS_ARG;
3062 }
3063 break;
3064
3065 default:
3066 if (pos_arg != -1)
3067 {
3068 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
3069 goto error;
3070 }
3071 }
3072
3073 if (*p != NUL)
3074 p++; // step over the just processed conversion specifier
3075 }
3076 }
3077
3078 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
3079 {
3080 if ((*ap_types)[arg_idx] == NULL)
3081 {
3082 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
3083 goto error;
3084 }
3085
3086# if defined(FEAT_EVAL)
3087 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
3088 {
3089 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
3090 goto error;
3091 }
3092# endif
3093 }
3094
3095 return OK;
3096
3097error:
K.Takata4c215ec2023-08-26 18:05:08 +02003098 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003099 *ap_types = NULL;
3100 *num_posarg = 0;
3101 return FAIL;
3102}
3103
zeertzjq7772c932023-08-15 22:48:40 +02003104 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003105skip_to_arg(
3106 const char **ap_types,
3107 va_list ap_start,
3108 va_list *ap,
3109 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003110 int *arg_cur,
3111 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003112{
3113 int arg_min = 0;
3114
3115 if (*arg_cur + 1 == *arg_idx)
3116 {
3117 ++*arg_cur;
3118 ++*arg_idx;
3119 return;
3120 }
3121
3122 if (*arg_cur >= *arg_idx)
3123 {
3124 // Reset ap to ap_start and skip arg_idx - 1 types
3125 va_end(*ap);
3126 va_copy(*ap, ap_start);
3127 }
3128 else
3129 {
3130 // Skip over any we should skip
3131 arg_min = *arg_cur;
3132 }
3133
3134 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
3135 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003136 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003137
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003138 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
3139 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02003140 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003141 return;
3142 }
3143
3144 p = ap_types[*arg_cur];
3145
3146 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003147
3148 // get parameter value, do initial processing
3149 switch (fmt_type)
3150 {
3151 case TYPE_PERCENT:
3152 case TYPE_UNKNOWN:
3153 break;
3154
3155 case TYPE_CHAR:
3156 va_arg(*ap, int);
3157 break;
3158
3159 case TYPE_STRING:
3160 va_arg(*ap, char *);
3161 break;
3162
3163 case TYPE_POINTER:
3164 va_arg(*ap, void *);
3165 break;
3166
3167 case TYPE_INT:
3168 va_arg(*ap, int);
3169 break;
3170
3171 case TYPE_LONGINT:
3172 va_arg(*ap, long int);
3173 break;
3174
3175 case TYPE_LONGLONGINT:
3176 va_arg(*ap, varnumber_T);
3177 break;
3178
3179 case TYPE_UNSIGNEDINT:
3180 va_arg(*ap, unsigned int);
3181 break;
3182
3183 case TYPE_UNSIGNEDLONGINT:
3184 va_arg(*ap, unsigned long int);
3185 break;
3186
3187 case TYPE_UNSIGNEDLONGLONGINT:
3188 va_arg(*ap, uvarnumber_T);
3189 break;
3190
3191 case TYPE_FLOAT:
3192 va_arg(*ap, double);
3193 break;
3194 }
3195 }
3196
3197 // Because we know that after we return from this call,
3198 // a va_arg() call is made, we can pre-emptively
3199 // increment the current argument index.
3200 ++*arg_cur;
3201 ++*arg_idx;
3202
3203 return;
3204}
3205
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003206 int
3207vim_vsnprintf_typval(
3208 char *str,
3209 size_t str_m,
3210 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003211 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003212 typval_T *tvs)
3213{
3214 size_t str_l = 0;
3215 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003216 int arg_cur = 0;
3217 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003218 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003219 va_list ap;
3220 const char **ap_types = NULL;
3221
3222 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
3223 return 0;
3224
3225 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003226
3227 if (p == NULL)
3228 p = "";
3229 while (*p != NUL)
3230 {
3231 if (*p != '%')
3232 {
3233 char *q = strchr(p + 1, '%');
3234 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
3235
3236 // Copy up to the next '%' or NUL without any changes.
3237 if (str_l < str_m)
3238 {
3239 size_t avail = str_m - str_l;
3240
3241 mch_memmove(str + str_l, p, n > avail ? avail : n);
3242 }
3243 p += n;
3244 str_l += n;
3245 }
3246 else
3247 {
3248 size_t min_field_width = 0, precision = 0;
3249 int zero_padding = 0, precision_specified = 0, justify_left = 0;
3250 int alternate_form = 0, force_sign = 0;
3251
3252 // If both the ' ' and '+' flags appear, the ' ' flag should be
3253 // ignored.
3254 int space_for_positive = 1;
3255
3256 // allowed values: \0, h, l, L
3257 char length_modifier = '\0';
3258
3259 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003260# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003261 // That sounds reasonable to use as the maximum
3262 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003263 char tmp[TMP_LEN];
3264
3265 // string address in case of string argument
3266 const char *str_arg = NULL;
3267
3268 // natural field width of arg without padding and sign
3269 size_t str_arg_l;
3270
3271 // unsigned char argument value - only defined for c conversion.
3272 // N.B. standard explicitly states the char argument for the c
3273 // conversion is unsigned
3274 unsigned char uchar_arg;
3275
3276 // number of zeros to be inserted for numeric conversions as
3277 // required by the precision or minimal field width
3278 size_t number_of_zeros_to_pad = 0;
3279
3280 // index into tmp where zero padding is to be inserted
3281 size_t zero_padding_insertion_ind = 0;
3282
3283 // current conversion specifier character
3284 char fmt_spec = '\0';
3285
3286 // buffer for 's' and 'S' specs
3287 char_u *tofree = NULL;
3288
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003289 // variables for positional arg
3290 int pos_arg = -1;
3291 const char *ptype;
3292
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003293
3294 p++; // skip '%'
3295
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003296 // First check to see if we find a positional
3297 // argument specifier
3298 ptype = p;
3299
3300 while (VIM_ISDIGIT(*ptype))
3301 ++ptype;
3302
3303 if (*ptype == '$')
3304 {
3305 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003306 const char *digstart = p;
3307 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003308
zeertzjq0dff3152024-07-29 20:28:14 +02003309 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003310 goto error;
3311
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003312 pos_arg = uj;
3313
3314 ++p;
3315 }
3316
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003317 // parse flags
3318 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3319 || *p == '#' || *p == '\'')
3320 {
3321 switch (*p)
3322 {
3323 case '0': zero_padding = 1; break;
3324 case '-': justify_left = 1; break;
3325 case '+': force_sign = 1; space_for_positive = 0; break;
3326 case ' ': force_sign = 1;
3327 // If both the ' ' and '+' flags appear, the ' '
3328 // flag should be ignored
3329 break;
3330 case '#': alternate_form = 1; break;
3331 case '\'': break;
3332 }
3333 p++;
3334 }
3335 // If the '0' and '-' flags both appear, the '0' flag should be
3336 // ignored.
3337
3338 // parse field width
3339 if (*p == '*')
3340 {
3341 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003342 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003343
3344 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003345
3346 if (VIM_ISDIGIT((int)(*p)))
3347 {
3348 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003349 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003350
zeertzjq0dff3152024-07-29 20:28:14 +02003351 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003352 goto error;
3353
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003354 arg_idx = uj;
3355
3356 ++p;
3357 }
3358
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003359 j =
3360# if defined(FEAT_EVAL)
3361 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3362# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003363 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3364 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003365 va_arg(ap, int));
3366
Christ van Willegenc35fc032024-03-14 18:30:41 +01003367 if (j > MAX_ALLOWED_STRING_WIDTH)
3368 {
zeertzjq0dff3152024-07-29 20:28:14 +02003369 if (tvs != NULL)
3370 {
3371 format_overflow_error(digstart);
3372 goto error;
3373 }
3374 else
3375 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003376 }
3377
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003378 if (j >= 0)
3379 min_field_width = j;
3380 else
3381 {
3382 min_field_width = -j;
3383 justify_left = 1;
3384 }
3385 }
3386 else if (VIM_ISDIGIT((int)(*p)))
3387 {
3388 // size_t could be wider than unsigned int; make sure we treat
3389 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003390 const char *digstart = p;
3391 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003392
zeertzjq0dff3152024-07-29 20:28:14 +02003393 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003394 goto error;
3395
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003396 min_field_width = uj;
3397 }
3398
3399 // parse precision
3400 if (*p == '.')
3401 {
3402 p++;
3403 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003404
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003405 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003406 {
3407 // size_t could be wider than unsigned int; make sure we
3408 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003409 const char *digstart = p;
3410 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003411
zeertzjq0dff3152024-07-29 20:28:14 +02003412 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003413 goto error;
3414
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003415 precision = uj;
3416 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003417 else if (*p == '*')
3418 {
3419 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003420 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003421
3422 p++;
3423
3424 if (VIM_ISDIGIT((int)(*p)))
3425 {
3426 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003427 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003428
zeertzjq0dff3152024-07-29 20:28:14 +02003429 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003430 goto error;
3431
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003432 arg_idx = uj;
3433
3434 ++p;
3435 }
3436
3437 j =
3438# if defined(FEAT_EVAL)
3439 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3440# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003441 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3442 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003443 va_arg(ap, int));
3444
Christ van Willegenc35fc032024-03-14 18:30:41 +01003445 if (j > MAX_ALLOWED_STRING_WIDTH)
3446 {
zeertzjq0dff3152024-07-29 20:28:14 +02003447 if (tvs != NULL)
3448 {
3449 format_overflow_error(digstart);
3450 goto error;
3451 }
3452 else
3453 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003454 }
3455
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003456 if (j >= 0)
3457 precision = j;
3458 else
3459 {
3460 precision_specified = 0;
3461 precision = 0;
3462 }
3463 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003464 }
3465
3466 // parse 'h', 'l' and 'll' length modifiers
3467 if (*p == 'h' || *p == 'l')
3468 {
3469 length_modifier = *p;
3470 p++;
3471 if (length_modifier == 'l' && *p == 'l')
3472 {
3473 // double l = __int64 / varnumber_T
3474 length_modifier = 'L';
3475 p++;
3476 }
3477 }
3478 fmt_spec = *p;
3479
3480 // common synonyms:
3481 switch (fmt_spec)
3482 {
3483 case 'i': fmt_spec = 'd'; break;
3484 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3485 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3486 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3487 default: break;
3488 }
3489
3490# if defined(FEAT_EVAL)
3491 switch (fmt_spec)
3492 {
3493 case 'd': case 'u': case 'o': case 'x': case 'X':
3494 if (tvs != NULL && length_modifier == '\0')
3495 length_modifier = 'L';
3496 }
3497# endif
3498
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003499 if (pos_arg != -1)
3500 arg_idx = pos_arg;
3501
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003502 // get parameter value, do initial processing
3503 switch (fmt_spec)
3504 {
3505 // '%' and 'c' behave similar to 's' regarding flags and field
3506 // widths
3507 case '%':
3508 case 'c':
3509 case 's':
3510 case 'S':
3511 str_arg_l = 1;
3512 switch (fmt_spec)
3513 {
3514 case '%':
3515 str_arg = p;
3516 break;
3517
3518 case 'c':
3519 {
3520 int j;
3521
3522 j =
3523# if defined(FEAT_EVAL)
3524 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3525# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003526 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3527 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003528 va_arg(ap, int));
3529
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003530 // standard demands unsigned char
3531 uchar_arg = (unsigned char)j;
3532 str_arg = (char *)&uchar_arg;
3533 break;
3534 }
3535
3536 case 's':
3537 case 'S':
3538 str_arg =
3539# if defined(FEAT_EVAL)
3540 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3541# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003542 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3543 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003544 va_arg(ap, char *));
3545
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003546 if (str_arg == NULL)
3547 {
3548 str_arg = "[NULL]";
3549 str_arg_l = 6;
3550 }
3551 // make sure not to address string beyond the specified
3552 // precision !!!
3553 else if (!precision_specified)
3554 str_arg_l = strlen(str_arg);
3555 // truncate string if necessary as requested by precision
3556 else if (precision == 0)
3557 str_arg_l = 0;
3558 else
3559 {
3560 // Don't put the #if inside memchr(), it can be a
3561 // macro.
3562 // memchr on HP does not like n > 2^31 !!!
3563 char *q = memchr(str_arg, '\0',
3564 precision <= (size_t)0x7fffffffL ? precision
3565 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003566
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003567 str_arg_l = (q == NULL) ? precision
3568 : (size_t)(q - str_arg);
3569 }
3570 if (fmt_spec == 'S')
3571 {
presuku1f2453f2021-11-24 15:32:57 +00003572 char_u *p1;
3573 size_t i;
3574 int cell;
presukud85fccd2021-11-20 19:38:31 +00003575
presuku1f2453f2021-11-24 15:32:57 +00003576 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003577 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003578 {
3579 cell = mb_ptr2cells(p1);
3580 if (precision_specified && i + cell > precision)
3581 break;
3582 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003583 }
presuku1f2453f2021-11-24 15:32:57 +00003584
3585 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003586 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003587 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003588 }
3589 break;
3590
3591 default:
3592 break;
3593 }
3594 break;
3595
3596 case 'd': case 'u':
3597 case 'b': case 'B':
3598 case 'o':
3599 case 'x': case 'X':
3600 case 'p':
3601 {
3602 // NOTE: the u, b, o, x, X and p conversion specifiers
3603 // imply the value is unsigned; d implies a signed
3604 // value
3605
3606 // 0 if numeric argument is zero (or if pointer is
3607 // NULL for 'p'), +1 if greater than zero (or nonzero
3608 // for unsigned arguments), -1 if negative (unsigned
3609 // argument is never negative)
3610 int arg_sign = 0;
3611
3612 // only set for length modifier h, or for no length
3613 // modifiers
3614 int int_arg = 0;
3615 unsigned int uint_arg = 0;
3616
3617 // only set for length modifier l
3618 long int long_arg = 0;
3619 unsigned long int ulong_arg = 0;
3620
3621 // only set for length modifier ll
3622 varnumber_T llong_arg = 0;
3623 uvarnumber_T ullong_arg = 0;
3624
3625 // only set for b conversion
3626 uvarnumber_T bin_arg = 0;
3627
3628 // pointer argument value -only defined for p
3629 // conversion
3630 void *ptr_arg = NULL;
3631
3632 if (fmt_spec == 'p')
3633 {
3634 length_modifier = '\0';
3635 ptr_arg =
3636# if defined(FEAT_EVAL)
3637 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3638 NULL) :
3639# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003640 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3641 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003642 va_arg(ap, void *));
3643
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003644 if (ptr_arg != NULL)
3645 arg_sign = 1;
3646 }
3647 else if (fmt_spec == 'b' || fmt_spec == 'B')
3648 {
3649 bin_arg =
3650# if defined(FEAT_EVAL)
3651 tvs != NULL ?
3652 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3653# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003654 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3655 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003656 va_arg(ap, uvarnumber_T));
3657
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003658 if (bin_arg != 0)
3659 arg_sign = 1;
3660 }
3661 else if (fmt_spec == 'd')
3662 {
3663 // signed
3664 switch (length_modifier)
3665 {
3666 case '\0':
3667 case 'h':
3668 // char and short arguments are passed as int.
3669 int_arg =
3670# if defined(FEAT_EVAL)
3671 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3672# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003673 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3674 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003675 va_arg(ap, int));
3676
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003677 if (int_arg > 0)
3678 arg_sign = 1;
3679 else if (int_arg < 0)
3680 arg_sign = -1;
3681 break;
3682 case 'l':
3683 long_arg =
3684# if defined(FEAT_EVAL)
3685 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3686# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003687 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3688 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003689 va_arg(ap, long int));
3690
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003691 if (long_arg > 0)
3692 arg_sign = 1;
3693 else if (long_arg < 0)
3694 arg_sign = -1;
3695 break;
3696 case 'L':
3697 llong_arg =
3698# if defined(FEAT_EVAL)
3699 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3700# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003701 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3702 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003703 va_arg(ap, varnumber_T));
3704
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003705 if (llong_arg > 0)
3706 arg_sign = 1;
3707 else if (llong_arg < 0)
3708 arg_sign = -1;
3709 break;
3710 }
3711 }
3712 else
3713 {
3714 // unsigned
3715 switch (length_modifier)
3716 {
3717 case '\0':
3718 case 'h':
3719 uint_arg =
3720# if defined(FEAT_EVAL)
3721 tvs != NULL ? (unsigned)
3722 tv_nr(tvs, &arg_idx) :
3723# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003724 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3725 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003726 va_arg(ap, unsigned int));
3727
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003728 if (uint_arg != 0)
3729 arg_sign = 1;
3730 break;
3731 case 'l':
3732 ulong_arg =
3733# if defined(FEAT_EVAL)
3734 tvs != NULL ? (unsigned long)
3735 tv_nr(tvs, &arg_idx) :
3736# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003737 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3738 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003739 va_arg(ap, unsigned long int));
3740
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003741 if (ulong_arg != 0)
3742 arg_sign = 1;
3743 break;
3744 case 'L':
3745 ullong_arg =
3746# if defined(FEAT_EVAL)
3747 tvs != NULL ? (uvarnumber_T)
3748 tv_nr(tvs, &arg_idx) :
3749# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003750 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3751 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003752 va_arg(ap, uvarnumber_T));
3753
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003754 if (ullong_arg != 0)
3755 arg_sign = 1;
3756 break;
3757 }
3758 }
3759
3760 str_arg = tmp;
3761 str_arg_l = 0;
3762
3763 // NOTE:
3764 // For d, i, u, o, x, and X conversions, if precision is
3765 // specified, the '0' flag should be ignored. This is so
3766 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3767 // FreeBSD, NetBSD; but not with Perl.
3768 if (precision_specified)
3769 zero_padding = 0;
3770 if (fmt_spec == 'd')
3771 {
3772 if (force_sign && arg_sign >= 0)
3773 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3774 // leave negative numbers for sprintf to handle, to
3775 // avoid handling tricky cases like (short int)-32768
3776 }
3777 else if (alternate_form)
3778 {
3779 if (arg_sign != 0
3780 && (fmt_spec == 'b' || fmt_spec == 'B'
3781 || fmt_spec == 'x' || fmt_spec == 'X') )
3782 {
3783 tmp[str_arg_l++] = '0';
3784 tmp[str_arg_l++] = fmt_spec;
3785 }
3786 // alternate form should have no effect for p
3787 // conversion, but ...
3788 }
3789
3790 zero_padding_insertion_ind = str_arg_l;
3791 if (!precision_specified)
3792 precision = 1; // default precision is 1
3793 if (precision == 0 && arg_sign == 0)
3794 {
3795 // When zero value is formatted with an explicit
3796 // precision 0, the resulting formatted string is
3797 // empty (d, i, u, b, B, o, x, X, p).
3798 }
3799 else
3800 {
3801 char f[6];
3802 int f_l = 0;
3803
3804 // construct a simple format string for sprintf
3805 f[f_l++] = '%';
3806 if (!length_modifier)
3807 ;
3808 else if (length_modifier == 'L')
3809 {
3810# ifdef MSWIN
3811 f[f_l++] = 'I';
3812 f[f_l++] = '6';
3813 f[f_l++] = '4';
3814# else
3815 f[f_l++] = 'l';
3816 f[f_l++] = 'l';
3817# endif
3818 }
3819 else
3820 f[f_l++] = length_modifier;
3821 f[f_l++] = fmt_spec;
3822 f[f_l++] = '\0';
3823
3824 if (fmt_spec == 'p')
3825 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3826 else if (fmt_spec == 'b' || fmt_spec == 'B')
3827 {
3828 char b[8 * sizeof(uvarnumber_T)];
3829 size_t b_l = 0;
3830 uvarnumber_T bn = bin_arg;
3831
3832 do
3833 {
3834 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3835 bn >>= 1;
3836 }
3837 while (bn != 0);
3838
3839 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3840 str_arg_l += b_l;
3841 }
3842 else if (fmt_spec == 'd')
3843 {
3844 // signed
3845 switch (length_modifier)
3846 {
3847 case '\0': str_arg_l += sprintf(
3848 tmp + str_arg_l, f,
3849 int_arg);
3850 break;
3851 case 'h': str_arg_l += sprintf(
3852 tmp + str_arg_l, f,
3853 (short)int_arg);
3854 break;
3855 case 'l': str_arg_l += sprintf(
3856 tmp + str_arg_l, f, long_arg);
3857 break;
3858 case 'L': str_arg_l += sprintf(
3859 tmp + str_arg_l, f, llong_arg);
3860 break;
3861 }
3862 }
3863 else
3864 {
3865 // unsigned
3866 switch (length_modifier)
3867 {
3868 case '\0': str_arg_l += sprintf(
3869 tmp + str_arg_l, f,
3870 uint_arg);
3871 break;
3872 case 'h': str_arg_l += sprintf(
3873 tmp + str_arg_l, f,
3874 (unsigned short)uint_arg);
3875 break;
3876 case 'l': str_arg_l += sprintf(
3877 tmp + str_arg_l, f, ulong_arg);
3878 break;
3879 case 'L': str_arg_l += sprintf(
3880 tmp + str_arg_l, f, ullong_arg);
3881 break;
3882 }
3883 }
3884
3885 // include the optional minus sign and possible
3886 // "0x" in the region before the zero padding
3887 // insertion point
3888 if (zero_padding_insertion_ind < str_arg_l
3889 && tmp[zero_padding_insertion_ind] == '-')
3890 zero_padding_insertion_ind++;
3891 if (zero_padding_insertion_ind + 1 < str_arg_l
3892 && tmp[zero_padding_insertion_ind] == '0'
3893 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3894 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3895 zero_padding_insertion_ind += 2;
3896 }
3897
3898 {
3899 size_t num_of_digits = str_arg_l
3900 - zero_padding_insertion_ind;
3901
3902 if (alternate_form && fmt_spec == 'o'
3903 // unless zero is already the first
3904 // character
3905 && !(zero_padding_insertion_ind < str_arg_l
3906 && tmp[zero_padding_insertion_ind] == '0'))
3907 {
3908 // assure leading zero for alternate-form
3909 // octal numbers
3910 if (!precision_specified
3911 || precision < num_of_digits + 1)
3912 {
3913 // precision is increased to force the
3914 // first character to be zero, except if a
3915 // zero value is formatted with an
3916 // explicit precision of zero
3917 precision = num_of_digits + 1;
3918 }
3919 }
3920 // zero padding to specified precision?
3921 if (num_of_digits < precision)
3922 number_of_zeros_to_pad = precision - num_of_digits;
3923 }
3924 // zero padding to specified minimal field width?
3925 if (!justify_left && zero_padding)
3926 {
3927 int n = (int)(min_field_width - (str_arg_l
3928 + number_of_zeros_to_pad));
3929 if (n > 0)
3930 number_of_zeros_to_pad += n;
3931 }
3932 break;
3933 }
3934
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003935 case 'f':
3936 case 'F':
3937 case 'e':
3938 case 'E':
3939 case 'g':
3940 case 'G':
3941 {
3942 // Floating point.
3943 double f;
3944 double abs_f;
3945 char format[40];
3946 int l;
3947 int remove_trailing_zeroes = FALSE;
3948
3949 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003950# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003951 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003952# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003953 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3954 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003955 va_arg(ap, double));
3956
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003957 abs_f = f < 0 ? -f : f;
3958
3959 if (fmt_spec == 'g' || fmt_spec == 'G')
3960 {
3961 // Would be nice to use %g directly, but it prints
3962 // "1.0" as "1", we don't want that.
3963 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3964 || abs_f == 0.0)
3965 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3966 else
3967 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3968 remove_trailing_zeroes = TRUE;
3969 }
3970
3971 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003972# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003973 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003974# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003975 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003976# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003977 )
3978 {
3979 // Avoid a buffer overflow
3980 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3981 force_sign, space_for_positive));
3982 str_arg_l = STRLEN(tmp);
3983 zero_padding = 0;
3984 }
3985 else
3986 {
3987 if (isnan(f))
3988 {
3989 // Not a number: nan or NAN
3990 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3991 : "nan");
3992 str_arg_l = 3;
3993 zero_padding = 0;
3994 }
3995 else if (isinf(f))
3996 {
3997 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3998 force_sign, space_for_positive));
3999 str_arg_l = STRLEN(tmp);
4000 zero_padding = 0;
4001 }
4002 else
4003 {
4004 // Regular float number
4005 format[0] = '%';
4006 l = 1;
4007 if (force_sign)
4008 format[l++] = space_for_positive ? ' ' : '+';
4009 if (precision_specified)
4010 {
4011 size_t max_prec = TMP_LEN - 10;
4012
4013 // Make sure we don't get more digits than we
4014 // have room for.
4015 if ((fmt_spec == 'f' || fmt_spec == 'F')
4016 && abs_f > 1.0)
4017 max_prec -= (size_t)log10(abs_f);
4018 if (precision > max_prec)
4019 precision = max_prec;
4020 l += sprintf(format + l, ".%d", (int)precision);
4021 }
4022 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
4023 format[l + 1] = NUL;
4024
4025 str_arg_l = sprintf(tmp, format, f);
4026 }
4027
4028 if (remove_trailing_zeroes)
4029 {
4030 int i;
4031 char *tp;
4032
4033 // Using %g or %G: remove superfluous zeroes.
4034 if (fmt_spec == 'f' || fmt_spec == 'F')
4035 tp = tmp + str_arg_l - 1;
4036 else
4037 {
4038 tp = (char *)vim_strchr((char_u *)tmp,
4039 fmt_spec == 'e' ? 'e' : 'E');
4040 if (tp != NULL)
4041 {
4042 // Remove superfluous '+' and leading
4043 // zeroes from the exponent.
4044 if (tp[1] == '+')
4045 {
4046 // Change "1.0e+07" to "1.0e07"
4047 STRMOVE(tp + 1, tp + 2);
4048 --str_arg_l;
4049 }
4050 i = (tp[1] == '-') ? 2 : 1;
4051 while (tp[i] == '0')
4052 {
4053 // Change "1.0e07" to "1.0e7"
4054 STRMOVE(tp + i, tp + i + 1);
4055 --str_arg_l;
4056 }
4057 --tp;
4058 }
4059 }
4060
4061 if (tp != NULL && !precision_specified)
4062 // Remove trailing zeroes, but keep the one
4063 // just after a dot.
4064 while (tp > tmp + 2 && *tp == '0'
4065 && tp[-1] != '.')
4066 {
4067 STRMOVE(tp, tp + 1);
4068 --tp;
4069 --str_arg_l;
4070 }
4071 }
4072 else
4073 {
4074 char *tp;
4075
4076 // Be consistent: some printf("%e") use 1.0e+12
4077 // and some 1.0e+012. Remove one zero in the last
4078 // case.
4079 tp = (char *)vim_strchr((char_u *)tmp,
4080 fmt_spec == 'e' ? 'e' : 'E');
4081 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
4082 && tp[2] == '0'
4083 && vim_isdigit(tp[3])
4084 && vim_isdigit(tp[4]))
4085 {
4086 STRMOVE(tp + 2, tp + 3);
4087 --str_arg_l;
4088 }
4089 }
4090 }
4091 if (zero_padding && min_field_width > str_arg_l
4092 && (tmp[0] == '-' || force_sign))
4093 {
4094 // padding 0's should be inserted after the sign
4095 number_of_zeros_to_pad = min_field_width - str_arg_l;
4096 zero_padding_insertion_ind = 1;
4097 }
4098 str_arg = tmp;
4099 break;
4100 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004101
4102 default:
4103 // unrecognized conversion specifier, keep format string
4104 // as-is
4105 zero_padding = 0; // turn zero padding off for non-numeric
4106 // conversion
4107 justify_left = 1;
4108 min_field_width = 0; // reset flags
4109
4110 // discard the unrecognized conversion, just keep *
4111 // the unrecognized conversion character
4112 str_arg = p;
4113 str_arg_l = 0;
4114 if (*p != NUL)
4115 str_arg_l++; // include invalid conversion specifier
4116 // unchanged if not at end-of-string
4117 break;
4118 }
4119
4120 if (*p != NUL)
4121 p++; // step over the just processed conversion specifier
4122
4123 // insert padding to the left as requested by min_field_width;
4124 // this does not include the zero padding in case of numerical
4125 // conversions
4126 if (!justify_left)
4127 {
4128 // left padding with blank or zero
4129 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
4130
4131 if (pn > 0)
4132 {
4133 if (str_l < str_m)
4134 {
4135 size_t avail = str_m - str_l;
4136
4137 vim_memset(str + str_l, zero_padding ? '0' : ' ',
4138 (size_t)pn > avail ? avail
4139 : (size_t)pn);
4140 }
4141 str_l += pn;
4142 }
4143 }
4144
4145 // zero padding as requested by the precision or by the minimal
4146 // field width for numeric conversions required?
4147 if (number_of_zeros_to_pad == 0)
4148 {
4149 // will not copy first part of numeric right now, *
4150 // force it to be copied later in its entirety
4151 zero_padding_insertion_ind = 0;
4152 }
4153 else
4154 {
4155 // insert first part of numerics (sign or '0x') before zero
4156 // padding
4157 int zn = (int)zero_padding_insertion_ind;
4158
4159 if (zn > 0)
4160 {
4161 if (str_l < str_m)
4162 {
4163 size_t avail = str_m - str_l;
4164
4165 mch_memmove(str + str_l, str_arg,
4166 (size_t)zn > avail ? avail
4167 : (size_t)zn);
4168 }
4169 str_l += zn;
4170 }
4171
4172 // insert zero padding as requested by the precision or min
4173 // field width
4174 zn = (int)number_of_zeros_to_pad;
4175 if (zn > 0)
4176 {
4177 if (str_l < str_m)
4178 {
4179 size_t avail = str_m - str_l;
4180
4181 vim_memset(str + str_l, '0',
4182 (size_t)zn > avail ? avail
4183 : (size_t)zn);
4184 }
4185 str_l += zn;
4186 }
4187 }
4188
4189 // insert formatted string
4190 // (or as-is conversion specifier for unknown conversions)
4191 {
4192 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
4193
4194 if (sn > 0)
4195 {
4196 if (str_l < str_m)
4197 {
4198 size_t avail = str_m - str_l;
4199
4200 mch_memmove(str + str_l,
4201 str_arg + zero_padding_insertion_ind,
4202 (size_t)sn > avail ? avail : (size_t)sn);
4203 }
4204 str_l += sn;
4205 }
4206 }
4207
4208 // insert right padding
4209 if (justify_left)
4210 {
4211 // right blank padding to the field width
4212 int pn = (int)(min_field_width
4213 - (str_arg_l + number_of_zeros_to_pad));
4214
4215 if (pn > 0)
4216 {
4217 if (str_l < str_m)
4218 {
4219 size_t avail = str_m - str_l;
4220
4221 vim_memset(str + str_l, ' ',
4222 (size_t)pn > avail ? avail
4223 : (size_t)pn);
4224 }
4225 str_l += pn;
4226 }
4227 }
4228 vim_free(tofree);
4229 }
4230 }
4231
4232 if (str_m > 0)
4233 {
4234 // make sure the string is nul-terminated even at the expense of
4235 // overwriting the last character (shouldn't happen, but just in case)
4236 //
4237 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
4238 }
4239
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004240 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00004241 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004242
Christ van Willegenc35fc032024-03-14 18:30:41 +01004243error:
K.Takata4c215ec2023-08-26 18:05:08 +02004244 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004245 va_end(ap);
4246
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004247 // Return the number of characters formatted (excluding trailing nul
4248 // character), that is, the number of characters that would have been
4249 // written to the buffer if it were large enough.
4250 return (int)str_l;
4251}
4252
4253#endif // PROTO