blob: 120d393552efea08beb89e6732849a752f4b4f9e [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * strings.c: string manipulation functions
 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
Mike Williams51024bb2024-05-30 07:46:30 +0200154 size_t l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
zeertzjq88c8c542024-05-30 19:27:25 +0200272 if (do_special && find_cmdline_var(p, &l) >= 0)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200273 {
274 *d++ = '\\'; // insert backslash
zeertzjq88c8c542024-05-30 19:27:25 +0200275 memcpy(d, p, l); // copy the var
276 d += l;
277 p += l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000341 if (p == NULL)
342 return;
343
344 p2 = p;
345 while ((c = *p2) != NUL)
346 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200347}
348
#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Shared implementation of strup_save() and strlow_save().
 * Return a copy of "orig" in allocated memory with every character converted
 * to upper case when "upper" is TRUE, to lower case otherwise.
 * Handles multi-byte characters as well as possible.
 * Returns NULL when out of memory.
 */
    static char_u *
strcase_save(char_u *orig, int upper)
{
    char_u  *p;
    char_u  *res;

    res = p = vim_strsave(orig);
    if (res == NULL)
        return NULL;

    while (*p != NUL)
    {
        int	l;

        if (enc_utf8)
        {
            int	    c, nc;
            int	    newl;
            char_u  *s;

            c = utf_ptr2char(p);
            l = utf_ptr2len(p);
            if (c == 0)
            {
                // overlong sequence, use only the first byte
                c = *p;
                l = 1;
            }
            nc = upper ? utf_toupper(c) : utf_tolower(c);

            // Reallocate string when byte count changes.  This is rare,
            // thus it's OK to do another malloc()/free().
            newl = utf_char2len(nc);
            if (newl != l)
            {
                s = alloc(STRLEN(res) + 1 + newl - l);
                if (s == NULL)
                {
                    vim_free(res);
                    return NULL;
                }
                // Copy the part before "p", leave a gap of "newl" bytes for
                // the converted character, then copy the remainder.
                mch_memmove(s, res, p - res);
                STRCPY(s + (p - res) + newl, p + l);
                p = s + (p - res);
                vim_free(res);
                res = s;
            }

            utf_char2bytes(nc, p);
            p += newl;
        }
        else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
            p += l;		// skip multi-byte character
        else
        {
            // note that toupper()/tolower() can be a macro
            if (upper)
                *p = TOUPPER_LOC(*p);
            else
                *p = TOLOWER_LOC(*p);
            p++;
        }
    }

    return res;
}

/*
 * Make string "s" all upper-case and return it in allocated memory.
 * Handles multi-byte characters as well as possible.
 * Returns NULL when out of memory.
 */
    static char_u *
strup_save(char_u *orig)
{
    return strcase_save(orig, TRUE);
}

/*
 * Make string "s" all lower-case and return it in allocated memory.
 * Handles multi-byte characters as well as possible.
 * Returns NULL when out of memory.
 */
    char_u *
strlow_save(char_u *orig)
{
    return strcase_save(orig, FALSE);
}
#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000528/*
529 * A version of strlen() that has a maximum length.
530 */
531 size_t
532vim_strlen_maxlen(char *s, size_t maxlen)
533{
534 size_t i;
535 for (i = 0; i < maxlen; ++i)
536 if (s[i] == NUL)
537 break;
538 return i;
539}
540
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200541#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
542/*
543 * Compare two strings, ignoring case, using current locale.
544 * Doesn't work for multi-byte characters.
545 * return 0 for match, < 0 for smaller, > 0 for bigger
546 */
547 int
548vim_stricmp(char *s1, char *s2)
549{
550 int i;
551
552 for (;;)
553 {
554 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
555 if (i != 0)
556 return i; // this character different
557 if (*s1 == NUL)
558 break; // strings match until NUL
559 ++s1;
560 ++s2;
561 }
562 return 0; // strings match
563}
564#endif
565
566#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
567/*
568 * Compare two strings, for length "len", ignoring case, using current locale.
569 * Doesn't work for multi-byte characters.
570 * return 0 for match, < 0 for smaller, > 0 for bigger
571 */
572 int
573vim_strnicmp(char *s1, char *s2, size_t len)
574{
575 int i;
576
577 while (len > 0)
578 {
579 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
580 if (i != 0)
581 return i; // this character different
582 if (*s1 == NUL)
583 break; // strings match until NUL
584 ++s1;
585 ++s2;
586 --len;
587 }
588 return 0; // strings match
589}
590#endif
591
592/*
Christian Brabandt84e31752024-09-02 09:59:18 +0200593 * Compare two ASCII strings, for length "len", ignoring case, ignoring locale
594 * (mostly matters for turkish locale where i I might be different).
595 * return 0 for match, < 0 for smaller, > 0 for bigger
596 */
597 int
598vim_strnicmp_asc(char *s1, char *s2, size_t len)
599{
John Marriottc847c122024-11-24 14:09:40 +0100600 int i = 0;
Christian Brabandt84e31752024-09-02 09:59:18 +0200601 int save_cmp_flags = cmp_flags;
602
603 cmp_flags |= CMP_KEEPASCII; // compare by ASCII value, ignoring locale
604 while (len > 0)
605 {
606 i = vim_tolower(*s1) - vim_tolower(*s2);
607 if (i != 0)
608 break; // this character is different
609 if (*s1 == NUL)
610 break; // strings match until NUL
611 ++s1;
612 ++s2;
613 --len;
614 }
615 cmp_flags = save_cmp_flags;
616 return i;
617}
618
619/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200620 * Search for first occurrence of "c" in "string".
621 * Version of strchr() that handles unsigned char strings with characters from
622 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
623 * end of the string.
624 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000625 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200626vim_strchr(char_u *string, int c)
627{
628 char_u *p;
629 int b;
630
631 p = string;
632 if (enc_utf8 && c >= 0x80)
633 {
634 while (*p != NUL)
635 {
636 int l = utfc_ptr2len(p);
637
638 // Avoid matching an illegal byte here.
639 if (utf_ptr2char(p) == c && l > 1)
640 return p;
641 p += l;
642 }
643 return NULL;
644 }
645 if (enc_dbcs != 0 && c > 255)
646 {
647 int n2 = c & 0xff;
648
649 c = ((unsigned)c >> 8) & 0xff;
650 while ((b = *p) != NUL)
651 {
652 if (b == c && p[1] == n2)
653 return p;
654 p += (*mb_ptr2len)(p);
655 }
656 return NULL;
657 }
658 if (has_mbyte)
659 {
660 while ((b = *p) != NUL)
661 {
662 if (b == c)
663 return p;
664 p += (*mb_ptr2len)(p);
665 }
666 return NULL;
667 }
668 while ((b = *p) != NUL)
669 {
670 if (b == c)
671 return p;
672 ++p;
673 }
674 return NULL;
675}
676
// Sized version of strchr that can handle embedded NULs.
// Searches the first "*n" bytes of "p" for "c".
// Adjusts "*n" to the number of bytes remaining at the returned position;
// it is zero when "c" was not found and NULL is returned.
// A byte matches when it equals "c" either as a plain char or as an unsigned
// char, so bytes 128..255 are found also when "char" is signed.
    char *
vim_strnchr(const char *p, size_t *n, int c)
{
    while (*n > 0)
    {
        if (*p == c || (unsigned char)*p == c)
            return (char *)p;
        p++;
        (*n)--;
    }

    return NULL;
}
692
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200693/*
694 * Version of strchr() that only works for bytes and handles unsigned char
695 * strings with characters above 128 correctly. It also doesn't return a
696 * pointer to the NUL at the end of the string.
697 */
698 char_u *
699vim_strbyte(char_u *string, int c)
700{
701 char_u *p = string;
702
703 while (*p != NUL)
704 {
705 if (*p == c)
706 return p;
707 ++p;
708 }
709 return NULL;
710}
711
712/*
713 * Search for last occurrence of "c" in "string".
714 * Version of strrchr() that handles unsigned char strings with characters from
715 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
716 * end of the string.
717 * Return NULL if not found.
718 * Does not handle multi-byte char for "c"!
719 */
720 char_u *
721vim_strrchr(char_u *string, int c)
722{
723 char_u *retval = NULL;
724 char_u *p = string;
725
726 while (*p)
727 {
728 if (*p == c)
729 retval = p;
730 MB_PTR_ADV(p);
731 }
732 return retval;
733}
734
735/*
736 * Vim's version of strpbrk(), in case it's missing.
737 * Don't generate a prototype for this, causes problems when it's not used.
738 */
739#ifndef PROTO
740# ifndef HAVE_STRPBRK
741# ifdef vim_strpbrk
742# undef vim_strpbrk
743# endif
744 char_u *
745vim_strpbrk(char_u *s, char_u *charset)
746{
747 while (*s)
748 {
749 if (vim_strchr(charset, *s) != NULL)
750 return s;
751 MB_PTR_ADV(s);
752 }
753 return NULL;
754}
755# endif
756#endif
757
758/*
759 * Sort an array of strings.
760 */
761static int sort_compare(const void *s1, const void *s2);
762
763 static int
764sort_compare(const void *s1, const void *s2)
765{
766 return STRCMP(*(char **)s1, *(char **)s2);
767}
768
769 void
770sort_strings(
771 char_u **files,
772 int count)
773{
774 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
775}
776
777#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
778/*
779 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
780 * When "s" is NULL FALSE is returned.
781 */
782 int
783has_non_ascii(char_u *s)
784{
785 char_u *p;
786
787 if (s != NULL)
788 for (p = s; *p != NUL; ++p)
789 if (*p >= 128)
790 return TRUE;
791 return FALSE;
792}
793#endif
794
795/*
796 * Concatenate two strings and return the result in allocated memory.
797 * Returns NULL when out of memory.
798 */
799 char_u *
800concat_str(char_u *str1, char_u *str2)
801{
802 char_u *dest;
803 size_t l = str1 == NULL ? 0 : STRLEN(str1);
804
805 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000806 if (dest == NULL)
807 return NULL;
808 if (str1 == NULL)
809 *dest = NUL;
810 else
811 STRCPY(dest, str1);
812 if (str2 != NULL)
813 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200814 return dest;
815}
816
zeertzjq4dd266c2023-08-19 11:35:03 +0200817#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
818/*
819 * Reverse text into allocated memory.
820 * Returns the allocated string, NULL when out of memory.
821 */
822 char_u *
823reverse_text(char_u *s)
824{
825 size_t len = STRLEN(s);
826 char_u *rev = alloc(len + 1);
827 if (rev == NULL)
828 return NULL;
829
830 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
831 {
832 if (has_mbyte)
833 {
834 int mb_len = (*mb_ptr2len)(s + s_i);
835 rev_i -= mb_len;
836 mch_memmove(rev + rev_i, s + s_i, mb_len);
837 s_i += mb_len - 1;
838 }
839 else
840 rev[--rev_i] = s[s_i];
841 }
842 rev[len] = NUL;
843 return rev;
844}
845#endif
846
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200847#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200848/*
849 * Return string "str" in ' quotes, doubling ' characters.
850 * If "str" is NULL an empty string is assumed.
851 * If "function" is TRUE make it function('string').
852 */
853 char_u *
854string_quote(char_u *str, int function)
855{
856 unsigned len;
857 char_u *p, *r, *s;
858
859 len = (function ? 13 : 3);
860 if (str != NULL)
861 {
862 len += (unsigned)STRLEN(str);
863 for (p = str; *p != NUL; MB_PTR_ADV(p))
864 if (*p == '\'')
865 ++len;
866 }
867 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000868 if (r == NULL)
869 return NULL;
870
871 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200872 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000873 STRCPY(r, "function('");
874 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200875 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000876 else
877 *r++ = '\'';
878 if (str != NULL)
879 for (p = str; *p != NUL; )
880 {
881 if (*p == '\'')
882 *r++ = '\'';
883 MB_COPY_CHAR(p, r);
884 }
885 *r++ = '\'';
886 if (function)
887 *r++ = ')';
888 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200889 return s;
890}
891
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000892/*
893 * Count the number of times "needle" occurs in string "haystack". Case is
894 * ignored if "ic" is TRUE.
895 */
896 long
897string_count(char_u *haystack, char_u *needle, int ic)
898{
899 long n = 0;
900 char_u *p = haystack;
901 char_u *next;
902
903 if (p == NULL || needle == NULL || *needle == NUL)
904 return 0;
905
906 if (ic)
907 {
908 size_t len = STRLEN(needle);
909
910 while (*p != NUL)
911 {
912 if (MB_STRNICMP(p, needle, len) == 0)
913 {
914 ++n;
915 p += len;
916 }
917 else
918 MB_PTR_ADV(p);
919 }
920 }
921 else
922 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
923 {
924 ++n;
925 p = next + STRLEN(needle);
926 }
927
928 return n;
929}
930
931/*
932 * Make a typval_T of the first character of "input" and store it in "output".
933 * Return OK or FAIL.
934 */
935 static int
936copy_first_char_to_tv(char_u *input, typval_T *output)
937{
938 char_u buf[MB_MAXBYTES + 1];
939 int len;
940
941 if (input == NULL || output == NULL)
942 return FAIL;
943
944 len = has_mbyte ? mb_ptr2len(input) : 1;
945 STRNCPY(buf, input, len);
946 buf[len] = NUL;
947 output->v_type = VAR_STRING;
948 output->vval.v_string = vim_strsave(buf);
949
950 return output->vval.v_string == NULL ? FAIL : OK;
951}
952
953/*
954 * Implementation of map() and filter() for a String. Apply "expr" to every
955 * character in string "str" and return the result in "rettv".
956 */
957 void
958string_filter_map(
959 char_u *str,
960 filtermap_T filtermap,
961 typval_T *expr,
962 typval_T *rettv)
963{
964 char_u *p;
965 typval_T tv;
966 garray_T ga;
967 int len = 0;
968 int idx = 0;
969 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100970 typval_T newtv;
971 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000972
973 rettv->v_type = VAR_STRING;
974 rettv->vval.v_string = NULL;
975
976 // set_vim_var_nr() doesn't set the type
977 set_vim_var_type(VV_KEY, VAR_NUMBER);
978
zeertzjqe7d49462023-04-16 20:53:55 +0100979 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100980 fc = eval_expr_get_funccal(expr, &newtv);
981
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000982 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000983 for (p = str; *p != NUL; p += len)
984 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000985 if (copy_first_char_to_tv(p, &tv) == FAIL)
986 break;
987 len = (int)STRLEN(tv.vval.v_string);
988
989 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100990 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000991 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000992 {
993 clear_tv(&newtv);
994 clear_tv(&tv);
995 break;
996 }
Ernie Raele79e2072024-01-13 11:47:33 +0100997 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000998 {
999 if (newtv.v_type != VAR_STRING)
1000 {
1001 clear_tv(&newtv);
1002 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +00001003 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001004 break;
1005 }
1006 else
1007 ga_concat(&ga, newtv.vval.v_string);
1008 }
Ernie Raele79e2072024-01-13 11:47:33 +01001009 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001010 ga_concat(&ga, tv.vval.v_string);
1011
1012 clear_tv(&newtv);
1013 clear_tv(&tv);
1014
1015 ++idx;
1016 }
1017 ga_append(&ga, NUL);
1018 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +01001019 if (fc != NULL)
1020 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001021}
1022
1023/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001024 * Implementation of reduce() for String "argvars[0]" using the function "expr"
1025 * starting with the optional initial value "argvars[2]" and return the result
1026 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001027 */
1028 void
1029string_reduce(
1030 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001031 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001032 typval_T *rettv)
1033{
1034 char_u *p = tv_get_string(&argvars[0]);
1035 int len;
1036 typval_T argv[3];
1037 int r;
1038 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +01001039 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001040
1041 if (argvars[2].v_type == VAR_UNKNOWN)
1042 {
1043 if (*p == NUL)
1044 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001045 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001046 return;
1047 }
1048 if (copy_first_char_to_tv(p, rettv) == FAIL)
1049 return;
1050 p += STRLEN(rettv->vval.v_string);
1051 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001052 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001053 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001054 else
1055 copy_tv(&argvars[2], rettv);
1056
zeertzjqe7d49462023-04-16 20:53:55 +01001057 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001058 fc = eval_expr_get_funccal(expr, rettv);
1059
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001060 for ( ; *p != NUL; p += len)
1061 {
1062 argv[0] = *rettv;
1063 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1064 break;
1065 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001066
zeertzjqad0c4422023-08-17 22:15:47 +02001067 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001068
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001069 clear_tv(&argv[0]);
1070 clear_tv(&argv[1]);
1071 if (r == FAIL || called_emsg != called_emsg_start)
1072 return;
1073 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001074
1075 if (fc != NULL)
1076 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001077}
1078
Bram Moolenaare4098452023-05-07 18:53:49 +01001079/*
1080 * Implementation of "byteidx()" and "byteidxcomp()" functions
1081 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001082 static void
Dominique Pellé0268ff32024-07-28 21:12:20 +02001083byteidx_common(typval_T *argvars, typval_T *rettv, int comp)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001084{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001085 rettv->vval.v_number = -1;
1086
1087 if (in_vim9script()
1088 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001089 || check_for_number_arg(argvars, 1) == FAIL
1090 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001091 return;
1092
Christian Brabandt67672ef2023-04-24 21:09:54 +01001093 char_u *str = tv_get_string_chk(&argvars[0]);
1094 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001095 if (str == NULL || idx < 0)
1096 return;
1097
Christian Brabandt67672ef2023-04-24 21:09:54 +01001098 varnumber_T utf16idx = FALSE;
1099 if (argvars[2].v_type != VAR_UNKNOWN)
1100 {
zeertzjq8cf51372023-05-08 15:31:38 +01001101 int error = FALSE;
1102 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1103 if (error)
1104 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001105 if (utf16idx < 0 || utf16idx > 1)
1106 {
zeertzjq8cf51372023-05-08 15:31:38 +01001107 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001108 return;
1109 }
1110 }
1111
1112 int (*ptr2len)(char_u *);
1113 if (enc_utf8 && comp)
1114 ptr2len = utf_ptr2len;
1115 else
1116 ptr2len = mb_ptr2len;
1117
1118 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001119 for ( ; idx > 0; idx--)
1120 {
1121 if (*t == NUL) // EOL reached
1122 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001123 if (utf16idx)
1124 {
1125 int clen = ptr2len(t);
1126 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1127 if (c > 0xFFFF)
1128 idx--;
1129 }
1130 if (idx > 0)
1131 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001132 }
1133 rettv->vval.v_number = (varnumber_T)(t - str);
1134}
1135
1136/*
1137 * "byteidx()" function
1138 */
1139 void
1140f_byteidx(typval_T *argvars, typval_T *rettv)
1141{
Bram Moolenaare4098452023-05-07 18:53:49 +01001142 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001143}
1144
1145/*
1146 * "byteidxcomp()" function
1147 */
1148 void
1149f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1150{
Bram Moolenaare4098452023-05-07 18:53:49 +01001151 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001152}
1153
1154/*
1155 * "charidx()" function
1156 */
1157 void
1158f_charidx(typval_T *argvars, typval_T *rettv)
1159{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001160 rettv->vval.v_number = -1;
1161
Christian Brabandt67672ef2023-04-24 21:09:54 +01001162 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001163 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001164 || check_for_opt_bool_arg(argvars, 2) == FAIL
1165 || (argvars[2].v_type != VAR_UNKNOWN
1166 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001167 return;
1168
Christian Brabandt67672ef2023-04-24 21:09:54 +01001169 char_u *str = tv_get_string_chk(&argvars[0]);
1170 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001171 if (str == NULL || idx < 0)
1172 return;
1173
Christian Brabandt67672ef2023-04-24 21:09:54 +01001174 varnumber_T countcc = FALSE;
1175 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001176 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001177 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001178 countcc = tv_get_bool(&argvars[2]);
1179 if (argvars[3].v_type != VAR_UNKNOWN)
1180 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001181 }
1182
Christian Brabandt67672ef2023-04-24 21:09:54 +01001183 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001184 if (enc_utf8 && countcc)
1185 ptr2len = utf_ptr2len;
1186 else
1187 ptr2len = mb_ptr2len;
1188
Christian Brabandt67672ef2023-04-24 21:09:54 +01001189 char_u *p;
1190 int len;
1191 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001192 {
1193 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001194 {
1195 // If the index is exactly the number of bytes or utf-16 code units
1196 // in the string then return the length of the string in
1197 // characters.
1198 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1199 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001200 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001201 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001202 if (utf16idx)
1203 {
1204 idx--;
1205 int clen = ptr2len(p);
1206 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1207 if (c > 0xFFFF)
1208 idx--;
1209 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001210 p += ptr2len(p);
1211 }
1212
1213 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1214}
1215
1216/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001217 * Convert the string "str", from encoding "from" to encoding "to".
1218 */
1219 static char_u *
1220convert_string(char_u *str, char_u *from, char_u *to)
1221{
1222 vimconv_T vimconv;
1223
1224 vimconv.vc_type = CONV_NONE;
1225 if (convert_setup(&vimconv, from, to) == FAIL)
1226 return NULL;
1227 vimconv.vc_fail = TRUE;
1228 if (vimconv.vc_type == CONV_NONE)
1229 str = vim_strsave(str);
1230 else
1231 str = string_convert(&vimconv, str, NULL);
1232 convert_setup(&vimconv, NULL, NULL);
1233
1234 return str;
1235}
1236
1237/*
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001238 * Add the bytes from "str" to "blob".
1239 */
1240 static void
1241blob_from_string(char_u *str, blob_T *blob)
1242{
1243 size_t len = STRLEN(str);
1244
1245 for (size_t i = 0; i < len; i++)
1246 {
1247 int ch = str[i];
1248
1249 if (str[i] == NL)
1250 // Translate newlines in the string to NUL character
1251 ch = NUL;
1252
1253 ga_append(&blob->bv_ga, ch);
1254 }
1255}
1256
1257/*
1258 * Return a string created from the bytes in blob starting at "start_idx".
1259 * A NL character in the blob indicates end of string.
1260 * A NUL character in the blob is translated to a NL.
1261 * On return, "start_idx" points to next byte to process in blob.
1262 */
1263 static char_u *
1264string_from_blob(blob_T *blob, long *start_idx)
1265{
1266 garray_T str_ga;
1267 long blen;
Yegappan Lakshmanan5e9aaed2025-01-18 10:24:25 +01001268 int idx;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001269
1270 ga_init2(&str_ga, sizeof(char), 80);
1271
1272 blen = blob_len(blob);
1273
1274 for (idx = *start_idx; idx < blen; idx++)
1275 {
1276 char_u byte = (char_u)blob_get(blob, idx);
1277 if (byte == NL)
1278 {
1279 idx++;
1280 break;
1281 }
1282
1283 if (byte == NUL)
1284 byte = NL;
1285
1286 ga_append(&str_ga, byte);
1287 }
1288
1289 ga_append(&str_ga, NUL);
1290
1291 char_u *ret_str = vim_strsave(str_ga.ga_data);
1292 *start_idx = idx;
1293
1294 ga_clear(&str_ga);
1295 return ret_str;
1296}
1297
1298/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001299 * "blob2str()" function
1300 * Converts a blob to a string, ensuring valid UTF-8 encoding.
1301 */
1302 void
1303f_blob2str(typval_T *argvars, typval_T *rettv)
1304{
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001305 blob_T *blob;
1306 int blen;
1307 long idx;
1308 int utf8_inuse = FALSE;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001309
1310 if (check_for_blob_arg(argvars, 0) == FAIL
1311 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1312 return;
1313
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001314 if (rettv_list_alloc(rettv) == FAIL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001315 return;
1316
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001317 blob = argvars->vval.v_blob;
1318 if (blob == NULL)
1319 return;
1320 blen = blob_len(blob);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001321
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001322 char_u *from_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001323 if (argvars[1].v_type != VAR_UNKNOWN)
1324 {
1325 dict_T *d = argvars[1].vval.v_dict;
1326 if (d != NULL)
1327 {
1328 char_u *enc = dict_get_string(d, "encoding", FALSE);
1329 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001330 from_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001331 }
1332 }
1333
1334 if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001335 utf8_inuse = TRUE;
1336
1337 idx = 0;
1338 while (idx < blen)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001339 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001340 char_u *str;
1341 char_u *converted_str;
1342
1343 str = string_from_blob(blob, &idx);
1344 if (str == NULL)
1345 break;
1346
1347 converted_str = str;
1348 if (from_encoding != NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001349 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001350 converted_str = convert_string(str, from_encoding, p_enc);
1351 vim_free(str);
1352 if (converted_str == NULL)
1353 {
1354 semsg(_(e_str_encoding_failed), "from", from_encoding);
1355 goto done;
1356 }
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001357 }
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001358
1359 if (utf8_inuse)
1360 {
1361 if (!utf_valid_string(converted_str, NULL))
1362 {
1363 semsg(_(e_str_encoding_failed), "from", p_enc);
1364 vim_free(converted_str);
1365 goto done;
1366 }
1367 }
1368
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001369 int ret = list_append_string(rettv->vval.v_list, converted_str, -1);
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001370 vim_free(converted_str);
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001371 if (ret == FAIL)
1372 break;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001373 }
1374
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001375done:
1376 vim_free(from_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001377}
1378
1379/*
1380 * "str2blob()" function
1381 */
1382 void
1383f_str2blob(typval_T *argvars, typval_T *rettv)
1384{
1385 blob_T *blob;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001386 list_T *list;
1387 listitem_T *li;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001388
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001389 if (check_for_list_arg(argvars, 0) == FAIL
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001390 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1391 return;
1392
1393 if (rettv_blob_alloc(rettv) == FAIL)
1394 return;
1395
1396 blob = rettv->vval.v_blob;
1397
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001398 list = argvars[0].vval.v_list;
1399 if (list == NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001400 return;
1401
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001402 char_u *to_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001403 if (argvars[1].v_type != VAR_UNKNOWN)
1404 {
1405 dict_T *d = argvars[1].vval.v_dict;
1406 if (d != NULL)
1407 {
1408 char_u *enc = dict_get_string(d, "encoding", FALSE);
1409 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001410 to_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001411 }
1412 }
1413
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001414 FOR_ALL_LIST_ITEMS(list, li)
1415 {
1416 if (li->li_tv.v_type != VAR_STRING)
1417 continue;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001418
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001419 char_u *str = li->li_tv.vval.v_string;
1420
1421 if (str == NULL)
1422 continue;
1423
1424 if (to_encoding != NULL)
1425 {
1426 str = convert_string(str, p_enc, to_encoding);
1427 if (str == NULL)
1428 {
1429 semsg(_(e_str_encoding_failed), "to", to_encoding);
1430 goto done;
1431 }
1432 }
1433
1434 if (li != list->lv_first)
1435 // Each list string item is separated by a newline in the blob
1436 ga_append(&blob->bv_ga, NL);
1437
1438 blob_from_string(str, blob);
1439
1440 if (to_encoding != NULL)
1441 vim_free(str);
1442 }
1443
1444done:
1445 if (to_encoding != NULL)
1446 vim_free(to_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001447}
1448
1449/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001450 * "str2list()" function
1451 */
1452 void
1453f_str2list(typval_T *argvars, typval_T *rettv)
1454{
1455 char_u *p;
1456 int utf8 = FALSE;
1457
1458 if (rettv_list_alloc(rettv) == FAIL)
1459 return;
1460
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001461 if (in_vim9script()
1462 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001463 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001464 return;
1465
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001466 if (argvars[1].v_type != VAR_UNKNOWN)
1467 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1468
1469 p = tv_get_string(&argvars[0]);
1470
1471 if (has_mbyte || utf8)
1472 {
1473 int (*ptr2len)(char_u *);
1474 int (*ptr2char)(char_u *);
1475
1476 if (utf8 || enc_utf8)
1477 {
1478 ptr2len = utf_ptr2len;
1479 ptr2char = utf_ptr2char;
1480 }
1481 else
1482 {
1483 ptr2len = mb_ptr2len;
1484 ptr2char = mb_ptr2char;
1485 }
1486
1487 for ( ; *p != NUL; p += (*ptr2len)(p))
1488 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1489 }
1490 else
1491 for ( ; *p != NUL; ++p)
1492 list_append_number(rettv->vval.v_list, *p);
1493}
1494
1495/*
1496 * "str2nr()" function
1497 */
1498 void
1499f_str2nr(typval_T *argvars, typval_T *rettv)
1500{
1501 int base = 10;
1502 char_u *p;
1503 varnumber_T n;
1504 int what = 0;
1505 int isneg;
1506
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001507 if (in_vim9script()
1508 && (check_for_string_arg(argvars, 0) == FAIL
1509 || check_for_opt_number_arg(argvars, 1) == FAIL
1510 || (argvars[1].v_type != VAR_UNKNOWN
1511 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1512 return;
1513
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001514 if (argvars[1].v_type != VAR_UNKNOWN)
1515 {
1516 base = (int)tv_get_number(&argvars[1]);
1517 if (base != 2 && base != 8 && base != 10 && base != 16)
1518 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001519 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001520 return;
1521 }
1522 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1523 what |= STR2NR_QUOTE;
1524 }
1525
1526 p = skipwhite(tv_get_string_strict(&argvars[0]));
1527 isneg = (*p == '-');
1528 if (*p == '+' || *p == '-')
1529 p = skipwhite(p + 1);
1530 switch (base)
1531 {
1532 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1533 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1534 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1535 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001536 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001537 // Text after the number is silently ignored.
1538 if (isneg)
1539 rettv->vval.v_number = -n;
1540 else
1541 rettv->vval.v_number = n;
1542
1543}
1544
1545/*
1546 * "strgetchar()" function
1547 */
1548 void
1549f_strgetchar(typval_T *argvars, typval_T *rettv)
1550{
1551 char_u *str;
1552 int len;
1553 int error = FALSE;
1554 int charidx;
1555 int byteidx = 0;
1556
1557 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001558
1559 if (in_vim9script()
1560 && (check_for_string_arg(argvars, 0) == FAIL
1561 || check_for_number_arg(argvars, 1) == FAIL))
1562 return;
1563
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001564 str = tv_get_string_chk(&argvars[0]);
1565 if (str == NULL)
1566 return;
1567 len = (int)STRLEN(str);
1568 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1569 if (error)
1570 return;
1571
1572 while (charidx >= 0 && byteidx < len)
1573 {
1574 if (charidx == 0)
1575 {
1576 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1577 break;
1578 }
1579 --charidx;
1580 byteidx += MB_CPTR2LEN(str + byteidx);
1581 }
1582}
1583
1584/*
1585 * "stridx()" function
1586 */
1587 void
1588f_stridx(typval_T *argvars, typval_T *rettv)
1589{
1590 char_u buf[NUMBUFLEN];
1591 char_u *needle;
1592 char_u *haystack;
1593 char_u *save_haystack;
1594 char_u *pos;
1595 int start_idx;
1596
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001597 if (in_vim9script()
1598 && (check_for_string_arg(argvars, 0) == FAIL
1599 || check_for_string_arg(argvars, 1) == FAIL
1600 || check_for_opt_number_arg(argvars, 2) == FAIL))
1601 return;
1602
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001603 needle = tv_get_string_chk(&argvars[1]);
1604 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1605 rettv->vval.v_number = -1;
1606 if (needle == NULL || haystack == NULL)
1607 return; // type error; errmsg already given
1608
1609 if (argvars[2].v_type != VAR_UNKNOWN)
1610 {
1611 int error = FALSE;
1612
1613 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1614 if (error || start_idx >= (int)STRLEN(haystack))
1615 return;
1616 if (start_idx >= 0)
1617 haystack += start_idx;
1618 }
1619
1620 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1621 if (pos != NULL)
1622 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1623}
1624
1625/*
1626 * "string()" function
1627 */
1628 void
1629f_string(typval_T *argvars, typval_T *rettv)
1630{
1631 char_u *tofree;
1632 char_u numbuf[NUMBUFLEN];
1633
1634 rettv->v_type = VAR_STRING;
1635 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1636 get_copyID());
1637 // Make a copy if we have a value but it's not in allocated memory.
1638 if (rettv->vval.v_string != NULL && tofree == NULL)
1639 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1640}
1641
1642/*
1643 * "strlen()" function
1644 */
1645 void
1646f_strlen(typval_T *argvars, typval_T *rettv)
1647{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001648 if (in_vim9script()
1649 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1650 return;
1651
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001652 rettv->vval.v_number = (varnumber_T)(STRLEN(
1653 tv_get_string(&argvars[0])));
1654}
1655
1656 static void
1657strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1658{
1659 char_u *s = tv_get_string(&argvars[0]);
1660 varnumber_T len = 0;
1661 int (*func_mb_ptr2char_adv)(char_u **pp);
1662
1663 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1664 while (*s != NUL)
1665 {
1666 func_mb_ptr2char_adv(&s);
1667 ++len;
1668 }
1669 rettv->vval.v_number = len;
1670}
1671
1672/*
1673 * "strcharlen()" function
1674 */
1675 void
1676f_strcharlen(typval_T *argvars, typval_T *rettv)
1677{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001678 if (in_vim9script()
1679 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1680 return;
1681
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001682 strchar_common(argvars, rettv, TRUE);
1683}
1684
1685/*
1686 * "strchars()" function
1687 */
1688 void
1689f_strchars(typval_T *argvars, typval_T *rettv)
1690{
1691 varnumber_T skipcc = FALSE;
1692
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001693 if (in_vim9script()
1694 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001695 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001696 return;
1697
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001698 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001699 {
zeertzjq8cf51372023-05-08 15:31:38 +01001700 int error = FALSE;
1701 skipcc = tv_get_bool_chk(&argvars[1], &error);
1702 if (error)
1703 return;
1704 if (skipcc < 0 || skipcc > 1)
1705 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001706 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001707 return;
1708 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001709 }
zeertzjq8cf51372023-05-08 15:31:38 +01001710
1711 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001712}
1713
1714/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001715 * "strutf16len()" function
1716 */
1717 void
1718f_strutf16len(typval_T *argvars, typval_T *rettv)
1719{
1720 rettv->vval.v_number = -1;
1721
1722 if (check_for_string_arg(argvars, 0) == FAIL
1723 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1724 return;
1725
1726 varnumber_T countcc = FALSE;
1727 if (argvars[1].v_type != VAR_UNKNOWN)
1728 countcc = tv_get_bool(&argvars[1]);
1729
1730 char_u *s = tv_get_string(&argvars[0]);
1731 varnumber_T len = 0;
1732 int (*func_mb_ptr2char_adv)(char_u **pp);
1733 int ch;
1734
1735 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1736 while (*s != NUL)
1737 {
1738 ch = func_mb_ptr2char_adv(&s);
1739 if (ch > 0xFFFF)
1740 ++len;
1741 ++len;
1742 }
1743 rettv->vval.v_number = len;
1744}
1745
1746/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001747 * "strdisplaywidth()" function
1748 */
1749 void
1750f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1751{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001752 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001753 int col = 0;
1754
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001755 rettv->vval.v_number = -1;
1756
1757 if (in_vim9script()
1758 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001759 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001760 return;
1761
1762 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001763 if (argvars[1].v_type != VAR_UNKNOWN)
1764 col = (int)tv_get_number(&argvars[1]);
1765
1766 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1767}
1768
1769/*
1770 * "strwidth()" function
1771 */
1772 void
1773f_strwidth(typval_T *argvars, typval_T *rettv)
1774{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001775 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001776
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001777 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1778 return;
1779
1780 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001781 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1782}
1783
1784/*
1785 * "strcharpart()" function
1786 */
1787 void
1788f_strcharpart(typval_T *argvars, typval_T *rettv)
1789{
1790 char_u *p;
1791 int nchar;
1792 int nbyte = 0;
1793 int charlen;
1794 int skipcc = FALSE;
1795 int len = 0;
1796 int slen;
1797 int error = FALSE;
1798
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001799 if (in_vim9script()
1800 && (check_for_string_arg(argvars, 0) == FAIL
1801 || check_for_number_arg(argvars, 1) == FAIL
1802 || check_for_opt_number_arg(argvars, 2) == FAIL
1803 || (argvars[2].v_type != VAR_UNKNOWN
1804 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1805 return;
1806
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001807 p = tv_get_string(&argvars[0]);
1808 slen = (int)STRLEN(p);
1809
1810 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1811 if (!error)
1812 {
1813 if (argvars[2].v_type != VAR_UNKNOWN
1814 && argvars[3].v_type != VAR_UNKNOWN)
1815 {
zeertzjq8cf51372023-05-08 15:31:38 +01001816 skipcc = tv_get_bool_chk(&argvars[3], &error);
1817 if (error)
1818 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001819 if (skipcc < 0 || skipcc > 1)
1820 {
zeertzjq8cf51372023-05-08 15:31:38 +01001821 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001822 return;
1823 }
1824 }
1825
1826 if (nchar > 0)
1827 while (nchar > 0 && nbyte < slen)
1828 {
1829 if (skipcc)
1830 nbyte += mb_ptr2len(p + nbyte);
1831 else
1832 nbyte += MB_CPTR2LEN(p + nbyte);
1833 --nchar;
1834 }
1835 else
1836 nbyte = nchar;
1837 if (argvars[2].v_type != VAR_UNKNOWN)
1838 {
1839 charlen = (int)tv_get_number(&argvars[2]);
1840 while (charlen > 0 && nbyte + len < slen)
1841 {
1842 int off = nbyte + len;
1843
1844 if (off < 0)
1845 len += 1;
1846 else
1847 {
1848 if (skipcc)
1849 len += mb_ptr2len(p + off);
1850 else
1851 len += MB_CPTR2LEN(p + off);
1852 }
1853 --charlen;
1854 }
1855 }
1856 else
1857 len = slen - nbyte; // default: all bytes that are available.
1858 }
1859
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001860 // Only return the overlap between the specified part and the actual
1861 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001862 if (nbyte < 0)
1863 {
1864 len += nbyte;
1865 nbyte = 0;
1866 }
1867 else if (nbyte > slen)
1868 nbyte = slen;
1869 if (len < 0)
1870 len = 0;
1871 else if (nbyte + len > slen)
1872 len = slen - nbyte;
1873
1874 rettv->v_type = VAR_STRING;
1875 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1876}
1877
1878/*
1879 * "strpart()" function
1880 */
1881 void
1882f_strpart(typval_T *argvars, typval_T *rettv)
1883{
1884 char_u *p;
1885 int n;
1886 int len;
1887 int slen;
1888 int error = FALSE;
1889
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001890 if (in_vim9script()
1891 && (check_for_string_arg(argvars, 0) == FAIL
1892 || check_for_number_arg(argvars, 1) == FAIL
1893 || check_for_opt_number_arg(argvars, 2) == FAIL
1894 || (argvars[2].v_type != VAR_UNKNOWN
1895 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1896 return;
1897
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001898 p = tv_get_string(&argvars[0]);
1899 slen = (int)STRLEN(p);
1900
1901 n = (int)tv_get_number_chk(&argvars[1], &error);
1902 if (error)
1903 len = 0;
1904 else if (argvars[2].v_type != VAR_UNKNOWN)
1905 len = (int)tv_get_number(&argvars[2]);
1906 else
1907 len = slen - n; // default len: all bytes that are available.
1908
1909 // Only return the overlap between the specified part and the actual
1910 // string.
1911 if (n < 0)
1912 {
1913 len += n;
1914 n = 0;
1915 }
1916 else if (n > slen)
1917 n = slen;
1918 if (len < 0)
1919 len = 0;
1920 else if (n + len > slen)
1921 len = slen - n;
1922
1923 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1924 {
1925 int off;
1926
1927 // length in characters
1928 for (off = n; off < slen && len > 0; --len)
1929 off += mb_ptr2len(p + off);
1930 len = off - n;
1931 }
1932
1933 rettv->v_type = VAR_STRING;
1934 rettv->vval.v_string = vim_strnsave(p + n, len);
1935}
1936
1937/*
1938 * "strridx()" function
1939 */
1940 void
1941f_strridx(typval_T *argvars, typval_T *rettv)
1942{
1943 char_u buf[NUMBUFLEN];
1944 char_u *needle;
1945 char_u *haystack;
1946 char_u *rest;
1947 char_u *lastmatch = NULL;
1948 int haystack_len, end_idx;
1949
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001950 if (in_vim9script()
1951 && (check_for_string_arg(argvars, 0) == FAIL
1952 || check_for_string_arg(argvars, 1) == FAIL
1953 || check_for_opt_number_arg(argvars, 2) == FAIL))
1954 return;
1955
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001956 needle = tv_get_string_chk(&argvars[1]);
1957 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1958
1959 rettv->vval.v_number = -1;
1960 if (needle == NULL || haystack == NULL)
1961 return; // type error; errmsg already given
1962
1963 haystack_len = (int)STRLEN(haystack);
1964 if (argvars[2].v_type != VAR_UNKNOWN)
1965 {
1966 // Third argument: upper limit for index
1967 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1968 if (end_idx < 0)
1969 return; // can never find a match
1970 }
1971 else
1972 end_idx = haystack_len;
1973
1974 if (*needle == NUL)
1975 {
1976 // Empty string matches past the end.
1977 lastmatch = haystack + end_idx;
1978 }
1979 else
1980 {
1981 for (rest = haystack; *rest != '\0'; ++rest)
1982 {
1983 rest = (char_u *)strstr((char *)rest, (char *)needle);
1984 if (rest == NULL || rest > haystack + end_idx)
1985 break;
1986 lastmatch = rest;
1987 }
1988 }
1989
1990 if (lastmatch == NULL)
1991 rettv->vval.v_number = -1;
1992 else
1993 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1994}
1995
1996/*
1997 * "strtrans()" function
1998 */
1999 void
2000f_strtrans(typval_T *argvars, typval_T *rettv)
2001{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002002 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2003 return;
2004
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002005 rettv->v_type = VAR_STRING;
2006 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
2007}
2008
Christian Brabandt67672ef2023-04-24 21:09:54 +01002009
2010/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01002011 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002012 *
2013 * Converts a byte or character offset in a string to the corresponding UTF-16
2014 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01002015 */
2016 void
2017f_utf16idx(typval_T *argvars, typval_T *rettv)
2018{
2019 rettv->vval.v_number = -1;
2020
2021 if (check_for_string_arg(argvars, 0) == FAIL
2022 || check_for_opt_number_arg(argvars, 1) == FAIL
2023 || check_for_opt_bool_arg(argvars, 2) == FAIL
2024 || (argvars[2].v_type != VAR_UNKNOWN
2025 && check_for_opt_bool_arg(argvars, 3) == FAIL))
2026 return;
2027
2028 char_u *str = tv_get_string_chk(&argvars[0]);
2029 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
2030 if (str == NULL || idx < 0)
2031 return;
2032
2033 varnumber_T countcc = FALSE;
2034 varnumber_T charidx = FALSE;
2035 if (argvars[2].v_type != VAR_UNKNOWN)
2036 {
2037 countcc = tv_get_bool(&argvars[2]);
2038 if (argvars[3].v_type != VAR_UNKNOWN)
2039 charidx = tv_get_bool(&argvars[3]);
2040 }
2041
2042 int (*ptr2len)(char_u *);
2043 if (enc_utf8 && countcc)
2044 ptr2len = utf_ptr2len;
2045 else
2046 ptr2len = mb_ptr2len;
2047
2048 char_u *p;
2049 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002050 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002051 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
2052 {
2053 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002054 {
2055 // If the index is exactly the number of bytes or characters in the
2056 // string then return the length of the string in utf-16 code
2057 // units.
2058 if (charidx ? (idx == 0) : (p == (str + idx)))
2059 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002060 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002061 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002062 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002063 int clen = ptr2len(p);
2064 int c = (clen > 1) ? utf_ptr2char(p) : *p;
2065 if (c > 0xFFFF)
2066 len++;
2067 p += ptr2len(p);
2068 if (charidx)
2069 idx--;
2070 }
2071
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002072 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002073}
2074
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002075/*
2076 * "tolower(string)" function
2077 */
2078 void
2079f_tolower(typval_T *argvars, typval_T *rettv)
2080{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002081 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2082 return;
2083
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002084 rettv->v_type = VAR_STRING;
2085 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
2086}
2087
2088/*
2089 * "toupper(string)" function
2090 */
2091 void
2092f_toupper(typval_T *argvars, typval_T *rettv)
2093{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002094 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2095 return;
2096
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002097 rettv->v_type = VAR_STRING;
2098 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
2099}
2100
2101/*
2102 * "tr(string, fromstr, tostr)" function
2103 */
2104 void
2105f_tr(typval_T *argvars, typval_T *rettv)
2106{
2107 char_u *in_str;
2108 char_u *fromstr;
2109 char_u *tostr;
2110 char_u *p;
2111 int inlen;
2112 int fromlen;
2113 int tolen;
2114 int idx;
2115 char_u *cpstr;
2116 int cplen;
2117 int first = TRUE;
2118 char_u buf[NUMBUFLEN];
2119 char_u buf2[NUMBUFLEN];
2120 garray_T ga;
2121
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002122 if (in_vim9script()
2123 && (check_for_string_arg(argvars, 0) == FAIL
2124 || check_for_string_arg(argvars, 1) == FAIL
2125 || check_for_string_arg(argvars, 2) == FAIL))
2126 return;
2127
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002128 in_str = tv_get_string(&argvars[0]);
2129 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
2130 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
2131
2132 // Default return value: empty string.
2133 rettv->v_type = VAR_STRING;
2134 rettv->vval.v_string = NULL;
2135 if (fromstr == NULL || tostr == NULL)
2136 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00002137 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002138
2139 if (!has_mbyte)
2140 // not multi-byte: fromstr and tostr must be the same length
2141 if (STRLEN(fromstr) != STRLEN(tostr))
2142 {
2143error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00002144 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002145 ga_clear(&ga);
2146 return;
2147 }
2148
2149 // fromstr and tostr have to contain the same number of chars
2150 while (*in_str != NUL)
2151 {
2152 if (has_mbyte)
2153 {
2154 inlen = (*mb_ptr2len)(in_str);
2155 cpstr = in_str;
2156 cplen = inlen;
2157 idx = 0;
2158 for (p = fromstr; *p != NUL; p += fromlen)
2159 {
2160 fromlen = (*mb_ptr2len)(p);
2161 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
2162 {
2163 for (p = tostr; *p != NUL; p += tolen)
2164 {
2165 tolen = (*mb_ptr2len)(p);
2166 if (idx-- == 0)
2167 {
2168 cplen = tolen;
2169 cpstr = p;
2170 break;
2171 }
2172 }
2173 if (*p == NUL) // tostr is shorter than fromstr
2174 goto error;
2175 break;
2176 }
2177 ++idx;
2178 }
2179
2180 if (first && cpstr == in_str)
2181 {
2182 // Check that fromstr and tostr have the same number of
2183 // (multi-byte) characters. Done only once when a character
2184 // of in_str doesn't appear in fromstr.
2185 first = FALSE;
2186 for (p = tostr; *p != NUL; p += tolen)
2187 {
2188 tolen = (*mb_ptr2len)(p);
2189 --idx;
2190 }
2191 if (idx != 0)
2192 goto error;
2193 }
2194
2195 (void)ga_grow(&ga, cplen);
2196 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
2197 ga.ga_len += cplen;
2198
2199 in_str += inlen;
2200 }
2201 else
2202 {
2203 // When not using multi-byte chars we can do it faster.
2204 p = vim_strchr(fromstr, *in_str);
2205 if (p != NULL)
2206 ga_append(&ga, tostr[p - fromstr]);
2207 else
2208 ga_append(&ga, *in_str);
2209 ++in_str;
2210 }
2211 }
2212
2213 // add a terminating NUL
2214 (void)ga_grow(&ga, 1);
2215 ga_append(&ga, NUL);
2216
2217 rettv->vval.v_string = ga.ga_data;
2218}
2219
2220/*
2221 * "trim({expr})" function
2222 */
2223 void
2224f_trim(typval_T *argvars, typval_T *rettv)
2225{
2226 char_u buf1[NUMBUFLEN];
2227 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002228 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002229 char_u *mask = NULL;
2230 char_u *tail;
2231 char_u *prev;
2232 char_u *p;
2233 int c1;
2234 int dir = 0;
2235
2236 rettv->v_type = VAR_STRING;
2237 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002238
2239 if (in_vim9script()
2240 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02002241 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002242 || (argvars[1].v_type != VAR_UNKNOWN
2243 && check_for_opt_number_arg(argvars, 2) == FAIL)))
2244 return;
2245
2246 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002247 if (head == NULL)
2248 return;
2249
Illia Bobyr80799172023-10-17 18:00:50 +02002250 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002251 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002252
2253 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02002254 {
Illia Bobyr80799172023-10-17 18:00:50 +02002255 mask = tv_get_string_buf_chk(&argvars[1], buf2);
2256 if (*mask == NUL)
2257 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002258
Illia Bobyr80799172023-10-17 18:00:50 +02002259 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02002260 {
Illia Bobyr80799172023-10-17 18:00:50 +02002261 int error = 0;
2262
2263 // leading or trailing characters to trim
2264 dir = (int)tv_get_number_chk(&argvars[2], &error);
2265 if (error)
2266 return;
2267 if (dir < 0 || dir > 2)
2268 {
2269 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
2270 return;
2271 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002272 }
2273 }
2274
2275 if (dir == 0 || dir == 1)
2276 {
2277 // Trim leading characters
2278 while (*head != NUL)
2279 {
2280 c1 = PTR2CHAR(head);
2281 if (mask == NULL)
2282 {
2283 if (c1 > ' ' && c1 != 0xa0)
2284 break;
2285 }
2286 else
2287 {
2288 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2289 if (c1 == PTR2CHAR(p))
2290 break;
2291 if (*p == NUL)
2292 break;
2293 }
2294 MB_PTR_ADV(head);
2295 }
2296 }
2297
2298 tail = head + STRLEN(head);
2299 if (dir == 0 || dir == 2)
2300 {
2301 // Trim trailing characters
2302 for (; tail > head; tail = prev)
2303 {
2304 prev = tail;
2305 MB_PTR_BACK(head, prev);
2306 c1 = PTR2CHAR(prev);
2307 if (mask == NULL)
2308 {
2309 if (c1 > ' ' && c1 != 0xa0)
2310 break;
2311 }
2312 else
2313 {
2314 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2315 if (c1 == PTR2CHAR(p))
2316 break;
2317 if (*p == NUL)
2318 break;
2319 }
2320 }
2321 }
2322 rettv->vval.v_string = vim_strnsave(head, tail - head);
2323}
2324
Bram Moolenaar677658a2022-01-05 16:09:06 +00002325static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002326
2327/*
2328 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2329 */
2330 static varnumber_T
2331tv_nr(typval_T *tvs, int *idxp)
2332{
2333 int idx = *idxp - 1;
2334 varnumber_T n = 0;
2335 int err = FALSE;
2336
2337 if (tvs[idx].v_type == VAR_UNKNOWN)
2338 emsg(_(e_printf));
2339 else
2340 {
2341 ++*idxp;
2342 n = tv_get_number_chk(&tvs[idx], &err);
2343 if (err)
2344 n = 0;
2345 }
2346 return n;
2347}
2348
2349/*
2350 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2351 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2352 * are not converted to a string.
2353 * If "tofree" is not NULL echo_string() is used. All types are converted to
2354 * a string with the same format as ":echo". The caller must free "*tofree".
2355 * Returns NULL for an error.
2356 */
2357 static char *
2358tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2359{
2360 int idx = *idxp - 1;
2361 char *s = NULL;
2362 static char_u numbuf[NUMBUFLEN];
2363
2364 if (tvs[idx].v_type == VAR_UNKNOWN)
2365 emsg(_(e_printf));
2366 else
2367 {
2368 ++*idxp;
2369 if (tofree != NULL)
2370 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2371 else
2372 s = (char *)tv_get_string_chk(&tvs[idx]);
2373 }
2374 return s;
2375}
2376
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002377/*
2378 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2379 */
2380 static double
2381tv_float(typval_T *tvs, int *idxp)
2382{
2383 int idx = *idxp - 1;
2384 double f = 0;
2385
2386 if (tvs[idx].v_type == VAR_UNKNOWN)
2387 emsg(_(e_printf));
2388 else
2389 {
2390 ++*idxp;
2391 if (tvs[idx].v_type == VAR_FLOAT)
2392 f = tvs[idx].vval.v_float;
2393 else if (tvs[idx].v_type == VAR_NUMBER)
2394 f = (double)tvs[idx].vval.v_number;
2395 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002396 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002397 }
2398 return f;
2399}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002400
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002401#endif
2402
/*
 * Return the text that the printf() function uses for infinity.
 * "positive" selects between the negative form and one of the positive
 * forms; "force_sign" and "space_for_positive" select which positive form:
 * plain, with a '+' or with a leading space.
 * An upper case "fmt_spec" gives the upper case spelling.
 * The result is one of:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 */
    static const char *
infinity_str(
    int     positive,
    char    fmt_spec,
    int     force_sign,
    int     space_for_positive)
{
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // 1: no sign, 2: '+' sign, 3: space instead of a sign
    int     sign_variant = 1 + force_sign + force_sign * space_for_positive;
    // the second half of the table has the upper case names
    int     upper_offset = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[upper_offset + positive * sign_variant];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002424
2425/*
2426 * This code was included to provide a portable vsnprintf() and snprintf().
2427 * Some systems may provide their own, but we always use this one for
2428 * consistency.
2429 *
2430 * This code is based on snprintf.c - a portable implementation of snprintf
2431 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2432 * Included with permission. It was heavily modified to fit in Vim.
2433 * The original code, including useful comments, can be found here:
2434 * http://www.ijs.si/software/snprintf/
2435 *
2436 * This snprintf() only supports the following conversion specifiers:
2437 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2438 * with flags: '-', '+', ' ', '0' and '#'.
2439 * An asterisk is supported for field width as well as precision.
2440 *
2441 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2442 *
2443 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2444 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2445 *
2446 * The locale is not used, the string is used as a byte string. This is only
2447 * relevant for double-byte encodings where the second byte may be '%'.
2448 *
2449 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2450 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2451 *
2452 * The return value is the number of characters which would be generated
2453 * for the given input, excluding the trailing NUL. If this value
2454 * is greater or equal to "str_m", not all characters from the result
2455 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
2456 * are discarded. If "str_m" is greater than zero it is guaranteed
2457 * the resulting string will be NUL-terminated.
2458 */
2459
2460/*
2461 * When va_list is not supported we only define vim_snprintf().
2462 *
2463 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2464 * "typval_T". When the latter is not used it must be NULL.
2465 */
2466
2467// When generating prototypes all of this is skipped, cproto doesn't
2468// understand this.
2469#ifndef PROTO
2470
/*
 * Like vim_snprintf(), but append the formatted text to what is already in
 * "str".  "str_m" is the size of the whole buffer; when the existing text
 * already fills it no space is passed on for the new text.
 * Returns what vim_vsnprintf() returns for the appended part.
 */
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     args;
    int         added;
    size_t      used = STRLEN(str);
    size_t      room = (str_m > used) ? str_m - used : 0;

    va_start(args, fmt);
    added = vim_vsnprintf(str + used, room, fmt, args);
    va_end(args);
    return added;
}
2489
/*
 * Format the variable arguments according to "fmt" into "str", which has
 * room for "str_m" bytes.  The work is done by vim_vsnprintf(), whose
 * return value is passed on.
 */
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     args;
    int         len;

    va_start(args, fmt);
    len = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return len;
}
2501
/*
 * Version of vim_snprintf() that takes the arguments as a "va_list".
 * Forwards to vim_vsnprintf_typval() with NULL for the typval list, so that
 * the values are taken from "ap".
 */
    int
vim_vsnprintf(
    char        *str,
    size_t      str_m,
    const char  *fmt,
    va_list     ap)
{
    int         len = vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);

    return len;
}
2511
/*
 * Kinds of argument that a conversion in a format string can ask for.
 */
enum
{
    TYPE_UNKNOWN = -1,          // not a recognized conversion
    TYPE_INT,
    TYPE_LONGINT,
    TYPE_LONGLONGINT,
    TYPE_UNSIGNEDINT,
    TYPE_UNSIGNEDLONGINT,
    TYPE_UNSIGNEDLONGLONGINT,
    TYPE_POINTER,
    TYPE_PERCENT,
    TYPE_CHAR,
    TYPE_STRING,
    TYPE_FLOAT
};

/*
 * Return the TYPE_ value for the conversion that "type" starts with.
 * "type" points at the optional length modifier ('h', 'l' or 'll') followed
 * by the conversion specifier character.  A '*' is handled like an int
 * argument.
 * Returns TYPE_UNKNOWN when the specifier is not recognized.
 */
    static int
format_typeof(
    const char  *type)
{
    const char  *s = type;
    char        modifier = '\0';    // one of: \0, h, l, L
    char        spec;

    // parse 'h', 'l' and 'll' length modifiers
    if (*s == 'h' || *s == 'l')
    {
        modifier = *s++;
        if (modifier == 'l' && *s == 'l')
        {
            // double l = __int64 / varnumber_T
            modifier = 'L';
            ++s;
        }
    }
    spec = *s;

    // Map the synonyms to their basic form.
    if (spec == 'i')
        spec = 'd';
    else if (spec == '*')
    {
        spec = 'd';
        modifier = 'h';
    }
    else if (spec == 'D')
    {
        spec = 'd';
        modifier = 'l';
    }
    else if (spec == 'U')
    {
        spec = 'u';
        modifier = 'l';
    }
    else if (spec == 'O')
    {
        spec = 'o';
        modifier = 'l';
    }

    switch (spec)
    {
        case '%':
            return TYPE_PERCENT;

        case 'c':
            return TYPE_CHAR;

        case 's':
        case 'S':
            return TYPE_STRING;

        case 'p':
            return TYPE_POINTER;

        case 'b':
        case 'B':
            // binary always takes the widest unsigned type
            return TYPE_UNSIGNEDLONGLONGINT;

        case 'f': case 'F':
        case 'e': case 'E':
        case 'g': case 'G':
            return TYPE_FLOAT;

        case 'd':
            // signed; char and short arguments are passed as int
            switch (modifier)
            {
                case '\0':
                case 'h':
                    return TYPE_INT;
                case 'l':
                    return TYPE_LONGINT;
                case 'L':
                    return TYPE_LONGLONGINT;
            }
            break;

        case 'u':
        case 'o':
        case 'x': case 'X':
            // unsigned
            switch (modifier)
            {
                case '\0':
                case 'h':
                    return TYPE_UNSIGNEDINT;
                case 'l':
                    return TYPE_UNSIGNEDLONGINT;
                case 'L':
                    return TYPE_UNSIGNEDLONGLONGINT;
            }
            break;
    }

    return TYPE_UNKNOWN;
}
2642
zeertzjq7772c932023-08-15 22:48:40 +02002643 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002644format_typename(
2645 const char *type)
2646{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002647 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002648 {
2649 case TYPE_INT:
2650 return _(typename_int);
2651
2652 case TYPE_LONGINT:
2653 return _(typename_longint);
2654
2655 case TYPE_LONGLONGINT:
2656 return _(typename_longlongint);
2657
2658 case TYPE_UNSIGNEDINT:
2659 return _(typename_unsignedint);
2660
2661 case TYPE_UNSIGNEDLONGINT:
2662 return _(typename_unsignedlongint);
2663
2664 case TYPE_UNSIGNEDLONGLONGINT:
2665 return _(typename_unsignedlonglongint);
2666
2667 case TYPE_POINTER:
2668 return _(typename_pointer);
2669
2670 case TYPE_PERCENT:
2671 return _(typename_percent);
2672
2673 case TYPE_CHAR:
2674 return _(typename_char);
2675
2676 case TYPE_STRING:
2677 return _(typename_string);
2678
2679 case TYPE_FLOAT:
2680 return _(typename_float);
2681 }
2682
2683 return _(typename_unknown);
2684}
2685
zeertzjq7772c932023-08-15 22:48:40 +02002686 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002687adjust_types(
2688 const char ***ap_types,
2689 int arg,
2690 int *num_posarg,
2691 const char *type)
2692{
2693 if (*ap_types == NULL || *num_posarg < arg)
2694 {
2695 int idx;
2696 const char **new_types;
2697
2698 if (*ap_types == NULL)
2699 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2700 else
K.Takata4c215ec2023-08-26 18:05:08 +02002701 new_types = vim_realloc((char **)*ap_types,
2702 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002703
2704 if (new_types == NULL)
2705 return FAIL;
2706
2707 for (idx = *num_posarg; idx < arg; ++idx)
2708 new_types[idx] = NULL;
2709
2710 *ap_types = new_types;
2711 *num_posarg = arg;
2712 }
2713
2714 if ((*ap_types)[arg - 1] != NULL)
2715 {
2716 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2717 {
2718 const char *pt = type;
2719 if (pt[0] == '*')
2720 pt = (*ap_types)[arg - 1];
2721
2722 if (pt[0] != '*')
2723 {
2724 switch (pt[0])
2725 {
2726 case 'd': case 'i': break;
2727 default:
2728 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2729 return FAIL;
2730 }
2731 }
2732 }
2733 else
2734 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002735 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002736 {
2737 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2738 return FAIL;
2739 }
2740 }
2741 }
2742
2743 (*ap_types)[arg - 1] = type;
2744
2745 return OK;
2746}
2747
Christ van Willegenc35fc032024-03-14 18:30:41 +01002748 static void
2749format_overflow_error(const char *pstart)
2750{
2751 size_t arglen = 0;
2752 char *argcopy = NULL;
2753 const char *p = pstart;
2754
2755 while (VIM_ISDIGIT((int)(*p)))
2756 ++p;
2757
2758 arglen = p - pstart;
2759 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2760 if (argcopy != NULL)
2761 {
2762 strncpy(argcopy, pstart, arglen);
2763 semsg(_( e_val_too_large), argcopy);
2764 free(argcopy);
2765 }
2766 else
2767 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2768}
2769
2770#define MAX_ALLOWED_STRING_WIDTH 6400
2771
2772 static int
2773get_unsigned_int(
2774 const char *pstart,
2775 const char **p,
zeertzjq0dff3152024-07-29 20:28:14 +02002776 unsigned int *uj,
2777 int overflow_err)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002778{
2779 *uj = **p - '0';
2780 ++*p;
2781
2782 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2783 {
2784 *uj = 10 * *uj + (unsigned int)(**p - '0');
2785 ++*p;
2786 }
2787
2788 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2789 {
zeertzjq0dff3152024-07-29 20:28:14 +02002790 if (overflow_err)
2791 {
2792 format_overflow_error(pstart);
2793 return FAIL;
2794 }
2795 else
2796 *uj = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002797 }
2798
2799 return OK;
2800}
2801
2802
zeertzjq7772c932023-08-15 22:48:40 +02002803 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002804parse_fmt_types(
2805 const char ***ap_types,
2806 int *num_posarg,
2807 const char *fmt,
2808 typval_T *tvs UNUSED
2809 )
2810{
2811 const char *p = fmt;
2812 const char *arg = NULL;
2813
2814 int any_pos = 0;
2815 int any_arg = 0;
2816 int arg_idx;
2817
2818#define CHECK_POS_ARG do { \
2819 if (any_pos && any_arg) \
2820 { \
2821 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2822 goto error; \
2823 } \
2824} while (0);
2825
2826 if (p == NULL)
2827 return OK;
2828
2829 while (*p != NUL)
2830 {
2831 if (*p != '%')
2832 {
2833 char *q = strchr(p + 1, '%');
2834 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2835
2836 p += n;
2837 }
2838 else
2839 {
2840 // allowed values: \0, h, l, L
2841 char length_modifier = '\0';
2842
2843 // variable for positional arg
2844 int pos_arg = -1;
2845 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002846 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002847
2848 p++; // skip '%'
2849
2850 // First check to see if we find a positional
2851 // argument specifier
2852 ptype = p;
2853
2854 while (VIM_ISDIGIT(*ptype))
2855 ++ptype;
2856
2857 if (*ptype == '$')
2858 {
2859 if (*p == '0')
2860 {
2861 // 0 flag at the wrong place
2862 semsg(_( e_invalid_format_specifier_str), fmt);
2863 goto error;
2864 }
2865
2866 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002867 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002868
zeertzjq0dff3152024-07-29 20:28:14 +02002869 if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002870 goto error;
2871
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002872 pos_arg = uj;
2873
2874 any_pos = 1;
2875 CHECK_POS_ARG;
2876
2877 ++p;
2878 }
2879
2880 // parse flags
2881 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2882 || *p == '#' || *p == '\'')
2883 {
2884 switch (*p)
2885 {
2886 case '0': break;
2887 case '-': break;
2888 case '+': break;
2889 case ' ': // If both the ' ' and '+' flags appear, the ' '
2890 // flag should be ignored
2891 break;
2892 case '#': break;
2893 case '\'': break;
2894 }
2895 p++;
2896 }
2897 // If the '0' and '-' flags both appear, the '0' flag should be
2898 // ignored.
2899
2900 // parse field width
2901 if (*(arg = p) == '*')
2902 {
2903 p++;
2904
2905 if (VIM_ISDIGIT((int)(*p)))
2906 {
2907 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002908 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002909
zeertzjq0dff3152024-07-29 20:28:14 +02002910 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002911 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002912
2913 if (*p != '$')
2914 {
2915 semsg(_( e_invalid_format_specifier_str), fmt);
2916 goto error;
2917 }
2918 else
2919 {
2920 ++p;
2921 any_pos = 1;
2922 CHECK_POS_ARG;
2923
2924 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2925 goto error;
2926 }
2927 }
2928 else
2929 {
2930 any_arg = 1;
2931 CHECK_POS_ARG;
2932 }
2933 }
dundargoc580c1fc2023-10-06 19:41:14 +02002934 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002935 {
2936 // size_t could be wider than unsigned int; make sure we treat
2937 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002938 const char *digstart = p;
2939 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002940
zeertzjq0dff3152024-07-29 20:28:14 +02002941 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002942 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002943
2944 if (*p == '$')
2945 {
2946 semsg(_( e_invalid_format_specifier_str), fmt);
2947 goto error;
2948 }
2949 }
2950
2951 // parse precision
2952 if (*p == '.')
2953 {
2954 p++;
2955
2956 if (*(arg = p) == '*')
2957 {
2958 p++;
2959
2960 if (VIM_ISDIGIT((int)(*p)))
2961 {
2962 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002963 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002964
zeertzjq0dff3152024-07-29 20:28:14 +02002965 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002966 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002967
2968 if (*p == '$')
2969 {
2970 any_pos = 1;
2971 CHECK_POS_ARG;
2972
2973 ++p;
2974
2975 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2976 goto error;
2977 }
2978 else
2979 {
2980 semsg(_( e_invalid_format_specifier_str), fmt);
2981 goto error;
2982 }
2983 }
2984 else
2985 {
2986 any_arg = 1;
2987 CHECK_POS_ARG;
2988 }
2989 }
dundargoc580c1fc2023-10-06 19:41:14 +02002990 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002991 {
2992 // size_t could be wider than unsigned int; make sure we
2993 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002994 const char *digstart = p;
2995 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002996
zeertzjq0dff3152024-07-29 20:28:14 +02002997 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002998 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002999
3000 if (*p == '$')
3001 {
3002 semsg(_( e_invalid_format_specifier_str), fmt);
3003 goto error;
3004 }
3005 }
3006 }
3007
3008 if (pos_arg != -1)
3009 {
3010 any_pos = 1;
3011 CHECK_POS_ARG;
3012
3013 ptype = p;
3014 }
3015
3016 // parse 'h', 'l' and 'll' length modifiers
3017 if (*p == 'h' || *p == 'l')
3018 {
3019 length_modifier = *p;
3020 p++;
3021 if (length_modifier == 'l' && *p == 'l')
3022 {
3023 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02003024 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003025 p++;
3026 }
3027 }
3028
3029 switch (*p)
3030 {
3031 // Check for known format specifiers. % is special!
3032 case 'i':
3033 case '*':
3034 case 'd':
3035 case 'u':
3036 case 'o':
3037 case 'D':
3038 case 'U':
3039 case 'O':
3040 case 'x':
3041 case 'X':
3042 case 'b':
3043 case 'B':
3044 case 'c':
3045 case 's':
3046 case 'S':
3047 case 'p':
3048 case 'f':
3049 case 'F':
3050 case 'e':
3051 case 'E':
3052 case 'g':
3053 case 'G':
3054 if (pos_arg != -1)
3055 {
3056 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
3057 goto error;
3058 }
3059 else
3060 {
3061 any_arg = 1;
3062 CHECK_POS_ARG;
3063 }
3064 break;
3065
3066 default:
3067 if (pos_arg != -1)
3068 {
3069 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
3070 goto error;
3071 }
3072 }
3073
3074 if (*p != NUL)
3075 p++; // step over the just processed conversion specifier
3076 }
3077 }
3078
3079 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
3080 {
3081 if ((*ap_types)[arg_idx] == NULL)
3082 {
3083 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
3084 goto error;
3085 }
3086
3087# if defined(FEAT_EVAL)
3088 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
3089 {
3090 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
3091 goto error;
3092 }
3093# endif
3094 }
3095
3096 return OK;
3097
3098error:
K.Takata4c215ec2023-08-26 18:05:08 +02003099 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003100 *ap_types = NULL;
3101 *num_posarg = 0;
3102 return FAIL;
3103}
3104
zeertzjq7772c932023-08-15 22:48:40 +02003105 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003106skip_to_arg(
3107 const char **ap_types,
3108 va_list ap_start,
3109 va_list *ap,
3110 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003111 int *arg_cur,
3112 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003113{
3114 int arg_min = 0;
3115
3116 if (*arg_cur + 1 == *arg_idx)
3117 {
3118 ++*arg_cur;
3119 ++*arg_idx;
3120 return;
3121 }
3122
3123 if (*arg_cur >= *arg_idx)
3124 {
3125 // Reset ap to ap_start and skip arg_idx - 1 types
3126 va_end(*ap);
3127 va_copy(*ap, ap_start);
3128 }
3129 else
3130 {
3131 // Skip over any we should skip
3132 arg_min = *arg_cur;
3133 }
3134
3135 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
3136 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003137 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003138
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003139 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
3140 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02003141 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003142 return;
3143 }
3144
3145 p = ap_types[*arg_cur];
3146
3147 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003148
3149 // get parameter value, do initial processing
3150 switch (fmt_type)
3151 {
3152 case TYPE_PERCENT:
3153 case TYPE_UNKNOWN:
3154 break;
3155
3156 case TYPE_CHAR:
3157 va_arg(*ap, int);
3158 break;
3159
3160 case TYPE_STRING:
3161 va_arg(*ap, char *);
3162 break;
3163
3164 case TYPE_POINTER:
3165 va_arg(*ap, void *);
3166 break;
3167
3168 case TYPE_INT:
3169 va_arg(*ap, int);
3170 break;
3171
3172 case TYPE_LONGINT:
3173 va_arg(*ap, long int);
3174 break;
3175
3176 case TYPE_LONGLONGINT:
3177 va_arg(*ap, varnumber_T);
3178 break;
3179
3180 case TYPE_UNSIGNEDINT:
3181 va_arg(*ap, unsigned int);
3182 break;
3183
3184 case TYPE_UNSIGNEDLONGINT:
3185 va_arg(*ap, unsigned long int);
3186 break;
3187
3188 case TYPE_UNSIGNEDLONGLONGINT:
3189 va_arg(*ap, uvarnumber_T);
3190 break;
3191
3192 case TYPE_FLOAT:
3193 va_arg(*ap, double);
3194 break;
3195 }
3196 }
3197
3198 // Because we know that after we return from this call,
3199 // a va_arg() call is made, we can pre-emptively
3200 // increment the current argument index.
3201 ++*arg_cur;
3202 ++*arg_idx;
3203
3204 return;
3205}
3206
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003207 int
3208vim_vsnprintf_typval(
3209 char *str,
3210 size_t str_m,
3211 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003212 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003213 typval_T *tvs)
3214{
3215 size_t str_l = 0;
3216 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003217 int arg_cur = 0;
3218 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003219 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003220 va_list ap;
3221 const char **ap_types = NULL;
3222
3223 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
3224 return 0;
3225
3226 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003227
3228 if (p == NULL)
3229 p = "";
3230 while (*p != NUL)
3231 {
3232 if (*p != '%')
3233 {
3234 char *q = strchr(p + 1, '%');
3235 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
3236
3237 // Copy up to the next '%' or NUL without any changes.
3238 if (str_l < str_m)
3239 {
3240 size_t avail = str_m - str_l;
3241
3242 mch_memmove(str + str_l, p, n > avail ? avail : n);
3243 }
3244 p += n;
3245 str_l += n;
3246 }
3247 else
3248 {
3249 size_t min_field_width = 0, precision = 0;
3250 int zero_padding = 0, precision_specified = 0, justify_left = 0;
3251 int alternate_form = 0, force_sign = 0;
3252
3253 // If both the ' ' and '+' flags appear, the ' ' flag should be
3254 // ignored.
3255 int space_for_positive = 1;
3256
3257 // allowed values: \0, h, l, L
3258 char length_modifier = '\0';
3259
3260 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003261# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003262 // That sounds reasonable to use as the maximum
3263 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003264 char tmp[TMP_LEN];
3265
3266 // string address in case of string argument
3267 const char *str_arg = NULL;
3268
3269 // natural field width of arg without padding and sign
3270 size_t str_arg_l;
3271
3272 // unsigned char argument value - only defined for c conversion.
3273 // N.B. standard explicitly states the char argument for the c
3274 // conversion is unsigned
3275 unsigned char uchar_arg;
3276
3277 // number of zeros to be inserted for numeric conversions as
3278 // required by the precision or minimal field width
3279 size_t number_of_zeros_to_pad = 0;
3280
3281 // index into tmp where zero padding is to be inserted
3282 size_t zero_padding_insertion_ind = 0;
3283
3284 // current conversion specifier character
3285 char fmt_spec = '\0';
3286
3287 // buffer for 's' and 'S' specs
3288 char_u *tofree = NULL;
3289
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003290 // variables for positional arg
3291 int pos_arg = -1;
3292 const char *ptype;
3293
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003294
3295 p++; // skip '%'
3296
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003297 // First check to see if we find a positional
3298 // argument specifier
3299 ptype = p;
3300
3301 while (VIM_ISDIGIT(*ptype))
3302 ++ptype;
3303
3304 if (*ptype == '$')
3305 {
3306 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003307 const char *digstart = p;
3308 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003309
zeertzjq0dff3152024-07-29 20:28:14 +02003310 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003311 goto error;
3312
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003313 pos_arg = uj;
3314
3315 ++p;
3316 }
3317
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003318 // parse flags
3319 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3320 || *p == '#' || *p == '\'')
3321 {
3322 switch (*p)
3323 {
3324 case '0': zero_padding = 1; break;
3325 case '-': justify_left = 1; break;
3326 case '+': force_sign = 1; space_for_positive = 0; break;
3327 case ' ': force_sign = 1;
3328 // If both the ' ' and '+' flags appear, the ' '
3329 // flag should be ignored
3330 break;
3331 case '#': alternate_form = 1; break;
3332 case '\'': break;
3333 }
3334 p++;
3335 }
3336 // If the '0' and '-' flags both appear, the '0' flag should be
3337 // ignored.
3338
3339 // parse field width
3340 if (*p == '*')
3341 {
3342 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003343 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003344
3345 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003346
3347 if (VIM_ISDIGIT((int)(*p)))
3348 {
3349 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003350 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003351
zeertzjq0dff3152024-07-29 20:28:14 +02003352 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003353 goto error;
3354
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003355 arg_idx = uj;
3356
3357 ++p;
3358 }
3359
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003360 j =
3361# if defined(FEAT_EVAL)
3362 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3363# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003364 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3365 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003366 va_arg(ap, int));
3367
Christ van Willegenc35fc032024-03-14 18:30:41 +01003368 if (j > MAX_ALLOWED_STRING_WIDTH)
3369 {
zeertzjq0dff3152024-07-29 20:28:14 +02003370 if (tvs != NULL)
3371 {
3372 format_overflow_error(digstart);
3373 goto error;
3374 }
3375 else
3376 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003377 }
3378
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003379 if (j >= 0)
3380 min_field_width = j;
3381 else
3382 {
3383 min_field_width = -j;
3384 justify_left = 1;
3385 }
3386 }
3387 else if (VIM_ISDIGIT((int)(*p)))
3388 {
3389 // size_t could be wider than unsigned int; make sure we treat
3390 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003391 const char *digstart = p;
3392 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003393
zeertzjq0dff3152024-07-29 20:28:14 +02003394 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003395 goto error;
3396
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003397 min_field_width = uj;
3398 }
3399
3400 // parse precision
3401 if (*p == '.')
3402 {
3403 p++;
3404 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003405
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003406 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003407 {
3408 // size_t could be wider than unsigned int; make sure we
3409 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003410 const char *digstart = p;
3411 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003412
zeertzjq0dff3152024-07-29 20:28:14 +02003413 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003414 goto error;
3415
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003416 precision = uj;
3417 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003418 else if (*p == '*')
3419 {
3420 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003421 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003422
3423 p++;
3424
3425 if (VIM_ISDIGIT((int)(*p)))
3426 {
3427 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003428 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003429
zeertzjq0dff3152024-07-29 20:28:14 +02003430 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003431 goto error;
3432
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003433 arg_idx = uj;
3434
3435 ++p;
3436 }
3437
3438 j =
3439# if defined(FEAT_EVAL)
3440 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3441# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003442 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3443 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003444 va_arg(ap, int));
3445
Christ van Willegenc35fc032024-03-14 18:30:41 +01003446 if (j > MAX_ALLOWED_STRING_WIDTH)
3447 {
zeertzjq0dff3152024-07-29 20:28:14 +02003448 if (tvs != NULL)
3449 {
3450 format_overflow_error(digstart);
3451 goto error;
3452 }
3453 else
3454 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003455 }
3456
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003457 if (j >= 0)
3458 precision = j;
3459 else
3460 {
3461 precision_specified = 0;
3462 precision = 0;
3463 }
3464 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003465 }
3466
3467 // parse 'h', 'l' and 'll' length modifiers
3468 if (*p == 'h' || *p == 'l')
3469 {
3470 length_modifier = *p;
3471 p++;
3472 if (length_modifier == 'l' && *p == 'l')
3473 {
3474 // double l = __int64 / varnumber_T
3475 length_modifier = 'L';
3476 p++;
3477 }
3478 }
3479 fmt_spec = *p;
3480
3481 // common synonyms:
3482 switch (fmt_spec)
3483 {
3484 case 'i': fmt_spec = 'd'; break;
3485 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3486 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3487 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3488 default: break;
3489 }
3490
3491# if defined(FEAT_EVAL)
3492 switch (fmt_spec)
3493 {
3494 case 'd': case 'u': case 'o': case 'x': case 'X':
3495 if (tvs != NULL && length_modifier == '\0')
3496 length_modifier = 'L';
3497 }
3498# endif
3499
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003500 if (pos_arg != -1)
3501 arg_idx = pos_arg;
3502
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003503 // get parameter value, do initial processing
3504 switch (fmt_spec)
3505 {
3506 // '%' and 'c' behave similar to 's' regarding flags and field
3507 // widths
3508 case '%':
3509 case 'c':
3510 case 's':
3511 case 'S':
3512 str_arg_l = 1;
3513 switch (fmt_spec)
3514 {
3515 case '%':
3516 str_arg = p;
3517 break;
3518
3519 case 'c':
3520 {
3521 int j;
3522
3523 j =
3524# if defined(FEAT_EVAL)
3525 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3526# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003527 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3528 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003529 va_arg(ap, int));
3530
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003531 // standard demands unsigned char
3532 uchar_arg = (unsigned char)j;
3533 str_arg = (char *)&uchar_arg;
3534 break;
3535 }
3536
3537 case 's':
3538 case 'S':
3539 str_arg =
3540# if defined(FEAT_EVAL)
3541 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3542# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003543 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3544 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003545 va_arg(ap, char *));
3546
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003547 if (str_arg == NULL)
3548 {
3549 str_arg = "[NULL]";
3550 str_arg_l = 6;
3551 }
3552 // make sure not to address string beyond the specified
3553 // precision !!!
3554 else if (!precision_specified)
3555 str_arg_l = strlen(str_arg);
3556 // truncate string if necessary as requested by precision
3557 else if (precision == 0)
3558 str_arg_l = 0;
3559 else
3560 {
3561 // Don't put the #if inside memchr(), it can be a
3562 // macro.
3563 // memchr on HP does not like n > 2^31 !!!
3564 char *q = memchr(str_arg, '\0',
3565 precision <= (size_t)0x7fffffffL ? precision
3566 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003567
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003568 str_arg_l = (q == NULL) ? precision
3569 : (size_t)(q - str_arg);
3570 }
3571 if (fmt_spec == 'S')
3572 {
presuku1f2453f2021-11-24 15:32:57 +00003573 char_u *p1;
3574 size_t i;
3575 int cell;
presukud85fccd2021-11-20 19:38:31 +00003576
presuku1f2453f2021-11-24 15:32:57 +00003577 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003578 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003579 {
3580 cell = mb_ptr2cells(p1);
3581 if (precision_specified && i + cell > precision)
3582 break;
3583 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003584 }
presuku1f2453f2021-11-24 15:32:57 +00003585
3586 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003587 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003588 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003589 }
3590 break;
3591
3592 default:
3593 break;
3594 }
3595 break;
3596
3597 case 'd': case 'u':
3598 case 'b': case 'B':
3599 case 'o':
3600 case 'x': case 'X':
3601 case 'p':
3602 {
3603 // NOTE: the u, b, o, x, X and p conversion specifiers
3604 // imply the value is unsigned; d implies a signed
3605 // value
3606
3607 // 0 if numeric argument is zero (or if pointer is
3608 // NULL for 'p'), +1 if greater than zero (or nonzero
3609 // for unsigned arguments), -1 if negative (unsigned
3610 // argument is never negative)
3611 int arg_sign = 0;
3612
3613 // only set for length modifier h, or for no length
3614 // modifiers
3615 int int_arg = 0;
3616 unsigned int uint_arg = 0;
3617
3618 // only set for length modifier l
3619 long int long_arg = 0;
3620 unsigned long int ulong_arg = 0;
3621
3622 // only set for length modifier ll
3623 varnumber_T llong_arg = 0;
3624 uvarnumber_T ullong_arg = 0;
3625
3626 // only set for b conversion
3627 uvarnumber_T bin_arg = 0;
3628
3629 // pointer argument value -only defined for p
3630 // conversion
3631 void *ptr_arg = NULL;
3632
3633 if (fmt_spec == 'p')
3634 {
3635 length_modifier = '\0';
3636 ptr_arg =
3637# if defined(FEAT_EVAL)
3638 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3639 NULL) :
3640# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003641 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3642 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003643 va_arg(ap, void *));
3644
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003645 if (ptr_arg != NULL)
3646 arg_sign = 1;
3647 }
3648 else if (fmt_spec == 'b' || fmt_spec == 'B')
3649 {
3650 bin_arg =
3651# if defined(FEAT_EVAL)
3652 tvs != NULL ?
3653 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3654# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003655 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3656 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003657 va_arg(ap, uvarnumber_T));
3658
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003659 if (bin_arg != 0)
3660 arg_sign = 1;
3661 }
3662 else if (fmt_spec == 'd')
3663 {
3664 // signed
3665 switch (length_modifier)
3666 {
3667 case '\0':
3668 case 'h':
3669 // char and short arguments are passed as int.
3670 int_arg =
3671# if defined(FEAT_EVAL)
3672 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3673# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003674 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3675 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003676 va_arg(ap, int));
3677
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003678 if (int_arg > 0)
3679 arg_sign = 1;
3680 else if (int_arg < 0)
3681 arg_sign = -1;
3682 break;
3683 case 'l':
3684 long_arg =
3685# if defined(FEAT_EVAL)
3686 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3687# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003688 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3689 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003690 va_arg(ap, long int));
3691
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003692 if (long_arg > 0)
3693 arg_sign = 1;
3694 else if (long_arg < 0)
3695 arg_sign = -1;
3696 break;
3697 case 'L':
3698 llong_arg =
3699# if defined(FEAT_EVAL)
3700 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3701# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003702 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3703 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003704 va_arg(ap, varnumber_T));
3705
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003706 if (llong_arg > 0)
3707 arg_sign = 1;
3708 else if (llong_arg < 0)
3709 arg_sign = -1;
3710 break;
3711 }
3712 }
3713 else
3714 {
3715 // unsigned
3716 switch (length_modifier)
3717 {
3718 case '\0':
3719 case 'h':
3720 uint_arg =
3721# if defined(FEAT_EVAL)
3722 tvs != NULL ? (unsigned)
3723 tv_nr(tvs, &arg_idx) :
3724# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003725 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3726 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003727 va_arg(ap, unsigned int));
3728
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003729 if (uint_arg != 0)
3730 arg_sign = 1;
3731 break;
3732 case 'l':
3733 ulong_arg =
3734# if defined(FEAT_EVAL)
3735 tvs != NULL ? (unsigned long)
3736 tv_nr(tvs, &arg_idx) :
3737# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003738 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3739 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003740 va_arg(ap, unsigned long int));
3741
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003742 if (ulong_arg != 0)
3743 arg_sign = 1;
3744 break;
3745 case 'L':
3746 ullong_arg =
3747# if defined(FEAT_EVAL)
3748 tvs != NULL ? (uvarnumber_T)
3749 tv_nr(tvs, &arg_idx) :
3750# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003751 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3752 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003753 va_arg(ap, uvarnumber_T));
3754
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003755 if (ullong_arg != 0)
3756 arg_sign = 1;
3757 break;
3758 }
3759 }
3760
3761 str_arg = tmp;
3762 str_arg_l = 0;
3763
3764 // NOTE:
3765 // For d, i, u, o, x, and X conversions, if precision is
3766 // specified, the '0' flag should be ignored. This is so
3767 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3768 // FreeBSD, NetBSD; but not with Perl.
3769 if (precision_specified)
3770 zero_padding = 0;
3771 if (fmt_spec == 'd')
3772 {
3773 if (force_sign && arg_sign >= 0)
3774 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3775 // leave negative numbers for sprintf to handle, to
3776 // avoid handling tricky cases like (short int)-32768
3777 }
3778 else if (alternate_form)
3779 {
3780 if (arg_sign != 0
3781 && (fmt_spec == 'b' || fmt_spec == 'B'
3782 || fmt_spec == 'x' || fmt_spec == 'X') )
3783 {
3784 tmp[str_arg_l++] = '0';
3785 tmp[str_arg_l++] = fmt_spec;
3786 }
3787 // alternate form should have no effect for p
3788 // conversion, but ...
3789 }
3790
3791 zero_padding_insertion_ind = str_arg_l;
3792 if (!precision_specified)
3793 precision = 1; // default precision is 1
3794 if (precision == 0 && arg_sign == 0)
3795 {
3796 // When zero value is formatted with an explicit
3797 // precision 0, the resulting formatted string is
3798 // empty (d, i, u, b, B, o, x, X, p).
3799 }
3800 else
3801 {
3802 char f[6];
3803 int f_l = 0;
3804
3805 // construct a simple format string for sprintf
3806 f[f_l++] = '%';
3807 if (!length_modifier)
3808 ;
3809 else if (length_modifier == 'L')
3810 {
3811# ifdef MSWIN
3812 f[f_l++] = 'I';
3813 f[f_l++] = '6';
3814 f[f_l++] = '4';
3815# else
3816 f[f_l++] = 'l';
3817 f[f_l++] = 'l';
3818# endif
3819 }
3820 else
3821 f[f_l++] = length_modifier;
3822 f[f_l++] = fmt_spec;
3823 f[f_l++] = '\0';
3824
3825 if (fmt_spec == 'p')
3826 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3827 else if (fmt_spec == 'b' || fmt_spec == 'B')
3828 {
3829 char b[8 * sizeof(uvarnumber_T)];
3830 size_t b_l = 0;
3831 uvarnumber_T bn = bin_arg;
3832
3833 do
3834 {
3835 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3836 bn >>= 1;
3837 }
3838 while (bn != 0);
3839
3840 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3841 str_arg_l += b_l;
3842 }
3843 else if (fmt_spec == 'd')
3844 {
3845 // signed
3846 switch (length_modifier)
3847 {
3848 case '\0': str_arg_l += sprintf(
3849 tmp + str_arg_l, f,
3850 int_arg);
3851 break;
3852 case 'h': str_arg_l += sprintf(
3853 tmp + str_arg_l, f,
3854 (short)int_arg);
3855 break;
3856 case 'l': str_arg_l += sprintf(
3857 tmp + str_arg_l, f, long_arg);
3858 break;
3859 case 'L': str_arg_l += sprintf(
3860 tmp + str_arg_l, f, llong_arg);
3861 break;
3862 }
3863 }
3864 else
3865 {
3866 // unsigned
3867 switch (length_modifier)
3868 {
3869 case '\0': str_arg_l += sprintf(
3870 tmp + str_arg_l, f,
3871 uint_arg);
3872 break;
3873 case 'h': str_arg_l += sprintf(
3874 tmp + str_arg_l, f,
3875 (unsigned short)uint_arg);
3876 break;
3877 case 'l': str_arg_l += sprintf(
3878 tmp + str_arg_l, f, ulong_arg);
3879 break;
3880 case 'L': str_arg_l += sprintf(
3881 tmp + str_arg_l, f, ullong_arg);
3882 break;
3883 }
3884 }
3885
3886 // include the optional minus sign and possible
3887 // "0x" in the region before the zero padding
3888 // insertion point
3889 if (zero_padding_insertion_ind < str_arg_l
3890 && tmp[zero_padding_insertion_ind] == '-')
3891 zero_padding_insertion_ind++;
3892 if (zero_padding_insertion_ind + 1 < str_arg_l
3893 && tmp[zero_padding_insertion_ind] == '0'
3894 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3895 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3896 zero_padding_insertion_ind += 2;
3897 }
3898
3899 {
3900 size_t num_of_digits = str_arg_l
3901 - zero_padding_insertion_ind;
3902
3903 if (alternate_form && fmt_spec == 'o'
3904 // unless zero is already the first
3905 // character
3906 && !(zero_padding_insertion_ind < str_arg_l
3907 && tmp[zero_padding_insertion_ind] == '0'))
3908 {
3909 // assure leading zero for alternate-form
3910 // octal numbers
3911 if (!precision_specified
3912 || precision < num_of_digits + 1)
3913 {
3914 // precision is increased to force the
3915 // first character to be zero, except if a
3916 // zero value is formatted with an
3917 // explicit precision of zero
3918 precision = num_of_digits + 1;
3919 }
3920 }
3921 // zero padding to specified precision?
3922 if (num_of_digits < precision)
3923 number_of_zeros_to_pad = precision - num_of_digits;
3924 }
3925 // zero padding to specified minimal field width?
3926 if (!justify_left && zero_padding)
3927 {
3928 int n = (int)(min_field_width - (str_arg_l
3929 + number_of_zeros_to_pad));
3930 if (n > 0)
3931 number_of_zeros_to_pad += n;
3932 }
3933 break;
3934 }
3935
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003936 case 'f':
3937 case 'F':
3938 case 'e':
3939 case 'E':
3940 case 'g':
3941 case 'G':
3942 {
3943 // Floating point.
3944 double f;
3945 double abs_f;
3946 char format[40];
3947 int l;
3948 int remove_trailing_zeroes = FALSE;
3949
3950 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003951# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003952 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003953# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003954 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3955 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003956 va_arg(ap, double));
3957
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003958 abs_f = f < 0 ? -f : f;
3959
3960 if (fmt_spec == 'g' || fmt_spec == 'G')
3961 {
3962 // Would be nice to use %g directly, but it prints
3963 // "1.0" as "1", we don't want that.
3964 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3965 || abs_f == 0.0)
3966 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3967 else
3968 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3969 remove_trailing_zeroes = TRUE;
3970 }
3971
3972 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003973# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003974 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003975# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003976 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003977# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003978 )
3979 {
3980 // Avoid a buffer overflow
3981 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3982 force_sign, space_for_positive));
3983 str_arg_l = STRLEN(tmp);
3984 zero_padding = 0;
3985 }
3986 else
3987 {
3988 if (isnan(f))
3989 {
3990 // Not a number: nan or NAN
3991 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3992 : "nan");
3993 str_arg_l = 3;
3994 zero_padding = 0;
3995 }
3996 else if (isinf(f))
3997 {
3998 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3999 force_sign, space_for_positive));
4000 str_arg_l = STRLEN(tmp);
4001 zero_padding = 0;
4002 }
4003 else
4004 {
4005 // Regular float number
4006 format[0] = '%';
4007 l = 1;
4008 if (force_sign)
4009 format[l++] = space_for_positive ? ' ' : '+';
4010 if (precision_specified)
4011 {
4012 size_t max_prec = TMP_LEN - 10;
4013
4014 // Make sure we don't get more digits than we
4015 // have room for.
4016 if ((fmt_spec == 'f' || fmt_spec == 'F')
4017 && abs_f > 1.0)
4018 max_prec -= (size_t)log10(abs_f);
4019 if (precision > max_prec)
4020 precision = max_prec;
4021 l += sprintf(format + l, ".%d", (int)precision);
4022 }
4023 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
4024 format[l + 1] = NUL;
4025
4026 str_arg_l = sprintf(tmp, format, f);
4027 }
4028
4029 if (remove_trailing_zeroes)
4030 {
4031 int i;
4032 char *tp;
4033
4034 // Using %g or %G: remove superfluous zeroes.
4035 if (fmt_spec == 'f' || fmt_spec == 'F')
4036 tp = tmp + str_arg_l - 1;
4037 else
4038 {
4039 tp = (char *)vim_strchr((char_u *)tmp,
4040 fmt_spec == 'e' ? 'e' : 'E');
4041 if (tp != NULL)
4042 {
4043 // Remove superfluous '+' and leading
4044 // zeroes from the exponent.
4045 if (tp[1] == '+')
4046 {
4047 // Change "1.0e+07" to "1.0e07"
4048 STRMOVE(tp + 1, tp + 2);
4049 --str_arg_l;
4050 }
4051 i = (tp[1] == '-') ? 2 : 1;
4052 while (tp[i] == '0')
4053 {
4054 // Change "1.0e07" to "1.0e7"
4055 STRMOVE(tp + i, tp + i + 1);
4056 --str_arg_l;
4057 }
4058 --tp;
4059 }
4060 }
4061
4062 if (tp != NULL && !precision_specified)
4063 // Remove trailing zeroes, but keep the one
4064 // just after a dot.
4065 while (tp > tmp + 2 && *tp == '0'
4066 && tp[-1] != '.')
4067 {
4068 STRMOVE(tp, tp + 1);
4069 --tp;
4070 --str_arg_l;
4071 }
4072 }
4073 else
4074 {
4075 char *tp;
4076
4077 // Be consistent: some printf("%e") use 1.0e+12
4078 // and some 1.0e+012. Remove one zero in the last
4079 // case.
4080 tp = (char *)vim_strchr((char_u *)tmp,
4081 fmt_spec == 'e' ? 'e' : 'E');
4082 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
4083 && tp[2] == '0'
4084 && vim_isdigit(tp[3])
4085 && vim_isdigit(tp[4]))
4086 {
4087 STRMOVE(tp + 2, tp + 3);
4088 --str_arg_l;
4089 }
4090 }
4091 }
4092 if (zero_padding && min_field_width > str_arg_l
4093 && (tmp[0] == '-' || force_sign))
4094 {
4095 // padding 0's should be inserted after the sign
4096 number_of_zeros_to_pad = min_field_width - str_arg_l;
4097 zero_padding_insertion_ind = 1;
4098 }
4099 str_arg = tmp;
4100 break;
4101 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004102
4103 default:
4104 // unrecognized conversion specifier, keep format string
4105 // as-is
4106 zero_padding = 0; // turn zero padding off for non-numeric
4107 // conversion
4108 justify_left = 1;
4109 min_field_width = 0; // reset flags
4110
4111 // discard the unrecognized conversion, just keep *
4112 // the unrecognized conversion character
4113 str_arg = p;
4114 str_arg_l = 0;
4115 if (*p != NUL)
4116 str_arg_l++; // include invalid conversion specifier
4117 // unchanged if not at end-of-string
4118 break;
4119 }
4120
4121 if (*p != NUL)
4122 p++; // step over the just processed conversion specifier
4123
4124 // insert padding to the left as requested by min_field_width;
4125 // this does not include the zero padding in case of numerical
4126 // conversions
4127 if (!justify_left)
4128 {
4129 // left padding with blank or zero
4130 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
4131
4132 if (pn > 0)
4133 {
4134 if (str_l < str_m)
4135 {
4136 size_t avail = str_m - str_l;
4137
4138 vim_memset(str + str_l, zero_padding ? '0' : ' ',
4139 (size_t)pn > avail ? avail
4140 : (size_t)pn);
4141 }
4142 str_l += pn;
4143 }
4144 }
4145
4146 // zero padding as requested by the precision or by the minimal
4147 // field width for numeric conversions required?
4148 if (number_of_zeros_to_pad == 0)
4149 {
4150 // will not copy first part of numeric right now,
4151 // force it to be copied later in its entirety
4152 zero_padding_insertion_ind = 0;
4153 }
4154 else
4155 {
4156 // insert first part of numerics (sign or '0x') before zero
4157 // padding
4158 int zn = (int)zero_padding_insertion_ind;
4159
4160 if (zn > 0)
4161 {
4162 if (str_l < str_m)
4163 {
4164 size_t avail = str_m - str_l;
4165
4166 mch_memmove(str + str_l, str_arg,
4167 (size_t)zn > avail ? avail
4168 : (size_t)zn);
4169 }
4170 str_l += zn;
4171 }
4172
4173 // insert zero padding as requested by the precision or min
4174 // field width
4175 zn = (int)number_of_zeros_to_pad;
4176 if (zn > 0)
4177 {
4178 if (str_l < str_m)
4179 {
4180 size_t avail = str_m - str_l;
4181
4182 vim_memset(str + str_l, '0',
4183 (size_t)zn > avail ? avail
4184 : (size_t)zn);
4185 }
4186 str_l += zn;
4187 }
4188 }
4189
4190 // insert formatted string
4191 // (or as-is conversion specifier for unknown conversions)
4192 {
4193 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
4194
4195 if (sn > 0)
4196 {
4197 if (str_l < str_m)
4198 {
4199 size_t avail = str_m - str_l;
4200
4201 mch_memmove(str + str_l,
4202 str_arg + zero_padding_insertion_ind,
4203 (size_t)sn > avail ? avail : (size_t)sn);
4204 }
4205 str_l += sn;
4206 }
4207 }
4208
4209 // insert right padding
4210 if (justify_left)
4211 {
4212 // right blank padding to the field width
4213 int pn = (int)(min_field_width
4214 - (str_arg_l + number_of_zeros_to_pad));
4215
4216 if (pn > 0)
4217 {
4218 if (str_l < str_m)
4219 {
4220 size_t avail = str_m - str_l;
4221
4222 vim_memset(str + str_l, ' ',
4223 (size_t)pn > avail ? avail
4224 : (size_t)pn);
4225 }
4226 str_l += pn;
4227 }
4228 }
4229 vim_free(tofree);
4230 }
4231 }
4232
4233 if (str_m > 0)
4234 {
4235 // make sure the string is nul-terminated even at the expense of
4236 // overwriting the last character (shouldn't happen, but just in case)
4237 //
4238 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
4239 }
4240
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004241 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00004242 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004243
Christ van Willegenc35fc032024-03-14 18:30:41 +01004244error:
K.Takata4c215ec2023-08-26 18:05:08 +02004245 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004246 va_end(ap);
4247
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004248 // Return the number of characters formatted (excluding trailing nul
4249 // character), that is, the number of characters that would have been
4250 // written to the buffer if it were large enough.
4251 return (int)str_l;
4252}
4253
4254#endif // PROTO