/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * strings.c: string manipulation functions
 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
Mike Williams51024bb2024-05-30 07:46:30 +0200154 size_t l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
zeertzjq88c8c542024-05-30 19:27:25 +0200272 if (do_special && find_cmdline_var(p, &l) >= 0)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200273 {
274 *d++ = '\\'; // insert backslash
zeertzjq88c8c542024-05-30 19:27:25 +0200275 memcpy(d, p, l); // copy the var
276 d += l;
277 p += l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000341 if (p == NULL)
342 return;
343
344 p2 = p;
345 while ((c = *p2) != NUL)
346 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000528/*
529 * A version of strlen() that has a maximum length.
530 */
531 size_t
532vim_strlen_maxlen(char *s, size_t maxlen)
533{
534 size_t i;
535 for (i = 0; i < maxlen; ++i)
536 if (s[i] == NUL)
537 break;
538 return i;
539}
540
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200541#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
542/*
543 * Compare two strings, ignoring case, using current locale.
544 * Doesn't work for multi-byte characters.
545 * return 0 for match, < 0 for smaller, > 0 for bigger
546 */
547 int
548vim_stricmp(char *s1, char *s2)
549{
550 int i;
551
552 for (;;)
553 {
554 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
555 if (i != 0)
556 return i; // this character different
557 if (*s1 == NUL)
558 break; // strings match until NUL
559 ++s1;
560 ++s2;
561 }
562 return 0; // strings match
563}
564#endif
565
566#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
567/*
568 * Compare two strings, for length "len", ignoring case, using current locale.
569 * Doesn't work for multi-byte characters.
570 * return 0 for match, < 0 for smaller, > 0 for bigger
571 */
572 int
573vim_strnicmp(char *s1, char *s2, size_t len)
574{
575 int i;
576
577 while (len > 0)
578 {
579 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
580 if (i != 0)
581 return i; // this character different
582 if (*s1 == NUL)
583 break; // strings match until NUL
584 ++s1;
585 ++s2;
586 --len;
587 }
588 return 0; // strings match
589}
590#endif
591
592/*
Christian Brabandt84e31752024-09-02 09:59:18 +0200593 * Compare two ASCII strings, for length "len", ignoring case, ignoring locale
594 * (mostly matters for turkish locale where i I might be different).
595 * return 0 for match, < 0 for smaller, > 0 for bigger
596 */
597 int
598vim_strnicmp_asc(char *s1, char *s2, size_t len)
599{
John Marriottc847c122024-11-24 14:09:40 +0100600 int i = 0;
Christian Brabandt84e31752024-09-02 09:59:18 +0200601 int save_cmp_flags = cmp_flags;
602
603 cmp_flags |= CMP_KEEPASCII; // compare by ASCII value, ignoring locale
604 while (len > 0)
605 {
606 i = vim_tolower(*s1) - vim_tolower(*s2);
607 if (i != 0)
608 break; // this character is different
609 if (*s1 == NUL)
610 break; // strings match until NUL
611 ++s1;
612 ++s2;
613 --len;
614 }
615 cmp_flags = save_cmp_flags;
616 return i;
617}
618
619/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200620 * Search for first occurrence of "c" in "string".
621 * Version of strchr() that handles unsigned char strings with characters from
622 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
623 * end of the string.
624 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000625 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200626vim_strchr(char_u *string, int c)
627{
628 char_u *p;
629 int b;
630
631 p = string;
632 if (enc_utf8 && c >= 0x80)
633 {
634 while (*p != NUL)
635 {
636 int l = utfc_ptr2len(p);
637
638 // Avoid matching an illegal byte here.
639 if (utf_ptr2char(p) == c && l > 1)
640 return p;
641 p += l;
642 }
643 return NULL;
644 }
645 if (enc_dbcs != 0 && c > 255)
646 {
647 int n2 = c & 0xff;
648
649 c = ((unsigned)c >> 8) & 0xff;
650 while ((b = *p) != NUL)
651 {
652 if (b == c && p[1] == n2)
653 return p;
654 p += (*mb_ptr2len)(p);
655 }
656 return NULL;
657 }
658 if (has_mbyte)
659 {
660 while ((b = *p) != NUL)
661 {
662 if (b == c)
663 return p;
664 p += (*mb_ptr2len)(p);
665 }
666 return NULL;
667 }
668 while ((b = *p) != NUL)
669 {
670 if (b == c)
671 return p;
672 ++p;
673 }
674 return NULL;
675}
676
677/*
678 * Version of strchr() that only works for bytes and handles unsigned char
679 * strings with characters above 128 correctly. It also doesn't return a
680 * pointer to the NUL at the end of the string.
681 */
682 char_u *
683vim_strbyte(char_u *string, int c)
684{
685 char_u *p = string;
686
687 while (*p != NUL)
688 {
689 if (*p == c)
690 return p;
691 ++p;
692 }
693 return NULL;
694}
695
696/*
697 * Search for last occurrence of "c" in "string".
698 * Version of strrchr() that handles unsigned char strings with characters from
699 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
700 * end of the string.
701 * Return NULL if not found.
702 * Does not handle multi-byte char for "c"!
703 */
704 char_u *
705vim_strrchr(char_u *string, int c)
706{
707 char_u *retval = NULL;
708 char_u *p = string;
709
710 while (*p)
711 {
712 if (*p == c)
713 retval = p;
714 MB_PTR_ADV(p);
715 }
716 return retval;
717}
718
719/*
720 * Vim's version of strpbrk(), in case it's missing.
721 * Don't generate a prototype for this, causes problems when it's not used.
722 */
723#ifndef PROTO
724# ifndef HAVE_STRPBRK
725# ifdef vim_strpbrk
726# undef vim_strpbrk
727# endif
728 char_u *
729vim_strpbrk(char_u *s, char_u *charset)
730{
731 while (*s)
732 {
733 if (vim_strchr(charset, *s) != NULL)
734 return s;
735 MB_PTR_ADV(s);
736 }
737 return NULL;
738}
739# endif
740#endif
741
742/*
743 * Sort an array of strings.
744 */
static int sort_compare(const void *s1, const void *s2);

/*
 * Comparison callback for qsort(): "s1" and "s2" each point to a string
 * pointer; the two strings are compared with STRCMP().
 */
    static int
sort_compare(const void *s1, const void *s2)
{
    char *left = *(char **)s1;
    char *right = *(char **)s2;

    return STRCMP(left, right);
}
752
753 void
754sort_strings(
755 char_u **files,
756 int count)
757{
758 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
759}
760
761#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
762/*
763 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
764 * When "s" is NULL FALSE is returned.
765 */
766 int
767has_non_ascii(char_u *s)
768{
769 char_u *p;
770
771 if (s != NULL)
772 for (p = s; *p != NUL; ++p)
773 if (*p >= 128)
774 return TRUE;
775 return FALSE;
776}
777#endif
778
779/*
780 * Concatenate two strings and return the result in allocated memory.
781 * Returns NULL when out of memory.
782 */
783 char_u *
784concat_str(char_u *str1, char_u *str2)
785{
786 char_u *dest;
787 size_t l = str1 == NULL ? 0 : STRLEN(str1);
788
789 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000790 if (dest == NULL)
791 return NULL;
792 if (str1 == NULL)
793 *dest = NUL;
794 else
795 STRCPY(dest, str1);
796 if (str2 != NULL)
797 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200798 return dest;
799}
800
zeertzjq4dd266c2023-08-19 11:35:03 +0200801#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
802/*
803 * Reverse text into allocated memory.
804 * Returns the allocated string, NULL when out of memory.
805 */
806 char_u *
807reverse_text(char_u *s)
808{
809 size_t len = STRLEN(s);
810 char_u *rev = alloc(len + 1);
811 if (rev == NULL)
812 return NULL;
813
814 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
815 {
816 if (has_mbyte)
817 {
818 int mb_len = (*mb_ptr2len)(s + s_i);
819 rev_i -= mb_len;
820 mch_memmove(rev + rev_i, s + s_i, mb_len);
821 s_i += mb_len - 1;
822 }
823 else
824 rev[--rev_i] = s[s_i];
825 }
826 rev[len] = NUL;
827 return rev;
828}
829#endif
830
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200831#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200832/*
833 * Return string "str" in ' quotes, doubling ' characters.
834 * If "str" is NULL an empty string is assumed.
835 * If "function" is TRUE make it function('string').
836 */
837 char_u *
838string_quote(char_u *str, int function)
839{
840 unsigned len;
841 char_u *p, *r, *s;
842
843 len = (function ? 13 : 3);
844 if (str != NULL)
845 {
846 len += (unsigned)STRLEN(str);
847 for (p = str; *p != NUL; MB_PTR_ADV(p))
848 if (*p == '\'')
849 ++len;
850 }
851 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000852 if (r == NULL)
853 return NULL;
854
855 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200856 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000857 STRCPY(r, "function('");
858 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200859 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000860 else
861 *r++ = '\'';
862 if (str != NULL)
863 for (p = str; *p != NUL; )
864 {
865 if (*p == '\'')
866 *r++ = '\'';
867 MB_COPY_CHAR(p, r);
868 }
869 *r++ = '\'';
870 if (function)
871 *r++ = ')';
872 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200873 return s;
874}
875
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000876/*
877 * Count the number of times "needle" occurs in string "haystack". Case is
878 * ignored if "ic" is TRUE.
879 */
880 long
881string_count(char_u *haystack, char_u *needle, int ic)
882{
883 long n = 0;
884 char_u *p = haystack;
885 char_u *next;
886
887 if (p == NULL || needle == NULL || *needle == NUL)
888 return 0;
889
890 if (ic)
891 {
892 size_t len = STRLEN(needle);
893
894 while (*p != NUL)
895 {
896 if (MB_STRNICMP(p, needle, len) == 0)
897 {
898 ++n;
899 p += len;
900 }
901 else
902 MB_PTR_ADV(p);
903 }
904 }
905 else
906 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
907 {
908 ++n;
909 p = next + STRLEN(needle);
910 }
911
912 return n;
913}
914
915/*
916 * Make a typval_T of the first character of "input" and store it in "output".
917 * Return OK or FAIL.
918 */
919 static int
920copy_first_char_to_tv(char_u *input, typval_T *output)
921{
922 char_u buf[MB_MAXBYTES + 1];
923 int len;
924
925 if (input == NULL || output == NULL)
926 return FAIL;
927
928 len = has_mbyte ? mb_ptr2len(input) : 1;
929 STRNCPY(buf, input, len);
930 buf[len] = NUL;
931 output->v_type = VAR_STRING;
932 output->vval.v_string = vim_strsave(buf);
933
934 return output->vval.v_string == NULL ? FAIL : OK;
935}
936
937/*
938 * Implementation of map() and filter() for a String. Apply "expr" to every
939 * character in string "str" and return the result in "rettv".
940 */
941 void
942string_filter_map(
943 char_u *str,
944 filtermap_T filtermap,
945 typval_T *expr,
946 typval_T *rettv)
947{
948 char_u *p;
949 typval_T tv;
950 garray_T ga;
951 int len = 0;
952 int idx = 0;
953 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100954 typval_T newtv;
955 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000956
957 rettv->v_type = VAR_STRING;
958 rettv->vval.v_string = NULL;
959
960 // set_vim_var_nr() doesn't set the type
961 set_vim_var_type(VV_KEY, VAR_NUMBER);
962
zeertzjqe7d49462023-04-16 20:53:55 +0100963 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100964 fc = eval_expr_get_funccal(expr, &newtv);
965
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000966 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000967 for (p = str; *p != NUL; p += len)
968 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000969 if (copy_first_char_to_tv(p, &tv) == FAIL)
970 break;
971 len = (int)STRLEN(tv.vval.v_string);
972
973 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100974 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000975 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000976 {
977 clear_tv(&newtv);
978 clear_tv(&tv);
979 break;
980 }
Ernie Raele79e2072024-01-13 11:47:33 +0100981 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000982 {
983 if (newtv.v_type != VAR_STRING)
984 {
985 clear_tv(&newtv);
986 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000987 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000988 break;
989 }
990 else
991 ga_concat(&ga, newtv.vval.v_string);
992 }
Ernie Raele79e2072024-01-13 11:47:33 +0100993 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000994 ga_concat(&ga, tv.vval.v_string);
995
996 clear_tv(&newtv);
997 clear_tv(&tv);
998
999 ++idx;
1000 }
1001 ga_append(&ga, NUL);
1002 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +01001003 if (fc != NULL)
1004 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001005}
1006
1007/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001008 * Implementation of reduce() for String "argvars[0]" using the function "expr"
1009 * starting with the optional initial value "argvars[2]" and return the result
1010 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001011 */
1012 void
1013string_reduce(
1014 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001015 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001016 typval_T *rettv)
1017{
1018 char_u *p = tv_get_string(&argvars[0]);
1019 int len;
1020 typval_T argv[3];
1021 int r;
1022 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +01001023 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001024
1025 if (argvars[2].v_type == VAR_UNKNOWN)
1026 {
1027 if (*p == NUL)
1028 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001029 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001030 return;
1031 }
1032 if (copy_first_char_to_tv(p, rettv) == FAIL)
1033 return;
1034 p += STRLEN(rettv->vval.v_string);
1035 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001036 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001037 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001038 else
1039 copy_tv(&argvars[2], rettv);
1040
zeertzjqe7d49462023-04-16 20:53:55 +01001041 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001042 fc = eval_expr_get_funccal(expr, rettv);
1043
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001044 for ( ; *p != NUL; p += len)
1045 {
1046 argv[0] = *rettv;
1047 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1048 break;
1049 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001050
zeertzjqad0c4422023-08-17 22:15:47 +02001051 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001052
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001053 clear_tv(&argv[0]);
1054 clear_tv(&argv[1]);
1055 if (r == FAIL || called_emsg != called_emsg_start)
1056 return;
1057 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001058
1059 if (fc != NULL)
1060 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001061}
1062
Bram Moolenaare4098452023-05-07 18:53:49 +01001063/*
1064 * Implementation of "byteidx()" and "byteidxcomp()" functions
1065 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001066 static void
Dominique Pellé0268ff32024-07-28 21:12:20 +02001067byteidx_common(typval_T *argvars, typval_T *rettv, int comp)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001068{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001069 rettv->vval.v_number = -1;
1070
1071 if (in_vim9script()
1072 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001073 || check_for_number_arg(argvars, 1) == FAIL
1074 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001075 return;
1076
Christian Brabandt67672ef2023-04-24 21:09:54 +01001077 char_u *str = tv_get_string_chk(&argvars[0]);
1078 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001079 if (str == NULL || idx < 0)
1080 return;
1081
Christian Brabandt67672ef2023-04-24 21:09:54 +01001082 varnumber_T utf16idx = FALSE;
1083 if (argvars[2].v_type != VAR_UNKNOWN)
1084 {
zeertzjq8cf51372023-05-08 15:31:38 +01001085 int error = FALSE;
1086 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1087 if (error)
1088 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001089 if (utf16idx < 0 || utf16idx > 1)
1090 {
zeertzjq8cf51372023-05-08 15:31:38 +01001091 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001092 return;
1093 }
1094 }
1095
1096 int (*ptr2len)(char_u *);
1097 if (enc_utf8 && comp)
1098 ptr2len = utf_ptr2len;
1099 else
1100 ptr2len = mb_ptr2len;
1101
1102 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001103 for ( ; idx > 0; idx--)
1104 {
1105 if (*t == NUL) // EOL reached
1106 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001107 if (utf16idx)
1108 {
1109 int clen = ptr2len(t);
1110 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1111 if (c > 0xFFFF)
1112 idx--;
1113 }
1114 if (idx > 0)
1115 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001116 }
1117 rettv->vval.v_number = (varnumber_T)(t - str);
1118}
1119
1120/*
1121 * "byteidx()" function
1122 */
1123 void
1124f_byteidx(typval_T *argvars, typval_T *rettv)
1125{
Bram Moolenaare4098452023-05-07 18:53:49 +01001126 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001127}
1128
1129/*
1130 * "byteidxcomp()" function
1131 */
1132 void
1133f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1134{
Bram Moolenaare4098452023-05-07 18:53:49 +01001135 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001136}
1137
1138/*
1139 * "charidx()" function
1140 */
1141 void
1142f_charidx(typval_T *argvars, typval_T *rettv)
1143{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001144 rettv->vval.v_number = -1;
1145
Christian Brabandt67672ef2023-04-24 21:09:54 +01001146 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001147 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001148 || check_for_opt_bool_arg(argvars, 2) == FAIL
1149 || (argvars[2].v_type != VAR_UNKNOWN
1150 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001151 return;
1152
Christian Brabandt67672ef2023-04-24 21:09:54 +01001153 char_u *str = tv_get_string_chk(&argvars[0]);
1154 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001155 if (str == NULL || idx < 0)
1156 return;
1157
Christian Brabandt67672ef2023-04-24 21:09:54 +01001158 varnumber_T countcc = FALSE;
1159 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001160 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001161 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001162 countcc = tv_get_bool(&argvars[2]);
1163 if (argvars[3].v_type != VAR_UNKNOWN)
1164 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001165 }
1166
Christian Brabandt67672ef2023-04-24 21:09:54 +01001167 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001168 if (enc_utf8 && countcc)
1169 ptr2len = utf_ptr2len;
1170 else
1171 ptr2len = mb_ptr2len;
1172
Christian Brabandt67672ef2023-04-24 21:09:54 +01001173 char_u *p;
1174 int len;
1175 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001176 {
1177 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001178 {
1179 // If the index is exactly the number of bytes or utf-16 code units
1180 // in the string then return the length of the string in
1181 // characters.
1182 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1183 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001184 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001185 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001186 if (utf16idx)
1187 {
1188 idx--;
1189 int clen = ptr2len(p);
1190 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1191 if (c > 0xFFFF)
1192 idx--;
1193 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001194 p += ptr2len(p);
1195 }
1196
1197 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1198}
1199
1200/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001201 * Convert the string "str", from encoding "from" to encoding "to".
1202 */
1203 static char_u *
1204convert_string(char_u *str, char_u *from, char_u *to)
1205{
1206 vimconv_T vimconv;
1207
1208 vimconv.vc_type = CONV_NONE;
1209 if (convert_setup(&vimconv, from, to) == FAIL)
1210 return NULL;
1211 vimconv.vc_fail = TRUE;
1212 if (vimconv.vc_type == CONV_NONE)
1213 str = vim_strsave(str);
1214 else
1215 str = string_convert(&vimconv, str, NULL);
1216 convert_setup(&vimconv, NULL, NULL);
1217
1218 return str;
1219}
1220
1221/*
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001222 * Add the bytes from "str" to "blob".
1223 */
1224 static void
1225blob_from_string(char_u *str, blob_T *blob)
1226{
1227 size_t len = STRLEN(str);
1228
1229 for (size_t i = 0; i < len; i++)
1230 {
1231 int ch = str[i];
1232
1233 if (str[i] == NL)
1234 // Translate newlines in the string to NUL character
1235 ch = NUL;
1236
1237 ga_append(&blob->bv_ga, ch);
1238 }
1239}
1240
1241/*
1242 * Return a string created from the bytes in blob starting at "start_idx".
1243 * A NL character in the blob indicates end of string.
1244 * A NUL character in the blob is translated to a NL.
1245 * On return, "start_idx" points to next byte to process in blob.
1246 */
1247 static char_u *
1248string_from_blob(blob_T *blob, long *start_idx)
1249{
1250 garray_T str_ga;
1251 long blen;
Yegappan Lakshmanan5e9aaed2025-01-18 10:24:25 +01001252 int idx;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001253
1254 ga_init2(&str_ga, sizeof(char), 80);
1255
1256 blen = blob_len(blob);
1257
1258 for (idx = *start_idx; idx < blen; idx++)
1259 {
1260 char_u byte = (char_u)blob_get(blob, idx);
1261 if (byte == NL)
1262 {
1263 idx++;
1264 break;
1265 }
1266
1267 if (byte == NUL)
1268 byte = NL;
1269
1270 ga_append(&str_ga, byte);
1271 }
1272
1273 ga_append(&str_ga, NUL);
1274
1275 char_u *ret_str = vim_strsave(str_ga.ga_data);
1276 *start_idx = idx;
1277
1278 ga_clear(&str_ga);
1279 return ret_str;
1280}
1281
1282/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001283 * "blob2str()" function
1284 * Converts a blob to a string, ensuring valid UTF-8 encoding.
1285 */
1286 void
1287f_blob2str(typval_T *argvars, typval_T *rettv)
1288{
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001289 blob_T *blob;
1290 int blen;
1291 long idx;
1292 int utf8_inuse = FALSE;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001293
1294 if (check_for_blob_arg(argvars, 0) == FAIL
1295 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1296 return;
1297
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001298 if (rettv_list_alloc(rettv) == FAIL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001299 return;
1300
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001301 blob = argvars->vval.v_blob;
1302 if (blob == NULL)
1303 return;
1304 blen = blob_len(blob);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001305
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001306 char_u *from_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001307 if (argvars[1].v_type != VAR_UNKNOWN)
1308 {
1309 dict_T *d = argvars[1].vval.v_dict;
1310 if (d != NULL)
1311 {
1312 char_u *enc = dict_get_string(d, "encoding", FALSE);
1313 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001314 from_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001315 }
1316 }
1317
1318 if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001319 utf8_inuse = TRUE;
1320
1321 idx = 0;
1322 while (idx < blen)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001323 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001324 char_u *str;
1325 char_u *converted_str;
1326
1327 str = string_from_blob(blob, &idx);
1328 if (str == NULL)
1329 break;
1330
1331 converted_str = str;
1332 if (from_encoding != NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001333 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001334 converted_str = convert_string(str, from_encoding, p_enc);
1335 vim_free(str);
1336 if (converted_str == NULL)
1337 {
1338 semsg(_(e_str_encoding_failed), "from", from_encoding);
1339 goto done;
1340 }
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001341 }
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001342
1343 if (utf8_inuse)
1344 {
1345 if (!utf_valid_string(converted_str, NULL))
1346 {
1347 semsg(_(e_str_encoding_failed), "from", p_enc);
1348 vim_free(converted_str);
1349 goto done;
1350 }
1351 }
1352
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001353 int ret = list_append_string(rettv->vval.v_list, converted_str, -1);
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001354 vim_free(converted_str);
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001355 if (ret == FAIL)
1356 break;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001357 }
1358
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001359done:
1360 vim_free(from_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001361}
1362
1363/*
1364 * "str2blob()" function
1365 */
1366 void
1367f_str2blob(typval_T *argvars, typval_T *rettv)
1368{
1369 blob_T *blob;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001370 list_T *list;
1371 listitem_T *li;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001372
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001373 if (check_for_list_arg(argvars, 0) == FAIL
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001374 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1375 return;
1376
1377 if (rettv_blob_alloc(rettv) == FAIL)
1378 return;
1379
1380 blob = rettv->vval.v_blob;
1381
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001382 list = argvars[0].vval.v_list;
1383 if (list == NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001384 return;
1385
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001386 char_u *to_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001387 if (argvars[1].v_type != VAR_UNKNOWN)
1388 {
1389 dict_T *d = argvars[1].vval.v_dict;
1390 if (d != NULL)
1391 {
1392 char_u *enc = dict_get_string(d, "encoding", FALSE);
1393 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001394 to_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001395 }
1396 }
1397
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001398 FOR_ALL_LIST_ITEMS(list, li)
1399 {
1400 if (li->li_tv.v_type != VAR_STRING)
1401 continue;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001402
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001403 char_u *str = li->li_tv.vval.v_string;
1404
1405 if (str == NULL)
1406 continue;
1407
1408 if (to_encoding != NULL)
1409 {
1410 str = convert_string(str, p_enc, to_encoding);
1411 if (str == NULL)
1412 {
1413 semsg(_(e_str_encoding_failed), "to", to_encoding);
1414 goto done;
1415 }
1416 }
1417
1418 if (li != list->lv_first)
1419 // Each list string item is separated by a newline in the blob
1420 ga_append(&blob->bv_ga, NL);
1421
1422 blob_from_string(str, blob);
1423
1424 if (to_encoding != NULL)
1425 vim_free(str);
1426 }
1427
1428done:
1429 if (to_encoding != NULL)
1430 vim_free(to_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001431}
1432
1433/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001434 * "str2list()" function
1435 */
1436 void
1437f_str2list(typval_T *argvars, typval_T *rettv)
1438{
1439 char_u *p;
1440 int utf8 = FALSE;
1441
1442 if (rettv_list_alloc(rettv) == FAIL)
1443 return;
1444
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001445 if (in_vim9script()
1446 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001447 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001448 return;
1449
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001450 if (argvars[1].v_type != VAR_UNKNOWN)
1451 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1452
1453 p = tv_get_string(&argvars[0]);
1454
1455 if (has_mbyte || utf8)
1456 {
1457 int (*ptr2len)(char_u *);
1458 int (*ptr2char)(char_u *);
1459
1460 if (utf8 || enc_utf8)
1461 {
1462 ptr2len = utf_ptr2len;
1463 ptr2char = utf_ptr2char;
1464 }
1465 else
1466 {
1467 ptr2len = mb_ptr2len;
1468 ptr2char = mb_ptr2char;
1469 }
1470
1471 for ( ; *p != NUL; p += (*ptr2len)(p))
1472 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1473 }
1474 else
1475 for ( ; *p != NUL; ++p)
1476 list_append_number(rettv->vval.v_list, *p);
1477}
1478
1479/*
1480 * "str2nr()" function
1481 */
1482 void
1483f_str2nr(typval_T *argvars, typval_T *rettv)
1484{
1485 int base = 10;
1486 char_u *p;
1487 varnumber_T n;
1488 int what = 0;
1489 int isneg;
1490
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001491 if (in_vim9script()
1492 && (check_for_string_arg(argvars, 0) == FAIL
1493 || check_for_opt_number_arg(argvars, 1) == FAIL
1494 || (argvars[1].v_type != VAR_UNKNOWN
1495 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1496 return;
1497
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001498 if (argvars[1].v_type != VAR_UNKNOWN)
1499 {
1500 base = (int)tv_get_number(&argvars[1]);
1501 if (base != 2 && base != 8 && base != 10 && base != 16)
1502 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001503 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001504 return;
1505 }
1506 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1507 what |= STR2NR_QUOTE;
1508 }
1509
1510 p = skipwhite(tv_get_string_strict(&argvars[0]));
1511 isneg = (*p == '-');
1512 if (*p == '+' || *p == '-')
1513 p = skipwhite(p + 1);
1514 switch (base)
1515 {
1516 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1517 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1518 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1519 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001520 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001521 // Text after the number is silently ignored.
1522 if (isneg)
1523 rettv->vval.v_number = -n;
1524 else
1525 rettv->vval.v_number = n;
1526
1527}
1528
1529/*
1530 * "strgetchar()" function
1531 */
1532 void
1533f_strgetchar(typval_T *argvars, typval_T *rettv)
1534{
1535 char_u *str;
1536 int len;
1537 int error = FALSE;
1538 int charidx;
1539 int byteidx = 0;
1540
1541 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001542
1543 if (in_vim9script()
1544 && (check_for_string_arg(argvars, 0) == FAIL
1545 || check_for_number_arg(argvars, 1) == FAIL))
1546 return;
1547
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001548 str = tv_get_string_chk(&argvars[0]);
1549 if (str == NULL)
1550 return;
1551 len = (int)STRLEN(str);
1552 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1553 if (error)
1554 return;
1555
1556 while (charidx >= 0 && byteidx < len)
1557 {
1558 if (charidx == 0)
1559 {
1560 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1561 break;
1562 }
1563 --charidx;
1564 byteidx += MB_CPTR2LEN(str + byteidx);
1565 }
1566}
1567
1568/*
1569 * "stridx()" function
1570 */
1571 void
1572f_stridx(typval_T *argvars, typval_T *rettv)
1573{
1574 char_u buf[NUMBUFLEN];
1575 char_u *needle;
1576 char_u *haystack;
1577 char_u *save_haystack;
1578 char_u *pos;
1579 int start_idx;
1580
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001581 if (in_vim9script()
1582 && (check_for_string_arg(argvars, 0) == FAIL
1583 || check_for_string_arg(argvars, 1) == FAIL
1584 || check_for_opt_number_arg(argvars, 2) == FAIL))
1585 return;
1586
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001587 needle = tv_get_string_chk(&argvars[1]);
1588 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1589 rettv->vval.v_number = -1;
1590 if (needle == NULL || haystack == NULL)
1591 return; // type error; errmsg already given
1592
1593 if (argvars[2].v_type != VAR_UNKNOWN)
1594 {
1595 int error = FALSE;
1596
1597 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1598 if (error || start_idx >= (int)STRLEN(haystack))
1599 return;
1600 if (start_idx >= 0)
1601 haystack += start_idx;
1602 }
1603
1604 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1605 if (pos != NULL)
1606 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1607}
1608
1609/*
1610 * "string()" function
1611 */
1612 void
1613f_string(typval_T *argvars, typval_T *rettv)
1614{
1615 char_u *tofree;
1616 char_u numbuf[NUMBUFLEN];
1617
1618 rettv->v_type = VAR_STRING;
1619 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1620 get_copyID());
1621 // Make a copy if we have a value but it's not in allocated memory.
1622 if (rettv->vval.v_string != NULL && tofree == NULL)
1623 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1624}
1625
1626/*
1627 * "strlen()" function
1628 */
1629 void
1630f_strlen(typval_T *argvars, typval_T *rettv)
1631{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001632 if (in_vim9script()
1633 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1634 return;
1635
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001636 rettv->vval.v_number = (varnumber_T)(STRLEN(
1637 tv_get_string(&argvars[0])));
1638}
1639
1640 static void
1641strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1642{
1643 char_u *s = tv_get_string(&argvars[0]);
1644 varnumber_T len = 0;
1645 int (*func_mb_ptr2char_adv)(char_u **pp);
1646
1647 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1648 while (*s != NUL)
1649 {
1650 func_mb_ptr2char_adv(&s);
1651 ++len;
1652 }
1653 rettv->vval.v_number = len;
1654}
1655
1656/*
1657 * "strcharlen()" function
1658 */
1659 void
1660f_strcharlen(typval_T *argvars, typval_T *rettv)
1661{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001662 if (in_vim9script()
1663 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1664 return;
1665
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001666 strchar_common(argvars, rettv, TRUE);
1667}
1668
1669/*
1670 * "strchars()" function
1671 */
1672 void
1673f_strchars(typval_T *argvars, typval_T *rettv)
1674{
1675 varnumber_T skipcc = FALSE;
1676
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001677 if (in_vim9script()
1678 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001679 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001680 return;
1681
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001682 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001683 {
zeertzjq8cf51372023-05-08 15:31:38 +01001684 int error = FALSE;
1685 skipcc = tv_get_bool_chk(&argvars[1], &error);
1686 if (error)
1687 return;
1688 if (skipcc < 0 || skipcc > 1)
1689 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001690 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001691 return;
1692 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001693 }
zeertzjq8cf51372023-05-08 15:31:38 +01001694
1695 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001696}
1697
1698/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001699 * "strutf16len()" function
1700 */
1701 void
1702f_strutf16len(typval_T *argvars, typval_T *rettv)
1703{
1704 rettv->vval.v_number = -1;
1705
1706 if (check_for_string_arg(argvars, 0) == FAIL
1707 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1708 return;
1709
1710 varnumber_T countcc = FALSE;
1711 if (argvars[1].v_type != VAR_UNKNOWN)
1712 countcc = tv_get_bool(&argvars[1]);
1713
1714 char_u *s = tv_get_string(&argvars[0]);
1715 varnumber_T len = 0;
1716 int (*func_mb_ptr2char_adv)(char_u **pp);
1717 int ch;
1718
1719 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1720 while (*s != NUL)
1721 {
1722 ch = func_mb_ptr2char_adv(&s);
1723 if (ch > 0xFFFF)
1724 ++len;
1725 ++len;
1726 }
1727 rettv->vval.v_number = len;
1728}
1729
1730/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001731 * "strdisplaywidth()" function
1732 */
1733 void
1734f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1735{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001736 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001737 int col = 0;
1738
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001739 rettv->vval.v_number = -1;
1740
1741 if (in_vim9script()
1742 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001743 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001744 return;
1745
1746 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001747 if (argvars[1].v_type != VAR_UNKNOWN)
1748 col = (int)tv_get_number(&argvars[1]);
1749
1750 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1751}
1752
1753/*
1754 * "strwidth()" function
1755 */
1756 void
1757f_strwidth(typval_T *argvars, typval_T *rettv)
1758{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001759 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001760
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001761 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1762 return;
1763
1764 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001765 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1766}
1767
1768/*
1769 * "strcharpart()" function
1770 */
1771 void
1772f_strcharpart(typval_T *argvars, typval_T *rettv)
1773{
1774 char_u *p;
1775 int nchar;
1776 int nbyte = 0;
1777 int charlen;
1778 int skipcc = FALSE;
1779 int len = 0;
1780 int slen;
1781 int error = FALSE;
1782
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001783 if (in_vim9script()
1784 && (check_for_string_arg(argvars, 0) == FAIL
1785 || check_for_number_arg(argvars, 1) == FAIL
1786 || check_for_opt_number_arg(argvars, 2) == FAIL
1787 || (argvars[2].v_type != VAR_UNKNOWN
1788 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1789 return;
1790
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001791 p = tv_get_string(&argvars[0]);
1792 slen = (int)STRLEN(p);
1793
1794 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1795 if (!error)
1796 {
1797 if (argvars[2].v_type != VAR_UNKNOWN
1798 && argvars[3].v_type != VAR_UNKNOWN)
1799 {
zeertzjq8cf51372023-05-08 15:31:38 +01001800 skipcc = tv_get_bool_chk(&argvars[3], &error);
1801 if (error)
1802 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001803 if (skipcc < 0 || skipcc > 1)
1804 {
zeertzjq8cf51372023-05-08 15:31:38 +01001805 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001806 return;
1807 }
1808 }
1809
1810 if (nchar > 0)
1811 while (nchar > 0 && nbyte < slen)
1812 {
1813 if (skipcc)
1814 nbyte += mb_ptr2len(p + nbyte);
1815 else
1816 nbyte += MB_CPTR2LEN(p + nbyte);
1817 --nchar;
1818 }
1819 else
1820 nbyte = nchar;
1821 if (argvars[2].v_type != VAR_UNKNOWN)
1822 {
1823 charlen = (int)tv_get_number(&argvars[2]);
1824 while (charlen > 0 && nbyte + len < slen)
1825 {
1826 int off = nbyte + len;
1827
1828 if (off < 0)
1829 len += 1;
1830 else
1831 {
1832 if (skipcc)
1833 len += mb_ptr2len(p + off);
1834 else
1835 len += MB_CPTR2LEN(p + off);
1836 }
1837 --charlen;
1838 }
1839 }
1840 else
1841 len = slen - nbyte; // default: all bytes that are available.
1842 }
1843
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001844 // Only return the overlap between the specified part and the actual
1845 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001846 if (nbyte < 0)
1847 {
1848 len += nbyte;
1849 nbyte = 0;
1850 }
1851 else if (nbyte > slen)
1852 nbyte = slen;
1853 if (len < 0)
1854 len = 0;
1855 else if (nbyte + len > slen)
1856 len = slen - nbyte;
1857
1858 rettv->v_type = VAR_STRING;
1859 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1860}
1861
1862/*
1863 * "strpart()" function
1864 */
1865 void
1866f_strpart(typval_T *argvars, typval_T *rettv)
1867{
1868 char_u *p;
1869 int n;
1870 int len;
1871 int slen;
1872 int error = FALSE;
1873
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001874 if (in_vim9script()
1875 && (check_for_string_arg(argvars, 0) == FAIL
1876 || check_for_number_arg(argvars, 1) == FAIL
1877 || check_for_opt_number_arg(argvars, 2) == FAIL
1878 || (argvars[2].v_type != VAR_UNKNOWN
1879 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1880 return;
1881
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001882 p = tv_get_string(&argvars[0]);
1883 slen = (int)STRLEN(p);
1884
1885 n = (int)tv_get_number_chk(&argvars[1], &error);
1886 if (error)
1887 len = 0;
1888 else if (argvars[2].v_type != VAR_UNKNOWN)
1889 len = (int)tv_get_number(&argvars[2]);
1890 else
1891 len = slen - n; // default len: all bytes that are available.
1892
1893 // Only return the overlap between the specified part and the actual
1894 // string.
1895 if (n < 0)
1896 {
1897 len += n;
1898 n = 0;
1899 }
1900 else if (n > slen)
1901 n = slen;
1902 if (len < 0)
1903 len = 0;
1904 else if (n + len > slen)
1905 len = slen - n;
1906
1907 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1908 {
1909 int off;
1910
1911 // length in characters
1912 for (off = n; off < slen && len > 0; --len)
1913 off += mb_ptr2len(p + off);
1914 len = off - n;
1915 }
1916
1917 rettv->v_type = VAR_STRING;
1918 rettv->vval.v_string = vim_strnsave(p + n, len);
1919}
1920
1921/*
1922 * "strridx()" function
1923 */
1924 void
1925f_strridx(typval_T *argvars, typval_T *rettv)
1926{
1927 char_u buf[NUMBUFLEN];
1928 char_u *needle;
1929 char_u *haystack;
1930 char_u *rest;
1931 char_u *lastmatch = NULL;
1932 int haystack_len, end_idx;
1933
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001934 if (in_vim9script()
1935 && (check_for_string_arg(argvars, 0) == FAIL
1936 || check_for_string_arg(argvars, 1) == FAIL
1937 || check_for_opt_number_arg(argvars, 2) == FAIL))
1938 return;
1939
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001940 needle = tv_get_string_chk(&argvars[1]);
1941 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1942
1943 rettv->vval.v_number = -1;
1944 if (needle == NULL || haystack == NULL)
1945 return; // type error; errmsg already given
1946
1947 haystack_len = (int)STRLEN(haystack);
1948 if (argvars[2].v_type != VAR_UNKNOWN)
1949 {
1950 // Third argument: upper limit for index
1951 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1952 if (end_idx < 0)
1953 return; // can never find a match
1954 }
1955 else
1956 end_idx = haystack_len;
1957
1958 if (*needle == NUL)
1959 {
1960 // Empty string matches past the end.
1961 lastmatch = haystack + end_idx;
1962 }
1963 else
1964 {
1965 for (rest = haystack; *rest != '\0'; ++rest)
1966 {
1967 rest = (char_u *)strstr((char *)rest, (char *)needle);
1968 if (rest == NULL || rest > haystack + end_idx)
1969 break;
1970 lastmatch = rest;
1971 }
1972 }
1973
1974 if (lastmatch == NULL)
1975 rettv->vval.v_number = -1;
1976 else
1977 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1978}
1979
1980/*
1981 * "strtrans()" function
1982 */
1983 void
1984f_strtrans(typval_T *argvars, typval_T *rettv)
1985{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001986 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1987 return;
1988
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001989 rettv->v_type = VAR_STRING;
1990 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1991}
1992
Christian Brabandt67672ef2023-04-24 21:09:54 +01001993
1994/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001995 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001996 *
1997 * Converts a byte or character offset in a string to the corresponding UTF-16
1998 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01001999 */
2000 void
2001f_utf16idx(typval_T *argvars, typval_T *rettv)
2002{
2003 rettv->vval.v_number = -1;
2004
2005 if (check_for_string_arg(argvars, 0) == FAIL
2006 || check_for_opt_number_arg(argvars, 1) == FAIL
2007 || check_for_opt_bool_arg(argvars, 2) == FAIL
2008 || (argvars[2].v_type != VAR_UNKNOWN
2009 && check_for_opt_bool_arg(argvars, 3) == FAIL))
2010 return;
2011
2012 char_u *str = tv_get_string_chk(&argvars[0]);
2013 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
2014 if (str == NULL || idx < 0)
2015 return;
2016
2017 varnumber_T countcc = FALSE;
2018 varnumber_T charidx = FALSE;
2019 if (argvars[2].v_type != VAR_UNKNOWN)
2020 {
2021 countcc = tv_get_bool(&argvars[2]);
2022 if (argvars[3].v_type != VAR_UNKNOWN)
2023 charidx = tv_get_bool(&argvars[3]);
2024 }
2025
2026 int (*ptr2len)(char_u *);
2027 if (enc_utf8 && countcc)
2028 ptr2len = utf_ptr2len;
2029 else
2030 ptr2len = mb_ptr2len;
2031
2032 char_u *p;
2033 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002034 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002035 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
2036 {
2037 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002038 {
2039 // If the index is exactly the number of bytes or characters in the
2040 // string then return the length of the string in utf-16 code
2041 // units.
2042 if (charidx ? (idx == 0) : (p == (str + idx)))
2043 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002044 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002045 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002046 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002047 int clen = ptr2len(p);
2048 int c = (clen > 1) ? utf_ptr2char(p) : *p;
2049 if (c > 0xFFFF)
2050 len++;
2051 p += ptr2len(p);
2052 if (charidx)
2053 idx--;
2054 }
2055
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002056 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002057}
2058
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002059/*
2060 * "tolower(string)" function
2061 */
2062 void
2063f_tolower(typval_T *argvars, typval_T *rettv)
2064{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002065 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2066 return;
2067
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002068 rettv->v_type = VAR_STRING;
2069 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
2070}
2071
2072/*
2073 * "toupper(string)" function
2074 */
2075 void
2076f_toupper(typval_T *argvars, typval_T *rettv)
2077{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002078 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2079 return;
2080
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002081 rettv->v_type = VAR_STRING;
2082 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
2083}
2084
2085/*
2086 * "tr(string, fromstr, tostr)" function
2087 */
2088 void
2089f_tr(typval_T *argvars, typval_T *rettv)
2090{
2091 char_u *in_str;
2092 char_u *fromstr;
2093 char_u *tostr;
2094 char_u *p;
2095 int inlen;
2096 int fromlen;
2097 int tolen;
2098 int idx;
2099 char_u *cpstr;
2100 int cplen;
2101 int first = TRUE;
2102 char_u buf[NUMBUFLEN];
2103 char_u buf2[NUMBUFLEN];
2104 garray_T ga;
2105
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002106 if (in_vim9script()
2107 && (check_for_string_arg(argvars, 0) == FAIL
2108 || check_for_string_arg(argvars, 1) == FAIL
2109 || check_for_string_arg(argvars, 2) == FAIL))
2110 return;
2111
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002112 in_str = tv_get_string(&argvars[0]);
2113 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
2114 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
2115
2116 // Default return value: empty string.
2117 rettv->v_type = VAR_STRING;
2118 rettv->vval.v_string = NULL;
2119 if (fromstr == NULL || tostr == NULL)
2120 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00002121 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002122
2123 if (!has_mbyte)
2124 // not multi-byte: fromstr and tostr must be the same length
2125 if (STRLEN(fromstr) != STRLEN(tostr))
2126 {
2127error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00002128 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002129 ga_clear(&ga);
2130 return;
2131 }
2132
2133 // fromstr and tostr have to contain the same number of chars
2134 while (*in_str != NUL)
2135 {
2136 if (has_mbyte)
2137 {
2138 inlen = (*mb_ptr2len)(in_str);
2139 cpstr = in_str;
2140 cplen = inlen;
2141 idx = 0;
2142 for (p = fromstr; *p != NUL; p += fromlen)
2143 {
2144 fromlen = (*mb_ptr2len)(p);
2145 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
2146 {
2147 for (p = tostr; *p != NUL; p += tolen)
2148 {
2149 tolen = (*mb_ptr2len)(p);
2150 if (idx-- == 0)
2151 {
2152 cplen = tolen;
2153 cpstr = p;
2154 break;
2155 }
2156 }
2157 if (*p == NUL) // tostr is shorter than fromstr
2158 goto error;
2159 break;
2160 }
2161 ++idx;
2162 }
2163
2164 if (first && cpstr == in_str)
2165 {
2166 // Check that fromstr and tostr have the same number of
2167 // (multi-byte) characters. Done only once when a character
2168 // of in_str doesn't appear in fromstr.
2169 first = FALSE;
2170 for (p = tostr; *p != NUL; p += tolen)
2171 {
2172 tolen = (*mb_ptr2len)(p);
2173 --idx;
2174 }
2175 if (idx != 0)
2176 goto error;
2177 }
2178
2179 (void)ga_grow(&ga, cplen);
2180 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
2181 ga.ga_len += cplen;
2182
2183 in_str += inlen;
2184 }
2185 else
2186 {
2187 // When not using multi-byte chars we can do it faster.
2188 p = vim_strchr(fromstr, *in_str);
2189 if (p != NULL)
2190 ga_append(&ga, tostr[p - fromstr]);
2191 else
2192 ga_append(&ga, *in_str);
2193 ++in_str;
2194 }
2195 }
2196
2197 // add a terminating NUL
2198 (void)ga_grow(&ga, 1);
2199 ga_append(&ga, NUL);
2200
2201 rettv->vval.v_string = ga.ga_data;
2202}
2203
2204/*
2205 * "trim({expr})" function
2206 */
2207 void
2208f_trim(typval_T *argvars, typval_T *rettv)
2209{
2210 char_u buf1[NUMBUFLEN];
2211 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002212 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002213 char_u *mask = NULL;
2214 char_u *tail;
2215 char_u *prev;
2216 char_u *p;
2217 int c1;
2218 int dir = 0;
2219
2220 rettv->v_type = VAR_STRING;
2221 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002222
2223 if (in_vim9script()
2224 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02002225 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002226 || (argvars[1].v_type != VAR_UNKNOWN
2227 && check_for_opt_number_arg(argvars, 2) == FAIL)))
2228 return;
2229
2230 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002231 if (head == NULL)
2232 return;
2233
Illia Bobyr80799172023-10-17 18:00:50 +02002234 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002235 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002236
2237 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02002238 {
Illia Bobyr80799172023-10-17 18:00:50 +02002239 mask = tv_get_string_buf_chk(&argvars[1], buf2);
2240 if (*mask == NUL)
2241 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002242
Illia Bobyr80799172023-10-17 18:00:50 +02002243 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02002244 {
Illia Bobyr80799172023-10-17 18:00:50 +02002245 int error = 0;
2246
2247 // leading or trailing characters to trim
2248 dir = (int)tv_get_number_chk(&argvars[2], &error);
2249 if (error)
2250 return;
2251 if (dir < 0 || dir > 2)
2252 {
2253 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
2254 return;
2255 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002256 }
2257 }
2258
2259 if (dir == 0 || dir == 1)
2260 {
2261 // Trim leading characters
2262 while (*head != NUL)
2263 {
2264 c1 = PTR2CHAR(head);
2265 if (mask == NULL)
2266 {
2267 if (c1 > ' ' && c1 != 0xa0)
2268 break;
2269 }
2270 else
2271 {
2272 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2273 if (c1 == PTR2CHAR(p))
2274 break;
2275 if (*p == NUL)
2276 break;
2277 }
2278 MB_PTR_ADV(head);
2279 }
2280 }
2281
2282 tail = head + STRLEN(head);
2283 if (dir == 0 || dir == 2)
2284 {
2285 // Trim trailing characters
2286 for (; tail > head; tail = prev)
2287 {
2288 prev = tail;
2289 MB_PTR_BACK(head, prev);
2290 c1 = PTR2CHAR(prev);
2291 if (mask == NULL)
2292 {
2293 if (c1 > ' ' && c1 != 0xa0)
2294 break;
2295 }
2296 else
2297 {
2298 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2299 if (c1 == PTR2CHAR(p))
2300 break;
2301 if (*p == NUL)
2302 break;
2303 }
2304 }
2305 }
2306 rettv->vval.v_string = vim_strnsave(head, tail - head);
2307}
2308
Bram Moolenaar677658a2022-01-05 16:09:06 +00002309static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002310
2311/*
2312 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2313 */
2314 static varnumber_T
2315tv_nr(typval_T *tvs, int *idxp)
2316{
2317 int idx = *idxp - 1;
2318 varnumber_T n = 0;
2319 int err = FALSE;
2320
2321 if (tvs[idx].v_type == VAR_UNKNOWN)
2322 emsg(_(e_printf));
2323 else
2324 {
2325 ++*idxp;
2326 n = tv_get_number_chk(&tvs[idx], &err);
2327 if (err)
2328 n = 0;
2329 }
2330 return n;
2331}
2332
2333/*
2334 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2335 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2336 * are not converted to a string.
2337 * If "tofree" is not NULL echo_string() is used. All types are converted to
2338 * a string with the same format as ":echo". The caller must free "*tofree".
2339 * Returns NULL for an error.
2340 */
2341 static char *
2342tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2343{
2344 int idx = *idxp - 1;
2345 char *s = NULL;
2346 static char_u numbuf[NUMBUFLEN];
2347
2348 if (tvs[idx].v_type == VAR_UNKNOWN)
2349 emsg(_(e_printf));
2350 else
2351 {
2352 ++*idxp;
2353 if (tofree != NULL)
2354 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2355 else
2356 s = (char *)tv_get_string_chk(&tvs[idx]);
2357 }
2358 return s;
2359}
2360
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002361/*
2362 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2363 */
2364 static double
2365tv_float(typval_T *tvs, int *idxp)
2366{
2367 int idx = *idxp - 1;
2368 double f = 0;
2369
2370 if (tvs[idx].v_type == VAR_UNKNOWN)
2371 emsg(_(e_printf));
2372 else
2373 {
2374 ++*idxp;
2375 if (tvs[idx].v_type == VAR_FLOAT)
2376 f = tvs[idx].vval.v_float;
2377 else if (tvs[idx].v_type == VAR_NUMBER)
2378 f = (double)tvs[idx].vval.v_number;
2379 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002380 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002381 }
2382 return f;
2383}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002384
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002385#endif
2386
/*
 * Return the text printf() uses for an infinite value:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 *
 * "positive" selects between the negative form and one of the positive
 * forms.  For a positive value "force_sign" asks for a sign position, which
 * is a space when "space_for_positive" is set and a '+' otherwise.
 * An upper case "fmt_spec" selects the upper case texts.
 */
    static const char *
infinity_str(int positive,
             char fmt_spec,
             int force_sign,
             int space_for_positive)
{
    // Four lower case entries followed by the same four in upper case.
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // 1: no sign, 2: '+' sign, 3: space instead of a sign
    int positive_form = 1 + force_sign + force_sign * space_for_positive;
    int idx = positive * positive_form;

    return table[ASCII_ISUPPER(fmt_spec) ? idx + 4 : idx];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002408
2409/*
2410 * This code was included to provide a portable vsnprintf() and snprintf().
2411 * Some systems may provide their own, but we always use this one for
2412 * consistency.
2413 *
2414 * This code is based on snprintf.c - a portable implementation of snprintf
2415 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2416 * Included with permission. It was heavily modified to fit in Vim.
2417 * The original code, including useful comments, can be found here:
2418 * http://www.ijs.si/software/snprintf/
2419 *
2420 * This snprintf() only supports the following conversion specifiers:
2421 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2422 * with flags: '-', '+', ' ', '0' and '#'.
2423 * An asterisk is supported for field width as well as precision.
2424 *
2425 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2426 *
2427 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2428 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2429 *
2430 * The locale is not used, the string is used as a byte string. This is only
2431 * relevant for double-byte encodings where the second byte may be '%'.
2432 *
2433 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2434 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2435 *
 * The return value is the number of characters which would be generated
 * for the given input, excluding the trailing NUL.  If this value is
 * greater than or equal to "str_m", not all characters of the result have
 * been stored in "str": output bytes beyond the ("str_m" - 1)-th character
 * are discarded.  If "str_m" is greater than zero the resulting string is
 * guaranteed to be NUL-terminated.
2442 */
2443
2444/*
2445 * When va_list is not supported we only define vim_snprintf().
2446 *
2447 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2448 * "typval_T". When the latter is not used it must be NULL.
2449 */
2450
2451// When generating prototypes all of this is skipped, cproto doesn't
2452// understand this.
2453#ifndef PROTO
2454
// Like vim_snprintf() but append to the NUL-terminated text already in "str".
// "str_m" is the size of the whole buffer; when the existing text already
// fills it, vim_vsnprintf() is called with a size of zero.
// Returns what vim_vsnprintf() returns for the appended part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    size_t  used = STRLEN(str);
    size_t  room = (used < str_m) ? str_m - used : 0;
    va_list args;
    int     added;

    va_start(args, fmt);
    added = vim_vsnprintf(str + used, room, fmt, args);
    va_end(args);
    return added;
}
2473
// Format the variable arguments according to "fmt" into "str", which has room
// for "str_m" bytes.  Collects the arguments in a va_list and passes
// everything on to vim_vsnprintf(), whose result is returned.
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list args;
    int     len;

    va_start(args, fmt);
    len = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);
    return len;
}
2485
2486 int
2487vim_vsnprintf(
2488 char *str,
2489 size_t str_m,
2490 const char *fmt,
2491 va_list ap)
2492{
2493 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2494}
2495
// Argument types that format_typeof() can report for a conversion
// specification in a format string.
enum
{
    TYPE_UNKNOWN = -1,              // conversion not recognized
    TYPE_INT,                       // int
    TYPE_LONGINT,                   // long int
    TYPE_LONGLONGINT,               // read as varnumber_T
    TYPE_UNSIGNEDINT,               // unsigned int
    TYPE_UNSIGNEDLONGINT,           // unsigned long int
    TYPE_UNSIGNEDLONGLONGINT,       // read as uvarnumber_T
    TYPE_POINTER,                   // void *
    TYPE_PERCENT,                   // "%%", takes no argument
    TYPE_CHAR,                      // character, passed as int
    TYPE_STRING,                    // char *
    TYPE_FLOAT                      // double
};
2511
2512/* Types that can be used in a format string
2513 */
zeertzjq7772c932023-08-15 22:48:40 +02002514 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002515format_typeof(
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002516 const char *type)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002517{
2518 // allowed values: \0, h, l, L
2519 char length_modifier = '\0';
2520
2521 // current conversion specifier character
2522 char fmt_spec = '\0';
2523
2524 // parse 'h', 'l' and 'll' length modifiers
2525 if (*type == 'h' || *type == 'l')
2526 {
2527 length_modifier = *type;
2528 type++;
2529 if (length_modifier == 'l' && *type == 'l')
2530 {
2531 // double l = __int64 / varnumber_T
2532 length_modifier = 'L';
2533 type++;
2534 }
2535 }
2536 fmt_spec = *type;
2537
2538 // common synonyms:
2539 switch (fmt_spec)
2540 {
2541 case 'i': fmt_spec = 'd'; break;
2542 case '*': fmt_spec = 'd'; length_modifier = 'h'; break;
2543 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
2544 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
2545 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
2546 default: break;
2547 }
2548
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002549 // get parameter value, do initial processing
2550 switch (fmt_spec)
2551 {
2552 // '%' and 'c' behave similar to 's' regarding flags and field
2553 // widths
2554 case '%':
2555 return TYPE_PERCENT;
2556
2557 case 'c':
2558 return TYPE_CHAR;
2559
2560 case 's':
2561 case 'S':
2562 return TYPE_STRING;
2563
2564 case 'd': case 'u':
2565 case 'b': case 'B':
2566 case 'o':
2567 case 'x': case 'X':
2568 case 'p':
2569 {
2570 // NOTE: the u, b, o, x, X and p conversion specifiers
2571 // imply the value is unsigned; d implies a signed
2572 // value
2573
2574 // 0 if numeric argument is zero (or if pointer is
2575 // NULL for 'p'), +1 if greater than zero (or nonzero
2576 // for unsigned arguments), -1 if negative (unsigned
2577 // argument is never negative)
2578
2579 if (fmt_spec == 'p')
2580 return TYPE_POINTER;
2581 else if (fmt_spec == 'b' || fmt_spec == 'B')
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002582 return TYPE_UNSIGNEDLONGLONGINT;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002583 else if (fmt_spec == 'd')
2584 {
2585 // signed
2586 switch (length_modifier)
2587 {
2588 case '\0':
2589 case 'h':
2590 // char and short arguments are passed as int.
2591 return TYPE_INT;
2592 case 'l':
2593 return TYPE_LONGINT;
2594 case 'L':
2595 return TYPE_LONGLONGINT;
2596 }
2597 }
2598 else
2599 {
2600 // unsigned
2601 switch (length_modifier)
2602 {
2603 case '\0':
2604 case 'h':
2605 return TYPE_UNSIGNEDINT;
2606 case 'l':
2607 return TYPE_UNSIGNEDLONGINT;
2608 case 'L':
2609 return TYPE_UNSIGNEDLONGLONGINT;
2610 }
2611 }
2612 }
2613 break;
2614
2615 case 'f':
2616 case 'F':
2617 case 'e':
2618 case 'E':
2619 case 'g':
2620 case 'G':
2621 return TYPE_FLOAT;
2622 }
2623
2624 return TYPE_UNKNOWN;
2625}
2626
zeertzjq7772c932023-08-15 22:48:40 +02002627 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002628format_typename(
2629 const char *type)
2630{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002631 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002632 {
2633 case TYPE_INT:
2634 return _(typename_int);
2635
2636 case TYPE_LONGINT:
2637 return _(typename_longint);
2638
2639 case TYPE_LONGLONGINT:
2640 return _(typename_longlongint);
2641
2642 case TYPE_UNSIGNEDINT:
2643 return _(typename_unsignedint);
2644
2645 case TYPE_UNSIGNEDLONGINT:
2646 return _(typename_unsignedlongint);
2647
2648 case TYPE_UNSIGNEDLONGLONGINT:
2649 return _(typename_unsignedlonglongint);
2650
2651 case TYPE_POINTER:
2652 return _(typename_pointer);
2653
2654 case TYPE_PERCENT:
2655 return _(typename_percent);
2656
2657 case TYPE_CHAR:
2658 return _(typename_char);
2659
2660 case TYPE_STRING:
2661 return _(typename_string);
2662
2663 case TYPE_FLOAT:
2664 return _(typename_float);
2665 }
2666
2667 return _(typename_unknown);
2668}
2669
zeertzjq7772c932023-08-15 22:48:40 +02002670 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002671adjust_types(
2672 const char ***ap_types,
2673 int arg,
2674 int *num_posarg,
2675 const char *type)
2676{
2677 if (*ap_types == NULL || *num_posarg < arg)
2678 {
2679 int idx;
2680 const char **new_types;
2681
2682 if (*ap_types == NULL)
2683 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2684 else
K.Takata4c215ec2023-08-26 18:05:08 +02002685 new_types = vim_realloc((char **)*ap_types,
2686 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002687
2688 if (new_types == NULL)
2689 return FAIL;
2690
2691 for (idx = *num_posarg; idx < arg; ++idx)
2692 new_types[idx] = NULL;
2693
2694 *ap_types = new_types;
2695 *num_posarg = arg;
2696 }
2697
2698 if ((*ap_types)[arg - 1] != NULL)
2699 {
2700 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2701 {
2702 const char *pt = type;
2703 if (pt[0] == '*')
2704 pt = (*ap_types)[arg - 1];
2705
2706 if (pt[0] != '*')
2707 {
2708 switch (pt[0])
2709 {
2710 case 'd': case 'i': break;
2711 default:
2712 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2713 return FAIL;
2714 }
2715 }
2716 }
2717 else
2718 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002719 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002720 {
2721 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2722 return FAIL;
2723 }
2724 }
2725 }
2726
2727 (*ap_types)[arg - 1] = type;
2728
2729 return OK;
2730}
2731
Christ van Willegenc35fc032024-03-14 18:30:41 +01002732 static void
2733format_overflow_error(const char *pstart)
2734{
2735 size_t arglen = 0;
2736 char *argcopy = NULL;
2737 const char *p = pstart;
2738
2739 while (VIM_ISDIGIT((int)(*p)))
2740 ++p;
2741
2742 arglen = p - pstart;
2743 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2744 if (argcopy != NULL)
2745 {
2746 strncpy(argcopy, pstart, arglen);
2747 semsg(_( e_val_too_large), argcopy);
2748 free(argcopy);
2749 }
2750 else
2751 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2752}
2753
2754#define MAX_ALLOWED_STRING_WIDTH 6400
2755
2756 static int
2757get_unsigned_int(
2758 const char *pstart,
2759 const char **p,
zeertzjq0dff3152024-07-29 20:28:14 +02002760 unsigned int *uj,
2761 int overflow_err)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002762{
2763 *uj = **p - '0';
2764 ++*p;
2765
2766 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2767 {
2768 *uj = 10 * *uj + (unsigned int)(**p - '0');
2769 ++*p;
2770 }
2771
2772 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2773 {
zeertzjq0dff3152024-07-29 20:28:14 +02002774 if (overflow_err)
2775 {
2776 format_overflow_error(pstart);
2777 return FAIL;
2778 }
2779 else
2780 *uj = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002781 }
2782
2783 return OK;
2784}
2785
2786
zeertzjq7772c932023-08-15 22:48:40 +02002787 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002788parse_fmt_types(
2789 const char ***ap_types,
2790 int *num_posarg,
2791 const char *fmt,
2792 typval_T *tvs UNUSED
2793 )
2794{
2795 const char *p = fmt;
2796 const char *arg = NULL;
2797
2798 int any_pos = 0;
2799 int any_arg = 0;
2800 int arg_idx;
2801
2802#define CHECK_POS_ARG do { \
2803 if (any_pos && any_arg) \
2804 { \
2805 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2806 goto error; \
2807 } \
2808} while (0);
2809
2810 if (p == NULL)
2811 return OK;
2812
2813 while (*p != NUL)
2814 {
2815 if (*p != '%')
2816 {
2817 char *q = strchr(p + 1, '%');
2818 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2819
2820 p += n;
2821 }
2822 else
2823 {
2824 // allowed values: \0, h, l, L
2825 char length_modifier = '\0';
2826
2827 // variable for positional arg
2828 int pos_arg = -1;
2829 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002830 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002831
2832 p++; // skip '%'
2833
2834 // First check to see if we find a positional
2835 // argument specifier
2836 ptype = p;
2837
2838 while (VIM_ISDIGIT(*ptype))
2839 ++ptype;
2840
2841 if (*ptype == '$')
2842 {
2843 if (*p == '0')
2844 {
2845 // 0 flag at the wrong place
2846 semsg(_( e_invalid_format_specifier_str), fmt);
2847 goto error;
2848 }
2849
2850 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002851 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002852
zeertzjq0dff3152024-07-29 20:28:14 +02002853 if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002854 goto error;
2855
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002856 pos_arg = uj;
2857
2858 any_pos = 1;
2859 CHECK_POS_ARG;
2860
2861 ++p;
2862 }
2863
2864 // parse flags
2865 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2866 || *p == '#' || *p == '\'')
2867 {
2868 switch (*p)
2869 {
2870 case '0': break;
2871 case '-': break;
2872 case '+': break;
2873 case ' ': // If both the ' ' and '+' flags appear, the ' '
2874 // flag should be ignored
2875 break;
2876 case '#': break;
2877 case '\'': break;
2878 }
2879 p++;
2880 }
2881 // If the '0' and '-' flags both appear, the '0' flag should be
2882 // ignored.
2883
2884 // parse field width
2885 if (*(arg = p) == '*')
2886 {
2887 p++;
2888
2889 if (VIM_ISDIGIT((int)(*p)))
2890 {
2891 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002892 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002893
zeertzjq0dff3152024-07-29 20:28:14 +02002894 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002895 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002896
2897 if (*p != '$')
2898 {
2899 semsg(_( e_invalid_format_specifier_str), fmt);
2900 goto error;
2901 }
2902 else
2903 {
2904 ++p;
2905 any_pos = 1;
2906 CHECK_POS_ARG;
2907
2908 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2909 goto error;
2910 }
2911 }
2912 else
2913 {
2914 any_arg = 1;
2915 CHECK_POS_ARG;
2916 }
2917 }
dundargoc580c1fc2023-10-06 19:41:14 +02002918 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002919 {
2920 // size_t could be wider than unsigned int; make sure we treat
2921 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002922 const char *digstart = p;
2923 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002924
zeertzjq0dff3152024-07-29 20:28:14 +02002925 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002926 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002927
2928 if (*p == '$')
2929 {
2930 semsg(_( e_invalid_format_specifier_str), fmt);
2931 goto error;
2932 }
2933 }
2934
2935 // parse precision
2936 if (*p == '.')
2937 {
2938 p++;
2939
2940 if (*(arg = p) == '*')
2941 {
2942 p++;
2943
2944 if (VIM_ISDIGIT((int)(*p)))
2945 {
2946 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002947 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002948
zeertzjq0dff3152024-07-29 20:28:14 +02002949 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002950 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002951
2952 if (*p == '$')
2953 {
2954 any_pos = 1;
2955 CHECK_POS_ARG;
2956
2957 ++p;
2958
2959 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2960 goto error;
2961 }
2962 else
2963 {
2964 semsg(_( e_invalid_format_specifier_str), fmt);
2965 goto error;
2966 }
2967 }
2968 else
2969 {
2970 any_arg = 1;
2971 CHECK_POS_ARG;
2972 }
2973 }
dundargoc580c1fc2023-10-06 19:41:14 +02002974 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002975 {
2976 // size_t could be wider than unsigned int; make sure we
2977 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002978 const char *digstart = p;
2979 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002980
zeertzjq0dff3152024-07-29 20:28:14 +02002981 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002982 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002983
2984 if (*p == '$')
2985 {
2986 semsg(_( e_invalid_format_specifier_str), fmt);
2987 goto error;
2988 }
2989 }
2990 }
2991
2992 if (pos_arg != -1)
2993 {
2994 any_pos = 1;
2995 CHECK_POS_ARG;
2996
2997 ptype = p;
2998 }
2999
3000 // parse 'h', 'l' and 'll' length modifiers
3001 if (*p == 'h' || *p == 'l')
3002 {
3003 length_modifier = *p;
3004 p++;
3005 if (length_modifier == 'l' && *p == 'l')
3006 {
3007 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02003008 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003009 p++;
3010 }
3011 }
3012
3013 switch (*p)
3014 {
3015 // Check for known format specifiers. % is special!
3016 case 'i':
3017 case '*':
3018 case 'd':
3019 case 'u':
3020 case 'o':
3021 case 'D':
3022 case 'U':
3023 case 'O':
3024 case 'x':
3025 case 'X':
3026 case 'b':
3027 case 'B':
3028 case 'c':
3029 case 's':
3030 case 'S':
3031 case 'p':
3032 case 'f':
3033 case 'F':
3034 case 'e':
3035 case 'E':
3036 case 'g':
3037 case 'G':
3038 if (pos_arg != -1)
3039 {
3040 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
3041 goto error;
3042 }
3043 else
3044 {
3045 any_arg = 1;
3046 CHECK_POS_ARG;
3047 }
3048 break;
3049
3050 default:
3051 if (pos_arg != -1)
3052 {
3053 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
3054 goto error;
3055 }
3056 }
3057
3058 if (*p != NUL)
3059 p++; // step over the just processed conversion specifier
3060 }
3061 }
3062
3063 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
3064 {
3065 if ((*ap_types)[arg_idx] == NULL)
3066 {
3067 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
3068 goto error;
3069 }
3070
3071# if defined(FEAT_EVAL)
3072 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
3073 {
3074 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
3075 goto error;
3076 }
3077# endif
3078 }
3079
3080 return OK;
3081
3082error:
K.Takata4c215ec2023-08-26 18:05:08 +02003083 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003084 *ap_types = NULL;
3085 *num_posarg = 0;
3086 return FAIL;
3087}
3088
zeertzjq7772c932023-08-15 22:48:40 +02003089 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003090skip_to_arg(
3091 const char **ap_types,
3092 va_list ap_start,
3093 va_list *ap,
3094 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003095 int *arg_cur,
3096 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003097{
3098 int arg_min = 0;
3099
3100 if (*arg_cur + 1 == *arg_idx)
3101 {
3102 ++*arg_cur;
3103 ++*arg_idx;
3104 return;
3105 }
3106
3107 if (*arg_cur >= *arg_idx)
3108 {
3109 // Reset ap to ap_start and skip arg_idx - 1 types
3110 va_end(*ap);
3111 va_copy(*ap, ap_start);
3112 }
3113 else
3114 {
3115 // Skip over any we should skip
3116 arg_min = *arg_cur;
3117 }
3118
3119 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
3120 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003121 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003122
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003123 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
3124 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02003125 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003126 return;
3127 }
3128
3129 p = ap_types[*arg_cur];
3130
3131 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003132
3133 // get parameter value, do initial processing
3134 switch (fmt_type)
3135 {
3136 case TYPE_PERCENT:
3137 case TYPE_UNKNOWN:
3138 break;
3139
3140 case TYPE_CHAR:
3141 va_arg(*ap, int);
3142 break;
3143
3144 case TYPE_STRING:
3145 va_arg(*ap, char *);
3146 break;
3147
3148 case TYPE_POINTER:
3149 va_arg(*ap, void *);
3150 break;
3151
3152 case TYPE_INT:
3153 va_arg(*ap, int);
3154 break;
3155
3156 case TYPE_LONGINT:
3157 va_arg(*ap, long int);
3158 break;
3159
3160 case TYPE_LONGLONGINT:
3161 va_arg(*ap, varnumber_T);
3162 break;
3163
3164 case TYPE_UNSIGNEDINT:
3165 va_arg(*ap, unsigned int);
3166 break;
3167
3168 case TYPE_UNSIGNEDLONGINT:
3169 va_arg(*ap, unsigned long int);
3170 break;
3171
3172 case TYPE_UNSIGNEDLONGLONGINT:
3173 va_arg(*ap, uvarnumber_T);
3174 break;
3175
3176 case TYPE_FLOAT:
3177 va_arg(*ap, double);
3178 break;
3179 }
3180 }
3181
3182 // Because we know that after we return from this call,
3183 // a va_arg() call is made, we can pre-emptively
3184 // increment the current argument index.
3185 ++*arg_cur;
3186 ++*arg_idx;
3187
3188 return;
3189}
3190
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003191 int
3192vim_vsnprintf_typval(
3193 char *str,
3194 size_t str_m,
3195 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003196 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003197 typval_T *tvs)
3198{
3199 size_t str_l = 0;
3200 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003201 int arg_cur = 0;
3202 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003203 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003204 va_list ap;
3205 const char **ap_types = NULL;
3206
3207 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
3208 return 0;
3209
3210 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003211
3212 if (p == NULL)
3213 p = "";
3214 while (*p != NUL)
3215 {
3216 if (*p != '%')
3217 {
3218 char *q = strchr(p + 1, '%');
3219 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
3220
3221 // Copy up to the next '%' or NUL without any changes.
3222 if (str_l < str_m)
3223 {
3224 size_t avail = str_m - str_l;
3225
3226 mch_memmove(str + str_l, p, n > avail ? avail : n);
3227 }
3228 p += n;
3229 str_l += n;
3230 }
3231 else
3232 {
3233 size_t min_field_width = 0, precision = 0;
3234 int zero_padding = 0, precision_specified = 0, justify_left = 0;
3235 int alternate_form = 0, force_sign = 0;
3236
3237 // If both the ' ' and '+' flags appear, the ' ' flag should be
3238 // ignored.
3239 int space_for_positive = 1;
3240
3241 // allowed values: \0, h, l, L
3242 char length_modifier = '\0';
3243
3244 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003245# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003246 // That sounds reasonable to use as the maximum
3247 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003248 char tmp[TMP_LEN];
3249
3250 // string address in case of string argument
3251 const char *str_arg = NULL;
3252
3253 // natural field width of arg without padding and sign
3254 size_t str_arg_l;
3255
3256 // unsigned char argument value - only defined for c conversion.
3257 // N.B. standard explicitly states the char argument for the c
3258 // conversion is unsigned
3259 unsigned char uchar_arg;
3260
3261 // number of zeros to be inserted for numeric conversions as
3262 // required by the precision or minimal field width
3263 size_t number_of_zeros_to_pad = 0;
3264
3265 // index into tmp where zero padding is to be inserted
3266 size_t zero_padding_insertion_ind = 0;
3267
3268 // current conversion specifier character
3269 char fmt_spec = '\0';
3270
3271 // buffer for 's' and 'S' specs
3272 char_u *tofree = NULL;
3273
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003274 // variables for positional arg
3275 int pos_arg = -1;
3276 const char *ptype;
3277
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003278
3279 p++; // skip '%'
3280
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003281 // First check to see if we find a positional
3282 // argument specifier
3283 ptype = p;
3284
3285 while (VIM_ISDIGIT(*ptype))
3286 ++ptype;
3287
3288 if (*ptype == '$')
3289 {
3290 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003291 const char *digstart = p;
3292 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003293
zeertzjq0dff3152024-07-29 20:28:14 +02003294 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003295 goto error;
3296
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003297 pos_arg = uj;
3298
3299 ++p;
3300 }
3301
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003302 // parse flags
3303 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3304 || *p == '#' || *p == '\'')
3305 {
3306 switch (*p)
3307 {
3308 case '0': zero_padding = 1; break;
3309 case '-': justify_left = 1; break;
3310 case '+': force_sign = 1; space_for_positive = 0; break;
3311 case ' ': force_sign = 1;
3312 // If both the ' ' and '+' flags appear, the ' '
3313 // flag should be ignored
3314 break;
3315 case '#': alternate_form = 1; break;
3316 case '\'': break;
3317 }
3318 p++;
3319 }
3320 // If the '0' and '-' flags both appear, the '0' flag should be
3321 // ignored.
3322
3323 // parse field width
3324 if (*p == '*')
3325 {
3326 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003327 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003328
3329 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003330
3331 if (VIM_ISDIGIT((int)(*p)))
3332 {
3333 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003334 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003335
zeertzjq0dff3152024-07-29 20:28:14 +02003336 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003337 goto error;
3338
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003339 arg_idx = uj;
3340
3341 ++p;
3342 }
3343
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003344 j =
3345# if defined(FEAT_EVAL)
3346 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3347# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003348 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3349 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003350 va_arg(ap, int));
3351
Christ van Willegenc35fc032024-03-14 18:30:41 +01003352 if (j > MAX_ALLOWED_STRING_WIDTH)
3353 {
zeertzjq0dff3152024-07-29 20:28:14 +02003354 if (tvs != NULL)
3355 {
3356 format_overflow_error(digstart);
3357 goto error;
3358 }
3359 else
3360 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003361 }
3362
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003363 if (j >= 0)
3364 min_field_width = j;
3365 else
3366 {
3367 min_field_width = -j;
3368 justify_left = 1;
3369 }
3370 }
3371 else if (VIM_ISDIGIT((int)(*p)))
3372 {
3373 // size_t could be wider than unsigned int; make sure we treat
3374 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003375 const char *digstart = p;
3376 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003377
zeertzjq0dff3152024-07-29 20:28:14 +02003378 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003379 goto error;
3380
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003381 min_field_width = uj;
3382 }
3383
3384 // parse precision
3385 if (*p == '.')
3386 {
3387 p++;
3388 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003389
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003390 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003391 {
3392 // size_t could be wider than unsigned int; make sure we
3393 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003394 const char *digstart = p;
3395 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003396
zeertzjq0dff3152024-07-29 20:28:14 +02003397 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003398 goto error;
3399
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003400 precision = uj;
3401 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003402 else if (*p == '*')
3403 {
3404 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003405 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003406
3407 p++;
3408
3409 if (VIM_ISDIGIT((int)(*p)))
3410 {
3411 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003412 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003413
zeertzjq0dff3152024-07-29 20:28:14 +02003414 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003415 goto error;
3416
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003417 arg_idx = uj;
3418
3419 ++p;
3420 }
3421
3422 j =
3423# if defined(FEAT_EVAL)
3424 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3425# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003426 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3427 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003428 va_arg(ap, int));
3429
Christ van Willegenc35fc032024-03-14 18:30:41 +01003430 if (j > MAX_ALLOWED_STRING_WIDTH)
3431 {
zeertzjq0dff3152024-07-29 20:28:14 +02003432 if (tvs != NULL)
3433 {
3434 format_overflow_error(digstart);
3435 goto error;
3436 }
3437 else
3438 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003439 }
3440
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003441 if (j >= 0)
3442 precision = j;
3443 else
3444 {
3445 precision_specified = 0;
3446 precision = 0;
3447 }
3448 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003449 }
3450
3451 // parse 'h', 'l' and 'll' length modifiers
3452 if (*p == 'h' || *p == 'l')
3453 {
3454 length_modifier = *p;
3455 p++;
3456 if (length_modifier == 'l' && *p == 'l')
3457 {
3458 // double l = __int64 / varnumber_T
3459 length_modifier = 'L';
3460 p++;
3461 }
3462 }
3463 fmt_spec = *p;
3464
3465 // common synonyms:
3466 switch (fmt_spec)
3467 {
3468 case 'i': fmt_spec = 'd'; break;
3469 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3470 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3471 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3472 default: break;
3473 }
3474
3475# if defined(FEAT_EVAL)
3476 switch (fmt_spec)
3477 {
3478 case 'd': case 'u': case 'o': case 'x': case 'X':
3479 if (tvs != NULL && length_modifier == '\0')
3480 length_modifier = 'L';
3481 }
3482# endif
3483
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003484 if (pos_arg != -1)
3485 arg_idx = pos_arg;
3486
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003487 // get parameter value, do initial processing
3488 switch (fmt_spec)
3489 {
3490 // '%' and 'c' behave similar to 's' regarding flags and field
3491 // widths
3492 case '%':
3493 case 'c':
3494 case 's':
3495 case 'S':
3496 str_arg_l = 1;
3497 switch (fmt_spec)
3498 {
3499 case '%':
3500 str_arg = p;
3501 break;
3502
3503 case 'c':
3504 {
3505 int j;
3506
3507 j =
3508# if defined(FEAT_EVAL)
3509 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3510# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003511 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3512 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003513 va_arg(ap, int));
3514
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003515 // standard demands unsigned char
3516 uchar_arg = (unsigned char)j;
3517 str_arg = (char *)&uchar_arg;
3518 break;
3519 }
3520
3521 case 's':
3522 case 'S':
3523 str_arg =
3524# if defined(FEAT_EVAL)
3525 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3526# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003527 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3528 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003529 va_arg(ap, char *));
3530
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003531 if (str_arg == NULL)
3532 {
3533 str_arg = "[NULL]";
3534 str_arg_l = 6;
3535 }
3536 // make sure not to address string beyond the specified
3537 // precision !!!
3538 else if (!precision_specified)
3539 str_arg_l = strlen(str_arg);
3540 // truncate string if necessary as requested by precision
3541 else if (precision == 0)
3542 str_arg_l = 0;
3543 else
3544 {
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003545 // memchr on HP does not like n > 2^31 !!!
3546 char *q = memchr(str_arg, '\0',
3547 precision <= (size_t)0x7fffffffL ? precision
3548 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003549
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003550 str_arg_l = (q == NULL) ? precision
3551 : (size_t)(q - str_arg);
3552 }
3553 if (fmt_spec == 'S')
3554 {
presuku1f2453f2021-11-24 15:32:57 +00003555 char_u *p1;
3556 size_t i;
3557 int cell;
presukud85fccd2021-11-20 19:38:31 +00003558
presuku1f2453f2021-11-24 15:32:57 +00003559 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003560 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003561 {
3562 cell = mb_ptr2cells(p1);
3563 if (precision_specified && i + cell > precision)
3564 break;
3565 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003566 }
presuku1f2453f2021-11-24 15:32:57 +00003567
3568 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003569 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003570 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003571 }
3572 break;
3573
3574 default:
3575 break;
3576 }
3577 break;
3578
3579 case 'd': case 'u':
3580 case 'b': case 'B':
3581 case 'o':
3582 case 'x': case 'X':
3583 case 'p':
3584 {
3585 // NOTE: the u, b, o, x, X and p conversion specifiers
3586 // imply the value is unsigned; d implies a signed
3587 // value
3588
3589 // 0 if numeric argument is zero (or if pointer is
3590 // NULL for 'p'), +1 if greater than zero (or nonzero
3591 // for unsigned arguments), -1 if negative (unsigned
3592 // argument is never negative)
3593 int arg_sign = 0;
3594
3595 // only set for length modifier h, or for no length
3596 // modifiers
3597 int int_arg = 0;
3598 unsigned int uint_arg = 0;
3599
3600 // only set for length modifier l
3601 long int long_arg = 0;
3602 unsigned long int ulong_arg = 0;
3603
3604 // only set for length modifier ll
3605 varnumber_T llong_arg = 0;
3606 uvarnumber_T ullong_arg = 0;
3607
3608 // only set for b conversion
3609 uvarnumber_T bin_arg = 0;
3610
3611 // pointer argument value -only defined for p
3612 // conversion
3613 void *ptr_arg = NULL;
3614
3615 if (fmt_spec == 'p')
3616 {
3617 length_modifier = '\0';
3618 ptr_arg =
3619# if defined(FEAT_EVAL)
3620 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3621 NULL) :
3622# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003623 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3624 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003625 va_arg(ap, void *));
3626
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003627 if (ptr_arg != NULL)
3628 arg_sign = 1;
3629 }
3630 else if (fmt_spec == 'b' || fmt_spec == 'B')
3631 {
3632 bin_arg =
3633# if defined(FEAT_EVAL)
3634 tvs != NULL ?
3635 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3636# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003637 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3638 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003639 va_arg(ap, uvarnumber_T));
3640
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003641 if (bin_arg != 0)
3642 arg_sign = 1;
3643 }
3644 else if (fmt_spec == 'd')
3645 {
3646 // signed
3647 switch (length_modifier)
3648 {
3649 case '\0':
3650 case 'h':
3651 // char and short arguments are passed as int.
3652 int_arg =
3653# if defined(FEAT_EVAL)
3654 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3655# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003656 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3657 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003658 va_arg(ap, int));
3659
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003660 if (int_arg > 0)
3661 arg_sign = 1;
3662 else if (int_arg < 0)
3663 arg_sign = -1;
3664 break;
3665 case 'l':
3666 long_arg =
3667# if defined(FEAT_EVAL)
3668 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3669# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003670 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3671 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003672 va_arg(ap, long int));
3673
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003674 if (long_arg > 0)
3675 arg_sign = 1;
3676 else if (long_arg < 0)
3677 arg_sign = -1;
3678 break;
3679 case 'L':
3680 llong_arg =
3681# if defined(FEAT_EVAL)
3682 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3683# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003684 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3685 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003686 va_arg(ap, varnumber_T));
3687
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003688 if (llong_arg > 0)
3689 arg_sign = 1;
3690 else if (llong_arg < 0)
3691 arg_sign = -1;
3692 break;
3693 }
3694 }
3695 else
3696 {
3697 // unsigned
3698 switch (length_modifier)
3699 {
3700 case '\0':
3701 case 'h':
3702 uint_arg =
3703# if defined(FEAT_EVAL)
3704 tvs != NULL ? (unsigned)
3705 tv_nr(tvs, &arg_idx) :
3706# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003707 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3708 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003709 va_arg(ap, unsigned int));
3710
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003711 if (uint_arg != 0)
3712 arg_sign = 1;
3713 break;
3714 case 'l':
3715 ulong_arg =
3716# if defined(FEAT_EVAL)
3717 tvs != NULL ? (unsigned long)
3718 tv_nr(tvs, &arg_idx) :
3719# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003720 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3721 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003722 va_arg(ap, unsigned long int));
3723
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003724 if (ulong_arg != 0)
3725 arg_sign = 1;
3726 break;
3727 case 'L':
3728 ullong_arg =
3729# if defined(FEAT_EVAL)
3730 tvs != NULL ? (uvarnumber_T)
3731 tv_nr(tvs, &arg_idx) :
3732# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003733 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3734 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003735 va_arg(ap, uvarnumber_T));
3736
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003737 if (ullong_arg != 0)
3738 arg_sign = 1;
3739 break;
3740 }
3741 }
3742
3743 str_arg = tmp;
3744 str_arg_l = 0;
3745
3746 // NOTE:
3747 // For d, i, u, o, x, and X conversions, if precision is
3748 // specified, the '0' flag should be ignored. This is so
3749 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3750 // FreeBSD, NetBSD; but not with Perl.
3751 if (precision_specified)
3752 zero_padding = 0;
3753 if (fmt_spec == 'd')
3754 {
3755 if (force_sign && arg_sign >= 0)
3756 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3757 // leave negative numbers for sprintf to handle, to
3758 // avoid handling tricky cases like (short int)-32768
3759 }
3760 else if (alternate_form)
3761 {
3762 if (arg_sign != 0
3763 && (fmt_spec == 'b' || fmt_spec == 'B'
3764 || fmt_spec == 'x' || fmt_spec == 'X') )
3765 {
3766 tmp[str_arg_l++] = '0';
3767 tmp[str_arg_l++] = fmt_spec;
3768 }
3769 // alternate form should have no effect for p
3770 // conversion, but ...
3771 }
3772
3773 zero_padding_insertion_ind = str_arg_l;
3774 if (!precision_specified)
3775 precision = 1; // default precision is 1
3776 if (precision == 0 && arg_sign == 0)
3777 {
3778 // When zero value is formatted with an explicit
3779 // precision 0, the resulting formatted string is
3780 // empty (d, i, u, b, B, o, x, X, p).
3781 }
3782 else
3783 {
3784 char f[6];
3785 int f_l = 0;
3786
3787 // construct a simple format string for sprintf
3788 f[f_l++] = '%';
3789 if (!length_modifier)
3790 ;
3791 else if (length_modifier == 'L')
3792 {
3793# ifdef MSWIN
3794 f[f_l++] = 'I';
3795 f[f_l++] = '6';
3796 f[f_l++] = '4';
3797# else
3798 f[f_l++] = 'l';
3799 f[f_l++] = 'l';
3800# endif
3801 }
3802 else
3803 f[f_l++] = length_modifier;
3804 f[f_l++] = fmt_spec;
3805 f[f_l++] = '\0';
3806
3807 if (fmt_spec == 'p')
3808 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3809 else if (fmt_spec == 'b' || fmt_spec == 'B')
3810 {
3811 char b[8 * sizeof(uvarnumber_T)];
3812 size_t b_l = 0;
3813 uvarnumber_T bn = bin_arg;
3814
3815 do
3816 {
3817 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3818 bn >>= 1;
3819 }
3820 while (bn != 0);
3821
3822 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3823 str_arg_l += b_l;
3824 }
3825 else if (fmt_spec == 'd')
3826 {
3827 // signed
3828 switch (length_modifier)
3829 {
3830 case '\0': str_arg_l += sprintf(
3831 tmp + str_arg_l, f,
3832 int_arg);
3833 break;
3834 case 'h': str_arg_l += sprintf(
3835 tmp + str_arg_l, f,
3836 (short)int_arg);
3837 break;
3838 case 'l': str_arg_l += sprintf(
3839 tmp + str_arg_l, f, long_arg);
3840 break;
3841 case 'L': str_arg_l += sprintf(
3842 tmp + str_arg_l, f, llong_arg);
3843 break;
3844 }
3845 }
3846 else
3847 {
3848 // unsigned
3849 switch (length_modifier)
3850 {
3851 case '\0': str_arg_l += sprintf(
3852 tmp + str_arg_l, f,
3853 uint_arg);
3854 break;
3855 case 'h': str_arg_l += sprintf(
3856 tmp + str_arg_l, f,
3857 (unsigned short)uint_arg);
3858 break;
3859 case 'l': str_arg_l += sprintf(
3860 tmp + str_arg_l, f, ulong_arg);
3861 break;
3862 case 'L': str_arg_l += sprintf(
3863 tmp + str_arg_l, f, ullong_arg);
3864 break;
3865 }
3866 }
3867
3868 // include the optional minus sign and possible
3869 // "0x" in the region before the zero padding
3870 // insertion point
3871 if (zero_padding_insertion_ind < str_arg_l
3872 && tmp[zero_padding_insertion_ind] == '-')
3873 zero_padding_insertion_ind++;
3874 if (zero_padding_insertion_ind + 1 < str_arg_l
3875 && tmp[zero_padding_insertion_ind] == '0'
3876 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3877 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3878 zero_padding_insertion_ind += 2;
3879 }
3880
3881 {
3882 size_t num_of_digits = str_arg_l
3883 - zero_padding_insertion_ind;
3884
3885 if (alternate_form && fmt_spec == 'o'
3886 // unless zero is already the first
3887 // character
3888 && !(zero_padding_insertion_ind < str_arg_l
3889 && tmp[zero_padding_insertion_ind] == '0'))
3890 {
3891 // assure leading zero for alternate-form
3892 // octal numbers
3893 if (!precision_specified
3894 || precision < num_of_digits + 1)
3895 {
3896 // precision is increased to force the
3897 // first character to be zero, except if a
3898 // zero value is formatted with an
3899 // explicit precision of zero
3900 precision = num_of_digits + 1;
3901 }
3902 }
3903 // zero padding to specified precision?
3904 if (num_of_digits < precision)
3905 number_of_zeros_to_pad = precision - num_of_digits;
3906 }
3907 // zero padding to specified minimal field width?
3908 if (!justify_left && zero_padding)
3909 {
3910 int n = (int)(min_field_width - (str_arg_l
3911 + number_of_zeros_to_pad));
3912 if (n > 0)
3913 number_of_zeros_to_pad += n;
3914 }
3915 break;
3916 }
3917
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003918 case 'f':
3919 case 'F':
3920 case 'e':
3921 case 'E':
3922 case 'g':
3923 case 'G':
3924 {
3925 // Floating point.
3926 double f;
3927 double abs_f;
3928 char format[40];
3929 int l;
3930 int remove_trailing_zeroes = FALSE;
3931
3932 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003933# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003934 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003935# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003936 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3937 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003938 va_arg(ap, double));
3939
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003940 abs_f = f < 0 ? -f : f;
3941
3942 if (fmt_spec == 'g' || fmt_spec == 'G')
3943 {
3944 // Would be nice to use %g directly, but it prints
3945 // "1.0" as "1", we don't want that.
3946 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3947 || abs_f == 0.0)
3948 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3949 else
3950 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3951 remove_trailing_zeroes = TRUE;
3952 }
3953
3954 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003955# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003956 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003957# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003958 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003959# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003960 )
3961 {
3962 // Avoid a buffer overflow
3963 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3964 force_sign, space_for_positive));
3965 str_arg_l = STRLEN(tmp);
3966 zero_padding = 0;
3967 }
3968 else
3969 {
3970 if (isnan(f))
3971 {
3972 // Not a number: nan or NAN
3973 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3974 : "nan");
3975 str_arg_l = 3;
3976 zero_padding = 0;
3977 }
3978 else if (isinf(f))
3979 {
3980 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3981 force_sign, space_for_positive));
3982 str_arg_l = STRLEN(tmp);
3983 zero_padding = 0;
3984 }
3985 else
3986 {
3987 // Regular float number
3988 format[0] = '%';
3989 l = 1;
3990 if (force_sign)
3991 format[l++] = space_for_positive ? ' ' : '+';
3992 if (precision_specified)
3993 {
3994 size_t max_prec = TMP_LEN - 10;
3995
3996 // Make sure we don't get more digits than we
3997 // have room for.
3998 if ((fmt_spec == 'f' || fmt_spec == 'F')
3999 && abs_f > 1.0)
4000 max_prec -= (size_t)log10(abs_f);
4001 if (precision > max_prec)
4002 precision = max_prec;
4003 l += sprintf(format + l, ".%d", (int)precision);
4004 }
4005 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
4006 format[l + 1] = NUL;
4007
4008 str_arg_l = sprintf(tmp, format, f);
4009 }
4010
4011 if (remove_trailing_zeroes)
4012 {
4013 int i;
4014 char *tp;
4015
4016 // Using %g or %G: remove superfluous zeroes.
4017 if (fmt_spec == 'f' || fmt_spec == 'F')
4018 tp = tmp + str_arg_l - 1;
4019 else
4020 {
4021 tp = (char *)vim_strchr((char_u *)tmp,
4022 fmt_spec == 'e' ? 'e' : 'E');
4023 if (tp != NULL)
4024 {
4025 // Remove superfluous '+' and leading
4026 // zeroes from the exponent.
4027 if (tp[1] == '+')
4028 {
4029 // Change "1.0e+07" to "1.0e07"
4030 STRMOVE(tp + 1, tp + 2);
4031 --str_arg_l;
4032 }
4033 i = (tp[1] == '-') ? 2 : 1;
4034 while (tp[i] == '0')
4035 {
4036 // Change "1.0e07" to "1.0e7"
4037 STRMOVE(tp + i, tp + i + 1);
4038 --str_arg_l;
4039 }
4040 --tp;
4041 }
4042 }
4043
4044 if (tp != NULL && !precision_specified)
4045 // Remove trailing zeroes, but keep the one
4046 // just after a dot.
4047 while (tp > tmp + 2 && *tp == '0'
4048 && tp[-1] != '.')
4049 {
4050 STRMOVE(tp, tp + 1);
4051 --tp;
4052 --str_arg_l;
4053 }
4054 }
4055 else
4056 {
4057 char *tp;
4058
4059 // Be consistent: some printf("%e") use 1.0e+12
4060 // and some 1.0e+012. Remove one zero in the last
4061 // case.
4062 tp = (char *)vim_strchr((char_u *)tmp,
4063 fmt_spec == 'e' ? 'e' : 'E');
4064 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
4065 && tp[2] == '0'
4066 && vim_isdigit(tp[3])
4067 && vim_isdigit(tp[4]))
4068 {
4069 STRMOVE(tp + 2, tp + 3);
4070 --str_arg_l;
4071 }
4072 }
4073 }
4074 if (zero_padding && min_field_width > str_arg_l
4075 && (tmp[0] == '-' || force_sign))
4076 {
4077 // padding 0's should be inserted after the sign
4078 number_of_zeros_to_pad = min_field_width - str_arg_l;
4079 zero_padding_insertion_ind = 1;
4080 }
4081 str_arg = tmp;
4082 break;
4083 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004084
4085 default:
4086 // unrecognized conversion specifier, keep format string
4087 // as-is
4088 zero_padding = 0; // turn zero padding off for non-numeric
4089 // conversion
4090 justify_left = 1;
4091 min_field_width = 0; // reset flags
4092
4093 // discard the unrecognized conversion, just keep *
4094 // the unrecognized conversion character
4095 str_arg = p;
4096 str_arg_l = 0;
4097 if (*p != NUL)
4098 str_arg_l++; // include invalid conversion specifier
4099 // unchanged if not at end-of-string
4100 break;
4101 }
4102
4103 if (*p != NUL)
4104 p++; // step over the just processed conversion specifier
4105
4106 // insert padding to the left as requested by min_field_width;
4107 // this does not include the zero padding in case of numerical
4108 // conversions
4109 if (!justify_left)
4110 {
4111 // left padding with blank or zero
4112 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
4113
4114 if (pn > 0)
4115 {
4116 if (str_l < str_m)
4117 {
4118 size_t avail = str_m - str_l;
4119
4120 vim_memset(str + str_l, zero_padding ? '0' : ' ',
4121 (size_t)pn > avail ? avail
4122 : (size_t)pn);
4123 }
4124 str_l += pn;
4125 }
4126 }
4127
4128 // zero padding as requested by the precision or by the minimal
4129 // field width for numeric conversions required?
4130 if (number_of_zeros_to_pad == 0)
4131 {
4132 // will not copy first part of numeric right now, *
4133 // force it to be copied later in its entirety
4134 zero_padding_insertion_ind = 0;
4135 }
4136 else
4137 {
4138 // insert first part of numerics (sign or '0x') before zero
4139 // padding
4140 int zn = (int)zero_padding_insertion_ind;
4141
4142 if (zn > 0)
4143 {
4144 if (str_l < str_m)
4145 {
4146 size_t avail = str_m - str_l;
4147
4148 mch_memmove(str + str_l, str_arg,
4149 (size_t)zn > avail ? avail
4150 : (size_t)zn);
4151 }
4152 str_l += zn;
4153 }
4154
4155 // insert zero padding as requested by the precision or min
4156 // field width
4157 zn = (int)number_of_zeros_to_pad;
4158 if (zn > 0)
4159 {
4160 if (str_l < str_m)
4161 {
4162 size_t avail = str_m - str_l;
4163
4164 vim_memset(str + str_l, '0',
4165 (size_t)zn > avail ? avail
4166 : (size_t)zn);
4167 }
4168 str_l += zn;
4169 }
4170 }
4171
4172 // insert formatted string
4173 // (or as-is conversion specifier for unknown conversions)
4174 {
4175 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
4176
4177 if (sn > 0)
4178 {
4179 if (str_l < str_m)
4180 {
4181 size_t avail = str_m - str_l;
4182
4183 mch_memmove(str + str_l,
4184 str_arg + zero_padding_insertion_ind,
4185 (size_t)sn > avail ? avail : (size_t)sn);
4186 }
4187 str_l += sn;
4188 }
4189 }
4190
4191 // insert right padding
4192 if (justify_left)
4193 {
4194 // right blank padding to the field width
4195 int pn = (int)(min_field_width
4196 - (str_arg_l + number_of_zeros_to_pad));
4197
4198 if (pn > 0)
4199 {
4200 if (str_l < str_m)
4201 {
4202 size_t avail = str_m - str_l;
4203
4204 vim_memset(str + str_l, ' ',
4205 (size_t)pn > avail ? avail
4206 : (size_t)pn);
4207 }
4208 str_l += pn;
4209 }
4210 }
4211 vim_free(tofree);
4212 }
4213 }
4214
4215 if (str_m > 0)
4216 {
4217 // make sure the string is nul-terminated even at the expense of
4218 // overwriting the last character (shouldn't happen, but just in case)
4219 //
4220 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
4221 }
4222
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004223 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00004224 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004225
Christ van Willegenc35fc032024-03-14 18:30:41 +01004226error:
K.Takata4c215ec2023-08-26 18:05:08 +02004227 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004228 va_end(ap);
4229
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004230 // Return the number of characters formatted (excluding trailing nul
4231 // character), that is, the number of characters that would have been
4232 // written to the buffer if it were large enough.
4233 return (int)str_l;
4234}
4235
4236#endif // PROTO