blob: c356c36665583f09f4cf5d6561cbc1e4bbcd0e70 [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
Mike Williams51024bb2024-05-30 07:46:30 +0200154 size_t l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
zeertzjq88c8c542024-05-30 19:27:25 +0200272 if (do_special && find_cmdline_var(p, &l) >= 0)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200273 {
274 *d++ = '\\'; // insert backslash
zeertzjq88c8c542024-05-30 19:27:25 +0200275 memcpy(d, p, l); // copy the var
276 d += l;
277 p += l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000341 if (p == NULL)
342 return;
343
344 p2 = p;
345 while ((c = *p2) != NUL)
346 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000528/*
529 * A version of strlen() that has a maximum length.
530 */
531 size_t
532vim_strlen_maxlen(char *s, size_t maxlen)
533{
534 size_t i;
535 for (i = 0; i < maxlen; ++i)
536 if (s[i] == NUL)
537 break;
538 return i;
539}
540
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200541#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
542/*
543 * Compare two strings, ignoring case, using current locale.
544 * Doesn't work for multi-byte characters.
545 * return 0 for match, < 0 for smaller, > 0 for bigger
546 */
547 int
548vim_stricmp(char *s1, char *s2)
549{
550 int i;
551
552 for (;;)
553 {
554 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
555 if (i != 0)
556 return i; // this character different
557 if (*s1 == NUL)
558 break; // strings match until NUL
559 ++s1;
560 ++s2;
561 }
562 return 0; // strings match
563}
564#endif
565
566#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
567/*
568 * Compare two strings, for length "len", ignoring case, using current locale.
569 * Doesn't work for multi-byte characters.
570 * return 0 for match, < 0 for smaller, > 0 for bigger
571 */
572 int
573vim_strnicmp(char *s1, char *s2, size_t len)
574{
575 int i;
576
577 while (len > 0)
578 {
579 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
580 if (i != 0)
581 return i; // this character different
582 if (*s1 == NUL)
583 break; // strings match until NUL
584 ++s1;
585 ++s2;
586 --len;
587 }
588 return 0; // strings match
589}
590#endif
591
592/*
Christian Brabandt84e31752024-09-02 09:59:18 +0200593 * Compare two ASCII strings, for length "len", ignoring case, ignoring locale
594 * (mostly matters for turkish locale where i I might be different).
595 * return 0 for match, < 0 for smaller, > 0 for bigger
596 */
597 int
598vim_strnicmp_asc(char *s1, char *s2, size_t len)
599{
John Marriottc847c122024-11-24 14:09:40 +0100600 int i = 0;
Christian Brabandt84e31752024-09-02 09:59:18 +0200601
Christian Brabandt84e31752024-09-02 09:59:18 +0200602 while (len > 0)
603 {
Yegappan Lakshmanane89aef32025-05-14 20:31:55 +0200604 i = TOLOWER_ASC(*s1) - TOLOWER_ASC(*s2);
605 if (i != 0)
606 break; // this character is different
607 if (*s1 == NUL)
608 break; // strings match until NUL
609 ++s1;
610 ++s2;
611 --len;
Christian Brabandt84e31752024-09-02 09:59:18 +0200612 }
Christian Brabandt84e31752024-09-02 09:59:18 +0200613 return i;
614}
615
616/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200617 * Search for first occurrence of "c" in "string".
618 * Version of strchr() that handles unsigned char strings with characters from
619 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
620 * end of the string.
621 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000622 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200623vim_strchr(char_u *string, int c)
624{
625 char_u *p;
626 int b;
627
628 p = string;
629 if (enc_utf8 && c >= 0x80)
630 {
631 while (*p != NUL)
632 {
633 int l = utfc_ptr2len(p);
634
635 // Avoid matching an illegal byte here.
636 if (utf_ptr2char(p) == c && l > 1)
637 return p;
638 p += l;
639 }
640 return NULL;
641 }
642 if (enc_dbcs != 0 && c > 255)
643 {
644 int n2 = c & 0xff;
645
646 c = ((unsigned)c >> 8) & 0xff;
647 while ((b = *p) != NUL)
648 {
649 if (b == c && p[1] == n2)
650 return p;
651 p += (*mb_ptr2len)(p);
652 }
653 return NULL;
654 }
655 if (has_mbyte)
656 {
657 while ((b = *p) != NUL)
658 {
659 if (b == c)
660 return p;
661 p += (*mb_ptr2len)(p);
662 }
663 return NULL;
664 }
665 while ((b = *p) != NUL)
666 {
667 if (b == c)
668 return p;
669 ++p;
670 }
671 return NULL;
672}
673
674/*
675 * Version of strchr() that only works for bytes and handles unsigned char
676 * strings with characters above 128 correctly. It also doesn't return a
677 * pointer to the NUL at the end of the string.
678 */
679 char_u *
680vim_strbyte(char_u *string, int c)
681{
682 char_u *p = string;
683
684 while (*p != NUL)
685 {
686 if (*p == c)
687 return p;
688 ++p;
689 }
690 return NULL;
691}
692
693/*
694 * Search for last occurrence of "c" in "string".
695 * Version of strrchr() that handles unsigned char strings with characters from
696 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
697 * end of the string.
698 * Return NULL if not found.
699 * Does not handle multi-byte char for "c"!
700 */
701 char_u *
702vim_strrchr(char_u *string, int c)
703{
704 char_u *retval = NULL;
705 char_u *p = string;
706
707 while (*p)
708 {
709 if (*p == c)
710 retval = p;
711 MB_PTR_ADV(p);
712 }
713 return retval;
714}
715
716/*
717 * Vim's version of strpbrk(), in case it's missing.
718 * Don't generate a prototype for this, causes problems when it's not used.
719 */
720#ifndef PROTO
721# ifndef HAVE_STRPBRK
722# ifdef vim_strpbrk
723# undef vim_strpbrk
724# endif
725 char_u *
726vim_strpbrk(char_u *s, char_u *charset)
727{
728 while (*s)
729 {
730 if (vim_strchr(charset, *s) != NULL)
731 return s;
732 MB_PTR_ADV(s);
733 }
734 return NULL;
735}
736# endif
737#endif
738
739/*
740 * Sort an array of strings.
741 */
static int sort_compare(const void *s1, const void *s2);

/*
 * Comparison function for qsort(), used by sort_strings().
 * "s1" and "s2" point to string pointers; the strings are compared with
 * STRCMP().
 */
    static int
sort_compare(const void *s1, const void *s2)
{
    char        *left = *(char **)s1;
    char        *right = *(char **)s2;

    return STRCMP(left, right);
}
749
750 void
751sort_strings(
752 char_u **files,
753 int count)
754{
755 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
756}
757
758#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
759/*
760 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
761 * When "s" is NULL FALSE is returned.
762 */
763 int
764has_non_ascii(char_u *s)
765{
766 char_u *p;
767
768 if (s != NULL)
769 for (p = s; *p != NUL; ++p)
770 if (*p >= 128)
771 return TRUE;
772 return FALSE;
773}
774#endif
775
776/*
777 * Concatenate two strings and return the result in allocated memory.
778 * Returns NULL when out of memory.
779 */
780 char_u *
781concat_str(char_u *str1, char_u *str2)
782{
783 char_u *dest;
784 size_t l = str1 == NULL ? 0 : STRLEN(str1);
785
786 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000787 if (dest == NULL)
788 return NULL;
789 if (str1 == NULL)
790 *dest = NUL;
791 else
792 STRCPY(dest, str1);
793 if (str2 != NULL)
794 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200795 return dest;
796}
797
zeertzjq4dd266c2023-08-19 11:35:03 +0200798#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
799/*
800 * Reverse text into allocated memory.
801 * Returns the allocated string, NULL when out of memory.
802 */
803 char_u *
804reverse_text(char_u *s)
805{
806 size_t len = STRLEN(s);
807 char_u *rev = alloc(len + 1);
808 if (rev == NULL)
809 return NULL;
810
811 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
812 {
813 if (has_mbyte)
814 {
815 int mb_len = (*mb_ptr2len)(s + s_i);
816 rev_i -= mb_len;
817 mch_memmove(rev + rev_i, s + s_i, mb_len);
818 s_i += mb_len - 1;
819 }
820 else
821 rev[--rev_i] = s[s_i];
822 }
823 rev[len] = NUL;
824 return rev;
825}
826#endif
827
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200828#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200829/*
830 * Return string "str" in ' quotes, doubling ' characters.
831 * If "str" is NULL an empty string is assumed.
832 * If "function" is TRUE make it function('string').
833 */
834 char_u *
835string_quote(char_u *str, int function)
836{
837 unsigned len;
838 char_u *p, *r, *s;
839
840 len = (function ? 13 : 3);
841 if (str != NULL)
842 {
843 len += (unsigned)STRLEN(str);
844 for (p = str; *p != NUL; MB_PTR_ADV(p))
845 if (*p == '\'')
846 ++len;
847 }
848 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000849 if (r == NULL)
850 return NULL;
851
852 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200853 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000854 STRCPY(r, "function('");
855 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200856 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000857 else
858 *r++ = '\'';
859 if (str != NULL)
860 for (p = str; *p != NUL; )
861 {
862 if (*p == '\'')
863 *r++ = '\'';
864 MB_COPY_CHAR(p, r);
865 }
866 *r++ = '\'';
867 if (function)
868 *r++ = ')';
869 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200870 return s;
871}
872
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000873/*
874 * Count the number of times "needle" occurs in string "haystack". Case is
875 * ignored if "ic" is TRUE.
876 */
877 long
878string_count(char_u *haystack, char_u *needle, int ic)
879{
880 long n = 0;
881 char_u *p = haystack;
882 char_u *next;
883
884 if (p == NULL || needle == NULL || *needle == NUL)
885 return 0;
886
887 if (ic)
888 {
889 size_t len = STRLEN(needle);
890
891 while (*p != NUL)
892 {
893 if (MB_STRNICMP(p, needle, len) == 0)
894 {
895 ++n;
896 p += len;
897 }
898 else
899 MB_PTR_ADV(p);
900 }
901 }
902 else
903 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
904 {
905 ++n;
906 p = next + STRLEN(needle);
907 }
908
909 return n;
910}
911
912/*
913 * Make a typval_T of the first character of "input" and store it in "output".
914 * Return OK or FAIL.
915 */
916 static int
917copy_first_char_to_tv(char_u *input, typval_T *output)
918{
919 char_u buf[MB_MAXBYTES + 1];
920 int len;
921
922 if (input == NULL || output == NULL)
923 return FAIL;
924
925 len = has_mbyte ? mb_ptr2len(input) : 1;
926 STRNCPY(buf, input, len);
927 buf[len] = NUL;
928 output->v_type = VAR_STRING;
929 output->vval.v_string = vim_strsave(buf);
930
931 return output->vval.v_string == NULL ? FAIL : OK;
932}
933
934/*
935 * Implementation of map() and filter() for a String. Apply "expr" to every
936 * character in string "str" and return the result in "rettv".
937 */
938 void
939string_filter_map(
940 char_u *str,
941 filtermap_T filtermap,
942 typval_T *expr,
943 typval_T *rettv)
944{
945 char_u *p;
946 typval_T tv;
947 garray_T ga;
948 int len = 0;
949 int idx = 0;
950 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100951 typval_T newtv;
952 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000953
954 rettv->v_type = VAR_STRING;
955 rettv->vval.v_string = NULL;
956
957 // set_vim_var_nr() doesn't set the type
958 set_vim_var_type(VV_KEY, VAR_NUMBER);
959
zeertzjqe7d49462023-04-16 20:53:55 +0100960 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100961 fc = eval_expr_get_funccal(expr, &newtv);
962
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000963 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000964 for (p = str; *p != NUL; p += len)
965 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000966 if (copy_first_char_to_tv(p, &tv) == FAIL)
967 break;
968 len = (int)STRLEN(tv.vval.v_string);
969
970 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100971 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000972 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000973 {
974 clear_tv(&newtv);
975 clear_tv(&tv);
976 break;
977 }
Ernie Raele79e2072024-01-13 11:47:33 +0100978 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000979 {
980 if (newtv.v_type != VAR_STRING)
981 {
982 clear_tv(&newtv);
983 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000984 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000985 break;
986 }
987 else
988 ga_concat(&ga, newtv.vval.v_string);
989 }
Ernie Raele79e2072024-01-13 11:47:33 +0100990 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000991 ga_concat(&ga, tv.vval.v_string);
992
993 clear_tv(&newtv);
994 clear_tv(&tv);
995
996 ++idx;
997 }
998 ga_append(&ga, NUL);
999 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +01001000 if (fc != NULL)
1001 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001002}
1003
1004/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001005 * Implementation of reduce() for String "argvars[0]" using the function "expr"
1006 * starting with the optional initial value "argvars[2]" and return the result
1007 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001008 */
1009 void
1010string_reduce(
1011 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001012 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001013 typval_T *rettv)
1014{
1015 char_u *p = tv_get_string(&argvars[0]);
1016 int len;
1017 typval_T argv[3];
1018 int r;
1019 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +01001020 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001021
1022 if (argvars[2].v_type == VAR_UNKNOWN)
1023 {
1024 if (*p == NUL)
1025 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001026 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001027 return;
1028 }
1029 if (copy_first_char_to_tv(p, rettv) == FAIL)
1030 return;
1031 p += STRLEN(rettv->vval.v_string);
1032 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001033 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001034 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001035 else
1036 copy_tv(&argvars[2], rettv);
1037
zeertzjqe7d49462023-04-16 20:53:55 +01001038 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001039 fc = eval_expr_get_funccal(expr, rettv);
1040
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001041 for ( ; *p != NUL; p += len)
1042 {
1043 argv[0] = *rettv;
1044 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1045 break;
1046 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001047
zeertzjqad0c4422023-08-17 22:15:47 +02001048 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001049
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001050 clear_tv(&argv[0]);
1051 clear_tv(&argv[1]);
1052 if (r == FAIL || called_emsg != called_emsg_start)
1053 return;
1054 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001055
1056 if (fc != NULL)
1057 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001058}
1059
Bram Moolenaare4098452023-05-07 18:53:49 +01001060/*
1061 * Implementation of "byteidx()" and "byteidxcomp()" functions
1062 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001063 static void
Dominique Pellé0268ff32024-07-28 21:12:20 +02001064byteidx_common(typval_T *argvars, typval_T *rettv, int comp)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001065{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001066 rettv->vval.v_number = -1;
1067
1068 if (in_vim9script()
1069 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001070 || check_for_number_arg(argvars, 1) == FAIL
1071 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001072 return;
1073
Christian Brabandt67672ef2023-04-24 21:09:54 +01001074 char_u *str = tv_get_string_chk(&argvars[0]);
1075 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001076 if (str == NULL || idx < 0)
1077 return;
1078
Christian Brabandt67672ef2023-04-24 21:09:54 +01001079 varnumber_T utf16idx = FALSE;
1080 if (argvars[2].v_type != VAR_UNKNOWN)
1081 {
zeertzjq8cf51372023-05-08 15:31:38 +01001082 int error = FALSE;
1083 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1084 if (error)
1085 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001086 if (utf16idx < 0 || utf16idx > 1)
1087 {
zeertzjq8cf51372023-05-08 15:31:38 +01001088 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001089 return;
1090 }
1091 }
1092
1093 int (*ptr2len)(char_u *);
1094 if (enc_utf8 && comp)
1095 ptr2len = utf_ptr2len;
1096 else
1097 ptr2len = mb_ptr2len;
1098
1099 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001100 for ( ; idx > 0; idx--)
1101 {
1102 if (*t == NUL) // EOL reached
1103 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001104 if (utf16idx)
1105 {
1106 int clen = ptr2len(t);
1107 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1108 if (c > 0xFFFF)
1109 idx--;
1110 }
1111 if (idx > 0)
1112 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001113 }
1114 rettv->vval.v_number = (varnumber_T)(t - str);
1115}
1116
1117/*
1118 * "byteidx()" function
1119 */
1120 void
1121f_byteidx(typval_T *argvars, typval_T *rettv)
1122{
Bram Moolenaare4098452023-05-07 18:53:49 +01001123 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001124}
1125
1126/*
1127 * "byteidxcomp()" function
1128 */
1129 void
1130f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1131{
Bram Moolenaare4098452023-05-07 18:53:49 +01001132 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001133}
1134
1135/*
1136 * "charidx()" function
1137 */
1138 void
1139f_charidx(typval_T *argvars, typval_T *rettv)
1140{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001141 rettv->vval.v_number = -1;
1142
Christian Brabandt67672ef2023-04-24 21:09:54 +01001143 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001144 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001145 || check_for_opt_bool_arg(argvars, 2) == FAIL
1146 || (argvars[2].v_type != VAR_UNKNOWN
1147 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001148 return;
1149
Christian Brabandt67672ef2023-04-24 21:09:54 +01001150 char_u *str = tv_get_string_chk(&argvars[0]);
1151 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001152 if (str == NULL || idx < 0)
1153 return;
1154
Christian Brabandt67672ef2023-04-24 21:09:54 +01001155 varnumber_T countcc = FALSE;
1156 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001157 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001158 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001159 countcc = tv_get_bool(&argvars[2]);
1160 if (argvars[3].v_type != VAR_UNKNOWN)
1161 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001162 }
1163
Christian Brabandt67672ef2023-04-24 21:09:54 +01001164 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001165 if (enc_utf8 && countcc)
1166 ptr2len = utf_ptr2len;
1167 else
1168 ptr2len = mb_ptr2len;
1169
Christian Brabandt67672ef2023-04-24 21:09:54 +01001170 char_u *p;
1171 int len;
1172 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001173 {
1174 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001175 {
1176 // If the index is exactly the number of bytes or utf-16 code units
1177 // in the string then return the length of the string in
1178 // characters.
1179 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1180 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001181 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001182 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001183 if (utf16idx)
1184 {
1185 idx--;
1186 int clen = ptr2len(p);
1187 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1188 if (c > 0xFFFF)
1189 idx--;
1190 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001191 p += ptr2len(p);
1192 }
1193
1194 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1195}
1196
1197/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001198 * Convert the string "str", from encoding "from" to encoding "to".
1199 */
1200 static char_u *
1201convert_string(char_u *str, char_u *from, char_u *to)
1202{
1203 vimconv_T vimconv;
1204
1205 vimconv.vc_type = CONV_NONE;
1206 if (convert_setup(&vimconv, from, to) == FAIL)
1207 return NULL;
1208 vimconv.vc_fail = TRUE;
1209 if (vimconv.vc_type == CONV_NONE)
1210 str = vim_strsave(str);
1211 else
1212 str = string_convert(&vimconv, str, NULL);
1213 convert_setup(&vimconv, NULL, NULL);
1214
1215 return str;
1216}
1217
1218/*
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001219 * Add the bytes from "str" to "blob".
1220 */
1221 static void
1222blob_from_string(char_u *str, blob_T *blob)
1223{
1224 size_t len = STRLEN(str);
1225
1226 for (size_t i = 0; i < len; i++)
1227 {
1228 int ch = str[i];
1229
1230 if (str[i] == NL)
1231 // Translate newlines in the string to NUL character
1232 ch = NUL;
1233
1234 ga_append(&blob->bv_ga, ch);
1235 }
1236}
1237
1238/*
1239 * Return a string created from the bytes in blob starting at "start_idx".
1240 * A NL character in the blob indicates end of string.
1241 * A NUL character in the blob is translated to a NL.
1242 * On return, "start_idx" points to next byte to process in blob.
1243 */
1244 static char_u *
1245string_from_blob(blob_T *blob, long *start_idx)
1246{
1247 garray_T str_ga;
1248 long blen;
Yegappan Lakshmanan5e9aaed2025-01-18 10:24:25 +01001249 int idx;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001250
1251 ga_init2(&str_ga, sizeof(char), 80);
1252
1253 blen = blob_len(blob);
1254
1255 for (idx = *start_idx; idx < blen; idx++)
1256 {
1257 char_u byte = (char_u)blob_get(blob, idx);
1258 if (byte == NL)
1259 {
1260 idx++;
1261 break;
1262 }
1263
1264 if (byte == NUL)
1265 byte = NL;
1266
1267 ga_append(&str_ga, byte);
1268 }
1269
1270 ga_append(&str_ga, NUL);
1271
1272 char_u *ret_str = vim_strsave(str_ga.ga_data);
1273 *start_idx = idx;
1274
1275 ga_clear(&str_ga);
1276 return ret_str;
1277}
1278
1279/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001280 * "blob2str()" function
1281 * Converts a blob to a string, ensuring valid UTF-8 encoding.
1282 */
1283 void
1284f_blob2str(typval_T *argvars, typval_T *rettv)
1285{
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001286 blob_T *blob;
1287 int blen;
1288 long idx;
Bakudankunb3854bf2025-02-23 20:29:21 +01001289 int validate_utf8 = FALSE;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001290
1291 if (check_for_blob_arg(argvars, 0) == FAIL
1292 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1293 return;
1294
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001295 if (rettv_list_alloc(rettv) == FAIL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001296 return;
1297
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001298 blob = argvars->vval.v_blob;
1299 if (blob == NULL)
1300 return;
1301 blen = blob_len(blob);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001302
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001303 char_u *from_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001304 if (argvars[1].v_type != VAR_UNKNOWN)
1305 {
1306 dict_T *d = argvars[1].vval.v_dict;
1307 if (d != NULL)
1308 {
1309 char_u *enc = dict_get_string(d, "encoding", FALSE);
1310 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001311 from_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001312 }
1313 }
1314
1315 if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0)
Bakudankunb3854bf2025-02-23 20:29:21 +01001316 validate_utf8 = TRUE;
1317
1318 if (from_encoding != NULL && STRCMP(from_encoding, "none") == 0)
1319 {
1320 validate_utf8 = FALSE;
1321 vim_free(from_encoding);
1322 from_encoding = NULL;
1323 }
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001324
1325 idx = 0;
1326 while (idx < blen)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001327 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001328 char_u *str;
1329 char_u *converted_str;
1330
1331 str = string_from_blob(blob, &idx);
1332 if (str == NULL)
1333 break;
1334
1335 converted_str = str;
1336 if (from_encoding != NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001337 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001338 converted_str = convert_string(str, from_encoding, p_enc);
1339 vim_free(str);
1340 if (converted_str == NULL)
1341 {
Christian Brabandtd5afc742025-03-18 20:55:42 +01001342 semsg(_(e_str_encoding_from_failed), from_encoding);
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001343 goto done;
1344 }
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001345 }
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001346
Bakudankunb3854bf2025-02-23 20:29:21 +01001347 if (validate_utf8)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001348 {
1349 if (!utf_valid_string(converted_str, NULL))
1350 {
Christian Brabandtd5afc742025-03-18 20:55:42 +01001351 semsg(_(e_str_encoding_from_failed), p_enc);
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001352 vim_free(converted_str);
1353 goto done;
1354 }
1355 }
1356
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001357 int ret = list_append_string(rettv->vval.v_list, converted_str, -1);
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001358 vim_free(converted_str);
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001359 if (ret == FAIL)
1360 break;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001361 }
1362
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001363done:
1364 vim_free(from_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001365}
1366
1367/*
1368 * "str2blob()" function
1369 */
1370 void
1371f_str2blob(typval_T *argvars, typval_T *rettv)
1372{
1373 blob_T *blob;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001374 list_T *list;
1375 listitem_T *li;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001376
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001377 if (check_for_list_arg(argvars, 0) == FAIL
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001378 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1379 return;
1380
1381 if (rettv_blob_alloc(rettv) == FAIL)
1382 return;
1383
1384 blob = rettv->vval.v_blob;
1385
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001386 list = argvars[0].vval.v_list;
1387 if (list == NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001388 return;
1389
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001390 char_u *to_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001391 if (argvars[1].v_type != VAR_UNKNOWN)
1392 {
1393 dict_T *d = argvars[1].vval.v_dict;
1394 if (d != NULL)
1395 {
1396 char_u *enc = dict_get_string(d, "encoding", FALSE);
1397 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001398 to_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001399 }
1400 }
1401
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001402 FOR_ALL_LIST_ITEMS(list, li)
1403 {
1404 if (li->li_tv.v_type != VAR_STRING)
1405 continue;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001406
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001407 char_u *str = li->li_tv.vval.v_string;
1408
1409 if (str == NULL)
1410 continue;
1411
1412 if (to_encoding != NULL)
1413 {
1414 str = convert_string(str, p_enc, to_encoding);
1415 if (str == NULL)
1416 {
Christian Brabandtd5afc742025-03-18 20:55:42 +01001417 semsg(_(e_str_encoding_to_failed), to_encoding);
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001418 goto done;
1419 }
1420 }
1421
1422 if (li != list->lv_first)
1423 // Each list string item is separated by a newline in the blob
1424 ga_append(&blob->bv_ga, NL);
1425
1426 blob_from_string(str, blob);
1427
1428 if (to_encoding != NULL)
1429 vim_free(str);
1430 }
1431
1432done:
1433 if (to_encoding != NULL)
1434 vim_free(to_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001435}
1436
1437/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001438 * "str2list()" function
1439 */
1440 void
1441f_str2list(typval_T *argvars, typval_T *rettv)
1442{
1443 char_u *p;
1444 int utf8 = FALSE;
1445
1446 if (rettv_list_alloc(rettv) == FAIL)
1447 return;
1448
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001449 if (in_vim9script()
1450 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001451 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001452 return;
1453
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001454 if (argvars[1].v_type != VAR_UNKNOWN)
1455 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1456
1457 p = tv_get_string(&argvars[0]);
1458
1459 if (has_mbyte || utf8)
1460 {
1461 int (*ptr2len)(char_u *);
1462 int (*ptr2char)(char_u *);
1463
1464 if (utf8 || enc_utf8)
1465 {
1466 ptr2len = utf_ptr2len;
1467 ptr2char = utf_ptr2char;
1468 }
1469 else
1470 {
1471 ptr2len = mb_ptr2len;
1472 ptr2char = mb_ptr2char;
1473 }
1474
1475 for ( ; *p != NUL; p += (*ptr2len)(p))
1476 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1477 }
1478 else
1479 for ( ; *p != NUL; ++p)
1480 list_append_number(rettv->vval.v_list, *p);
1481}
1482
1483/*
1484 * "str2nr()" function
1485 */
1486 void
1487f_str2nr(typval_T *argvars, typval_T *rettv)
1488{
1489 int base = 10;
1490 char_u *p;
1491 varnumber_T n;
1492 int what = 0;
1493 int isneg;
1494
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001495 if (in_vim9script()
1496 && (check_for_string_arg(argvars, 0) == FAIL
1497 || check_for_opt_number_arg(argvars, 1) == FAIL
1498 || (argvars[1].v_type != VAR_UNKNOWN
1499 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1500 return;
1501
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001502 if (argvars[1].v_type != VAR_UNKNOWN)
1503 {
1504 base = (int)tv_get_number(&argvars[1]);
1505 if (base != 2 && base != 8 && base != 10 && base != 16)
1506 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001507 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001508 return;
1509 }
1510 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1511 what |= STR2NR_QUOTE;
1512 }
1513
1514 p = skipwhite(tv_get_string_strict(&argvars[0]));
1515 isneg = (*p == '-');
1516 if (*p == '+' || *p == '-')
1517 p = skipwhite(p + 1);
1518 switch (base)
1519 {
1520 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1521 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1522 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1523 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001524 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001525 // Text after the number is silently ignored.
1526 if (isneg)
1527 rettv->vval.v_number = -n;
1528 else
1529 rettv->vval.v_number = n;
1530
1531}
1532
1533/*
1534 * "strgetchar()" function
1535 */
1536 void
1537f_strgetchar(typval_T *argvars, typval_T *rettv)
1538{
1539 char_u *str;
1540 int len;
1541 int error = FALSE;
1542 int charidx;
1543 int byteidx = 0;
1544
1545 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001546
1547 if (in_vim9script()
1548 && (check_for_string_arg(argvars, 0) == FAIL
1549 || check_for_number_arg(argvars, 1) == FAIL))
1550 return;
1551
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001552 str = tv_get_string_chk(&argvars[0]);
1553 if (str == NULL)
1554 return;
1555 len = (int)STRLEN(str);
1556 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1557 if (error)
1558 return;
1559
1560 while (charidx >= 0 && byteidx < len)
1561 {
1562 if (charidx == 0)
1563 {
1564 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1565 break;
1566 }
1567 --charidx;
1568 byteidx += MB_CPTR2LEN(str + byteidx);
1569 }
1570}
1571
1572/*
1573 * "stridx()" function
1574 */
1575 void
1576f_stridx(typval_T *argvars, typval_T *rettv)
1577{
1578 char_u buf[NUMBUFLEN];
1579 char_u *needle;
1580 char_u *haystack;
1581 char_u *save_haystack;
1582 char_u *pos;
1583 int start_idx;
1584
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001585 if (in_vim9script()
1586 && (check_for_string_arg(argvars, 0) == FAIL
1587 || check_for_string_arg(argvars, 1) == FAIL
1588 || check_for_opt_number_arg(argvars, 2) == FAIL))
1589 return;
1590
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001591 needle = tv_get_string_chk(&argvars[1]);
1592 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1593 rettv->vval.v_number = -1;
1594 if (needle == NULL || haystack == NULL)
1595 return; // type error; errmsg already given
1596
1597 if (argvars[2].v_type != VAR_UNKNOWN)
1598 {
1599 int error = FALSE;
1600
1601 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1602 if (error || start_idx >= (int)STRLEN(haystack))
1603 return;
1604 if (start_idx >= 0)
1605 haystack += start_idx;
1606 }
1607
1608 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1609 if (pos != NULL)
1610 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1611}
1612
1613/*
1614 * "string()" function
1615 */
1616 void
1617f_string(typval_T *argvars, typval_T *rettv)
1618{
1619 char_u *tofree;
1620 char_u numbuf[NUMBUFLEN];
1621
1622 rettv->v_type = VAR_STRING;
1623 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1624 get_copyID());
1625 // Make a copy if we have a value but it's not in allocated memory.
1626 if (rettv->vval.v_string != NULL && tofree == NULL)
1627 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1628}
1629
1630/*
1631 * "strlen()" function
1632 */
1633 void
1634f_strlen(typval_T *argvars, typval_T *rettv)
1635{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001636 if (in_vim9script()
1637 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1638 return;
1639
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001640 rettv->vval.v_number = (varnumber_T)(STRLEN(
1641 tv_get_string(&argvars[0])));
1642}
1643
1644 static void
1645strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1646{
1647 char_u *s = tv_get_string(&argvars[0]);
1648 varnumber_T len = 0;
1649 int (*func_mb_ptr2char_adv)(char_u **pp);
1650
1651 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1652 while (*s != NUL)
1653 {
1654 func_mb_ptr2char_adv(&s);
1655 ++len;
1656 }
1657 rettv->vval.v_number = len;
1658}
1659
1660/*
1661 * "strcharlen()" function
1662 */
1663 void
1664f_strcharlen(typval_T *argvars, typval_T *rettv)
1665{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001666 if (in_vim9script()
1667 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1668 return;
1669
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001670 strchar_common(argvars, rettv, TRUE);
1671}
1672
1673/*
1674 * "strchars()" function
1675 */
1676 void
1677f_strchars(typval_T *argvars, typval_T *rettv)
1678{
1679 varnumber_T skipcc = FALSE;
1680
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001681 if (in_vim9script()
1682 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001683 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001684 return;
1685
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001686 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001687 {
zeertzjq8cf51372023-05-08 15:31:38 +01001688 int error = FALSE;
1689 skipcc = tv_get_bool_chk(&argvars[1], &error);
1690 if (error)
1691 return;
1692 if (skipcc < 0 || skipcc > 1)
1693 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001694 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001695 return;
1696 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001697 }
zeertzjq8cf51372023-05-08 15:31:38 +01001698
1699 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001700}
1701
1702/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001703 * "strutf16len()" function
1704 */
1705 void
1706f_strutf16len(typval_T *argvars, typval_T *rettv)
1707{
1708 rettv->vval.v_number = -1;
1709
1710 if (check_for_string_arg(argvars, 0) == FAIL
1711 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1712 return;
1713
1714 varnumber_T countcc = FALSE;
1715 if (argvars[1].v_type != VAR_UNKNOWN)
1716 countcc = tv_get_bool(&argvars[1]);
1717
1718 char_u *s = tv_get_string(&argvars[0]);
1719 varnumber_T len = 0;
1720 int (*func_mb_ptr2char_adv)(char_u **pp);
1721 int ch;
1722
1723 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1724 while (*s != NUL)
1725 {
1726 ch = func_mb_ptr2char_adv(&s);
1727 if (ch > 0xFFFF)
1728 ++len;
1729 ++len;
1730 }
1731 rettv->vval.v_number = len;
1732}
1733
1734/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001735 * "strdisplaywidth()" function
1736 */
1737 void
1738f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1739{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001740 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001741 int col = 0;
1742
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001743 rettv->vval.v_number = -1;
1744
1745 if (in_vim9script()
1746 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001747 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001748 return;
1749
1750 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001751 if (argvars[1].v_type != VAR_UNKNOWN)
1752 col = (int)tv_get_number(&argvars[1]);
1753
1754 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1755}
1756
1757/*
1758 * "strwidth()" function
1759 */
1760 void
1761f_strwidth(typval_T *argvars, typval_T *rettv)
1762{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001763 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001764
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001765 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1766 return;
1767
1768 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001769 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1770}
1771
1772/*
1773 * "strcharpart()" function
1774 */
1775 void
1776f_strcharpart(typval_T *argvars, typval_T *rettv)
1777{
1778 char_u *p;
1779 int nchar;
1780 int nbyte = 0;
1781 int charlen;
1782 int skipcc = FALSE;
1783 int len = 0;
1784 int slen;
1785 int error = FALSE;
1786
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001787 if (in_vim9script()
1788 && (check_for_string_arg(argvars, 0) == FAIL
1789 || check_for_number_arg(argvars, 1) == FAIL
1790 || check_for_opt_number_arg(argvars, 2) == FAIL
1791 || (argvars[2].v_type != VAR_UNKNOWN
1792 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1793 return;
1794
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001795 p = tv_get_string(&argvars[0]);
1796 slen = (int)STRLEN(p);
1797
1798 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1799 if (!error)
1800 {
1801 if (argvars[2].v_type != VAR_UNKNOWN
1802 && argvars[3].v_type != VAR_UNKNOWN)
1803 {
zeertzjq8cf51372023-05-08 15:31:38 +01001804 skipcc = tv_get_bool_chk(&argvars[3], &error);
1805 if (error)
1806 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001807 if (skipcc < 0 || skipcc > 1)
1808 {
zeertzjq8cf51372023-05-08 15:31:38 +01001809 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001810 return;
1811 }
1812 }
1813
1814 if (nchar > 0)
1815 while (nchar > 0 && nbyte < slen)
1816 {
1817 if (skipcc)
1818 nbyte += mb_ptr2len(p + nbyte);
1819 else
1820 nbyte += MB_CPTR2LEN(p + nbyte);
1821 --nchar;
1822 }
1823 else
1824 nbyte = nchar;
1825 if (argvars[2].v_type != VAR_UNKNOWN)
1826 {
1827 charlen = (int)tv_get_number(&argvars[2]);
1828 while (charlen > 0 && nbyte + len < slen)
1829 {
1830 int off = nbyte + len;
1831
1832 if (off < 0)
1833 len += 1;
1834 else
1835 {
1836 if (skipcc)
1837 len += mb_ptr2len(p + off);
1838 else
1839 len += MB_CPTR2LEN(p + off);
1840 }
1841 --charlen;
1842 }
1843 }
1844 else
1845 len = slen - nbyte; // default: all bytes that are available.
1846 }
1847
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001848 // Only return the overlap between the specified part and the actual
1849 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001850 if (nbyte < 0)
1851 {
1852 len += nbyte;
1853 nbyte = 0;
1854 }
1855 else if (nbyte > slen)
1856 nbyte = slen;
1857 if (len < 0)
1858 len = 0;
1859 else if (nbyte + len > slen)
1860 len = slen - nbyte;
1861
1862 rettv->v_type = VAR_STRING;
1863 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1864}
1865
1866/*
1867 * "strpart()" function
1868 */
1869 void
1870f_strpart(typval_T *argvars, typval_T *rettv)
1871{
1872 char_u *p;
1873 int n;
1874 int len;
1875 int slen;
1876 int error = FALSE;
1877
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001878 if (in_vim9script()
1879 && (check_for_string_arg(argvars, 0) == FAIL
1880 || check_for_number_arg(argvars, 1) == FAIL
1881 || check_for_opt_number_arg(argvars, 2) == FAIL
1882 || (argvars[2].v_type != VAR_UNKNOWN
1883 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1884 return;
1885
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001886 p = tv_get_string(&argvars[0]);
1887 slen = (int)STRLEN(p);
1888
1889 n = (int)tv_get_number_chk(&argvars[1], &error);
1890 if (error)
1891 len = 0;
1892 else if (argvars[2].v_type != VAR_UNKNOWN)
1893 len = (int)tv_get_number(&argvars[2]);
1894 else
1895 len = slen - n; // default len: all bytes that are available.
1896
1897 // Only return the overlap between the specified part and the actual
1898 // string.
1899 if (n < 0)
1900 {
1901 len += n;
1902 n = 0;
1903 }
1904 else if (n > slen)
1905 n = slen;
1906 if (len < 0)
1907 len = 0;
1908 else if (n + len > slen)
1909 len = slen - n;
1910
1911 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1912 {
1913 int off;
1914
1915 // length in characters
1916 for (off = n; off < slen && len > 0; --len)
1917 off += mb_ptr2len(p + off);
1918 len = off - n;
1919 }
1920
1921 rettv->v_type = VAR_STRING;
1922 rettv->vval.v_string = vim_strnsave(p + n, len);
1923}
1924
1925/*
1926 * "strridx()" function
1927 */
1928 void
1929f_strridx(typval_T *argvars, typval_T *rettv)
1930{
1931 char_u buf[NUMBUFLEN];
1932 char_u *needle;
1933 char_u *haystack;
1934 char_u *rest;
1935 char_u *lastmatch = NULL;
1936 int haystack_len, end_idx;
1937
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001938 if (in_vim9script()
1939 && (check_for_string_arg(argvars, 0) == FAIL
1940 || check_for_string_arg(argvars, 1) == FAIL
1941 || check_for_opt_number_arg(argvars, 2) == FAIL))
1942 return;
1943
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001944 needle = tv_get_string_chk(&argvars[1]);
1945 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1946
1947 rettv->vval.v_number = -1;
1948 if (needle == NULL || haystack == NULL)
1949 return; // type error; errmsg already given
1950
1951 haystack_len = (int)STRLEN(haystack);
1952 if (argvars[2].v_type != VAR_UNKNOWN)
1953 {
1954 // Third argument: upper limit for index
1955 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1956 if (end_idx < 0)
1957 return; // can never find a match
1958 }
1959 else
1960 end_idx = haystack_len;
1961
1962 if (*needle == NUL)
1963 {
1964 // Empty string matches past the end.
1965 lastmatch = haystack + end_idx;
1966 }
1967 else
1968 {
1969 for (rest = haystack; *rest != '\0'; ++rest)
1970 {
1971 rest = (char_u *)strstr((char *)rest, (char *)needle);
1972 if (rest == NULL || rest > haystack + end_idx)
1973 break;
1974 lastmatch = rest;
1975 }
1976 }
1977
1978 if (lastmatch == NULL)
1979 rettv->vval.v_number = -1;
1980 else
1981 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1982}
1983
1984/*
1985 * "strtrans()" function
1986 */
1987 void
1988f_strtrans(typval_T *argvars, typval_T *rettv)
1989{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001990 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1991 return;
1992
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001993 rettv->v_type = VAR_STRING;
1994 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1995}
1996
Christian Brabandt67672ef2023-04-24 21:09:54 +01001997
1998/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001999 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002000 *
2001 * Converts a byte or character offset in a string to the corresponding UTF-16
2002 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01002003 */
2004 void
2005f_utf16idx(typval_T *argvars, typval_T *rettv)
2006{
2007 rettv->vval.v_number = -1;
2008
2009 if (check_for_string_arg(argvars, 0) == FAIL
2010 || check_for_opt_number_arg(argvars, 1) == FAIL
2011 || check_for_opt_bool_arg(argvars, 2) == FAIL
2012 || (argvars[2].v_type != VAR_UNKNOWN
2013 && check_for_opt_bool_arg(argvars, 3) == FAIL))
2014 return;
2015
2016 char_u *str = tv_get_string_chk(&argvars[0]);
2017 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
2018 if (str == NULL || idx < 0)
2019 return;
2020
2021 varnumber_T countcc = FALSE;
2022 varnumber_T charidx = FALSE;
2023 if (argvars[2].v_type != VAR_UNKNOWN)
2024 {
2025 countcc = tv_get_bool(&argvars[2]);
2026 if (argvars[3].v_type != VAR_UNKNOWN)
2027 charidx = tv_get_bool(&argvars[3]);
2028 }
2029
2030 int (*ptr2len)(char_u *);
2031 if (enc_utf8 && countcc)
2032 ptr2len = utf_ptr2len;
2033 else
2034 ptr2len = mb_ptr2len;
2035
2036 char_u *p;
2037 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002038 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002039 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
2040 {
2041 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002042 {
2043 // If the index is exactly the number of bytes or characters in the
2044 // string then return the length of the string in utf-16 code
2045 // units.
2046 if (charidx ? (idx == 0) : (p == (str + idx)))
2047 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002048 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002049 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002050 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002051 int clen = ptr2len(p);
2052 int c = (clen > 1) ? utf_ptr2char(p) : *p;
2053 if (c > 0xFFFF)
2054 len++;
2055 p += ptr2len(p);
2056 if (charidx)
2057 idx--;
2058 }
2059
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002060 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002061}
2062
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002063/*
2064 * "tolower(string)" function
2065 */
2066 void
2067f_tolower(typval_T *argvars, typval_T *rettv)
2068{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002069 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2070 return;
2071
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002072 rettv->v_type = VAR_STRING;
2073 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
2074}
2075
2076/*
2077 * "toupper(string)" function
2078 */
2079 void
2080f_toupper(typval_T *argvars, typval_T *rettv)
2081{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002082 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2083 return;
2084
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002085 rettv->v_type = VAR_STRING;
2086 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
2087}
2088
2089/*
2090 * "tr(string, fromstr, tostr)" function
2091 */
2092 void
2093f_tr(typval_T *argvars, typval_T *rettv)
2094{
2095 char_u *in_str;
2096 char_u *fromstr;
2097 char_u *tostr;
2098 char_u *p;
2099 int inlen;
2100 int fromlen;
2101 int tolen;
2102 int idx;
2103 char_u *cpstr;
2104 int cplen;
2105 int first = TRUE;
2106 char_u buf[NUMBUFLEN];
2107 char_u buf2[NUMBUFLEN];
2108 garray_T ga;
2109
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002110 if (in_vim9script()
2111 && (check_for_string_arg(argvars, 0) == FAIL
2112 || check_for_string_arg(argvars, 1) == FAIL
2113 || check_for_string_arg(argvars, 2) == FAIL))
2114 return;
2115
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002116 in_str = tv_get_string(&argvars[0]);
2117 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
2118 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
2119
2120 // Default return value: empty string.
2121 rettv->v_type = VAR_STRING;
2122 rettv->vval.v_string = NULL;
2123 if (fromstr == NULL || tostr == NULL)
2124 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00002125 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002126
2127 if (!has_mbyte)
2128 // not multi-byte: fromstr and tostr must be the same length
2129 if (STRLEN(fromstr) != STRLEN(tostr))
2130 {
2131error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00002132 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002133 ga_clear(&ga);
2134 return;
2135 }
2136
2137 // fromstr and tostr have to contain the same number of chars
2138 while (*in_str != NUL)
2139 {
2140 if (has_mbyte)
2141 {
2142 inlen = (*mb_ptr2len)(in_str);
2143 cpstr = in_str;
2144 cplen = inlen;
2145 idx = 0;
2146 for (p = fromstr; *p != NUL; p += fromlen)
2147 {
2148 fromlen = (*mb_ptr2len)(p);
2149 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
2150 {
2151 for (p = tostr; *p != NUL; p += tolen)
2152 {
2153 tolen = (*mb_ptr2len)(p);
2154 if (idx-- == 0)
2155 {
2156 cplen = tolen;
2157 cpstr = p;
2158 break;
2159 }
2160 }
2161 if (*p == NUL) // tostr is shorter than fromstr
2162 goto error;
2163 break;
2164 }
2165 ++idx;
2166 }
2167
2168 if (first && cpstr == in_str)
2169 {
2170 // Check that fromstr and tostr have the same number of
2171 // (multi-byte) characters. Done only once when a character
2172 // of in_str doesn't appear in fromstr.
2173 first = FALSE;
2174 for (p = tostr; *p != NUL; p += tolen)
2175 {
2176 tolen = (*mb_ptr2len)(p);
2177 --idx;
2178 }
2179 if (idx != 0)
2180 goto error;
2181 }
2182
2183 (void)ga_grow(&ga, cplen);
2184 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
2185 ga.ga_len += cplen;
2186
2187 in_str += inlen;
2188 }
2189 else
2190 {
2191 // When not using multi-byte chars we can do it faster.
2192 p = vim_strchr(fromstr, *in_str);
2193 if (p != NULL)
2194 ga_append(&ga, tostr[p - fromstr]);
2195 else
2196 ga_append(&ga, *in_str);
2197 ++in_str;
2198 }
2199 }
2200
2201 // add a terminating NUL
2202 (void)ga_grow(&ga, 1);
2203 ga_append(&ga, NUL);
2204
2205 rettv->vval.v_string = ga.ga_data;
2206}
2207
2208/*
2209 * "trim({expr})" function
2210 */
2211 void
2212f_trim(typval_T *argvars, typval_T *rettv)
2213{
2214 char_u buf1[NUMBUFLEN];
2215 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002216 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002217 char_u *mask = NULL;
2218 char_u *tail;
2219 char_u *prev;
2220 char_u *p;
2221 int c1;
2222 int dir = 0;
2223
2224 rettv->v_type = VAR_STRING;
2225 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002226
2227 if (in_vim9script()
2228 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02002229 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002230 || (argvars[1].v_type != VAR_UNKNOWN
2231 && check_for_opt_number_arg(argvars, 2) == FAIL)))
2232 return;
2233
2234 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002235 if (head == NULL)
2236 return;
2237
Illia Bobyr80799172023-10-17 18:00:50 +02002238 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002239 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002240
2241 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02002242 {
Illia Bobyr80799172023-10-17 18:00:50 +02002243 mask = tv_get_string_buf_chk(&argvars[1], buf2);
2244 if (*mask == NUL)
2245 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002246
Illia Bobyr80799172023-10-17 18:00:50 +02002247 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02002248 {
Illia Bobyr80799172023-10-17 18:00:50 +02002249 int error = 0;
2250
2251 // leading or trailing characters to trim
2252 dir = (int)tv_get_number_chk(&argvars[2], &error);
2253 if (error)
2254 return;
2255 if (dir < 0 || dir > 2)
2256 {
2257 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
2258 return;
2259 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002260 }
2261 }
2262
2263 if (dir == 0 || dir == 1)
2264 {
2265 // Trim leading characters
2266 while (*head != NUL)
2267 {
2268 c1 = PTR2CHAR(head);
2269 if (mask == NULL)
2270 {
2271 if (c1 > ' ' && c1 != 0xa0)
2272 break;
2273 }
2274 else
2275 {
2276 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2277 if (c1 == PTR2CHAR(p))
2278 break;
2279 if (*p == NUL)
2280 break;
2281 }
2282 MB_PTR_ADV(head);
2283 }
2284 }
2285
2286 tail = head + STRLEN(head);
2287 if (dir == 0 || dir == 2)
2288 {
2289 // Trim trailing characters
2290 for (; tail > head; tail = prev)
2291 {
2292 prev = tail;
2293 MB_PTR_BACK(head, prev);
2294 c1 = PTR2CHAR(prev);
2295 if (mask == NULL)
2296 {
2297 if (c1 > ' ' && c1 != 0xa0)
2298 break;
2299 }
2300 else
2301 {
2302 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2303 if (c1 == PTR2CHAR(p))
2304 break;
2305 if (*p == NUL)
2306 break;
2307 }
2308 }
2309 }
2310 rettv->vval.v_string = vim_strnsave(head, tail - head);
2311}
2312
// Message used by tv_nr(), tv_str() and tv_float() when the requested
// argument entry is VAR_UNKNOWN, i.e. printf() was given too few arguments.
static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002314
2315/*
2316 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2317 */
2318 static varnumber_T
2319tv_nr(typval_T *tvs, int *idxp)
2320{
2321 int idx = *idxp - 1;
2322 varnumber_T n = 0;
2323 int err = FALSE;
2324
2325 if (tvs[idx].v_type == VAR_UNKNOWN)
2326 emsg(_(e_printf));
2327 else
2328 {
2329 ++*idxp;
2330 n = tv_get_number_chk(&tvs[idx], &err);
2331 if (err)
2332 n = 0;
2333 }
2334 return n;
2335}
2336
2337/*
2338 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2339 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2340 * are not converted to a string.
2341 * If "tofree" is not NULL echo_string() is used. All types are converted to
2342 * a string with the same format as ":echo". The caller must free "*tofree".
2343 * Returns NULL for an error.
2344 */
2345 static char *
2346tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2347{
2348 int idx = *idxp - 1;
2349 char *s = NULL;
2350 static char_u numbuf[NUMBUFLEN];
2351
2352 if (tvs[idx].v_type == VAR_UNKNOWN)
2353 emsg(_(e_printf));
2354 else
2355 {
2356 ++*idxp;
2357 if (tofree != NULL)
2358 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2359 else
2360 s = (char *)tv_get_string_chk(&tvs[idx]);
2361 }
2362 return s;
2363}
2364
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002365/*
2366 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2367 */
2368 static double
2369tv_float(typval_T *tvs, int *idxp)
2370{
2371 int idx = *idxp - 1;
2372 double f = 0;
2373
2374 if (tvs[idx].v_type == VAR_UNKNOWN)
2375 emsg(_(e_printf));
2376 else
2377 {
2378 ++*idxp;
2379 if (tvs[idx].v_type == VAR_FLOAT)
2380 f = tvs[idx].vval.v_float;
2381 else if (tvs[idx].v_type == VAR_NUMBER)
2382 f = (double)tvs[idx].vval.v_number;
2383 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002384 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002385 }
2386 return f;
2387}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002388
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002389#endif
2390
/*
 * Return the text that the printf() function uses for infinity:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * "positive", "force_sign" and "space_for_positive" are used as 0/1 flags to
 * select the sign variant; an upper case "fmt_spec" selects the upper case
 * spelling.
 */
    static const char *
infinity_str(int positive,
	     char fmt_spec,
	     int force_sign,
	     int space_for_positive)
{
    static const char *table[] =
    {
	"-inf", "inf", "+inf", " inf",
	"-INF", "INF", "+INF", " INF"
    };
    // 1: no sign, 2: '+' sign, 3: space instead of a sign
    int sign_variant = 1 + force_sign + force_sign * space_for_positive;
    // the upper case entries are the second group of four
    int case_offset = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[positive * sign_variant + case_offset];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002412
2413/*
2414 * This code was included to provide a portable vsnprintf() and snprintf().
2415 * Some systems may provide their own, but we always use this one for
2416 * consistency.
2417 *
2418 * This code is based on snprintf.c - a portable implementation of snprintf
2419 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2420 * Included with permission. It was heavily modified to fit in Vim.
2421 * The original code, including useful comments, can be found here:
2422 * http://www.ijs.si/software/snprintf/
2423 *
2424 * This snprintf() only supports the following conversion specifiers:
2425 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2426 * with flags: '-', '+', ' ', '0' and '#'.
2427 * An asterisk is supported for field width as well as precision.
2428 *
2429 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2430 *
2431 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2432 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2433 *
2434 * The locale is not used, the string is used as a byte string. This is only
2435 * relevant for double-byte encodings where the second byte may be '%'.
2436 *
2437 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2438 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2439 *
2440 * The return value is the number of characters which would be generated
2441 * for the given input, excluding the trailing NUL. If this value
2442 * is greater or equal to "str_m", not all characters from the result
2443 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
2444 * are discarded. If "str_m" is greater than zero it is guaranteed
2445 * the resulting string will be NUL-terminated.
2446 */
2447
2448/*
2449 * When va_list is not supported we only define vim_snprintf().
2450 *
2451 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2452 * "typval_T". When the latter is not used it must be NULL.
2453 */
2454
2455// When generating prototypes all of this is skipped, cproto doesn't
2456// understand this.
2457#ifndef PROTO
2458
// Like vim_snprintf() but append to the NUL-terminated text already in "str".
// "str_m" is the size of the whole buffer; when the existing text already
// fills it the formatter is called with zero space.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    size_t	used = STRLEN(str);
    size_t	room = str_m > used ? str_m - used : 0;
    va_list	args;
    int		result;

    va_start(args, fmt);
    result = vim_vsnprintf(str + used, room, fmt, args);
    va_end(args);
    return result;
}
2477
// Format "fmt" with the following arguments into "str", which has room for
// "str_m" bytes.  Returns what vim_vsnprintf() returns.
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	args;
    int		result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2489
John Marriottec032de2025-04-10 21:34:19 +02002490/*
2491 * Like vim_snprintf() except the return value can be safely used to increment a
2492 * buffer length.
2493 * Normal `snprintf()` (and `vim_snprintf()`) returns the number of bytes that
2494 * would have been copied if the destination buffer was large enough.
2495 * This means that you cannot rely on it's return value for the destination
2496 * length because the destination may be shorter than the source. This function
2497 * guarantees the returned length will never be greater than the destination length.
2498 */
2499 size_t
2500vim_snprintf_safelen(char *str, size_t str_m, const char *fmt, ...)
2501{
2502 va_list ap;
2503 int str_l;
2504
2505 va_start(ap, fmt);
2506 str_l = vim_vsnprintf(str, str_m, fmt, ap);
2507 va_end(ap);
2508
2509 if (str_l < 0)
2510 {
2511 *str = NUL;
2512 return 0;
2513 }
2514 return ((size_t)str_l >= str_m) ? str_m - 1 : (size_t)str_l;
2515}
2516
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002517 int
2518vim_vsnprintf(
2519 char *str,
2520 size_t str_m,
2521 const char *fmt,
2522 va_list ap)
2523{
2524 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2525}
2526
// Argument classes returned by format_typeof().
enum
{
    TYPE_UNKNOWN = -1,
    TYPE_INT,
    TYPE_LONGINT,
    TYPE_LONGLONGINT,
    TYPE_UNSIGNEDINT,
    TYPE_UNSIGNEDLONGINT,
    TYPE_UNSIGNEDLONGLONGINT,
    TYPE_POINTER,
    TYPE_PERCENT,
    TYPE_CHAR,
    TYPE_STRING,
    TYPE_FLOAT
};

/*
 * Return the TYPE_ value for the argument that the conversion at "type"
 * consumes.  "type" points at an optional 'h', 'l' or 'll' length modifier
 * followed by the conversion character.
 * Returns TYPE_UNKNOWN when the conversion character is not recognized.
 */
    static int
format_typeof(
    const char	*type)
{
    // '\0', 'h', 'l' or 'L' (for "ll")
    char    length_modifier = '\0';
    char    fmt_spec;

    if (*type == 'h' || *type == 'l')
    {
	length_modifier = *type++;
	if (length_modifier == 'l' && *type == 'l')
	{
	    // double l = __int64 / varnumber_T
	    length_modifier = 'L';
	    ++type;
	}
    }
    fmt_spec = *type;

    // Map the synonyms to their canonical form; these replace any length
    // modifier that was parsed above.
    if (fmt_spec == 'i')
	fmt_spec = 'd';
    else if (fmt_spec == '*')
    {
	fmt_spec = 'd';
	length_modifier = 'h';
    }
    else if (fmt_spec == 'D' || fmt_spec == 'U' || fmt_spec == 'O')
    {
	fmt_spec = (fmt_spec == 'D') ? 'd' : (fmt_spec == 'U') ? 'u' : 'o';
	length_modifier = 'l';
    }

    switch (fmt_spec)
    {
	case '%':
	    return TYPE_PERCENT;

	case 'c':
	    return TYPE_CHAR;

	case 's':
	case 'S':
	    return TYPE_STRING;

	case 'p':
	    return TYPE_POINTER;

	case 'b':
	case 'B':
	    // binary always takes the widest unsigned type
	    return TYPE_UNSIGNEDLONGLONGINT;

	case 'd':
	    // signed; char and short arguments are passed as int
	    if (length_modifier == 'l')
		return TYPE_LONGINT;
	    if (length_modifier == 'L')
		return TYPE_LONGLONGINT;
	    return TYPE_INT;

	case 'u':
	case 'o':
	case 'x':
	case 'X':
	    // unsigned
	    if (length_modifier == 'l')
		return TYPE_UNSIGNEDLONGINT;
	    if (length_modifier == 'L')
		return TYPE_UNSIGNEDLONGLONGINT;
	    return TYPE_UNSIGNEDINT;

	case 'f':
	case 'F':
	case 'e':
	case 'E':
	case 'g':
	case 'G':
	    return TYPE_FLOAT;

	default:
	    break;
    }

    return TYPE_UNKNOWN;
}
2657
zeertzjq7772c932023-08-15 22:48:40 +02002658 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002659format_typename(
2660 const char *type)
2661{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002662 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002663 {
2664 case TYPE_INT:
2665 return _(typename_int);
2666
2667 case TYPE_LONGINT:
2668 return _(typename_longint);
2669
2670 case TYPE_LONGLONGINT:
2671 return _(typename_longlongint);
2672
2673 case TYPE_UNSIGNEDINT:
2674 return _(typename_unsignedint);
2675
2676 case TYPE_UNSIGNEDLONGINT:
2677 return _(typename_unsignedlongint);
2678
2679 case TYPE_UNSIGNEDLONGLONGINT:
2680 return _(typename_unsignedlonglongint);
2681
2682 case TYPE_POINTER:
2683 return _(typename_pointer);
2684
2685 case TYPE_PERCENT:
2686 return _(typename_percent);
2687
2688 case TYPE_CHAR:
2689 return _(typename_char);
2690
2691 case TYPE_STRING:
2692 return _(typename_string);
2693
2694 case TYPE_FLOAT:
2695 return _(typename_float);
2696 }
2697
2698 return _(typename_unknown);
2699}
2700
zeertzjq7772c932023-08-15 22:48:40 +02002701 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002702adjust_types(
2703 const char ***ap_types,
2704 int arg,
2705 int *num_posarg,
2706 const char *type)
2707{
2708 if (*ap_types == NULL || *num_posarg < arg)
2709 {
2710 int idx;
2711 const char **new_types;
2712
2713 if (*ap_types == NULL)
2714 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2715 else
K.Takata4c215ec2023-08-26 18:05:08 +02002716 new_types = vim_realloc((char **)*ap_types,
2717 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002718
2719 if (new_types == NULL)
2720 return FAIL;
2721
2722 for (idx = *num_posarg; idx < arg; ++idx)
2723 new_types[idx] = NULL;
2724
2725 *ap_types = new_types;
2726 *num_posarg = arg;
2727 }
2728
2729 if ((*ap_types)[arg - 1] != NULL)
2730 {
2731 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2732 {
2733 const char *pt = type;
2734 if (pt[0] == '*')
2735 pt = (*ap_types)[arg - 1];
2736
2737 if (pt[0] != '*')
2738 {
2739 switch (pt[0])
2740 {
2741 case 'd': case 'i': break;
2742 default:
2743 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2744 return FAIL;
2745 }
2746 }
2747 }
2748 else
2749 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002750 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002751 {
2752 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2753 return FAIL;
2754 }
2755 }
2756 }
2757
2758 (*ap_types)[arg - 1] = type;
2759
2760 return OK;
2761}
2762
Christ van Willegenc35fc032024-03-14 18:30:41 +01002763 static void
2764format_overflow_error(const char *pstart)
2765{
2766 size_t arglen = 0;
2767 char *argcopy = NULL;
2768 const char *p = pstart;
2769
2770 while (VIM_ISDIGIT((int)(*p)))
2771 ++p;
2772
2773 arglen = p - pstart;
2774 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2775 if (argcopy != NULL)
2776 {
2777 strncpy(argcopy, pstart, arglen);
2778 semsg(_( e_val_too_large), argcopy);
2779 free(argcopy);
2780 }
2781 else
2782 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2783}
2784
Hirohito Higashi06fdfa12025-04-17 20:13:05 +02002785#define MAX_ALLOWED_STRING_WIDTH 1048576 // 1 MiB
Christ van Willegenc35fc032024-03-14 18:30:41 +01002786
2787 static int
2788get_unsigned_int(
2789 const char *pstart,
2790 const char **p,
zeertzjq0dff3152024-07-29 20:28:14 +02002791 unsigned int *uj,
2792 int overflow_err)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002793{
2794 *uj = **p - '0';
2795 ++*p;
2796
2797 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2798 {
2799 *uj = 10 * *uj + (unsigned int)(**p - '0');
2800 ++*p;
2801 }
2802
2803 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2804 {
zeertzjq0dff3152024-07-29 20:28:14 +02002805 if (overflow_err)
2806 {
2807 format_overflow_error(pstart);
2808 return FAIL;
2809 }
2810 else
2811 *uj = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002812 }
2813
2814 return OK;
2815}
2816
2817
zeertzjq7772c932023-08-15 22:48:40 +02002818 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002819parse_fmt_types(
2820 const char ***ap_types,
2821 int *num_posarg,
2822 const char *fmt,
2823 typval_T *tvs UNUSED
2824 )
2825{
2826 const char *p = fmt;
2827 const char *arg = NULL;
2828
2829 int any_pos = 0;
2830 int any_arg = 0;
2831 int arg_idx;
2832
2833#define CHECK_POS_ARG do { \
2834 if (any_pos && any_arg) \
2835 { \
2836 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2837 goto error; \
2838 } \
2839} while (0);
2840
2841 if (p == NULL)
2842 return OK;
2843
2844 while (*p != NUL)
2845 {
2846 if (*p != '%')
2847 {
2848 char *q = strchr(p + 1, '%');
2849 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2850
2851 p += n;
2852 }
2853 else
2854 {
2855 // allowed values: \0, h, l, L
2856 char length_modifier = '\0';
2857
2858 // variable for positional arg
2859 int pos_arg = -1;
2860 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002861 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002862
2863 p++; // skip '%'
2864
2865 // First check to see if we find a positional
2866 // argument specifier
2867 ptype = p;
2868
2869 while (VIM_ISDIGIT(*ptype))
2870 ++ptype;
2871
2872 if (*ptype == '$')
2873 {
2874 if (*p == '0')
2875 {
2876 // 0 flag at the wrong place
2877 semsg(_( e_invalid_format_specifier_str), fmt);
2878 goto error;
2879 }
2880
2881 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002882 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002883
zeertzjq0dff3152024-07-29 20:28:14 +02002884 if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002885 goto error;
2886
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002887 pos_arg = uj;
2888
2889 any_pos = 1;
2890 CHECK_POS_ARG;
2891
2892 ++p;
2893 }
2894
2895 // parse flags
2896 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2897 || *p == '#' || *p == '\'')
2898 {
2899 switch (*p)
2900 {
2901 case '0': break;
2902 case '-': break;
2903 case '+': break;
2904 case ' ': // If both the ' ' and '+' flags appear, the ' '
2905 // flag should be ignored
2906 break;
2907 case '#': break;
2908 case '\'': break;
2909 }
2910 p++;
2911 }
2912 // If the '0' and '-' flags both appear, the '0' flag should be
2913 // ignored.
2914
2915 // parse field width
2916 if (*(arg = p) == '*')
2917 {
2918 p++;
2919
2920 if (VIM_ISDIGIT((int)(*p)))
2921 {
2922 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002923 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002924
zeertzjq0dff3152024-07-29 20:28:14 +02002925 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002926 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002927
2928 if (*p != '$')
2929 {
2930 semsg(_( e_invalid_format_specifier_str), fmt);
2931 goto error;
2932 }
2933 else
2934 {
2935 ++p;
2936 any_pos = 1;
2937 CHECK_POS_ARG;
2938
2939 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2940 goto error;
2941 }
2942 }
2943 else
2944 {
2945 any_arg = 1;
2946 CHECK_POS_ARG;
2947 }
2948 }
dundargoc580c1fc2023-10-06 19:41:14 +02002949 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002950 {
2951 // size_t could be wider than unsigned int; make sure we treat
2952 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002953 const char *digstart = p;
2954 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002955
zeertzjq0dff3152024-07-29 20:28:14 +02002956 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002957 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002958
2959 if (*p == '$')
2960 {
2961 semsg(_( e_invalid_format_specifier_str), fmt);
2962 goto error;
2963 }
2964 }
2965
2966 // parse precision
2967 if (*p == '.')
2968 {
2969 p++;
2970
2971 if (*(arg = p) == '*')
2972 {
2973 p++;
2974
2975 if (VIM_ISDIGIT((int)(*p)))
2976 {
2977 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002978 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002979
zeertzjq0dff3152024-07-29 20:28:14 +02002980 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002981 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002982
2983 if (*p == '$')
2984 {
2985 any_pos = 1;
2986 CHECK_POS_ARG;
2987
2988 ++p;
2989
2990 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2991 goto error;
2992 }
2993 else
2994 {
2995 semsg(_( e_invalid_format_specifier_str), fmt);
2996 goto error;
2997 }
2998 }
2999 else
3000 {
3001 any_arg = 1;
3002 CHECK_POS_ARG;
3003 }
3004 }
dundargoc580c1fc2023-10-06 19:41:14 +02003005 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003006 {
3007 // size_t could be wider than unsigned int; make sure we
3008 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003009 const char *digstart = p;
3010 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003011
zeertzjq0dff3152024-07-29 20:28:14 +02003012 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003013 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003014
3015 if (*p == '$')
3016 {
3017 semsg(_( e_invalid_format_specifier_str), fmt);
3018 goto error;
3019 }
3020 }
3021 }
3022
3023 if (pos_arg != -1)
3024 {
3025 any_pos = 1;
3026 CHECK_POS_ARG;
3027
3028 ptype = p;
3029 }
3030
3031 // parse 'h', 'l' and 'll' length modifiers
3032 if (*p == 'h' || *p == 'l')
3033 {
3034 length_modifier = *p;
3035 p++;
3036 if (length_modifier == 'l' && *p == 'l')
3037 {
3038 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02003039 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003040 p++;
3041 }
3042 }
3043
3044 switch (*p)
3045 {
3046 // Check for known format specifiers. % is special!
3047 case 'i':
3048 case '*':
3049 case 'd':
3050 case 'u':
3051 case 'o':
3052 case 'D':
3053 case 'U':
3054 case 'O':
3055 case 'x':
3056 case 'X':
3057 case 'b':
3058 case 'B':
3059 case 'c':
3060 case 's':
3061 case 'S':
3062 case 'p':
3063 case 'f':
3064 case 'F':
3065 case 'e':
3066 case 'E':
3067 case 'g':
3068 case 'G':
3069 if (pos_arg != -1)
3070 {
3071 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
3072 goto error;
3073 }
3074 else
3075 {
3076 any_arg = 1;
3077 CHECK_POS_ARG;
3078 }
3079 break;
3080
3081 default:
3082 if (pos_arg != -1)
3083 {
3084 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
3085 goto error;
3086 }
3087 }
3088
3089 if (*p != NUL)
3090 p++; // step over the just processed conversion specifier
3091 }
3092 }
3093
3094 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
3095 {
3096 if ((*ap_types)[arg_idx] == NULL)
3097 {
3098 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
3099 goto error;
3100 }
3101
3102# if defined(FEAT_EVAL)
3103 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
3104 {
3105 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
3106 goto error;
3107 }
3108# endif
3109 }
3110
3111 return OK;
3112
3113error:
K.Takata4c215ec2023-08-26 18:05:08 +02003114 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003115 *ap_types = NULL;
3116 *num_posarg = 0;
3117 return FAIL;
3118}
3119
zeertzjq7772c932023-08-15 22:48:40 +02003120 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003121skip_to_arg(
3122 const char **ap_types,
3123 va_list ap_start,
3124 va_list *ap,
3125 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003126 int *arg_cur,
3127 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003128{
3129 int arg_min = 0;
3130
3131 if (*arg_cur + 1 == *arg_idx)
3132 {
3133 ++*arg_cur;
3134 ++*arg_idx;
3135 return;
3136 }
3137
3138 if (*arg_cur >= *arg_idx)
3139 {
3140 // Reset ap to ap_start and skip arg_idx - 1 types
3141 va_end(*ap);
3142 va_copy(*ap, ap_start);
3143 }
3144 else
3145 {
3146 // Skip over any we should skip
3147 arg_min = *arg_cur;
3148 }
3149
3150 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
3151 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003152 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003153
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003154 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
3155 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02003156 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003157 return;
3158 }
3159
3160 p = ap_types[*arg_cur];
3161
3162 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003163
3164 // get parameter value, do initial processing
3165 switch (fmt_type)
3166 {
3167 case TYPE_PERCENT:
3168 case TYPE_UNKNOWN:
3169 break;
3170
3171 case TYPE_CHAR:
3172 va_arg(*ap, int);
3173 break;
3174
3175 case TYPE_STRING:
3176 va_arg(*ap, char *);
3177 break;
3178
3179 case TYPE_POINTER:
3180 va_arg(*ap, void *);
3181 break;
3182
3183 case TYPE_INT:
3184 va_arg(*ap, int);
3185 break;
3186
3187 case TYPE_LONGINT:
3188 va_arg(*ap, long int);
3189 break;
3190
3191 case TYPE_LONGLONGINT:
3192 va_arg(*ap, varnumber_T);
3193 break;
3194
3195 case TYPE_UNSIGNEDINT:
3196 va_arg(*ap, unsigned int);
3197 break;
3198
3199 case TYPE_UNSIGNEDLONGINT:
3200 va_arg(*ap, unsigned long int);
3201 break;
3202
3203 case TYPE_UNSIGNEDLONGLONGINT:
3204 va_arg(*ap, uvarnumber_T);
3205 break;
3206
3207 case TYPE_FLOAT:
3208 va_arg(*ap, double);
3209 break;
3210 }
3211 }
3212
3213 // Because we know that after we return from this call,
3214 // a va_arg() call is made, we can pre-emptively
3215 // increment the current argument index.
3216 ++*arg_cur;
3217 ++*arg_idx;
3218
3219 return;
3220}
3221
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003222 int
3223vim_vsnprintf_typval(
3224 char *str,
3225 size_t str_m,
3226 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003227 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003228 typval_T *tvs)
3229{
3230 size_t str_l = 0;
3231 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003232 int arg_cur = 0;
3233 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003234 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003235 va_list ap;
3236 const char **ap_types = NULL;
3237
3238 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
3239 return 0;
3240
3241 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003242
3243 if (p == NULL)
3244 p = "";
3245 while (*p != NUL)
3246 {
3247 if (*p != '%')
3248 {
3249 char *q = strchr(p + 1, '%');
3250 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
3251
3252 // Copy up to the next '%' or NUL without any changes.
3253 if (str_l < str_m)
3254 {
3255 size_t avail = str_m - str_l;
3256
3257 mch_memmove(str + str_l, p, n > avail ? avail : n);
3258 }
3259 p += n;
3260 str_l += n;
3261 }
3262 else
3263 {
3264 size_t min_field_width = 0, precision = 0;
3265 int zero_padding = 0, precision_specified = 0, justify_left = 0;
3266 int alternate_form = 0, force_sign = 0;
3267
3268 // If both the ' ' and '+' flags appear, the ' ' flag should be
3269 // ignored.
3270 int space_for_positive = 1;
3271
3272 // allowed values: \0, h, l, L
3273 char length_modifier = '\0';
3274
3275 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003276# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003277 // That sounds reasonable to use as the maximum
3278 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003279 char tmp[TMP_LEN];
3280
3281 // string address in case of string argument
3282 const char *str_arg = NULL;
3283
3284 // natural field width of arg without padding and sign
3285 size_t str_arg_l;
3286
3287 // unsigned char argument value - only defined for c conversion.
3288 // N.B. standard explicitly states the char argument for the c
3289 // conversion is unsigned
3290 unsigned char uchar_arg;
3291
3292 // number of zeros to be inserted for numeric conversions as
3293 // required by the precision or minimal field width
3294 size_t number_of_zeros_to_pad = 0;
3295
3296 // index into tmp where zero padding is to be inserted
3297 size_t zero_padding_insertion_ind = 0;
3298
3299 // current conversion specifier character
3300 char fmt_spec = '\0';
3301
3302 // buffer for 's' and 'S' specs
3303 char_u *tofree = NULL;
3304
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003305 // variables for positional arg
3306 int pos_arg = -1;
3307 const char *ptype;
3308
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003309
3310 p++; // skip '%'
3311
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003312 // First check to see if we find a positional
3313 // argument specifier
3314 ptype = p;
3315
3316 while (VIM_ISDIGIT(*ptype))
3317 ++ptype;
3318
3319 if (*ptype == '$')
3320 {
3321 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003322 const char *digstart = p;
3323 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003324
zeertzjq0dff3152024-07-29 20:28:14 +02003325 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003326 goto error;
3327
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003328 pos_arg = uj;
3329
3330 ++p;
3331 }
3332
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003333 // parse flags
3334 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3335 || *p == '#' || *p == '\'')
3336 {
3337 switch (*p)
3338 {
3339 case '0': zero_padding = 1; break;
3340 case '-': justify_left = 1; break;
3341 case '+': force_sign = 1; space_for_positive = 0; break;
3342 case ' ': force_sign = 1;
3343 // If both the ' ' and '+' flags appear, the ' '
3344 // flag should be ignored
3345 break;
3346 case '#': alternate_form = 1; break;
3347 case '\'': break;
3348 }
3349 p++;
3350 }
3351 // If the '0' and '-' flags both appear, the '0' flag should be
3352 // ignored.
3353
3354 // parse field width
3355 if (*p == '*')
3356 {
3357 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003358 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003359
3360 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003361
3362 if (VIM_ISDIGIT((int)(*p)))
3363 {
3364 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003365 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003366
zeertzjq0dff3152024-07-29 20:28:14 +02003367 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003368 goto error;
3369
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003370 arg_idx = uj;
3371
3372 ++p;
3373 }
3374
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003375 j =
3376# if defined(FEAT_EVAL)
3377 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3378# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003379 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3380 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003381 va_arg(ap, int));
3382
Christ van Willegenc35fc032024-03-14 18:30:41 +01003383 if (j > MAX_ALLOWED_STRING_WIDTH)
3384 {
zeertzjq0dff3152024-07-29 20:28:14 +02003385 if (tvs != NULL)
3386 {
3387 format_overflow_error(digstart);
3388 goto error;
3389 }
3390 else
3391 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003392 }
3393
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003394 if (j >= 0)
3395 min_field_width = j;
3396 else
3397 {
3398 min_field_width = -j;
3399 justify_left = 1;
3400 }
3401 }
3402 else if (VIM_ISDIGIT((int)(*p)))
3403 {
3404 // size_t could be wider than unsigned int; make sure we treat
3405 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003406 const char *digstart = p;
3407 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003408
zeertzjq0dff3152024-07-29 20:28:14 +02003409 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003410 goto error;
3411
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003412 min_field_width = uj;
3413 }
3414
3415 // parse precision
3416 if (*p == '.')
3417 {
3418 p++;
3419 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003420
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003421 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003422 {
3423 // size_t could be wider than unsigned int; make sure we
3424 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003425 const char *digstart = p;
3426 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003427
zeertzjq0dff3152024-07-29 20:28:14 +02003428 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003429 goto error;
3430
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003431 precision = uj;
3432 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003433 else if (*p == '*')
3434 {
3435 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003436 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003437
3438 p++;
3439
3440 if (VIM_ISDIGIT((int)(*p)))
3441 {
3442 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003443 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003444
zeertzjq0dff3152024-07-29 20:28:14 +02003445 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003446 goto error;
3447
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003448 arg_idx = uj;
3449
3450 ++p;
3451 }
3452
3453 j =
3454# if defined(FEAT_EVAL)
3455 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3456# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003457 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3458 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003459 va_arg(ap, int));
3460
Christ van Willegenc35fc032024-03-14 18:30:41 +01003461 if (j > MAX_ALLOWED_STRING_WIDTH)
3462 {
zeertzjq0dff3152024-07-29 20:28:14 +02003463 if (tvs != NULL)
3464 {
3465 format_overflow_error(digstart);
3466 goto error;
3467 }
3468 else
3469 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003470 }
3471
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003472 if (j >= 0)
3473 precision = j;
3474 else
3475 {
3476 precision_specified = 0;
3477 precision = 0;
3478 }
3479 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003480 }
3481
3482 // parse 'h', 'l' and 'll' length modifiers
3483 if (*p == 'h' || *p == 'l')
3484 {
3485 length_modifier = *p;
3486 p++;
3487 if (length_modifier == 'l' && *p == 'l')
3488 {
3489 // double l = __int64 / varnumber_T
3490 length_modifier = 'L';
3491 p++;
3492 }
3493 }
3494 fmt_spec = *p;
3495
3496 // common synonyms:
3497 switch (fmt_spec)
3498 {
3499 case 'i': fmt_spec = 'd'; break;
3500 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3501 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3502 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3503 default: break;
3504 }
3505
3506# if defined(FEAT_EVAL)
3507 switch (fmt_spec)
3508 {
3509 case 'd': case 'u': case 'o': case 'x': case 'X':
3510 if (tvs != NULL && length_modifier == '\0')
3511 length_modifier = 'L';
3512 }
3513# endif
3514
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003515 if (pos_arg != -1)
3516 arg_idx = pos_arg;
3517
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003518 // get parameter value, do initial processing
3519 switch (fmt_spec)
3520 {
3521 // '%' and 'c' behave similar to 's' regarding flags and field
3522 // widths
3523 case '%':
3524 case 'c':
3525 case 's':
3526 case 'S':
3527 str_arg_l = 1;
3528 switch (fmt_spec)
3529 {
3530 case '%':
3531 str_arg = p;
3532 break;
3533
3534 case 'c':
3535 {
3536 int j;
3537
3538 j =
3539# if defined(FEAT_EVAL)
3540 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3541# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003542 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3543 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003544 va_arg(ap, int));
3545
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003546 // standard demands unsigned char
3547 uchar_arg = (unsigned char)j;
3548 str_arg = (char *)&uchar_arg;
3549 break;
3550 }
3551
3552 case 's':
3553 case 'S':
3554 str_arg =
3555# if defined(FEAT_EVAL)
3556 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3557# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003558 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3559 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003560 va_arg(ap, char *));
3561
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003562 if (str_arg == NULL)
3563 {
3564 str_arg = "[NULL]";
3565 str_arg_l = 6;
3566 }
3567 // make sure not to address string beyond the specified
3568 // precision !!!
3569 else if (!precision_specified)
3570 str_arg_l = strlen(str_arg);
3571 // truncate string if necessary as requested by precision
3572 else if (precision == 0)
3573 str_arg_l = 0;
3574 else
3575 {
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003576 // memchr on HP does not like n > 2^31 !!!
3577 char *q = memchr(str_arg, '\0',
3578 precision <= (size_t)0x7fffffffL ? precision
3579 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003580
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003581 str_arg_l = (q == NULL) ? precision
3582 : (size_t)(q - str_arg);
3583 }
3584 if (fmt_spec == 'S')
3585 {
presuku1f2453f2021-11-24 15:32:57 +00003586 char_u *p1;
3587 size_t i;
3588 int cell;
presukud85fccd2021-11-20 19:38:31 +00003589
presuku1f2453f2021-11-24 15:32:57 +00003590 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003591 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003592 {
3593 cell = mb_ptr2cells(p1);
3594 if (precision_specified && i + cell > precision)
3595 break;
3596 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003597 }
presuku1f2453f2021-11-24 15:32:57 +00003598
3599 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003600 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003601 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003602 }
3603 break;
3604
3605 default:
3606 break;
3607 }
3608 break;
3609
3610 case 'd': case 'u':
3611 case 'b': case 'B':
3612 case 'o':
3613 case 'x': case 'X':
3614 case 'p':
3615 {
3616 // NOTE: the u, b, o, x, X and p conversion specifiers
3617 // imply the value is unsigned; d implies a signed
3618 // value
3619
3620 // 0 if numeric argument is zero (or if pointer is
3621 // NULL for 'p'), +1 if greater than zero (or nonzero
3622 // for unsigned arguments), -1 if negative (unsigned
3623 // argument is never negative)
3624 int arg_sign = 0;
3625
3626 // only set for length modifier h, or for no length
3627 // modifiers
3628 int int_arg = 0;
3629 unsigned int uint_arg = 0;
3630
3631 // only set for length modifier l
3632 long int long_arg = 0;
3633 unsigned long int ulong_arg = 0;
3634
3635 // only set for length modifier ll
3636 varnumber_T llong_arg = 0;
3637 uvarnumber_T ullong_arg = 0;
3638
3639 // only set for b conversion
3640 uvarnumber_T bin_arg = 0;
3641
3642 // pointer argument value -only defined for p
3643 // conversion
3644 void *ptr_arg = NULL;
3645
3646 if (fmt_spec == 'p')
3647 {
3648 length_modifier = '\0';
3649 ptr_arg =
3650# if defined(FEAT_EVAL)
3651 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3652 NULL) :
3653# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003654 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3655 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003656 va_arg(ap, void *));
3657
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003658 if (ptr_arg != NULL)
3659 arg_sign = 1;
3660 }
3661 else if (fmt_spec == 'b' || fmt_spec == 'B')
3662 {
3663 bin_arg =
3664# if defined(FEAT_EVAL)
3665 tvs != NULL ?
3666 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3667# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003668 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3669 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003670 va_arg(ap, uvarnumber_T));
3671
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003672 if (bin_arg != 0)
3673 arg_sign = 1;
3674 }
3675 else if (fmt_spec == 'd')
3676 {
3677 // signed
3678 switch (length_modifier)
3679 {
3680 case '\0':
3681 case 'h':
3682 // char and short arguments are passed as int.
3683 int_arg =
3684# if defined(FEAT_EVAL)
3685 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3686# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003687 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3688 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003689 va_arg(ap, int));
3690
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003691 if (int_arg > 0)
3692 arg_sign = 1;
3693 else if (int_arg < 0)
3694 arg_sign = -1;
3695 break;
3696 case 'l':
3697 long_arg =
3698# if defined(FEAT_EVAL)
3699 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3700# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003701 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3702 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003703 va_arg(ap, long int));
3704
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003705 if (long_arg > 0)
3706 arg_sign = 1;
3707 else if (long_arg < 0)
3708 arg_sign = -1;
3709 break;
3710 case 'L':
3711 llong_arg =
3712# if defined(FEAT_EVAL)
3713 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3714# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003715 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3716 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003717 va_arg(ap, varnumber_T));
3718
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003719 if (llong_arg > 0)
3720 arg_sign = 1;
3721 else if (llong_arg < 0)
3722 arg_sign = -1;
3723 break;
3724 }
3725 }
3726 else
3727 {
3728 // unsigned
3729 switch (length_modifier)
3730 {
3731 case '\0':
3732 case 'h':
3733 uint_arg =
3734# if defined(FEAT_EVAL)
3735 tvs != NULL ? (unsigned)
3736 tv_nr(tvs, &arg_idx) :
3737# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003738 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3739 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003740 va_arg(ap, unsigned int));
3741
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003742 if (uint_arg != 0)
3743 arg_sign = 1;
3744 break;
3745 case 'l':
3746 ulong_arg =
3747# if defined(FEAT_EVAL)
3748 tvs != NULL ? (unsigned long)
3749 tv_nr(tvs, &arg_idx) :
3750# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003751 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3752 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003753 va_arg(ap, unsigned long int));
3754
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003755 if (ulong_arg != 0)
3756 arg_sign = 1;
3757 break;
3758 case 'L':
3759 ullong_arg =
3760# if defined(FEAT_EVAL)
3761 tvs != NULL ? (uvarnumber_T)
3762 tv_nr(tvs, &arg_idx) :
3763# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003764 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3765 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003766 va_arg(ap, uvarnumber_T));
3767
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003768 if (ullong_arg != 0)
3769 arg_sign = 1;
3770 break;
3771 }
3772 }
3773
3774 str_arg = tmp;
3775 str_arg_l = 0;
3776
3777 // NOTE:
3778 // For d, i, u, o, x, and X conversions, if precision is
3779 // specified, the '0' flag should be ignored. This is so
3780 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3781 // FreeBSD, NetBSD; but not with Perl.
3782 if (precision_specified)
3783 zero_padding = 0;
3784 if (fmt_spec == 'd')
3785 {
3786 if (force_sign && arg_sign >= 0)
3787 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3788 // leave negative numbers for sprintf to handle, to
3789 // avoid handling tricky cases like (short int)-32768
3790 }
3791 else if (alternate_form)
3792 {
3793 if (arg_sign != 0
3794 && (fmt_spec == 'b' || fmt_spec == 'B'
3795 || fmt_spec == 'x' || fmt_spec == 'X') )
3796 {
3797 tmp[str_arg_l++] = '0';
3798 tmp[str_arg_l++] = fmt_spec;
3799 }
3800 // alternate form should have no effect for p
3801 // conversion, but ...
3802 }
3803
3804 zero_padding_insertion_ind = str_arg_l;
3805 if (!precision_specified)
3806 precision = 1; // default precision is 1
3807 if (precision == 0 && arg_sign == 0)
3808 {
3809 // When zero value is formatted with an explicit
3810 // precision 0, the resulting formatted string is
3811 // empty (d, i, u, b, B, o, x, X, p).
3812 }
3813 else
3814 {
3815 char f[6];
3816 int f_l = 0;
3817
3818 // construct a simple format string for sprintf
3819 f[f_l++] = '%';
3820 if (!length_modifier)
3821 ;
3822 else if (length_modifier == 'L')
3823 {
3824# ifdef MSWIN
3825 f[f_l++] = 'I';
3826 f[f_l++] = '6';
3827 f[f_l++] = '4';
3828# else
3829 f[f_l++] = 'l';
3830 f[f_l++] = 'l';
3831# endif
3832 }
3833 else
3834 f[f_l++] = length_modifier;
3835 f[f_l++] = fmt_spec;
3836 f[f_l++] = '\0';
3837
3838 if (fmt_spec == 'p')
3839 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3840 else if (fmt_spec == 'b' || fmt_spec == 'B')
3841 {
3842 char b[8 * sizeof(uvarnumber_T)];
3843 size_t b_l = 0;
3844 uvarnumber_T bn = bin_arg;
3845
3846 do
3847 {
3848 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3849 bn >>= 1;
3850 }
3851 while (bn != 0);
3852
3853 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3854 str_arg_l += b_l;
3855 }
3856 else if (fmt_spec == 'd')
3857 {
3858 // signed
3859 switch (length_modifier)
3860 {
3861 case '\0': str_arg_l += sprintf(
3862 tmp + str_arg_l, f,
3863 int_arg);
3864 break;
3865 case 'h': str_arg_l += sprintf(
3866 tmp + str_arg_l, f,
3867 (short)int_arg);
3868 break;
3869 case 'l': str_arg_l += sprintf(
3870 tmp + str_arg_l, f, long_arg);
3871 break;
3872 case 'L': str_arg_l += sprintf(
3873 tmp + str_arg_l, f, llong_arg);
3874 break;
3875 }
3876 }
3877 else
3878 {
3879 // unsigned
3880 switch (length_modifier)
3881 {
3882 case '\0': str_arg_l += sprintf(
3883 tmp + str_arg_l, f,
3884 uint_arg);
3885 break;
3886 case 'h': str_arg_l += sprintf(
3887 tmp + str_arg_l, f,
3888 (unsigned short)uint_arg);
3889 break;
3890 case 'l': str_arg_l += sprintf(
3891 tmp + str_arg_l, f, ulong_arg);
3892 break;
3893 case 'L': str_arg_l += sprintf(
3894 tmp + str_arg_l, f, ullong_arg);
3895 break;
3896 }
3897 }
3898
3899 // include the optional minus sign and possible
3900 // "0x" in the region before the zero padding
3901 // insertion point
3902 if (zero_padding_insertion_ind < str_arg_l
3903 && tmp[zero_padding_insertion_ind] == '-')
3904 zero_padding_insertion_ind++;
3905 if (zero_padding_insertion_ind + 1 < str_arg_l
3906 && tmp[zero_padding_insertion_ind] == '0'
3907 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3908 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3909 zero_padding_insertion_ind += 2;
3910 }
3911
3912 {
3913 size_t num_of_digits = str_arg_l
3914 - zero_padding_insertion_ind;
3915
3916 if (alternate_form && fmt_spec == 'o'
3917 // unless zero is already the first
3918 // character
3919 && !(zero_padding_insertion_ind < str_arg_l
3920 && tmp[zero_padding_insertion_ind] == '0'))
3921 {
3922 // assure leading zero for alternate-form
3923 // octal numbers
3924 if (!precision_specified
3925 || precision < num_of_digits + 1)
3926 {
3927 // precision is increased to force the
3928 // first character to be zero, except if a
3929 // zero value is formatted with an
3930 // explicit precision of zero
3931 precision = num_of_digits + 1;
3932 }
3933 }
3934 // zero padding to specified precision?
3935 if (num_of_digits < precision)
3936 number_of_zeros_to_pad = precision - num_of_digits;
3937 }
3938 // zero padding to specified minimal field width?
3939 if (!justify_left && zero_padding)
3940 {
3941 int n = (int)(min_field_width - (str_arg_l
3942 + number_of_zeros_to_pad));
3943 if (n > 0)
3944 number_of_zeros_to_pad += n;
3945 }
3946 break;
3947 }
3948
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003949 case 'f':
3950 case 'F':
3951 case 'e':
3952 case 'E':
3953 case 'g':
3954 case 'G':
3955 {
3956 // Floating point.
3957 double f;
3958 double abs_f;
3959 char format[40];
3960 int l;
3961 int remove_trailing_zeroes = FALSE;
3962
3963 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003964# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003965 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003966# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003967 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3968 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003969 va_arg(ap, double));
3970
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003971 abs_f = f < 0 ? -f : f;
3972
3973 if (fmt_spec == 'g' || fmt_spec == 'G')
3974 {
3975 // Would be nice to use %g directly, but it prints
3976 // "1.0" as "1", we don't want that.
3977 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3978 || abs_f == 0.0)
3979 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3980 else
3981 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3982 remove_trailing_zeroes = TRUE;
3983 }
3984
3985 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003986# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003987 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003988# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003989 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003990# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003991 )
3992 {
3993 // Avoid a buffer overflow
3994 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3995 force_sign, space_for_positive));
3996 str_arg_l = STRLEN(tmp);
3997 zero_padding = 0;
3998 }
3999 else
4000 {
4001 if (isnan(f))
4002 {
4003 // Not a number: nan or NAN
4004 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
4005 : "nan");
4006 str_arg_l = 3;
4007 zero_padding = 0;
4008 }
4009 else if (isinf(f))
4010 {
4011 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
4012 force_sign, space_for_positive));
4013 str_arg_l = STRLEN(tmp);
4014 zero_padding = 0;
4015 }
4016 else
4017 {
4018 // Regular float number
4019 format[0] = '%';
4020 l = 1;
4021 if (force_sign)
4022 format[l++] = space_for_positive ? ' ' : '+';
4023 if (precision_specified)
4024 {
4025 size_t max_prec = TMP_LEN - 10;
4026
4027 // Make sure we don't get more digits than we
4028 // have room for.
4029 if ((fmt_spec == 'f' || fmt_spec == 'F')
4030 && abs_f > 1.0)
4031 max_prec -= (size_t)log10(abs_f);
4032 if (precision > max_prec)
4033 precision = max_prec;
4034 l += sprintf(format + l, ".%d", (int)precision);
4035 }
4036 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
4037 format[l + 1] = NUL;
4038
4039 str_arg_l = sprintf(tmp, format, f);
4040 }
4041
4042 if (remove_trailing_zeroes)
4043 {
4044 int i;
4045 char *tp;
4046
4047 // Using %g or %G: remove superfluous zeroes.
4048 if (fmt_spec == 'f' || fmt_spec == 'F')
4049 tp = tmp + str_arg_l - 1;
4050 else
4051 {
4052 tp = (char *)vim_strchr((char_u *)tmp,
4053 fmt_spec == 'e' ? 'e' : 'E');
4054 if (tp != NULL)
4055 {
4056 // Remove superfluous '+' and leading
4057 // zeroes from the exponent.
4058 if (tp[1] == '+')
4059 {
4060 // Change "1.0e+07" to "1.0e07"
4061 STRMOVE(tp + 1, tp + 2);
4062 --str_arg_l;
4063 }
4064 i = (tp[1] == '-') ? 2 : 1;
4065 while (tp[i] == '0')
4066 {
4067 // Change "1.0e07" to "1.0e7"
4068 STRMOVE(tp + i, tp + i + 1);
4069 --str_arg_l;
4070 }
4071 --tp;
4072 }
4073 }
4074
4075 if (tp != NULL && !precision_specified)
4076 // Remove trailing zeroes, but keep the one
4077 // just after a dot.
4078 while (tp > tmp + 2 && *tp == '0'
4079 && tp[-1] != '.')
4080 {
4081 STRMOVE(tp, tp + 1);
4082 --tp;
4083 --str_arg_l;
4084 }
4085 }
4086 else
4087 {
4088 char *tp;
4089
4090 // Be consistent: some printf("%e") use 1.0e+12
4091 // and some 1.0e+012. Remove one zero in the last
4092 // case.
4093 tp = (char *)vim_strchr((char_u *)tmp,
4094 fmt_spec == 'e' ? 'e' : 'E');
4095 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
4096 && tp[2] == '0'
4097 && vim_isdigit(tp[3])
4098 && vim_isdigit(tp[4]))
4099 {
4100 STRMOVE(tp + 2, tp + 3);
4101 --str_arg_l;
4102 }
4103 }
4104 }
4105 if (zero_padding && min_field_width > str_arg_l
4106 && (tmp[0] == '-' || force_sign))
4107 {
4108 // padding 0's should be inserted after the sign
4109 number_of_zeros_to_pad = min_field_width - str_arg_l;
4110 zero_padding_insertion_ind = 1;
4111 }
4112 str_arg = tmp;
4113 break;
4114 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004115
4116 default:
4117 // unrecognized conversion specifier, keep format string
4118 // as-is
4119 zero_padding = 0; // turn zero padding off for non-numeric
4120 // conversion
4121 justify_left = 1;
4122 min_field_width = 0; // reset flags
4123
4124 // discard the unrecognized conversion, just keep
4125 // the unrecognized conversion character
4126 str_arg = p;
4127 str_arg_l = 0;
4128 if (*p != NUL)
4129 str_arg_l++; // include invalid conversion specifier
4130 // unchanged if not at end-of-string
4131 break;
4132 }
4133
4134 if (*p != NUL)
4135 p++; // step over the just processed conversion specifier
4136
4137 // insert padding to the left as requested by min_field_width;
4138 // this does not include the zero padding in case of numerical
4139 // conversions
4140 if (!justify_left)
4141 {
4142 // left padding with blank or zero
4143 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
4144
4145 if (pn > 0)
4146 {
4147 if (str_l < str_m)
4148 {
4149 size_t avail = str_m - str_l;
4150
4151 vim_memset(str + str_l, zero_padding ? '0' : ' ',
4152 (size_t)pn > avail ? avail
4153 : (size_t)pn);
4154 }
4155 str_l += pn;
4156 }
4157 }
4158
4159 // zero padding as requested by the precision or by the minimal
4160 // field width for numeric conversions required?
4161 if (number_of_zeros_to_pad == 0)
4162 {
4163 // will not copy first part of numeric right now,
4164 // force it to be copied later in its entirety
4165 zero_padding_insertion_ind = 0;
4166 }
4167 else
4168 {
4169 // insert first part of numerics (sign or '0x') before zero
4170 // padding
4171 int zn = (int)zero_padding_insertion_ind;
4172
4173 if (zn > 0)
4174 {
4175 if (str_l < str_m)
4176 {
4177 size_t avail = str_m - str_l;
4178
4179 mch_memmove(str + str_l, str_arg,
4180 (size_t)zn > avail ? avail
4181 : (size_t)zn);
4182 }
4183 str_l += zn;
4184 }
4185
4186 // insert zero padding as requested by the precision or min
4187 // field width
4188 zn = (int)number_of_zeros_to_pad;
4189 if (zn > 0)
4190 {
4191 if (str_l < str_m)
4192 {
4193 size_t avail = str_m - str_l;
4194
4195 vim_memset(str + str_l, '0',
4196 (size_t)zn > avail ? avail
4197 : (size_t)zn);
4198 }
4199 str_l += zn;
4200 }
4201 }
4202
4203 // insert formatted string
4204 // (or as-is conversion specifier for unknown conversions)
4205 {
4206 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
4207
4208 if (sn > 0)
4209 {
4210 if (str_l < str_m)
4211 {
4212 size_t avail = str_m - str_l;
4213
4214 mch_memmove(str + str_l,
4215 str_arg + zero_padding_insertion_ind,
4216 (size_t)sn > avail ? avail : (size_t)sn);
4217 }
4218 str_l += sn;
4219 }
4220 }
4221
4222 // insert right padding
4223 if (justify_left)
4224 {
4225 // right blank padding to the field width
4226 int pn = (int)(min_field_width
4227 - (str_arg_l + number_of_zeros_to_pad));
4228
4229 if (pn > 0)
4230 {
4231 if (str_l < str_m)
4232 {
4233 size_t avail = str_m - str_l;
4234
4235 vim_memset(str + str_l, ' ',
4236 (size_t)pn > avail ? avail
4237 : (size_t)pn);
4238 }
4239 str_l += pn;
4240 }
4241 }
4242 vim_free(tofree);
4243 }
4244 }
4245
4246 if (str_m > 0)
4247 {
4248 // make sure the string is nul-terminated even at the expense of
4249 // overwriting the last character (shouldn't happen, but just in case)
4250 //
4251 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
4252 }
4253
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004254 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00004255 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004256
Christ van Willegenc35fc032024-03-14 18:30:41 +01004257error:
K.Takata4c215ec2023-08-26 18:05:08 +02004258 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004259 va_end(ap);
4260
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004261 // Return the number of characters formatted (excluding trailing nul
4262 // character), that is, the number of characters that would have been
4263 // written to the buffer if it were large enough.
4264 return (int)str_l;
4265}
4266
4267#endif // PROTO