blob: 9b66ee0e4adce268e9a4cb4cc53c55c1578fcb18 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * strings.c: string manipulation functions
 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
Mike Williams51024bb2024-05-30 07:46:30 +0200154 size_t l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
Mike Williams51024bb2024-05-30 07:46:30 +0200272 if (do_special && find_cmdline_var(p, &l) >= 0 && l > 0)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200273 {
274 *d++ = '\\'; // insert backslash
Mike Williams51024bb2024-05-30 07:46:30 +0200275 do // copy the var
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200276 *d++ = *p++;
Mike Williams51024bb2024-05-30 07:46:30 +0200277 while (--l > 0);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000341 if (p == NULL)
342 return;
343
344 p2 = p;
345 while ((c = *p2) != NUL)
346 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000528/*
529 * A version of strlen() that has a maximum length.
530 */
531 size_t
532vim_strlen_maxlen(char *s, size_t maxlen)
533{
534 size_t i;
535 for (i = 0; i < maxlen; ++i)
536 if (s[i] == NUL)
537 break;
538 return i;
539}
540
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200541#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
542/*
543 * Compare two strings, ignoring case, using current locale.
544 * Doesn't work for multi-byte characters.
545 * return 0 for match, < 0 for smaller, > 0 for bigger
546 */
547 int
548vim_stricmp(char *s1, char *s2)
549{
550 int i;
551
552 for (;;)
553 {
554 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
555 if (i != 0)
556 return i; // this character different
557 if (*s1 == NUL)
558 break; // strings match until NUL
559 ++s1;
560 ++s2;
561 }
562 return 0; // strings match
563}
564#endif
565
566#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
567/*
568 * Compare two strings, for length "len", ignoring case, using current locale.
569 * Doesn't work for multi-byte characters.
570 * return 0 for match, < 0 for smaller, > 0 for bigger
571 */
572 int
573vim_strnicmp(char *s1, char *s2, size_t len)
574{
575 int i;
576
577 while (len > 0)
578 {
579 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
580 if (i != 0)
581 return i; // this character different
582 if (*s1 == NUL)
583 break; // strings match until NUL
584 ++s1;
585 ++s2;
586 --len;
587 }
588 return 0; // strings match
589}
590#endif
591
592/*
593 * Search for first occurrence of "c" in "string".
594 * Version of strchr() that handles unsigned char strings with characters from
595 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
596 * end of the string.
597 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000598 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200599vim_strchr(char_u *string, int c)
600{
601 char_u *p;
602 int b;
603
604 p = string;
605 if (enc_utf8 && c >= 0x80)
606 {
607 while (*p != NUL)
608 {
609 int l = utfc_ptr2len(p);
610
611 // Avoid matching an illegal byte here.
612 if (utf_ptr2char(p) == c && l > 1)
613 return p;
614 p += l;
615 }
616 return NULL;
617 }
618 if (enc_dbcs != 0 && c > 255)
619 {
620 int n2 = c & 0xff;
621
622 c = ((unsigned)c >> 8) & 0xff;
623 while ((b = *p) != NUL)
624 {
625 if (b == c && p[1] == n2)
626 return p;
627 p += (*mb_ptr2len)(p);
628 }
629 return NULL;
630 }
631 if (has_mbyte)
632 {
633 while ((b = *p) != NUL)
634 {
635 if (b == c)
636 return p;
637 p += (*mb_ptr2len)(p);
638 }
639 return NULL;
640 }
641 while ((b = *p) != NUL)
642 {
643 if (b == c)
644 return p;
645 ++p;
646 }
647 return NULL;
648}
649
650/*
651 * Version of strchr() that only works for bytes and handles unsigned char
652 * strings with characters above 128 correctly. It also doesn't return a
653 * pointer to the NUL at the end of the string.
654 */
655 char_u *
656vim_strbyte(char_u *string, int c)
657{
658 char_u *p = string;
659
660 while (*p != NUL)
661 {
662 if (*p == c)
663 return p;
664 ++p;
665 }
666 return NULL;
667}
668
669/*
670 * Search for last occurrence of "c" in "string".
671 * Version of strrchr() that handles unsigned char strings with characters from
672 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
673 * end of the string.
674 * Return NULL if not found.
675 * Does not handle multi-byte char for "c"!
676 */
677 char_u *
678vim_strrchr(char_u *string, int c)
679{
680 char_u *retval = NULL;
681 char_u *p = string;
682
683 while (*p)
684 {
685 if (*p == c)
686 retval = p;
687 MB_PTR_ADV(p);
688 }
689 return retval;
690}
691
692/*
693 * Vim's version of strpbrk(), in case it's missing.
694 * Don't generate a prototype for this, causes problems when it's not used.
695 */
696#ifndef PROTO
697# ifndef HAVE_STRPBRK
698# ifdef vim_strpbrk
699# undef vim_strpbrk
700# endif
701 char_u *
702vim_strpbrk(char_u *s, char_u *charset)
703{
704 while (*s)
705 {
706 if (vim_strchr(charset, *s) != NULL)
707 return s;
708 MB_PTR_ADV(s);
709 }
710 return NULL;
711}
712# endif
713#endif
714
715/*
716 * Sort an array of strings.
717 */
718static int sort_compare(const void *s1, const void *s2);
719
720 static int
721sort_compare(const void *s1, const void *s2)
722{
723 return STRCMP(*(char **)s1, *(char **)s2);
724}
725
726 void
727sort_strings(
728 char_u **files,
729 int count)
730{
731 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
732}
733
734#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
735/*
736 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
737 * When "s" is NULL FALSE is returned.
738 */
739 int
740has_non_ascii(char_u *s)
741{
742 char_u *p;
743
744 if (s != NULL)
745 for (p = s; *p != NUL; ++p)
746 if (*p >= 128)
747 return TRUE;
748 return FALSE;
749}
750#endif
751
752/*
753 * Concatenate two strings and return the result in allocated memory.
754 * Returns NULL when out of memory.
755 */
756 char_u *
757concat_str(char_u *str1, char_u *str2)
758{
759 char_u *dest;
760 size_t l = str1 == NULL ? 0 : STRLEN(str1);
761
762 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000763 if (dest == NULL)
764 return NULL;
765 if (str1 == NULL)
766 *dest = NUL;
767 else
768 STRCPY(dest, str1);
769 if (str2 != NULL)
770 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200771 return dest;
772}
773
zeertzjq4dd266c2023-08-19 11:35:03 +0200774#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
775/*
776 * Reverse text into allocated memory.
777 * Returns the allocated string, NULL when out of memory.
778 */
779 char_u *
780reverse_text(char_u *s)
781{
782 size_t len = STRLEN(s);
783 char_u *rev = alloc(len + 1);
784 if (rev == NULL)
785 return NULL;
786
787 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
788 {
789 if (has_mbyte)
790 {
791 int mb_len = (*mb_ptr2len)(s + s_i);
792 rev_i -= mb_len;
793 mch_memmove(rev + rev_i, s + s_i, mb_len);
794 s_i += mb_len - 1;
795 }
796 else
797 rev[--rev_i] = s[s_i];
798 }
799 rev[len] = NUL;
800 return rev;
801}
802#endif
803
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200804#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200805/*
806 * Return string "str" in ' quotes, doubling ' characters.
807 * If "str" is NULL an empty string is assumed.
808 * If "function" is TRUE make it function('string').
809 */
810 char_u *
811string_quote(char_u *str, int function)
812{
813 unsigned len;
814 char_u *p, *r, *s;
815
816 len = (function ? 13 : 3);
817 if (str != NULL)
818 {
819 len += (unsigned)STRLEN(str);
820 for (p = str; *p != NUL; MB_PTR_ADV(p))
821 if (*p == '\'')
822 ++len;
823 }
824 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000825 if (r == NULL)
826 return NULL;
827
828 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200829 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000830 STRCPY(r, "function('");
831 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200832 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000833 else
834 *r++ = '\'';
835 if (str != NULL)
836 for (p = str; *p != NUL; )
837 {
838 if (*p == '\'')
839 *r++ = '\'';
840 MB_COPY_CHAR(p, r);
841 }
842 *r++ = '\'';
843 if (function)
844 *r++ = ')';
845 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200846 return s;
847}
848
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000849/*
850 * Count the number of times "needle" occurs in string "haystack". Case is
851 * ignored if "ic" is TRUE.
852 */
853 long
854string_count(char_u *haystack, char_u *needle, int ic)
855{
856 long n = 0;
857 char_u *p = haystack;
858 char_u *next;
859
860 if (p == NULL || needle == NULL || *needle == NUL)
861 return 0;
862
863 if (ic)
864 {
865 size_t len = STRLEN(needle);
866
867 while (*p != NUL)
868 {
869 if (MB_STRNICMP(p, needle, len) == 0)
870 {
871 ++n;
872 p += len;
873 }
874 else
875 MB_PTR_ADV(p);
876 }
877 }
878 else
879 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
880 {
881 ++n;
882 p = next + STRLEN(needle);
883 }
884
885 return n;
886}
887
888/*
889 * Make a typval_T of the first character of "input" and store it in "output".
890 * Return OK or FAIL.
891 */
892 static int
893copy_first_char_to_tv(char_u *input, typval_T *output)
894{
895 char_u buf[MB_MAXBYTES + 1];
896 int len;
897
898 if (input == NULL || output == NULL)
899 return FAIL;
900
901 len = has_mbyte ? mb_ptr2len(input) : 1;
902 STRNCPY(buf, input, len);
903 buf[len] = NUL;
904 output->v_type = VAR_STRING;
905 output->vval.v_string = vim_strsave(buf);
906
907 return output->vval.v_string == NULL ? FAIL : OK;
908}
909
910/*
911 * Implementation of map() and filter() for a String. Apply "expr" to every
912 * character in string "str" and return the result in "rettv".
913 */
914 void
915string_filter_map(
916 char_u *str,
917 filtermap_T filtermap,
918 typval_T *expr,
919 typval_T *rettv)
920{
921 char_u *p;
922 typval_T tv;
923 garray_T ga;
924 int len = 0;
925 int idx = 0;
926 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100927 typval_T newtv;
928 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000929
930 rettv->v_type = VAR_STRING;
931 rettv->vval.v_string = NULL;
932
933 // set_vim_var_nr() doesn't set the type
934 set_vim_var_type(VV_KEY, VAR_NUMBER);
935
zeertzjqe7d49462023-04-16 20:53:55 +0100936 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100937 fc = eval_expr_get_funccal(expr, &newtv);
938
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000939 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000940 for (p = str; *p != NUL; p += len)
941 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000942 if (copy_first_char_to_tv(p, &tv) == FAIL)
943 break;
944 len = (int)STRLEN(tv.vval.v_string);
945
946 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100947 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000948 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000949 {
950 clear_tv(&newtv);
951 clear_tv(&tv);
952 break;
953 }
Ernie Raele79e2072024-01-13 11:47:33 +0100954 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000955 {
956 if (newtv.v_type != VAR_STRING)
957 {
958 clear_tv(&newtv);
959 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000960 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000961 break;
962 }
963 else
964 ga_concat(&ga, newtv.vval.v_string);
965 }
Ernie Raele79e2072024-01-13 11:47:33 +0100966 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000967 ga_concat(&ga, tv.vval.v_string);
968
969 clear_tv(&newtv);
970 clear_tv(&tv);
971
972 ++idx;
973 }
974 ga_append(&ga, NUL);
975 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +0100976 if (fc != NULL)
977 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000978}
979
980/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100981 * Implementation of reduce() for String "argvars[0]" using the function "expr"
982 * starting with the optional initial value "argvars[2]" and return the result
983 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000984 */
985 void
986string_reduce(
987 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +0100988 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000989 typval_T *rettv)
990{
991 char_u *p = tv_get_string(&argvars[0]);
992 int len;
993 typval_T argv[3];
994 int r;
995 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +0100996 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000997
998 if (argvars[2].v_type == VAR_UNKNOWN)
999 {
1000 if (*p == NUL)
1001 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001002 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001003 return;
1004 }
1005 if (copy_first_char_to_tv(p, rettv) == FAIL)
1006 return;
1007 p += STRLEN(rettv->vval.v_string);
1008 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001009 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001010 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001011 else
1012 copy_tv(&argvars[2], rettv);
1013
zeertzjqe7d49462023-04-16 20:53:55 +01001014 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001015 fc = eval_expr_get_funccal(expr, rettv);
1016
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001017 for ( ; *p != NUL; p += len)
1018 {
1019 argv[0] = *rettv;
1020 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1021 break;
1022 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001023
zeertzjqad0c4422023-08-17 22:15:47 +02001024 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001025
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001026 clear_tv(&argv[0]);
1027 clear_tv(&argv[1]);
1028 if (r == FAIL || called_emsg != called_emsg_start)
1029 return;
1030 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001031
1032 if (fc != NULL)
1033 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001034}
1035
Bram Moolenaare4098452023-05-07 18:53:49 +01001036/*
1037 * Implementation of "byteidx()" and "byteidxcomp()" functions
1038 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001039 static void
Bram Moolenaare4098452023-05-07 18:53:49 +01001040byteidx_common(typval_T *argvars, typval_T *rettv, int comp UNUSED)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001041{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001042 rettv->vval.v_number = -1;
1043
1044 if (in_vim9script()
1045 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001046 || check_for_number_arg(argvars, 1) == FAIL
1047 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001048 return;
1049
Christian Brabandt67672ef2023-04-24 21:09:54 +01001050 char_u *str = tv_get_string_chk(&argvars[0]);
1051 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001052 if (str == NULL || idx < 0)
1053 return;
1054
Christian Brabandt67672ef2023-04-24 21:09:54 +01001055 varnumber_T utf16idx = FALSE;
1056 if (argvars[2].v_type != VAR_UNKNOWN)
1057 {
zeertzjq8cf51372023-05-08 15:31:38 +01001058 int error = FALSE;
1059 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1060 if (error)
1061 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001062 if (utf16idx < 0 || utf16idx > 1)
1063 {
zeertzjq8cf51372023-05-08 15:31:38 +01001064 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001065 return;
1066 }
1067 }
1068
1069 int (*ptr2len)(char_u *);
1070 if (enc_utf8 && comp)
1071 ptr2len = utf_ptr2len;
1072 else
1073 ptr2len = mb_ptr2len;
1074
1075 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001076 for ( ; idx > 0; idx--)
1077 {
1078 if (*t == NUL) // EOL reached
1079 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001080 if (utf16idx)
1081 {
1082 int clen = ptr2len(t);
1083 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1084 if (c > 0xFFFF)
1085 idx--;
1086 }
1087 if (idx > 0)
1088 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001089 }
1090 rettv->vval.v_number = (varnumber_T)(t - str);
1091}
1092
1093/*
1094 * "byteidx()" function
1095 */
1096 void
1097f_byteidx(typval_T *argvars, typval_T *rettv)
1098{
Bram Moolenaare4098452023-05-07 18:53:49 +01001099 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001100}
1101
1102/*
1103 * "byteidxcomp()" function
1104 */
1105 void
1106f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1107{
Bram Moolenaare4098452023-05-07 18:53:49 +01001108 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001109}
1110
1111/*
1112 * "charidx()" function
1113 */
1114 void
1115f_charidx(typval_T *argvars, typval_T *rettv)
1116{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001117 rettv->vval.v_number = -1;
1118
Christian Brabandt67672ef2023-04-24 21:09:54 +01001119 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001120 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001121 || check_for_opt_bool_arg(argvars, 2) == FAIL
1122 || (argvars[2].v_type != VAR_UNKNOWN
1123 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001124 return;
1125
Christian Brabandt67672ef2023-04-24 21:09:54 +01001126 char_u *str = tv_get_string_chk(&argvars[0]);
1127 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001128 if (str == NULL || idx < 0)
1129 return;
1130
Christian Brabandt67672ef2023-04-24 21:09:54 +01001131 varnumber_T countcc = FALSE;
1132 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001133 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001134 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001135 countcc = tv_get_bool(&argvars[2]);
1136 if (argvars[3].v_type != VAR_UNKNOWN)
1137 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001138 }
1139
Christian Brabandt67672ef2023-04-24 21:09:54 +01001140 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001141 if (enc_utf8 && countcc)
1142 ptr2len = utf_ptr2len;
1143 else
1144 ptr2len = mb_ptr2len;
1145
Christian Brabandt67672ef2023-04-24 21:09:54 +01001146 char_u *p;
1147 int len;
1148 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001149 {
1150 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001151 {
1152 // If the index is exactly the number of bytes or utf-16 code units
1153 // in the string then return the length of the string in
1154 // characters.
1155 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1156 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001157 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001158 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001159 if (utf16idx)
1160 {
1161 idx--;
1162 int clen = ptr2len(p);
1163 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1164 if (c > 0xFFFF)
1165 idx--;
1166 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001167 p += ptr2len(p);
1168 }
1169
1170 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1171}
1172
1173/*
1174 * "str2list()" function
1175 */
1176 void
1177f_str2list(typval_T *argvars, typval_T *rettv)
1178{
1179 char_u *p;
1180 int utf8 = FALSE;
1181
1182 if (rettv_list_alloc(rettv) == FAIL)
1183 return;
1184
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001185 if (in_vim9script()
1186 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001187 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001188 return;
1189
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001190 if (argvars[1].v_type != VAR_UNKNOWN)
1191 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1192
1193 p = tv_get_string(&argvars[0]);
1194
1195 if (has_mbyte || utf8)
1196 {
1197 int (*ptr2len)(char_u *);
1198 int (*ptr2char)(char_u *);
1199
1200 if (utf8 || enc_utf8)
1201 {
1202 ptr2len = utf_ptr2len;
1203 ptr2char = utf_ptr2char;
1204 }
1205 else
1206 {
1207 ptr2len = mb_ptr2len;
1208 ptr2char = mb_ptr2char;
1209 }
1210
1211 for ( ; *p != NUL; p += (*ptr2len)(p))
1212 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1213 }
1214 else
1215 for ( ; *p != NUL; ++p)
1216 list_append_number(rettv->vval.v_list, *p);
1217}
1218
1219/*
1220 * "str2nr()" function
1221 */
1222 void
1223f_str2nr(typval_T *argvars, typval_T *rettv)
1224{
1225 int base = 10;
1226 char_u *p;
1227 varnumber_T n;
1228 int what = 0;
1229 int isneg;
1230
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001231 if (in_vim9script()
1232 && (check_for_string_arg(argvars, 0) == FAIL
1233 || check_for_opt_number_arg(argvars, 1) == FAIL
1234 || (argvars[1].v_type != VAR_UNKNOWN
1235 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1236 return;
1237
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001238 if (argvars[1].v_type != VAR_UNKNOWN)
1239 {
1240 base = (int)tv_get_number(&argvars[1]);
1241 if (base != 2 && base != 8 && base != 10 && base != 16)
1242 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001243 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001244 return;
1245 }
1246 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1247 what |= STR2NR_QUOTE;
1248 }
1249
1250 p = skipwhite(tv_get_string_strict(&argvars[0]));
1251 isneg = (*p == '-');
1252 if (*p == '+' || *p == '-')
1253 p = skipwhite(p + 1);
1254 switch (base)
1255 {
1256 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1257 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1258 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1259 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001260 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001261 // Text after the number is silently ignored.
1262 if (isneg)
1263 rettv->vval.v_number = -n;
1264 else
1265 rettv->vval.v_number = n;
1266
1267}
1268
1269/*
1270 * "strgetchar()" function
1271 */
1272 void
1273f_strgetchar(typval_T *argvars, typval_T *rettv)
1274{
1275 char_u *str;
1276 int len;
1277 int error = FALSE;
1278 int charidx;
1279 int byteidx = 0;
1280
1281 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001282
1283 if (in_vim9script()
1284 && (check_for_string_arg(argvars, 0) == FAIL
1285 || check_for_number_arg(argvars, 1) == FAIL))
1286 return;
1287
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001288 str = tv_get_string_chk(&argvars[0]);
1289 if (str == NULL)
1290 return;
1291 len = (int)STRLEN(str);
1292 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1293 if (error)
1294 return;
1295
1296 while (charidx >= 0 && byteidx < len)
1297 {
1298 if (charidx == 0)
1299 {
1300 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1301 break;
1302 }
1303 --charidx;
1304 byteidx += MB_CPTR2LEN(str + byteidx);
1305 }
1306}
1307
1308/*
1309 * "stridx()" function
1310 */
1311 void
1312f_stridx(typval_T *argvars, typval_T *rettv)
1313{
1314 char_u buf[NUMBUFLEN];
1315 char_u *needle;
1316 char_u *haystack;
1317 char_u *save_haystack;
1318 char_u *pos;
1319 int start_idx;
1320
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001321 if (in_vim9script()
1322 && (check_for_string_arg(argvars, 0) == FAIL
1323 || check_for_string_arg(argvars, 1) == FAIL
1324 || check_for_opt_number_arg(argvars, 2) == FAIL))
1325 return;
1326
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001327 needle = tv_get_string_chk(&argvars[1]);
1328 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1329 rettv->vval.v_number = -1;
1330 if (needle == NULL || haystack == NULL)
1331 return; // type error; errmsg already given
1332
1333 if (argvars[2].v_type != VAR_UNKNOWN)
1334 {
1335 int error = FALSE;
1336
1337 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1338 if (error || start_idx >= (int)STRLEN(haystack))
1339 return;
1340 if (start_idx >= 0)
1341 haystack += start_idx;
1342 }
1343
1344 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1345 if (pos != NULL)
1346 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1347}
1348
1349/*
1350 * "string()" function
1351 */
1352 void
1353f_string(typval_T *argvars, typval_T *rettv)
1354{
1355 char_u *tofree;
1356 char_u numbuf[NUMBUFLEN];
1357
1358 rettv->v_type = VAR_STRING;
1359 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1360 get_copyID());
1361 // Make a copy if we have a value but it's not in allocated memory.
1362 if (rettv->vval.v_string != NULL && tofree == NULL)
1363 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1364}
1365
1366/*
1367 * "strlen()" function
1368 */
1369 void
1370f_strlen(typval_T *argvars, typval_T *rettv)
1371{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001372 if (in_vim9script()
1373 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1374 return;
1375
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001376 rettv->vval.v_number = (varnumber_T)(STRLEN(
1377 tv_get_string(&argvars[0])));
1378}
1379
1380 static void
1381strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1382{
1383 char_u *s = tv_get_string(&argvars[0]);
1384 varnumber_T len = 0;
1385 int (*func_mb_ptr2char_adv)(char_u **pp);
1386
1387 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1388 while (*s != NUL)
1389 {
1390 func_mb_ptr2char_adv(&s);
1391 ++len;
1392 }
1393 rettv->vval.v_number = len;
1394}
1395
1396/*
1397 * "strcharlen()" function
1398 */
1399 void
1400f_strcharlen(typval_T *argvars, typval_T *rettv)
1401{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001402 if (in_vim9script()
1403 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1404 return;
1405
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001406 strchar_common(argvars, rettv, TRUE);
1407}
1408
1409/*
1410 * "strchars()" function
1411 */
1412 void
1413f_strchars(typval_T *argvars, typval_T *rettv)
1414{
1415 varnumber_T skipcc = FALSE;
1416
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001417 if (in_vim9script()
1418 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001419 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001420 return;
1421
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001422 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001423 {
zeertzjq8cf51372023-05-08 15:31:38 +01001424 int error = FALSE;
1425 skipcc = tv_get_bool_chk(&argvars[1], &error);
1426 if (error)
1427 return;
1428 if (skipcc < 0 || skipcc > 1)
1429 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001430 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001431 return;
1432 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001433 }
zeertzjq8cf51372023-05-08 15:31:38 +01001434
1435 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001436}
1437
1438/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001439 * "strutf16len()" function
1440 */
1441 void
1442f_strutf16len(typval_T *argvars, typval_T *rettv)
1443{
1444 rettv->vval.v_number = -1;
1445
1446 if (check_for_string_arg(argvars, 0) == FAIL
1447 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1448 return;
1449
1450 varnumber_T countcc = FALSE;
1451 if (argvars[1].v_type != VAR_UNKNOWN)
1452 countcc = tv_get_bool(&argvars[1]);
1453
1454 char_u *s = tv_get_string(&argvars[0]);
1455 varnumber_T len = 0;
1456 int (*func_mb_ptr2char_adv)(char_u **pp);
1457 int ch;
1458
1459 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1460 while (*s != NUL)
1461 {
1462 ch = func_mb_ptr2char_adv(&s);
1463 if (ch > 0xFFFF)
1464 ++len;
1465 ++len;
1466 }
1467 rettv->vval.v_number = len;
1468}
1469
1470/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001471 * "strdisplaywidth()" function
1472 */
1473 void
1474f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1475{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001476 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001477 int col = 0;
1478
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001479 rettv->vval.v_number = -1;
1480
1481 if (in_vim9script()
1482 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001483 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001484 return;
1485
1486 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001487 if (argvars[1].v_type != VAR_UNKNOWN)
1488 col = (int)tv_get_number(&argvars[1]);
1489
1490 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1491}
1492
1493/*
1494 * "strwidth()" function
1495 */
1496 void
1497f_strwidth(typval_T *argvars, typval_T *rettv)
1498{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001499 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001500
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001501 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1502 return;
1503
1504 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001505 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1506}
1507
1508/*
1509 * "strcharpart()" function
1510 */
1511 void
1512f_strcharpart(typval_T *argvars, typval_T *rettv)
1513{
1514 char_u *p;
1515 int nchar;
1516 int nbyte = 0;
1517 int charlen;
1518 int skipcc = FALSE;
1519 int len = 0;
1520 int slen;
1521 int error = FALSE;
1522
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001523 if (in_vim9script()
1524 && (check_for_string_arg(argvars, 0) == FAIL
1525 || check_for_number_arg(argvars, 1) == FAIL
1526 || check_for_opt_number_arg(argvars, 2) == FAIL
1527 || (argvars[2].v_type != VAR_UNKNOWN
1528 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1529 return;
1530
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001531 p = tv_get_string(&argvars[0]);
1532 slen = (int)STRLEN(p);
1533
1534 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1535 if (!error)
1536 {
1537 if (argvars[2].v_type != VAR_UNKNOWN
1538 && argvars[3].v_type != VAR_UNKNOWN)
1539 {
zeertzjq8cf51372023-05-08 15:31:38 +01001540 skipcc = tv_get_bool_chk(&argvars[3], &error);
1541 if (error)
1542 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001543 if (skipcc < 0 || skipcc > 1)
1544 {
zeertzjq8cf51372023-05-08 15:31:38 +01001545 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001546 return;
1547 }
1548 }
1549
1550 if (nchar > 0)
1551 while (nchar > 0 && nbyte < slen)
1552 {
1553 if (skipcc)
1554 nbyte += mb_ptr2len(p + nbyte);
1555 else
1556 nbyte += MB_CPTR2LEN(p + nbyte);
1557 --nchar;
1558 }
1559 else
1560 nbyte = nchar;
1561 if (argvars[2].v_type != VAR_UNKNOWN)
1562 {
1563 charlen = (int)tv_get_number(&argvars[2]);
1564 while (charlen > 0 && nbyte + len < slen)
1565 {
1566 int off = nbyte + len;
1567
1568 if (off < 0)
1569 len += 1;
1570 else
1571 {
1572 if (skipcc)
1573 len += mb_ptr2len(p + off);
1574 else
1575 len += MB_CPTR2LEN(p + off);
1576 }
1577 --charlen;
1578 }
1579 }
1580 else
1581 len = slen - nbyte; // default: all bytes that are available.
1582 }
1583
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001584 // Only return the overlap between the specified part and the actual
1585 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001586 if (nbyte < 0)
1587 {
1588 len += nbyte;
1589 nbyte = 0;
1590 }
1591 else if (nbyte > slen)
1592 nbyte = slen;
1593 if (len < 0)
1594 len = 0;
1595 else if (nbyte + len > slen)
1596 len = slen - nbyte;
1597
1598 rettv->v_type = VAR_STRING;
1599 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1600}
1601
1602/*
1603 * "strpart()" function
1604 */
1605 void
1606f_strpart(typval_T *argvars, typval_T *rettv)
1607{
1608 char_u *p;
1609 int n;
1610 int len;
1611 int slen;
1612 int error = FALSE;
1613
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001614 if (in_vim9script()
1615 && (check_for_string_arg(argvars, 0) == FAIL
1616 || check_for_number_arg(argvars, 1) == FAIL
1617 || check_for_opt_number_arg(argvars, 2) == FAIL
1618 || (argvars[2].v_type != VAR_UNKNOWN
1619 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1620 return;
1621
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001622 p = tv_get_string(&argvars[0]);
1623 slen = (int)STRLEN(p);
1624
1625 n = (int)tv_get_number_chk(&argvars[1], &error);
1626 if (error)
1627 len = 0;
1628 else if (argvars[2].v_type != VAR_UNKNOWN)
1629 len = (int)tv_get_number(&argvars[2]);
1630 else
1631 len = slen - n; // default len: all bytes that are available.
1632
1633 // Only return the overlap between the specified part and the actual
1634 // string.
1635 if (n < 0)
1636 {
1637 len += n;
1638 n = 0;
1639 }
1640 else if (n > slen)
1641 n = slen;
1642 if (len < 0)
1643 len = 0;
1644 else if (n + len > slen)
1645 len = slen - n;
1646
1647 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1648 {
1649 int off;
1650
1651 // length in characters
1652 for (off = n; off < slen && len > 0; --len)
1653 off += mb_ptr2len(p + off);
1654 len = off - n;
1655 }
1656
1657 rettv->v_type = VAR_STRING;
1658 rettv->vval.v_string = vim_strnsave(p + n, len);
1659}
1660
1661/*
1662 * "strridx()" function
1663 */
1664 void
1665f_strridx(typval_T *argvars, typval_T *rettv)
1666{
1667 char_u buf[NUMBUFLEN];
1668 char_u *needle;
1669 char_u *haystack;
1670 char_u *rest;
1671 char_u *lastmatch = NULL;
1672 int haystack_len, end_idx;
1673
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001674 if (in_vim9script()
1675 && (check_for_string_arg(argvars, 0) == FAIL
1676 || check_for_string_arg(argvars, 1) == FAIL
1677 || check_for_opt_number_arg(argvars, 2) == FAIL))
1678 return;
1679
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001680 needle = tv_get_string_chk(&argvars[1]);
1681 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1682
1683 rettv->vval.v_number = -1;
1684 if (needle == NULL || haystack == NULL)
1685 return; // type error; errmsg already given
1686
1687 haystack_len = (int)STRLEN(haystack);
1688 if (argvars[2].v_type != VAR_UNKNOWN)
1689 {
1690 // Third argument: upper limit for index
1691 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1692 if (end_idx < 0)
1693 return; // can never find a match
1694 }
1695 else
1696 end_idx = haystack_len;
1697
1698 if (*needle == NUL)
1699 {
1700 // Empty string matches past the end.
1701 lastmatch = haystack + end_idx;
1702 }
1703 else
1704 {
1705 for (rest = haystack; *rest != '\0'; ++rest)
1706 {
1707 rest = (char_u *)strstr((char *)rest, (char *)needle);
1708 if (rest == NULL || rest > haystack + end_idx)
1709 break;
1710 lastmatch = rest;
1711 }
1712 }
1713
1714 if (lastmatch == NULL)
1715 rettv->vval.v_number = -1;
1716 else
1717 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1718}
1719
1720/*
1721 * "strtrans()" function
1722 */
1723 void
1724f_strtrans(typval_T *argvars, typval_T *rettv)
1725{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001726 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1727 return;
1728
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001729 rettv->v_type = VAR_STRING;
1730 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1731}
1732
Christian Brabandt67672ef2023-04-24 21:09:54 +01001733
1734/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001735 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001736 *
1737 * Converts a byte or character offset in a string to the corresponding UTF-16
1738 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01001739 */
1740 void
1741f_utf16idx(typval_T *argvars, typval_T *rettv)
1742{
1743 rettv->vval.v_number = -1;
1744
1745 if (check_for_string_arg(argvars, 0) == FAIL
1746 || check_for_opt_number_arg(argvars, 1) == FAIL
1747 || check_for_opt_bool_arg(argvars, 2) == FAIL
1748 || (argvars[2].v_type != VAR_UNKNOWN
1749 && check_for_opt_bool_arg(argvars, 3) == FAIL))
1750 return;
1751
1752 char_u *str = tv_get_string_chk(&argvars[0]);
1753 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
1754 if (str == NULL || idx < 0)
1755 return;
1756
1757 varnumber_T countcc = FALSE;
1758 varnumber_T charidx = FALSE;
1759 if (argvars[2].v_type != VAR_UNKNOWN)
1760 {
1761 countcc = tv_get_bool(&argvars[2]);
1762 if (argvars[3].v_type != VAR_UNKNOWN)
1763 charidx = tv_get_bool(&argvars[3]);
1764 }
1765
1766 int (*ptr2len)(char_u *);
1767 if (enc_utf8 && countcc)
1768 ptr2len = utf_ptr2len;
1769 else
1770 ptr2len = mb_ptr2len;
1771
1772 char_u *p;
1773 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001774 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001775 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
1776 {
1777 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001778 {
1779 // If the index is exactly the number of bytes or characters in the
1780 // string then return the length of the string in utf-16 code
1781 // units.
1782 if (charidx ? (idx == 0) : (p == (str + idx)))
1783 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001784 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001785 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001786 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001787 int clen = ptr2len(p);
1788 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1789 if (c > 0xFFFF)
1790 len++;
1791 p += ptr2len(p);
1792 if (charidx)
1793 idx--;
1794 }
1795
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01001796 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001797}
1798
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001799/*
1800 * "tolower(string)" function
1801 */
1802 void
1803f_tolower(typval_T *argvars, typval_T *rettv)
1804{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001805 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1806 return;
1807
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001808 rettv->v_type = VAR_STRING;
1809 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1810}
1811
1812/*
1813 * "toupper(string)" function
1814 */
1815 void
1816f_toupper(typval_T *argvars, typval_T *rettv)
1817{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001818 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1819 return;
1820
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001821 rettv->v_type = VAR_STRING;
1822 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1823}
1824
1825/*
1826 * "tr(string, fromstr, tostr)" function
1827 */
1828 void
1829f_tr(typval_T *argvars, typval_T *rettv)
1830{
1831 char_u *in_str;
1832 char_u *fromstr;
1833 char_u *tostr;
1834 char_u *p;
1835 int inlen;
1836 int fromlen;
1837 int tolen;
1838 int idx;
1839 char_u *cpstr;
1840 int cplen;
1841 int first = TRUE;
1842 char_u buf[NUMBUFLEN];
1843 char_u buf2[NUMBUFLEN];
1844 garray_T ga;
1845
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001846 if (in_vim9script()
1847 && (check_for_string_arg(argvars, 0) == FAIL
1848 || check_for_string_arg(argvars, 1) == FAIL
1849 || check_for_string_arg(argvars, 2) == FAIL))
1850 return;
1851
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001852 in_str = tv_get_string(&argvars[0]);
1853 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1854 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1855
1856 // Default return value: empty string.
1857 rettv->v_type = VAR_STRING;
1858 rettv->vval.v_string = NULL;
1859 if (fromstr == NULL || tostr == NULL)
1860 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00001861 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001862
1863 if (!has_mbyte)
1864 // not multi-byte: fromstr and tostr must be the same length
1865 if (STRLEN(fromstr) != STRLEN(tostr))
1866 {
1867error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001868 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001869 ga_clear(&ga);
1870 return;
1871 }
1872
1873 // fromstr and tostr have to contain the same number of chars
1874 while (*in_str != NUL)
1875 {
1876 if (has_mbyte)
1877 {
1878 inlen = (*mb_ptr2len)(in_str);
1879 cpstr = in_str;
1880 cplen = inlen;
1881 idx = 0;
1882 for (p = fromstr; *p != NUL; p += fromlen)
1883 {
1884 fromlen = (*mb_ptr2len)(p);
1885 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1886 {
1887 for (p = tostr; *p != NUL; p += tolen)
1888 {
1889 tolen = (*mb_ptr2len)(p);
1890 if (idx-- == 0)
1891 {
1892 cplen = tolen;
1893 cpstr = p;
1894 break;
1895 }
1896 }
1897 if (*p == NUL) // tostr is shorter than fromstr
1898 goto error;
1899 break;
1900 }
1901 ++idx;
1902 }
1903
1904 if (first && cpstr == in_str)
1905 {
1906 // Check that fromstr and tostr have the same number of
1907 // (multi-byte) characters. Done only once when a character
1908 // of in_str doesn't appear in fromstr.
1909 first = FALSE;
1910 for (p = tostr; *p != NUL; p += tolen)
1911 {
1912 tolen = (*mb_ptr2len)(p);
1913 --idx;
1914 }
1915 if (idx != 0)
1916 goto error;
1917 }
1918
1919 (void)ga_grow(&ga, cplen);
1920 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1921 ga.ga_len += cplen;
1922
1923 in_str += inlen;
1924 }
1925 else
1926 {
1927 // When not using multi-byte chars we can do it faster.
1928 p = vim_strchr(fromstr, *in_str);
1929 if (p != NULL)
1930 ga_append(&ga, tostr[p - fromstr]);
1931 else
1932 ga_append(&ga, *in_str);
1933 ++in_str;
1934 }
1935 }
1936
1937 // add a terminating NUL
1938 (void)ga_grow(&ga, 1);
1939 ga_append(&ga, NUL);
1940
1941 rettv->vval.v_string = ga.ga_data;
1942}
1943
1944/*
1945 * "trim({expr})" function
1946 */
1947 void
1948f_trim(typval_T *argvars, typval_T *rettv)
1949{
1950 char_u buf1[NUMBUFLEN];
1951 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001952 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001953 char_u *mask = NULL;
1954 char_u *tail;
1955 char_u *prev;
1956 char_u *p;
1957 int c1;
1958 int dir = 0;
1959
1960 rettv->v_type = VAR_STRING;
1961 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001962
1963 if (in_vim9script()
1964 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02001965 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001966 || (argvars[1].v_type != VAR_UNKNOWN
1967 && check_for_opt_number_arg(argvars, 2) == FAIL)))
1968 return;
1969
1970 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001971 if (head == NULL)
1972 return;
1973
Illia Bobyr80799172023-10-17 18:00:50 +02001974 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001975 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001976
1977 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02001978 {
Illia Bobyr80799172023-10-17 18:00:50 +02001979 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1980 if (*mask == NUL)
1981 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001982
Illia Bobyr80799172023-10-17 18:00:50 +02001983 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02001984 {
Illia Bobyr80799172023-10-17 18:00:50 +02001985 int error = 0;
1986
1987 // leading or trailing characters to trim
1988 dir = (int)tv_get_number_chk(&argvars[2], &error);
1989 if (error)
1990 return;
1991 if (dir < 0 || dir > 2)
1992 {
1993 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
1994 return;
1995 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001996 }
1997 }
1998
1999 if (dir == 0 || dir == 1)
2000 {
2001 // Trim leading characters
2002 while (*head != NUL)
2003 {
2004 c1 = PTR2CHAR(head);
2005 if (mask == NULL)
2006 {
2007 if (c1 > ' ' && c1 != 0xa0)
2008 break;
2009 }
2010 else
2011 {
2012 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2013 if (c1 == PTR2CHAR(p))
2014 break;
2015 if (*p == NUL)
2016 break;
2017 }
2018 MB_PTR_ADV(head);
2019 }
2020 }
2021
2022 tail = head + STRLEN(head);
2023 if (dir == 0 || dir == 2)
2024 {
2025 // Trim trailing characters
2026 for (; tail > head; tail = prev)
2027 {
2028 prev = tail;
2029 MB_PTR_BACK(head, prev);
2030 c1 = PTR2CHAR(prev);
2031 if (mask == NULL)
2032 {
2033 if (c1 > ' ' && c1 != 0xa0)
2034 break;
2035 }
2036 else
2037 {
2038 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2039 if (c1 == PTR2CHAR(p))
2040 break;
2041 if (*p == NUL)
2042 break;
2043 }
2044 }
2045 }
2046 rettv->vval.v_string = vim_strnsave(head, tail - head);
2047}
2048
// Message given by tv_nr(), tv_str() and tv_float() when the requested
// argument entry has type VAR_UNKNOWN.
Bram Moolenaar677658a2022-01-05 16:09:06 +00002049static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002050
2051/*
2052 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2053 */
2054 static varnumber_T
2055tv_nr(typval_T *tvs, int *idxp)
2056{
2057 int idx = *idxp - 1;
2058 varnumber_T n = 0;
2059 int err = FALSE;
2060
2061 if (tvs[idx].v_type == VAR_UNKNOWN)
2062 emsg(_(e_printf));
2063 else
2064 {
2065 ++*idxp;
2066 n = tv_get_number_chk(&tvs[idx], &err);
2067 if (err)
2068 n = 0;
2069 }
2070 return n;
2071}
2072
2073/*
2074 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2075 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2076 * are not converted to a string.
2077 * If "tofree" is not NULL echo_string() is used. All types are converted to
2078 * a string with the same format as ":echo". The caller must free "*tofree".
2079 * Returns NULL for an error.
2080 */
2081 static char *
2082tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2083{
2084 int idx = *idxp - 1;
2085 char *s = NULL;
2086 static char_u numbuf[NUMBUFLEN];
2087
2088 if (tvs[idx].v_type == VAR_UNKNOWN)
2089 emsg(_(e_printf));
2090 else
2091 {
2092 ++*idxp;
2093 if (tofree != NULL)
2094 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2095 else
2096 s = (char *)tv_get_string_chk(&tvs[idx]);
2097 }
2098 return s;
2099}
2100
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002101/*
2102 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2103 */
2104 static double
2105tv_float(typval_T *tvs, int *idxp)
2106{
2107 int idx = *idxp - 1;
2108 double f = 0;
2109
2110 if (tvs[idx].v_type == VAR_UNKNOWN)
2111 emsg(_(e_printf));
2112 else
2113 {
2114 ++*idxp;
2115 if (tvs[idx].v_type == VAR_FLOAT)
2116 f = tvs[idx].vval.v_float;
2117 else if (tvs[idx].v_type == VAR_NUMBER)
2118 f = (double)tvs[idx].vval.v_number;
2119 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002120 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002121 }
2122 return f;
2123}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002124
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002125#endif
2126
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * The upper case forms are used when "fmt_spec" is an upper case letter.
 */
    static const char *
infinity_str(int positive,
	     char fmt_spec,
	     int force_sign,
	     int space_for_positive)
{
    static const char *table[] =
    {
	"-inf", "inf", "+inf", " inf",
	"-INF", "INF", "+INF", " INF"
    };
    // Entry within a row of four: 0 for negative, otherwise 1 without sign,
    // 2 with a forced sign, 3 when a space is used for the sign.
    int sign_variant = 1 + force_sign * (1 + space_for_positive);
    int upper_offset = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[positive * sign_variant + upper_offset];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002148
2149/*
2150 * This code was included to provide a portable vsnprintf() and snprintf().
2151 * Some systems may provide their own, but we always use this one for
2152 * consistency.
2153 *
2154 * This code is based on snprintf.c - a portable implementation of snprintf
2155 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2156 * Included with permission. It was heavily modified to fit in Vim.
2157 * The original code, including useful comments, can be found here:
2158 * http://www.ijs.si/software/snprintf/
2159 *
2160 * This snprintf() only supports the following conversion specifiers:
2161 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2162 * with flags: '-', '+', ' ', '0' and '#'.
2163 * An asterisk is supported for field width as well as precision.
2164 *
2165 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2166 *
2167 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2168 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2169 *
2170 * The locale is not used, the string is used as a byte string. This is only
2171 * relevant for double-byte encodings where the second byte may be '%'.
2172 *
2173 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2174 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2175 *
 * The return value is the number of characters which would be generated
 * for the given input, excluding the trailing NUL.  If this value
 * is greater than or equal to "str_m", not all characters from the result
 * have been stored in "str": output bytes beyond the ("str_m" - 1)-th
 * character are discarded.  If "str_m" is greater than zero it is guaranteed
 * the resulting string will be NUL-terminated.
2182 */
2183
2184/*
2185 * When va_list is not supported we only define vim_snprintf().
2186 *
2187 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2188 * "typval_T". When the latter is not used it must be NULL.
2189 */
2190
2191// When generating prototypes all of this is skipped, cproto doesn't
2192// understand this.
2193#ifndef PROTO
2194
// Like vim_snprintf() but append to the NUL-terminated string already in
// "str".  "str_m" is the size of the whole buffer; when the existing text
// already fills it no space is passed on to vim_vsnprintf().
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	ap;
    int		str_l;
    size_t	used = STRLEN(str);
    size_t	space = (str_m > used) ? str_m - used : 0;

    va_start(ap, fmt);
    str_l = vim_vsnprintf(str + used, space, fmt, ap);
    va_end(ap);
    return str_l;
}
2213
/*
 * Format "fmt" with the following arguments into "str", which has room for
 * "str_m" bytes.  Collects the arguments in a va_list and passes them to
 * vim_vsnprintf(), whose return value is returned.
 */
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	args;
    int		result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2225
/*
 * va_list variant of vim_snprintf().
 * Forwards to vim_vsnprintf_typval() with a NULL "typval_T" list, so that
 * the arguments are taken from "ap".
 */
    int
vim_vsnprintf(
    char	*str,
    size_t	str_m,
    const char	*fmt,
    va_list	ap)
{
    int		result = vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);

    return result;
}
2235
// Argument type classes that a conversion in a format string can require.
enum
{
    TYPE_UNKNOWN = -1,
    TYPE_INT,
    TYPE_LONGINT,
    TYPE_LONGLONGINT,
    TYPE_UNSIGNEDINT,
    TYPE_UNSIGNEDLONGINT,
    TYPE_UNSIGNEDLONGLONGINT,
    TYPE_POINTER,
    TYPE_PERCENT,
    TYPE_CHAR,
    TYPE_STRING,
    TYPE_FLOAT
};

/*
 * Classify the conversion found at "type".
 * "type" points at the optional length modifier ('h', 'l' or 'll') followed
 * by the conversion specifier character; it may also point at a '*', which
 * is treated like an int argument.
 * Returns one of the TYPE_ values, TYPE_UNKNOWN when the specifier is not
 * recognized.
 */
    static int
format_typeof(
    const char	*type)
{
    // '\0' (none), 'h', 'l' or 'L' (the latter stands for "ll")
    char	modifier = '\0';
    char	spec;
    int		is_signed;

    // Consume the length modifier, if there is one.
    if (*type == 'h')
    {
	modifier = 'h';
	++type;
    }
    else if (*type == 'l')
    {
	++type;
	if (*type == 'l')
	{
	    // double l = __int64 / varnumber_T
	    modifier = 'L';
	    ++type;
	}
	else
	    modifier = 'l';
    }
    spec = *type;

    // Map the synonyms onto their canonical specifier and modifier.
    switch (spec)
    {
	case 'i': spec = 'd'; break;
	case '*': spec = 'd'; modifier = 'h'; break;
	case 'D': spec = 'd'; modifier = 'l'; break;
	case 'U': spec = 'u'; modifier = 'l'; break;
	case 'O': spec = 'o'; modifier = 'l'; break;
	default: break;
    }

    // Everything except the plain integer conversions can be answered
    // without looking at the length modifier.
    switch (spec)
    {
	case '%':
	    return TYPE_PERCENT;

	case 'c':
	    return TYPE_CHAR;

	case 's': case 'S':
	    return TYPE_STRING;

	case 'p':
	    return TYPE_POINTER;

	case 'b': case 'B':
	    return TYPE_UNSIGNEDLONGLONGINT;

	case 'f': case 'F':
	case 'e': case 'E':
	case 'g': case 'G':
	    return TYPE_FLOAT;

	case 'd':
	    // d implies a signed value
	    is_signed = 1;
	    break;

	case 'u': case 'o':
	case 'x': case 'X':
	    // u, o, x and X imply an unsigned value
	    is_signed = 0;
	    break;

	default:
	    return TYPE_UNKNOWN;
    }

    switch (modifier)
    {
	case 'l':
	    return is_signed ? TYPE_LONGINT : TYPE_UNSIGNEDLONGINT;

	case 'L':
	    return is_signed ? TYPE_LONGLONGINT : TYPE_UNSIGNEDLONGLONGINT;

	default:
	    // No modifier or 'h': char and short arguments are passed as
	    // int.
	    return is_signed ? TYPE_INT : TYPE_UNSIGNEDINT;
    }
}
2366
zeertzjq7772c932023-08-15 22:48:40 +02002367 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002368format_typename(
2369 const char *type)
2370{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002371 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002372 {
2373 case TYPE_INT:
2374 return _(typename_int);
2375
2376 case TYPE_LONGINT:
2377 return _(typename_longint);
2378
2379 case TYPE_LONGLONGINT:
2380 return _(typename_longlongint);
2381
2382 case TYPE_UNSIGNEDINT:
2383 return _(typename_unsignedint);
2384
2385 case TYPE_UNSIGNEDLONGINT:
2386 return _(typename_unsignedlongint);
2387
2388 case TYPE_UNSIGNEDLONGLONGINT:
2389 return _(typename_unsignedlonglongint);
2390
2391 case TYPE_POINTER:
2392 return _(typename_pointer);
2393
2394 case TYPE_PERCENT:
2395 return _(typename_percent);
2396
2397 case TYPE_CHAR:
2398 return _(typename_char);
2399
2400 case TYPE_STRING:
2401 return _(typename_string);
2402
2403 case TYPE_FLOAT:
2404 return _(typename_float);
2405 }
2406
2407 return _(typename_unknown);
2408}
2409
zeertzjq7772c932023-08-15 22:48:40 +02002410 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002411adjust_types(
2412 const char ***ap_types,
2413 int arg,
2414 int *num_posarg,
2415 const char *type)
2416{
2417 if (*ap_types == NULL || *num_posarg < arg)
2418 {
2419 int idx;
2420 const char **new_types;
2421
2422 if (*ap_types == NULL)
2423 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2424 else
K.Takata4c215ec2023-08-26 18:05:08 +02002425 new_types = vim_realloc((char **)*ap_types,
2426 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002427
2428 if (new_types == NULL)
2429 return FAIL;
2430
2431 for (idx = *num_posarg; idx < arg; ++idx)
2432 new_types[idx] = NULL;
2433
2434 *ap_types = new_types;
2435 *num_posarg = arg;
2436 }
2437
2438 if ((*ap_types)[arg - 1] != NULL)
2439 {
2440 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2441 {
2442 const char *pt = type;
2443 if (pt[0] == '*')
2444 pt = (*ap_types)[arg - 1];
2445
2446 if (pt[0] != '*')
2447 {
2448 switch (pt[0])
2449 {
2450 case 'd': case 'i': break;
2451 default:
2452 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2453 return FAIL;
2454 }
2455 }
2456 }
2457 else
2458 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002459 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002460 {
2461 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2462 return FAIL;
2463 }
2464 }
2465 }
2466
2467 (*ap_types)[arg - 1] = type;
2468
2469 return OK;
2470}
2471
Christ van Willegenc35fc032024-03-14 18:30:41 +01002472 static void
2473format_overflow_error(const char *pstart)
2474{
2475 size_t arglen = 0;
2476 char *argcopy = NULL;
2477 const char *p = pstart;
2478
2479 while (VIM_ISDIGIT((int)(*p)))
2480 ++p;
2481
2482 arglen = p - pstart;
2483 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2484 if (argcopy != NULL)
2485 {
2486 strncpy(argcopy, pstart, arglen);
2487 semsg(_( e_val_too_large), argcopy);
2488 free(argcopy);
2489 }
2490 else
2491 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2492}
2493
2494#define MAX_ALLOWED_STRING_WIDTH 6400
2495
2496 static int
2497get_unsigned_int(
2498 const char *pstart,
2499 const char **p,
2500 unsigned int *uj)
2501{
2502 *uj = **p - '0';
2503 ++*p;
2504
2505 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2506 {
2507 *uj = 10 * *uj + (unsigned int)(**p - '0');
2508 ++*p;
2509 }
2510
2511 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2512 {
2513 format_overflow_error(pstart);
2514 return FAIL;
2515 }
2516
2517 return OK;
2518}
2519
2520
zeertzjq7772c932023-08-15 22:48:40 +02002521 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002522parse_fmt_types(
2523 const char ***ap_types,
2524 int *num_posarg,
2525 const char *fmt,
2526 typval_T *tvs UNUSED
2527 )
2528{
2529 const char *p = fmt;
2530 const char *arg = NULL;
2531
2532 int any_pos = 0;
2533 int any_arg = 0;
2534 int arg_idx;
2535
2536#define CHECK_POS_ARG do { \
2537 if (any_pos && any_arg) \
2538 { \
2539 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2540 goto error; \
2541 } \
2542} while (0);
2543
2544 if (p == NULL)
2545 return OK;
2546
2547 while (*p != NUL)
2548 {
2549 if (*p != '%')
2550 {
2551 char *q = strchr(p + 1, '%');
2552 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2553
2554 p += n;
2555 }
2556 else
2557 {
2558 // allowed values: \0, h, l, L
2559 char length_modifier = '\0';
2560
2561 // variable for positional arg
2562 int pos_arg = -1;
2563 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002564 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002565
2566 p++; // skip '%'
2567
2568 // First check to see if we find a positional
2569 // argument specifier
2570 ptype = p;
2571
2572 while (VIM_ISDIGIT(*ptype))
2573 ++ptype;
2574
2575 if (*ptype == '$')
2576 {
2577 if (*p == '0')
2578 {
2579 // 0 flag at the wrong place
2580 semsg(_( e_invalid_format_specifier_str), fmt);
2581 goto error;
2582 }
2583
2584 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002585 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002586
Christ van Willegenc35fc032024-03-14 18:30:41 +01002587 if (get_unsigned_int(pstart, &p, &uj) == FAIL)
2588 goto error;
2589
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002590 pos_arg = uj;
2591
2592 any_pos = 1;
2593 CHECK_POS_ARG;
2594
2595 ++p;
2596 }
2597
2598 // parse flags
2599 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2600 || *p == '#' || *p == '\'')
2601 {
2602 switch (*p)
2603 {
2604 case '0': break;
2605 case '-': break;
2606 case '+': break;
2607 case ' ': // If both the ' ' and '+' flags appear, the ' '
2608 // flag should be ignored
2609 break;
2610 case '#': break;
2611 case '\'': break;
2612 }
2613 p++;
2614 }
2615 // If the '0' and '-' flags both appear, the '0' flag should be
2616 // ignored.
2617
2618 // parse field width
2619 if (*(arg = p) == '*')
2620 {
2621 p++;
2622
2623 if (VIM_ISDIGIT((int)(*p)))
2624 {
2625 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002626 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002627
Christ van Willegenc35fc032024-03-14 18:30:41 +01002628 if (get_unsigned_int(arg + 1, &p, &uj) == FAIL)
2629 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002630
2631 if (*p != '$')
2632 {
2633 semsg(_( e_invalid_format_specifier_str), fmt);
2634 goto error;
2635 }
2636 else
2637 {
2638 ++p;
2639 any_pos = 1;
2640 CHECK_POS_ARG;
2641
2642 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2643 goto error;
2644 }
2645 }
2646 else
2647 {
2648 any_arg = 1;
2649 CHECK_POS_ARG;
2650 }
2651 }
dundargoc580c1fc2023-10-06 19:41:14 +02002652 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002653 {
2654 // size_t could be wider than unsigned int; make sure we treat
2655 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002656 const char *digstart = p;
2657 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002658
Christ van Willegenc35fc032024-03-14 18:30:41 +01002659 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
2660 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002661
2662 if (*p == '$')
2663 {
2664 semsg(_( e_invalid_format_specifier_str), fmt);
2665 goto error;
2666 }
2667 }
2668
2669 // parse precision
2670 if (*p == '.')
2671 {
2672 p++;
2673
2674 if (*(arg = p) == '*')
2675 {
2676 p++;
2677
2678 if (VIM_ISDIGIT((int)(*p)))
2679 {
2680 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002681 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002682
Christ van Willegenc35fc032024-03-14 18:30:41 +01002683 if (get_unsigned_int(arg + 1, &p, &uj) == FAIL)
2684 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002685
2686 if (*p == '$')
2687 {
2688 any_pos = 1;
2689 CHECK_POS_ARG;
2690
2691 ++p;
2692
2693 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2694 goto error;
2695 }
2696 else
2697 {
2698 semsg(_( e_invalid_format_specifier_str), fmt);
2699 goto error;
2700 }
2701 }
2702 else
2703 {
2704 any_arg = 1;
2705 CHECK_POS_ARG;
2706 }
2707 }
dundargoc580c1fc2023-10-06 19:41:14 +02002708 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002709 {
2710 // size_t could be wider than unsigned int; make sure we
2711 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002712 const char *digstart = p;
2713 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002714
Christ van Willegenc35fc032024-03-14 18:30:41 +01002715 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
2716 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002717
2718 if (*p == '$')
2719 {
2720 semsg(_( e_invalid_format_specifier_str), fmt);
2721 goto error;
2722 }
2723 }
2724 }
2725
2726 if (pos_arg != -1)
2727 {
2728 any_pos = 1;
2729 CHECK_POS_ARG;
2730
2731 ptype = p;
2732 }
2733
2734 // parse 'h', 'l' and 'll' length modifiers
2735 if (*p == 'h' || *p == 'l')
2736 {
2737 length_modifier = *p;
2738 p++;
2739 if (length_modifier == 'l' && *p == 'l')
2740 {
2741 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02002742 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002743 p++;
2744 }
2745 }
2746
2747 switch (*p)
2748 {
2749 // Check for known format specifiers. % is special!
2750 case 'i':
2751 case '*':
2752 case 'd':
2753 case 'u':
2754 case 'o':
2755 case 'D':
2756 case 'U':
2757 case 'O':
2758 case 'x':
2759 case 'X':
2760 case 'b':
2761 case 'B':
2762 case 'c':
2763 case 's':
2764 case 'S':
2765 case 'p':
2766 case 'f':
2767 case 'F':
2768 case 'e':
2769 case 'E':
2770 case 'g':
2771 case 'G':
2772 if (pos_arg != -1)
2773 {
2774 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
2775 goto error;
2776 }
2777 else
2778 {
2779 any_arg = 1;
2780 CHECK_POS_ARG;
2781 }
2782 break;
2783
2784 default:
2785 if (pos_arg != -1)
2786 {
2787 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
2788 goto error;
2789 }
2790 }
2791
2792 if (*p != NUL)
2793 p++; // step over the just processed conversion specifier
2794 }
2795 }
2796
2797 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
2798 {
2799 if ((*ap_types)[arg_idx] == NULL)
2800 {
2801 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
2802 goto error;
2803 }
2804
2805# if defined(FEAT_EVAL)
2806 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
2807 {
2808 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
2809 goto error;
2810 }
2811# endif
2812 }
2813
2814 return OK;
2815
2816error:
K.Takata4c215ec2023-08-26 18:05:08 +02002817 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002818 *ap_types = NULL;
2819 *num_posarg = 0;
2820 return FAIL;
2821}
2822
zeertzjq7772c932023-08-15 22:48:40 +02002823 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002824skip_to_arg(
2825 const char **ap_types,
2826 va_list ap_start,
2827 va_list *ap,
2828 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002829 int *arg_cur,
2830 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002831{
2832 int arg_min = 0;
2833
2834 if (*arg_cur + 1 == *arg_idx)
2835 {
2836 ++*arg_cur;
2837 ++*arg_idx;
2838 return;
2839 }
2840
2841 if (*arg_cur >= *arg_idx)
2842 {
2843 // Reset ap to ap_start and skip arg_idx - 1 types
2844 va_end(*ap);
2845 va_copy(*ap, ap_start);
2846 }
2847 else
2848 {
2849 // Skip over any we should skip
2850 arg_min = *arg_cur;
2851 }
2852
2853 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
2854 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002855 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002856
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002857 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
2858 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02002859 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002860 return;
2861 }
2862
2863 p = ap_types[*arg_cur];
2864
2865 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002866
2867 // get parameter value, do initial processing
2868 switch (fmt_type)
2869 {
2870 case TYPE_PERCENT:
2871 case TYPE_UNKNOWN:
2872 break;
2873
2874 case TYPE_CHAR:
2875 va_arg(*ap, int);
2876 break;
2877
2878 case TYPE_STRING:
2879 va_arg(*ap, char *);
2880 break;
2881
2882 case TYPE_POINTER:
2883 va_arg(*ap, void *);
2884 break;
2885
2886 case TYPE_INT:
2887 va_arg(*ap, int);
2888 break;
2889
2890 case TYPE_LONGINT:
2891 va_arg(*ap, long int);
2892 break;
2893
2894 case TYPE_LONGLONGINT:
2895 va_arg(*ap, varnumber_T);
2896 break;
2897
2898 case TYPE_UNSIGNEDINT:
2899 va_arg(*ap, unsigned int);
2900 break;
2901
2902 case TYPE_UNSIGNEDLONGINT:
2903 va_arg(*ap, unsigned long int);
2904 break;
2905
2906 case TYPE_UNSIGNEDLONGLONGINT:
2907 va_arg(*ap, uvarnumber_T);
2908 break;
2909
2910 case TYPE_FLOAT:
2911 va_arg(*ap, double);
2912 break;
2913 }
2914 }
2915
2916 // Because we know that after we return from this call,
2917 // a va_arg() call is made, we can pre-emptively
2918 // increment the current argument index.
2919 ++*arg_cur;
2920 ++*arg_idx;
2921
2922 return;
2923}
2924
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002925 int
2926vim_vsnprintf_typval(
2927 char *str,
2928 size_t str_m,
2929 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002930 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002931 typval_T *tvs)
2932{
2933 size_t str_l = 0;
2934 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002935 int arg_cur = 0;
2936 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002937 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002938 va_list ap;
2939 const char **ap_types = NULL;
2940
2941 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
2942 return 0;
2943
2944 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002945
2946 if (p == NULL)
2947 p = "";
2948 while (*p != NUL)
2949 {
2950 if (*p != '%')
2951 {
2952 char *q = strchr(p + 1, '%');
2953 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2954
2955 // Copy up to the next '%' or NUL without any changes.
2956 if (str_l < str_m)
2957 {
2958 size_t avail = str_m - str_l;
2959
2960 mch_memmove(str + str_l, p, n > avail ? avail : n);
2961 }
2962 p += n;
2963 str_l += n;
2964 }
2965 else
2966 {
2967 size_t min_field_width = 0, precision = 0;
2968 int zero_padding = 0, precision_specified = 0, justify_left = 0;
2969 int alternate_form = 0, force_sign = 0;
2970
2971 // If both the ' ' and '+' flags appear, the ' ' flag should be
2972 // ignored.
2973 int space_for_positive = 1;
2974
2975 // allowed values: \0, h, l, L
2976 char length_modifier = '\0';
2977
2978 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002979# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002980 // That sounds reasonable to use as the maximum
2981 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002982 char tmp[TMP_LEN];
2983
2984 // string address in case of string argument
2985 const char *str_arg = NULL;
2986
2987 // natural field width of arg without padding and sign
2988 size_t str_arg_l;
2989
2990 // unsigned char argument value - only defined for c conversion.
2991 // N.B. standard explicitly states the char argument for the c
2992 // conversion is unsigned
2993 unsigned char uchar_arg;
2994
2995 // number of zeros to be inserted for numeric conversions as
2996 // required by the precision or minimal field width
2997 size_t number_of_zeros_to_pad = 0;
2998
2999 // index into tmp where zero padding is to be inserted
3000 size_t zero_padding_insertion_ind = 0;
3001
3002 // current conversion specifier character
3003 char fmt_spec = '\0';
3004
3005 // buffer for 's' and 'S' specs
3006 char_u *tofree = NULL;
3007
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003008 // variables for positional arg
3009 int pos_arg = -1;
3010 const char *ptype;
3011
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003012
3013 p++; // skip '%'
3014
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003015 // First check to see if we find a positional
3016 // argument specifier
3017 ptype = p;
3018
3019 while (VIM_ISDIGIT(*ptype))
3020 ++ptype;
3021
3022 if (*ptype == '$')
3023 {
3024 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003025 const char *digstart = p;
3026 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003027
Christ van Willegenc35fc032024-03-14 18:30:41 +01003028 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3029 goto error;
3030
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003031 pos_arg = uj;
3032
3033 ++p;
3034 }
3035
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003036 // parse flags
3037 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3038 || *p == '#' || *p == '\'')
3039 {
3040 switch (*p)
3041 {
3042 case '0': zero_padding = 1; break;
3043 case '-': justify_left = 1; break;
3044 case '+': force_sign = 1; space_for_positive = 0; break;
3045 case ' ': force_sign = 1;
3046 // If both the ' ' and '+' flags appear, the ' '
3047 // flag should be ignored
3048 break;
3049 case '#': alternate_form = 1; break;
3050 case '\'': break;
3051 }
3052 p++;
3053 }
3054 // If the '0' and '-' flags both appear, the '0' flag should be
3055 // ignored.
3056
3057 // parse field width
3058 if (*p == '*')
3059 {
3060 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003061 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003062
3063 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003064
3065 if (VIM_ISDIGIT((int)(*p)))
3066 {
3067 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003068 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003069
Christ van Willegenc35fc032024-03-14 18:30:41 +01003070 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3071 goto error;
3072
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003073 arg_idx = uj;
3074
3075 ++p;
3076 }
3077
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003078 j =
3079# if defined(FEAT_EVAL)
3080 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3081# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003082 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3083 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003084 va_arg(ap, int));
3085
Christ van Willegenc35fc032024-03-14 18:30:41 +01003086 if (j > MAX_ALLOWED_STRING_WIDTH)
3087 {
3088 format_overflow_error(digstart);
3089 goto error;
3090 }
3091
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003092 if (j >= 0)
3093 min_field_width = j;
3094 else
3095 {
3096 min_field_width = -j;
3097 justify_left = 1;
3098 }
3099 }
3100 else if (VIM_ISDIGIT((int)(*p)))
3101 {
3102 // size_t could be wider than unsigned int; make sure we treat
3103 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003104 const char *digstart = p;
3105 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003106
Christ van Willegenc35fc032024-03-14 18:30:41 +01003107 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3108 goto error;
3109
3110 if (uj > MAX_ALLOWED_STRING_WIDTH)
3111 {
3112 format_overflow_error(digstart);
3113 goto error;
3114 }
3115
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003116 min_field_width = uj;
3117 }
3118
3119 // parse precision
3120 if (*p == '.')
3121 {
3122 p++;
3123 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003124
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003125 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003126 {
3127 // size_t could be wider than unsigned int; make sure we
3128 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003129 const char *digstart = p;
3130 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003131
Christ van Willegenc35fc032024-03-14 18:30:41 +01003132 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3133 goto error;
3134
3135 if (uj > MAX_ALLOWED_STRING_WIDTH)
3136 {
3137 format_overflow_error(digstart);
3138 goto error;
3139 }
3140
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003141 precision = uj;
3142 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003143 else if (*p == '*')
3144 {
3145 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003146 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003147
3148 p++;
3149
3150 if (VIM_ISDIGIT((int)(*p)))
3151 {
3152 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003153 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003154
Christ van Willegenc35fc032024-03-14 18:30:41 +01003155 if (get_unsigned_int(digstart, &p, &uj) == FAIL)
3156 goto error;
3157
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003158 arg_idx = uj;
3159
3160 ++p;
3161 }
3162
3163 j =
3164# if defined(FEAT_EVAL)
3165 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3166# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003167 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3168 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003169 va_arg(ap, int));
3170
Christ van Willegenc35fc032024-03-14 18:30:41 +01003171 if (j > MAX_ALLOWED_STRING_WIDTH)
3172 {
3173 format_overflow_error(digstart);
3174 goto error;
3175 }
3176
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003177 if (j >= 0)
3178 precision = j;
3179 else
3180 {
3181 precision_specified = 0;
3182 precision = 0;
3183 }
3184 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003185 }
3186
3187 // parse 'h', 'l' and 'll' length modifiers
3188 if (*p == 'h' || *p == 'l')
3189 {
3190 length_modifier = *p;
3191 p++;
3192 if (length_modifier == 'l' && *p == 'l')
3193 {
3194 // double l = __int64 / varnumber_T
3195 length_modifier = 'L';
3196 p++;
3197 }
3198 }
3199 fmt_spec = *p;
3200
3201 // common synonyms:
3202 switch (fmt_spec)
3203 {
3204 case 'i': fmt_spec = 'd'; break;
3205 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3206 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3207 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3208 default: break;
3209 }
3210
3211# if defined(FEAT_EVAL)
3212 switch (fmt_spec)
3213 {
3214 case 'd': case 'u': case 'o': case 'x': case 'X':
3215 if (tvs != NULL && length_modifier == '\0')
3216 length_modifier = 'L';
3217 }
3218# endif
3219
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003220 if (pos_arg != -1)
3221 arg_idx = pos_arg;
3222
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003223 // get parameter value, do initial processing
3224 switch (fmt_spec)
3225 {
3226 // '%' and 'c' behave similar to 's' regarding flags and field
3227 // widths
3228 case '%':
3229 case 'c':
3230 case 's':
3231 case 'S':
3232 str_arg_l = 1;
3233 switch (fmt_spec)
3234 {
3235 case '%':
3236 str_arg = p;
3237 break;
3238
3239 case 'c':
3240 {
3241 int j;
3242
3243 j =
3244# if defined(FEAT_EVAL)
3245 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3246# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003247 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3248 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003249 va_arg(ap, int));
3250
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003251 // standard demands unsigned char
3252 uchar_arg = (unsigned char)j;
3253 str_arg = (char *)&uchar_arg;
3254 break;
3255 }
3256
3257 case 's':
3258 case 'S':
3259 str_arg =
3260# if defined(FEAT_EVAL)
3261 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3262# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003263 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3264 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003265 va_arg(ap, char *));
3266
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003267 if (str_arg == NULL)
3268 {
3269 str_arg = "[NULL]";
3270 str_arg_l = 6;
3271 }
3272 // make sure not to address string beyond the specified
3273 // precision !!!
3274 else if (!precision_specified)
3275 str_arg_l = strlen(str_arg);
3276 // truncate string if necessary as requested by precision
3277 else if (precision == 0)
3278 str_arg_l = 0;
3279 else
3280 {
3281 // Don't put the #if inside memchr(), it can be a
3282 // macro.
3283 // memchr on HP does not like n > 2^31 !!!
3284 char *q = memchr(str_arg, '\0',
3285 precision <= (size_t)0x7fffffffL ? precision
3286 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003287
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003288 str_arg_l = (q == NULL) ? precision
3289 : (size_t)(q - str_arg);
3290 }
3291 if (fmt_spec == 'S')
3292 {
presuku1f2453f2021-11-24 15:32:57 +00003293 char_u *p1;
3294 size_t i;
3295 int cell;
presukud85fccd2021-11-20 19:38:31 +00003296
presuku1f2453f2021-11-24 15:32:57 +00003297 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003298 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003299 {
3300 cell = mb_ptr2cells(p1);
3301 if (precision_specified && i + cell > precision)
3302 break;
3303 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003304 }
presuku1f2453f2021-11-24 15:32:57 +00003305
3306 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003307 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003308 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003309 }
3310 break;
3311
3312 default:
3313 break;
3314 }
3315 break;
3316
3317 case 'd': case 'u':
3318 case 'b': case 'B':
3319 case 'o':
3320 case 'x': case 'X':
3321 case 'p':
3322 {
3323 // NOTE: the u, b, o, x, X and p conversion specifiers
3324 // imply the value is unsigned; d implies a signed
3325 // value
3326
3327 // 0 if numeric argument is zero (or if pointer is
3328 // NULL for 'p'), +1 if greater than zero (or nonzero
3329 // for unsigned arguments), -1 if negative (unsigned
3330 // argument is never negative)
3331 int arg_sign = 0;
3332
3333 // only set for length modifier h, or for no length
3334 // modifiers
3335 int int_arg = 0;
3336 unsigned int uint_arg = 0;
3337
3338 // only set for length modifier l
3339 long int long_arg = 0;
3340 unsigned long int ulong_arg = 0;
3341
3342 // only set for length modifier ll
3343 varnumber_T llong_arg = 0;
3344 uvarnumber_T ullong_arg = 0;
3345
3346 // only set for b conversion
3347 uvarnumber_T bin_arg = 0;
3348
3349 // pointer argument value -only defined for p
3350 // conversion
3351 void *ptr_arg = NULL;
3352
3353 if (fmt_spec == 'p')
3354 {
3355 length_modifier = '\0';
3356 ptr_arg =
3357# if defined(FEAT_EVAL)
3358 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3359 NULL) :
3360# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003361 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3362 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003363 va_arg(ap, void *));
3364
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003365 if (ptr_arg != NULL)
3366 arg_sign = 1;
3367 }
3368 else if (fmt_spec == 'b' || fmt_spec == 'B')
3369 {
3370 bin_arg =
3371# if defined(FEAT_EVAL)
3372 tvs != NULL ?
3373 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3374# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003375 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3376 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003377 va_arg(ap, uvarnumber_T));
3378
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003379 if (bin_arg != 0)
3380 arg_sign = 1;
3381 }
3382 else if (fmt_spec == 'd')
3383 {
3384 // signed
3385 switch (length_modifier)
3386 {
3387 case '\0':
3388 case 'h':
3389 // char and short arguments are passed as int.
3390 int_arg =
3391# if defined(FEAT_EVAL)
3392 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3393# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003394 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3395 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003396 va_arg(ap, int));
3397
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003398 if (int_arg > 0)
3399 arg_sign = 1;
3400 else if (int_arg < 0)
3401 arg_sign = -1;
3402 break;
3403 case 'l':
3404 long_arg =
3405# if defined(FEAT_EVAL)
3406 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3407# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003408 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3409 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003410 va_arg(ap, long int));
3411
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003412 if (long_arg > 0)
3413 arg_sign = 1;
3414 else if (long_arg < 0)
3415 arg_sign = -1;
3416 break;
3417 case 'L':
3418 llong_arg =
3419# if defined(FEAT_EVAL)
3420 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3421# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003422 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3423 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003424 va_arg(ap, varnumber_T));
3425
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003426 if (llong_arg > 0)
3427 arg_sign = 1;
3428 else if (llong_arg < 0)
3429 arg_sign = -1;
3430 break;
3431 }
3432 }
3433 else
3434 {
3435 // unsigned
3436 switch (length_modifier)
3437 {
3438 case '\0':
3439 case 'h':
3440 uint_arg =
3441# if defined(FEAT_EVAL)
3442 tvs != NULL ? (unsigned)
3443 tv_nr(tvs, &arg_idx) :
3444# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003445 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3446 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003447 va_arg(ap, unsigned int));
3448
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003449 if (uint_arg != 0)
3450 arg_sign = 1;
3451 break;
3452 case 'l':
3453 ulong_arg =
3454# if defined(FEAT_EVAL)
3455 tvs != NULL ? (unsigned long)
3456 tv_nr(tvs, &arg_idx) :
3457# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003458 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3459 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003460 va_arg(ap, unsigned long int));
3461
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003462 if (ulong_arg != 0)
3463 arg_sign = 1;
3464 break;
3465 case 'L':
3466 ullong_arg =
3467# if defined(FEAT_EVAL)
3468 tvs != NULL ? (uvarnumber_T)
3469 tv_nr(tvs, &arg_idx) :
3470# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003471 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3472 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003473 va_arg(ap, uvarnumber_T));
3474
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003475 if (ullong_arg != 0)
3476 arg_sign = 1;
3477 break;
3478 }
3479 }
3480
3481 str_arg = tmp;
3482 str_arg_l = 0;
3483
3484 // NOTE:
3485 // For d, i, u, o, x, and X conversions, if precision is
3486 // specified, the '0' flag should be ignored. This is so
3487 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3488 // FreeBSD, NetBSD; but not with Perl.
3489 if (precision_specified)
3490 zero_padding = 0;
3491 if (fmt_spec == 'd')
3492 {
3493 if (force_sign && arg_sign >= 0)
3494 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3495 // leave negative numbers for sprintf to handle, to
3496 // avoid handling tricky cases like (short int)-32768
3497 }
3498 else if (alternate_form)
3499 {
3500 if (arg_sign != 0
3501 && (fmt_spec == 'b' || fmt_spec == 'B'
3502 || fmt_spec == 'x' || fmt_spec == 'X') )
3503 {
3504 tmp[str_arg_l++] = '0';
3505 tmp[str_arg_l++] = fmt_spec;
3506 }
3507 // alternate form should have no effect for p
3508 // conversion, but ...
3509 }
3510
3511 zero_padding_insertion_ind = str_arg_l;
3512 if (!precision_specified)
3513 precision = 1; // default precision is 1
3514 if (precision == 0 && arg_sign == 0)
3515 {
3516 // When zero value is formatted with an explicit
3517 // precision 0, the resulting formatted string is
3518 // empty (d, i, u, b, B, o, x, X, p).
3519 }
3520 else
3521 {
3522 char f[6];
3523 int f_l = 0;
3524
3525 // construct a simple format string for sprintf
3526 f[f_l++] = '%';
3527 if (!length_modifier)
3528 ;
3529 else if (length_modifier == 'L')
3530 {
3531# ifdef MSWIN
3532 f[f_l++] = 'I';
3533 f[f_l++] = '6';
3534 f[f_l++] = '4';
3535# else
3536 f[f_l++] = 'l';
3537 f[f_l++] = 'l';
3538# endif
3539 }
3540 else
3541 f[f_l++] = length_modifier;
3542 f[f_l++] = fmt_spec;
3543 f[f_l++] = '\0';
3544
3545 if (fmt_spec == 'p')
3546 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3547 else if (fmt_spec == 'b' || fmt_spec == 'B')
3548 {
3549 char b[8 * sizeof(uvarnumber_T)];
3550 size_t b_l = 0;
3551 uvarnumber_T bn = bin_arg;
3552
3553 do
3554 {
3555 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3556 bn >>= 1;
3557 }
3558 while (bn != 0);
3559
3560 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3561 str_arg_l += b_l;
3562 }
3563 else if (fmt_spec == 'd')
3564 {
3565 // signed
3566 switch (length_modifier)
3567 {
3568 case '\0': str_arg_l += sprintf(
3569 tmp + str_arg_l, f,
3570 int_arg);
3571 break;
3572 case 'h': str_arg_l += sprintf(
3573 tmp + str_arg_l, f,
3574 (short)int_arg);
3575 break;
3576 case 'l': str_arg_l += sprintf(
3577 tmp + str_arg_l, f, long_arg);
3578 break;
3579 case 'L': str_arg_l += sprintf(
3580 tmp + str_arg_l, f, llong_arg);
3581 break;
3582 }
3583 }
3584 else
3585 {
3586 // unsigned
3587 switch (length_modifier)
3588 {
3589 case '\0': str_arg_l += sprintf(
3590 tmp + str_arg_l, f,
3591 uint_arg);
3592 break;
3593 case 'h': str_arg_l += sprintf(
3594 tmp + str_arg_l, f,
3595 (unsigned short)uint_arg);
3596 break;
3597 case 'l': str_arg_l += sprintf(
3598 tmp + str_arg_l, f, ulong_arg);
3599 break;
3600 case 'L': str_arg_l += sprintf(
3601 tmp + str_arg_l, f, ullong_arg);
3602 break;
3603 }
3604 }
3605
3606 // include the optional minus sign and possible
3607 // "0x" in the region before the zero padding
3608 // insertion point
3609 if (zero_padding_insertion_ind < str_arg_l
3610 && tmp[zero_padding_insertion_ind] == '-')
3611 zero_padding_insertion_ind++;
3612 if (zero_padding_insertion_ind + 1 < str_arg_l
3613 && tmp[zero_padding_insertion_ind] == '0'
3614 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3615 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3616 zero_padding_insertion_ind += 2;
3617 }
3618
3619 {
3620 size_t num_of_digits = str_arg_l
3621 - zero_padding_insertion_ind;
3622
3623 if (alternate_form && fmt_spec == 'o'
3624 // unless zero is already the first
3625 // character
3626 && !(zero_padding_insertion_ind < str_arg_l
3627 && tmp[zero_padding_insertion_ind] == '0'))
3628 {
3629 // assure leading zero for alternate-form
3630 // octal numbers
3631 if (!precision_specified
3632 || precision < num_of_digits + 1)
3633 {
3634 // precision is increased to force the
3635 // first character to be zero, except if a
3636 // zero value is formatted with an
3637 // explicit precision of zero
3638 precision = num_of_digits + 1;
3639 }
3640 }
3641 // zero padding to specified precision?
3642 if (num_of_digits < precision)
3643 number_of_zeros_to_pad = precision - num_of_digits;
3644 }
3645 // zero padding to specified minimal field width?
3646 if (!justify_left && zero_padding)
3647 {
3648 int n = (int)(min_field_width - (str_arg_l
3649 + number_of_zeros_to_pad));
3650 if (n > 0)
3651 number_of_zeros_to_pad += n;
3652 }
3653 break;
3654 }
3655
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003656 case 'f':
3657 case 'F':
3658 case 'e':
3659 case 'E':
3660 case 'g':
3661 case 'G':
3662 {
3663 // Floating point.
3664 double f;
3665 double abs_f;
3666 char format[40];
3667 int l;
3668 int remove_trailing_zeroes = FALSE;
3669
3670 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003671# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003672 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003673# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003674 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3675 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003676 va_arg(ap, double));
3677
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003678 abs_f = f < 0 ? -f : f;
3679
3680 if (fmt_spec == 'g' || fmt_spec == 'G')
3681 {
3682 // Would be nice to use %g directly, but it prints
3683 // "1.0" as "1", we don't want that.
3684 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3685 || abs_f == 0.0)
3686 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3687 else
3688 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3689 remove_trailing_zeroes = TRUE;
3690 }
3691
3692 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003693# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003694 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003695# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003696 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003697# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003698 )
3699 {
3700 // Avoid a buffer overflow
3701 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3702 force_sign, space_for_positive));
3703 str_arg_l = STRLEN(tmp);
3704 zero_padding = 0;
3705 }
3706 else
3707 {
3708 if (isnan(f))
3709 {
3710 // Not a number: nan or NAN
3711 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3712 : "nan");
3713 str_arg_l = 3;
3714 zero_padding = 0;
3715 }
3716 else if (isinf(f))
3717 {
3718 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3719 force_sign, space_for_positive));
3720 str_arg_l = STRLEN(tmp);
3721 zero_padding = 0;
3722 }
3723 else
3724 {
3725 // Regular float number
3726 format[0] = '%';
3727 l = 1;
3728 if (force_sign)
3729 format[l++] = space_for_positive ? ' ' : '+';
3730 if (precision_specified)
3731 {
3732 size_t max_prec = TMP_LEN - 10;
3733
3734 // Make sure we don't get more digits than we
3735 // have room for.
3736 if ((fmt_spec == 'f' || fmt_spec == 'F')
3737 && abs_f > 1.0)
3738 max_prec -= (size_t)log10(abs_f);
3739 if (precision > max_prec)
3740 precision = max_prec;
3741 l += sprintf(format + l, ".%d", (int)precision);
3742 }
3743 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
3744 format[l + 1] = NUL;
3745
3746 str_arg_l = sprintf(tmp, format, f);
3747 }
3748
3749 if (remove_trailing_zeroes)
3750 {
3751 int i;
3752 char *tp;
3753
3754 // Using %g or %G: remove superfluous zeroes.
3755 if (fmt_spec == 'f' || fmt_spec == 'F')
3756 tp = tmp + str_arg_l - 1;
3757 else
3758 {
3759 tp = (char *)vim_strchr((char_u *)tmp,
3760 fmt_spec == 'e' ? 'e' : 'E');
3761 if (tp != NULL)
3762 {
3763 // Remove superfluous '+' and leading
3764 // zeroes from the exponent.
3765 if (tp[1] == '+')
3766 {
3767 // Change "1.0e+07" to "1.0e07"
3768 STRMOVE(tp + 1, tp + 2);
3769 --str_arg_l;
3770 }
3771 i = (tp[1] == '-') ? 2 : 1;
3772 while (tp[i] == '0')
3773 {
3774 // Change "1.0e07" to "1.0e7"
3775 STRMOVE(tp + i, tp + i + 1);
3776 --str_arg_l;
3777 }
3778 --tp;
3779 }
3780 }
3781
3782 if (tp != NULL && !precision_specified)
3783 // Remove trailing zeroes, but keep the one
3784 // just after a dot.
3785 while (tp > tmp + 2 && *tp == '0'
3786 && tp[-1] != '.')
3787 {
3788 STRMOVE(tp, tp + 1);
3789 --tp;
3790 --str_arg_l;
3791 }
3792 }
3793 else
3794 {
3795 char *tp;
3796
3797 // Be consistent: some printf("%e") use 1.0e+12
3798 // and some 1.0e+012. Remove one zero in the last
3799 // case.
3800 tp = (char *)vim_strchr((char_u *)tmp,
3801 fmt_spec == 'e' ? 'e' : 'E');
3802 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
3803 && tp[2] == '0'
3804 && vim_isdigit(tp[3])
3805 && vim_isdigit(tp[4]))
3806 {
3807 STRMOVE(tp + 2, tp + 3);
3808 --str_arg_l;
3809 }
3810 }
3811 }
3812 if (zero_padding && min_field_width > str_arg_l
3813 && (tmp[0] == '-' || force_sign))
3814 {
3815 // padding 0's should be inserted after the sign
3816 number_of_zeros_to_pad = min_field_width - str_arg_l;
3817 zero_padding_insertion_ind = 1;
3818 }
3819 str_arg = tmp;
3820 break;
3821 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003822
3823 default:
3824 // unrecognized conversion specifier, keep format string
3825 // as-is
3826 zero_padding = 0; // turn zero padding off for non-numeric
3827 // conversion
3828 justify_left = 1;
3829 min_field_width = 0; // reset flags
3830
3831 // discard the unrecognized conversion, just keep
3832 // the unrecognized conversion character
3833 str_arg = p;
3834 str_arg_l = 0;
3835 if (*p != NUL)
3836 str_arg_l++; // include invalid conversion specifier
3837 // unchanged if not at end-of-string
3838 break;
3839 }
3840
3841 if (*p != NUL)
3842 p++; // step over the just processed conversion specifier
3843
3844 // insert padding to the left as requested by min_field_width;
3845 // this does not include the zero padding in case of numerical
3846 // conversions
3847 if (!justify_left)
3848 {
3849 // left padding with blank or zero
3850 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
3851
3852 if (pn > 0)
3853 {
3854 if (str_l < str_m)
3855 {
3856 size_t avail = str_m - str_l;
3857
3858 vim_memset(str + str_l, zero_padding ? '0' : ' ',
3859 (size_t)pn > avail ? avail
3860 : (size_t)pn);
3861 }
3862 str_l += pn;
3863 }
3864 }
3865
3866 // zero padding as requested by the precision or by the minimal
3867 // field width for numeric conversions required?
3868 if (number_of_zeros_to_pad == 0)
3869 {
3870 // will not copy first part of numeric right now,
3871 // force it to be copied later in its entirety
3872 zero_padding_insertion_ind = 0;
3873 }
3874 else
3875 {
3876 // insert first part of numerics (sign or '0x') before zero
3877 // padding
3878 int zn = (int)zero_padding_insertion_ind;
3879
3880 if (zn > 0)
3881 {
3882 if (str_l < str_m)
3883 {
3884 size_t avail = str_m - str_l;
3885
3886 mch_memmove(str + str_l, str_arg,
3887 (size_t)zn > avail ? avail
3888 : (size_t)zn);
3889 }
3890 str_l += zn;
3891 }
3892
3893 // insert zero padding as requested by the precision or min
3894 // field width
3895 zn = (int)number_of_zeros_to_pad;
3896 if (zn > 0)
3897 {
3898 if (str_l < str_m)
3899 {
3900 size_t avail = str_m - str_l;
3901
3902 vim_memset(str + str_l, '0',
3903 (size_t)zn > avail ? avail
3904 : (size_t)zn);
3905 }
3906 str_l += zn;
3907 }
3908 }
3909
3910 // insert formatted string
3911 // (or as-is conversion specifier for unknown conversions)
3912 {
3913 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
3914
3915 if (sn > 0)
3916 {
3917 if (str_l < str_m)
3918 {
3919 size_t avail = str_m - str_l;
3920
3921 mch_memmove(str + str_l,
3922 str_arg + zero_padding_insertion_ind,
3923 (size_t)sn > avail ? avail : (size_t)sn);
3924 }
3925 str_l += sn;
3926 }
3927 }
3928
3929 // insert right padding
3930 if (justify_left)
3931 {
3932 // right blank padding to the field width
3933 int pn = (int)(min_field_width
3934 - (str_arg_l + number_of_zeros_to_pad));
3935
3936 if (pn > 0)
3937 {
3938 if (str_l < str_m)
3939 {
3940 size_t avail = str_m - str_l;
3941
3942 vim_memset(str + str_l, ' ',
3943 (size_t)pn > avail ? avail
3944 : (size_t)pn);
3945 }
3946 str_l += pn;
3947 }
3948 }
3949 vim_free(tofree);
3950 }
3951 }
3952
3953 if (str_m > 0)
3954 {
3955 // make sure the string is nul-terminated even at the expense of
3956 // overwriting the last character (shouldn't happen, but just in case)
3957 //
3958 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
3959 }
3960
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003961 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00003962 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003963
Christ van Willegenc35fc032024-03-14 18:30:41 +01003964error:
K.Takata4c215ec2023-08-26 18:05:08 +02003965 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003966 va_end(ap);
3967
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003968 // Return the number of characters formatted (excluding trailing nul
3969 // character), that is, the number of characters that would have been
3970 // written to the buffer if it were large enough.
3971 return (int)str_l;
3972}
3973
3974#endif // PROTO