blob: 9e39c45acbb19372b4aaae25276356254042dd8b [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000045 if (p == NULL)
46 return NULL;
47
48 STRNCPY(p, string, len);
49 p[len] = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020050 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +000097 if (escaped_string == NULL)
98 return NULL;
99 p2 = escaped_string;
100 for (p = string; *p; p++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200101 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200103 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200108 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200112 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000113 *p2 = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200114 return escaped_string;
115}
116
117/*
118 * Return TRUE when 'shell' has "csh" in the tail.
119 */
120 int
121csh_like_shell(void)
122{
123 return (strstr((char *)gettail(p_sh), "csh") != NULL);
124}
125
126/*
Jason Cox6e823512021-08-29 12:36:49 +0200127 * Return TRUE when 'shell' has "fish" in the tail.
128 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200129 static int
Jason Cox6e823512021-08-29 12:36:49 +0200130fish_like_shell(void)
131{
132 return (strstr((char *)gettail(p_sh), "fish") != NULL);
133}
134
135/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200136 * Escape "string" for use as a shell argument with system().
137 * This uses single quotes, except when we know we need to use double quotes
138 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
139 * PowerShell also uses a novel escaping for enclosed single quotes - double
140 * them up.
141 * Escape a newline, depending on the 'shell' option.
142 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
143 * with "<" like "<cfile>".
144 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
145 * Returns the result in allocated memory, NULL if we have run out.
146 */
147 char_u *
148vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
149{
150 unsigned length;
151 char_u *p;
152 char_u *d;
153 char_u *escaped_string;
Mike Williams51024bb2024-05-30 07:46:30 +0200154 size_t l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200155 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200156 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200157 char_u *shname;
158 int powershell;
159# ifdef MSWIN
160 int double_quotes;
161# endif
162
163 // Only csh and similar shells expand '!' within single quotes. For sh and
164 // the like we must not put a backslash before it, it will be taken
165 // literally. If do_special is set the '!' will be escaped twice.
166 // Csh also needs to have "\n" escaped twice when do_special is set.
167 csh_like = csh_like_shell();
168
Jason Cox6e823512021-08-29 12:36:49 +0200169 // Fish shell uses '\' as an escape character within single quotes, so '\'
170 // itself must be escaped to get a literal '\'.
171 fish_like = fish_like_shell();
172
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000173 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200174 shname = gettail(p_sh);
175 powershell = strstr((char *)shname, "pwsh") != NULL;
176# ifdef MSWIN
177 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
178 // PowerShell only accepts single quotes so override shellslash.
179 double_quotes = !powershell && !p_ssl;
180# endif
181
182 // First count the number of extra bytes required.
183 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
184 for (p = string; *p != NUL; MB_PTR_ADV(p))
185 {
186# ifdef MSWIN
187 if (double_quotes)
188 {
189 if (*p == '"')
190 ++length; // " -> ""
191 }
192 else
193# endif
194 if (*p == '\'')
195 {
196 if (powershell)
197 length +=2; // ' => ''
198 else
199 length += 3; // ' => '\''
200 }
201 if ((*p == '\n' && (csh_like || do_newline))
202 || (*p == '!' && (csh_like || do_special)))
203 {
204 ++length; // insert backslash
205 if (csh_like && do_special)
206 ++length; // insert backslash
207 }
208 if (do_special && find_cmdline_var(p, &l) >= 0)
209 {
210 ++length; // insert backslash
211 p += l - 1;
212 }
Jason Cox6e823512021-08-29 12:36:49 +0200213 if (*p == '\\' && fish_like)
214 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200215 }
216
217 // Allocate memory for the result and fill it.
218 escaped_string = alloc(length);
219 if (escaped_string != NULL)
220 {
221 d = escaped_string;
222
223 // add opening quote
224# ifdef MSWIN
225 if (double_quotes)
226 *d++ = '"';
227 else
228# endif
229 *d++ = '\'';
230
231 for (p = string; *p != NUL; )
232 {
233# ifdef MSWIN
234 if (double_quotes)
235 {
236 if (*p == '"')
237 {
238 *d++ = '"';
239 *d++ = '"';
240 ++p;
241 continue;
242 }
243 }
244 else
245# endif
246 if (*p == '\'')
247 {
248 if (powershell)
249 {
250 *d++ = '\'';
251 *d++ = '\'';
252 }
253 else
254 {
255 *d++ = '\'';
256 *d++ = '\\';
257 *d++ = '\'';
258 *d++ = '\'';
259 }
260 ++p;
261 continue;
262 }
263 if ((*p == '\n' && (csh_like || do_newline))
264 || (*p == '!' && (csh_like || do_special)))
265 {
266 *d++ = '\\';
267 if (csh_like && do_special)
268 *d++ = '\\';
269 *d++ = *p++;
270 continue;
271 }
zeertzjq88c8c542024-05-30 19:27:25 +0200272 if (do_special && find_cmdline_var(p, &l) >= 0)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200273 {
274 *d++ = '\\'; // insert backslash
zeertzjq88c8c542024-05-30 19:27:25 +0200275 memcpy(d, p, l); // copy the var
276 d += l;
277 p += l;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000341 if (p == NULL)
342 return;
343
344 p2 = p;
345 while ((c = *p2) != NUL)
346 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000528/*
529 * A version of strlen() that has a maximum length.
530 */
531 size_t
532vim_strlen_maxlen(char *s, size_t maxlen)
533{
534 size_t i;
535 for (i = 0; i < maxlen; ++i)
536 if (s[i] == NUL)
537 break;
538 return i;
539}
540
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200541#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
542/*
543 * Compare two strings, ignoring case, using current locale.
544 * Doesn't work for multi-byte characters.
545 * return 0 for match, < 0 for smaller, > 0 for bigger
546 */
547 int
548vim_stricmp(char *s1, char *s2)
549{
550 int i;
551
552 for (;;)
553 {
554 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
555 if (i != 0)
556 return i; // this character different
557 if (*s1 == NUL)
558 break; // strings match until NUL
559 ++s1;
560 ++s2;
561 }
562 return 0; // strings match
563}
564#endif
565
566#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
567/*
568 * Compare two strings, for length "len", ignoring case, using current locale.
569 * Doesn't work for multi-byte characters.
570 * return 0 for match, < 0 for smaller, > 0 for bigger
571 */
572 int
573vim_strnicmp(char *s1, char *s2, size_t len)
574{
575 int i;
576
577 while (len > 0)
578 {
579 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
580 if (i != 0)
581 return i; // this character different
582 if (*s1 == NUL)
583 break; // strings match until NUL
584 ++s1;
585 ++s2;
586 --len;
587 }
588 return 0; // strings match
589}
590#endif
591
592/*
Christian Brabandt84e31752024-09-02 09:59:18 +0200593 * Compare two ASCII strings, for length "len", ignoring case, ignoring locale
594 * (mostly matters for turkish locale where i I might be different).
595 * return 0 for match, < 0 for smaller, > 0 for bigger
596 */
597 int
598vim_strnicmp_asc(char *s1, char *s2, size_t len)
599{
John Marriottc847c122024-11-24 14:09:40 +0100600 int i = 0;
Christian Brabandt84e31752024-09-02 09:59:18 +0200601 int save_cmp_flags = cmp_flags;
602
603 cmp_flags |= CMP_KEEPASCII; // compare by ASCII value, ignoring locale
604 while (len > 0)
605 {
606 i = vim_tolower(*s1) - vim_tolower(*s2);
607 if (i != 0)
608 break; // this character is different
609 if (*s1 == NUL)
610 break; // strings match until NUL
611 ++s1;
612 ++s2;
613 --len;
614 }
615 cmp_flags = save_cmp_flags;
616 return i;
617}
618
619/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200620 * Search for first occurrence of "c" in "string".
621 * Version of strchr() that handles unsigned char strings with characters from
622 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
623 * end of the string.
624 */
Bram Moolenaarc32949b2023-01-04 15:56:51 +0000625 char_u *
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200626vim_strchr(char_u *string, int c)
627{
628 char_u *p;
629 int b;
630
631 p = string;
632 if (enc_utf8 && c >= 0x80)
633 {
634 while (*p != NUL)
635 {
636 int l = utfc_ptr2len(p);
637
638 // Avoid matching an illegal byte here.
639 if (utf_ptr2char(p) == c && l > 1)
640 return p;
641 p += l;
642 }
643 return NULL;
644 }
645 if (enc_dbcs != 0 && c > 255)
646 {
647 int n2 = c & 0xff;
648
649 c = ((unsigned)c >> 8) & 0xff;
650 while ((b = *p) != NUL)
651 {
652 if (b == c && p[1] == n2)
653 return p;
654 p += (*mb_ptr2len)(p);
655 }
656 return NULL;
657 }
658 if (has_mbyte)
659 {
660 while ((b = *p) != NUL)
661 {
662 if (b == c)
663 return p;
664 p += (*mb_ptr2len)(p);
665 }
666 return NULL;
667 }
668 while ((b = *p) != NUL)
669 {
670 if (b == c)
671 return p;
672 ++p;
673 }
674 return NULL;
675}
676
677/*
678 * Version of strchr() that only works for bytes and handles unsigned char
679 * strings with characters above 128 correctly. It also doesn't return a
680 * pointer to the NUL at the end of the string.
681 */
682 char_u *
683vim_strbyte(char_u *string, int c)
684{
685 char_u *p = string;
686
687 while (*p != NUL)
688 {
689 if (*p == c)
690 return p;
691 ++p;
692 }
693 return NULL;
694}
695
696/*
697 * Search for last occurrence of "c" in "string".
698 * Version of strrchr() that handles unsigned char strings with characters from
699 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
700 * end of the string.
701 * Return NULL if not found.
702 * Does not handle multi-byte char for "c"!
703 */
704 char_u *
705vim_strrchr(char_u *string, int c)
706{
707 char_u *retval = NULL;
708 char_u *p = string;
709
710 while (*p)
711 {
712 if (*p == c)
713 retval = p;
714 MB_PTR_ADV(p);
715 }
716 return retval;
717}
718
719/*
720 * Vim's version of strpbrk(), in case it's missing.
721 * Don't generate a prototype for this, causes problems when it's not used.
722 */
723#ifndef PROTO
724# ifndef HAVE_STRPBRK
725# ifdef vim_strpbrk
726# undef vim_strpbrk
727# endif
728 char_u *
729vim_strpbrk(char_u *s, char_u *charset)
730{
731 while (*s)
732 {
733 if (vim_strchr(charset, *s) != NULL)
734 return s;
735 MB_PTR_ADV(s);
736 }
737 return NULL;
738}
739# endif
740#endif
741
742/*
743 * Sort an array of strings.
744 */
745static int sort_compare(const void *s1, const void *s2);
746
747 static int
748sort_compare(const void *s1, const void *s2)
749{
750 return STRCMP(*(char **)s1, *(char **)s2);
751}
752
753 void
754sort_strings(
755 char_u **files,
756 int count)
757{
758 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
759}
760
761#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
762/*
763 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
764 * When "s" is NULL FALSE is returned.
765 */
766 int
767has_non_ascii(char_u *s)
768{
769 char_u *p;
770
771 if (s != NULL)
772 for (p = s; *p != NUL; ++p)
773 if (*p >= 128)
774 return TRUE;
775 return FALSE;
776}
777#endif
778
779/*
780 * Concatenate two strings and return the result in allocated memory.
781 * Returns NULL when out of memory.
782 */
783 char_u *
784concat_str(char_u *str1, char_u *str2)
785{
786 char_u *dest;
787 size_t l = str1 == NULL ? 0 : STRLEN(str1);
788
789 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000790 if (dest == NULL)
791 return NULL;
792 if (str1 == NULL)
793 *dest = NUL;
794 else
795 STRCPY(dest, str1);
796 if (str2 != NULL)
797 STRCPY(dest + l, str2);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200798 return dest;
799}
800
zeertzjq4dd266c2023-08-19 11:35:03 +0200801#if defined(FEAT_EVAL) || defined(FEAT_RIGHTLEFT) || defined(PROTO)
802/*
803 * Reverse text into allocated memory.
804 * Returns the allocated string, NULL when out of memory.
805 */
806 char_u *
807reverse_text(char_u *s)
808{
809 size_t len = STRLEN(s);
810 char_u *rev = alloc(len + 1);
811 if (rev == NULL)
812 return NULL;
813
814 for (size_t s_i = 0, rev_i = len; s_i < len; ++s_i)
815 {
816 if (has_mbyte)
817 {
818 int mb_len = (*mb_ptr2len)(s + s_i);
819 rev_i -= mb_len;
820 mch_memmove(rev + rev_i, s + s_i, mb_len);
821 s_i += mb_len - 1;
822 }
823 else
824 rev[--rev_i] = s[s_i];
825 }
826 rev[len] = NUL;
827 return rev;
828}
829#endif
830
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200831#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200832/*
833 * Return string "str" in ' quotes, doubling ' characters.
834 * If "str" is NULL an empty string is assumed.
835 * If "function" is TRUE make it function('string').
836 */
837 char_u *
838string_quote(char_u *str, int function)
839{
840 unsigned len;
841 char_u *p, *r, *s;
842
843 len = (function ? 13 : 3);
844 if (str != NULL)
845 {
846 len += (unsigned)STRLEN(str);
847 for (p = str; *p != NUL; MB_PTR_ADV(p))
848 if (*p == '\'')
849 ++len;
850 }
851 s = r = alloc(len);
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000852 if (r == NULL)
853 return NULL;
854
855 if (function)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200856 {
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000857 STRCPY(r, "function('");
858 r += 10;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200859 }
Yegappan Lakshmanan6ec66662023-01-23 20:46:21 +0000860 else
861 *r++ = '\'';
862 if (str != NULL)
863 for (p = str; *p != NUL; )
864 {
865 if (*p == '\'')
866 *r++ = '\'';
867 MB_COPY_CHAR(p, r);
868 }
869 *r++ = '\'';
870 if (function)
871 *r++ = ')';
872 *r++ = NUL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200873 return s;
874}
875
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000876/*
877 * Count the number of times "needle" occurs in string "haystack". Case is
878 * ignored if "ic" is TRUE.
879 */
880 long
881string_count(char_u *haystack, char_u *needle, int ic)
882{
883 long n = 0;
884 char_u *p = haystack;
885 char_u *next;
886
887 if (p == NULL || needle == NULL || *needle == NUL)
888 return 0;
889
890 if (ic)
891 {
892 size_t len = STRLEN(needle);
893
894 while (*p != NUL)
895 {
896 if (MB_STRNICMP(p, needle, len) == 0)
897 {
898 ++n;
899 p += len;
900 }
901 else
902 MB_PTR_ADV(p);
903 }
904 }
905 else
906 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
907 {
908 ++n;
909 p = next + STRLEN(needle);
910 }
911
912 return n;
913}
914
915/*
916 * Make a typval_T of the first character of "input" and store it in "output".
917 * Return OK or FAIL.
918 */
919 static int
920copy_first_char_to_tv(char_u *input, typval_T *output)
921{
922 char_u buf[MB_MAXBYTES + 1];
923 int len;
924
925 if (input == NULL || output == NULL)
926 return FAIL;
927
928 len = has_mbyte ? mb_ptr2len(input) : 1;
929 STRNCPY(buf, input, len);
930 buf[len] = NUL;
931 output->v_type = VAR_STRING;
932 output->vval.v_string = vim_strsave(buf);
933
934 return output->vval.v_string == NULL ? FAIL : OK;
935}
936
937/*
938 * Implementation of map() and filter() for a String. Apply "expr" to every
939 * character in string "str" and return the result in "rettv".
940 */
941 void
942string_filter_map(
943 char_u *str,
944 filtermap_T filtermap,
945 typval_T *expr,
946 typval_T *rettv)
947{
948 char_u *p;
949 typval_T tv;
950 garray_T ga;
951 int len = 0;
952 int idx = 0;
953 int rem;
Bram Moolenaar82418262022-09-28 16:16:15 +0100954 typval_T newtv;
955 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000956
957 rettv->v_type = VAR_STRING;
958 rettv->vval.v_string = NULL;
959
960 // set_vim_var_nr() doesn't set the type
961 set_vim_var_type(VV_KEY, VAR_NUMBER);
962
zeertzjqe7d49462023-04-16 20:53:55 +0100963 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +0100964 fc = eval_expr_get_funccal(expr, &newtv);
965
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000966 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000967 for (p = str; *p != NUL; p += len)
968 {
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000969 if (copy_first_char_to_tv(p, &tv) == FAIL)
970 break;
971 len = (int)STRLEN(tv.vval.v_string);
972
973 set_vim_var_nr(VV_KEY, idx);
Bram Moolenaar82418262022-09-28 16:16:15 +0100974 if (filter_map_one(&tv, expr, filtermap, fc, &newtv, &rem) == FAIL
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000975 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000976 {
977 clear_tv(&newtv);
978 clear_tv(&tv);
979 break;
980 }
Ernie Raele79e2072024-01-13 11:47:33 +0100981 if (filtermap == FILTERMAP_MAP || filtermap == FILTERMAP_MAPNEW)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000982 {
983 if (newtv.v_type != VAR_STRING)
984 {
985 clear_tv(&newtv);
986 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000987 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000988 break;
989 }
990 else
991 ga_concat(&ga, newtv.vval.v_string);
992 }
Ernie Raele79e2072024-01-13 11:47:33 +0100993 else if (filtermap == FILTERMAP_FOREACH || !rem)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000994 ga_concat(&ga, tv.vval.v_string);
995
996 clear_tv(&newtv);
997 clear_tv(&tv);
998
999 ++idx;
1000 }
1001 ga_append(&ga, NUL);
1002 rettv->vval.v_string = ga.ga_data;
Bram Moolenaar82418262022-09-28 16:16:15 +01001003 if (fc != NULL)
1004 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001005}
1006
1007/*
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001008 * Implementation of reduce() for String "argvars[0]" using the function "expr"
1009 * starting with the optional initial value "argvars[2]" and return the result
1010 * in "rettv".
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001011 */
1012 void
1013string_reduce(
1014 typval_T *argvars,
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001015 typval_T *expr,
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001016 typval_T *rettv)
1017{
1018 char_u *p = tv_get_string(&argvars[0]);
1019 int len;
1020 typval_T argv[3];
1021 int r;
1022 int called_emsg_start = called_emsg;
Bram Moolenaar82418262022-09-28 16:16:15 +01001023 funccall_T *fc;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001024
1025 if (argvars[2].v_type == VAR_UNKNOWN)
1026 {
1027 if (*p == NUL)
1028 {
Bram Moolenaare70cec92022-01-01 14:25:55 +00001029 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001030 return;
1031 }
1032 if (copy_first_char_to_tv(p, rettv) == FAIL)
1033 return;
1034 p += STRLEN(rettv->vval.v_string);
1035 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001036 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001037 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001038 else
1039 copy_tv(&argvars[2], rettv);
1040
zeertzjqe7d49462023-04-16 20:53:55 +01001041 // Create one funccall_T for all eval_expr_typval() calls.
Bram Moolenaar82418262022-09-28 16:16:15 +01001042 fc = eval_expr_get_funccal(expr, rettv);
1043
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001044 for ( ; *p != NUL; p += len)
1045 {
1046 argv[0] = *rettv;
1047 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
1048 break;
1049 len = (int)STRLEN(argv[1].vval.v_string);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001050
zeertzjqad0c4422023-08-17 22:15:47 +02001051 r = eval_expr_typval(expr, TRUE, argv, 2, fc, rettv);
Bram Moolenaarf1c60d42022-09-22 17:07:00 +01001052
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001053 clear_tv(&argv[0]);
1054 clear_tv(&argv[1]);
1055 if (r == FAIL || called_emsg != called_emsg_start)
1056 return;
1057 }
Bram Moolenaar82418262022-09-28 16:16:15 +01001058
1059 if (fc != NULL)
1060 remove_funccal();
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001061}
1062
Bram Moolenaare4098452023-05-07 18:53:49 +01001063/*
1064 * Implementation of "byteidx()" and "byteidxcomp()" functions
1065 */
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001066 static void
Dominique Pellé0268ff32024-07-28 21:12:20 +02001067byteidx_common(typval_T *argvars, typval_T *rettv, int comp)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001068{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001069 rettv->vval.v_number = -1;
1070
1071 if (in_vim9script()
1072 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001073 || check_for_number_arg(argvars, 1) == FAIL
1074 || check_for_opt_bool_arg(argvars, 2) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001075 return;
1076
Christian Brabandt67672ef2023-04-24 21:09:54 +01001077 char_u *str = tv_get_string_chk(&argvars[0]);
1078 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001079 if (str == NULL || idx < 0)
1080 return;
1081
Christian Brabandt67672ef2023-04-24 21:09:54 +01001082 varnumber_T utf16idx = FALSE;
1083 if (argvars[2].v_type != VAR_UNKNOWN)
1084 {
zeertzjq8cf51372023-05-08 15:31:38 +01001085 int error = FALSE;
1086 utf16idx = tv_get_bool_chk(&argvars[2], &error);
1087 if (error)
1088 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001089 if (utf16idx < 0 || utf16idx > 1)
1090 {
zeertzjq8cf51372023-05-08 15:31:38 +01001091 semsg(_(e_using_number_as_bool_nr), utf16idx);
Christian Brabandt67672ef2023-04-24 21:09:54 +01001092 return;
1093 }
1094 }
1095
1096 int (*ptr2len)(char_u *);
1097 if (enc_utf8 && comp)
1098 ptr2len = utf_ptr2len;
1099 else
1100 ptr2len = mb_ptr2len;
1101
1102 char_u *t = str;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001103 for ( ; idx > 0; idx--)
1104 {
1105 if (*t == NUL) // EOL reached
1106 return;
Christian Brabandt67672ef2023-04-24 21:09:54 +01001107 if (utf16idx)
1108 {
1109 int clen = ptr2len(t);
1110 int c = (clen > 1) ? utf_ptr2char(t) : *t;
1111 if (c > 0xFFFF)
1112 idx--;
1113 }
1114 if (idx > 0)
1115 t += ptr2len(t);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001116 }
1117 rettv->vval.v_number = (varnumber_T)(t - str);
1118}
1119
1120/*
1121 * "byteidx()" function
1122 */
1123 void
1124f_byteidx(typval_T *argvars, typval_T *rettv)
1125{
Bram Moolenaare4098452023-05-07 18:53:49 +01001126 byteidx_common(argvars, rettv, FALSE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001127}
1128
1129/*
1130 * "byteidxcomp()" function
1131 */
1132 void
1133f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1134{
Bram Moolenaare4098452023-05-07 18:53:49 +01001135 byteidx_common(argvars, rettv, TRUE);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001136}
1137
1138/*
1139 * "charidx()" function
1140 */
1141 void
1142f_charidx(typval_T *argvars, typval_T *rettv)
1143{
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001144 rettv->vval.v_number = -1;
1145
Christian Brabandt67672ef2023-04-24 21:09:54 +01001146 if (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001147 || check_for_number_arg(argvars, 1) == FAIL
Christian Brabandt67672ef2023-04-24 21:09:54 +01001148 || check_for_opt_bool_arg(argvars, 2) == FAIL
1149 || (argvars[2].v_type != VAR_UNKNOWN
1150 && check_for_opt_bool_arg(argvars, 3) == FAIL))
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001151 return;
1152
Christian Brabandt67672ef2023-04-24 21:09:54 +01001153 char_u *str = tv_get_string_chk(&argvars[0]);
1154 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001155 if (str == NULL || idx < 0)
1156 return;
1157
Christian Brabandt67672ef2023-04-24 21:09:54 +01001158 varnumber_T countcc = FALSE;
1159 varnumber_T utf16idx = FALSE;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001160 if (argvars[2].v_type != VAR_UNKNOWN)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001161 {
Christian Brabandt67672ef2023-04-24 21:09:54 +01001162 countcc = tv_get_bool(&argvars[2]);
1163 if (argvars[3].v_type != VAR_UNKNOWN)
1164 utf16idx = tv_get_bool(&argvars[3]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001165 }
1166
Christian Brabandt67672ef2023-04-24 21:09:54 +01001167 int (*ptr2len)(char_u *);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001168 if (enc_utf8 && countcc)
1169 ptr2len = utf_ptr2len;
1170 else
1171 ptr2len = mb_ptr2len;
1172
Christian Brabandt67672ef2023-04-24 21:09:54 +01001173 char_u *p;
1174 int len;
1175 for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001176 {
1177 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001178 {
1179 // If the index is exactly the number of bytes or utf-16 code units
1180 // in the string then return the length of the string in
1181 // characters.
1182 if (utf16idx ? (idx == 0) : (p == (str + idx)))
1183 rettv->vval.v_number = len;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001184 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01001185 }
Christian Brabandt67672ef2023-04-24 21:09:54 +01001186 if (utf16idx)
1187 {
1188 idx--;
1189 int clen = ptr2len(p);
1190 int c = (clen > 1) ? utf_ptr2char(p) : *p;
1191 if (c > 0xFFFF)
1192 idx--;
1193 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001194 p += ptr2len(p);
1195 }
1196
1197 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1198}
1199
1200/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001201 * Convert the string "str", from encoding "from" to encoding "to".
1202 */
1203 static char_u *
1204convert_string(char_u *str, char_u *from, char_u *to)
1205{
1206 vimconv_T vimconv;
1207
1208 vimconv.vc_type = CONV_NONE;
1209 if (convert_setup(&vimconv, from, to) == FAIL)
1210 return NULL;
1211 vimconv.vc_fail = TRUE;
1212 if (vimconv.vc_type == CONV_NONE)
1213 str = vim_strsave(str);
1214 else
1215 str = string_convert(&vimconv, str, NULL);
1216 convert_setup(&vimconv, NULL, NULL);
1217
1218 return str;
1219}
1220
1221/*
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001222 * Add the bytes from "str" to "blob".
1223 */
1224 static void
1225blob_from_string(char_u *str, blob_T *blob)
1226{
1227 size_t len = STRLEN(str);
1228
1229 for (size_t i = 0; i < len; i++)
1230 {
1231 int ch = str[i];
1232
1233 if (str[i] == NL)
1234 // Translate newlines in the string to NUL character
1235 ch = NUL;
1236
1237 ga_append(&blob->bv_ga, ch);
1238 }
1239}
1240
1241/*
1242 * Return a string created from the bytes in blob starting at "start_idx".
1243 * A NL character in the blob indicates end of string.
1244 * A NUL character in the blob is translated to a NL.
1245 * On return, "start_idx" points to next byte to process in blob.
1246 */
1247 static char_u *
1248string_from_blob(blob_T *blob, long *start_idx)
1249{
1250 garray_T str_ga;
1251 long blen;
Yegappan Lakshmanan5e9aaed2025-01-18 10:24:25 +01001252 int idx;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001253
1254 ga_init2(&str_ga, sizeof(char), 80);
1255
1256 blen = blob_len(blob);
1257
1258 for (idx = *start_idx; idx < blen; idx++)
1259 {
1260 char_u byte = (char_u)blob_get(blob, idx);
1261 if (byte == NL)
1262 {
1263 idx++;
1264 break;
1265 }
1266
1267 if (byte == NUL)
1268 byte = NL;
1269
1270 ga_append(&str_ga, byte);
1271 }
1272
1273 ga_append(&str_ga, NUL);
1274
1275 char_u *ret_str = vim_strsave(str_ga.ga_data);
1276 *start_idx = idx;
1277
1278 ga_clear(&str_ga);
1279 return ret_str;
1280}
1281
1282/*
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001283 * "blob2str()" function
1284 * Converts a blob to a string, ensuring valid UTF-8 encoding.
1285 */
1286 void
1287f_blob2str(typval_T *argvars, typval_T *rettv)
1288{
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001289 blob_T *blob;
1290 int blen;
1291 long idx;
Bakudankunb3854bf2025-02-23 20:29:21 +01001292 int validate_utf8 = FALSE;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001293
1294 if (check_for_blob_arg(argvars, 0) == FAIL
1295 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1296 return;
1297
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001298 if (rettv_list_alloc(rettv) == FAIL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001299 return;
1300
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001301 blob = argvars->vval.v_blob;
1302 if (blob == NULL)
1303 return;
1304 blen = blob_len(blob);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001305
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001306 char_u *from_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001307 if (argvars[1].v_type != VAR_UNKNOWN)
1308 {
1309 dict_T *d = argvars[1].vval.v_dict;
1310 if (d != NULL)
1311 {
1312 char_u *enc = dict_get_string(d, "encoding", FALSE);
1313 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001314 from_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001315 }
1316 }
1317
1318 if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0)
Bakudankunb3854bf2025-02-23 20:29:21 +01001319 validate_utf8 = TRUE;
1320
1321 if (from_encoding != NULL && STRCMP(from_encoding, "none") == 0)
1322 {
1323 validate_utf8 = FALSE;
1324 vim_free(from_encoding);
1325 from_encoding = NULL;
1326 }
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001327
1328 idx = 0;
1329 while (idx < blen)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001330 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001331 char_u *str;
1332 char_u *converted_str;
1333
1334 str = string_from_blob(blob, &idx);
1335 if (str == NULL)
1336 break;
1337
1338 converted_str = str;
1339 if (from_encoding != NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001340 {
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001341 converted_str = convert_string(str, from_encoding, p_enc);
1342 vim_free(str);
1343 if (converted_str == NULL)
1344 {
1345 semsg(_(e_str_encoding_failed), "from", from_encoding);
1346 goto done;
1347 }
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001348 }
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001349
Bakudankunb3854bf2025-02-23 20:29:21 +01001350 if (validate_utf8)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001351 {
1352 if (!utf_valid_string(converted_str, NULL))
1353 {
1354 semsg(_(e_str_encoding_failed), "from", p_enc);
1355 vim_free(converted_str);
1356 goto done;
1357 }
1358 }
1359
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001360 int ret = list_append_string(rettv->vval.v_list, converted_str, -1);
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001361 vim_free(converted_str);
Yegappan Lakshmanan90b39752025-01-19 09:37:07 +01001362 if (ret == FAIL)
1363 break;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001364 }
1365
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001366done:
1367 vim_free(from_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001368}
1369
1370/*
1371 * "str2blob()" function
1372 */
1373 void
1374f_str2blob(typval_T *argvars, typval_T *rettv)
1375{
1376 blob_T *blob;
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001377 list_T *list;
1378 listitem_T *li;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001379
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001380 if (check_for_list_arg(argvars, 0) == FAIL
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001381 || check_for_opt_dict_arg(argvars, 1) == FAIL)
1382 return;
1383
1384 if (rettv_blob_alloc(rettv) == FAIL)
1385 return;
1386
1387 blob = rettv->vval.v_blob;
1388
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001389 list = argvars[0].vval.v_list;
1390 if (list == NULL)
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001391 return;
1392
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001393 char_u *to_encoding = NULL;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001394 if (argvars[1].v_type != VAR_UNKNOWN)
1395 {
1396 dict_T *d = argvars[1].vval.v_dict;
1397 if (d != NULL)
1398 {
1399 char_u *enc = dict_get_string(d, "encoding", FALSE);
1400 if (enc != NULL)
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001401 to_encoding = enc_canonize(enc_skip(enc));
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001402 }
1403 }
1404
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001405 FOR_ALL_LIST_ITEMS(list, li)
1406 {
1407 if (li->li_tv.v_type != VAR_STRING)
1408 continue;
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001409
Yegappan Lakshmanana11b23c2025-01-16 19:16:42 +01001410 char_u *str = li->li_tv.vval.v_string;
1411
1412 if (str == NULL)
1413 continue;
1414
1415 if (to_encoding != NULL)
1416 {
1417 str = convert_string(str, p_enc, to_encoding);
1418 if (str == NULL)
1419 {
1420 semsg(_(e_str_encoding_failed), "to", to_encoding);
1421 goto done;
1422 }
1423 }
1424
1425 if (li != list->lv_first)
1426 // Each list string item is separated by a newline in the blob
1427 ga_append(&blob->bv_ga, NL);
1428
1429 blob_from_string(str, blob);
1430
1431 if (to_encoding != NULL)
1432 vim_free(str);
1433 }
1434
1435done:
1436 if (to_encoding != NULL)
1437 vim_free(to_encoding);
Yegappan Lakshmanan1aefe1d2025-01-14 17:29:42 +01001438}
1439
1440/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001441 * "str2list()" function
1442 */
1443 void
1444f_str2list(typval_T *argvars, typval_T *rettv)
1445{
1446 char_u *p;
1447 int utf8 = FALSE;
1448
1449 if (rettv_list_alloc(rettv) == FAIL)
1450 return;
1451
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001452 if (in_vim9script()
1453 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001454 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001455 return;
1456
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001457 if (argvars[1].v_type != VAR_UNKNOWN)
1458 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1459
1460 p = tv_get_string(&argvars[0]);
1461
1462 if (has_mbyte || utf8)
1463 {
1464 int (*ptr2len)(char_u *);
1465 int (*ptr2char)(char_u *);
1466
1467 if (utf8 || enc_utf8)
1468 {
1469 ptr2len = utf_ptr2len;
1470 ptr2char = utf_ptr2char;
1471 }
1472 else
1473 {
1474 ptr2len = mb_ptr2len;
1475 ptr2char = mb_ptr2char;
1476 }
1477
1478 for ( ; *p != NUL; p += (*ptr2len)(p))
1479 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1480 }
1481 else
1482 for ( ; *p != NUL; ++p)
1483 list_append_number(rettv->vval.v_list, *p);
1484}
1485
1486/*
1487 * "str2nr()" function
1488 */
1489 void
1490f_str2nr(typval_T *argvars, typval_T *rettv)
1491{
1492 int base = 10;
1493 char_u *p;
1494 varnumber_T n;
1495 int what = 0;
1496 int isneg;
1497
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001498 if (in_vim9script()
1499 && (check_for_string_arg(argvars, 0) == FAIL
1500 || check_for_opt_number_arg(argvars, 1) == FAIL
1501 || (argvars[1].v_type != VAR_UNKNOWN
1502 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1503 return;
1504
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001505 if (argvars[1].v_type != VAR_UNKNOWN)
1506 {
1507 base = (int)tv_get_number(&argvars[1]);
1508 if (base != 2 && base != 8 && base != 10 && base != 16)
1509 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001510 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001511 return;
1512 }
1513 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1514 what |= STR2NR_QUOTE;
1515 }
1516
1517 p = skipwhite(tv_get_string_strict(&argvars[0]));
1518 isneg = (*p == '-');
1519 if (*p == '+' || *p == '-')
1520 p = skipwhite(p + 1);
1521 switch (base)
1522 {
1523 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1524 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1525 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1526 }
Bram Moolenaar5fb78c32023-03-04 20:47:39 +00001527 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE, NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001528 // Text after the number is silently ignored.
1529 if (isneg)
1530 rettv->vval.v_number = -n;
1531 else
1532 rettv->vval.v_number = n;
1533
1534}
1535
1536/*
1537 * "strgetchar()" function
1538 */
1539 void
1540f_strgetchar(typval_T *argvars, typval_T *rettv)
1541{
1542 char_u *str;
1543 int len;
1544 int error = FALSE;
1545 int charidx;
1546 int byteidx = 0;
1547
1548 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001549
1550 if (in_vim9script()
1551 && (check_for_string_arg(argvars, 0) == FAIL
1552 || check_for_number_arg(argvars, 1) == FAIL))
1553 return;
1554
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001555 str = tv_get_string_chk(&argvars[0]);
1556 if (str == NULL)
1557 return;
1558 len = (int)STRLEN(str);
1559 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1560 if (error)
1561 return;
1562
1563 while (charidx >= 0 && byteidx < len)
1564 {
1565 if (charidx == 0)
1566 {
1567 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1568 break;
1569 }
1570 --charidx;
1571 byteidx += MB_CPTR2LEN(str + byteidx);
1572 }
1573}
1574
1575/*
1576 * "stridx()" function
1577 */
1578 void
1579f_stridx(typval_T *argvars, typval_T *rettv)
1580{
1581 char_u buf[NUMBUFLEN];
1582 char_u *needle;
1583 char_u *haystack;
1584 char_u *save_haystack;
1585 char_u *pos;
1586 int start_idx;
1587
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001588 if (in_vim9script()
1589 && (check_for_string_arg(argvars, 0) == FAIL
1590 || check_for_string_arg(argvars, 1) == FAIL
1591 || check_for_opt_number_arg(argvars, 2) == FAIL))
1592 return;
1593
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001594 needle = tv_get_string_chk(&argvars[1]);
1595 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1596 rettv->vval.v_number = -1;
1597 if (needle == NULL || haystack == NULL)
1598 return; // type error; errmsg already given
1599
1600 if (argvars[2].v_type != VAR_UNKNOWN)
1601 {
1602 int error = FALSE;
1603
1604 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1605 if (error || start_idx >= (int)STRLEN(haystack))
1606 return;
1607 if (start_idx >= 0)
1608 haystack += start_idx;
1609 }
1610
1611 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1612 if (pos != NULL)
1613 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1614}
1615
1616/*
1617 * "string()" function
1618 */
1619 void
1620f_string(typval_T *argvars, typval_T *rettv)
1621{
1622 char_u *tofree;
1623 char_u numbuf[NUMBUFLEN];
1624
1625 rettv->v_type = VAR_STRING;
1626 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1627 get_copyID());
1628 // Make a copy if we have a value but it's not in allocated memory.
1629 if (rettv->vval.v_string != NULL && tofree == NULL)
1630 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1631}
1632
1633/*
1634 * "strlen()" function
1635 */
1636 void
1637f_strlen(typval_T *argvars, typval_T *rettv)
1638{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001639 if (in_vim9script()
1640 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1641 return;
1642
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001643 rettv->vval.v_number = (varnumber_T)(STRLEN(
1644 tv_get_string(&argvars[0])));
1645}
1646
1647 static void
1648strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1649{
1650 char_u *s = tv_get_string(&argvars[0]);
1651 varnumber_T len = 0;
1652 int (*func_mb_ptr2char_adv)(char_u **pp);
1653
1654 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1655 while (*s != NUL)
1656 {
1657 func_mb_ptr2char_adv(&s);
1658 ++len;
1659 }
1660 rettv->vval.v_number = len;
1661}
1662
1663/*
1664 * "strcharlen()" function
1665 */
1666 void
1667f_strcharlen(typval_T *argvars, typval_T *rettv)
1668{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001669 if (in_vim9script()
1670 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1671 return;
1672
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001673 strchar_common(argvars, rettv, TRUE);
1674}
1675
1676/*
1677 * "strchars()" function
1678 */
1679 void
1680f_strchars(typval_T *argvars, typval_T *rettv)
1681{
1682 varnumber_T skipcc = FALSE;
1683
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001684 if (in_vim9script()
1685 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001686 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001687 return;
1688
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001689 if (argvars[1].v_type != VAR_UNKNOWN)
Bram Moolenaare4098452023-05-07 18:53:49 +01001690 {
zeertzjq8cf51372023-05-08 15:31:38 +01001691 int error = FALSE;
1692 skipcc = tv_get_bool_chk(&argvars[1], &error);
1693 if (error)
1694 return;
1695 if (skipcc < 0 || skipcc > 1)
1696 {
Bram Moolenaare4098452023-05-07 18:53:49 +01001697 semsg(_(e_using_number_as_bool_nr), skipcc);
zeertzjq8cf51372023-05-08 15:31:38 +01001698 return;
1699 }
Bram Moolenaare4098452023-05-07 18:53:49 +01001700 }
zeertzjq8cf51372023-05-08 15:31:38 +01001701
1702 strchar_common(argvars, rettv, skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001703}
1704
1705/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01001706 * "strutf16len()" function
1707 */
1708 void
1709f_strutf16len(typval_T *argvars, typval_T *rettv)
1710{
1711 rettv->vval.v_number = -1;
1712
1713 if (check_for_string_arg(argvars, 0) == FAIL
1714 || check_for_opt_bool_arg(argvars, 1) == FAIL)
1715 return;
1716
1717 varnumber_T countcc = FALSE;
1718 if (argvars[1].v_type != VAR_UNKNOWN)
1719 countcc = tv_get_bool(&argvars[1]);
1720
1721 char_u *s = tv_get_string(&argvars[0]);
1722 varnumber_T len = 0;
1723 int (*func_mb_ptr2char_adv)(char_u **pp);
1724 int ch;
1725
1726 func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
1727 while (*s != NUL)
1728 {
1729 ch = func_mb_ptr2char_adv(&s);
1730 if (ch > 0xFFFF)
1731 ++len;
1732 ++len;
1733 }
1734 rettv->vval.v_number = len;
1735}
1736
1737/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001738 * "strdisplaywidth()" function
1739 */
1740 void
1741f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1742{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001743 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001744 int col = 0;
1745
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001746 rettv->vval.v_number = -1;
1747
1748 if (in_vim9script()
1749 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001750 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001751 return;
1752
1753 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001754 if (argvars[1].v_type != VAR_UNKNOWN)
1755 col = (int)tv_get_number(&argvars[1]);
1756
1757 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1758}
1759
1760/*
1761 * "strwidth()" function
1762 */
1763 void
1764f_strwidth(typval_T *argvars, typval_T *rettv)
1765{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001766 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001767
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001768 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1769 return;
1770
1771 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001772 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1773}
1774
1775/*
1776 * "strcharpart()" function
1777 */
1778 void
1779f_strcharpart(typval_T *argvars, typval_T *rettv)
1780{
1781 char_u *p;
1782 int nchar;
1783 int nbyte = 0;
1784 int charlen;
1785 int skipcc = FALSE;
1786 int len = 0;
1787 int slen;
1788 int error = FALSE;
1789
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001790 if (in_vim9script()
1791 && (check_for_string_arg(argvars, 0) == FAIL
1792 || check_for_number_arg(argvars, 1) == FAIL
1793 || check_for_opt_number_arg(argvars, 2) == FAIL
1794 || (argvars[2].v_type != VAR_UNKNOWN
1795 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1796 return;
1797
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001798 p = tv_get_string(&argvars[0]);
1799 slen = (int)STRLEN(p);
1800
1801 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1802 if (!error)
1803 {
1804 if (argvars[2].v_type != VAR_UNKNOWN
1805 && argvars[3].v_type != VAR_UNKNOWN)
1806 {
zeertzjq8cf51372023-05-08 15:31:38 +01001807 skipcc = tv_get_bool_chk(&argvars[3], &error);
1808 if (error)
1809 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001810 if (skipcc < 0 || skipcc > 1)
1811 {
zeertzjq8cf51372023-05-08 15:31:38 +01001812 semsg(_(e_using_number_as_bool_nr), skipcc);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001813 return;
1814 }
1815 }
1816
1817 if (nchar > 0)
1818 while (nchar > 0 && nbyte < slen)
1819 {
1820 if (skipcc)
1821 nbyte += mb_ptr2len(p + nbyte);
1822 else
1823 nbyte += MB_CPTR2LEN(p + nbyte);
1824 --nchar;
1825 }
1826 else
1827 nbyte = nchar;
1828 if (argvars[2].v_type != VAR_UNKNOWN)
1829 {
1830 charlen = (int)tv_get_number(&argvars[2]);
1831 while (charlen > 0 && nbyte + len < slen)
1832 {
1833 int off = nbyte + len;
1834
1835 if (off < 0)
1836 len += 1;
1837 else
1838 {
1839 if (skipcc)
1840 len += mb_ptr2len(p + off);
1841 else
1842 len += MB_CPTR2LEN(p + off);
1843 }
1844 --charlen;
1845 }
1846 }
1847 else
1848 len = slen - nbyte; // default: all bytes that are available.
1849 }
1850
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001851 // Only return the overlap between the specified part and the actual
1852 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001853 if (nbyte < 0)
1854 {
1855 len += nbyte;
1856 nbyte = 0;
1857 }
1858 else if (nbyte > slen)
1859 nbyte = slen;
1860 if (len < 0)
1861 len = 0;
1862 else if (nbyte + len > slen)
1863 len = slen - nbyte;
1864
1865 rettv->v_type = VAR_STRING;
1866 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1867}
1868
1869/*
1870 * "strpart()" function
1871 */
1872 void
1873f_strpart(typval_T *argvars, typval_T *rettv)
1874{
1875 char_u *p;
1876 int n;
1877 int len;
1878 int slen;
1879 int error = FALSE;
1880
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001881 if (in_vim9script()
1882 && (check_for_string_arg(argvars, 0) == FAIL
1883 || check_for_number_arg(argvars, 1) == FAIL
1884 || check_for_opt_number_arg(argvars, 2) == FAIL
1885 || (argvars[2].v_type != VAR_UNKNOWN
1886 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1887 return;
1888
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001889 p = tv_get_string(&argvars[0]);
1890 slen = (int)STRLEN(p);
1891
1892 n = (int)tv_get_number_chk(&argvars[1], &error);
1893 if (error)
1894 len = 0;
1895 else if (argvars[2].v_type != VAR_UNKNOWN)
1896 len = (int)tv_get_number(&argvars[2]);
1897 else
1898 len = slen - n; // default len: all bytes that are available.
1899
1900 // Only return the overlap between the specified part and the actual
1901 // string.
1902 if (n < 0)
1903 {
1904 len += n;
1905 n = 0;
1906 }
1907 else if (n > slen)
1908 n = slen;
1909 if (len < 0)
1910 len = 0;
1911 else if (n + len > slen)
1912 len = slen - n;
1913
1914 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1915 {
1916 int off;
1917
1918 // length in characters
1919 for (off = n; off < slen && len > 0; --len)
1920 off += mb_ptr2len(p + off);
1921 len = off - n;
1922 }
1923
1924 rettv->v_type = VAR_STRING;
1925 rettv->vval.v_string = vim_strnsave(p + n, len);
1926}
1927
1928/*
1929 * "strridx()" function
1930 */
1931 void
1932f_strridx(typval_T *argvars, typval_T *rettv)
1933{
1934 char_u buf[NUMBUFLEN];
1935 char_u *needle;
1936 char_u *haystack;
1937 char_u *rest;
1938 char_u *lastmatch = NULL;
1939 int haystack_len, end_idx;
1940
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001941 if (in_vim9script()
1942 && (check_for_string_arg(argvars, 0) == FAIL
1943 || check_for_string_arg(argvars, 1) == FAIL
1944 || check_for_opt_number_arg(argvars, 2) == FAIL))
1945 return;
1946
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001947 needle = tv_get_string_chk(&argvars[1]);
1948 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1949
1950 rettv->vval.v_number = -1;
1951 if (needle == NULL || haystack == NULL)
1952 return; // type error; errmsg already given
1953
1954 haystack_len = (int)STRLEN(haystack);
1955 if (argvars[2].v_type != VAR_UNKNOWN)
1956 {
1957 // Third argument: upper limit for index
1958 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1959 if (end_idx < 0)
1960 return; // can never find a match
1961 }
1962 else
1963 end_idx = haystack_len;
1964
1965 if (*needle == NUL)
1966 {
1967 // Empty string matches past the end.
1968 lastmatch = haystack + end_idx;
1969 }
1970 else
1971 {
1972 for (rest = haystack; *rest != '\0'; ++rest)
1973 {
1974 rest = (char_u *)strstr((char *)rest, (char *)needle);
1975 if (rest == NULL || rest > haystack + end_idx)
1976 break;
1977 lastmatch = rest;
1978 }
1979 }
1980
1981 if (lastmatch == NULL)
1982 rettv->vval.v_number = -1;
1983 else
1984 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1985}
1986
1987/*
1988 * "strtrans()" function
1989 */
1990 void
1991f_strtrans(typval_T *argvars, typval_T *rettv)
1992{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001993 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1994 return;
1995
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001996 rettv->v_type = VAR_STRING;
1997 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1998}
1999
Christian Brabandt67672ef2023-04-24 21:09:54 +01002000
2001/*
Christian Brabandt67672ef2023-04-24 21:09:54 +01002002 * "utf16idx()" function
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002003 *
2004 * Converts a byte or character offset in a string to the corresponding UTF-16
2005 * code unit offset.
Christian Brabandt67672ef2023-04-24 21:09:54 +01002006 */
2007 void
2008f_utf16idx(typval_T *argvars, typval_T *rettv)
2009{
2010 rettv->vval.v_number = -1;
2011
2012 if (check_for_string_arg(argvars, 0) == FAIL
2013 || check_for_opt_number_arg(argvars, 1) == FAIL
2014 || check_for_opt_bool_arg(argvars, 2) == FAIL
2015 || (argvars[2].v_type != VAR_UNKNOWN
2016 && check_for_opt_bool_arg(argvars, 3) == FAIL))
2017 return;
2018
2019 char_u *str = tv_get_string_chk(&argvars[0]);
2020 varnumber_T idx = tv_get_number_chk(&argvars[1], NULL);
2021 if (str == NULL || idx < 0)
2022 return;
2023
2024 varnumber_T countcc = FALSE;
2025 varnumber_T charidx = FALSE;
2026 if (argvars[2].v_type != VAR_UNKNOWN)
2027 {
2028 countcc = tv_get_bool(&argvars[2]);
2029 if (argvars[3].v_type != VAR_UNKNOWN)
2030 charidx = tv_get_bool(&argvars[3]);
2031 }
2032
2033 int (*ptr2len)(char_u *);
2034 if (enc_utf8 && countcc)
2035 ptr2len = utf_ptr2len;
2036 else
2037 ptr2len = mb_ptr2len;
2038
2039 char_u *p;
2040 int len;
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002041 int utf16idx = 0;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002042 for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
2043 {
2044 if (*p == NUL)
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002045 {
2046 // If the index is exactly the number of bytes or characters in the
2047 // string then return the length of the string in utf-16 code
2048 // units.
2049 if (charidx ? (idx == 0) : (p == (str + idx)))
2050 rettv->vval.v_number = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002051 return;
Yegappan Lakshmanan577922b2023-06-08 17:09:45 +01002052 }
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002053 utf16idx = len;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002054 int clen = ptr2len(p);
2055 int c = (clen > 1) ? utf_ptr2char(p) : *p;
2056 if (c > 0xFFFF)
2057 len++;
2058 p += ptr2len(p);
2059 if (charidx)
2060 idx--;
2061 }
2062
Yegappan Lakshmanan95707032023-06-14 13:10:15 +01002063 rettv->vval.v_number = utf16idx;
Christian Brabandt67672ef2023-04-24 21:09:54 +01002064}
2065
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002066/*
2067 * "tolower(string)" function
2068 */
2069 void
2070f_tolower(typval_T *argvars, typval_T *rettv)
2071{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002072 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2073 return;
2074
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002075 rettv->v_type = VAR_STRING;
2076 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
2077}
2078
2079/*
2080 * "toupper(string)" function
2081 */
2082 void
2083f_toupper(typval_T *argvars, typval_T *rettv)
2084{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002085 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
2086 return;
2087
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002088 rettv->v_type = VAR_STRING;
2089 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
2090}
2091
2092/*
2093 * "tr(string, fromstr, tostr)" function
2094 */
2095 void
2096f_tr(typval_T *argvars, typval_T *rettv)
2097{
2098 char_u *in_str;
2099 char_u *fromstr;
2100 char_u *tostr;
2101 char_u *p;
2102 int inlen;
2103 int fromlen;
2104 int tolen;
2105 int idx;
2106 char_u *cpstr;
2107 int cplen;
2108 int first = TRUE;
2109 char_u buf[NUMBUFLEN];
2110 char_u buf2[NUMBUFLEN];
2111 garray_T ga;
2112
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002113 if (in_vim9script()
2114 && (check_for_string_arg(argvars, 0) == FAIL
2115 || check_for_string_arg(argvars, 1) == FAIL
2116 || check_for_string_arg(argvars, 2) == FAIL))
2117 return;
2118
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002119 in_str = tv_get_string(&argvars[0]);
2120 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
2121 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
2122
2123 // Default return value: empty string.
2124 rettv->v_type = VAR_STRING;
2125 rettv->vval.v_string = NULL;
2126 if (fromstr == NULL || tostr == NULL)
2127 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00002128 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002129
2130 if (!has_mbyte)
2131 // not multi-byte: fromstr and tostr must be the same length
2132 if (STRLEN(fromstr) != STRLEN(tostr))
2133 {
2134error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00002135 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002136 ga_clear(&ga);
2137 return;
2138 }
2139
2140 // fromstr and tostr have to contain the same number of chars
2141 while (*in_str != NUL)
2142 {
2143 if (has_mbyte)
2144 {
2145 inlen = (*mb_ptr2len)(in_str);
2146 cpstr = in_str;
2147 cplen = inlen;
2148 idx = 0;
2149 for (p = fromstr; *p != NUL; p += fromlen)
2150 {
2151 fromlen = (*mb_ptr2len)(p);
2152 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
2153 {
2154 for (p = tostr; *p != NUL; p += tolen)
2155 {
2156 tolen = (*mb_ptr2len)(p);
2157 if (idx-- == 0)
2158 {
2159 cplen = tolen;
2160 cpstr = p;
2161 break;
2162 }
2163 }
2164 if (*p == NUL) // tostr is shorter than fromstr
2165 goto error;
2166 break;
2167 }
2168 ++idx;
2169 }
2170
2171 if (first && cpstr == in_str)
2172 {
2173 // Check that fromstr and tostr have the same number of
2174 // (multi-byte) characters. Done only once when a character
2175 // of in_str doesn't appear in fromstr.
2176 first = FALSE;
2177 for (p = tostr; *p != NUL; p += tolen)
2178 {
2179 tolen = (*mb_ptr2len)(p);
2180 --idx;
2181 }
2182 if (idx != 0)
2183 goto error;
2184 }
2185
2186 (void)ga_grow(&ga, cplen);
2187 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
2188 ga.ga_len += cplen;
2189
2190 in_str += inlen;
2191 }
2192 else
2193 {
2194 // When not using multi-byte chars we can do it faster.
2195 p = vim_strchr(fromstr, *in_str);
2196 if (p != NULL)
2197 ga_append(&ga, tostr[p - fromstr]);
2198 else
2199 ga_append(&ga, *in_str);
2200 ++in_str;
2201 }
2202 }
2203
2204 // add a terminating NUL
2205 (void)ga_grow(&ga, 1);
2206 ga_append(&ga, NUL);
2207
2208 rettv->vval.v_string = ga.ga_data;
2209}
2210
2211/*
2212 * "trim({expr})" function
2213 */
2214 void
2215f_trim(typval_T *argvars, typval_T *rettv)
2216{
2217 char_u buf1[NUMBUFLEN];
2218 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002219 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002220 char_u *mask = NULL;
2221 char_u *tail;
2222 char_u *prev;
2223 char_u *p;
2224 int c1;
2225 int dir = 0;
2226
2227 rettv->v_type = VAR_STRING;
2228 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002229
2230 if (in_vim9script()
2231 && (check_for_string_arg(argvars, 0) == FAIL
Illia Bobyr80799172023-10-17 18:00:50 +02002232 || check_for_opt_string_arg(argvars, 1) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02002233 || (argvars[1].v_type != VAR_UNKNOWN
2234 && check_for_opt_number_arg(argvars, 2) == FAIL)))
2235 return;
2236
2237 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002238 if (head == NULL)
2239 return;
2240
Illia Bobyr80799172023-10-17 18:00:50 +02002241 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002242 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002243
2244 if (argvars[1].v_type == VAR_STRING)
Illia Bobyr6e638672023-10-17 11:09:45 +02002245 {
Illia Bobyr80799172023-10-17 18:00:50 +02002246 mask = tv_get_string_buf_chk(&argvars[1], buf2);
2247 if (*mask == NUL)
2248 mask = NULL;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002249
Illia Bobyr80799172023-10-17 18:00:50 +02002250 if (argvars[2].v_type != VAR_UNKNOWN)
Illia Bobyr6e638672023-10-17 11:09:45 +02002251 {
Illia Bobyr80799172023-10-17 18:00:50 +02002252 int error = 0;
2253
2254 // leading or trailing characters to trim
2255 dir = (int)tv_get_number_chk(&argvars[2], &error);
2256 if (error)
2257 return;
2258 if (dir < 0 || dir > 2)
2259 {
2260 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
2261 return;
2262 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02002263 }
2264 }
2265
2266 if (dir == 0 || dir == 1)
2267 {
2268 // Trim leading characters
2269 while (*head != NUL)
2270 {
2271 c1 = PTR2CHAR(head);
2272 if (mask == NULL)
2273 {
2274 if (c1 > ' ' && c1 != 0xa0)
2275 break;
2276 }
2277 else
2278 {
2279 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2280 if (c1 == PTR2CHAR(p))
2281 break;
2282 if (*p == NUL)
2283 break;
2284 }
2285 MB_PTR_ADV(head);
2286 }
2287 }
2288
2289 tail = head + STRLEN(head);
2290 if (dir == 0 || dir == 2)
2291 {
2292 // Trim trailing characters
2293 for (; tail > head; tail = prev)
2294 {
2295 prev = tail;
2296 MB_PTR_BACK(head, prev);
2297 c1 = PTR2CHAR(prev);
2298 if (mask == NULL)
2299 {
2300 if (c1 > ' ' && c1 != 0xa0)
2301 break;
2302 }
2303 else
2304 {
2305 for (p = mask; *p != NUL; MB_PTR_ADV(p))
2306 if (c1 == PTR2CHAR(p))
2307 break;
2308 if (*p == NUL)
2309 break;
2310 }
2311 }
2312 }
2313 rettv->vval.v_string = vim_strnsave(head, tail - head);
2314}
2315
Bram Moolenaar677658a2022-01-05 16:09:06 +00002316static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002317
2318/*
2319 * Get number argument from "idxp" entry in "tvs". First entry is 1.
2320 */
2321 static varnumber_T
2322tv_nr(typval_T *tvs, int *idxp)
2323{
2324 int idx = *idxp - 1;
2325 varnumber_T n = 0;
2326 int err = FALSE;
2327
2328 if (tvs[idx].v_type == VAR_UNKNOWN)
2329 emsg(_(e_printf));
2330 else
2331 {
2332 ++*idxp;
2333 n = tv_get_number_chk(&tvs[idx], &err);
2334 if (err)
2335 n = 0;
2336 }
2337 return n;
2338}
2339
2340/*
2341 * Get string argument from "idxp" entry in "tvs". First entry is 1.
2342 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
2343 * are not converted to a string.
2344 * If "tofree" is not NULL echo_string() is used. All types are converted to
2345 * a string with the same format as ":echo". The caller must free "*tofree".
2346 * Returns NULL for an error.
2347 */
2348 static char *
2349tv_str(typval_T *tvs, int *idxp, char_u **tofree)
2350{
2351 int idx = *idxp - 1;
2352 char *s = NULL;
2353 static char_u numbuf[NUMBUFLEN];
2354
2355 if (tvs[idx].v_type == VAR_UNKNOWN)
2356 emsg(_(e_printf));
2357 else
2358 {
2359 ++*idxp;
2360 if (tofree != NULL)
2361 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
2362 else
2363 s = (char *)tv_get_string_chk(&tvs[idx]);
2364 }
2365 return s;
2366}
2367
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002368/*
2369 * Get float argument from "idxp" entry in "tvs". First entry is 1.
2370 */
2371 static double
2372tv_float(typval_T *tvs, int *idxp)
2373{
2374 int idx = *idxp - 1;
2375 double f = 0;
2376
2377 if (tvs[idx].v_type == VAR_UNKNOWN)
2378 emsg(_(e_printf));
2379 else
2380 {
2381 ++*idxp;
2382 if (tvs[idx].v_type == VAR_FLOAT)
2383 f = tvs[idx].vval.v_float;
2384 else if (tvs[idx].v_type == VAR_NUMBER)
2385 f = (double)tvs[idx].vval.v_number;
2386 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00002387 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002388 }
2389 return f;
2390}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00002391
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002392#endif
2393
/*
 * Return the text that printf() uses for an infinite value:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * "positive", "force_sign" and "space_for_positive" are flags that must be
 * zero or one; they select the sign.  An upper case "fmt_spec" selects the
 * upper case text.
 */
    static const char *
infinity_str(
	int	positive,
	char	fmt_spec,
	int	force_sign,
	int	space_for_positive)
{
    static const char *table[] =
    {
	// lower case conversion
	"-inf", "inf", "+inf", " inf",
	// upper case conversion
	"-INF", "INF", "+INF", " INF"
    };
    // Which of the three positive forms applies: 1 is bare, 2 is "+" and
    // 3 is a leading space.
    int	    positive_form = 1 + force_sign * (1 + space_for_positive);
    // A negative value always uses the first entry of its group.
    int	    idx = positive * positive_form;
    int	    case_offset = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[idx + case_offset];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002415
2416/*
2417 * This code was included to provide a portable vsnprintf() and snprintf().
2418 * Some systems may provide their own, but we always use this one for
2419 * consistency.
2420 *
2421 * This code is based on snprintf.c - a portable implementation of snprintf
2422 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
2423 * Included with permission. It was heavily modified to fit in Vim.
2424 * The original code, including useful comments, can be found here:
2425 * http://www.ijs.si/software/snprintf/
2426 *
2427 * This snprintf() only supports the following conversion specifiers:
2428 * s, c, d, u, o, x, X, p (and synonyms: i, D, U, O - see below)
2429 * with flags: '-', '+', ' ', '0' and '#'.
2430 * An asterisk is supported for field width as well as precision.
2431 *
2432 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
2433 *
2434 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
2435 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
2436 *
2437 * The locale is not used, the string is used as a byte string. This is only
2438 * relevant for double-byte encodings where the second byte may be '%'.
2439 *
2440 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
2441 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
2442 *
2443 * The return value is the number of characters which would be generated
2444 * for the given input, excluding the trailing NUL. If this value
2445 * is greater or equal to "str_m", not all characters from the result
2446 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
2447 * are discarded. If "str_m" is greater than zero it is guaranteed
2448 * the resulting string will be NUL-terminated.
2449 */
2450
2451/*
2452 * When va_list is not supported we only define vim_snprintf().
2453 *
2454 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2455 * "typval_T". When the latter is not used it must be NULL.
2456 */
2457
2458// When generating prototypes all of this is skipped, cproto doesn't
2459// understand this.
2460#ifndef PROTO
2461
/*
 * Like vim_snprintf() but append to the text already in "str".
 * "str_m" is the size of the whole buffer; what is left after the existing
 * text is what gets passed on as the available space.
 * Returns what vim_vsnprintf() returns for the appended part only.
 */
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	args;
    int		added;
    size_t	used = STRLEN(str);
    // Nothing can be stored when the buffer is already full.
    size_t	room = (used < str_m) ? str_m - used : 0;

    va_start(args, fmt);
    added = vim_vsnprintf(str + used, room, fmt, args);
    va_end(args);
    return added;
}
2480
/*
 * Format the variable arguments according to "fmt" into "str", which has
 * room for "str_m" bytes.  The arguments are collected in a va_list and the
 * work is left to vim_vsnprintf(), whose return value is passed back.
 */
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	args;
    int		len;

    va_start(args, fmt);
    len = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return len;
}
2492
2493 int
2494vim_vsnprintf(
2495 char *str,
2496 size_t str_m,
2497 const char *fmt,
2498 va_list ap)
2499{
2500 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2501}
2502
// The kinds of argument that a conversion specification can consume, as
// returned by format_typeof().
enum
{
    TYPE_UNKNOWN = -1,			// not a recognized conversion
    TYPE_INT = 0,			// d, i, *; also with 'h'
    TYPE_LONGINT = 1,			// ld, D
    TYPE_LONGLONGINT = 2,		// lld
    TYPE_UNSIGNEDINT = 3,		// u, o, x, X; also with 'h'
    TYPE_UNSIGNEDLONGINT = 4,		// lu, lo, lx, lX, U, O
    TYPE_UNSIGNEDLONGLONGINT = 5,	// llu, llo, llx, llX, b, B
    TYPE_POINTER = 6,			// p
    TYPE_PERCENT = 7,			// %
    TYPE_CHAR = 8,			// c
    TYPE_STRING = 9,			// s, S
    TYPE_FLOAT = 10			// f, F, e, E, g, G
};

/*
 * Return the TYPE_ value for the argument consumed by the conversion that
 * "type" points to.  "type" must point at the optional length modifier
 * ('h', 'l' or 'll') or, when there is none, at the conversion character.
 * A '*' (width or precision taken from an argument) counts as an int.
 * Returns TYPE_UNKNOWN when the conversion character is not recognized.
 */
    static int
format_typeof(const char *type)
{
    // one of: \0, h, l, L (where L stands for "ll")
    char    length_modifier = '\0';
    // the conversion character, after replacing synonyms
    char    fmt_spec;
    int	    is_signed;

    // Step over the 'h', 'l' or 'll' length modifier.
    if (*type == 'h')
    {
	length_modifier = 'h';
	++type;
    }
    else if (*type == 'l')
    {
	length_modifier = 'l';
	++type;
	if (*type == 'l')
	{
	    // double l = __int64 / varnumber_T
	    length_modifier = 'L';
	    ++type;
	}
    }
    fmt_spec = *type;

    // Replace the common synonyms; some of them imply a length modifier.
    if (fmt_spec == 'i')
	fmt_spec = 'd';
    else if (fmt_spec == '*')
    {
	fmt_spec = 'd';
	length_modifier = 'h';
    }
    else if (fmt_spec == 'D')
    {
	fmt_spec = 'd';
	length_modifier = 'l';
    }
    else if (fmt_spec == 'U')
    {
	fmt_spec = 'u';
	length_modifier = 'l';
    }
    else if (fmt_spec == 'O')
    {
	fmt_spec = 'o';
	length_modifier = 'l';
    }

    // Conversions for which the length modifier makes no difference return
    // right away, for the others only the signedness is noted.
    switch (fmt_spec)
    {
	case '%':
	    return TYPE_PERCENT;

	case 'c':
	    return TYPE_CHAR;

	case 's': case 'S':
	    return TYPE_STRING;

	case 'p':
	    return TYPE_POINTER;

	case 'b': case 'B':
	    return TYPE_UNSIGNEDLONGLONGINT;

	case 'f': case 'F':
	case 'e': case 'E':
	case 'g': case 'G':
	    return TYPE_FLOAT;

	case 'd':
	    is_signed = 1;
	    break;

	case 'u': case 'o':
	case 'x': case 'X':
	    is_signed = 0;
	    break;

	default:
	    return TYPE_UNKNOWN;
    }

    switch (length_modifier)
    {
	case '\0':
	case 'h':
	    // char and short arguments are passed as int.
	    return is_signed ? TYPE_INT : TYPE_UNSIGNEDINT;

	case 'l':
	    return is_signed ? TYPE_LONGINT : TYPE_UNSIGNEDLONGINT;

	case 'L':
	    return is_signed ? TYPE_LONGLONGINT : TYPE_UNSIGNEDLONGLONGINT;

	default:
	    break;
    }

    return TYPE_UNKNOWN;
}
2633
zeertzjq7772c932023-08-15 22:48:40 +02002634 static char *
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002635format_typename(
2636 const char *type)
2637{
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002638 switch (format_typeof(type))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002639 {
2640 case TYPE_INT:
2641 return _(typename_int);
2642
2643 case TYPE_LONGINT:
2644 return _(typename_longint);
2645
2646 case TYPE_LONGLONGINT:
2647 return _(typename_longlongint);
2648
2649 case TYPE_UNSIGNEDINT:
2650 return _(typename_unsignedint);
2651
2652 case TYPE_UNSIGNEDLONGINT:
2653 return _(typename_unsignedlongint);
2654
2655 case TYPE_UNSIGNEDLONGLONGINT:
2656 return _(typename_unsignedlonglongint);
2657
2658 case TYPE_POINTER:
2659 return _(typename_pointer);
2660
2661 case TYPE_PERCENT:
2662 return _(typename_percent);
2663
2664 case TYPE_CHAR:
2665 return _(typename_char);
2666
2667 case TYPE_STRING:
2668 return _(typename_string);
2669
2670 case TYPE_FLOAT:
2671 return _(typename_float);
2672 }
2673
2674 return _(typename_unknown);
2675}
2676
zeertzjq7772c932023-08-15 22:48:40 +02002677 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002678adjust_types(
2679 const char ***ap_types,
2680 int arg,
2681 int *num_posarg,
2682 const char *type)
2683{
2684 if (*ap_types == NULL || *num_posarg < arg)
2685 {
2686 int idx;
2687 const char **new_types;
2688
2689 if (*ap_types == NULL)
2690 new_types = ALLOC_CLEAR_MULT(const char *, arg);
2691 else
K.Takata4c215ec2023-08-26 18:05:08 +02002692 new_types = vim_realloc((char **)*ap_types,
2693 arg * sizeof(const char *));
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002694
2695 if (new_types == NULL)
2696 return FAIL;
2697
2698 for (idx = *num_posarg; idx < arg; ++idx)
2699 new_types[idx] = NULL;
2700
2701 *ap_types = new_types;
2702 *num_posarg = arg;
2703 }
2704
2705 if ((*ap_types)[arg - 1] != NULL)
2706 {
2707 if ((*ap_types)[arg - 1][0] == '*' || type[0] == '*')
2708 {
2709 const char *pt = type;
2710 if (pt[0] == '*')
2711 pt = (*ap_types)[arg - 1];
2712
2713 if (pt[0] != '*')
2714 {
2715 switch (pt[0])
2716 {
2717 case 'd': case 'i': break;
2718 default:
2719 semsg(_(e_positional_num_field_spec_reused_str_str), arg, format_typename((*ap_types)[arg - 1]), format_typename(type));
2720 return FAIL;
2721 }
2722 }
2723 }
2724 else
2725 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02002726 if (format_typeof(type) != format_typeof((*ap_types)[arg - 1]))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002727 {
2728 semsg(_( e_positional_arg_num_type_inconsistent_str_str), arg, format_typename(type), format_typename((*ap_types)[arg - 1]));
2729 return FAIL;
2730 }
2731 }
2732 }
2733
2734 (*ap_types)[arg - 1] = type;
2735
2736 return OK;
2737}
2738
Christ van Willegenc35fc032024-03-14 18:30:41 +01002739 static void
2740format_overflow_error(const char *pstart)
2741{
2742 size_t arglen = 0;
2743 char *argcopy = NULL;
2744 const char *p = pstart;
2745
2746 while (VIM_ISDIGIT((int)(*p)))
2747 ++p;
2748
2749 arglen = p - pstart;
2750 argcopy = ALLOC_CLEAR_MULT(char, arglen + 1);
2751 if (argcopy != NULL)
2752 {
2753 strncpy(argcopy, pstart, arglen);
2754 semsg(_( e_val_too_large), argcopy);
2755 free(argcopy);
2756 }
2757 else
2758 semsg(_(e_out_of_memory_allocating_nr_bytes), arglen);
2759}
2760
2761#define MAX_ALLOWED_STRING_WIDTH 6400
2762
2763 static int
2764get_unsigned_int(
2765 const char *pstart,
2766 const char **p,
zeertzjq0dff3152024-07-29 20:28:14 +02002767 unsigned int *uj,
2768 int overflow_err)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002769{
2770 *uj = **p - '0';
2771 ++*p;
2772
2773 while (VIM_ISDIGIT((int)(**p)) && *uj < MAX_ALLOWED_STRING_WIDTH)
2774 {
2775 *uj = 10 * *uj + (unsigned int)(**p - '0');
2776 ++*p;
2777 }
2778
2779 if (*uj > MAX_ALLOWED_STRING_WIDTH)
2780 {
zeertzjq0dff3152024-07-29 20:28:14 +02002781 if (overflow_err)
2782 {
2783 format_overflow_error(pstart);
2784 return FAIL;
2785 }
2786 else
2787 *uj = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002788 }
2789
2790 return OK;
2791}
2792
2793
zeertzjq7772c932023-08-15 22:48:40 +02002794 static int
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002795parse_fmt_types(
2796 const char ***ap_types,
2797 int *num_posarg,
2798 const char *fmt,
2799 typval_T *tvs UNUSED
2800 )
2801{
2802 const char *p = fmt;
2803 const char *arg = NULL;
2804
2805 int any_pos = 0;
2806 int any_arg = 0;
2807 int arg_idx;
2808
2809#define CHECK_POS_ARG do { \
2810 if (any_pos && any_arg) \
2811 { \
2812 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt); \
2813 goto error; \
2814 } \
2815} while (0);
2816
2817 if (p == NULL)
2818 return OK;
2819
2820 while (*p != NUL)
2821 {
2822 if (*p != '%')
2823 {
2824 char *q = strchr(p + 1, '%');
2825 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2826
2827 p += n;
2828 }
2829 else
2830 {
2831 // allowed values: \0, h, l, L
2832 char length_modifier = '\0';
2833
2834 // variable for positional arg
2835 int pos_arg = -1;
2836 const char *ptype = NULL;
Christ van Willegenc35fc032024-03-14 18:30:41 +01002837 const char *pstart = p+1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002838
2839 p++; // skip '%'
2840
2841 // First check to see if we find a positional
2842 // argument specifier
2843 ptype = p;
2844
2845 while (VIM_ISDIGIT(*ptype))
2846 ++ptype;
2847
2848 if (*ptype == '$')
2849 {
2850 if (*p == '0')
2851 {
2852 // 0 flag at the wrong place
2853 semsg(_( e_invalid_format_specifier_str), fmt);
2854 goto error;
2855 }
2856
2857 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01002858 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002859
zeertzjq0dff3152024-07-29 20:28:14 +02002860 if (get_unsigned_int(pstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002861 goto error;
2862
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002863 pos_arg = uj;
2864
2865 any_pos = 1;
2866 CHECK_POS_ARG;
2867
2868 ++p;
2869 }
2870
2871 // parse flags
2872 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2873 || *p == '#' || *p == '\'')
2874 {
2875 switch (*p)
2876 {
2877 case '0': break;
2878 case '-': break;
2879 case '+': break;
2880 case ' ': // If both the ' ' and '+' flags appear, the ' '
2881 // flag should be ignored
2882 break;
2883 case '#': break;
2884 case '\'': break;
2885 }
2886 p++;
2887 }
2888 // If the '0' and '-' flags both appear, the '0' flag should be
2889 // ignored.
2890
2891 // parse field width
2892 if (*(arg = p) == '*')
2893 {
2894 p++;
2895
2896 if (VIM_ISDIGIT((int)(*p)))
2897 {
2898 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01002899 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002900
zeertzjq0dff3152024-07-29 20:28:14 +02002901 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002902 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002903
2904 if (*p != '$')
2905 {
2906 semsg(_( e_invalid_format_specifier_str), fmt);
2907 goto error;
2908 }
2909 else
2910 {
2911 ++p;
2912 any_pos = 1;
2913 CHECK_POS_ARG;
2914
2915 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2916 goto error;
2917 }
2918 }
2919 else
2920 {
2921 any_arg = 1;
2922 CHECK_POS_ARG;
2923 }
2924 }
dundargoc580c1fc2023-10-06 19:41:14 +02002925 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002926 {
2927 // size_t could be wider than unsigned int; make sure we treat
2928 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002929 const char *digstart = p;
2930 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002931
zeertzjq0dff3152024-07-29 20:28:14 +02002932 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002933 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002934
2935 if (*p == '$')
2936 {
2937 semsg(_( e_invalid_format_specifier_str), fmt);
2938 goto error;
2939 }
2940 }
2941
2942 // parse precision
2943 if (*p == '.')
2944 {
2945 p++;
2946
2947 if (*(arg = p) == '*')
2948 {
2949 p++;
2950
2951 if (VIM_ISDIGIT((int)(*p)))
2952 {
2953 // Parse precision
Christ van Willegenc35fc032024-03-14 18:30:41 +01002954 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002955
zeertzjq0dff3152024-07-29 20:28:14 +02002956 if (get_unsigned_int(arg + 1, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002957 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002958
2959 if (*p == '$')
2960 {
2961 any_pos = 1;
2962 CHECK_POS_ARG;
2963
2964 ++p;
2965
2966 if (adjust_types(ap_types, uj, num_posarg, arg) == FAIL)
2967 goto error;
2968 }
2969 else
2970 {
2971 semsg(_( e_invalid_format_specifier_str), fmt);
2972 goto error;
2973 }
2974 }
2975 else
2976 {
2977 any_arg = 1;
2978 CHECK_POS_ARG;
2979 }
2980 }
dundargoc580c1fc2023-10-06 19:41:14 +02002981 else if (VIM_ISDIGIT((int)(*p)))
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002982 {
2983 // size_t could be wider than unsigned int; make sure we
2984 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01002985 const char *digstart = p;
2986 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002987
zeertzjq0dff3152024-07-29 20:28:14 +02002988 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01002989 goto error;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02002990
2991 if (*p == '$')
2992 {
2993 semsg(_( e_invalid_format_specifier_str), fmt);
2994 goto error;
2995 }
2996 }
2997 }
2998
2999 if (pos_arg != -1)
3000 {
3001 any_pos = 1;
3002 CHECK_POS_ARG;
3003
3004 ptype = p;
3005 }
3006
3007 // parse 'h', 'l' and 'll' length modifiers
3008 if (*p == 'h' || *p == 'l')
3009 {
3010 length_modifier = *p;
3011 p++;
3012 if (length_modifier == 'l' && *p == 'l')
3013 {
3014 // double l = __int64 / varnumber_T
dundargoc580c1fc2023-10-06 19:41:14 +02003015 // length_modifier = 'L';
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003016 p++;
3017 }
3018 }
3019
3020 switch (*p)
3021 {
3022 // Check for known format specifiers. % is special!
3023 case 'i':
3024 case '*':
3025 case 'd':
3026 case 'u':
3027 case 'o':
3028 case 'D':
3029 case 'U':
3030 case 'O':
3031 case 'x':
3032 case 'X':
3033 case 'b':
3034 case 'B':
3035 case 'c':
3036 case 's':
3037 case 'S':
3038 case 'p':
3039 case 'f':
3040 case 'F':
3041 case 'e':
3042 case 'E':
3043 case 'g':
3044 case 'G':
3045 if (pos_arg != -1)
3046 {
3047 if (adjust_types(ap_types, pos_arg, num_posarg, ptype) == FAIL)
3048 goto error;
3049 }
3050 else
3051 {
3052 any_arg = 1;
3053 CHECK_POS_ARG;
3054 }
3055 break;
3056
3057 default:
3058 if (pos_arg != -1)
3059 {
3060 semsg(_( e_cannot_mix_positional_and_non_positional_str), fmt);
3061 goto error;
3062 }
3063 }
3064
3065 if (*p != NUL)
3066 p++; // step over the just processed conversion specifier
3067 }
3068 }
3069
3070 for (arg_idx = 0; arg_idx < *num_posarg; ++arg_idx)
3071 {
3072 if ((*ap_types)[arg_idx] == NULL)
3073 {
3074 semsg(_(e_fmt_arg_nr_unused_str), arg_idx + 1, fmt);
3075 goto error;
3076 }
3077
3078# if defined(FEAT_EVAL)
3079 if (tvs != NULL && tvs[arg_idx].v_type == VAR_UNKNOWN)
3080 {
3081 semsg(_(e_positional_nr_out_of_bounds_str), arg_idx + 1, fmt);
3082 goto error;
3083 }
3084# endif
3085 }
3086
3087 return OK;
3088
3089error:
K.Takata4c215ec2023-08-26 18:05:08 +02003090 vim_free((char**)*ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003091 *ap_types = NULL;
3092 *num_posarg = 0;
3093 return FAIL;
3094}
3095
zeertzjq7772c932023-08-15 22:48:40 +02003096 static void
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003097skip_to_arg(
3098 const char **ap_types,
3099 va_list ap_start,
3100 va_list *ap,
3101 int *arg_idx,
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003102 int *arg_cur,
3103 const char *fmt)
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003104{
3105 int arg_min = 0;
3106
3107 if (*arg_cur + 1 == *arg_idx)
3108 {
3109 ++*arg_cur;
3110 ++*arg_idx;
3111 return;
3112 }
3113
3114 if (*arg_cur >= *arg_idx)
3115 {
3116 // Reset ap to ap_start and skip arg_idx - 1 types
3117 va_end(*ap);
3118 va_copy(*ap, ap_start);
3119 }
3120 else
3121 {
3122 // Skip over any we should skip
3123 arg_min = *arg_cur;
3124 }
3125
3126 for (*arg_cur = arg_min; *arg_cur < *arg_idx - 1; ++*arg_cur)
3127 {
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003128 const char *p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003129
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003130 if (ap_types == NULL || ap_types[*arg_cur] == NULL)
3131 {
Christ van Willegen1bd2cb12023-09-08 19:17:09 +02003132 siemsg(e_aptypes_is_null_nr_str, *arg_cur, fmt);
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003133 return;
3134 }
3135
3136 p = ap_types[*arg_cur];
3137
3138 int fmt_type = format_typeof(p);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003139
3140 // get parameter value, do initial processing
3141 switch (fmt_type)
3142 {
3143 case TYPE_PERCENT:
3144 case TYPE_UNKNOWN:
3145 break;
3146
3147 case TYPE_CHAR:
3148 va_arg(*ap, int);
3149 break;
3150
3151 case TYPE_STRING:
3152 va_arg(*ap, char *);
3153 break;
3154
3155 case TYPE_POINTER:
3156 va_arg(*ap, void *);
3157 break;
3158
3159 case TYPE_INT:
3160 va_arg(*ap, int);
3161 break;
3162
3163 case TYPE_LONGINT:
3164 va_arg(*ap, long int);
3165 break;
3166
3167 case TYPE_LONGLONGINT:
3168 va_arg(*ap, varnumber_T);
3169 break;
3170
3171 case TYPE_UNSIGNEDINT:
3172 va_arg(*ap, unsigned int);
3173 break;
3174
3175 case TYPE_UNSIGNEDLONGINT:
3176 va_arg(*ap, unsigned long int);
3177 break;
3178
3179 case TYPE_UNSIGNEDLONGLONGINT:
3180 va_arg(*ap, uvarnumber_T);
3181 break;
3182
3183 case TYPE_FLOAT:
3184 va_arg(*ap, double);
3185 break;
3186 }
3187 }
3188
3189 // Because we know that after we return from this call,
3190 // a va_arg() call is made, we can pre-emptively
3191 // increment the current argument index.
3192 ++*arg_cur;
3193 ++*arg_idx;
3194
3195 return;
3196}
3197
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003198 int
3199vim_vsnprintf_typval(
3200 char *str,
3201 size_t str_m,
3202 const char *fmt,
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003203 va_list ap_start,
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003204 typval_T *tvs)
3205{
3206 size_t str_l = 0;
3207 const char *p = fmt;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003208 int arg_cur = 0;
3209 int num_posarg = 0;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003210 int arg_idx = 1;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003211 va_list ap;
3212 const char **ap_types = NULL;
3213
3214 if (parse_fmt_types(&ap_types, &num_posarg, fmt, tvs) == FAIL)
3215 return 0;
3216
3217 va_copy(ap, ap_start);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003218
3219 if (p == NULL)
3220 p = "";
3221 while (*p != NUL)
3222 {
3223 if (*p != '%')
3224 {
3225 char *q = strchr(p + 1, '%');
3226 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
3227
3228 // Copy up to the next '%' or NUL without any changes.
3229 if (str_l < str_m)
3230 {
3231 size_t avail = str_m - str_l;
3232
3233 mch_memmove(str + str_l, p, n > avail ? avail : n);
3234 }
3235 p += n;
3236 str_l += n;
3237 }
3238 else
3239 {
3240 size_t min_field_width = 0, precision = 0;
3241 int zero_padding = 0, precision_specified = 0, justify_left = 0;
3242 int alternate_form = 0, force_sign = 0;
3243
3244 // If both the ' ' and '+' flags appear, the ' ' flag should be
3245 // ignored.
3246 int space_for_positive = 1;
3247
3248 // allowed values: \0, h, l, L
3249 char length_modifier = '\0';
3250
3251 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003252# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003253 // That sounds reasonable to use as the maximum
3254 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003255 char tmp[TMP_LEN];
3256
3257 // string address in case of string argument
3258 const char *str_arg = NULL;
3259
3260 // natural field width of arg without padding and sign
3261 size_t str_arg_l;
3262
3263 // unsigned char argument value - only defined for c conversion.
3264 // N.B. standard explicitly states the char argument for the c
3265 // conversion is unsigned
3266 unsigned char uchar_arg;
3267
3268 // number of zeros to be inserted for numeric conversions as
3269 // required by the precision or minimal field width
3270 size_t number_of_zeros_to_pad = 0;
3271
3272 // index into tmp where zero padding is to be inserted
3273 size_t zero_padding_insertion_ind = 0;
3274
3275 // current conversion specifier character
3276 char fmt_spec = '\0';
3277
3278 // buffer for 's' and 'S' specs
3279 char_u *tofree = NULL;
3280
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003281 // variables for positional arg
3282 int pos_arg = -1;
3283 const char *ptype;
3284
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003285
3286 p++; // skip '%'
3287
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003288 // First check to see if we find a positional
3289 // argument specifier
3290 ptype = p;
3291
3292 while (VIM_ISDIGIT(*ptype))
3293 ++ptype;
3294
3295 if (*ptype == '$')
3296 {
3297 // Positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003298 const char *digstart = p;
3299 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003300
zeertzjq0dff3152024-07-29 20:28:14 +02003301 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003302 goto error;
3303
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003304 pos_arg = uj;
3305
3306 ++p;
3307 }
3308
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003309 // parse flags
3310 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
3311 || *p == '#' || *p == '\'')
3312 {
3313 switch (*p)
3314 {
3315 case '0': zero_padding = 1; break;
3316 case '-': justify_left = 1; break;
3317 case '+': force_sign = 1; space_for_positive = 0; break;
3318 case ' ': force_sign = 1;
3319 // If both the ' ' and '+' flags appear, the ' '
3320 // flag should be ignored
3321 break;
3322 case '#': alternate_form = 1; break;
3323 case '\'': break;
3324 }
3325 p++;
3326 }
3327 // If the '0' and '-' flags both appear, the '0' flag should be
3328 // ignored.
3329
3330 // parse field width
3331 if (*p == '*')
3332 {
3333 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003334 const char *digstart = p + 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003335
3336 p++;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003337
3338 if (VIM_ISDIGIT((int)(*p)))
3339 {
3340 // Positional argument field width
Christ van Willegenc35fc032024-03-14 18:30:41 +01003341 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003342
zeertzjq0dff3152024-07-29 20:28:14 +02003343 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003344 goto error;
3345
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003346 arg_idx = uj;
3347
3348 ++p;
3349 }
3350
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003351 j =
3352# if defined(FEAT_EVAL)
3353 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3354# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003355 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3356 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003357 va_arg(ap, int));
3358
Christ van Willegenc35fc032024-03-14 18:30:41 +01003359 if (j > MAX_ALLOWED_STRING_WIDTH)
3360 {
zeertzjq0dff3152024-07-29 20:28:14 +02003361 if (tvs != NULL)
3362 {
3363 format_overflow_error(digstart);
3364 goto error;
3365 }
3366 else
3367 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003368 }
3369
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003370 if (j >= 0)
3371 min_field_width = j;
3372 else
3373 {
3374 min_field_width = -j;
3375 justify_left = 1;
3376 }
3377 }
3378 else if (VIM_ISDIGIT((int)(*p)))
3379 {
3380 // size_t could be wider than unsigned int; make sure we treat
3381 // argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003382 const char *digstart = p;
3383 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003384
zeertzjq0dff3152024-07-29 20:28:14 +02003385 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003386 goto error;
3387
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003388 min_field_width = uj;
3389 }
3390
3391 // parse precision
3392 if (*p == '.')
3393 {
3394 p++;
3395 precision_specified = 1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003396
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003397 if (VIM_ISDIGIT((int)(*p)))
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003398 {
3399 // size_t could be wider than unsigned int; make sure we
3400 // treat argument like common implementations do
Christ van Willegenc35fc032024-03-14 18:30:41 +01003401 const char *digstart = p;
3402 unsigned int uj;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003403
zeertzjq0dff3152024-07-29 20:28:14 +02003404 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003405 goto error;
3406
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003407 precision = uj;
3408 }
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003409 else if (*p == '*')
3410 {
3411 int j;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003412 const char *digstart = p;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003413
3414 p++;
3415
3416 if (VIM_ISDIGIT((int)(*p)))
3417 {
3418 // positional argument
Christ van Willegenc35fc032024-03-14 18:30:41 +01003419 unsigned int uj;
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003420
zeertzjq0dff3152024-07-29 20:28:14 +02003421 if (get_unsigned_int(digstart, &p, &uj, tvs != NULL) == FAIL)
Christ van Willegenc35fc032024-03-14 18:30:41 +01003422 goto error;
3423
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003424 arg_idx = uj;
3425
3426 ++p;
3427 }
3428
3429 j =
3430# if defined(FEAT_EVAL)
3431 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3432# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003433 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3434 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003435 va_arg(ap, int));
3436
Christ van Willegenc35fc032024-03-14 18:30:41 +01003437 if (j > MAX_ALLOWED_STRING_WIDTH)
3438 {
zeertzjq0dff3152024-07-29 20:28:14 +02003439 if (tvs != NULL)
3440 {
3441 format_overflow_error(digstart);
3442 goto error;
3443 }
3444 else
3445 j = MAX_ALLOWED_STRING_WIDTH;
Christ van Willegenc35fc032024-03-14 18:30:41 +01003446 }
3447
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003448 if (j >= 0)
3449 precision = j;
3450 else
3451 {
3452 precision_specified = 0;
3453 precision = 0;
3454 }
3455 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003456 }
3457
3458 // parse 'h', 'l' and 'll' length modifiers
3459 if (*p == 'h' || *p == 'l')
3460 {
3461 length_modifier = *p;
3462 p++;
3463 if (length_modifier == 'l' && *p == 'l')
3464 {
3465 // double l = __int64 / varnumber_T
3466 length_modifier = 'L';
3467 p++;
3468 }
3469 }
3470 fmt_spec = *p;
3471
3472 // common synonyms:
3473 switch (fmt_spec)
3474 {
3475 case 'i': fmt_spec = 'd'; break;
3476 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
3477 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
3478 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
3479 default: break;
3480 }
3481
3482# if defined(FEAT_EVAL)
3483 switch (fmt_spec)
3484 {
3485 case 'd': case 'u': case 'o': case 'x': case 'X':
3486 if (tvs != NULL && length_modifier == '\0')
3487 length_modifier = 'L';
3488 }
3489# endif
3490
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003491 if (pos_arg != -1)
3492 arg_idx = pos_arg;
3493
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003494 // get parameter value, do initial processing
3495 switch (fmt_spec)
3496 {
3497 // '%' and 'c' behave similar to 's' regarding flags and field
3498 // widths
3499 case '%':
3500 case 'c':
3501 case 's':
3502 case 'S':
3503 str_arg_l = 1;
3504 switch (fmt_spec)
3505 {
3506 case '%':
3507 str_arg = p;
3508 break;
3509
3510 case 'c':
3511 {
3512 int j;
3513
3514 j =
3515# if defined(FEAT_EVAL)
3516 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3517# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003518 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3519 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003520 va_arg(ap, int));
3521
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003522 // standard demands unsigned char
3523 uchar_arg = (unsigned char)j;
3524 str_arg = (char *)&uchar_arg;
3525 break;
3526 }
3527
3528 case 's':
3529 case 'S':
3530 str_arg =
3531# if defined(FEAT_EVAL)
3532 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
3533# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003534 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3535 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003536 va_arg(ap, char *));
3537
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003538 if (str_arg == NULL)
3539 {
3540 str_arg = "[NULL]";
3541 str_arg_l = 6;
3542 }
3543 // make sure not to address string beyond the specified
3544 // precision !!!
3545 else if (!precision_specified)
3546 str_arg_l = strlen(str_arg);
3547 // truncate string if necessary as requested by precision
3548 else if (precision == 0)
3549 str_arg_l = 0;
3550 else
3551 {
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003552 // memchr on HP does not like n > 2^31 !!!
3553 char *q = memchr(str_arg, '\0',
3554 precision <= (size_t)0x7fffffffL ? precision
3555 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00003556
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003557 str_arg_l = (q == NULL) ? precision
3558 : (size_t)(q - str_arg);
3559 }
3560 if (fmt_spec == 'S')
3561 {
presuku1f2453f2021-11-24 15:32:57 +00003562 char_u *p1;
3563 size_t i;
3564 int cell;
presukud85fccd2021-11-20 19:38:31 +00003565
presuku1f2453f2021-11-24 15:32:57 +00003566 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003567 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00003568 {
3569 cell = mb_ptr2cells(p1);
3570 if (precision_specified && i + cell > precision)
3571 break;
3572 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003573 }
presuku1f2453f2021-11-24 15:32:57 +00003574
3575 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00003576 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00003577 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003578 }
3579 break;
3580
3581 default:
3582 break;
3583 }
3584 break;
3585
3586 case 'd': case 'u':
3587 case 'b': case 'B':
3588 case 'o':
3589 case 'x': case 'X':
3590 case 'p':
3591 {
3592 // NOTE: the u, b, o, x, X and p conversion specifiers
3593 // imply the value is unsigned; d implies a signed
3594 // value
3595
3596 // 0 if numeric argument is zero (or if pointer is
3597 // NULL for 'p'), +1 if greater than zero (or nonzero
3598 // for unsigned arguments), -1 if negative (unsigned
3599 // argument is never negative)
3600 int arg_sign = 0;
3601
3602 // only set for length modifier h, or for no length
3603 // modifiers
3604 int int_arg = 0;
3605 unsigned int uint_arg = 0;
3606
3607 // only set for length modifier l
3608 long int long_arg = 0;
3609 unsigned long int ulong_arg = 0;
3610
3611 // only set for length modifier ll
3612 varnumber_T llong_arg = 0;
3613 uvarnumber_T ullong_arg = 0;
3614
3615 // only set for b conversion
3616 uvarnumber_T bin_arg = 0;
3617
3618 // pointer argument value -only defined for p
3619 // conversion
3620 void *ptr_arg = NULL;
3621
3622 if (fmt_spec == 'p')
3623 {
3624 length_modifier = '\0';
3625 ptr_arg =
3626# if defined(FEAT_EVAL)
3627 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
3628 NULL) :
3629# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003630 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3631 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003632 va_arg(ap, void *));
3633
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003634 if (ptr_arg != NULL)
3635 arg_sign = 1;
3636 }
3637 else if (fmt_spec == 'b' || fmt_spec == 'B')
3638 {
3639 bin_arg =
3640# if defined(FEAT_EVAL)
3641 tvs != NULL ?
3642 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
3643# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003644 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3645 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003646 va_arg(ap, uvarnumber_T));
3647
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003648 if (bin_arg != 0)
3649 arg_sign = 1;
3650 }
3651 else if (fmt_spec == 'd')
3652 {
3653 // signed
3654 switch (length_modifier)
3655 {
3656 case '\0':
3657 case 'h':
3658 // char and short arguments are passed as int.
3659 int_arg =
3660# if defined(FEAT_EVAL)
3661 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3662# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003663 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3664 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003665 va_arg(ap, int));
3666
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003667 if (int_arg > 0)
3668 arg_sign = 1;
3669 else if (int_arg < 0)
3670 arg_sign = -1;
3671 break;
3672 case 'l':
3673 long_arg =
3674# if defined(FEAT_EVAL)
3675 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3676# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003677 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3678 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003679 va_arg(ap, long int));
3680
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003681 if (long_arg > 0)
3682 arg_sign = 1;
3683 else if (long_arg < 0)
3684 arg_sign = -1;
3685 break;
3686 case 'L':
3687 llong_arg =
3688# if defined(FEAT_EVAL)
3689 tvs != NULL ? tv_nr(tvs, &arg_idx) :
3690# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003691 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3692 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003693 va_arg(ap, varnumber_T));
3694
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003695 if (llong_arg > 0)
3696 arg_sign = 1;
3697 else if (llong_arg < 0)
3698 arg_sign = -1;
3699 break;
3700 }
3701 }
3702 else
3703 {
3704 // unsigned
3705 switch (length_modifier)
3706 {
3707 case '\0':
3708 case 'h':
3709 uint_arg =
3710# if defined(FEAT_EVAL)
3711 tvs != NULL ? (unsigned)
3712 tv_nr(tvs, &arg_idx) :
3713# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003714 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3715 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003716 va_arg(ap, unsigned int));
3717
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003718 if (uint_arg != 0)
3719 arg_sign = 1;
3720 break;
3721 case 'l':
3722 ulong_arg =
3723# if defined(FEAT_EVAL)
3724 tvs != NULL ? (unsigned long)
3725 tv_nr(tvs, &arg_idx) :
3726# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003727 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3728 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003729 va_arg(ap, unsigned long int));
3730
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003731 if (ulong_arg != 0)
3732 arg_sign = 1;
3733 break;
3734 case 'L':
3735 ullong_arg =
3736# if defined(FEAT_EVAL)
3737 tvs != NULL ? (uvarnumber_T)
3738 tv_nr(tvs, &arg_idx) :
3739# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003740 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3741 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003742 va_arg(ap, uvarnumber_T));
3743
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003744 if (ullong_arg != 0)
3745 arg_sign = 1;
3746 break;
3747 }
3748 }
3749
3750 str_arg = tmp;
3751 str_arg_l = 0;
3752
3753 // NOTE:
3754 // For d, i, u, o, x, and X conversions, if precision is
3755 // specified, the '0' flag should be ignored. This is so
3756 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
3757 // FreeBSD, NetBSD; but not with Perl.
3758 if (precision_specified)
3759 zero_padding = 0;
3760 if (fmt_spec == 'd')
3761 {
3762 if (force_sign && arg_sign >= 0)
3763 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
3764 // leave negative numbers for sprintf to handle, to
3765 // avoid handling tricky cases like (short int)-32768
3766 }
3767 else if (alternate_form)
3768 {
3769 if (arg_sign != 0
3770 && (fmt_spec == 'b' || fmt_spec == 'B'
3771 || fmt_spec == 'x' || fmt_spec == 'X') )
3772 {
3773 tmp[str_arg_l++] = '0';
3774 tmp[str_arg_l++] = fmt_spec;
3775 }
3776 // alternate form should have no effect for p
3777 // conversion, but ...
3778 }
3779
3780 zero_padding_insertion_ind = str_arg_l;
3781 if (!precision_specified)
3782 precision = 1; // default precision is 1
3783 if (precision == 0 && arg_sign == 0)
3784 {
3785 // When zero value is formatted with an explicit
3786 // precision 0, the resulting formatted string is
3787 // empty (d, i, u, b, B, o, x, X, p).
3788 }
3789 else
3790 {
3791 char f[6];
3792 int f_l = 0;
3793
3794 // construct a simple format string for sprintf
3795 f[f_l++] = '%';
3796 if (!length_modifier)
3797 ;
3798 else if (length_modifier == 'L')
3799 {
3800# ifdef MSWIN
3801 f[f_l++] = 'I';
3802 f[f_l++] = '6';
3803 f[f_l++] = '4';
3804# else
3805 f[f_l++] = 'l';
3806 f[f_l++] = 'l';
3807# endif
3808 }
3809 else
3810 f[f_l++] = length_modifier;
3811 f[f_l++] = fmt_spec;
3812 f[f_l++] = '\0';
3813
3814 if (fmt_spec == 'p')
3815 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
3816 else if (fmt_spec == 'b' || fmt_spec == 'B')
3817 {
3818 char b[8 * sizeof(uvarnumber_T)];
3819 size_t b_l = 0;
3820 uvarnumber_T bn = bin_arg;
3821
3822 do
3823 {
3824 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
3825 bn >>= 1;
3826 }
3827 while (bn != 0);
3828
3829 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
3830 str_arg_l += b_l;
3831 }
3832 else if (fmt_spec == 'd')
3833 {
3834 // signed
3835 switch (length_modifier)
3836 {
3837 case '\0': str_arg_l += sprintf(
3838 tmp + str_arg_l, f,
3839 int_arg);
3840 break;
3841 case 'h': str_arg_l += sprintf(
3842 tmp + str_arg_l, f,
3843 (short)int_arg);
3844 break;
3845 case 'l': str_arg_l += sprintf(
3846 tmp + str_arg_l, f, long_arg);
3847 break;
3848 case 'L': str_arg_l += sprintf(
3849 tmp + str_arg_l, f, llong_arg);
3850 break;
3851 }
3852 }
3853 else
3854 {
3855 // unsigned
3856 switch (length_modifier)
3857 {
3858 case '\0': str_arg_l += sprintf(
3859 tmp + str_arg_l, f,
3860 uint_arg);
3861 break;
3862 case 'h': str_arg_l += sprintf(
3863 tmp + str_arg_l, f,
3864 (unsigned short)uint_arg);
3865 break;
3866 case 'l': str_arg_l += sprintf(
3867 tmp + str_arg_l, f, ulong_arg);
3868 break;
3869 case 'L': str_arg_l += sprintf(
3870 tmp + str_arg_l, f, ullong_arg);
3871 break;
3872 }
3873 }
3874
3875 // include the optional minus sign and possible
3876 // "0x" in the region before the zero padding
3877 // insertion point
3878 if (zero_padding_insertion_ind < str_arg_l
3879 && tmp[zero_padding_insertion_ind] == '-')
3880 zero_padding_insertion_ind++;
3881 if (zero_padding_insertion_ind + 1 < str_arg_l
3882 && tmp[zero_padding_insertion_ind] == '0'
3883 && (tmp[zero_padding_insertion_ind + 1] == 'x'
3884 || tmp[zero_padding_insertion_ind + 1] == 'X'))
3885 zero_padding_insertion_ind += 2;
3886 }
3887
3888 {
3889 size_t num_of_digits = str_arg_l
3890 - zero_padding_insertion_ind;
3891
3892 if (alternate_form && fmt_spec == 'o'
3893 // unless zero is already the first
3894 // character
3895 && !(zero_padding_insertion_ind < str_arg_l
3896 && tmp[zero_padding_insertion_ind] == '0'))
3897 {
3898 // assure leading zero for alternate-form
3899 // octal numbers
3900 if (!precision_specified
3901 || precision < num_of_digits + 1)
3902 {
3903 // precision is increased to force the
3904 // first character to be zero, except if a
3905 // zero value is formatted with an
3906 // explicit precision of zero
3907 precision = num_of_digits + 1;
3908 }
3909 }
3910 // zero padding to specified precision?
3911 if (num_of_digits < precision)
3912 number_of_zeros_to_pad = precision - num_of_digits;
3913 }
3914 // zero padding to specified minimal field width?
3915 if (!justify_left && zero_padding)
3916 {
3917 int n = (int)(min_field_width - (str_arg_l
3918 + number_of_zeros_to_pad));
3919 if (n > 0)
3920 number_of_zeros_to_pad += n;
3921 }
3922 break;
3923 }
3924
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003925 case 'f':
3926 case 'F':
3927 case 'e':
3928 case 'E':
3929 case 'g':
3930 case 'G':
3931 {
3932 // Floating point.
3933 double f;
3934 double abs_f;
3935 char format[40];
3936 int l;
3937 int remove_trailing_zeroes = FALSE;
3938
3939 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003940# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003941 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003942# endif
Christ van Willegenaa90d4f2023-09-03 17:22:37 +02003943 (skip_to_arg(ap_types, ap_start, &ap, &arg_idx,
3944 &arg_cur, fmt),
Christ van Willegen0c6181f2023-08-13 18:03:14 +02003945 va_arg(ap, double));
3946
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003947 abs_f = f < 0 ? -f : f;
3948
3949 if (fmt_spec == 'g' || fmt_spec == 'G')
3950 {
3951 // Would be nice to use %g directly, but it prints
3952 // "1.0" as "1", we don't want that.
3953 if ((abs_f >= 0.001 && abs_f < 10000000.0)
3954 || abs_f == 0.0)
3955 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
3956 else
3957 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
3958 remove_trailing_zeroes = TRUE;
3959 }
3960
3961 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003962# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003963 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003964# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003965 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01003966# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02003967 )
3968 {
3969 // Avoid a buffer overflow
3970 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3971 force_sign, space_for_positive));
3972 str_arg_l = STRLEN(tmp);
3973 zero_padding = 0;
3974 }
3975 else
3976 {
3977 if (isnan(f))
3978 {
3979 // Not a number: nan or NAN
3980 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
3981 : "nan");
3982 str_arg_l = 3;
3983 zero_padding = 0;
3984 }
3985 else if (isinf(f))
3986 {
3987 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
3988 force_sign, space_for_positive));
3989 str_arg_l = STRLEN(tmp);
3990 zero_padding = 0;
3991 }
3992 else
3993 {
3994 // Regular float number
3995 format[0] = '%';
3996 l = 1;
3997 if (force_sign)
3998 format[l++] = space_for_positive ? ' ' : '+';
3999 if (precision_specified)
4000 {
4001 size_t max_prec = TMP_LEN - 10;
4002
4003 // Make sure we don't get more digits than we
4004 // have room for.
4005 if ((fmt_spec == 'f' || fmt_spec == 'F')
4006 && abs_f > 1.0)
4007 max_prec -= (size_t)log10(abs_f);
4008 if (precision > max_prec)
4009 precision = max_prec;
4010 l += sprintf(format + l, ".%d", (int)precision);
4011 }
4012 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
4013 format[l + 1] = NUL;
4014
4015 str_arg_l = sprintf(tmp, format, f);
4016 }
4017
4018 if (remove_trailing_zeroes)
4019 {
4020 int i;
4021 char *tp;
4022
4023 // Using %g or %G: remove superfluous zeroes.
4024 if (fmt_spec == 'f' || fmt_spec == 'F')
4025 tp = tmp + str_arg_l - 1;
4026 else
4027 {
4028 tp = (char *)vim_strchr((char_u *)tmp,
4029 fmt_spec == 'e' ? 'e' : 'E');
4030 if (tp != NULL)
4031 {
4032 // Remove superfluous '+' and leading
4033 // zeroes from the exponent.
4034 if (tp[1] == '+')
4035 {
4036 // Change "1.0e+07" to "1.0e07"
4037 STRMOVE(tp + 1, tp + 2);
4038 --str_arg_l;
4039 }
4040 i = (tp[1] == '-') ? 2 : 1;
4041 while (tp[i] == '0')
4042 {
4043 // Change "1.0e07" to "1.0e7"
4044 STRMOVE(tp + i, tp + i + 1);
4045 --str_arg_l;
4046 }
4047 --tp;
4048 }
4049 }
4050
4051 if (tp != NULL && !precision_specified)
4052 // Remove trailing zeroes, but keep the one
4053 // just after a dot.
4054 while (tp > tmp + 2 && *tp == '0'
4055 && tp[-1] != '.')
4056 {
4057 STRMOVE(tp, tp + 1);
4058 --tp;
4059 --str_arg_l;
4060 }
4061 }
4062 else
4063 {
4064 char *tp;
4065
4066 // Be consistent: some printf("%e") use 1.0e+12
4067 // and some 1.0e+012. Remove one zero in the last
4068 // case.
4069 tp = (char *)vim_strchr((char_u *)tmp,
4070 fmt_spec == 'e' ? 'e' : 'E');
4071 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
4072 && tp[2] == '0'
4073 && vim_isdigit(tp[3])
4074 && vim_isdigit(tp[4]))
4075 {
4076 STRMOVE(tp + 2, tp + 3);
4077 --str_arg_l;
4078 }
4079 }
4080 }
4081 if (zero_padding && min_field_width > str_arg_l
4082 && (tmp[0] == '-' || force_sign))
4083 {
4084 // padding 0's should be inserted after the sign
4085 number_of_zeros_to_pad = min_field_width - str_arg_l;
4086 zero_padding_insertion_ind = 1;
4087 }
4088 str_arg = tmp;
4089 break;
4090 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004091
4092 default:
4093 // unrecognized conversion specifier, keep format string
4094 // as-is
4095 zero_padding = 0; // turn zero padding off for non-numeric
4096 // conversion
4097 justify_left = 1;
4098 min_field_width = 0; // reset flags
4099
4100 // discard the unrecognized conversion, just keep *
4101 // the unrecognized conversion character
4102 str_arg = p;
4103 str_arg_l = 0;
4104 if (*p != NUL)
4105 str_arg_l++; // include invalid conversion specifier
4106 // unchanged if not at end-of-string
4107 break;
4108 }
4109
4110 if (*p != NUL)
4111 p++; // step over the just processed conversion specifier
4112
4113 // insert padding to the left as requested by min_field_width;
4114 // this does not include the zero padding in case of numerical
4115 // conversions
4116 if (!justify_left)
4117 {
4118 // left padding with blank or zero
4119 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
4120
4121 if (pn > 0)
4122 {
4123 if (str_l < str_m)
4124 {
4125 size_t avail = str_m - str_l;
4126
4127 vim_memset(str + str_l, zero_padding ? '0' : ' ',
4128 (size_t)pn > avail ? avail
4129 : (size_t)pn);
4130 }
4131 str_l += pn;
4132 }
4133 }
4134
4135 // zero padding as requested by the precision or by the minimal
4136 // field width for numeric conversions required?
4137 if (number_of_zeros_to_pad == 0)
4138 {
4139 // will not copy first part of numeric right now, *
4140 // force it to be copied later in its entirety
4141 zero_padding_insertion_ind = 0;
4142 }
4143 else
4144 {
4145 // insert first part of numerics (sign or '0x') before zero
4146 // padding
4147 int zn = (int)zero_padding_insertion_ind;
4148
4149 if (zn > 0)
4150 {
4151 if (str_l < str_m)
4152 {
4153 size_t avail = str_m - str_l;
4154
4155 mch_memmove(str + str_l, str_arg,
4156 (size_t)zn > avail ? avail
4157 : (size_t)zn);
4158 }
4159 str_l += zn;
4160 }
4161
4162 // insert zero padding as requested by the precision or min
4163 // field width
4164 zn = (int)number_of_zeros_to_pad;
4165 if (zn > 0)
4166 {
4167 if (str_l < str_m)
4168 {
4169 size_t avail = str_m - str_l;
4170
4171 vim_memset(str + str_l, '0',
4172 (size_t)zn > avail ? avail
4173 : (size_t)zn);
4174 }
4175 str_l += zn;
4176 }
4177 }
4178
4179 // insert formatted string
4180 // (or as-is conversion specifier for unknown conversions)
4181 {
4182 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
4183
4184 if (sn > 0)
4185 {
4186 if (str_l < str_m)
4187 {
4188 size_t avail = str_m - str_l;
4189
4190 mch_memmove(str + str_l,
4191 str_arg + zero_padding_insertion_ind,
4192 (size_t)sn > avail ? avail : (size_t)sn);
4193 }
4194 str_l += sn;
4195 }
4196 }
4197
4198 // insert right padding
4199 if (justify_left)
4200 {
4201 // right blank padding to the field width
4202 int pn = (int)(min_field_width
4203 - (str_arg_l + number_of_zeros_to_pad));
4204
4205 if (pn > 0)
4206 {
4207 if (str_l < str_m)
4208 {
4209 size_t avail = str_m - str_l;
4210
4211 vim_memset(str + str_l, ' ',
4212 (size_t)pn > avail ? avail
4213 : (size_t)pn);
4214 }
4215 str_l += pn;
4216 }
4217 }
4218 vim_free(tofree);
4219 }
4220 }
4221
4222 if (str_m > 0)
4223 {
4224 // make sure the string is nul-terminated even at the expense of
4225 // overwriting the last character (shouldn't happen, but just in case)
4226 //
4227 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
4228 }
4229
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004230 if (tvs != NULL && tvs[num_posarg != 0 ? num_posarg : arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00004231 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004232
Christ van Willegenc35fc032024-03-14 18:30:41 +01004233error:
K.Takata4c215ec2023-08-26 18:05:08 +02004234 vim_free((char*)ap_types);
Christ van Willegen0c6181f2023-08-13 18:03:14 +02004235 va_end(ap);
4236
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02004237 // Return the number of characters formatted (excluding trailing nul
4238 // character), that is, the number of characters that would have been
4239 // written to the buffer if it were large enough.
4240 return (int)str_l;
4241}
4242
4243#endif // PROTO