blob: 7a99cd96425fdbc63785dc4076c1079c05a523e9 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
45 if (p != NULL)
46 {
47 STRNCPY(p, string, len);
48 p[len] = NUL;
49 }
50 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
97 if (escaped_string != NULL)
98 {
99 p2 = escaped_string;
100 for (p = string; *p; p++)
101 {
102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
103 {
104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
108 }
109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
112 }
113 *p2 = NUL;
114 }
115 return escaped_string;
116}
117
118/*
119 * Return TRUE when 'shell' has "csh" in the tail.
120 */
121 int
122csh_like_shell(void)
123{
124 return (strstr((char *)gettail(p_sh), "csh") != NULL);
125}
126
127/*
Jason Cox6e823512021-08-29 12:36:49 +0200128 * Return TRUE when 'shell' has "fish" in the tail.
129 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200130 static int
Jason Cox6e823512021-08-29 12:36:49 +0200131fish_like_shell(void)
132{
133 return (strstr((char *)gettail(p_sh), "fish") != NULL);
134}
135
136/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200137 * Escape "string" for use as a shell argument with system().
138 * This uses single quotes, except when we know we need to use double quotes
139 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
140 * PowerShell also uses a novel escaping for enclosed single quotes - double
141 * them up.
142 * Escape a newline, depending on the 'shell' option.
143 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
144 * with "<" like "<cfile>".
145 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
146 * Returns the result in allocated memory, NULL if we have run out.
147 */
148 char_u *
149vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
150{
151 unsigned length;
152 char_u *p;
153 char_u *d;
154 char_u *escaped_string;
155 int l;
156 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200157 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200158 char_u *shname;
159 int powershell;
160# ifdef MSWIN
161 int double_quotes;
162# endif
163
164 // Only csh and similar shells expand '!' within single quotes. For sh and
165 // the like we must not put a backslash before it, it will be taken
166 // literally. If do_special is set the '!' will be escaped twice.
167 // Csh also needs to have "\n" escaped twice when do_special is set.
168 csh_like = csh_like_shell();
169
Jason Cox6e823512021-08-29 12:36:49 +0200170 // Fish shell uses '\' as an escape character within single quotes, so '\'
171 // itself must be escaped to get a literal '\'.
172 fish_like = fish_like_shell();
173
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000174 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200175 shname = gettail(p_sh);
176 powershell = strstr((char *)shname, "pwsh") != NULL;
177# ifdef MSWIN
178 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
179 // PowerShell only accepts single quotes so override shellslash.
180 double_quotes = !powershell && !p_ssl;
181# endif
182
183 // First count the number of extra bytes required.
184 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
185 for (p = string; *p != NUL; MB_PTR_ADV(p))
186 {
187# ifdef MSWIN
188 if (double_quotes)
189 {
190 if (*p == '"')
191 ++length; // " -> ""
192 }
193 else
194# endif
195 if (*p == '\'')
196 {
197 if (powershell)
198 length +=2; // ' => ''
199 else
200 length += 3; // ' => '\''
201 }
202 if ((*p == '\n' && (csh_like || do_newline))
203 || (*p == '!' && (csh_like || do_special)))
204 {
205 ++length; // insert backslash
206 if (csh_like && do_special)
207 ++length; // insert backslash
208 }
209 if (do_special && find_cmdline_var(p, &l) >= 0)
210 {
211 ++length; // insert backslash
212 p += l - 1;
213 }
Jason Cox6e823512021-08-29 12:36:49 +0200214 if (*p == '\\' && fish_like)
215 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200216 }
217
218 // Allocate memory for the result and fill it.
219 escaped_string = alloc(length);
220 if (escaped_string != NULL)
221 {
222 d = escaped_string;
223
224 // add opening quote
225# ifdef MSWIN
226 if (double_quotes)
227 *d++ = '"';
228 else
229# endif
230 *d++ = '\'';
231
232 for (p = string; *p != NUL; )
233 {
234# ifdef MSWIN
235 if (double_quotes)
236 {
237 if (*p == '"')
238 {
239 *d++ = '"';
240 *d++ = '"';
241 ++p;
242 continue;
243 }
244 }
245 else
246# endif
247 if (*p == '\'')
248 {
249 if (powershell)
250 {
251 *d++ = '\'';
252 *d++ = '\'';
253 }
254 else
255 {
256 *d++ = '\'';
257 *d++ = '\\';
258 *d++ = '\'';
259 *d++ = '\'';
260 }
261 ++p;
262 continue;
263 }
264 if ((*p == '\n' && (csh_like || do_newline))
265 || (*p == '!' && (csh_like || do_special)))
266 {
267 *d++ = '\\';
268 if (csh_like && do_special)
269 *d++ = '\\';
270 *d++ = *p++;
271 continue;
272 }
273 if (do_special && find_cmdline_var(p, &l) >= 0)
274 {
275 *d++ = '\\'; // insert backslash
276 while (--l >= 0) // copy the var
277 *d++ = *p++;
278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
341 if (p != NULL)
342 {
343 p2 = p;
344 while ((c = *p2) != NUL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200345 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200346 }
347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
528#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
529/*
530 * Compare two strings, ignoring case, using current locale.
531 * Doesn't work for multi-byte characters.
532 * return 0 for match, < 0 for smaller, > 0 for bigger
533 */
534 int
535vim_stricmp(char *s1, char *s2)
536{
537 int i;
538
539 for (;;)
540 {
541 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
542 if (i != 0)
543 return i; // this character different
544 if (*s1 == NUL)
545 break; // strings match until NUL
546 ++s1;
547 ++s2;
548 }
549 return 0; // strings match
550}
551#endif
552
553#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
554/*
555 * Compare two strings, for length "len", ignoring case, using current locale.
556 * Doesn't work for multi-byte characters.
557 * return 0 for match, < 0 for smaller, > 0 for bigger
558 */
559 int
560vim_strnicmp(char *s1, char *s2, size_t len)
561{
562 int i;
563
564 while (len > 0)
565 {
566 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
567 if (i != 0)
568 return i; // this character different
569 if (*s1 == NUL)
570 break; // strings match until NUL
571 ++s1;
572 ++s2;
573 --len;
574 }
575 return 0; // strings match
576}
577#endif
578
579/*
580 * Search for first occurrence of "c" in "string".
581 * Version of strchr() that handles unsigned char strings with characters from
582 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
583 * end of the string.
584 */
585 char_u *
586vim_strchr(char_u *string, int c)
587{
588 char_u *p;
589 int b;
590
591 p = string;
592 if (enc_utf8 && c >= 0x80)
593 {
594 while (*p != NUL)
595 {
596 int l = utfc_ptr2len(p);
597
598 // Avoid matching an illegal byte here.
599 if (utf_ptr2char(p) == c && l > 1)
600 return p;
601 p += l;
602 }
603 return NULL;
604 }
605 if (enc_dbcs != 0 && c > 255)
606 {
607 int n2 = c & 0xff;
608
609 c = ((unsigned)c >> 8) & 0xff;
610 while ((b = *p) != NUL)
611 {
612 if (b == c && p[1] == n2)
613 return p;
614 p += (*mb_ptr2len)(p);
615 }
616 return NULL;
617 }
618 if (has_mbyte)
619 {
620 while ((b = *p) != NUL)
621 {
622 if (b == c)
623 return p;
624 p += (*mb_ptr2len)(p);
625 }
626 return NULL;
627 }
628 while ((b = *p) != NUL)
629 {
630 if (b == c)
631 return p;
632 ++p;
633 }
634 return NULL;
635}
636
637/*
638 * Version of strchr() that only works for bytes and handles unsigned char
639 * strings with characters above 128 correctly. It also doesn't return a
640 * pointer to the NUL at the end of the string.
641 */
642 char_u *
643vim_strbyte(char_u *string, int c)
644{
645 char_u *p = string;
646
647 while (*p != NUL)
648 {
649 if (*p == c)
650 return p;
651 ++p;
652 }
653 return NULL;
654}
655
656/*
657 * Search for last occurrence of "c" in "string".
658 * Version of strrchr() that handles unsigned char strings with characters from
659 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
660 * end of the string.
661 * Return NULL if not found.
662 * Does not handle multi-byte char for "c"!
663 */
664 char_u *
665vim_strrchr(char_u *string, int c)
666{
667 char_u *retval = NULL;
668 char_u *p = string;
669
670 while (*p)
671 {
672 if (*p == c)
673 retval = p;
674 MB_PTR_ADV(p);
675 }
676 return retval;
677}
678
679/*
680 * Vim's version of strpbrk(), in case it's missing.
681 * Don't generate a prototype for this, causes problems when it's not used.
682 */
683#ifndef PROTO
684# ifndef HAVE_STRPBRK
685# ifdef vim_strpbrk
686# undef vim_strpbrk
687# endif
688 char_u *
689vim_strpbrk(char_u *s, char_u *charset)
690{
691 while (*s)
692 {
693 if (vim_strchr(charset, *s) != NULL)
694 return s;
695 MB_PTR_ADV(s);
696 }
697 return NULL;
698}
699# endif
700#endif
701
702/*
703 * Sort an array of strings.
704 */
705static int sort_compare(const void *s1, const void *s2);
706
707 static int
708sort_compare(const void *s1, const void *s2)
709{
710 return STRCMP(*(char **)s1, *(char **)s2);
711}
712
713 void
714sort_strings(
715 char_u **files,
716 int count)
717{
718 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
719}
720
721#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
722/*
723 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
724 * When "s" is NULL FALSE is returned.
725 */
726 int
727has_non_ascii(char_u *s)
728{
729 char_u *p;
730
731 if (s != NULL)
732 for (p = s; *p != NUL; ++p)
733 if (*p >= 128)
734 return TRUE;
735 return FALSE;
736}
737#endif
738
739/*
740 * Concatenate two strings and return the result in allocated memory.
741 * Returns NULL when out of memory.
742 */
743 char_u *
744concat_str(char_u *str1, char_u *str2)
745{
746 char_u *dest;
747 size_t l = str1 == NULL ? 0 : STRLEN(str1);
748
749 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
750 if (dest != NULL)
751 {
752 if (str1 == NULL)
753 *dest = NUL;
754 else
755 STRCPY(dest, str1);
756 if (str2 != NULL)
757 STRCPY(dest + l, str2);
758 }
759 return dest;
760}
761
762#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200763/*
764 * Return string "str" in ' quotes, doubling ' characters.
765 * If "str" is NULL an empty string is assumed.
766 * If "function" is TRUE make it function('string').
767 */
768 char_u *
769string_quote(char_u *str, int function)
770{
771 unsigned len;
772 char_u *p, *r, *s;
773
774 len = (function ? 13 : 3);
775 if (str != NULL)
776 {
777 len += (unsigned)STRLEN(str);
778 for (p = str; *p != NUL; MB_PTR_ADV(p))
779 if (*p == '\'')
780 ++len;
781 }
782 s = r = alloc(len);
783 if (r != NULL)
784 {
785 if (function)
786 {
787 STRCPY(r, "function('");
788 r += 10;
789 }
790 else
791 *r++ = '\'';
792 if (str != NULL)
793 for (p = str; *p != NUL; )
794 {
795 if (*p == '\'')
796 *r++ = '\'';
797 MB_COPY_CHAR(p, r);
798 }
799 *r++ = '\'';
800 if (function)
801 *r++ = ')';
802 *r++ = NUL;
803 }
804 return s;
805}
806
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000807/*
808 * Count the number of times "needle" occurs in string "haystack". Case is
809 * ignored if "ic" is TRUE.
810 */
811 long
812string_count(char_u *haystack, char_u *needle, int ic)
813{
814 long n = 0;
815 char_u *p = haystack;
816 char_u *next;
817
818 if (p == NULL || needle == NULL || *needle == NUL)
819 return 0;
820
821 if (ic)
822 {
823 size_t len = STRLEN(needle);
824
825 while (*p != NUL)
826 {
827 if (MB_STRNICMP(p, needle, len) == 0)
828 {
829 ++n;
830 p += len;
831 }
832 else
833 MB_PTR_ADV(p);
834 }
835 }
836 else
837 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
838 {
839 ++n;
840 p = next + STRLEN(needle);
841 }
842
843 return n;
844}
845
846/*
847 * Make a typval_T of the first character of "input" and store it in "output".
848 * Return OK or FAIL.
849 */
850 static int
851copy_first_char_to_tv(char_u *input, typval_T *output)
852{
853 char_u buf[MB_MAXBYTES + 1];
854 int len;
855
856 if (input == NULL || output == NULL)
857 return FAIL;
858
859 len = has_mbyte ? mb_ptr2len(input) : 1;
860 STRNCPY(buf, input, len);
861 buf[len] = NUL;
862 output->v_type = VAR_STRING;
863 output->vval.v_string = vim_strsave(buf);
864
865 return output->vval.v_string == NULL ? FAIL : OK;
866}
867
868/*
869 * Implementation of map() and filter() for a String. Apply "expr" to every
870 * character in string "str" and return the result in "rettv".
871 */
872 void
873string_filter_map(
874 char_u *str,
875 filtermap_T filtermap,
876 typval_T *expr,
877 typval_T *rettv)
878{
879 char_u *p;
880 typval_T tv;
881 garray_T ga;
882 int len = 0;
883 int idx = 0;
884 int rem;
885
886 rettv->v_type = VAR_STRING;
887 rettv->vval.v_string = NULL;
888
889 // set_vim_var_nr() doesn't set the type
890 set_vim_var_type(VV_KEY, VAR_NUMBER);
891
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000892 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000893 for (p = str; *p != NUL; p += len)
894 {
895 typval_T newtv;
896
897 if (copy_first_char_to_tv(p, &tv) == FAIL)
898 break;
899 len = (int)STRLEN(tv.vval.v_string);
900
901 set_vim_var_nr(VV_KEY, idx);
902 if (filter_map_one(&tv, expr, filtermap, &newtv, &rem) == FAIL
903 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000904 {
905 clear_tv(&newtv);
906 clear_tv(&tv);
907 break;
908 }
909 else if (filtermap != FILTERMAP_FILTER)
910 {
911 if (newtv.v_type != VAR_STRING)
912 {
913 clear_tv(&newtv);
914 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000915 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000916 break;
917 }
918 else
919 ga_concat(&ga, newtv.vval.v_string);
920 }
921 else if (!rem)
922 ga_concat(&ga, tv.vval.v_string);
923
924 clear_tv(&newtv);
925 clear_tv(&tv);
926
927 ++idx;
928 }
929 ga_append(&ga, NUL);
930 rettv->vval.v_string = ga.ga_data;
931}
932
933/*
934 * reduce() String argvars[0] using the function 'funcname' with arguments in
935 * 'funcexe' starting with the initial value argvars[2] and return the result
936 * in 'rettv'.
937 */
938 void
939string_reduce(
940 typval_T *argvars,
941 char_u *func_name,
942 funcexe_T *funcexe,
943 typval_T *rettv)
944{
945 char_u *p = tv_get_string(&argvars[0]);
946 int len;
947 typval_T argv[3];
948 int r;
949 int called_emsg_start = called_emsg;
950
951 if (argvars[2].v_type == VAR_UNKNOWN)
952 {
953 if (*p == NUL)
954 {
Bram Moolenaare70cec92022-01-01 14:25:55 +0000955 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000956 return;
957 }
958 if (copy_first_char_to_tv(p, rettv) == FAIL)
959 return;
960 p += STRLEN(rettv->vval.v_string);
961 }
962 else if (argvars[2].v_type != VAR_STRING)
963 {
964 semsg(_(e_string_expected_for_argument_nr), 3);
965 return;
966 }
967 else
968 copy_tv(&argvars[2], rettv);
969
970 for ( ; *p != NUL; p += len)
971 {
972 argv[0] = *rettv;
973 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
974 break;
975 len = (int)STRLEN(argv[1].vval.v_string);
976 r = call_func(func_name, -1, rettv, 2, argv, funcexe);
977 clear_tv(&argv[0]);
978 clear_tv(&argv[1]);
979 if (r == FAIL || called_emsg != called_emsg_start)
980 return;
981 }
982}
983
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200984 static void
985byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
986{
987 char_u *t;
988 char_u *str;
989 varnumber_T idx;
990
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200991 rettv->vval.v_number = -1;
992
993 if (in_vim9script()
994 && (check_for_string_arg(argvars, 0) == FAIL
995 || check_for_number_arg(argvars, 1) == FAIL))
996 return;
997
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200998 str = tv_get_string_chk(&argvars[0]);
999 idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001000 if (str == NULL || idx < 0)
1001 return;
1002
1003 t = str;
1004 for ( ; idx > 0; idx--)
1005 {
1006 if (*t == NUL) // EOL reached
1007 return;
1008 if (enc_utf8 && comp)
1009 t += utf_ptr2len(t);
1010 else
1011 t += (*mb_ptr2len)(t);
1012 }
1013 rettv->vval.v_number = (varnumber_T)(t - str);
1014}
1015
1016/*
1017 * "byteidx()" function
1018 */
1019 void
1020f_byteidx(typval_T *argvars, typval_T *rettv)
1021{
1022 byteidx(argvars, rettv, FALSE);
1023}
1024
1025/*
1026 * "byteidxcomp()" function
1027 */
1028 void
1029f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1030{
1031 byteidx(argvars, rettv, TRUE);
1032}
1033
1034/*
1035 * "charidx()" function
1036 */
1037 void
1038f_charidx(typval_T *argvars, typval_T *rettv)
1039{
1040 char_u *str;
1041 varnumber_T idx;
1042 varnumber_T countcc = FALSE;
1043 char_u *p;
1044 int len;
1045 int (*ptr2len)(char_u *);
1046
1047 rettv->vval.v_number = -1;
1048
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001049 if (in_vim9script()
1050 && (check_for_string_arg(argvars, 0) == FAIL
1051 || check_for_number_arg(argvars, 1) == FAIL
1052 || check_for_opt_bool_arg(argvars, 2) == FAIL))
1053 return;
1054
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001055 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
1056 || (argvars[2].v_type != VAR_UNKNOWN
1057 && argvars[2].v_type != VAR_NUMBER
1058 && argvars[2].v_type != VAR_BOOL))
1059 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001060 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001061 return;
1062 }
1063
1064 str = tv_get_string_chk(&argvars[0]);
1065 idx = tv_get_number_chk(&argvars[1], NULL);
1066 if (str == NULL || idx < 0)
1067 return;
1068
1069 if (argvars[2].v_type != VAR_UNKNOWN)
1070 countcc = tv_get_bool(&argvars[2]);
1071 if (countcc < 0 || countcc > 1)
1072 {
1073 semsg(_(e_using_number_as_bool_nr), countcc);
1074 return;
1075 }
1076
1077 if (enc_utf8 && countcc)
1078 ptr2len = utf_ptr2len;
1079 else
1080 ptr2len = mb_ptr2len;
1081
1082 for (p = str, len = 0; p <= str + idx; len++)
1083 {
1084 if (*p == NUL)
1085 return;
1086 p += ptr2len(p);
1087 }
1088
1089 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1090}
1091
1092/*
1093 * "str2list()" function
1094 */
1095 void
1096f_str2list(typval_T *argvars, typval_T *rettv)
1097{
1098 char_u *p;
1099 int utf8 = FALSE;
1100
1101 if (rettv_list_alloc(rettv) == FAIL)
1102 return;
1103
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001104 if (in_vim9script()
1105 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001106 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001107 return;
1108
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001109 if (argvars[1].v_type != VAR_UNKNOWN)
1110 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1111
1112 p = tv_get_string(&argvars[0]);
1113
1114 if (has_mbyte || utf8)
1115 {
1116 int (*ptr2len)(char_u *);
1117 int (*ptr2char)(char_u *);
1118
1119 if (utf8 || enc_utf8)
1120 {
1121 ptr2len = utf_ptr2len;
1122 ptr2char = utf_ptr2char;
1123 }
1124 else
1125 {
1126 ptr2len = mb_ptr2len;
1127 ptr2char = mb_ptr2char;
1128 }
1129
1130 for ( ; *p != NUL; p += (*ptr2len)(p))
1131 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1132 }
1133 else
1134 for ( ; *p != NUL; ++p)
1135 list_append_number(rettv->vval.v_list, *p);
1136}
1137
1138/*
1139 * "str2nr()" function
1140 */
1141 void
1142f_str2nr(typval_T *argvars, typval_T *rettv)
1143{
1144 int base = 10;
1145 char_u *p;
1146 varnumber_T n;
1147 int what = 0;
1148 int isneg;
1149
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001150 if (in_vim9script()
1151 && (check_for_string_arg(argvars, 0) == FAIL
1152 || check_for_opt_number_arg(argvars, 1) == FAIL
1153 || (argvars[1].v_type != VAR_UNKNOWN
1154 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1155 return;
1156
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001157 if (argvars[1].v_type != VAR_UNKNOWN)
1158 {
1159 base = (int)tv_get_number(&argvars[1]);
1160 if (base != 2 && base != 8 && base != 10 && base != 16)
1161 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001162 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001163 return;
1164 }
1165 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1166 what |= STR2NR_QUOTE;
1167 }
1168
1169 p = skipwhite(tv_get_string_strict(&argvars[0]));
1170 isneg = (*p == '-');
1171 if (*p == '+' || *p == '-')
1172 p = skipwhite(p + 1);
1173 switch (base)
1174 {
1175 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1176 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1177 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1178 }
1179 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
1180 // Text after the number is silently ignored.
1181 if (isneg)
1182 rettv->vval.v_number = -n;
1183 else
1184 rettv->vval.v_number = n;
1185
1186}
1187
1188/*
1189 * "strgetchar()" function
1190 */
1191 void
1192f_strgetchar(typval_T *argvars, typval_T *rettv)
1193{
1194 char_u *str;
1195 int len;
1196 int error = FALSE;
1197 int charidx;
1198 int byteidx = 0;
1199
1200 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001201
1202 if (in_vim9script()
1203 && (check_for_string_arg(argvars, 0) == FAIL
1204 || check_for_number_arg(argvars, 1) == FAIL))
1205 return;
1206
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001207 str = tv_get_string_chk(&argvars[0]);
1208 if (str == NULL)
1209 return;
1210 len = (int)STRLEN(str);
1211 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1212 if (error)
1213 return;
1214
1215 while (charidx >= 0 && byteidx < len)
1216 {
1217 if (charidx == 0)
1218 {
1219 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1220 break;
1221 }
1222 --charidx;
1223 byteidx += MB_CPTR2LEN(str + byteidx);
1224 }
1225}
1226
1227/*
1228 * "stridx()" function
1229 */
1230 void
1231f_stridx(typval_T *argvars, typval_T *rettv)
1232{
1233 char_u buf[NUMBUFLEN];
1234 char_u *needle;
1235 char_u *haystack;
1236 char_u *save_haystack;
1237 char_u *pos;
1238 int start_idx;
1239
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001240 if (in_vim9script()
1241 && (check_for_string_arg(argvars, 0) == FAIL
1242 || check_for_string_arg(argvars, 1) == FAIL
1243 || check_for_opt_number_arg(argvars, 2) == FAIL))
1244 return;
1245
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001246 needle = tv_get_string_chk(&argvars[1]);
1247 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1248 rettv->vval.v_number = -1;
1249 if (needle == NULL || haystack == NULL)
1250 return; // type error; errmsg already given
1251
1252 if (argvars[2].v_type != VAR_UNKNOWN)
1253 {
1254 int error = FALSE;
1255
1256 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1257 if (error || start_idx >= (int)STRLEN(haystack))
1258 return;
1259 if (start_idx >= 0)
1260 haystack += start_idx;
1261 }
1262
1263 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1264 if (pos != NULL)
1265 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1266}
1267
1268/*
1269 * "string()" function
1270 */
1271 void
1272f_string(typval_T *argvars, typval_T *rettv)
1273{
1274 char_u *tofree;
1275 char_u numbuf[NUMBUFLEN];
1276
1277 rettv->v_type = VAR_STRING;
1278 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1279 get_copyID());
1280 // Make a copy if we have a value but it's not in allocated memory.
1281 if (rettv->vval.v_string != NULL && tofree == NULL)
1282 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1283}
1284
1285/*
1286 * "strlen()" function
1287 */
1288 void
1289f_strlen(typval_T *argvars, typval_T *rettv)
1290{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001291 if (in_vim9script()
1292 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1293 return;
1294
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001295 rettv->vval.v_number = (varnumber_T)(STRLEN(
1296 tv_get_string(&argvars[0])));
1297}
1298
1299 static void
1300strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1301{
1302 char_u *s = tv_get_string(&argvars[0]);
1303 varnumber_T len = 0;
1304 int (*func_mb_ptr2char_adv)(char_u **pp);
1305
1306 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1307 while (*s != NUL)
1308 {
1309 func_mb_ptr2char_adv(&s);
1310 ++len;
1311 }
1312 rettv->vval.v_number = len;
1313}
1314
1315/*
1316 * "strcharlen()" function
1317 */
1318 void
1319f_strcharlen(typval_T *argvars, typval_T *rettv)
1320{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001321 if (in_vim9script()
1322 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1323 return;
1324
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001325 strchar_common(argvars, rettv, TRUE);
1326}
1327
1328/*
1329 * "strchars()" function
1330 */
1331 void
1332f_strchars(typval_T *argvars, typval_T *rettv)
1333{
1334 varnumber_T skipcc = FALSE;
1335
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001336 if (in_vim9script()
1337 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001338 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001339 return;
1340
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001341 if (argvars[1].v_type != VAR_UNKNOWN)
1342 skipcc = tv_get_bool(&argvars[1]);
1343 if (skipcc < 0 || skipcc > 1)
1344 semsg(_(e_using_number_as_bool_nr), skipcc);
1345 else
1346 strchar_common(argvars, rettv, skipcc);
1347}
1348
1349/*
1350 * "strdisplaywidth()" function
1351 */
1352 void
1353f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1354{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001355 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001356 int col = 0;
1357
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001358 rettv->vval.v_number = -1;
1359
1360 if (in_vim9script()
1361 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001362 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001363 return;
1364
1365 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001366 if (argvars[1].v_type != VAR_UNKNOWN)
1367 col = (int)tv_get_number(&argvars[1]);
1368
1369 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1370}
1371
1372/*
1373 * "strwidth()" function
1374 */
1375 void
1376f_strwidth(typval_T *argvars, typval_T *rettv)
1377{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001378 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001379
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001380 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1381 return;
1382
1383 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001384 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1385}
1386
1387/*
1388 * "strcharpart()" function
1389 */
1390 void
1391f_strcharpart(typval_T *argvars, typval_T *rettv)
1392{
1393 char_u *p;
1394 int nchar;
1395 int nbyte = 0;
1396 int charlen;
1397 int skipcc = FALSE;
1398 int len = 0;
1399 int slen;
1400 int error = FALSE;
1401
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001402 if (in_vim9script()
1403 && (check_for_string_arg(argvars, 0) == FAIL
1404 || check_for_number_arg(argvars, 1) == FAIL
1405 || check_for_opt_number_arg(argvars, 2) == FAIL
1406 || (argvars[2].v_type != VAR_UNKNOWN
1407 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1408 return;
1409
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001410 p = tv_get_string(&argvars[0]);
1411 slen = (int)STRLEN(p);
1412
1413 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1414 if (!error)
1415 {
1416 if (argvars[2].v_type != VAR_UNKNOWN
1417 && argvars[3].v_type != VAR_UNKNOWN)
1418 {
1419 skipcc = tv_get_bool(&argvars[3]);
1420 if (skipcc < 0 || skipcc > 1)
1421 {
1422 semsg(_(e_using_number_as_bool_nr), skipcc);
1423 return;
1424 }
1425 }
1426
1427 if (nchar > 0)
1428 while (nchar > 0 && nbyte < slen)
1429 {
1430 if (skipcc)
1431 nbyte += mb_ptr2len(p + nbyte);
1432 else
1433 nbyte += MB_CPTR2LEN(p + nbyte);
1434 --nchar;
1435 }
1436 else
1437 nbyte = nchar;
1438 if (argvars[2].v_type != VAR_UNKNOWN)
1439 {
1440 charlen = (int)tv_get_number(&argvars[2]);
1441 while (charlen > 0 && nbyte + len < slen)
1442 {
1443 int off = nbyte + len;
1444
1445 if (off < 0)
1446 len += 1;
1447 else
1448 {
1449 if (skipcc)
1450 len += mb_ptr2len(p + off);
1451 else
1452 len += MB_CPTR2LEN(p + off);
1453 }
1454 --charlen;
1455 }
1456 }
1457 else
1458 len = slen - nbyte; // default: all bytes that are available.
1459 }
1460
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001461 // Only return the overlap between the specified part and the actual
1462 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001463 if (nbyte < 0)
1464 {
1465 len += nbyte;
1466 nbyte = 0;
1467 }
1468 else if (nbyte > slen)
1469 nbyte = slen;
1470 if (len < 0)
1471 len = 0;
1472 else if (nbyte + len > slen)
1473 len = slen - nbyte;
1474
1475 rettv->v_type = VAR_STRING;
1476 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1477}
1478
1479/*
1480 * "strpart()" function
1481 */
1482 void
1483f_strpart(typval_T *argvars, typval_T *rettv)
1484{
1485 char_u *p;
1486 int n;
1487 int len;
1488 int slen;
1489 int error = FALSE;
1490
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001491 if (in_vim9script()
1492 && (check_for_string_arg(argvars, 0) == FAIL
1493 || check_for_number_arg(argvars, 1) == FAIL
1494 || check_for_opt_number_arg(argvars, 2) == FAIL
1495 || (argvars[2].v_type != VAR_UNKNOWN
1496 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1497 return;
1498
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001499 p = tv_get_string(&argvars[0]);
1500 slen = (int)STRLEN(p);
1501
1502 n = (int)tv_get_number_chk(&argvars[1], &error);
1503 if (error)
1504 len = 0;
1505 else if (argvars[2].v_type != VAR_UNKNOWN)
1506 len = (int)tv_get_number(&argvars[2]);
1507 else
1508 len = slen - n; // default len: all bytes that are available.
1509
1510 // Only return the overlap between the specified part and the actual
1511 // string.
1512 if (n < 0)
1513 {
1514 len += n;
1515 n = 0;
1516 }
1517 else if (n > slen)
1518 n = slen;
1519 if (len < 0)
1520 len = 0;
1521 else if (n + len > slen)
1522 len = slen - n;
1523
1524 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1525 {
1526 int off;
1527
1528 // length in characters
1529 for (off = n; off < slen && len > 0; --len)
1530 off += mb_ptr2len(p + off);
1531 len = off - n;
1532 }
1533
1534 rettv->v_type = VAR_STRING;
1535 rettv->vval.v_string = vim_strnsave(p + n, len);
1536}
1537
1538/*
1539 * "strridx()" function
1540 */
1541 void
1542f_strridx(typval_T *argvars, typval_T *rettv)
1543{
1544 char_u buf[NUMBUFLEN];
1545 char_u *needle;
1546 char_u *haystack;
1547 char_u *rest;
1548 char_u *lastmatch = NULL;
1549 int haystack_len, end_idx;
1550
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001551 if (in_vim9script()
1552 && (check_for_string_arg(argvars, 0) == FAIL
1553 || check_for_string_arg(argvars, 1) == FAIL
1554 || check_for_opt_number_arg(argvars, 2) == FAIL))
1555 return;
1556
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001557 needle = tv_get_string_chk(&argvars[1]);
1558 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1559
1560 rettv->vval.v_number = -1;
1561 if (needle == NULL || haystack == NULL)
1562 return; // type error; errmsg already given
1563
1564 haystack_len = (int)STRLEN(haystack);
1565 if (argvars[2].v_type != VAR_UNKNOWN)
1566 {
1567 // Third argument: upper limit for index
1568 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1569 if (end_idx < 0)
1570 return; // can never find a match
1571 }
1572 else
1573 end_idx = haystack_len;
1574
1575 if (*needle == NUL)
1576 {
1577 // Empty string matches past the end.
1578 lastmatch = haystack + end_idx;
1579 }
1580 else
1581 {
1582 for (rest = haystack; *rest != '\0'; ++rest)
1583 {
1584 rest = (char_u *)strstr((char *)rest, (char *)needle);
1585 if (rest == NULL || rest > haystack + end_idx)
1586 break;
1587 lastmatch = rest;
1588 }
1589 }
1590
1591 if (lastmatch == NULL)
1592 rettv->vval.v_number = -1;
1593 else
1594 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1595}
1596
1597/*
1598 * "strtrans()" function
1599 */
1600 void
1601f_strtrans(typval_T *argvars, typval_T *rettv)
1602{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001603 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1604 return;
1605
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001606 rettv->v_type = VAR_STRING;
1607 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1608}
1609
1610/*
1611 * "tolower(string)" function
1612 */
1613 void
1614f_tolower(typval_T *argvars, typval_T *rettv)
1615{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001616 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1617 return;
1618
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001619 rettv->v_type = VAR_STRING;
1620 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1621}
1622
1623/*
1624 * "toupper(string)" function
1625 */
1626 void
1627f_toupper(typval_T *argvars, typval_T *rettv)
1628{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001629 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1630 return;
1631
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001632 rettv->v_type = VAR_STRING;
1633 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1634}
1635
1636/*
1637 * "tr(string, fromstr, tostr)" function
1638 */
1639 void
1640f_tr(typval_T *argvars, typval_T *rettv)
1641{
1642 char_u *in_str;
1643 char_u *fromstr;
1644 char_u *tostr;
1645 char_u *p;
1646 int inlen;
1647 int fromlen;
1648 int tolen;
1649 int idx;
1650 char_u *cpstr;
1651 int cplen;
1652 int first = TRUE;
1653 char_u buf[NUMBUFLEN];
1654 char_u buf2[NUMBUFLEN];
1655 garray_T ga;
1656
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001657 if (in_vim9script()
1658 && (check_for_string_arg(argvars, 0) == FAIL
1659 || check_for_string_arg(argvars, 1) == FAIL
1660 || check_for_string_arg(argvars, 2) == FAIL))
1661 return;
1662
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001663 in_str = tv_get_string(&argvars[0]);
1664 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1665 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1666
1667 // Default return value: empty string.
1668 rettv->v_type = VAR_STRING;
1669 rettv->vval.v_string = NULL;
1670 if (fromstr == NULL || tostr == NULL)
1671 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00001672 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001673
1674 if (!has_mbyte)
1675 // not multi-byte: fromstr and tostr must be the same length
1676 if (STRLEN(fromstr) != STRLEN(tostr))
1677 {
1678error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001679 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001680 ga_clear(&ga);
1681 return;
1682 }
1683
1684 // fromstr and tostr have to contain the same number of chars
1685 while (*in_str != NUL)
1686 {
1687 if (has_mbyte)
1688 {
1689 inlen = (*mb_ptr2len)(in_str);
1690 cpstr = in_str;
1691 cplen = inlen;
1692 idx = 0;
1693 for (p = fromstr; *p != NUL; p += fromlen)
1694 {
1695 fromlen = (*mb_ptr2len)(p);
1696 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1697 {
1698 for (p = tostr; *p != NUL; p += tolen)
1699 {
1700 tolen = (*mb_ptr2len)(p);
1701 if (idx-- == 0)
1702 {
1703 cplen = tolen;
1704 cpstr = p;
1705 break;
1706 }
1707 }
1708 if (*p == NUL) // tostr is shorter than fromstr
1709 goto error;
1710 break;
1711 }
1712 ++idx;
1713 }
1714
1715 if (first && cpstr == in_str)
1716 {
1717 // Check that fromstr and tostr have the same number of
1718 // (multi-byte) characters. Done only once when a character
1719 // of in_str doesn't appear in fromstr.
1720 first = FALSE;
1721 for (p = tostr; *p != NUL; p += tolen)
1722 {
1723 tolen = (*mb_ptr2len)(p);
1724 --idx;
1725 }
1726 if (idx != 0)
1727 goto error;
1728 }
1729
1730 (void)ga_grow(&ga, cplen);
1731 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1732 ga.ga_len += cplen;
1733
1734 in_str += inlen;
1735 }
1736 else
1737 {
1738 // When not using multi-byte chars we can do it faster.
1739 p = vim_strchr(fromstr, *in_str);
1740 if (p != NULL)
1741 ga_append(&ga, tostr[p - fromstr]);
1742 else
1743 ga_append(&ga, *in_str);
1744 ++in_str;
1745 }
1746 }
1747
1748 // add a terminating NUL
1749 (void)ga_grow(&ga, 1);
1750 ga_append(&ga, NUL);
1751
1752 rettv->vval.v_string = ga.ga_data;
1753}
1754
1755/*
1756 * "trim({expr})" function
1757 */
1758 void
1759f_trim(typval_T *argvars, typval_T *rettv)
1760{
1761 char_u buf1[NUMBUFLEN];
1762 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001763 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001764 char_u *mask = NULL;
1765 char_u *tail;
1766 char_u *prev;
1767 char_u *p;
1768 int c1;
1769 int dir = 0;
1770
1771 rettv->v_type = VAR_STRING;
1772 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001773
1774 if (in_vim9script()
1775 && (check_for_string_arg(argvars, 0) == FAIL
1776 || check_for_opt_string_arg(argvars, 1) == FAIL
1777 || (argvars[1].v_type != VAR_UNKNOWN
1778 && check_for_opt_number_arg(argvars, 2) == FAIL)))
1779 return;
1780
1781 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001782 if (head == NULL)
1783 return;
1784
1785 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
1786 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001787 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[1]));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001788 return;
1789 }
1790
1791 if (argvars[1].v_type == VAR_STRING)
1792 {
1793 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1794
1795 if (argvars[2].v_type != VAR_UNKNOWN)
1796 {
1797 int error = 0;
1798
1799 // leading or trailing characters to trim
1800 dir = (int)tv_get_number_chk(&argvars[2], &error);
1801 if (error)
1802 return;
1803 if (dir < 0 || dir > 2)
1804 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001805 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001806 return;
1807 }
1808 }
1809 }
1810
1811 if (dir == 0 || dir == 1)
1812 {
1813 // Trim leading characters
1814 while (*head != NUL)
1815 {
1816 c1 = PTR2CHAR(head);
1817 if (mask == NULL)
1818 {
1819 if (c1 > ' ' && c1 != 0xa0)
1820 break;
1821 }
1822 else
1823 {
1824 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1825 if (c1 == PTR2CHAR(p))
1826 break;
1827 if (*p == NUL)
1828 break;
1829 }
1830 MB_PTR_ADV(head);
1831 }
1832 }
1833
1834 tail = head + STRLEN(head);
1835 if (dir == 0 || dir == 2)
1836 {
1837 // Trim trailing characters
1838 for (; tail > head; tail = prev)
1839 {
1840 prev = tail;
1841 MB_PTR_BACK(head, prev);
1842 c1 = PTR2CHAR(prev);
1843 if (mask == NULL)
1844 {
1845 if (c1 > ' ' && c1 != 0xa0)
1846 break;
1847 }
1848 else
1849 {
1850 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1851 if (c1 == PTR2CHAR(p))
1852 break;
1853 if (*p == NUL)
1854 break;
1855 }
1856 }
1857 }
1858 rettv->vval.v_string = vim_strnsave(head, tail - head);
1859}
1860
// Message given by tv_nr(), tv_str() and tv_float() when the typval list has
// no entry left for a conversion (the entry type is VAR_UNKNOWN).
static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001862
1863/*
1864 * Get number argument from "idxp" entry in "tvs". First entry is 1.
1865 */
1866 static varnumber_T
1867tv_nr(typval_T *tvs, int *idxp)
1868{
1869 int idx = *idxp - 1;
1870 varnumber_T n = 0;
1871 int err = FALSE;
1872
1873 if (tvs[idx].v_type == VAR_UNKNOWN)
1874 emsg(_(e_printf));
1875 else
1876 {
1877 ++*idxp;
1878 n = tv_get_number_chk(&tvs[idx], &err);
1879 if (err)
1880 n = 0;
1881 }
1882 return n;
1883}
1884
1885/*
1886 * Get string argument from "idxp" entry in "tvs". First entry is 1.
1887 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
1888 * are not converted to a string.
1889 * If "tofree" is not NULL echo_string() is used. All types are converted to
1890 * a string with the same format as ":echo". The caller must free "*tofree".
1891 * Returns NULL for an error.
1892 */
1893 static char *
1894tv_str(typval_T *tvs, int *idxp, char_u **tofree)
1895{
1896 int idx = *idxp - 1;
1897 char *s = NULL;
1898 static char_u numbuf[NUMBUFLEN];
1899
1900 if (tvs[idx].v_type == VAR_UNKNOWN)
1901 emsg(_(e_printf));
1902 else
1903 {
1904 ++*idxp;
1905 if (tofree != NULL)
1906 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
1907 else
1908 s = (char *)tv_get_string_chk(&tvs[idx]);
1909 }
1910 return s;
1911}
1912
1913# ifdef FEAT_FLOAT
1914/*
1915 * Get float argument from "idxp" entry in "tvs". First entry is 1.
1916 */
1917 static double
1918tv_float(typval_T *tvs, int *idxp)
1919{
1920 int idx = *idxp - 1;
1921 double f = 0;
1922
1923 if (tvs[idx].v_type == VAR_UNKNOWN)
1924 emsg(_(e_printf));
1925 else
1926 {
1927 ++*idxp;
1928 if (tvs[idx].v_type == VAR_FLOAT)
1929 f = tvs[idx].vval.v_float;
1930 else if (tvs[idx].v_type == VAR_NUMBER)
1931 f = (double)tvs[idx].vval.v_number;
1932 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00001933 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001934 }
1935 return f;
1936}
1937# endif
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001938
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001939#endif
1940
1941#ifdef FEAT_FLOAT
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * The upper case forms are used when "fmt_spec" is an upper case letter.
 */
    static const char *
infinity_str(int positive,
             char fmt_spec,
             int force_sign,
             int space_for_positive)
{
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // Position within a group of four for a positive value: 1 without a
    // sign, 2 with a '+' and 3 with a leading space.
    int sign_variant = 1 + force_sign + force_sign * space_for_positive;
    int idx = positive * sign_variant;

    if (ASCII_ISUPPER(fmt_spec))
        idx += 4;
    return table[idx];
}
1963#endif
1964
1965/*
1966 * This code was included to provide a portable vsnprintf() and snprintf().
1967 * Some systems may provide their own, but we always use this one for
1968 * consistency.
1969 *
1970 * This code is based on snprintf.c - a portable implementation of snprintf
1971 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
1972 * Included with permission. It was heavily modified to fit in Vim.
1973 * The original code, including useful comments, can be found here:
1974 * http://www.ijs.si/software/snprintf/
1975 *
 * This snprintf() only supports the following conversion specifiers:
 * s, S, c, d, u, b, B, o, x, X, p (and synonyms: i, D, U, O - see below)
 * with flags: '-', '+', ' ', '0' and '#' (a "'" flag is accepted and ignored).
1979 * An asterisk is supported for field width as well as precision.
1980 *
1981 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
1982 *
1983 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
1984 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
1985 *
1986 * The locale is not used, the string is used as a byte string. This is only
1987 * relevant for double-byte encodings where the second byte may be '%'.
1988 *
1989 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
1990 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
1991 *
1992 * The return value is the number of characters which would be generated
1993 * for the given input, excluding the trailing NUL. If this value
1994 * is greater or equal to "str_m", not all characters from the result
1995 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
1996 * are discarded. If "str_m" is greater than zero it is guaranteed
1997 * the resulting string will be NUL-terminated.
1998 */
1999
2000/*
2001 * When va_list is not supported we only define vim_snprintf().
2002 *
2003 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2004 * "typval_T". When the latter is not used it must be NULL.
2005 */
2006
2007// When generating prototypes all of this is skipped, cproto doesn't
2008// understand this.
2009#ifndef PROTO
2010
// Like vim_snprintf() but append to the string: the formatted text is
// written after the text already in "str".  "str_m" is the size of the whole
// buffer; when the existing text already fills it no room is passed on.
// Returns the value of vim_vsnprintf() for the appended part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    size_t      used = STRLEN(str);
    size_t      room = (str_m <= used) ? 0 : str_m - used;
    va_list     args;
    int         result;

    va_start(args, fmt);
    result = vim_vsnprintf(str + used, room, fmt, args);
    va_end(args);
    return result;
}
2029
// Format "fmt" with the variable arguments into "str", which has room for
// "str_m" bytes.  Returns the value of vim_vsnprintf().
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list     args;
    int         result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2041
2042 int
2043vim_vsnprintf(
2044 char *str,
2045 size_t str_m,
2046 const char *fmt,
2047 va_list ap)
2048{
2049 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2050}
2051
2052 int
2053vim_vsnprintf_typval(
2054 char *str,
2055 size_t str_m,
2056 const char *fmt,
2057 va_list ap,
2058 typval_T *tvs)
2059{
2060 size_t str_l = 0;
2061 const char *p = fmt;
2062 int arg_idx = 1;
2063
2064 if (p == NULL)
2065 p = "";
2066 while (*p != NUL)
2067 {
2068 if (*p != '%')
2069 {
2070 char *q = strchr(p + 1, '%');
2071 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2072
2073 // Copy up to the next '%' or NUL without any changes.
2074 if (str_l < str_m)
2075 {
2076 size_t avail = str_m - str_l;
2077
2078 mch_memmove(str + str_l, p, n > avail ? avail : n);
2079 }
2080 p += n;
2081 str_l += n;
2082 }
2083 else
2084 {
2085 size_t min_field_width = 0, precision = 0;
2086 int zero_padding = 0, precision_specified = 0, justify_left = 0;
2087 int alternate_form = 0, force_sign = 0;
2088
2089 // If both the ' ' and '+' flags appear, the ' ' flag should be
2090 // ignored.
2091 int space_for_positive = 1;
2092
2093 // allowed values: \0, h, l, L
2094 char length_modifier = '\0';
2095
2096 // temporary buffer for simple numeric->string conversion
2097# if defined(FEAT_FLOAT)
2098# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
2099 // That sounds reasonable to use as the maximum
2100 // printable.
2101# else
2102# define TMP_LEN 66
2103# endif
2104 char tmp[TMP_LEN];
2105
2106 // string address in case of string argument
2107 const char *str_arg = NULL;
2108
2109 // natural field width of arg without padding and sign
2110 size_t str_arg_l;
2111
2112 // unsigned char argument value - only defined for c conversion.
2113 // N.B. standard explicitly states the char argument for the c
2114 // conversion is unsigned
2115 unsigned char uchar_arg;
2116
2117 // number of zeros to be inserted for numeric conversions as
2118 // required by the precision or minimal field width
2119 size_t number_of_zeros_to_pad = 0;
2120
2121 // index into tmp where zero padding is to be inserted
2122 size_t zero_padding_insertion_ind = 0;
2123
2124 // current conversion specifier character
2125 char fmt_spec = '\0';
2126
2127 // buffer for 's' and 'S' specs
2128 char_u *tofree = NULL;
2129
2130
2131 p++; // skip '%'
2132
2133 // parse flags
2134 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2135 || *p == '#' || *p == '\'')
2136 {
2137 switch (*p)
2138 {
2139 case '0': zero_padding = 1; break;
2140 case '-': justify_left = 1; break;
2141 case '+': force_sign = 1; space_for_positive = 0; break;
2142 case ' ': force_sign = 1;
2143 // If both the ' ' and '+' flags appear, the ' '
2144 // flag should be ignored
2145 break;
2146 case '#': alternate_form = 1; break;
2147 case '\'': break;
2148 }
2149 p++;
2150 }
2151 // If the '0' and '-' flags both appear, the '0' flag should be
2152 // ignored.
2153
2154 // parse field width
2155 if (*p == '*')
2156 {
2157 int j;
2158
2159 p++;
2160 j =
2161# if defined(FEAT_EVAL)
2162 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2163# endif
2164 va_arg(ap, int);
2165 if (j >= 0)
2166 min_field_width = j;
2167 else
2168 {
2169 min_field_width = -j;
2170 justify_left = 1;
2171 }
2172 }
2173 else if (VIM_ISDIGIT((int)(*p)))
2174 {
2175 // size_t could be wider than unsigned int; make sure we treat
2176 // argument like common implementations do
2177 unsigned int uj = *p++ - '0';
2178
2179 while (VIM_ISDIGIT((int)(*p)))
2180 uj = 10 * uj + (unsigned int)(*p++ - '0');
2181 min_field_width = uj;
2182 }
2183
2184 // parse precision
2185 if (*p == '.')
2186 {
2187 p++;
2188 precision_specified = 1;
2189 if (*p == '*')
2190 {
2191 int j;
2192
2193 j =
2194# if defined(FEAT_EVAL)
2195 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2196# endif
2197 va_arg(ap, int);
2198 p++;
2199 if (j >= 0)
2200 precision = j;
2201 else
2202 {
2203 precision_specified = 0;
2204 precision = 0;
2205 }
2206 }
2207 else if (VIM_ISDIGIT((int)(*p)))
2208 {
2209 // size_t could be wider than unsigned int; make sure we
2210 // treat argument like common implementations do
2211 unsigned int uj = *p++ - '0';
2212
2213 while (VIM_ISDIGIT((int)(*p)))
2214 uj = 10 * uj + (unsigned int)(*p++ - '0');
2215 precision = uj;
2216 }
2217 }
2218
2219 // parse 'h', 'l' and 'll' length modifiers
2220 if (*p == 'h' || *p == 'l')
2221 {
2222 length_modifier = *p;
2223 p++;
2224 if (length_modifier == 'l' && *p == 'l')
2225 {
2226 // double l = __int64 / varnumber_T
2227 length_modifier = 'L';
2228 p++;
2229 }
2230 }
2231 fmt_spec = *p;
2232
2233 // common synonyms:
2234 switch (fmt_spec)
2235 {
2236 case 'i': fmt_spec = 'd'; break;
2237 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
2238 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
2239 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
2240 default: break;
2241 }
2242
2243# if defined(FEAT_EVAL)
2244 switch (fmt_spec)
2245 {
2246 case 'd': case 'u': case 'o': case 'x': case 'X':
2247 if (tvs != NULL && length_modifier == '\0')
2248 length_modifier = 'L';
2249 }
2250# endif
2251
2252 // get parameter value, do initial processing
2253 switch (fmt_spec)
2254 {
2255 // '%' and 'c' behave similar to 's' regarding flags and field
2256 // widths
2257 case '%':
2258 case 'c':
2259 case 's':
2260 case 'S':
2261 str_arg_l = 1;
2262 switch (fmt_spec)
2263 {
2264 case '%':
2265 str_arg = p;
2266 break;
2267
2268 case 'c':
2269 {
2270 int j;
2271
2272 j =
2273# if defined(FEAT_EVAL)
2274 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2275# endif
2276 va_arg(ap, int);
2277 // standard demands unsigned char
2278 uchar_arg = (unsigned char)j;
2279 str_arg = (char *)&uchar_arg;
2280 break;
2281 }
2282
2283 case 's':
2284 case 'S':
2285 str_arg =
2286# if defined(FEAT_EVAL)
2287 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
2288# endif
2289 va_arg(ap, char *);
2290 if (str_arg == NULL)
2291 {
2292 str_arg = "[NULL]";
2293 str_arg_l = 6;
2294 }
2295 // make sure not to address string beyond the specified
2296 // precision !!!
2297 else if (!precision_specified)
2298 str_arg_l = strlen(str_arg);
2299 // truncate string if necessary as requested by precision
2300 else if (precision == 0)
2301 str_arg_l = 0;
2302 else
2303 {
2304 // Don't put the #if inside memchr(), it can be a
2305 // macro.
2306 // memchr on HP does not like n > 2^31 !!!
2307 char *q = memchr(str_arg, '\0',
2308 precision <= (size_t)0x7fffffffL ? precision
2309 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00002310
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002311 str_arg_l = (q == NULL) ? precision
2312 : (size_t)(q - str_arg);
2313 }
2314 if (fmt_spec == 'S')
2315 {
presuku1f2453f2021-11-24 15:32:57 +00002316 char_u *p1;
2317 size_t i;
2318 int cell;
presukud85fccd2021-11-20 19:38:31 +00002319
presuku1f2453f2021-11-24 15:32:57 +00002320 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002321 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00002322 {
2323 cell = mb_ptr2cells(p1);
2324 if (precision_specified && i + cell > precision)
2325 break;
2326 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002327 }
presuku1f2453f2021-11-24 15:32:57 +00002328
2329 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00002330 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00002331 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002332 }
2333 break;
2334
2335 default:
2336 break;
2337 }
2338 break;
2339
2340 case 'd': case 'u':
2341 case 'b': case 'B':
2342 case 'o':
2343 case 'x': case 'X':
2344 case 'p':
2345 {
2346 // NOTE: the u, b, o, x, X and p conversion specifiers
2347 // imply the value is unsigned; d implies a signed
2348 // value
2349
2350 // 0 if numeric argument is zero (or if pointer is
2351 // NULL for 'p'), +1 if greater than zero (or nonzero
2352 // for unsigned arguments), -1 if negative (unsigned
2353 // argument is never negative)
2354 int arg_sign = 0;
2355
2356 // only set for length modifier h, or for no length
2357 // modifiers
2358 int int_arg = 0;
2359 unsigned int uint_arg = 0;
2360
2361 // only set for length modifier l
2362 long int long_arg = 0;
2363 unsigned long int ulong_arg = 0;
2364
2365 // only set for length modifier ll
2366 varnumber_T llong_arg = 0;
2367 uvarnumber_T ullong_arg = 0;
2368
2369 // only set for b conversion
2370 uvarnumber_T bin_arg = 0;
2371
2372 // pointer argument value -only defined for p
2373 // conversion
2374 void *ptr_arg = NULL;
2375
2376 if (fmt_spec == 'p')
2377 {
2378 length_modifier = '\0';
2379 ptr_arg =
2380# if defined(FEAT_EVAL)
2381 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
2382 NULL) :
2383# endif
2384 va_arg(ap, void *);
2385 if (ptr_arg != NULL)
2386 arg_sign = 1;
2387 }
2388 else if (fmt_spec == 'b' || fmt_spec == 'B')
2389 {
2390 bin_arg =
2391# if defined(FEAT_EVAL)
2392 tvs != NULL ?
2393 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
2394# endif
2395 va_arg(ap, uvarnumber_T);
2396 if (bin_arg != 0)
2397 arg_sign = 1;
2398 }
2399 else if (fmt_spec == 'd')
2400 {
2401 // signed
2402 switch (length_modifier)
2403 {
2404 case '\0':
2405 case 'h':
2406 // char and short arguments are passed as int.
2407 int_arg =
2408# if defined(FEAT_EVAL)
2409 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2410# endif
2411 va_arg(ap, int);
2412 if (int_arg > 0)
2413 arg_sign = 1;
2414 else if (int_arg < 0)
2415 arg_sign = -1;
2416 break;
2417 case 'l':
2418 long_arg =
2419# if defined(FEAT_EVAL)
2420 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2421# endif
2422 va_arg(ap, long int);
2423 if (long_arg > 0)
2424 arg_sign = 1;
2425 else if (long_arg < 0)
2426 arg_sign = -1;
2427 break;
2428 case 'L':
2429 llong_arg =
2430# if defined(FEAT_EVAL)
2431 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2432# endif
2433 va_arg(ap, varnumber_T);
2434 if (llong_arg > 0)
2435 arg_sign = 1;
2436 else if (llong_arg < 0)
2437 arg_sign = -1;
2438 break;
2439 }
2440 }
2441 else
2442 {
2443 // unsigned
2444 switch (length_modifier)
2445 {
2446 case '\0':
2447 case 'h':
2448 uint_arg =
2449# if defined(FEAT_EVAL)
2450 tvs != NULL ? (unsigned)
2451 tv_nr(tvs, &arg_idx) :
2452# endif
2453 va_arg(ap, unsigned int);
2454 if (uint_arg != 0)
2455 arg_sign = 1;
2456 break;
2457 case 'l':
2458 ulong_arg =
2459# if defined(FEAT_EVAL)
2460 tvs != NULL ? (unsigned long)
2461 tv_nr(tvs, &arg_idx) :
2462# endif
2463 va_arg(ap, unsigned long int);
2464 if (ulong_arg != 0)
2465 arg_sign = 1;
2466 break;
2467 case 'L':
2468 ullong_arg =
2469# if defined(FEAT_EVAL)
2470 tvs != NULL ? (uvarnumber_T)
2471 tv_nr(tvs, &arg_idx) :
2472# endif
2473 va_arg(ap, uvarnumber_T);
2474 if (ullong_arg != 0)
2475 arg_sign = 1;
2476 break;
2477 }
2478 }
2479
2480 str_arg = tmp;
2481 str_arg_l = 0;
2482
2483 // NOTE:
2484 // For d, i, u, o, x, and X conversions, if precision is
2485 // specified, the '0' flag should be ignored. This is so
2486 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
2487 // FreeBSD, NetBSD; but not with Perl.
2488 if (precision_specified)
2489 zero_padding = 0;
2490 if (fmt_spec == 'd')
2491 {
2492 if (force_sign && arg_sign >= 0)
2493 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
2494 // leave negative numbers for sprintf to handle, to
2495 // avoid handling tricky cases like (short int)-32768
2496 }
2497 else if (alternate_form)
2498 {
2499 if (arg_sign != 0
2500 && (fmt_spec == 'b' || fmt_spec == 'B'
2501 || fmt_spec == 'x' || fmt_spec == 'X') )
2502 {
2503 tmp[str_arg_l++] = '0';
2504 tmp[str_arg_l++] = fmt_spec;
2505 }
2506 // alternate form should have no effect for p
2507 // conversion, but ...
2508 }
2509
2510 zero_padding_insertion_ind = str_arg_l;
2511 if (!precision_specified)
2512 precision = 1; // default precision is 1
2513 if (precision == 0 && arg_sign == 0)
2514 {
2515 // When zero value is formatted with an explicit
2516 // precision 0, the resulting formatted string is
2517 // empty (d, i, u, b, B, o, x, X, p).
2518 }
2519 else
2520 {
2521 char f[6];
2522 int f_l = 0;
2523
2524 // construct a simple format string for sprintf
2525 f[f_l++] = '%';
2526 if (!length_modifier)
2527 ;
2528 else if (length_modifier == 'L')
2529 {
2530# ifdef MSWIN
2531 f[f_l++] = 'I';
2532 f[f_l++] = '6';
2533 f[f_l++] = '4';
2534# else
2535 f[f_l++] = 'l';
2536 f[f_l++] = 'l';
2537# endif
2538 }
2539 else
2540 f[f_l++] = length_modifier;
2541 f[f_l++] = fmt_spec;
2542 f[f_l++] = '\0';
2543
2544 if (fmt_spec == 'p')
2545 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
2546 else if (fmt_spec == 'b' || fmt_spec == 'B')
2547 {
2548 char b[8 * sizeof(uvarnumber_T)];
2549 size_t b_l = 0;
2550 uvarnumber_T bn = bin_arg;
2551
2552 do
2553 {
2554 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
2555 bn >>= 1;
2556 }
2557 while (bn != 0);
2558
2559 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
2560 str_arg_l += b_l;
2561 }
2562 else if (fmt_spec == 'd')
2563 {
2564 // signed
2565 switch (length_modifier)
2566 {
2567 case '\0': str_arg_l += sprintf(
2568 tmp + str_arg_l, f,
2569 int_arg);
2570 break;
2571 case 'h': str_arg_l += sprintf(
2572 tmp + str_arg_l, f,
2573 (short)int_arg);
2574 break;
2575 case 'l': str_arg_l += sprintf(
2576 tmp + str_arg_l, f, long_arg);
2577 break;
2578 case 'L': str_arg_l += sprintf(
2579 tmp + str_arg_l, f, llong_arg);
2580 break;
2581 }
2582 }
2583 else
2584 {
2585 // unsigned
2586 switch (length_modifier)
2587 {
2588 case '\0': str_arg_l += sprintf(
2589 tmp + str_arg_l, f,
2590 uint_arg);
2591 break;
2592 case 'h': str_arg_l += sprintf(
2593 tmp + str_arg_l, f,
2594 (unsigned short)uint_arg);
2595 break;
2596 case 'l': str_arg_l += sprintf(
2597 tmp + str_arg_l, f, ulong_arg);
2598 break;
2599 case 'L': str_arg_l += sprintf(
2600 tmp + str_arg_l, f, ullong_arg);
2601 break;
2602 }
2603 }
2604
2605 // include the optional minus sign and possible
2606 // "0x" in the region before the zero padding
2607 // insertion point
2608 if (zero_padding_insertion_ind < str_arg_l
2609 && tmp[zero_padding_insertion_ind] == '-')
2610 zero_padding_insertion_ind++;
2611 if (zero_padding_insertion_ind + 1 < str_arg_l
2612 && tmp[zero_padding_insertion_ind] == '0'
2613 && (tmp[zero_padding_insertion_ind + 1] == 'x'
2614 || tmp[zero_padding_insertion_ind + 1] == 'X'))
2615 zero_padding_insertion_ind += 2;
2616 }
2617
2618 {
2619 size_t num_of_digits = str_arg_l
2620 - zero_padding_insertion_ind;
2621
2622 if (alternate_form && fmt_spec == 'o'
2623 // unless zero is already the first
2624 // character
2625 && !(zero_padding_insertion_ind < str_arg_l
2626 && tmp[zero_padding_insertion_ind] == '0'))
2627 {
2628 // assure leading zero for alternate-form
2629 // octal numbers
2630 if (!precision_specified
2631 || precision < num_of_digits + 1)
2632 {
2633 // precision is increased to force the
2634 // first character to be zero, except if a
2635 // zero value is formatted with an
2636 // explicit precision of zero
2637 precision = num_of_digits + 1;
2638 }
2639 }
2640 // zero padding to specified precision?
2641 if (num_of_digits < precision)
2642 number_of_zeros_to_pad = precision - num_of_digits;
2643 }
2644 // zero padding to specified minimal field width?
2645 if (!justify_left && zero_padding)
2646 {
2647 int n = (int)(min_field_width - (str_arg_l
2648 + number_of_zeros_to_pad));
2649 if (n > 0)
2650 number_of_zeros_to_pad += n;
2651 }
2652 break;
2653 }
2654
2655# ifdef FEAT_FLOAT
2656 case 'f':
2657 case 'F':
2658 case 'e':
2659 case 'E':
2660 case 'g':
2661 case 'G':
2662 {
2663 // Floating point.
2664 double f;
2665 double abs_f;
2666 char format[40];
2667 int l;
2668 int remove_trailing_zeroes = FALSE;
2669
2670 f =
2671# if defined(FEAT_EVAL)
2672 tvs != NULL ? tv_float(tvs, &arg_idx) :
2673# endif
2674 va_arg(ap, double);
2675 abs_f = f < 0 ? -f : f;
2676
2677 if (fmt_spec == 'g' || fmt_spec == 'G')
2678 {
2679 // Would be nice to use %g directly, but it prints
2680 // "1.0" as "1", we don't want that.
2681 if ((abs_f >= 0.001 && abs_f < 10000000.0)
2682 || abs_f == 0.0)
2683 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
2684 else
2685 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
2686 remove_trailing_zeroes = TRUE;
2687 }
2688
2689 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
2690# ifdef VAX
2691 abs_f > 1.0e38
2692# else
2693 abs_f > 1.0e307
2694# endif
2695 )
2696 {
2697 // Avoid a buffer overflow
2698 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
2699 force_sign, space_for_positive));
2700 str_arg_l = STRLEN(tmp);
2701 zero_padding = 0;
2702 }
2703 else
2704 {
2705 if (isnan(f))
2706 {
2707 // Not a number: nan or NAN
2708 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
2709 : "nan");
2710 str_arg_l = 3;
2711 zero_padding = 0;
2712 }
2713 else if (isinf(f))
2714 {
2715 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
2716 force_sign, space_for_positive));
2717 str_arg_l = STRLEN(tmp);
2718 zero_padding = 0;
2719 }
2720 else
2721 {
2722 // Regular float number
2723 format[0] = '%';
2724 l = 1;
2725 if (force_sign)
2726 format[l++] = space_for_positive ? ' ' : '+';
2727 if (precision_specified)
2728 {
2729 size_t max_prec = TMP_LEN - 10;
2730
2731 // Make sure we don't get more digits than we
2732 // have room for.
2733 if ((fmt_spec == 'f' || fmt_spec == 'F')
2734 && abs_f > 1.0)
2735 max_prec -= (size_t)log10(abs_f);
2736 if (precision > max_prec)
2737 precision = max_prec;
2738 l += sprintf(format + l, ".%d", (int)precision);
2739 }
2740 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
2741 format[l + 1] = NUL;
2742
2743 str_arg_l = sprintf(tmp, format, f);
2744 }
2745
2746 if (remove_trailing_zeroes)
2747 {
2748 int i;
2749 char *tp;
2750
2751 // Using %g or %G: remove superfluous zeroes.
2752 if (fmt_spec == 'f' || fmt_spec == 'F')
2753 tp = tmp + str_arg_l - 1;
2754 else
2755 {
2756 tp = (char *)vim_strchr((char_u *)tmp,
2757 fmt_spec == 'e' ? 'e' : 'E');
2758 if (tp != NULL)
2759 {
2760 // Remove superfluous '+' and leading
2761 // zeroes from the exponent.
2762 if (tp[1] == '+')
2763 {
2764 // Change "1.0e+07" to "1.0e07"
2765 STRMOVE(tp + 1, tp + 2);
2766 --str_arg_l;
2767 }
2768 i = (tp[1] == '-') ? 2 : 1;
2769 while (tp[i] == '0')
2770 {
2771 // Change "1.0e07" to "1.0e7"
2772 STRMOVE(tp + i, tp + i + 1);
2773 --str_arg_l;
2774 }
2775 --tp;
2776 }
2777 }
2778
2779 if (tp != NULL && !precision_specified)
2780 // Remove trailing zeroes, but keep the one
2781 // just after a dot.
2782 while (tp > tmp + 2 && *tp == '0'
2783 && tp[-1] != '.')
2784 {
2785 STRMOVE(tp, tp + 1);
2786 --tp;
2787 --str_arg_l;
2788 }
2789 }
2790 else
2791 {
2792 char *tp;
2793
2794 // Be consistent: some printf("%e") use 1.0e+12
2795 // and some 1.0e+012. Remove one zero in the last
2796 // case.
2797 tp = (char *)vim_strchr((char_u *)tmp,
2798 fmt_spec == 'e' ? 'e' : 'E');
2799 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
2800 && tp[2] == '0'
2801 && vim_isdigit(tp[3])
2802 && vim_isdigit(tp[4]))
2803 {
2804 STRMOVE(tp + 2, tp + 3);
2805 --str_arg_l;
2806 }
2807 }
2808 }
2809 if (zero_padding && min_field_width > str_arg_l
2810 && (tmp[0] == '-' || force_sign))
2811 {
2812 // padding 0's should be inserted after the sign
2813 number_of_zeros_to_pad = min_field_width - str_arg_l;
2814 zero_padding_insertion_ind = 1;
2815 }
2816 str_arg = tmp;
2817 break;
2818 }
2819# endif
2820
2821 default:
2822 // unrecognized conversion specifier, keep format string
2823 // as-is
2824 zero_padding = 0; // turn zero padding off for non-numeric
2825 // conversion
2826 justify_left = 1;
2827 min_field_width = 0; // reset flags
2828
2829 // discard the unrecognized conversion, just keep
2830 // the unrecognized conversion character
2831 str_arg = p;
2832 str_arg_l = 0;
2833 if (*p != NUL)
2834 str_arg_l++; // include invalid conversion specifier
2835 // unchanged if not at end-of-string
2836 break;
2837 }
2838
2839 if (*p != NUL)
2840 p++; // step over the just processed conversion specifier
2841
2842 // insert padding to the left as requested by min_field_width;
2843 // this does not include the zero padding in case of numerical
2844 // conversions
2845 if (!justify_left)
2846 {
2847 // left padding with blank or zero
2848 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
2849
2850 if (pn > 0)
2851 {
2852 if (str_l < str_m)
2853 {
2854 size_t avail = str_m - str_l;
2855
2856 vim_memset(str + str_l, zero_padding ? '0' : ' ',
2857 (size_t)pn > avail ? avail
2858 : (size_t)pn);
2859 }
2860 str_l += pn;
2861 }
2862 }
2863
2864 // zero padding as requested by the precision or by the minimal
2865 // field width for numeric conversions required?
2866 if (number_of_zeros_to_pad == 0)
2867 {
2868 // will not copy first part of numeric right now,
2869 // force it to be copied later in its entirety
2870 zero_padding_insertion_ind = 0;
2871 }
2872 else
2873 {
2874 // insert first part of numerics (sign or '0x') before zero
2875 // padding
2876 int zn = (int)zero_padding_insertion_ind;
2877
2878 if (zn > 0)
2879 {
2880 if (str_l < str_m)
2881 {
2882 size_t avail = str_m - str_l;
2883
2884 mch_memmove(str + str_l, str_arg,
2885 (size_t)zn > avail ? avail
2886 : (size_t)zn);
2887 }
2888 str_l += zn;
2889 }
2890
2891 // insert zero padding as requested by the precision or min
2892 // field width
2893 zn = (int)number_of_zeros_to_pad;
2894 if (zn > 0)
2895 {
2896 if (str_l < str_m)
2897 {
2898 size_t avail = str_m - str_l;
2899
2900 vim_memset(str + str_l, '0',
2901 (size_t)zn > avail ? avail
2902 : (size_t)zn);
2903 }
2904 str_l += zn;
2905 }
2906 }
2907
2908 // insert formatted string
2909 // (or as-is conversion specifier for unknown conversions)
2910 {
2911 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
2912
2913 if (sn > 0)
2914 {
2915 if (str_l < str_m)
2916 {
2917 size_t avail = str_m - str_l;
2918
2919 mch_memmove(str + str_l,
2920 str_arg + zero_padding_insertion_ind,
2921 (size_t)sn > avail ? avail : (size_t)sn);
2922 }
2923 str_l += sn;
2924 }
2925 }
2926
2927 // insert right padding
2928 if (justify_left)
2929 {
2930 // right blank padding to the field width
2931 int pn = (int)(min_field_width
2932 - (str_arg_l + number_of_zeros_to_pad));
2933
2934 if (pn > 0)
2935 {
2936 if (str_l < str_m)
2937 {
2938 size_t avail = str_m - str_l;
2939
2940 vim_memset(str + str_l, ' ',
2941 (size_t)pn > avail ? avail
2942 : (size_t)pn);
2943 }
2944 str_l += pn;
2945 }
2946 }
2947 vim_free(tofree);
2948 }
2949 }
2950
2951 if (str_m > 0)
2952 {
2953 // make sure the string is nul-terminated even at the expense of
2954 // overwriting the last character (shouldn't happen, but just in case)
2955 //
2956 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
2957 }
2958
2959 if (tvs != NULL && tvs[arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00002960 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002961
2962 // Return the number of characters formatted (excluding trailing nul
2963 // character), that is, the number of characters that would have been
2964 // written to the buffer if it were large enough.
2965 return (int)str_l;
2966}
2967
2968#endif // PROTO