blob: 3befb86eae08795e52748ba0dcd7d6d4dfe12278 [file] [log] [blame]
/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved by Bram Moolenaar
 *
 * Do ":help uganda" in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */
9
/*
 * strings.c: string manipulation functions
 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
45 if (p != NULL)
46 {
47 STRNCPY(p, string, len);
48 p[len] = NUL;
49 }
50 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
97 if (escaped_string != NULL)
98 {
99 p2 = escaped_string;
100 for (p = string; *p; p++)
101 {
102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
103 {
104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
108 }
109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
112 }
113 *p2 = NUL;
114 }
115 return escaped_string;
116}
117
118/*
119 * Return TRUE when 'shell' has "csh" in the tail.
120 */
121 int
122csh_like_shell(void)
123{
124 return (strstr((char *)gettail(p_sh), "csh") != NULL);
125}
126
127/*
Jason Cox6e823512021-08-29 12:36:49 +0200128 * Return TRUE when 'shell' has "fish" in the tail.
129 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200130 static int
Jason Cox6e823512021-08-29 12:36:49 +0200131fish_like_shell(void)
132{
133 return (strstr((char *)gettail(p_sh), "fish") != NULL);
134}
135
136/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200137 * Escape "string" for use as a shell argument with system().
138 * This uses single quotes, except when we know we need to use double quotes
139 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
140 * PowerShell also uses a novel escaping for enclosed single quotes - double
141 * them up.
142 * Escape a newline, depending on the 'shell' option.
143 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
144 * with "<" like "<cfile>".
145 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
146 * Returns the result in allocated memory, NULL if we have run out.
147 */
148 char_u *
149vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
150{
151 unsigned length;
152 char_u *p;
153 char_u *d;
154 char_u *escaped_string;
155 int l;
156 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200157 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200158 char_u *shname;
159 int powershell;
160# ifdef MSWIN
161 int double_quotes;
162# endif
163
164 // Only csh and similar shells expand '!' within single quotes. For sh and
165 // the like we must not put a backslash before it, it will be taken
166 // literally. If do_special is set the '!' will be escaped twice.
167 // Csh also needs to have "\n" escaped twice when do_special is set.
168 csh_like = csh_like_shell();
169
Jason Cox6e823512021-08-29 12:36:49 +0200170 // Fish shell uses '\' as an escape character within single quotes, so '\'
171 // itself must be escaped to get a literal '\'.
172 fish_like = fish_like_shell();
173
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000174 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200175 shname = gettail(p_sh);
176 powershell = strstr((char *)shname, "pwsh") != NULL;
177# ifdef MSWIN
178 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
179 // PowerShell only accepts single quotes so override shellslash.
180 double_quotes = !powershell && !p_ssl;
181# endif
182
183 // First count the number of extra bytes required.
184 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
185 for (p = string; *p != NUL; MB_PTR_ADV(p))
186 {
187# ifdef MSWIN
188 if (double_quotes)
189 {
190 if (*p == '"')
191 ++length; // " -> ""
192 }
193 else
194# endif
195 if (*p == '\'')
196 {
197 if (powershell)
198 length +=2; // ' => ''
199 else
200 length += 3; // ' => '\''
201 }
202 if ((*p == '\n' && (csh_like || do_newline))
203 || (*p == '!' && (csh_like || do_special)))
204 {
205 ++length; // insert backslash
206 if (csh_like && do_special)
207 ++length; // insert backslash
208 }
209 if (do_special && find_cmdline_var(p, &l) >= 0)
210 {
211 ++length; // insert backslash
212 p += l - 1;
213 }
Jason Cox6e823512021-08-29 12:36:49 +0200214 if (*p == '\\' && fish_like)
215 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200216 }
217
218 // Allocate memory for the result and fill it.
219 escaped_string = alloc(length);
220 if (escaped_string != NULL)
221 {
222 d = escaped_string;
223
224 // add opening quote
225# ifdef MSWIN
226 if (double_quotes)
227 *d++ = '"';
228 else
229# endif
230 *d++ = '\'';
231
232 for (p = string; *p != NUL; )
233 {
234# ifdef MSWIN
235 if (double_quotes)
236 {
237 if (*p == '"')
238 {
239 *d++ = '"';
240 *d++ = '"';
241 ++p;
242 continue;
243 }
244 }
245 else
246# endif
247 if (*p == '\'')
248 {
249 if (powershell)
250 {
251 *d++ = '\'';
252 *d++ = '\'';
253 }
254 else
255 {
256 *d++ = '\'';
257 *d++ = '\\';
258 *d++ = '\'';
259 *d++ = '\'';
260 }
261 ++p;
262 continue;
263 }
264 if ((*p == '\n' && (csh_like || do_newline))
265 || (*p == '!' && (csh_like || do_special)))
266 {
267 *d++ = '\\';
268 if (csh_like && do_special)
269 *d++ = '\\';
270 *d++ = *p++;
271 continue;
272 }
273 if (do_special && find_cmdline_var(p, &l) >= 0)
274 {
275 *d++ = '\\'; // insert backslash
276 while (--l >= 0) // copy the var
277 *d++ = *p++;
278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
341 if (p != NULL)
342 {
343 p2 = p;
344 while ((c = *p2) != NUL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200345 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200346 }
347}
348
349#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
350/*
351 * Make string "s" all upper-case and return it in allocated memory.
352 * Handles multi-byte characters as well as possible.
353 * Returns NULL when out of memory.
354 */
355 static char_u *
356strup_save(char_u *orig)
357{
358 char_u *p;
359 char_u *res;
360
361 res = p = vim_strsave(orig);
362
363 if (res != NULL)
364 while (*p != NUL)
365 {
366 int l;
367
368 if (enc_utf8)
369 {
370 int c, uc;
371 int newl;
372 char_u *s;
373
374 c = utf_ptr2char(p);
375 l = utf_ptr2len(p);
376 if (c == 0)
377 {
378 // overlong sequence, use only the first byte
379 c = *p;
380 l = 1;
381 }
382 uc = utf_toupper(c);
383
384 // Reallocate string when byte count changes. This is rare,
385 // thus it's OK to do another malloc()/free().
386 newl = utf_char2len(uc);
387 if (newl != l)
388 {
389 s = alloc(STRLEN(res) + 1 + newl - l);
390 if (s == NULL)
391 {
392 vim_free(res);
393 return NULL;
394 }
395 mch_memmove(s, res, p - res);
396 STRCPY(s + (p - res) + newl, p + l);
397 p = s + (p - res);
398 vim_free(res);
399 res = s;
400 }
401
402 utf_char2bytes(uc, p);
403 p += newl;
404 }
405 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
406 p += l; // skip multi-byte character
407 else
408 {
409 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
410 p++;
411 }
412 }
413
414 return res;
415}
416
417/*
418 * Make string "s" all lower-case and return it in allocated memory.
419 * Handles multi-byte characters as well as possible.
420 * Returns NULL when out of memory.
421 */
422 char_u *
423strlow_save(char_u *orig)
424{
425 char_u *p;
426 char_u *res;
427
428 res = p = vim_strsave(orig);
429
430 if (res != NULL)
431 while (*p != NUL)
432 {
433 int l;
434
435 if (enc_utf8)
436 {
437 int c, lc;
438 int newl;
439 char_u *s;
440
441 c = utf_ptr2char(p);
442 l = utf_ptr2len(p);
443 if (c == 0)
444 {
445 // overlong sequence, use only the first byte
446 c = *p;
447 l = 1;
448 }
449 lc = utf_tolower(c);
450
451 // Reallocate string when byte count changes. This is rare,
452 // thus it's OK to do another malloc()/free().
453 newl = utf_char2len(lc);
454 if (newl != l)
455 {
456 s = alloc(STRLEN(res) + 1 + newl - l);
457 if (s == NULL)
458 {
459 vim_free(res);
460 return NULL;
461 }
462 mch_memmove(s, res, p - res);
463 STRCPY(s + (p - res) + newl, p + l);
464 p = s + (p - res);
465 vim_free(res);
466 res = s;
467 }
468
469 utf_char2bytes(lc, p);
470 p += newl;
471 }
472 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
473 p += l; // skip multi-byte character
474 else
475 {
476 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
477 p++;
478 }
479 }
480
481 return res;
482}
483#endif
484
485/*
486 * delete spaces at the end of a string
487 */
488 void
489del_trailing_spaces(char_u *ptr)
490{
491 char_u *q;
492
493 q = ptr + STRLEN(ptr);
494 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
495 *q = NUL;
496}
497
498/*
499 * Like strncpy(), but always terminate the result with one NUL.
500 * "to" must be "len + 1" long!
501 */
502 void
503vim_strncpy(char_u *to, char_u *from, size_t len)
504{
505 STRNCPY(to, from, len);
506 to[len] = NUL;
507}
508
509/*
510 * Like strcat(), but make sure the result fits in "tosize" bytes and is
511 * always NUL terminated. "from" and "to" may overlap.
512 */
513 void
514vim_strcat(char_u *to, char_u *from, size_t tosize)
515{
516 size_t tolen = STRLEN(to);
517 size_t fromlen = STRLEN(from);
518
519 if (tolen + fromlen + 1 > tosize)
520 {
521 mch_memmove(to + tolen, from, tosize - tolen - 1);
522 to[tosize - 1] = NUL;
523 }
524 else
525 mch_memmove(to + tolen, from, fromlen + 1);
526}
527
528#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
529/*
530 * Compare two strings, ignoring case, using current locale.
531 * Doesn't work for multi-byte characters.
532 * return 0 for match, < 0 for smaller, > 0 for bigger
533 */
534 int
535vim_stricmp(char *s1, char *s2)
536{
537 int i;
538
539 for (;;)
540 {
541 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
542 if (i != 0)
543 return i; // this character different
544 if (*s1 == NUL)
545 break; // strings match until NUL
546 ++s1;
547 ++s2;
548 }
549 return 0; // strings match
550}
551#endif
552
553#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
554/*
555 * Compare two strings, for length "len", ignoring case, using current locale.
556 * Doesn't work for multi-byte characters.
557 * return 0 for match, < 0 for smaller, > 0 for bigger
558 */
559 int
560vim_strnicmp(char *s1, char *s2, size_t len)
561{
562 int i;
563
564 while (len > 0)
565 {
566 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
567 if (i != 0)
568 return i; // this character different
569 if (*s1 == NUL)
570 break; // strings match until NUL
571 ++s1;
572 ++s2;
573 --len;
574 }
575 return 0; // strings match
576}
577#endif
578
579/*
580 * Search for first occurrence of "c" in "string".
581 * Version of strchr() that handles unsigned char strings with characters from
582 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
583 * end of the string.
584 */
585 char_u *
586vim_strchr(char_u *string, int c)
587{
588 char_u *p;
589 int b;
590
591 p = string;
592 if (enc_utf8 && c >= 0x80)
593 {
594 while (*p != NUL)
595 {
596 int l = utfc_ptr2len(p);
597
598 // Avoid matching an illegal byte here.
599 if (utf_ptr2char(p) == c && l > 1)
600 return p;
601 p += l;
602 }
603 return NULL;
604 }
605 if (enc_dbcs != 0 && c > 255)
606 {
607 int n2 = c & 0xff;
608
609 c = ((unsigned)c >> 8) & 0xff;
610 while ((b = *p) != NUL)
611 {
612 if (b == c && p[1] == n2)
613 return p;
614 p += (*mb_ptr2len)(p);
615 }
616 return NULL;
617 }
618 if (has_mbyte)
619 {
620 while ((b = *p) != NUL)
621 {
622 if (b == c)
623 return p;
624 p += (*mb_ptr2len)(p);
625 }
626 return NULL;
627 }
628 while ((b = *p) != NUL)
629 {
630 if (b == c)
631 return p;
632 ++p;
633 }
634 return NULL;
635}
636
637/*
638 * Version of strchr() that only works for bytes and handles unsigned char
639 * strings with characters above 128 correctly. It also doesn't return a
640 * pointer to the NUL at the end of the string.
641 */
642 char_u *
643vim_strbyte(char_u *string, int c)
644{
645 char_u *p = string;
646
647 while (*p != NUL)
648 {
649 if (*p == c)
650 return p;
651 ++p;
652 }
653 return NULL;
654}
655
656/*
657 * Search for last occurrence of "c" in "string".
658 * Version of strrchr() that handles unsigned char strings with characters from
659 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
660 * end of the string.
661 * Return NULL if not found.
662 * Does not handle multi-byte char for "c"!
663 */
664 char_u *
665vim_strrchr(char_u *string, int c)
666{
667 char_u *retval = NULL;
668 char_u *p = string;
669
670 while (*p)
671 {
672 if (*p == c)
673 retval = p;
674 MB_PTR_ADV(p);
675 }
676 return retval;
677}
678
679/*
680 * Vim's version of strpbrk(), in case it's missing.
681 * Don't generate a prototype for this, causes problems when it's not used.
682 */
683#ifndef PROTO
684# ifndef HAVE_STRPBRK
685# ifdef vim_strpbrk
686# undef vim_strpbrk
687# endif
688 char_u *
689vim_strpbrk(char_u *s, char_u *charset)
690{
691 while (*s)
692 {
693 if (vim_strchr(charset, *s) != NULL)
694 return s;
695 MB_PTR_ADV(s);
696 }
697 return NULL;
698}
699# endif
700#endif
701
702/*
703 * Sort an array of strings.
704 */
705static int sort_compare(const void *s1, const void *s2);
706
707 static int
708sort_compare(const void *s1, const void *s2)
709{
710 return STRCMP(*(char **)s1, *(char **)s2);
711}
712
713 void
714sort_strings(
715 char_u **files,
716 int count)
717{
718 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
719}
720
721#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
722/*
723 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
724 * When "s" is NULL FALSE is returned.
725 */
726 int
727has_non_ascii(char_u *s)
728{
729 char_u *p;
730
731 if (s != NULL)
732 for (p = s; *p != NUL; ++p)
733 if (*p >= 128)
734 return TRUE;
735 return FALSE;
736}
737#endif
738
739/*
740 * Concatenate two strings and return the result in allocated memory.
741 * Returns NULL when out of memory.
742 */
743 char_u *
744concat_str(char_u *str1, char_u *str2)
745{
746 char_u *dest;
747 size_t l = str1 == NULL ? 0 : STRLEN(str1);
748
749 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
750 if (dest != NULL)
751 {
752 if (str1 == NULL)
753 *dest = NUL;
754 else
755 STRCPY(dest, str1);
756 if (str2 != NULL)
757 STRCPY(dest + l, str2);
758 }
759 return dest;
760}
761
762#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200763/*
764 * Return string "str" in ' quotes, doubling ' characters.
765 * If "str" is NULL an empty string is assumed.
766 * If "function" is TRUE make it function('string').
767 */
768 char_u *
769string_quote(char_u *str, int function)
770{
771 unsigned len;
772 char_u *p, *r, *s;
773
774 len = (function ? 13 : 3);
775 if (str != NULL)
776 {
777 len += (unsigned)STRLEN(str);
778 for (p = str; *p != NUL; MB_PTR_ADV(p))
779 if (*p == '\'')
780 ++len;
781 }
782 s = r = alloc(len);
783 if (r != NULL)
784 {
785 if (function)
786 {
787 STRCPY(r, "function('");
788 r += 10;
789 }
790 else
791 *r++ = '\'';
792 if (str != NULL)
793 for (p = str; *p != NUL; )
794 {
795 if (*p == '\'')
796 *r++ = '\'';
797 MB_COPY_CHAR(p, r);
798 }
799 *r++ = '\'';
800 if (function)
801 *r++ = ')';
802 *r++ = NUL;
803 }
804 return s;
805}
806
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000807/*
808 * Count the number of times "needle" occurs in string "haystack". Case is
809 * ignored if "ic" is TRUE.
810 */
811 long
812string_count(char_u *haystack, char_u *needle, int ic)
813{
814 long n = 0;
815 char_u *p = haystack;
816 char_u *next;
817
818 if (p == NULL || needle == NULL || *needle == NUL)
819 return 0;
820
821 if (ic)
822 {
823 size_t len = STRLEN(needle);
824
825 while (*p != NUL)
826 {
827 if (MB_STRNICMP(p, needle, len) == 0)
828 {
829 ++n;
830 p += len;
831 }
832 else
833 MB_PTR_ADV(p);
834 }
835 }
836 else
837 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
838 {
839 ++n;
840 p = next + STRLEN(needle);
841 }
842
843 return n;
844}
845
846/*
847 * Make a typval_T of the first character of "input" and store it in "output".
848 * Return OK or FAIL.
849 */
850 static int
851copy_first_char_to_tv(char_u *input, typval_T *output)
852{
853 char_u buf[MB_MAXBYTES + 1];
854 int len;
855
856 if (input == NULL || output == NULL)
857 return FAIL;
858
859 len = has_mbyte ? mb_ptr2len(input) : 1;
860 STRNCPY(buf, input, len);
861 buf[len] = NUL;
862 output->v_type = VAR_STRING;
863 output->vval.v_string = vim_strsave(buf);
864
865 return output->vval.v_string == NULL ? FAIL : OK;
866}
867
868/*
869 * Implementation of map() and filter() for a String. Apply "expr" to every
870 * character in string "str" and return the result in "rettv".
871 */
872 void
873string_filter_map(
874 char_u *str,
875 filtermap_T filtermap,
876 typval_T *expr,
877 typval_T *rettv)
878{
879 char_u *p;
880 typval_T tv;
881 garray_T ga;
882 int len = 0;
883 int idx = 0;
884 int rem;
885
886 rettv->v_type = VAR_STRING;
887 rettv->vval.v_string = NULL;
888
889 // set_vim_var_nr() doesn't set the type
890 set_vim_var_type(VV_KEY, VAR_NUMBER);
891
Bram Moolenaar04935fb2022-01-08 16:19:22 +0000892 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000893 for (p = str; *p != NUL; p += len)
894 {
895 typval_T newtv;
896
897 if (copy_first_char_to_tv(p, &tv) == FAIL)
898 break;
899 len = (int)STRLEN(tv.vval.v_string);
900
Bram Moolenaardd7eff02022-05-06 11:02:05 +0100901 newtv.v_type = VAR_UNKNOWN;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000902 set_vim_var_nr(VV_KEY, idx);
903 if (filter_map_one(&tv, expr, filtermap, &newtv, &rem) == FAIL
904 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000905 {
906 clear_tv(&newtv);
907 clear_tv(&tv);
908 break;
909 }
910 else if (filtermap != FILTERMAP_FILTER)
911 {
912 if (newtv.v_type != VAR_STRING)
913 {
914 clear_tv(&newtv);
915 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000916 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000917 break;
918 }
919 else
920 ga_concat(&ga, newtv.vval.v_string);
921 }
922 else if (!rem)
923 ga_concat(&ga, tv.vval.v_string);
924
925 clear_tv(&newtv);
926 clear_tv(&tv);
927
928 ++idx;
929 }
930 ga_append(&ga, NUL);
931 rettv->vval.v_string = ga.ga_data;
932}
933
934/*
935 * reduce() String argvars[0] using the function 'funcname' with arguments in
936 * 'funcexe' starting with the initial value argvars[2] and return the result
937 * in 'rettv'.
938 */
939 void
940string_reduce(
941 typval_T *argvars,
942 char_u *func_name,
943 funcexe_T *funcexe,
944 typval_T *rettv)
945{
946 char_u *p = tv_get_string(&argvars[0]);
947 int len;
948 typval_T argv[3];
949 int r;
950 int called_emsg_start = called_emsg;
951
952 if (argvars[2].v_type == VAR_UNKNOWN)
953 {
954 if (*p == NUL)
955 {
Bram Moolenaare70cec92022-01-01 14:25:55 +0000956 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000957 return;
958 }
959 if (copy_first_char_to_tv(p, rettv) == FAIL)
960 return;
961 p += STRLEN(rettv->vval.v_string);
962 }
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +0100963 else if (check_for_string_arg(argvars, 2) == FAIL)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000964 return;
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000965 else
966 copy_tv(&argvars[2], rettv);
967
968 for ( ; *p != NUL; p += len)
969 {
970 argv[0] = *rettv;
971 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
972 break;
973 len = (int)STRLEN(argv[1].vval.v_string);
974 r = call_func(func_name, -1, rettv, 2, argv, funcexe);
975 clear_tv(&argv[0]);
976 clear_tv(&argv[1]);
977 if (r == FAIL || called_emsg != called_emsg_start)
978 return;
979 }
980}
981
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200982 static void
983byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
984{
985 char_u *t;
986 char_u *str;
987 varnumber_T idx;
988
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200989 rettv->vval.v_number = -1;
990
991 if (in_vim9script()
992 && (check_for_string_arg(argvars, 0) == FAIL
993 || check_for_number_arg(argvars, 1) == FAIL))
994 return;
995
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200996 str = tv_get_string_chk(&argvars[0]);
997 idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200998 if (str == NULL || idx < 0)
999 return;
1000
1001 t = str;
1002 for ( ; idx > 0; idx--)
1003 {
1004 if (*t == NUL) // EOL reached
1005 return;
1006 if (enc_utf8 && comp)
1007 t += utf_ptr2len(t);
1008 else
1009 t += (*mb_ptr2len)(t);
1010 }
1011 rettv->vval.v_number = (varnumber_T)(t - str);
1012}
1013
1014/*
1015 * "byteidx()" function
1016 */
1017 void
1018f_byteidx(typval_T *argvars, typval_T *rettv)
1019{
1020 byteidx(argvars, rettv, FALSE);
1021}
1022
1023/*
1024 * "byteidxcomp()" function
1025 */
1026 void
1027f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1028{
1029 byteidx(argvars, rettv, TRUE);
1030}
1031
1032/*
1033 * "charidx()" function
1034 */
1035 void
1036f_charidx(typval_T *argvars, typval_T *rettv)
1037{
1038 char_u *str;
1039 varnumber_T idx;
1040 varnumber_T countcc = FALSE;
1041 char_u *p;
1042 int len;
1043 int (*ptr2len)(char_u *);
1044
1045 rettv->vval.v_number = -1;
1046
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001047 if ((check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001048 || check_for_number_arg(argvars, 1) == FAIL
1049 || check_for_opt_bool_arg(argvars, 2) == FAIL))
1050 return;
1051
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001052 str = tv_get_string_chk(&argvars[0]);
1053 idx = tv_get_number_chk(&argvars[1], NULL);
1054 if (str == NULL || idx < 0)
1055 return;
1056
1057 if (argvars[2].v_type != VAR_UNKNOWN)
1058 countcc = tv_get_bool(&argvars[2]);
1059 if (countcc < 0 || countcc > 1)
1060 {
1061 semsg(_(e_using_number_as_bool_nr), countcc);
1062 return;
1063 }
1064
1065 if (enc_utf8 && countcc)
1066 ptr2len = utf_ptr2len;
1067 else
1068 ptr2len = mb_ptr2len;
1069
1070 for (p = str, len = 0; p <= str + idx; len++)
1071 {
1072 if (*p == NUL)
1073 return;
1074 p += ptr2len(p);
1075 }
1076
1077 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1078}
1079
1080/*
1081 * "str2list()" function
1082 */
1083 void
1084f_str2list(typval_T *argvars, typval_T *rettv)
1085{
1086 char_u *p;
1087 int utf8 = FALSE;
1088
1089 if (rettv_list_alloc(rettv) == FAIL)
1090 return;
1091
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001092 if (in_vim9script()
1093 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001094 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001095 return;
1096
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001097 if (argvars[1].v_type != VAR_UNKNOWN)
1098 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1099
1100 p = tv_get_string(&argvars[0]);
1101
1102 if (has_mbyte || utf8)
1103 {
1104 int (*ptr2len)(char_u *);
1105 int (*ptr2char)(char_u *);
1106
1107 if (utf8 || enc_utf8)
1108 {
1109 ptr2len = utf_ptr2len;
1110 ptr2char = utf_ptr2char;
1111 }
1112 else
1113 {
1114 ptr2len = mb_ptr2len;
1115 ptr2char = mb_ptr2char;
1116 }
1117
1118 for ( ; *p != NUL; p += (*ptr2len)(p))
1119 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1120 }
1121 else
1122 for ( ; *p != NUL; ++p)
1123 list_append_number(rettv->vval.v_list, *p);
1124}
1125
1126/*
1127 * "str2nr()" function
1128 */
1129 void
1130f_str2nr(typval_T *argvars, typval_T *rettv)
1131{
1132 int base = 10;
1133 char_u *p;
1134 varnumber_T n;
1135 int what = 0;
1136 int isneg;
1137
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001138 if (in_vim9script()
1139 && (check_for_string_arg(argvars, 0) == FAIL
1140 || check_for_opt_number_arg(argvars, 1) == FAIL
1141 || (argvars[1].v_type != VAR_UNKNOWN
1142 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1143 return;
1144
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001145 if (argvars[1].v_type != VAR_UNKNOWN)
1146 {
1147 base = (int)tv_get_number(&argvars[1]);
1148 if (base != 2 && base != 8 && base != 10 && base != 16)
1149 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001150 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001151 return;
1152 }
1153 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1154 what |= STR2NR_QUOTE;
1155 }
1156
1157 p = skipwhite(tv_get_string_strict(&argvars[0]));
1158 isneg = (*p == '-');
1159 if (*p == '+' || *p == '-')
1160 p = skipwhite(p + 1);
1161 switch (base)
1162 {
1163 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1164 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1165 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1166 }
1167 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
1168 // Text after the number is silently ignored.
1169 if (isneg)
1170 rettv->vval.v_number = -n;
1171 else
1172 rettv->vval.v_number = n;
1173
1174}
1175
1176/*
1177 * "strgetchar()" function
1178 */
1179 void
1180f_strgetchar(typval_T *argvars, typval_T *rettv)
1181{
1182 char_u *str;
1183 int len;
1184 int error = FALSE;
1185 int charidx;
1186 int byteidx = 0;
1187
1188 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001189
1190 if (in_vim9script()
1191 && (check_for_string_arg(argvars, 0) == FAIL
1192 || check_for_number_arg(argvars, 1) == FAIL))
1193 return;
1194
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001195 str = tv_get_string_chk(&argvars[0]);
1196 if (str == NULL)
1197 return;
1198 len = (int)STRLEN(str);
1199 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1200 if (error)
1201 return;
1202
1203 while (charidx >= 0 && byteidx < len)
1204 {
1205 if (charidx == 0)
1206 {
1207 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1208 break;
1209 }
1210 --charidx;
1211 byteidx += MB_CPTR2LEN(str + byteidx);
1212 }
1213}
1214
1215/*
1216 * "stridx()" function
1217 */
1218 void
1219f_stridx(typval_T *argvars, typval_T *rettv)
1220{
1221 char_u buf[NUMBUFLEN];
1222 char_u *needle;
1223 char_u *haystack;
1224 char_u *save_haystack;
1225 char_u *pos;
1226 int start_idx;
1227
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001228 if (in_vim9script()
1229 && (check_for_string_arg(argvars, 0) == FAIL
1230 || check_for_string_arg(argvars, 1) == FAIL
1231 || check_for_opt_number_arg(argvars, 2) == FAIL))
1232 return;
1233
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001234 needle = tv_get_string_chk(&argvars[1]);
1235 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1236 rettv->vval.v_number = -1;
1237 if (needle == NULL || haystack == NULL)
1238 return; // type error; errmsg already given
1239
1240 if (argvars[2].v_type != VAR_UNKNOWN)
1241 {
1242 int error = FALSE;
1243
1244 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1245 if (error || start_idx >= (int)STRLEN(haystack))
1246 return;
1247 if (start_idx >= 0)
1248 haystack += start_idx;
1249 }
1250
1251 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1252 if (pos != NULL)
1253 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1254}
1255
1256/*
1257 * "string()" function
1258 */
1259 void
1260f_string(typval_T *argvars, typval_T *rettv)
1261{
1262 char_u *tofree;
1263 char_u numbuf[NUMBUFLEN];
1264
1265 rettv->v_type = VAR_STRING;
1266 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1267 get_copyID());
1268 // Make a copy if we have a value but it's not in allocated memory.
1269 if (rettv->vval.v_string != NULL && tofree == NULL)
1270 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1271}
1272
1273/*
1274 * "strlen()" function
1275 */
1276 void
1277f_strlen(typval_T *argvars, typval_T *rettv)
1278{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001279 if (in_vim9script()
1280 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1281 return;
1282
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001283 rettv->vval.v_number = (varnumber_T)(STRLEN(
1284 tv_get_string(&argvars[0])));
1285}
1286
1287 static void
1288strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1289{
1290 char_u *s = tv_get_string(&argvars[0]);
1291 varnumber_T len = 0;
1292 int (*func_mb_ptr2char_adv)(char_u **pp);
1293
1294 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1295 while (*s != NUL)
1296 {
1297 func_mb_ptr2char_adv(&s);
1298 ++len;
1299 }
1300 rettv->vval.v_number = len;
1301}
1302
1303/*
1304 * "strcharlen()" function
1305 */
1306 void
1307f_strcharlen(typval_T *argvars, typval_T *rettv)
1308{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001309 if (in_vim9script()
1310 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1311 return;
1312
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001313 strchar_common(argvars, rettv, TRUE);
1314}
1315
1316/*
1317 * "strchars()" function
1318 */
1319 void
1320f_strchars(typval_T *argvars, typval_T *rettv)
1321{
1322 varnumber_T skipcc = FALSE;
1323
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001324 if (in_vim9script()
1325 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001326 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001327 return;
1328
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001329 if (argvars[1].v_type != VAR_UNKNOWN)
1330 skipcc = tv_get_bool(&argvars[1]);
1331 if (skipcc < 0 || skipcc > 1)
1332 semsg(_(e_using_number_as_bool_nr), skipcc);
1333 else
1334 strchar_common(argvars, rettv, skipcc);
1335}
1336
1337/*
1338 * "strdisplaywidth()" function
1339 */
1340 void
1341f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1342{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001343 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001344 int col = 0;
1345
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001346 rettv->vval.v_number = -1;
1347
1348 if (in_vim9script()
1349 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001350 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001351 return;
1352
1353 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001354 if (argvars[1].v_type != VAR_UNKNOWN)
1355 col = (int)tv_get_number(&argvars[1]);
1356
1357 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1358}
1359
1360/*
1361 * "strwidth()" function
1362 */
1363 void
1364f_strwidth(typval_T *argvars, typval_T *rettv)
1365{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001366 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001367
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001368 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1369 return;
1370
1371 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001372 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1373}
1374
1375/*
1376 * "strcharpart()" function
1377 */
1378 void
1379f_strcharpart(typval_T *argvars, typval_T *rettv)
1380{
1381 char_u *p;
1382 int nchar;
1383 int nbyte = 0;
1384 int charlen;
1385 int skipcc = FALSE;
1386 int len = 0;
1387 int slen;
1388 int error = FALSE;
1389
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001390 if (in_vim9script()
1391 && (check_for_string_arg(argvars, 0) == FAIL
1392 || check_for_number_arg(argvars, 1) == FAIL
1393 || check_for_opt_number_arg(argvars, 2) == FAIL
1394 || (argvars[2].v_type != VAR_UNKNOWN
1395 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1396 return;
1397
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001398 p = tv_get_string(&argvars[0]);
1399 slen = (int)STRLEN(p);
1400
1401 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1402 if (!error)
1403 {
1404 if (argvars[2].v_type != VAR_UNKNOWN
1405 && argvars[3].v_type != VAR_UNKNOWN)
1406 {
1407 skipcc = tv_get_bool(&argvars[3]);
1408 if (skipcc < 0 || skipcc > 1)
1409 {
1410 semsg(_(e_using_number_as_bool_nr), skipcc);
1411 return;
1412 }
1413 }
1414
1415 if (nchar > 0)
1416 while (nchar > 0 && nbyte < slen)
1417 {
1418 if (skipcc)
1419 nbyte += mb_ptr2len(p + nbyte);
1420 else
1421 nbyte += MB_CPTR2LEN(p + nbyte);
1422 --nchar;
1423 }
1424 else
1425 nbyte = nchar;
1426 if (argvars[2].v_type != VAR_UNKNOWN)
1427 {
1428 charlen = (int)tv_get_number(&argvars[2]);
1429 while (charlen > 0 && nbyte + len < slen)
1430 {
1431 int off = nbyte + len;
1432
1433 if (off < 0)
1434 len += 1;
1435 else
1436 {
1437 if (skipcc)
1438 len += mb_ptr2len(p + off);
1439 else
1440 len += MB_CPTR2LEN(p + off);
1441 }
1442 --charlen;
1443 }
1444 }
1445 else
1446 len = slen - nbyte; // default: all bytes that are available.
1447 }
1448
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001449 // Only return the overlap between the specified part and the actual
1450 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001451 if (nbyte < 0)
1452 {
1453 len += nbyte;
1454 nbyte = 0;
1455 }
1456 else if (nbyte > slen)
1457 nbyte = slen;
1458 if (len < 0)
1459 len = 0;
1460 else if (nbyte + len > slen)
1461 len = slen - nbyte;
1462
1463 rettv->v_type = VAR_STRING;
1464 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1465}
1466
1467/*
1468 * "strpart()" function
1469 */
1470 void
1471f_strpart(typval_T *argvars, typval_T *rettv)
1472{
1473 char_u *p;
1474 int n;
1475 int len;
1476 int slen;
1477 int error = FALSE;
1478
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001479 if (in_vim9script()
1480 && (check_for_string_arg(argvars, 0) == FAIL
1481 || check_for_number_arg(argvars, 1) == FAIL
1482 || check_for_opt_number_arg(argvars, 2) == FAIL
1483 || (argvars[2].v_type != VAR_UNKNOWN
1484 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1485 return;
1486
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001487 p = tv_get_string(&argvars[0]);
1488 slen = (int)STRLEN(p);
1489
1490 n = (int)tv_get_number_chk(&argvars[1], &error);
1491 if (error)
1492 len = 0;
1493 else if (argvars[2].v_type != VAR_UNKNOWN)
1494 len = (int)tv_get_number(&argvars[2]);
1495 else
1496 len = slen - n; // default len: all bytes that are available.
1497
1498 // Only return the overlap between the specified part and the actual
1499 // string.
1500 if (n < 0)
1501 {
1502 len += n;
1503 n = 0;
1504 }
1505 else if (n > slen)
1506 n = slen;
1507 if (len < 0)
1508 len = 0;
1509 else if (n + len > slen)
1510 len = slen - n;
1511
1512 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1513 {
1514 int off;
1515
1516 // length in characters
1517 for (off = n; off < slen && len > 0; --len)
1518 off += mb_ptr2len(p + off);
1519 len = off - n;
1520 }
1521
1522 rettv->v_type = VAR_STRING;
1523 rettv->vval.v_string = vim_strnsave(p + n, len);
1524}
1525
1526/*
1527 * "strridx()" function
1528 */
1529 void
1530f_strridx(typval_T *argvars, typval_T *rettv)
1531{
1532 char_u buf[NUMBUFLEN];
1533 char_u *needle;
1534 char_u *haystack;
1535 char_u *rest;
1536 char_u *lastmatch = NULL;
1537 int haystack_len, end_idx;
1538
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001539 if (in_vim9script()
1540 && (check_for_string_arg(argvars, 0) == FAIL
1541 || check_for_string_arg(argvars, 1) == FAIL
1542 || check_for_opt_number_arg(argvars, 2) == FAIL))
1543 return;
1544
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001545 needle = tv_get_string_chk(&argvars[1]);
1546 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1547
1548 rettv->vval.v_number = -1;
1549 if (needle == NULL || haystack == NULL)
1550 return; // type error; errmsg already given
1551
1552 haystack_len = (int)STRLEN(haystack);
1553 if (argvars[2].v_type != VAR_UNKNOWN)
1554 {
1555 // Third argument: upper limit for index
1556 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1557 if (end_idx < 0)
1558 return; // can never find a match
1559 }
1560 else
1561 end_idx = haystack_len;
1562
1563 if (*needle == NUL)
1564 {
1565 // Empty string matches past the end.
1566 lastmatch = haystack + end_idx;
1567 }
1568 else
1569 {
1570 for (rest = haystack; *rest != '\0'; ++rest)
1571 {
1572 rest = (char_u *)strstr((char *)rest, (char *)needle);
1573 if (rest == NULL || rest > haystack + end_idx)
1574 break;
1575 lastmatch = rest;
1576 }
1577 }
1578
1579 if (lastmatch == NULL)
1580 rettv->vval.v_number = -1;
1581 else
1582 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1583}
1584
1585/*
1586 * "strtrans()" function
1587 */
1588 void
1589f_strtrans(typval_T *argvars, typval_T *rettv)
1590{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001591 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1592 return;
1593
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001594 rettv->v_type = VAR_STRING;
1595 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1596}
1597
1598/*
1599 * "tolower(string)" function
1600 */
1601 void
1602f_tolower(typval_T *argvars, typval_T *rettv)
1603{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001604 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1605 return;
1606
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001607 rettv->v_type = VAR_STRING;
1608 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1609}
1610
1611/*
1612 * "toupper(string)" function
1613 */
1614 void
1615f_toupper(typval_T *argvars, typval_T *rettv)
1616{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001617 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1618 return;
1619
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001620 rettv->v_type = VAR_STRING;
1621 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1622}
1623
1624/*
1625 * "tr(string, fromstr, tostr)" function
1626 */
1627 void
1628f_tr(typval_T *argvars, typval_T *rettv)
1629{
1630 char_u *in_str;
1631 char_u *fromstr;
1632 char_u *tostr;
1633 char_u *p;
1634 int inlen;
1635 int fromlen;
1636 int tolen;
1637 int idx;
1638 char_u *cpstr;
1639 int cplen;
1640 int first = TRUE;
1641 char_u buf[NUMBUFLEN];
1642 char_u buf2[NUMBUFLEN];
1643 garray_T ga;
1644
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001645 if (in_vim9script()
1646 && (check_for_string_arg(argvars, 0) == FAIL
1647 || check_for_string_arg(argvars, 1) == FAIL
1648 || check_for_string_arg(argvars, 2) == FAIL))
1649 return;
1650
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001651 in_str = tv_get_string(&argvars[0]);
1652 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1653 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1654
1655 // Default return value: empty string.
1656 rettv->v_type = VAR_STRING;
1657 rettv->vval.v_string = NULL;
1658 if (fromstr == NULL || tostr == NULL)
1659 return; // type error; errmsg already given
Bram Moolenaar04935fb2022-01-08 16:19:22 +00001660 ga_init2(&ga, sizeof(char), 80);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001661
1662 if (!has_mbyte)
1663 // not multi-byte: fromstr and tostr must be the same length
1664 if (STRLEN(fromstr) != STRLEN(tostr))
1665 {
1666error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001667 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001668 ga_clear(&ga);
1669 return;
1670 }
1671
1672 // fromstr and tostr have to contain the same number of chars
1673 while (*in_str != NUL)
1674 {
1675 if (has_mbyte)
1676 {
1677 inlen = (*mb_ptr2len)(in_str);
1678 cpstr = in_str;
1679 cplen = inlen;
1680 idx = 0;
1681 for (p = fromstr; *p != NUL; p += fromlen)
1682 {
1683 fromlen = (*mb_ptr2len)(p);
1684 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1685 {
1686 for (p = tostr; *p != NUL; p += tolen)
1687 {
1688 tolen = (*mb_ptr2len)(p);
1689 if (idx-- == 0)
1690 {
1691 cplen = tolen;
1692 cpstr = p;
1693 break;
1694 }
1695 }
1696 if (*p == NUL) // tostr is shorter than fromstr
1697 goto error;
1698 break;
1699 }
1700 ++idx;
1701 }
1702
1703 if (first && cpstr == in_str)
1704 {
1705 // Check that fromstr and tostr have the same number of
1706 // (multi-byte) characters. Done only once when a character
1707 // of in_str doesn't appear in fromstr.
1708 first = FALSE;
1709 for (p = tostr; *p != NUL; p += tolen)
1710 {
1711 tolen = (*mb_ptr2len)(p);
1712 --idx;
1713 }
1714 if (idx != 0)
1715 goto error;
1716 }
1717
1718 (void)ga_grow(&ga, cplen);
1719 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1720 ga.ga_len += cplen;
1721
1722 in_str += inlen;
1723 }
1724 else
1725 {
1726 // When not using multi-byte chars we can do it faster.
1727 p = vim_strchr(fromstr, *in_str);
1728 if (p != NULL)
1729 ga_append(&ga, tostr[p - fromstr]);
1730 else
1731 ga_append(&ga, *in_str);
1732 ++in_str;
1733 }
1734 }
1735
1736 // add a terminating NUL
1737 (void)ga_grow(&ga, 1);
1738 ga_append(&ga, NUL);
1739
1740 rettv->vval.v_string = ga.ga_data;
1741}
1742
1743/*
1744 * "trim({expr})" function
1745 */
1746 void
1747f_trim(typval_T *argvars, typval_T *rettv)
1748{
1749 char_u buf1[NUMBUFLEN];
1750 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001751 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001752 char_u *mask = NULL;
1753 char_u *tail;
1754 char_u *prev;
1755 char_u *p;
1756 int c1;
1757 int dir = 0;
1758
1759 rettv->v_type = VAR_STRING;
1760 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001761
1762 if (in_vim9script()
1763 && (check_for_string_arg(argvars, 0) == FAIL
1764 || check_for_opt_string_arg(argvars, 1) == FAIL
1765 || (argvars[1].v_type != VAR_UNKNOWN
1766 && check_for_opt_number_arg(argvars, 2) == FAIL)))
1767 return;
1768
1769 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001770 if (head == NULL)
1771 return;
1772
Yegappan Lakshmanan8deb2b32022-09-02 15:15:27 +01001773 if (check_for_opt_string_arg(argvars, 1) == FAIL)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001774 return;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001775
1776 if (argvars[1].v_type == VAR_STRING)
1777 {
1778 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1779
1780 if (argvars[2].v_type != VAR_UNKNOWN)
1781 {
1782 int error = 0;
1783
1784 // leading or trailing characters to trim
1785 dir = (int)tv_get_number_chk(&argvars[2], &error);
1786 if (error)
1787 return;
1788 if (dir < 0 || dir > 2)
1789 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001790 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001791 return;
1792 }
1793 }
1794 }
1795
1796 if (dir == 0 || dir == 1)
1797 {
1798 // Trim leading characters
1799 while (*head != NUL)
1800 {
1801 c1 = PTR2CHAR(head);
1802 if (mask == NULL)
1803 {
1804 if (c1 > ' ' && c1 != 0xa0)
1805 break;
1806 }
1807 else
1808 {
1809 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1810 if (c1 == PTR2CHAR(p))
1811 break;
1812 if (*p == NUL)
1813 break;
1814 }
1815 MB_PTR_ADV(head);
1816 }
1817 }
1818
1819 tail = head + STRLEN(head);
1820 if (dir == 0 || dir == 2)
1821 {
1822 // Trim trailing characters
1823 for (; tail > head; tail = prev)
1824 {
1825 prev = tail;
1826 MB_PTR_BACK(head, prev);
1827 c1 = PTR2CHAR(prev);
1828 if (mask == NULL)
1829 {
1830 if (c1 > ' ' && c1 != 0xa0)
1831 break;
1832 }
1833 else
1834 {
1835 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1836 if (c1 == PTR2CHAR(p))
1837 break;
1838 if (*p == NUL)
1839 break;
1840 }
1841 }
1842 }
1843 rettv->vval.v_string = vim_strnsave(head, tail - head);
1844}
1845
// Message given by tv_nr(), tv_str() and tv_float() when the requested entry
// in the argument list has type VAR_UNKNOWN.
static char *e_printf = N_(e_insufficient_arguments_for_printf);
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001847
1848/*
1849 * Get number argument from "idxp" entry in "tvs". First entry is 1.
1850 */
1851 static varnumber_T
1852tv_nr(typval_T *tvs, int *idxp)
1853{
1854 int idx = *idxp - 1;
1855 varnumber_T n = 0;
1856 int err = FALSE;
1857
1858 if (tvs[idx].v_type == VAR_UNKNOWN)
1859 emsg(_(e_printf));
1860 else
1861 {
1862 ++*idxp;
1863 n = tv_get_number_chk(&tvs[idx], &err);
1864 if (err)
1865 n = 0;
1866 }
1867 return n;
1868}
1869
1870/*
1871 * Get string argument from "idxp" entry in "tvs". First entry is 1.
1872 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
1873 * are not converted to a string.
1874 * If "tofree" is not NULL echo_string() is used. All types are converted to
1875 * a string with the same format as ":echo". The caller must free "*tofree".
1876 * Returns NULL for an error.
1877 */
1878 static char *
1879tv_str(typval_T *tvs, int *idxp, char_u **tofree)
1880{
1881 int idx = *idxp - 1;
1882 char *s = NULL;
1883 static char_u numbuf[NUMBUFLEN];
1884
1885 if (tvs[idx].v_type == VAR_UNKNOWN)
1886 emsg(_(e_printf));
1887 else
1888 {
1889 ++*idxp;
1890 if (tofree != NULL)
1891 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
1892 else
1893 s = (char *)tv_get_string_chk(&tvs[idx]);
1894 }
1895 return s;
1896}
1897
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001898/*
1899 * Get float argument from "idxp" entry in "tvs". First entry is 1.
1900 */
1901 static double
1902tv_float(typval_T *tvs, int *idxp)
1903{
1904 int idx = *idxp - 1;
1905 double f = 0;
1906
1907 if (tvs[idx].v_type == VAR_UNKNOWN)
1908 emsg(_(e_printf));
1909 else
1910 {
1911 ++*idxp;
1912 if (tvs[idx].v_type == VAR_FLOAT)
1913 f = tvs[idx].vval.v_float;
1914 else if (tvs[idx].v_type == VAR_NUMBER)
1915 f = (double)tvs[idx].vval.v_number;
1916 else
Bram Moolenaar9d00e4a2022-01-05 17:49:15 +00001917 emsg(_(e_expected_float_argument_for_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001918 }
1919 return f;
1920}
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001921
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001922#endif
1923
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * "positive", "force_sign" and "space_for_positive" are used as 0 or 1 to
 * select the sign prefix; an upper case "fmt_spec" selects the upper case
 * variant.
 */
    static const char *
infinity_str(int positive,
             char fmt_spec,
             int force_sign,
             int space_for_positive)
{
    static const char *table[] =
    {
        "-inf", "inf", "+inf", " inf",
        "-INF", "INF", "+INF", " INF"
    };
    // Index in the lower case half: 0 for negative, otherwise 1 plus one for
    // a forced sign plus one more when that sign is a space.
    int sign_idx = positive * (1 + force_sign * (1 + space_for_positive));

    // The upper case variants are four entries further.
    return table[ASCII_ISUPPER(fmt_spec) ? sign_idx + 4 : sign_idx];
}
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001945
1946/*
1947 * This code was included to provide a portable vsnprintf() and snprintf().
1948 * Some systems may provide their own, but we always use this one for
1949 * consistency.
1950 *
1951 * This code is based on snprintf.c - a portable implementation of snprintf
1952 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
1953 * Included with permission. It was heavily modified to fit in Vim.
1954 * The original code, including useful comments, can be found here:
1955 * http://www.ijs.si/software/snprintf/
1956 *
1957 * This snprintf() only supports the following conversion specifiers:
 * s, S, c, d, u, b, B, o, x, X, p (and synonyms: i, D, U, O - see below)
1959 * with flags: '-', '+', ' ', '0' and '#'.
1960 * An asterisk is supported for field width as well as precision.
1961 *
1962 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
1963 *
1964 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
1965 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
1966 *
1967 * The locale is not used, the string is used as a byte string. This is only
1968 * relevant for double-byte encodings where the second byte may be '%'.
1969 *
1970 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
1971 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
1972 *
1973 * The return value is the number of characters which would be generated
1974 * for the given input, excluding the trailing NUL. If this value
1975 * is greater or equal to "str_m", not all characters from the result
1976 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
1977 * are discarded. If "str_m" is greater than zero it is guaranteed
1978 * the resulting string will be NUL-terminated.
1979 */
1980
1981/*
1982 * When va_list is not supported we only define vim_snprintf().
1983 *
1984 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
1985 * "typval_T". When the latter is not used it must be NULL.
1986 */
1987
1988// When generating prototypes all of this is skipped, cproto doesn't
1989// understand this.
1990#ifndef PROTO
1991
// Like vim_snprintf() but append to the string: the formatted text is
// written after the text already in "str".  "str_m" is the size of the whole
// buffer; when the existing text already fills it no space is passed on.
// Returns what vim_vsnprintf() returns for the appended part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    size_t	used = STRLEN(str);
    size_t	room = str_m > used ? str_m - used : 0;
    va_list	ap;
    int		result;

    va_start(ap, fmt);
    result = vim_vsnprintf(str + used, room, fmt, ap);
    va_end(ap);
    return result;
}
2010
// Format into "str", which has room for "str_m" bytes.  Collects the variable
// arguments and passes them to vim_vsnprintf(), whose result is returned.
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	args;
    int		result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2022
2023 int
2024vim_vsnprintf(
2025 char *str,
2026 size_t str_m,
2027 const char *fmt,
2028 va_list ap)
2029{
2030 return vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);
2031}
2032
2033 int
2034vim_vsnprintf_typval(
2035 char *str,
2036 size_t str_m,
2037 const char *fmt,
2038 va_list ap,
2039 typval_T *tvs)
2040{
2041 size_t str_l = 0;
2042 const char *p = fmt;
2043 int arg_idx = 1;
2044
2045 if (p == NULL)
2046 p = "";
2047 while (*p != NUL)
2048 {
2049 if (*p != '%')
2050 {
2051 char *q = strchr(p + 1, '%');
2052 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2053
2054 // Copy up to the next '%' or NUL without any changes.
2055 if (str_l < str_m)
2056 {
2057 size_t avail = str_m - str_l;
2058
2059 mch_memmove(str + str_l, p, n > avail ? avail : n);
2060 }
2061 p += n;
2062 str_l += n;
2063 }
2064 else
2065 {
2066 size_t min_field_width = 0, precision = 0;
2067 int zero_padding = 0, precision_specified = 0, justify_left = 0;
2068 int alternate_form = 0, force_sign = 0;
2069
2070 // If both the ' ' and '+' flags appear, the ' ' flag should be
2071 // ignored.
2072 int space_for_positive = 1;
2073
2074 // allowed values: \0, h, l, L
2075 char length_modifier = '\0';
2076
2077 // temporary buffer for simple numeric->string conversion
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002078# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002079 // That sounds reasonable to use as the maximum
2080 // printable.
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002081 char tmp[TMP_LEN];
2082
2083 // string address in case of string argument
2084 const char *str_arg = NULL;
2085
2086 // natural field width of arg without padding and sign
2087 size_t str_arg_l;
2088
2089 // unsigned char argument value - only defined for c conversion.
2090 // N.B. standard explicitly states the char argument for the c
2091 // conversion is unsigned
2092 unsigned char uchar_arg;
2093
2094 // number of zeros to be inserted for numeric conversions as
2095 // required by the precision or minimal field width
2096 size_t number_of_zeros_to_pad = 0;
2097
2098 // index into tmp where zero padding is to be inserted
2099 size_t zero_padding_insertion_ind = 0;
2100
2101 // current conversion specifier character
2102 char fmt_spec = '\0';
2103
2104 // buffer for 's' and 'S' specs
2105 char_u *tofree = NULL;
2106
2107
2108 p++; // skip '%'
2109
2110 // parse flags
2111 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2112 || *p == '#' || *p == '\'')
2113 {
2114 switch (*p)
2115 {
2116 case '0': zero_padding = 1; break;
2117 case '-': justify_left = 1; break;
2118 case '+': force_sign = 1; space_for_positive = 0; break;
2119 case ' ': force_sign = 1;
2120 // If both the ' ' and '+' flags appear, the ' '
2121 // flag should be ignored
2122 break;
2123 case '#': alternate_form = 1; break;
2124 case '\'': break;
2125 }
2126 p++;
2127 }
2128 // If the '0' and '-' flags both appear, the '0' flag should be
2129 // ignored.
2130
2131 // parse field width
2132 if (*p == '*')
2133 {
2134 int j;
2135
2136 p++;
2137 j =
2138# if defined(FEAT_EVAL)
2139 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2140# endif
2141 va_arg(ap, int);
2142 if (j >= 0)
2143 min_field_width = j;
2144 else
2145 {
2146 min_field_width = -j;
2147 justify_left = 1;
2148 }
2149 }
2150 else if (VIM_ISDIGIT((int)(*p)))
2151 {
2152 // size_t could be wider than unsigned int; make sure we treat
2153 // argument like common implementations do
2154 unsigned int uj = *p++ - '0';
2155
2156 while (VIM_ISDIGIT((int)(*p)))
2157 uj = 10 * uj + (unsigned int)(*p++ - '0');
2158 min_field_width = uj;
2159 }
2160
2161 // parse precision
2162 if (*p == '.')
2163 {
2164 p++;
2165 precision_specified = 1;
2166 if (*p == '*')
2167 {
2168 int j;
2169
2170 j =
2171# if defined(FEAT_EVAL)
2172 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2173# endif
2174 va_arg(ap, int);
2175 p++;
2176 if (j >= 0)
2177 precision = j;
2178 else
2179 {
2180 precision_specified = 0;
2181 precision = 0;
2182 }
2183 }
2184 else if (VIM_ISDIGIT((int)(*p)))
2185 {
2186 // size_t could be wider than unsigned int; make sure we
2187 // treat argument like common implementations do
2188 unsigned int uj = *p++ - '0';
2189
2190 while (VIM_ISDIGIT((int)(*p)))
2191 uj = 10 * uj + (unsigned int)(*p++ - '0');
2192 precision = uj;
2193 }
2194 }
2195
2196 // parse 'h', 'l' and 'll' length modifiers
2197 if (*p == 'h' || *p == 'l')
2198 {
2199 length_modifier = *p;
2200 p++;
2201 if (length_modifier == 'l' && *p == 'l')
2202 {
2203 // double l = __int64 / varnumber_T
2204 length_modifier = 'L';
2205 p++;
2206 }
2207 }
2208 fmt_spec = *p;
2209
2210 // common synonyms:
2211 switch (fmt_spec)
2212 {
2213 case 'i': fmt_spec = 'd'; break;
2214 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
2215 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
2216 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
2217 default: break;
2218 }
2219
2220# if defined(FEAT_EVAL)
2221 switch (fmt_spec)
2222 {
2223 case 'd': case 'u': case 'o': case 'x': case 'X':
2224 if (tvs != NULL && length_modifier == '\0')
2225 length_modifier = 'L';
2226 }
2227# endif
2228
2229 // get parameter value, do initial processing
2230 switch (fmt_spec)
2231 {
2232 // '%' and 'c' behave similar to 's' regarding flags and field
2233 // widths
2234 case '%':
2235 case 'c':
2236 case 's':
2237 case 'S':
2238 str_arg_l = 1;
2239 switch (fmt_spec)
2240 {
2241 case '%':
2242 str_arg = p;
2243 break;
2244
2245 case 'c':
2246 {
2247 int j;
2248
2249 j =
2250# if defined(FEAT_EVAL)
2251 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2252# endif
2253 va_arg(ap, int);
2254 // standard demands unsigned char
2255 uchar_arg = (unsigned char)j;
2256 str_arg = (char *)&uchar_arg;
2257 break;
2258 }
2259
2260 case 's':
2261 case 'S':
2262 str_arg =
2263# if defined(FEAT_EVAL)
2264 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
2265# endif
2266 va_arg(ap, char *);
2267 if (str_arg == NULL)
2268 {
2269 str_arg = "[NULL]";
2270 str_arg_l = 6;
2271 }
2272 // make sure not to address string beyond the specified
2273 // precision !!!
2274 else if (!precision_specified)
2275 str_arg_l = strlen(str_arg);
2276 // truncate string if necessary as requested by precision
2277 else if (precision == 0)
2278 str_arg_l = 0;
2279 else
2280 {
2281 // Don't put the #if inside memchr(), it can be a
2282 // macro.
2283 // memchr on HP does not like n > 2^31 !!!
2284 char *q = memchr(str_arg, '\0',
2285 precision <= (size_t)0x7fffffffL ? precision
2286 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00002287
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002288 str_arg_l = (q == NULL) ? precision
2289 : (size_t)(q - str_arg);
2290 }
2291 if (fmt_spec == 'S')
2292 {
presuku1f2453f2021-11-24 15:32:57 +00002293 char_u *p1;
2294 size_t i;
2295 int cell;
presukud85fccd2021-11-20 19:38:31 +00002296
presuku1f2453f2021-11-24 15:32:57 +00002297 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002298 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00002299 {
2300 cell = mb_ptr2cells(p1);
2301 if (precision_specified && i + cell > precision)
2302 break;
2303 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002304 }
presuku1f2453f2021-11-24 15:32:57 +00002305
2306 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00002307 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00002308 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002309 }
2310 break;
2311
2312 default:
2313 break;
2314 }
2315 break;
2316
2317 case 'd': case 'u':
2318 case 'b': case 'B':
2319 case 'o':
2320 case 'x': case 'X':
2321 case 'p':
2322 {
2323 // NOTE: the u, b, o, x, X and p conversion specifiers
2324 // imply the value is unsigned; d implies a signed
2325 // value
2326
2327 // 0 if numeric argument is zero (or if pointer is
2328 // NULL for 'p'), +1 if greater than zero (or nonzero
2329 // for unsigned arguments), -1 if negative (unsigned
2330 // argument is never negative)
2331 int arg_sign = 0;
2332
2333 // only set for length modifier h, or for no length
2334 // modifiers
2335 int int_arg = 0;
2336 unsigned int uint_arg = 0;
2337
2338 // only set for length modifier l
2339 long int long_arg = 0;
2340 unsigned long int ulong_arg = 0;
2341
2342 // only set for length modifier ll
2343 varnumber_T llong_arg = 0;
2344 uvarnumber_T ullong_arg = 0;
2345
2346 // only set for b conversion
2347 uvarnumber_T bin_arg = 0;
2348
2349 // pointer argument value -only defined for p
2350 // conversion
2351 void *ptr_arg = NULL;
2352
2353 if (fmt_spec == 'p')
2354 {
2355 length_modifier = '\0';
2356 ptr_arg =
2357# if defined(FEAT_EVAL)
2358 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
2359 NULL) :
2360# endif
2361 va_arg(ap, void *);
2362 if (ptr_arg != NULL)
2363 arg_sign = 1;
2364 }
2365 else if (fmt_spec == 'b' || fmt_spec == 'B')
2366 {
2367 bin_arg =
2368# if defined(FEAT_EVAL)
2369 tvs != NULL ?
2370 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
2371# endif
2372 va_arg(ap, uvarnumber_T);
2373 if (bin_arg != 0)
2374 arg_sign = 1;
2375 }
2376 else if (fmt_spec == 'd')
2377 {
2378 // signed
2379 switch (length_modifier)
2380 {
2381 case '\0':
2382 case 'h':
2383 // char and short arguments are passed as int.
2384 int_arg =
2385# if defined(FEAT_EVAL)
2386 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2387# endif
2388 va_arg(ap, int);
2389 if (int_arg > 0)
2390 arg_sign = 1;
2391 else if (int_arg < 0)
2392 arg_sign = -1;
2393 break;
2394 case 'l':
2395 long_arg =
2396# if defined(FEAT_EVAL)
2397 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2398# endif
2399 va_arg(ap, long int);
2400 if (long_arg > 0)
2401 arg_sign = 1;
2402 else if (long_arg < 0)
2403 arg_sign = -1;
2404 break;
2405 case 'L':
2406 llong_arg =
2407# if defined(FEAT_EVAL)
2408 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2409# endif
2410 va_arg(ap, varnumber_T);
2411 if (llong_arg > 0)
2412 arg_sign = 1;
2413 else if (llong_arg < 0)
2414 arg_sign = -1;
2415 break;
2416 }
2417 }
2418 else
2419 {
2420 // unsigned
2421 switch (length_modifier)
2422 {
2423 case '\0':
2424 case 'h':
2425 uint_arg =
2426# if defined(FEAT_EVAL)
2427 tvs != NULL ? (unsigned)
2428 tv_nr(tvs, &arg_idx) :
2429# endif
2430 va_arg(ap, unsigned int);
2431 if (uint_arg != 0)
2432 arg_sign = 1;
2433 break;
2434 case 'l':
2435 ulong_arg =
2436# if defined(FEAT_EVAL)
2437 tvs != NULL ? (unsigned long)
2438 tv_nr(tvs, &arg_idx) :
2439# endif
2440 va_arg(ap, unsigned long int);
2441 if (ulong_arg != 0)
2442 arg_sign = 1;
2443 break;
2444 case 'L':
2445 ullong_arg =
2446# if defined(FEAT_EVAL)
2447 tvs != NULL ? (uvarnumber_T)
2448 tv_nr(tvs, &arg_idx) :
2449# endif
2450 va_arg(ap, uvarnumber_T);
2451 if (ullong_arg != 0)
2452 arg_sign = 1;
2453 break;
2454 }
2455 }
2456
2457 str_arg = tmp;
2458 str_arg_l = 0;
2459
2460 // NOTE:
2461 // For d, i, u, o, x, and X conversions, if precision is
2462 // specified, the '0' flag should be ignored. This is so
2463 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
2464 // FreeBSD, NetBSD; but not with Perl.
2465 if (precision_specified)
2466 zero_padding = 0;
2467 if (fmt_spec == 'd')
2468 {
2469 if (force_sign && arg_sign >= 0)
2470 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
2471 // leave negative numbers for sprintf to handle, to
2472 // avoid handling tricky cases like (short int)-32768
2473 }
2474 else if (alternate_form)
2475 {
2476 if (arg_sign != 0
2477 && (fmt_spec == 'b' || fmt_spec == 'B'
2478 || fmt_spec == 'x' || fmt_spec == 'X') )
2479 {
2480 tmp[str_arg_l++] = '0';
2481 tmp[str_arg_l++] = fmt_spec;
2482 }
2483 // alternate form should have no effect for p
2484 // conversion, but ...
2485 }
2486
2487 zero_padding_insertion_ind = str_arg_l;
2488 if (!precision_specified)
2489 precision = 1; // default precision is 1
2490 if (precision == 0 && arg_sign == 0)
2491 {
2492 // When zero value is formatted with an explicit
2493 // precision 0, the resulting formatted string is
2494 // empty (d, i, u, b, B, o, x, X, p).
2495 }
2496 else
2497 {
2498 char f[6];
2499 int f_l = 0;
2500
2501 // construct a simple format string for sprintf
2502 f[f_l++] = '%';
2503 if (!length_modifier)
2504 ;
2505 else if (length_modifier == 'L')
2506 {
2507# ifdef MSWIN
2508 f[f_l++] = 'I';
2509 f[f_l++] = '6';
2510 f[f_l++] = '4';
2511# else
2512 f[f_l++] = 'l';
2513 f[f_l++] = 'l';
2514# endif
2515 }
2516 else
2517 f[f_l++] = length_modifier;
2518 f[f_l++] = fmt_spec;
2519 f[f_l++] = '\0';
2520
2521 if (fmt_spec == 'p')
2522 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
2523 else if (fmt_spec == 'b' || fmt_spec == 'B')
2524 {
2525 char b[8 * sizeof(uvarnumber_T)];
2526 size_t b_l = 0;
2527 uvarnumber_T bn = bin_arg;
2528
2529 do
2530 {
2531 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
2532 bn >>= 1;
2533 }
2534 while (bn != 0);
2535
2536 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
2537 str_arg_l += b_l;
2538 }
2539 else if (fmt_spec == 'd')
2540 {
2541 // signed
2542 switch (length_modifier)
2543 {
2544 case '\0': str_arg_l += sprintf(
2545 tmp + str_arg_l, f,
2546 int_arg);
2547 break;
2548 case 'h': str_arg_l += sprintf(
2549 tmp + str_arg_l, f,
2550 (short)int_arg);
2551 break;
2552 case 'l': str_arg_l += sprintf(
2553 tmp + str_arg_l, f, long_arg);
2554 break;
2555 case 'L': str_arg_l += sprintf(
2556 tmp + str_arg_l, f, llong_arg);
2557 break;
2558 }
2559 }
2560 else
2561 {
2562 // unsigned
2563 switch (length_modifier)
2564 {
2565 case '\0': str_arg_l += sprintf(
2566 tmp + str_arg_l, f,
2567 uint_arg);
2568 break;
2569 case 'h': str_arg_l += sprintf(
2570 tmp + str_arg_l, f,
2571 (unsigned short)uint_arg);
2572 break;
2573 case 'l': str_arg_l += sprintf(
2574 tmp + str_arg_l, f, ulong_arg);
2575 break;
2576 case 'L': str_arg_l += sprintf(
2577 tmp + str_arg_l, f, ullong_arg);
2578 break;
2579 }
2580 }
2581
2582 // include the optional minus sign and possible
2583 // "0x" in the region before the zero padding
2584 // insertion point
2585 if (zero_padding_insertion_ind < str_arg_l
2586 && tmp[zero_padding_insertion_ind] == '-')
2587 zero_padding_insertion_ind++;
2588 if (zero_padding_insertion_ind + 1 < str_arg_l
2589 && tmp[zero_padding_insertion_ind] == '0'
2590 && (tmp[zero_padding_insertion_ind + 1] == 'x'
2591 || tmp[zero_padding_insertion_ind + 1] == 'X'))
2592 zero_padding_insertion_ind += 2;
2593 }
2594
2595 {
2596 size_t num_of_digits = str_arg_l
2597 - zero_padding_insertion_ind;
2598
2599 if (alternate_form && fmt_spec == 'o'
2600 // unless zero is already the first
2601 // character
2602 && !(zero_padding_insertion_ind < str_arg_l
2603 && tmp[zero_padding_insertion_ind] == '0'))
2604 {
2605 // assure leading zero for alternate-form
2606 // octal numbers
2607 if (!precision_specified
2608 || precision < num_of_digits + 1)
2609 {
2610 // precision is increased to force the
2611 // first character to be zero, except if a
2612 // zero value is formatted with an
2613 // explicit precision of zero
2614 precision = num_of_digits + 1;
2615 }
2616 }
2617 // zero padding to specified precision?
2618 if (num_of_digits < precision)
2619 number_of_zeros_to_pad = precision - num_of_digits;
2620 }
2621 // zero padding to specified minimal field width?
2622 if (!justify_left && zero_padding)
2623 {
2624 int n = (int)(min_field_width - (str_arg_l
2625 + number_of_zeros_to_pad));
2626 if (n > 0)
2627 number_of_zeros_to_pad += n;
2628 }
2629 break;
2630 }
2631
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002632 case 'f':
2633 case 'F':
2634 case 'e':
2635 case 'E':
2636 case 'g':
2637 case 'G':
2638 {
2639 // Floating point.
2640 double f;
2641 double abs_f;
2642 char format[40];
2643 int l;
2644 int remove_trailing_zeroes = FALSE;
2645
2646 f =
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002647# if defined(FEAT_EVAL)
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002648 tvs != NULL ? tv_float(tvs, &arg_idx) :
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002649# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002650 va_arg(ap, double);
2651 abs_f = f < 0 ? -f : f;
2652
2653 if (fmt_spec == 'g' || fmt_spec == 'G')
2654 {
2655 // Would be nice to use %g directly, but it prints
2656 // "1.0" as "1", we don't want that.
2657 if ((abs_f >= 0.001 && abs_f < 10000000.0)
2658 || abs_f == 0.0)
2659 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
2660 else
2661 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
2662 remove_trailing_zeroes = TRUE;
2663 }
2664
2665 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002666# ifdef VAX
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002667 abs_f > 1.0e38
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002668# else
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002669 abs_f > 1.0e307
Bram Moolenaar73e28dc2022-09-17 21:08:33 +01002670# endif
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002671 )
2672 {
2673 // Avoid a buffer overflow
2674 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
2675 force_sign, space_for_positive));
2676 str_arg_l = STRLEN(tmp);
2677 zero_padding = 0;
2678 }
2679 else
2680 {
2681 if (isnan(f))
2682 {
2683 // Not a number: nan or NAN
2684 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
2685 : "nan");
2686 str_arg_l = 3;
2687 zero_padding = 0;
2688 }
2689 else if (isinf(f))
2690 {
2691 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
2692 force_sign, space_for_positive));
2693 str_arg_l = STRLEN(tmp);
2694 zero_padding = 0;
2695 }
2696 else
2697 {
2698 // Regular float number
2699 format[0] = '%';
2700 l = 1;
2701 if (force_sign)
2702 format[l++] = space_for_positive ? ' ' : '+';
2703 if (precision_specified)
2704 {
2705 size_t max_prec = TMP_LEN - 10;
2706
2707 // Make sure we don't get more digits than we
2708 // have room for.
2709 if ((fmt_spec == 'f' || fmt_spec == 'F')
2710 && abs_f > 1.0)
2711 max_prec -= (size_t)log10(abs_f);
2712 if (precision > max_prec)
2713 precision = max_prec;
2714 l += sprintf(format + l, ".%d", (int)precision);
2715 }
2716 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
2717 format[l + 1] = NUL;
2718
2719 str_arg_l = sprintf(tmp, format, f);
2720 }
2721
2722 if (remove_trailing_zeroes)
2723 {
2724 int i;
2725 char *tp;
2726
2727 // Using %g or %G: remove superfluous zeroes.
2728 if (fmt_spec == 'f' || fmt_spec == 'F')
2729 tp = tmp + str_arg_l - 1;
2730 else
2731 {
2732 tp = (char *)vim_strchr((char_u *)tmp,
2733 fmt_spec == 'e' ? 'e' : 'E');
2734 if (tp != NULL)
2735 {
2736 // Remove superfluous '+' and leading
2737 // zeroes from the exponent.
2738 if (tp[1] == '+')
2739 {
2740 // Change "1.0e+07" to "1.0e07"
2741 STRMOVE(tp + 1, tp + 2);
2742 --str_arg_l;
2743 }
2744 i = (tp[1] == '-') ? 2 : 1;
2745 while (tp[i] == '0')
2746 {
2747 // Change "1.0e07" to "1.0e7"
2748 STRMOVE(tp + i, tp + i + 1);
2749 --str_arg_l;
2750 }
2751 --tp;
2752 }
2753 }
2754
2755 if (tp != NULL && !precision_specified)
2756 // Remove trailing zeroes, but keep the one
2757 // just after a dot.
2758 while (tp > tmp + 2 && *tp == '0'
2759 && tp[-1] != '.')
2760 {
2761 STRMOVE(tp, tp + 1);
2762 --tp;
2763 --str_arg_l;
2764 }
2765 }
2766 else
2767 {
2768 char *tp;
2769
2770 // Be consistent: some printf("%e") use 1.0e+12
2771 // and some 1.0e+012. Remove one zero in the last
2772 // case.
2773 tp = (char *)vim_strchr((char_u *)tmp,
2774 fmt_spec == 'e' ? 'e' : 'E');
2775 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
2776 && tp[2] == '0'
2777 && vim_isdigit(tp[3])
2778 && vim_isdigit(tp[4]))
2779 {
2780 STRMOVE(tp + 2, tp + 3);
2781 --str_arg_l;
2782 }
2783 }
2784 }
2785 if (zero_padding && min_field_width > str_arg_l
2786 && (tmp[0] == '-' || force_sign))
2787 {
2788 // padding 0's should be inserted after the sign
2789 number_of_zeros_to_pad = min_field_width - str_arg_l;
2790 zero_padding_insertion_ind = 1;
2791 }
2792 str_arg = tmp;
2793 break;
2794 }
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002795
2796 default:
2797 // unrecognized conversion specifier, keep format string
2798 // as-is
2799 zero_padding = 0; // turn zero padding off for non-numeric
2800 // conversion
2801 justify_left = 1;
2802 min_field_width = 0; // reset flags
2803
2804 // discard the unrecognized conversion, just keep *
2805 // the unrecognized conversion character
2806 str_arg = p;
2807 str_arg_l = 0;
2808 if (*p != NUL)
2809 str_arg_l++; // include invalid conversion specifier
2810 // unchanged if not at end-of-string
2811 break;
2812 }
2813
2814 if (*p != NUL)
2815 p++; // step over the just processed conversion specifier
2816
2817 // insert padding to the left as requested by min_field_width;
2818 // this does not include the zero padding in case of numerical
2819 // conversions
2820 if (!justify_left)
2821 {
2822 // left padding with blank or zero
2823 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
2824
2825 if (pn > 0)
2826 {
2827 if (str_l < str_m)
2828 {
2829 size_t avail = str_m - str_l;
2830
2831 vim_memset(str + str_l, zero_padding ? '0' : ' ',
2832 (size_t)pn > avail ? avail
2833 : (size_t)pn);
2834 }
2835 str_l += pn;
2836 }
2837 }
2838
2839 // zero padding as requested by the precision or by the minimal
2840 // field width for numeric conversions required?
2841 if (number_of_zeros_to_pad == 0)
2842 {
2843 // will not copy first part of numeric right now, *
2844 // force it to be copied later in its entirety
2845 zero_padding_insertion_ind = 0;
2846 }
2847 else
2848 {
2849 // insert first part of numerics (sign or '0x') before zero
2850 // padding
2851 int zn = (int)zero_padding_insertion_ind;
2852
2853 if (zn > 0)
2854 {
2855 if (str_l < str_m)
2856 {
2857 size_t avail = str_m - str_l;
2858
2859 mch_memmove(str + str_l, str_arg,
2860 (size_t)zn > avail ? avail
2861 : (size_t)zn);
2862 }
2863 str_l += zn;
2864 }
2865
2866 // insert zero padding as requested by the precision or min
2867 // field width
2868 zn = (int)number_of_zeros_to_pad;
2869 if (zn > 0)
2870 {
2871 if (str_l < str_m)
2872 {
2873 size_t avail = str_m - str_l;
2874
2875 vim_memset(str + str_l, '0',
2876 (size_t)zn > avail ? avail
2877 : (size_t)zn);
2878 }
2879 str_l += zn;
2880 }
2881 }
2882
2883 // insert formatted string
2884 // (or as-is conversion specifier for unknown conversions)
2885 {
2886 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
2887
2888 if (sn > 0)
2889 {
2890 if (str_l < str_m)
2891 {
2892 size_t avail = str_m - str_l;
2893
2894 mch_memmove(str + str_l,
2895 str_arg + zero_padding_insertion_ind,
2896 (size_t)sn > avail ? avail : (size_t)sn);
2897 }
2898 str_l += sn;
2899 }
2900 }
2901
2902 // insert right padding
2903 if (justify_left)
2904 {
2905 // right blank padding to the field width
2906 int pn = (int)(min_field_width
2907 - (str_arg_l + number_of_zeros_to_pad));
2908
2909 if (pn > 0)
2910 {
2911 if (str_l < str_m)
2912 {
2913 size_t avail = str_m - str_l;
2914
2915 vim_memset(str + str_l, ' ',
2916 (size_t)pn > avail ? avail
2917 : (size_t)pn);
2918 }
2919 str_l += pn;
2920 }
2921 }
2922 vim_free(tofree);
2923 }
2924 }
2925
2926 if (str_m > 0)
2927 {
2928 // make sure the string is nul-terminated even at the expense of
2929 // overwriting the last character (shouldn't happen, but just in case)
2930 //
2931 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
2932 }
2933
2934 if (tvs != NULL && tvs[arg_idx - 1].v_type != VAR_UNKNOWN)
Bram Moolenaar677658a2022-01-05 16:09:06 +00002935 emsg(_(e_too_many_arguments_to_printf));
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002936
2937 // Return the number of characters formatted (excluding trailing nul
2938 // character), that is, the number of characters that would have been
2939 // written to the buffer if it were large enough.
2940 return (int)str_l;
2941}
2942
2943#endif // PROTO