blob: 6abf89a35b83bcf542b90ceb2a919479b8f44415 [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020014#define USING_FLOAT_STUFF
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020015#include "vim.h"
16
17/*
18 * Copy "string" into newly allocated memory.
19 */
20 char_u *
21vim_strsave(char_u *string)
22{
23 char_u *p;
24 size_t len;
25
26 len = STRLEN(string) + 1;
27 p = alloc(len);
28 if (p != NULL)
29 mch_memmove(p, string, len);
30 return p;
31}
32
33/*
34 * Copy up to "len" bytes of "string" into newly allocated memory and
35 * terminate with a NUL.
36 * The allocated memory always has size "len + 1", also when "string" is
37 * shorter.
38 */
39 char_u *
40vim_strnsave(char_u *string, size_t len)
41{
42 char_u *p;
43
44 p = alloc(len + 1);
45 if (p != NULL)
46 {
47 STRNCPY(p, string, len);
48 p[len] = NUL;
49 }
50 return p;
51}
52
53/*
54 * Same as vim_strsave(), but any characters found in esc_chars are preceded
55 * by a backslash.
56 */
57 char_u *
58vim_strsave_escaped(char_u *string, char_u *esc_chars)
59{
60 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
61}
62
63/*
64 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
65 * characters where rem_backslash() would remove the backslash.
66 * Escape the characters with "cc".
67 */
68 char_u *
69vim_strsave_escaped_ext(
70 char_u *string,
71 char_u *esc_chars,
72 int cc,
73 int bsl)
74{
75 char_u *p;
76 char_u *p2;
77 char_u *escaped_string;
78 unsigned length;
79 int l;
80
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +020081 // First count the number of backslashes required.
82 // Then allocate the memory and insert them.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +020083 length = 1; // count the trailing NUL
84 for (p = string; *p; p++)
85 {
86 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
87 {
88 length += l; // count a multibyte char
89 p += l - 1;
90 continue;
91 }
92 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
93 ++length; // count a backslash
94 ++length; // count an ordinary char
95 }
96 escaped_string = alloc(length);
97 if (escaped_string != NULL)
98 {
99 p2 = escaped_string;
100 for (p = string; *p; p++)
101 {
102 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
103 {
104 mch_memmove(p2, p, (size_t)l);
105 p2 += l;
106 p += l - 1; // skip multibyte char
107 continue;
108 }
109 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
110 *p2++ = cc;
111 *p2++ = *p;
112 }
113 *p2 = NUL;
114 }
115 return escaped_string;
116}
117
118/*
119 * Return TRUE when 'shell' has "csh" in the tail.
120 */
121 int
122csh_like_shell(void)
123{
124 return (strstr((char *)gettail(p_sh), "csh") != NULL);
125}
126
127/*
Jason Cox6e823512021-08-29 12:36:49 +0200128 * Return TRUE when 'shell' has "fish" in the tail.
129 */
Dominique Pellede05ae72021-08-30 19:57:34 +0200130 static int
Jason Cox6e823512021-08-29 12:36:49 +0200131fish_like_shell(void)
132{
133 return (strstr((char *)gettail(p_sh), "fish") != NULL);
134}
135
136/*
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200137 * Escape "string" for use as a shell argument with system().
138 * This uses single quotes, except when we know we need to use double quotes
139 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
140 * PowerShell also uses a novel escaping for enclosed single quotes - double
141 * them up.
142 * Escape a newline, depending on the 'shell' option.
143 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
144 * with "<" like "<cfile>".
145 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
146 * Returns the result in allocated memory, NULL if we have run out.
147 */
148 char_u *
149vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
150{
151 unsigned length;
152 char_u *p;
153 char_u *d;
154 char_u *escaped_string;
155 int l;
156 int csh_like;
Jason Cox6e823512021-08-29 12:36:49 +0200157 int fish_like;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200158 char_u *shname;
159 int powershell;
160# ifdef MSWIN
161 int double_quotes;
162# endif
163
164 // Only csh and similar shells expand '!' within single quotes. For sh and
165 // the like we must not put a backslash before it, it will be taken
166 // literally. If do_special is set the '!' will be escaped twice.
167 // Csh also needs to have "\n" escaped twice when do_special is set.
168 csh_like = csh_like_shell();
169
Jason Cox6e823512021-08-29 12:36:49 +0200170 // Fish shell uses '\' as an escape character within single quotes, so '\'
171 // itself must be escaped to get a literal '\'.
172 fish_like = fish_like_shell();
173
Dominique Pelleaf4a61a2021-12-27 17:21:41 +0000174 // PowerShell uses its own version for quoting single quotes
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200175 shname = gettail(p_sh);
176 powershell = strstr((char *)shname, "pwsh") != NULL;
177# ifdef MSWIN
178 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
179 // PowerShell only accepts single quotes so override shellslash.
180 double_quotes = !powershell && !p_ssl;
181# endif
182
183 // First count the number of extra bytes required.
184 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
185 for (p = string; *p != NUL; MB_PTR_ADV(p))
186 {
187# ifdef MSWIN
188 if (double_quotes)
189 {
190 if (*p == '"')
191 ++length; // " -> ""
192 }
193 else
194# endif
195 if (*p == '\'')
196 {
197 if (powershell)
198 length +=2; // ' => ''
199 else
200 length += 3; // ' => '\''
201 }
202 if ((*p == '\n' && (csh_like || do_newline))
203 || (*p == '!' && (csh_like || do_special)))
204 {
205 ++length; // insert backslash
206 if (csh_like && do_special)
207 ++length; // insert backslash
208 }
209 if (do_special && find_cmdline_var(p, &l) >= 0)
210 {
211 ++length; // insert backslash
212 p += l - 1;
213 }
Jason Cox6e823512021-08-29 12:36:49 +0200214 if (*p == '\\' && fish_like)
215 ++length; // insert backslash
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200216 }
217
218 // Allocate memory for the result and fill it.
219 escaped_string = alloc(length);
220 if (escaped_string != NULL)
221 {
222 d = escaped_string;
223
224 // add opening quote
225# ifdef MSWIN
226 if (double_quotes)
227 *d++ = '"';
228 else
229# endif
230 *d++ = '\'';
231
232 for (p = string; *p != NUL; )
233 {
234# ifdef MSWIN
235 if (double_quotes)
236 {
237 if (*p == '"')
238 {
239 *d++ = '"';
240 *d++ = '"';
241 ++p;
242 continue;
243 }
244 }
245 else
246# endif
247 if (*p == '\'')
248 {
249 if (powershell)
250 {
251 *d++ = '\'';
252 *d++ = '\'';
253 }
254 else
255 {
256 *d++ = '\'';
257 *d++ = '\\';
258 *d++ = '\'';
259 *d++ = '\'';
260 }
261 ++p;
262 continue;
263 }
264 if ((*p == '\n' && (csh_like || do_newline))
265 || (*p == '!' && (csh_like || do_special)))
266 {
267 *d++ = '\\';
268 if (csh_like && do_special)
269 *d++ = '\\';
270 *d++ = *p++;
271 continue;
272 }
273 if (do_special && find_cmdline_var(p, &l) >= 0)
274 {
275 *d++ = '\\'; // insert backslash
276 while (--l >= 0) // copy the var
277 *d++ = *p++;
278 continue;
279 }
Jason Cox6e823512021-08-29 12:36:49 +0200280 if (*p == '\\' && fish_like)
281 {
282 *d++ = '\\';
283 *d++ = *p++;
Bram Moolenaar66315972021-09-01 14:31:51 +0200284 continue;
Jason Cox6e823512021-08-29 12:36:49 +0200285 }
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200286
287 MB_COPY_CHAR(p, d);
288 }
289
290 // add terminating quote and finish with a NUL
291# ifdef MSWIN
292 if (double_quotes)
293 *d++ = '"';
294 else
295# endif
296 *d++ = '\'';
297 *d = NUL;
298 }
299
300 return escaped_string;
301}
302
303/*
304 * Like vim_strsave(), but make all characters uppercase.
305 * This uses ASCII lower-to-upper case translation, language independent.
306 */
307 char_u *
308vim_strsave_up(char_u *string)
309{
310 char_u *p1;
311
312 p1 = vim_strsave(string);
313 vim_strup(p1);
314 return p1;
315}
316
317/*
318 * Like vim_strnsave(), but make all characters uppercase.
319 * This uses ASCII lower-to-upper case translation, language independent.
320 */
321 char_u *
322vim_strnsave_up(char_u *string, size_t len)
323{
324 char_u *p1;
325
326 p1 = vim_strnsave(string, len);
327 vim_strup(p1);
328 return p1;
329}
330
331/*
332 * ASCII lower-to-upper case translation, language independent.
333 */
334 void
335vim_strup(
336 char_u *p)
337{
338 char_u *p2;
339 int c;
340
341 if (p != NULL)
342 {
343 p2 = p;
344 while ((c = *p2) != NUL)
345#ifdef EBCDIC
346 *p2++ = isalpha(c) ? toupper(c) : c;
347#else
348 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
349#endif
350 }
351}
352
353#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
354/*
355 * Make string "s" all upper-case and return it in allocated memory.
356 * Handles multi-byte characters as well as possible.
357 * Returns NULL when out of memory.
358 */
359 static char_u *
360strup_save(char_u *orig)
361{
362 char_u *p;
363 char_u *res;
364
365 res = p = vim_strsave(orig);
366
367 if (res != NULL)
368 while (*p != NUL)
369 {
370 int l;
371
372 if (enc_utf8)
373 {
374 int c, uc;
375 int newl;
376 char_u *s;
377
378 c = utf_ptr2char(p);
379 l = utf_ptr2len(p);
380 if (c == 0)
381 {
382 // overlong sequence, use only the first byte
383 c = *p;
384 l = 1;
385 }
386 uc = utf_toupper(c);
387
388 // Reallocate string when byte count changes. This is rare,
389 // thus it's OK to do another malloc()/free().
390 newl = utf_char2len(uc);
391 if (newl != l)
392 {
393 s = alloc(STRLEN(res) + 1 + newl - l);
394 if (s == NULL)
395 {
396 vim_free(res);
397 return NULL;
398 }
399 mch_memmove(s, res, p - res);
400 STRCPY(s + (p - res) + newl, p + l);
401 p = s + (p - res);
402 vim_free(res);
403 res = s;
404 }
405
406 utf_char2bytes(uc, p);
407 p += newl;
408 }
409 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
410 p += l; // skip multi-byte character
411 else
412 {
413 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
414 p++;
415 }
416 }
417
418 return res;
419}
420
421/*
422 * Make string "s" all lower-case and return it in allocated memory.
423 * Handles multi-byte characters as well as possible.
424 * Returns NULL when out of memory.
425 */
426 char_u *
427strlow_save(char_u *orig)
428{
429 char_u *p;
430 char_u *res;
431
432 res = p = vim_strsave(orig);
433
434 if (res != NULL)
435 while (*p != NUL)
436 {
437 int l;
438
439 if (enc_utf8)
440 {
441 int c, lc;
442 int newl;
443 char_u *s;
444
445 c = utf_ptr2char(p);
446 l = utf_ptr2len(p);
447 if (c == 0)
448 {
449 // overlong sequence, use only the first byte
450 c = *p;
451 l = 1;
452 }
453 lc = utf_tolower(c);
454
455 // Reallocate string when byte count changes. This is rare,
456 // thus it's OK to do another malloc()/free().
457 newl = utf_char2len(lc);
458 if (newl != l)
459 {
460 s = alloc(STRLEN(res) + 1 + newl - l);
461 if (s == NULL)
462 {
463 vim_free(res);
464 return NULL;
465 }
466 mch_memmove(s, res, p - res);
467 STRCPY(s + (p - res) + newl, p + l);
468 p = s + (p - res);
469 vim_free(res);
470 res = s;
471 }
472
473 utf_char2bytes(lc, p);
474 p += newl;
475 }
476 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
477 p += l; // skip multi-byte character
478 else
479 {
480 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
481 p++;
482 }
483 }
484
485 return res;
486}
487#endif
488
489/*
490 * delete spaces at the end of a string
491 */
492 void
493del_trailing_spaces(char_u *ptr)
494{
495 char_u *q;
496
497 q = ptr + STRLEN(ptr);
498 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
499 *q = NUL;
500}
501
502/*
503 * Like strncpy(), but always terminate the result with one NUL.
504 * "to" must be "len + 1" long!
505 */
506 void
507vim_strncpy(char_u *to, char_u *from, size_t len)
508{
509 STRNCPY(to, from, len);
510 to[len] = NUL;
511}
512
513/*
514 * Like strcat(), but make sure the result fits in "tosize" bytes and is
515 * always NUL terminated. "from" and "to" may overlap.
516 */
517 void
518vim_strcat(char_u *to, char_u *from, size_t tosize)
519{
520 size_t tolen = STRLEN(to);
521 size_t fromlen = STRLEN(from);
522
523 if (tolen + fromlen + 1 > tosize)
524 {
525 mch_memmove(to + tolen, from, tosize - tolen - 1);
526 to[tosize - 1] = NUL;
527 }
528 else
529 mch_memmove(to + tolen, from, fromlen + 1);
530}
531
532#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
533/*
534 * Compare two strings, ignoring case, using current locale.
535 * Doesn't work for multi-byte characters.
536 * return 0 for match, < 0 for smaller, > 0 for bigger
537 */
538 int
539vim_stricmp(char *s1, char *s2)
540{
541 int i;
542
543 for (;;)
544 {
545 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
546 if (i != 0)
547 return i; // this character different
548 if (*s1 == NUL)
549 break; // strings match until NUL
550 ++s1;
551 ++s2;
552 }
553 return 0; // strings match
554}
555#endif
556
557#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
558/*
559 * Compare two strings, for length "len", ignoring case, using current locale.
560 * Doesn't work for multi-byte characters.
561 * return 0 for match, < 0 for smaller, > 0 for bigger
562 */
563 int
564vim_strnicmp(char *s1, char *s2, size_t len)
565{
566 int i;
567
568 while (len > 0)
569 {
570 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
571 if (i != 0)
572 return i; // this character different
573 if (*s1 == NUL)
574 break; // strings match until NUL
575 ++s1;
576 ++s2;
577 --len;
578 }
579 return 0; // strings match
580}
581#endif
582
583/*
584 * Search for first occurrence of "c" in "string".
585 * Version of strchr() that handles unsigned char strings with characters from
586 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
587 * end of the string.
588 */
589 char_u *
590vim_strchr(char_u *string, int c)
591{
592 char_u *p;
593 int b;
594
595 p = string;
596 if (enc_utf8 && c >= 0x80)
597 {
598 while (*p != NUL)
599 {
600 int l = utfc_ptr2len(p);
601
602 // Avoid matching an illegal byte here.
603 if (utf_ptr2char(p) == c && l > 1)
604 return p;
605 p += l;
606 }
607 return NULL;
608 }
609 if (enc_dbcs != 0 && c > 255)
610 {
611 int n2 = c & 0xff;
612
613 c = ((unsigned)c >> 8) & 0xff;
614 while ((b = *p) != NUL)
615 {
616 if (b == c && p[1] == n2)
617 return p;
618 p += (*mb_ptr2len)(p);
619 }
620 return NULL;
621 }
622 if (has_mbyte)
623 {
624 while ((b = *p) != NUL)
625 {
626 if (b == c)
627 return p;
628 p += (*mb_ptr2len)(p);
629 }
630 return NULL;
631 }
632 while ((b = *p) != NUL)
633 {
634 if (b == c)
635 return p;
636 ++p;
637 }
638 return NULL;
639}
640
641/*
642 * Version of strchr() that only works for bytes and handles unsigned char
643 * strings with characters above 128 correctly. It also doesn't return a
644 * pointer to the NUL at the end of the string.
645 */
646 char_u *
647vim_strbyte(char_u *string, int c)
648{
649 char_u *p = string;
650
651 while (*p != NUL)
652 {
653 if (*p == c)
654 return p;
655 ++p;
656 }
657 return NULL;
658}
659
660/*
661 * Search for last occurrence of "c" in "string".
662 * Version of strrchr() that handles unsigned char strings with characters from
663 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
664 * end of the string.
665 * Return NULL if not found.
666 * Does not handle multi-byte char for "c"!
667 */
668 char_u *
669vim_strrchr(char_u *string, int c)
670{
671 char_u *retval = NULL;
672 char_u *p = string;
673
674 while (*p)
675 {
676 if (*p == c)
677 retval = p;
678 MB_PTR_ADV(p);
679 }
680 return retval;
681}
682
683/*
684 * Vim's version of strpbrk(), in case it's missing.
685 * Don't generate a prototype for this, causes problems when it's not used.
686 */
687#ifndef PROTO
688# ifndef HAVE_STRPBRK
689# ifdef vim_strpbrk
690# undef vim_strpbrk
691# endif
692 char_u *
693vim_strpbrk(char_u *s, char_u *charset)
694{
695 while (*s)
696 {
697 if (vim_strchr(charset, *s) != NULL)
698 return s;
699 MB_PTR_ADV(s);
700 }
701 return NULL;
702}
703# endif
704#endif
705
706/*
707 * Sort an array of strings.
708 */
static int sort_compare(const void *s1, const void *s2);

/*
 * Compare function for qsort(): "s1" and "s2" point to string pointers, the
 * strings are compared with STRCMP().
 */
    static int
sort_compare(const void *s1, const void *s2)
{
    char    *left = *(char **)s1;
    char    *right = *(char **)s2;

    return STRCMP(left, right);
}
716
717 void
718sort_strings(
719 char_u **files,
720 int count)
721{
722 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
723}
724
725#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
726/*
727 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
728 * When "s" is NULL FALSE is returned.
729 */
730 int
731has_non_ascii(char_u *s)
732{
733 char_u *p;
734
735 if (s != NULL)
736 for (p = s; *p != NUL; ++p)
737 if (*p >= 128)
738 return TRUE;
739 return FALSE;
740}
741#endif
742
743/*
744 * Concatenate two strings and return the result in allocated memory.
745 * Returns NULL when out of memory.
746 */
747 char_u *
748concat_str(char_u *str1, char_u *str2)
749{
750 char_u *dest;
751 size_t l = str1 == NULL ? 0 : STRLEN(str1);
752
753 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
754 if (dest != NULL)
755 {
756 if (str1 == NULL)
757 *dest = NUL;
758 else
759 STRCPY(dest, str1);
760 if (str2 != NULL)
761 STRCPY(dest + l, str2);
762 }
763 return dest;
764}
765
766#if defined(FEAT_EVAL) || defined(PROTO)
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200767/*
768 * Return string "str" in ' quotes, doubling ' characters.
769 * If "str" is NULL an empty string is assumed.
770 * If "function" is TRUE make it function('string').
771 */
772 char_u *
773string_quote(char_u *str, int function)
774{
775 unsigned len;
776 char_u *p, *r, *s;
777
778 len = (function ? 13 : 3);
779 if (str != NULL)
780 {
781 len += (unsigned)STRLEN(str);
782 for (p = str; *p != NUL; MB_PTR_ADV(p))
783 if (*p == '\'')
784 ++len;
785 }
786 s = r = alloc(len);
787 if (r != NULL)
788 {
789 if (function)
790 {
791 STRCPY(r, "function('");
792 r += 10;
793 }
794 else
795 *r++ = '\'';
796 if (str != NULL)
797 for (p = str; *p != NUL; )
798 {
799 if (*p == '\'')
800 *r++ = '\'';
801 MB_COPY_CHAR(p, r);
802 }
803 *r++ = '\'';
804 if (function)
805 *r++ = ')';
806 *r++ = NUL;
807 }
808 return s;
809}
810
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000811/*
812 * Count the number of times "needle" occurs in string "haystack". Case is
813 * ignored if "ic" is TRUE.
814 */
815 long
816string_count(char_u *haystack, char_u *needle, int ic)
817{
818 long n = 0;
819 char_u *p = haystack;
820 char_u *next;
821
822 if (p == NULL || needle == NULL || *needle == NUL)
823 return 0;
824
825 if (ic)
826 {
827 size_t len = STRLEN(needle);
828
829 while (*p != NUL)
830 {
831 if (MB_STRNICMP(p, needle, len) == 0)
832 {
833 ++n;
834 p += len;
835 }
836 else
837 MB_PTR_ADV(p);
838 }
839 }
840 else
841 while ((next = (char_u *)strstr((char *)p, (char *)needle)) != NULL)
842 {
843 ++n;
844 p = next + STRLEN(needle);
845 }
846
847 return n;
848}
849
850/*
851 * Make a typval_T of the first character of "input" and store it in "output".
852 * Return OK or FAIL.
853 */
854 static int
855copy_first_char_to_tv(char_u *input, typval_T *output)
856{
857 char_u buf[MB_MAXBYTES + 1];
858 int len;
859
860 if (input == NULL || output == NULL)
861 return FAIL;
862
863 len = has_mbyte ? mb_ptr2len(input) : 1;
864 STRNCPY(buf, input, len);
865 buf[len] = NUL;
866 output->v_type = VAR_STRING;
867 output->vval.v_string = vim_strsave(buf);
868
869 return output->vval.v_string == NULL ? FAIL : OK;
870}
871
872/*
873 * Implementation of map() and filter() for a String. Apply "expr" to every
874 * character in string "str" and return the result in "rettv".
875 */
876 void
877string_filter_map(
878 char_u *str,
879 filtermap_T filtermap,
880 typval_T *expr,
881 typval_T *rettv)
882{
883 char_u *p;
884 typval_T tv;
885 garray_T ga;
886 int len = 0;
887 int idx = 0;
888 int rem;
889
890 rettv->v_type = VAR_STRING;
891 rettv->vval.v_string = NULL;
892
893 // set_vim_var_nr() doesn't set the type
894 set_vim_var_type(VV_KEY, VAR_NUMBER);
895
896 ga_init2(&ga, (int)sizeof(char), 80);
897 for (p = str; *p != NUL; p += len)
898 {
899 typval_T newtv;
900
901 if (copy_first_char_to_tv(p, &tv) == FAIL)
902 break;
903 len = (int)STRLEN(tv.vval.v_string);
904
905 set_vim_var_nr(VV_KEY, idx);
906 if (filter_map_one(&tv, expr, filtermap, &newtv, &rem) == FAIL
907 || did_emsg)
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000908 {
909 clear_tv(&newtv);
910 clear_tv(&tv);
911 break;
912 }
913 else if (filtermap != FILTERMAP_FILTER)
914 {
915 if (newtv.v_type != VAR_STRING)
916 {
917 clear_tv(&newtv);
918 clear_tv(&tv);
Bram Moolenaare70cec92022-01-01 14:25:55 +0000919 emsg(_(e_string_required));
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000920 break;
921 }
922 else
923 ga_concat(&ga, newtv.vval.v_string);
924 }
925 else if (!rem)
926 ga_concat(&ga, tv.vval.v_string);
927
928 clear_tv(&newtv);
929 clear_tv(&tv);
930
931 ++idx;
932 }
933 ga_append(&ga, NUL);
934 rettv->vval.v_string = ga.ga_data;
935}
936
937/*
938 * reduce() String argvars[0] using the function 'funcname' with arguments in
939 * 'funcexe' starting with the initial value argvars[2] and return the result
940 * in 'rettv'.
941 */
942 void
943string_reduce(
944 typval_T *argvars,
945 char_u *func_name,
946 funcexe_T *funcexe,
947 typval_T *rettv)
948{
949 char_u *p = tv_get_string(&argvars[0]);
950 int len;
951 typval_T argv[3];
952 int r;
953 int called_emsg_start = called_emsg;
954
955 if (argvars[2].v_type == VAR_UNKNOWN)
956 {
957 if (*p == NUL)
958 {
Bram Moolenaare70cec92022-01-01 14:25:55 +0000959 semsg(_(e_reduce_of_an_empty_str_with_no_initial_value), "String");
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +0000960 return;
961 }
962 if (copy_first_char_to_tv(p, rettv) == FAIL)
963 return;
964 p += STRLEN(rettv->vval.v_string);
965 }
966 else if (argvars[2].v_type != VAR_STRING)
967 {
968 semsg(_(e_string_expected_for_argument_nr), 3);
969 return;
970 }
971 else
972 copy_tv(&argvars[2], rettv);
973
974 for ( ; *p != NUL; p += len)
975 {
976 argv[0] = *rettv;
977 if (copy_first_char_to_tv(p, &argv[1]) == FAIL)
978 break;
979 len = (int)STRLEN(argv[1].vval.v_string);
980 r = call_func(func_name, -1, rettv, 2, argv, funcexe);
981 clear_tv(&argv[0]);
982 clear_tv(&argv[1]);
983 if (r == FAIL || called_emsg != called_emsg_start)
984 return;
985 }
986}
987
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200988 static void
989byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
990{
991 char_u *t;
992 char_u *str;
993 varnumber_T idx;
994
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200995 rettv->vval.v_number = -1;
996
997 if (in_vim9script()
998 && (check_for_string_arg(argvars, 0) == FAIL
999 || check_for_number_arg(argvars, 1) == FAIL))
1000 return;
1001
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001002 str = tv_get_string_chk(&argvars[0]);
1003 idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001004 if (str == NULL || idx < 0)
1005 return;
1006
1007 t = str;
1008 for ( ; idx > 0; idx--)
1009 {
1010 if (*t == NUL) // EOL reached
1011 return;
1012 if (enc_utf8 && comp)
1013 t += utf_ptr2len(t);
1014 else
1015 t += (*mb_ptr2len)(t);
1016 }
1017 rettv->vval.v_number = (varnumber_T)(t - str);
1018}
1019
1020/*
1021 * "byteidx()" function
1022 */
1023 void
1024f_byteidx(typval_T *argvars, typval_T *rettv)
1025{
1026 byteidx(argvars, rettv, FALSE);
1027}
1028
1029/*
1030 * "byteidxcomp()" function
1031 */
1032 void
1033f_byteidxcomp(typval_T *argvars, typval_T *rettv)
1034{
1035 byteidx(argvars, rettv, TRUE);
1036}
1037
1038/*
1039 * "charidx()" function
1040 */
1041 void
1042f_charidx(typval_T *argvars, typval_T *rettv)
1043{
1044 char_u *str;
1045 varnumber_T idx;
1046 varnumber_T countcc = FALSE;
1047 char_u *p;
1048 int len;
1049 int (*ptr2len)(char_u *);
1050
1051 rettv->vval.v_number = -1;
1052
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001053 if (in_vim9script()
1054 && (check_for_string_arg(argvars, 0) == FAIL
1055 || check_for_number_arg(argvars, 1) == FAIL
1056 || check_for_opt_bool_arg(argvars, 2) == FAIL))
1057 return;
1058
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001059 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
1060 || (argvars[2].v_type != VAR_UNKNOWN
1061 && argvars[2].v_type != VAR_NUMBER
1062 && argvars[2].v_type != VAR_BOOL))
1063 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001064 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001065 return;
1066 }
1067
1068 str = tv_get_string_chk(&argvars[0]);
1069 idx = tv_get_number_chk(&argvars[1], NULL);
1070 if (str == NULL || idx < 0)
1071 return;
1072
1073 if (argvars[2].v_type != VAR_UNKNOWN)
1074 countcc = tv_get_bool(&argvars[2]);
1075 if (countcc < 0 || countcc > 1)
1076 {
1077 semsg(_(e_using_number_as_bool_nr), countcc);
1078 return;
1079 }
1080
1081 if (enc_utf8 && countcc)
1082 ptr2len = utf_ptr2len;
1083 else
1084 ptr2len = mb_ptr2len;
1085
1086 for (p = str, len = 0; p <= str + idx; len++)
1087 {
1088 if (*p == NUL)
1089 return;
1090 p += ptr2len(p);
1091 }
1092
1093 rettv->vval.v_number = len > 0 ? len - 1 : 0;
1094}
1095
1096/*
1097 * "str2list()" function
1098 */
1099 void
1100f_str2list(typval_T *argvars, typval_T *rettv)
1101{
1102 char_u *p;
1103 int utf8 = FALSE;
1104
1105 if (rettv_list_alloc(rettv) == FAIL)
1106 return;
1107
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001108 if (in_vim9script()
1109 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001110 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001111 return;
1112
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001113 if (argvars[1].v_type != VAR_UNKNOWN)
1114 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
1115
1116 p = tv_get_string(&argvars[0]);
1117
1118 if (has_mbyte || utf8)
1119 {
1120 int (*ptr2len)(char_u *);
1121 int (*ptr2char)(char_u *);
1122
1123 if (utf8 || enc_utf8)
1124 {
1125 ptr2len = utf_ptr2len;
1126 ptr2char = utf_ptr2char;
1127 }
1128 else
1129 {
1130 ptr2len = mb_ptr2len;
1131 ptr2char = mb_ptr2char;
1132 }
1133
1134 for ( ; *p != NUL; p += (*ptr2len)(p))
1135 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
1136 }
1137 else
1138 for ( ; *p != NUL; ++p)
1139 list_append_number(rettv->vval.v_list, *p);
1140}
1141
1142/*
1143 * "str2nr()" function
1144 */
1145 void
1146f_str2nr(typval_T *argvars, typval_T *rettv)
1147{
1148 int base = 10;
1149 char_u *p;
1150 varnumber_T n;
1151 int what = 0;
1152 int isneg;
1153
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001154 if (in_vim9script()
1155 && (check_for_string_arg(argvars, 0) == FAIL
1156 || check_for_opt_number_arg(argvars, 1) == FAIL
1157 || (argvars[1].v_type != VAR_UNKNOWN
1158 && check_for_opt_bool_arg(argvars, 2) == FAIL)))
1159 return;
1160
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001161 if (argvars[1].v_type != VAR_UNKNOWN)
1162 {
1163 base = (int)tv_get_number(&argvars[1]);
1164 if (base != 2 && base != 8 && base != 10 && base != 16)
1165 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001166 emsg(_(e_invalid_argument));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001167 return;
1168 }
1169 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
1170 what |= STR2NR_QUOTE;
1171 }
1172
1173 p = skipwhite(tv_get_string_strict(&argvars[0]));
1174 isneg = (*p == '-');
1175 if (*p == '+' || *p == '-')
1176 p = skipwhite(p + 1);
1177 switch (base)
1178 {
1179 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
1180 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
1181 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
1182 }
1183 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
1184 // Text after the number is silently ignored.
1185 if (isneg)
1186 rettv->vval.v_number = -n;
1187 else
1188 rettv->vval.v_number = n;
1189
1190}
1191
1192/*
1193 * "strgetchar()" function
1194 */
1195 void
1196f_strgetchar(typval_T *argvars, typval_T *rettv)
1197{
1198 char_u *str;
1199 int len;
1200 int error = FALSE;
1201 int charidx;
1202 int byteidx = 0;
1203
1204 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001205
1206 if (in_vim9script()
1207 && (check_for_string_arg(argvars, 0) == FAIL
1208 || check_for_number_arg(argvars, 1) == FAIL))
1209 return;
1210
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001211 str = tv_get_string_chk(&argvars[0]);
1212 if (str == NULL)
1213 return;
1214 len = (int)STRLEN(str);
1215 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1216 if (error)
1217 return;
1218
1219 while (charidx >= 0 && byteidx < len)
1220 {
1221 if (charidx == 0)
1222 {
1223 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1224 break;
1225 }
1226 --charidx;
1227 byteidx += MB_CPTR2LEN(str + byteidx);
1228 }
1229}
1230
1231/*
1232 * "stridx()" function
1233 */
1234 void
1235f_stridx(typval_T *argvars, typval_T *rettv)
1236{
1237 char_u buf[NUMBUFLEN];
1238 char_u *needle;
1239 char_u *haystack;
1240 char_u *save_haystack;
1241 char_u *pos;
1242 int start_idx;
1243
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001244 if (in_vim9script()
1245 && (check_for_string_arg(argvars, 0) == FAIL
1246 || check_for_string_arg(argvars, 1) == FAIL
1247 || check_for_opt_number_arg(argvars, 2) == FAIL))
1248 return;
1249
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001250 needle = tv_get_string_chk(&argvars[1]);
1251 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1252 rettv->vval.v_number = -1;
1253 if (needle == NULL || haystack == NULL)
1254 return; // type error; errmsg already given
1255
1256 if (argvars[2].v_type != VAR_UNKNOWN)
1257 {
1258 int error = FALSE;
1259
1260 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1261 if (error || start_idx >= (int)STRLEN(haystack))
1262 return;
1263 if (start_idx >= 0)
1264 haystack += start_idx;
1265 }
1266
1267 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1268 if (pos != NULL)
1269 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1270}
1271
1272/*
1273 * "string()" function
1274 */
1275 void
1276f_string(typval_T *argvars, typval_T *rettv)
1277{
1278 char_u *tofree;
1279 char_u numbuf[NUMBUFLEN];
1280
1281 rettv->v_type = VAR_STRING;
1282 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1283 get_copyID());
1284 // Make a copy if we have a value but it's not in allocated memory.
1285 if (rettv->vval.v_string != NULL && tofree == NULL)
1286 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1287}
1288
1289/*
1290 * "strlen()" function
1291 */
1292 void
1293f_strlen(typval_T *argvars, typval_T *rettv)
1294{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001295 if (in_vim9script()
1296 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1297 return;
1298
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001299 rettv->vval.v_number = (varnumber_T)(STRLEN(
1300 tv_get_string(&argvars[0])));
1301}
1302
1303 static void
1304strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1305{
1306 char_u *s = tv_get_string(&argvars[0]);
1307 varnumber_T len = 0;
1308 int (*func_mb_ptr2char_adv)(char_u **pp);
1309
1310 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1311 while (*s != NUL)
1312 {
1313 func_mb_ptr2char_adv(&s);
1314 ++len;
1315 }
1316 rettv->vval.v_number = len;
1317}
1318
1319/*
1320 * "strcharlen()" function
1321 */
1322 void
1323f_strcharlen(typval_T *argvars, typval_T *rettv)
1324{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001325 if (in_vim9script()
1326 && check_for_string_or_number_arg(argvars, 0) == FAIL)
1327 return;
1328
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001329 strchar_common(argvars, rettv, TRUE);
1330}
1331
1332/*
1333 * "strchars()" function
1334 */
1335 void
1336f_strchars(typval_T *argvars, typval_T *rettv)
1337{
1338 varnumber_T skipcc = FALSE;
1339
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001340 if (in_vim9script()
1341 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001342 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001343 return;
1344
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001345 if (argvars[1].v_type != VAR_UNKNOWN)
1346 skipcc = tv_get_bool(&argvars[1]);
1347 if (skipcc < 0 || skipcc > 1)
1348 semsg(_(e_using_number_as_bool_nr), skipcc);
1349 else
1350 strchar_common(argvars, rettv, skipcc);
1351}
1352
1353/*
1354 * "strdisplaywidth()" function
1355 */
1356 void
1357f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1358{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001359 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001360 int col = 0;
1361
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001362 rettv->vval.v_number = -1;
1363
1364 if (in_vim9script()
1365 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001366 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001367 return;
1368
1369 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001370 if (argvars[1].v_type != VAR_UNKNOWN)
1371 col = (int)tv_get_number(&argvars[1]);
1372
1373 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1374}
1375
1376/*
1377 * "strwidth()" function
1378 */
1379 void
1380f_strwidth(typval_T *argvars, typval_T *rettv)
1381{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001382 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001383
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001384 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1385 return;
1386
1387 s = tv_get_string_strict(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001388 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1389}
1390
1391/*
1392 * "strcharpart()" function
1393 */
1394 void
1395f_strcharpart(typval_T *argvars, typval_T *rettv)
1396{
1397 char_u *p;
1398 int nchar;
1399 int nbyte = 0;
1400 int charlen;
1401 int skipcc = FALSE;
1402 int len = 0;
1403 int slen;
1404 int error = FALSE;
1405
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001406 if (in_vim9script()
1407 && (check_for_string_arg(argvars, 0) == FAIL
1408 || check_for_number_arg(argvars, 1) == FAIL
1409 || check_for_opt_number_arg(argvars, 2) == FAIL
1410 || (argvars[2].v_type != VAR_UNKNOWN
1411 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1412 return;
1413
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001414 p = tv_get_string(&argvars[0]);
1415 slen = (int)STRLEN(p);
1416
1417 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1418 if (!error)
1419 {
1420 if (argvars[2].v_type != VAR_UNKNOWN
1421 && argvars[3].v_type != VAR_UNKNOWN)
1422 {
1423 skipcc = tv_get_bool(&argvars[3]);
1424 if (skipcc < 0 || skipcc > 1)
1425 {
1426 semsg(_(e_using_number_as_bool_nr), skipcc);
1427 return;
1428 }
1429 }
1430
1431 if (nchar > 0)
1432 while (nchar > 0 && nbyte < slen)
1433 {
1434 if (skipcc)
1435 nbyte += mb_ptr2len(p + nbyte);
1436 else
1437 nbyte += MB_CPTR2LEN(p + nbyte);
1438 --nchar;
1439 }
1440 else
1441 nbyte = nchar;
1442 if (argvars[2].v_type != VAR_UNKNOWN)
1443 {
1444 charlen = (int)tv_get_number(&argvars[2]);
1445 while (charlen > 0 && nbyte + len < slen)
1446 {
1447 int off = nbyte + len;
1448
1449 if (off < 0)
1450 len += 1;
1451 else
1452 {
1453 if (skipcc)
1454 len += mb_ptr2len(p + off);
1455 else
1456 len += MB_CPTR2LEN(p + off);
1457 }
1458 --charlen;
1459 }
1460 }
1461 else
1462 len = slen - nbyte; // default: all bytes that are available.
1463 }
1464
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001465 // Only return the overlap between the specified part and the actual
1466 // string.
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001467 if (nbyte < 0)
1468 {
1469 len += nbyte;
1470 nbyte = 0;
1471 }
1472 else if (nbyte > slen)
1473 nbyte = slen;
1474 if (len < 0)
1475 len = 0;
1476 else if (nbyte + len > slen)
1477 len = slen - nbyte;
1478
1479 rettv->v_type = VAR_STRING;
1480 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1481}
1482
1483/*
1484 * "strpart()" function
1485 */
1486 void
1487f_strpart(typval_T *argvars, typval_T *rettv)
1488{
1489 char_u *p;
1490 int n;
1491 int len;
1492 int slen;
1493 int error = FALSE;
1494
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001495 if (in_vim9script()
1496 && (check_for_string_arg(argvars, 0) == FAIL
1497 || check_for_number_arg(argvars, 1) == FAIL
1498 || check_for_opt_number_arg(argvars, 2) == FAIL
1499 || (argvars[2].v_type != VAR_UNKNOWN
1500 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1501 return;
1502
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001503 p = tv_get_string(&argvars[0]);
1504 slen = (int)STRLEN(p);
1505
1506 n = (int)tv_get_number_chk(&argvars[1], &error);
1507 if (error)
1508 len = 0;
1509 else if (argvars[2].v_type != VAR_UNKNOWN)
1510 len = (int)tv_get_number(&argvars[2]);
1511 else
1512 len = slen - n; // default len: all bytes that are available.
1513
1514 // Only return the overlap between the specified part and the actual
1515 // string.
1516 if (n < 0)
1517 {
1518 len += n;
1519 n = 0;
1520 }
1521 else if (n > slen)
1522 n = slen;
1523 if (len < 0)
1524 len = 0;
1525 else if (n + len > slen)
1526 len = slen - n;
1527
1528 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1529 {
1530 int off;
1531
1532 // length in characters
1533 for (off = n; off < slen && len > 0; --len)
1534 off += mb_ptr2len(p + off);
1535 len = off - n;
1536 }
1537
1538 rettv->v_type = VAR_STRING;
1539 rettv->vval.v_string = vim_strnsave(p + n, len);
1540}
1541
1542/*
1543 * "strridx()" function
1544 */
1545 void
1546f_strridx(typval_T *argvars, typval_T *rettv)
1547{
1548 char_u buf[NUMBUFLEN];
1549 char_u *needle;
1550 char_u *haystack;
1551 char_u *rest;
1552 char_u *lastmatch = NULL;
1553 int haystack_len, end_idx;
1554
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001555 if (in_vim9script()
1556 && (check_for_string_arg(argvars, 0) == FAIL
1557 || check_for_string_arg(argvars, 1) == FAIL
1558 || check_for_opt_number_arg(argvars, 2) == FAIL))
1559 return;
1560
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001561 needle = tv_get_string_chk(&argvars[1]);
1562 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1563
1564 rettv->vval.v_number = -1;
1565 if (needle == NULL || haystack == NULL)
1566 return; // type error; errmsg already given
1567
1568 haystack_len = (int)STRLEN(haystack);
1569 if (argvars[2].v_type != VAR_UNKNOWN)
1570 {
1571 // Third argument: upper limit for index
1572 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1573 if (end_idx < 0)
1574 return; // can never find a match
1575 }
1576 else
1577 end_idx = haystack_len;
1578
1579 if (*needle == NUL)
1580 {
1581 // Empty string matches past the end.
1582 lastmatch = haystack + end_idx;
1583 }
1584 else
1585 {
1586 for (rest = haystack; *rest != '\0'; ++rest)
1587 {
1588 rest = (char_u *)strstr((char *)rest, (char *)needle);
1589 if (rest == NULL || rest > haystack + end_idx)
1590 break;
1591 lastmatch = rest;
1592 }
1593 }
1594
1595 if (lastmatch == NULL)
1596 rettv->vval.v_number = -1;
1597 else
1598 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1599}
1600
1601/*
1602 * "strtrans()" function
1603 */
1604 void
1605f_strtrans(typval_T *argvars, typval_T *rettv)
1606{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001607 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1608 return;
1609
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001610 rettv->v_type = VAR_STRING;
1611 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1612}
1613
1614/*
1615 * "tolower(string)" function
1616 */
1617 void
1618f_tolower(typval_T *argvars, typval_T *rettv)
1619{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001620 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1621 return;
1622
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001623 rettv->v_type = VAR_STRING;
1624 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1625}
1626
1627/*
1628 * "toupper(string)" function
1629 */
1630 void
1631f_toupper(typval_T *argvars, typval_T *rettv)
1632{
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001633 if (in_vim9script() && check_for_string_arg(argvars, 0) == FAIL)
1634 return;
1635
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001636 rettv->v_type = VAR_STRING;
1637 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1638}
1639
1640/*
1641 * "tr(string, fromstr, tostr)" function
1642 */
1643 void
1644f_tr(typval_T *argvars, typval_T *rettv)
1645{
1646 char_u *in_str;
1647 char_u *fromstr;
1648 char_u *tostr;
1649 char_u *p;
1650 int inlen;
1651 int fromlen;
1652 int tolen;
1653 int idx;
1654 char_u *cpstr;
1655 int cplen;
1656 int first = TRUE;
1657 char_u buf[NUMBUFLEN];
1658 char_u buf2[NUMBUFLEN];
1659 garray_T ga;
1660
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001661 if (in_vim9script()
1662 && (check_for_string_arg(argvars, 0) == FAIL
1663 || check_for_string_arg(argvars, 1) == FAIL
1664 || check_for_string_arg(argvars, 2) == FAIL))
1665 return;
1666
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001667 in_str = tv_get_string(&argvars[0]);
1668 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1669 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1670
1671 // Default return value: empty string.
1672 rettv->v_type = VAR_STRING;
1673 rettv->vval.v_string = NULL;
1674 if (fromstr == NULL || tostr == NULL)
1675 return; // type error; errmsg already given
1676 ga_init2(&ga, (int)sizeof(char), 80);
1677
1678 if (!has_mbyte)
1679 // not multi-byte: fromstr and tostr must be the same length
1680 if (STRLEN(fromstr) != STRLEN(tostr))
1681 {
1682error:
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001683 semsg(_(e_invalid_argument_str), fromstr);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001684 ga_clear(&ga);
1685 return;
1686 }
1687
1688 // fromstr and tostr have to contain the same number of chars
1689 while (*in_str != NUL)
1690 {
1691 if (has_mbyte)
1692 {
1693 inlen = (*mb_ptr2len)(in_str);
1694 cpstr = in_str;
1695 cplen = inlen;
1696 idx = 0;
1697 for (p = fromstr; *p != NUL; p += fromlen)
1698 {
1699 fromlen = (*mb_ptr2len)(p);
1700 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1701 {
1702 for (p = tostr; *p != NUL; p += tolen)
1703 {
1704 tolen = (*mb_ptr2len)(p);
1705 if (idx-- == 0)
1706 {
1707 cplen = tolen;
1708 cpstr = p;
1709 break;
1710 }
1711 }
1712 if (*p == NUL) // tostr is shorter than fromstr
1713 goto error;
1714 break;
1715 }
1716 ++idx;
1717 }
1718
1719 if (first && cpstr == in_str)
1720 {
1721 // Check that fromstr and tostr have the same number of
1722 // (multi-byte) characters. Done only once when a character
1723 // of in_str doesn't appear in fromstr.
1724 first = FALSE;
1725 for (p = tostr; *p != NUL; p += tolen)
1726 {
1727 tolen = (*mb_ptr2len)(p);
1728 --idx;
1729 }
1730 if (idx != 0)
1731 goto error;
1732 }
1733
1734 (void)ga_grow(&ga, cplen);
1735 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1736 ga.ga_len += cplen;
1737
1738 in_str += inlen;
1739 }
1740 else
1741 {
1742 // When not using multi-byte chars we can do it faster.
1743 p = vim_strchr(fromstr, *in_str);
1744 if (p != NULL)
1745 ga_append(&ga, tostr[p - fromstr]);
1746 else
1747 ga_append(&ga, *in_str);
1748 ++in_str;
1749 }
1750 }
1751
1752 // add a terminating NUL
1753 (void)ga_grow(&ga, 1);
1754 ga_append(&ga, NUL);
1755
1756 rettv->vval.v_string = ga.ga_data;
1757}
1758
1759/*
1760 * "trim({expr})" function
1761 */
1762 void
1763f_trim(typval_T *argvars, typval_T *rettv)
1764{
1765 char_u buf1[NUMBUFLEN];
1766 char_u buf2[NUMBUFLEN];
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001767 char_u *head;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001768 char_u *mask = NULL;
1769 char_u *tail;
1770 char_u *prev;
1771 char_u *p;
1772 int c1;
1773 int dir = 0;
1774
1775 rettv->v_type = VAR_STRING;
1776 rettv->vval.v_string = NULL;
Yegappan Lakshmanan4490ec42021-07-27 22:00:44 +02001777
1778 if (in_vim9script()
1779 && (check_for_string_arg(argvars, 0) == FAIL
1780 || check_for_opt_string_arg(argvars, 1) == FAIL
1781 || (argvars[1].v_type != VAR_UNKNOWN
1782 && check_for_opt_number_arg(argvars, 2) == FAIL)))
1783 return;
1784
1785 head = tv_get_string_buf_chk(&argvars[0], buf1);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001786 if (head == NULL)
1787 return;
1788
1789 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
1790 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001791 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[1]));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001792 return;
1793 }
1794
1795 if (argvars[1].v_type == VAR_STRING)
1796 {
1797 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1798
1799 if (argvars[2].v_type != VAR_UNKNOWN)
1800 {
1801 int error = 0;
1802
1803 // leading or trailing characters to trim
1804 dir = (int)tv_get_number_chk(&argvars[2], &error);
1805 if (error)
1806 return;
1807 if (dir < 0 || dir > 2)
1808 {
Bram Moolenaar436b5ad2021-12-31 22:49:24 +00001809 semsg(_(e_invalid_argument_str), tv_get_string(&argvars[2]));
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001810 return;
1811 }
1812 }
1813 }
1814
1815 if (dir == 0 || dir == 1)
1816 {
1817 // Trim leading characters
1818 while (*head != NUL)
1819 {
1820 c1 = PTR2CHAR(head);
1821 if (mask == NULL)
1822 {
1823 if (c1 > ' ' && c1 != 0xa0)
1824 break;
1825 }
1826 else
1827 {
1828 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1829 if (c1 == PTR2CHAR(p))
1830 break;
1831 if (*p == NUL)
1832 break;
1833 }
1834 MB_PTR_ADV(head);
1835 }
1836 }
1837
1838 tail = head + STRLEN(head);
1839 if (dir == 0 || dir == 2)
1840 {
1841 // Trim trailing characters
1842 for (; tail > head; tail = prev)
1843 {
1844 prev = tail;
1845 MB_PTR_BACK(head, prev);
1846 c1 = PTR2CHAR(prev);
1847 if (mask == NULL)
1848 {
1849 if (c1 > ' ' && c1 != 0xa0)
1850 break;
1851 }
1852 else
1853 {
1854 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1855 if (c1 == PTR2CHAR(p))
1856 break;
1857 if (*p == NUL)
1858 break;
1859 }
1860 }
1861 }
1862 rettv->vval.v_string = vim_strnsave(head, tail - head);
1863}
1864
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001865static char *e_printf = N_("E766: Insufficient arguments for printf()");
1866
1867/*
1868 * Get number argument from "idxp" entry in "tvs". First entry is 1.
1869 */
1870 static varnumber_T
1871tv_nr(typval_T *tvs, int *idxp)
1872{
1873 int idx = *idxp - 1;
1874 varnumber_T n = 0;
1875 int err = FALSE;
1876
1877 if (tvs[idx].v_type == VAR_UNKNOWN)
1878 emsg(_(e_printf));
1879 else
1880 {
1881 ++*idxp;
1882 n = tv_get_number_chk(&tvs[idx], &err);
1883 if (err)
1884 n = 0;
1885 }
1886 return n;
1887}
1888
1889/*
1890 * Get string argument from "idxp" entry in "tvs". First entry is 1.
1891 * If "tofree" is NULL tv_get_string_chk() is used. Some types (e.g. List)
1892 * are not converted to a string.
1893 * If "tofree" is not NULL echo_string() is used. All types are converted to
1894 * a string with the same format as ":echo". The caller must free "*tofree".
1895 * Returns NULL for an error.
1896 */
1897 static char *
1898tv_str(typval_T *tvs, int *idxp, char_u **tofree)
1899{
1900 int idx = *idxp - 1;
1901 char *s = NULL;
1902 static char_u numbuf[NUMBUFLEN];
1903
1904 if (tvs[idx].v_type == VAR_UNKNOWN)
1905 emsg(_(e_printf));
1906 else
1907 {
1908 ++*idxp;
1909 if (tofree != NULL)
1910 s = (char *)echo_string(&tvs[idx], tofree, numbuf, get_copyID());
1911 else
1912 s = (char *)tv_get_string_chk(&tvs[idx]);
1913 }
1914 return s;
1915}
1916
1917# ifdef FEAT_FLOAT
1918/*
1919 * Get float argument from "idxp" entry in "tvs". First entry is 1.
1920 */
1921 static double
1922tv_float(typval_T *tvs, int *idxp)
1923{
1924 int idx = *idxp - 1;
1925 double f = 0;
1926
1927 if (tvs[idx].v_type == VAR_UNKNOWN)
1928 emsg(_(e_printf));
1929 else
1930 {
1931 ++*idxp;
1932 if (tvs[idx].v_type == VAR_FLOAT)
1933 f = tvs[idx].vval.v_float;
1934 else if (tvs[idx].v_type == VAR_NUMBER)
1935 f = (double)tvs[idx].vval.v_number;
1936 else
1937 emsg(_("E807: Expected Float argument for printf()"));
1938 }
1939 return f;
1940}
1941# endif
Yegappan Lakshmananf973eeb2021-12-22 18:19:26 +00001942
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02001943#endif
1944
1945#ifdef FEAT_FLOAT
/*
 * Return the representation of infinity for printf() function:
 * "-inf", "inf", "+inf", " inf", "-INF", "INF", "+INF" or " INF".
 * The upper case forms are used when "fmt_spec" is an upper case letter.
 * For a positive value "force_sign" selects a sign prefix, which is a
 * space when "space_for_positive" is set and '+' otherwise.
 */
    static const char *
infinity_str(int positive,
	     char fmt_spec,
	     int force_sign,
	     int space_for_positive)
{
    static const char *table[] =
    {
	"-inf", "inf", "+inf", " inf",
	"-INF", "INF", "+INF", " INF"
    };
    // Offset within one row for a positive value: 1 is "inf", 2 is "+inf"
    // and 3 is " inf".
    int	    sign_offset = 1 + force_sign * (1 + space_for_positive);
    // The second row of four entries holds the upper case forms.
    int	    case_offset = ASCII_ISUPPER(fmt_spec) ? 4 : 0;

    return table[positive * sign_offset + case_offset];
}
1967#endif
1968
1969/*
1970 * This code was included to provide a portable vsnprintf() and snprintf().
1971 * Some systems may provide their own, but we always use this one for
1972 * consistency.
1973 *
1974 * This code is based on snprintf.c - a portable implementation of snprintf
1975 * by Mark Martinec <mark.martinec@ijs.si>, Version 2.2, 2000-10-06.
1976 * Included with permission. It was heavily modified to fit in Vim.
1977 * The original code, including useful comments, can be found here:
1978 * http://www.ijs.si/software/snprintf/
1979 *
 * This snprintf() only supports the following conversion specifiers:
 * s, S, c, d, u, b, B, o, x, X, p (and synonyms: i, D, U, O - see below)
 * with flags: '-', '+', ' ', '0' and '#'.
1983 * An asterisk is supported for field width as well as precision.
1984 *
1985 * Limited support for floating point was added: 'f', 'F', 'e', 'E', 'g', 'G'.
1986 *
1987 * Length modifiers 'h' (short int) and 'l' (long int) and 'll' (long long int)
1988 * are supported. NOTE: for 'll' the argument is varnumber_T or uvarnumber_T.
1989 *
1990 * The locale is not used, the string is used as a byte string. This is only
1991 * relevant for double-byte encodings where the second byte may be '%'.
1992 *
1993 * It is permitted for "str_m" to be zero, and it is permitted to specify NULL
1994 * pointer for resulting string argument if "str_m" is zero (as per ISO C99).
1995 *
1996 * The return value is the number of characters which would be generated
1997 * for the given input, excluding the trailing NUL. If this value
1998 * is greater or equal to "str_m", not all characters from the result
1999 * have been stored in str, output bytes beyond the ("str_m"-1) -th character
2000 * are discarded. If "str_m" is greater than zero it is guaranteed
2001 * the resulting string will be NUL-terminated.
2002 */
2003
2004/*
2005 * When va_list is not supported we only define vim_snprintf().
2006 *
2007 * vim_vsnprintf_typval() can be invoked with either "va_list" or a list of
2008 * "typval_T". When the latter is not used it must be NULL.
2009 */
2010
2011// When generating prototypes all of this is skipped, cproto doesn't
2012// understand this.
2013#ifndef PROTO
2014
// Like vim_snprintf() but append to the string: the formatted text is
// written after the text already present in "str".  "str_m" is the size of
// the whole buffer.  Returns what vim_vsnprintf() returns for the appended
// part.
    int
vim_snprintf_add(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	ap;
    int		added;
    size_t	used = STRLEN(str);
    // Room left after the existing text; zero when the buffer is already
    // full.
    size_t	room = (str_m > used) ? str_m - used : 0;

    va_start(ap, fmt);
    added = vim_vsnprintf(str + used, room, fmt, ap);
    va_end(ap);
    return added;
}
2033
// Format the arguments according to "fmt" into "str", which has room for
// "str_m" bytes.  Collects the variable arguments and passes them on to
// vim_vsnprintf(), whose return value is returned.
    int
vim_snprintf(char *str, size_t str_m, const char *fmt, ...)
{
    va_list	args;
    int		result;

    va_start(args, fmt);
    result = vim_vsnprintf(str, str_m, fmt, args);
    va_end(args);

    return result;
}
2045
// Like vim_snprintf() but with the arguments in "ap".
// This is vim_vsnprintf_typval() without a list of typval_T arguments.
    int
vim_vsnprintf(
    char	*str,
    size_t	str_m,
    const char	*fmt,
    va_list	ap)
{
    int	    result = vim_vsnprintf_typval(str, str_m, fmt, ap, NULL);

    return result;
}
2055
2056 int
2057vim_vsnprintf_typval(
2058 char *str,
2059 size_t str_m,
2060 const char *fmt,
2061 va_list ap,
2062 typval_T *tvs)
2063{
2064 size_t str_l = 0;
2065 const char *p = fmt;
2066 int arg_idx = 1;
2067
2068 if (p == NULL)
2069 p = "";
2070 while (*p != NUL)
2071 {
2072 if (*p != '%')
2073 {
2074 char *q = strchr(p + 1, '%');
2075 size_t n = (q == NULL) ? STRLEN(p) : (size_t)(q - p);
2076
2077 // Copy up to the next '%' or NUL without any changes.
2078 if (str_l < str_m)
2079 {
2080 size_t avail = str_m - str_l;
2081
2082 mch_memmove(str + str_l, p, n > avail ? avail : n);
2083 }
2084 p += n;
2085 str_l += n;
2086 }
2087 else
2088 {
2089 size_t min_field_width = 0, precision = 0;
2090 int zero_padding = 0, precision_specified = 0, justify_left = 0;
2091 int alternate_form = 0, force_sign = 0;
2092
2093 // If both the ' ' and '+' flags appear, the ' ' flag should be
2094 // ignored.
2095 int space_for_positive = 1;
2096
2097 // allowed values: \0, h, l, L
2098 char length_modifier = '\0';
2099
2100 // temporary buffer for simple numeric->string conversion
2101# if defined(FEAT_FLOAT)
2102# define TMP_LEN 350 // On my system 1e308 is the biggest number possible.
2103 // That sounds reasonable to use as the maximum
2104 // printable.
2105# else
2106# define TMP_LEN 66
2107# endif
2108 char tmp[TMP_LEN];
2109
2110 // string address in case of string argument
2111 const char *str_arg = NULL;
2112
2113 // natural field width of arg without padding and sign
2114 size_t str_arg_l;
2115
2116 // unsigned char argument value - only defined for c conversion.
2117 // N.B. standard explicitly states the char argument for the c
2118 // conversion is unsigned
2119 unsigned char uchar_arg;
2120
2121 // number of zeros to be inserted for numeric conversions as
2122 // required by the precision or minimal field width
2123 size_t number_of_zeros_to_pad = 0;
2124
2125 // index into tmp where zero padding is to be inserted
2126 size_t zero_padding_insertion_ind = 0;
2127
2128 // current conversion specifier character
2129 char fmt_spec = '\0';
2130
2131 // buffer for 's' and 'S' specs
2132 char_u *tofree = NULL;
2133
2134
2135 p++; // skip '%'
2136
2137 // parse flags
2138 while (*p == '0' || *p == '-' || *p == '+' || *p == ' '
2139 || *p == '#' || *p == '\'')
2140 {
2141 switch (*p)
2142 {
2143 case '0': zero_padding = 1; break;
2144 case '-': justify_left = 1; break;
2145 case '+': force_sign = 1; space_for_positive = 0; break;
2146 case ' ': force_sign = 1;
2147 // If both the ' ' and '+' flags appear, the ' '
2148 // flag should be ignored
2149 break;
2150 case '#': alternate_form = 1; break;
2151 case '\'': break;
2152 }
2153 p++;
2154 }
2155 // If the '0' and '-' flags both appear, the '0' flag should be
2156 // ignored.
2157
2158 // parse field width
2159 if (*p == '*')
2160 {
2161 int j;
2162
2163 p++;
2164 j =
2165# if defined(FEAT_EVAL)
2166 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2167# endif
2168 va_arg(ap, int);
2169 if (j >= 0)
2170 min_field_width = j;
2171 else
2172 {
2173 min_field_width = -j;
2174 justify_left = 1;
2175 }
2176 }
2177 else if (VIM_ISDIGIT((int)(*p)))
2178 {
2179 // size_t could be wider than unsigned int; make sure we treat
2180 // argument like common implementations do
2181 unsigned int uj = *p++ - '0';
2182
2183 while (VIM_ISDIGIT((int)(*p)))
2184 uj = 10 * uj + (unsigned int)(*p++ - '0');
2185 min_field_width = uj;
2186 }
2187
2188 // parse precision
2189 if (*p == '.')
2190 {
2191 p++;
2192 precision_specified = 1;
2193 if (*p == '*')
2194 {
2195 int j;
2196
2197 j =
2198# if defined(FEAT_EVAL)
2199 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2200# endif
2201 va_arg(ap, int);
2202 p++;
2203 if (j >= 0)
2204 precision = j;
2205 else
2206 {
2207 precision_specified = 0;
2208 precision = 0;
2209 }
2210 }
2211 else if (VIM_ISDIGIT((int)(*p)))
2212 {
2213 // size_t could be wider than unsigned int; make sure we
2214 // treat argument like common implementations do
2215 unsigned int uj = *p++ - '0';
2216
2217 while (VIM_ISDIGIT((int)(*p)))
2218 uj = 10 * uj + (unsigned int)(*p++ - '0');
2219 precision = uj;
2220 }
2221 }
2222
2223 // parse 'h', 'l' and 'll' length modifiers
2224 if (*p == 'h' || *p == 'l')
2225 {
2226 length_modifier = *p;
2227 p++;
2228 if (length_modifier == 'l' && *p == 'l')
2229 {
2230 // double l = __int64 / varnumber_T
2231 length_modifier = 'L';
2232 p++;
2233 }
2234 }
2235 fmt_spec = *p;
2236
2237 // common synonyms:
2238 switch (fmt_spec)
2239 {
2240 case 'i': fmt_spec = 'd'; break;
2241 case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
2242 case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
2243 case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
2244 default: break;
2245 }
2246
2247# if defined(FEAT_EVAL)
2248 switch (fmt_spec)
2249 {
2250 case 'd': case 'u': case 'o': case 'x': case 'X':
2251 if (tvs != NULL && length_modifier == '\0')
2252 length_modifier = 'L';
2253 }
2254# endif
2255
2256 // get parameter value, do initial processing
2257 switch (fmt_spec)
2258 {
2259 // '%' and 'c' behave similar to 's' regarding flags and field
2260 // widths
2261 case '%':
2262 case 'c':
2263 case 's':
2264 case 'S':
2265 str_arg_l = 1;
2266 switch (fmt_spec)
2267 {
2268 case '%':
2269 str_arg = p;
2270 break;
2271
2272 case 'c':
2273 {
2274 int j;
2275
2276 j =
2277# if defined(FEAT_EVAL)
2278 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2279# endif
2280 va_arg(ap, int);
2281 // standard demands unsigned char
2282 uchar_arg = (unsigned char)j;
2283 str_arg = (char *)&uchar_arg;
2284 break;
2285 }
2286
2287 case 's':
2288 case 'S':
2289 str_arg =
2290# if defined(FEAT_EVAL)
2291 tvs != NULL ? tv_str(tvs, &arg_idx, &tofree) :
2292# endif
2293 va_arg(ap, char *);
2294 if (str_arg == NULL)
2295 {
2296 str_arg = "[NULL]";
2297 str_arg_l = 6;
2298 }
2299 // make sure not to address string beyond the specified
2300 // precision !!!
2301 else if (!precision_specified)
2302 str_arg_l = strlen(str_arg);
2303 // truncate string if necessary as requested by precision
2304 else if (precision == 0)
2305 str_arg_l = 0;
2306 else
2307 {
2308 // Don't put the #if inside memchr(), it can be a
2309 // macro.
2310 // memchr on HP does not like n > 2^31 !!!
2311 char *q = memchr(str_arg, '\0',
2312 precision <= (size_t)0x7fffffffL ? precision
2313 : (size_t)0x7fffffffL);
presukud85fccd2021-11-20 19:38:31 +00002314
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002315 str_arg_l = (q == NULL) ? precision
2316 : (size_t)(q - str_arg);
2317 }
2318 if (fmt_spec == 'S')
2319 {
presuku1f2453f2021-11-24 15:32:57 +00002320 char_u *p1;
2321 size_t i;
2322 int cell;
presukud85fccd2021-11-20 19:38:31 +00002323
presuku1f2453f2021-11-24 15:32:57 +00002324 for (i = 0, p1 = (char_u *)str_arg; *p1;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002325 p1 += mb_ptr2len(p1))
presuku1f2453f2021-11-24 15:32:57 +00002326 {
2327 cell = mb_ptr2cells(p1);
2328 if (precision_specified && i + cell > precision)
2329 break;
2330 i += cell;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002331 }
presuku1f2453f2021-11-24 15:32:57 +00002332
2333 str_arg_l = p1 - (char_u *)str_arg;
presukud85fccd2021-11-20 19:38:31 +00002334 if (min_field_width != 0)
presuku1f2453f2021-11-24 15:32:57 +00002335 min_field_width += str_arg_l - i;
Yegappan Lakshmanan8ee52af2021-08-09 19:59:06 +02002336 }
2337 break;
2338
2339 default:
2340 break;
2341 }
2342 break;
2343
2344 case 'd': case 'u':
2345 case 'b': case 'B':
2346 case 'o':
2347 case 'x': case 'X':
2348 case 'p':
2349 {
2350 // NOTE: the u, b, o, x, X and p conversion specifiers
2351 // imply the value is unsigned; d implies a signed
2352 // value
2353
2354 // 0 if numeric argument is zero (or if pointer is
2355 // NULL for 'p'), +1 if greater than zero (or nonzero
2356 // for unsigned arguments), -1 if negative (unsigned
2357 // argument is never negative)
2358 int arg_sign = 0;
2359
2360 // only set for length modifier h, or for no length
2361 // modifiers
2362 int int_arg = 0;
2363 unsigned int uint_arg = 0;
2364
2365 // only set for length modifier l
2366 long int long_arg = 0;
2367 unsigned long int ulong_arg = 0;
2368
2369 // only set for length modifier ll
2370 varnumber_T llong_arg = 0;
2371 uvarnumber_T ullong_arg = 0;
2372
2373 // only set for b conversion
2374 uvarnumber_T bin_arg = 0;
2375
2376 // pointer argument value -only defined for p
2377 // conversion
2378 void *ptr_arg = NULL;
2379
2380 if (fmt_spec == 'p')
2381 {
2382 length_modifier = '\0';
2383 ptr_arg =
2384# if defined(FEAT_EVAL)
2385 tvs != NULL ? (void *)tv_str(tvs, &arg_idx,
2386 NULL) :
2387# endif
2388 va_arg(ap, void *);
2389 if (ptr_arg != NULL)
2390 arg_sign = 1;
2391 }
2392 else if (fmt_spec == 'b' || fmt_spec == 'B')
2393 {
2394 bin_arg =
2395# if defined(FEAT_EVAL)
2396 tvs != NULL ?
2397 (uvarnumber_T)tv_nr(tvs, &arg_idx) :
2398# endif
2399 va_arg(ap, uvarnumber_T);
2400 if (bin_arg != 0)
2401 arg_sign = 1;
2402 }
2403 else if (fmt_spec == 'd')
2404 {
2405 // signed
2406 switch (length_modifier)
2407 {
2408 case '\0':
2409 case 'h':
2410 // char and short arguments are passed as int.
2411 int_arg =
2412# if defined(FEAT_EVAL)
2413 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2414# endif
2415 va_arg(ap, int);
2416 if (int_arg > 0)
2417 arg_sign = 1;
2418 else if (int_arg < 0)
2419 arg_sign = -1;
2420 break;
2421 case 'l':
2422 long_arg =
2423# if defined(FEAT_EVAL)
2424 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2425# endif
2426 va_arg(ap, long int);
2427 if (long_arg > 0)
2428 arg_sign = 1;
2429 else if (long_arg < 0)
2430 arg_sign = -1;
2431 break;
2432 case 'L':
2433 llong_arg =
2434# if defined(FEAT_EVAL)
2435 tvs != NULL ? tv_nr(tvs, &arg_idx) :
2436# endif
2437 va_arg(ap, varnumber_T);
2438 if (llong_arg > 0)
2439 arg_sign = 1;
2440 else if (llong_arg < 0)
2441 arg_sign = -1;
2442 break;
2443 }
2444 }
2445 else
2446 {
2447 // unsigned
2448 switch (length_modifier)
2449 {
2450 case '\0':
2451 case 'h':
2452 uint_arg =
2453# if defined(FEAT_EVAL)
2454 tvs != NULL ? (unsigned)
2455 tv_nr(tvs, &arg_idx) :
2456# endif
2457 va_arg(ap, unsigned int);
2458 if (uint_arg != 0)
2459 arg_sign = 1;
2460 break;
2461 case 'l':
2462 ulong_arg =
2463# if defined(FEAT_EVAL)
2464 tvs != NULL ? (unsigned long)
2465 tv_nr(tvs, &arg_idx) :
2466# endif
2467 va_arg(ap, unsigned long int);
2468 if (ulong_arg != 0)
2469 arg_sign = 1;
2470 break;
2471 case 'L':
2472 ullong_arg =
2473# if defined(FEAT_EVAL)
2474 tvs != NULL ? (uvarnumber_T)
2475 tv_nr(tvs, &arg_idx) :
2476# endif
2477 va_arg(ap, uvarnumber_T);
2478 if (ullong_arg != 0)
2479 arg_sign = 1;
2480 break;
2481 }
2482 }
2483
2484 str_arg = tmp;
2485 str_arg_l = 0;
2486
2487 // NOTE:
2488 // For d, i, u, o, x, and X conversions, if precision is
2489 // specified, the '0' flag should be ignored. This is so
2490 // with Solaris 2.6, Digital UNIX 4.0, HPUX 10, Linux,
2491 // FreeBSD, NetBSD; but not with Perl.
2492 if (precision_specified)
2493 zero_padding = 0;
2494 if (fmt_spec == 'd')
2495 {
2496 if (force_sign && arg_sign >= 0)
2497 tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
2498 // leave negative numbers for sprintf to handle, to
2499 // avoid handling tricky cases like (short int)-32768
2500 }
2501 else if (alternate_form)
2502 {
2503 if (arg_sign != 0
2504 && (fmt_spec == 'b' || fmt_spec == 'B'
2505 || fmt_spec == 'x' || fmt_spec == 'X') )
2506 {
2507 tmp[str_arg_l++] = '0';
2508 tmp[str_arg_l++] = fmt_spec;
2509 }
2510 // alternate form should have no effect for p
2511 // conversion, but ...
2512 }
2513
2514 zero_padding_insertion_ind = str_arg_l;
2515 if (!precision_specified)
2516 precision = 1; // default precision is 1
2517 if (precision == 0 && arg_sign == 0)
2518 {
2519 // When zero value is formatted with an explicit
2520 // precision 0, the resulting formatted string is
2521 // empty (d, i, u, b, B, o, x, X, p).
2522 }
2523 else
2524 {
2525 char f[6];
2526 int f_l = 0;
2527
2528 // construct a simple format string for sprintf
2529 f[f_l++] = '%';
2530 if (!length_modifier)
2531 ;
2532 else if (length_modifier == 'L')
2533 {
2534# ifdef MSWIN
2535 f[f_l++] = 'I';
2536 f[f_l++] = '6';
2537 f[f_l++] = '4';
2538# else
2539 f[f_l++] = 'l';
2540 f[f_l++] = 'l';
2541# endif
2542 }
2543 else
2544 f[f_l++] = length_modifier;
2545 f[f_l++] = fmt_spec;
2546 f[f_l++] = '\0';
2547
2548 if (fmt_spec == 'p')
2549 str_arg_l += sprintf(tmp + str_arg_l, f, ptr_arg);
2550 else if (fmt_spec == 'b' || fmt_spec == 'B')
2551 {
2552 char b[8 * sizeof(uvarnumber_T)];
2553 size_t b_l = 0;
2554 uvarnumber_T bn = bin_arg;
2555
2556 do
2557 {
2558 b[sizeof(b) - ++b_l] = '0' + (bn & 0x1);
2559 bn >>= 1;
2560 }
2561 while (bn != 0);
2562
2563 memcpy(tmp + str_arg_l, b + sizeof(b) - b_l, b_l);
2564 str_arg_l += b_l;
2565 }
2566 else if (fmt_spec == 'd')
2567 {
2568 // signed
2569 switch (length_modifier)
2570 {
2571 case '\0': str_arg_l += sprintf(
2572 tmp + str_arg_l, f,
2573 int_arg);
2574 break;
2575 case 'h': str_arg_l += sprintf(
2576 tmp + str_arg_l, f,
2577 (short)int_arg);
2578 break;
2579 case 'l': str_arg_l += sprintf(
2580 tmp + str_arg_l, f, long_arg);
2581 break;
2582 case 'L': str_arg_l += sprintf(
2583 tmp + str_arg_l, f, llong_arg);
2584 break;
2585 }
2586 }
2587 else
2588 {
2589 // unsigned
2590 switch (length_modifier)
2591 {
2592 case '\0': str_arg_l += sprintf(
2593 tmp + str_arg_l, f,
2594 uint_arg);
2595 break;
2596 case 'h': str_arg_l += sprintf(
2597 tmp + str_arg_l, f,
2598 (unsigned short)uint_arg);
2599 break;
2600 case 'l': str_arg_l += sprintf(
2601 tmp + str_arg_l, f, ulong_arg);
2602 break;
2603 case 'L': str_arg_l += sprintf(
2604 tmp + str_arg_l, f, ullong_arg);
2605 break;
2606 }
2607 }
2608
2609 // include the optional minus sign and possible
2610 // "0x" in the region before the zero padding
2611 // insertion point
2612 if (zero_padding_insertion_ind < str_arg_l
2613 && tmp[zero_padding_insertion_ind] == '-')
2614 zero_padding_insertion_ind++;
2615 if (zero_padding_insertion_ind + 1 < str_arg_l
2616 && tmp[zero_padding_insertion_ind] == '0'
2617 && (tmp[zero_padding_insertion_ind + 1] == 'x'
2618 || tmp[zero_padding_insertion_ind + 1] == 'X'))
2619 zero_padding_insertion_ind += 2;
2620 }
2621
2622 {
2623 size_t num_of_digits = str_arg_l
2624 - zero_padding_insertion_ind;
2625
2626 if (alternate_form && fmt_spec == 'o'
2627 // unless zero is already the first
2628 // character
2629 && !(zero_padding_insertion_ind < str_arg_l
2630 && tmp[zero_padding_insertion_ind] == '0'))
2631 {
2632 // assure leading zero for alternate-form
2633 // octal numbers
2634 if (!precision_specified
2635 || precision < num_of_digits + 1)
2636 {
2637 // precision is increased to force the
2638 // first character to be zero, except if a
2639 // zero value is formatted with an
2640 // explicit precision of zero
2641 precision = num_of_digits + 1;
2642 }
2643 }
2644 // zero padding to specified precision?
2645 if (num_of_digits < precision)
2646 number_of_zeros_to_pad = precision - num_of_digits;
2647 }
2648 // zero padding to specified minimal field width?
2649 if (!justify_left && zero_padding)
2650 {
2651 int n = (int)(min_field_width - (str_arg_l
2652 + number_of_zeros_to_pad));
2653 if (n > 0)
2654 number_of_zeros_to_pad += n;
2655 }
2656 break;
2657 }
2658
2659# ifdef FEAT_FLOAT
2660 case 'f':
2661 case 'F':
2662 case 'e':
2663 case 'E':
2664 case 'g':
2665 case 'G':
2666 {
2667 // Floating point.
2668 double f;
2669 double abs_f;
2670 char format[40];
2671 int l;
2672 int remove_trailing_zeroes = FALSE;
2673
2674 f =
2675# if defined(FEAT_EVAL)
2676 tvs != NULL ? tv_float(tvs, &arg_idx) :
2677# endif
2678 va_arg(ap, double);
2679 abs_f = f < 0 ? -f : f;
2680
2681 if (fmt_spec == 'g' || fmt_spec == 'G')
2682 {
2683 // Would be nice to use %g directly, but it prints
2684 // "1.0" as "1", we don't want that.
2685 if ((abs_f >= 0.001 && abs_f < 10000000.0)
2686 || abs_f == 0.0)
2687 fmt_spec = ASCII_ISUPPER(fmt_spec) ? 'F' : 'f';
2688 else
2689 fmt_spec = fmt_spec == 'g' ? 'e' : 'E';
2690 remove_trailing_zeroes = TRUE;
2691 }
2692
2693 if ((fmt_spec == 'f' || fmt_spec == 'F') &&
2694# ifdef VAX
2695 abs_f > 1.0e38
2696# else
2697 abs_f > 1.0e307
2698# endif
2699 )
2700 {
2701 // Avoid a buffer overflow
2702 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
2703 force_sign, space_for_positive));
2704 str_arg_l = STRLEN(tmp);
2705 zero_padding = 0;
2706 }
2707 else
2708 {
2709 if (isnan(f))
2710 {
2711 // Not a number: nan or NAN
2712 STRCPY(tmp, ASCII_ISUPPER(fmt_spec) ? "NAN"
2713 : "nan");
2714 str_arg_l = 3;
2715 zero_padding = 0;
2716 }
2717 else if (isinf(f))
2718 {
2719 STRCPY(tmp, infinity_str(f > 0.0, fmt_spec,
2720 force_sign, space_for_positive));
2721 str_arg_l = STRLEN(tmp);
2722 zero_padding = 0;
2723 }
2724 else
2725 {
2726 // Regular float number
2727 format[0] = '%';
2728 l = 1;
2729 if (force_sign)
2730 format[l++] = space_for_positive ? ' ' : '+';
2731 if (precision_specified)
2732 {
2733 size_t max_prec = TMP_LEN - 10;
2734
2735 // Make sure we don't get more digits than we
2736 // have room for.
2737 if ((fmt_spec == 'f' || fmt_spec == 'F')
2738 && abs_f > 1.0)
2739 max_prec -= (size_t)log10(abs_f);
2740 if (precision > max_prec)
2741 precision = max_prec;
2742 l += sprintf(format + l, ".%d", (int)precision);
2743 }
2744 format[l] = fmt_spec == 'F' ? 'f' : fmt_spec;
2745 format[l + 1] = NUL;
2746
2747 str_arg_l = sprintf(tmp, format, f);
2748 }
2749
2750 if (remove_trailing_zeroes)
2751 {
2752 int i;
2753 char *tp;
2754
2755 // Using %g or %G: remove superfluous zeroes.
2756 if (fmt_spec == 'f' || fmt_spec == 'F')
2757 tp = tmp + str_arg_l - 1;
2758 else
2759 {
2760 tp = (char *)vim_strchr((char_u *)tmp,
2761 fmt_spec == 'e' ? 'e' : 'E');
2762 if (tp != NULL)
2763 {
2764 // Remove superfluous '+' and leading
2765 // zeroes from the exponent.
2766 if (tp[1] == '+')
2767 {
2768 // Change "1.0e+07" to "1.0e07"
2769 STRMOVE(tp + 1, tp + 2);
2770 --str_arg_l;
2771 }
2772 i = (tp[1] == '-') ? 2 : 1;
2773 while (tp[i] == '0')
2774 {
2775 // Change "1.0e07" to "1.0e7"
2776 STRMOVE(tp + i, tp + i + 1);
2777 --str_arg_l;
2778 }
2779 --tp;
2780 }
2781 }
2782
2783 if (tp != NULL && !precision_specified)
2784 // Remove trailing zeroes, but keep the one
2785 // just after a dot.
2786 while (tp > tmp + 2 && *tp == '0'
2787 && tp[-1] != '.')
2788 {
2789 STRMOVE(tp, tp + 1);
2790 --tp;
2791 --str_arg_l;
2792 }
2793 }
2794 else
2795 {
2796 char *tp;
2797
2798 // Be consistent: some printf("%e") use 1.0e+12
2799 // and some 1.0e+012. Remove one zero in the last
2800 // case.
2801 tp = (char *)vim_strchr((char_u *)tmp,
2802 fmt_spec == 'e' ? 'e' : 'E');
2803 if (tp != NULL && (tp[1] == '+' || tp[1] == '-')
2804 && tp[2] == '0'
2805 && vim_isdigit(tp[3])
2806 && vim_isdigit(tp[4]))
2807 {
2808 STRMOVE(tp + 2, tp + 3);
2809 --str_arg_l;
2810 }
2811 }
2812 }
2813 if (zero_padding && min_field_width > str_arg_l
2814 && (tmp[0] == '-' || force_sign))
2815 {
2816 // padding 0's should be inserted after the sign
2817 number_of_zeros_to_pad = min_field_width - str_arg_l;
2818 zero_padding_insertion_ind = 1;
2819 }
2820 str_arg = tmp;
2821 break;
2822 }
2823# endif
2824
2825 default:
2826 // unrecognized conversion specifier, keep format string
2827 // as-is
2828 zero_padding = 0; // turn zero padding off for non-numeric
2829 // conversion
2830 justify_left = 1;
2831 min_field_width = 0; // reset flags
2832
2833 // discard the unrecognized conversion, just keep
2834 // the unrecognized conversion character
2835 str_arg = p;
2836 str_arg_l = 0;
2837 if (*p != NUL)
2838 str_arg_l++; // include invalid conversion specifier
2839 // unchanged if not at end-of-string
2840 break;
2841 }
2842
2843 if (*p != NUL)
2844 p++; // step over the just processed conversion specifier
2845
2846 // insert padding to the left as requested by min_field_width;
2847 // this does not include the zero padding in case of numerical
2848 // conversions
2849 if (!justify_left)
2850 {
2851 // left padding with blank or zero
2852 int pn = (int)(min_field_width - (str_arg_l + number_of_zeros_to_pad));
2853
2854 if (pn > 0)
2855 {
2856 if (str_l < str_m)
2857 {
2858 size_t avail = str_m - str_l;
2859
2860 vim_memset(str + str_l, zero_padding ? '0' : ' ',
2861 (size_t)pn > avail ? avail
2862 : (size_t)pn);
2863 }
2864 str_l += pn;
2865 }
2866 }
2867
2868 // zero padding as requested by the precision or by the minimal
2869 // field width for numeric conversions required?
2870 if (number_of_zeros_to_pad == 0)
2871 {
2872 // will not copy first part of numeric right now,
2873 // force it to be copied later in its entirety
2874 zero_padding_insertion_ind = 0;
2875 }
2876 else
2877 {
2878 // insert first part of numerics (sign or '0x') before zero
2879 // padding
2880 int zn = (int)zero_padding_insertion_ind;
2881
2882 if (zn > 0)
2883 {
2884 if (str_l < str_m)
2885 {
2886 size_t avail = str_m - str_l;
2887
2888 mch_memmove(str + str_l, str_arg,
2889 (size_t)zn > avail ? avail
2890 : (size_t)zn);
2891 }
2892 str_l += zn;
2893 }
2894
2895 // insert zero padding as requested by the precision or min
2896 // field width
2897 zn = (int)number_of_zeros_to_pad;
2898 if (zn > 0)
2899 {
2900 if (str_l < str_m)
2901 {
2902 size_t avail = str_m - str_l;
2903
2904 vim_memset(str + str_l, '0',
2905 (size_t)zn > avail ? avail
2906 : (size_t)zn);
2907 }
2908 str_l += zn;
2909 }
2910 }
2911
2912 // insert formatted string
2913 // (or as-is conversion specifier for unknown conversions)
2914 {
2915 int sn = (int)(str_arg_l - zero_padding_insertion_ind);
2916
2917 if (sn > 0)
2918 {
2919 if (str_l < str_m)
2920 {
2921 size_t avail = str_m - str_l;
2922
2923 mch_memmove(str + str_l,
2924 str_arg + zero_padding_insertion_ind,
2925 (size_t)sn > avail ? avail : (size_t)sn);
2926 }
2927 str_l += sn;
2928 }
2929 }
2930
2931 // insert right padding
2932 if (justify_left)
2933 {
2934 // right blank padding to the field width
2935 int pn = (int)(min_field_width
2936 - (str_arg_l + number_of_zeros_to_pad));
2937
2938 if (pn > 0)
2939 {
2940 if (str_l < str_m)
2941 {
2942 size_t avail = str_m - str_l;
2943
2944 vim_memset(str + str_l, ' ',
2945 (size_t)pn > avail ? avail
2946 : (size_t)pn);
2947 }
2948 str_l += pn;
2949 }
2950 }
2951 vim_free(tofree);
2952 }
2953 }
2954
2955 if (str_m > 0)
2956 {
2957 // make sure the string is nul-terminated even at the expense of
2958 // overwriting the last character (shouldn't happen, but just in case)
2959 //
2960 str[str_l <= str_m - 1 ? str_l : str_m - 1] = '\0';
2961 }
2962
2963 if (tvs != NULL && tvs[arg_idx - 1].v_type != VAR_UNKNOWN)
2964 emsg(_("E767: Too many arguments to printf()"));
2965
2966 // Return the number of characters formatted (excluding trailing nul
2967 // character), that is, the number of characters that would have been
2968 // written to the buffer if it were large enough.
2969 return (int)str_l;
2970}
2971
2972#endif // PROTO