blob: bc0fc5e315d8843f4b641eaa2f8461f43c5cd833 [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
14#include "vim.h"
15
16/*
17 * Copy "string" into newly allocated memory.
18 */
19 char_u *
20vim_strsave(char_u *string)
21{
22 char_u *p;
23 size_t len;
24
25 len = STRLEN(string) + 1;
26 p = alloc(len);
27 if (p != NULL)
28 mch_memmove(p, string, len);
29 return p;
30}
31
32/*
33 * Copy up to "len" bytes of "string" into newly allocated memory and
34 * terminate with a NUL.
35 * The allocated memory always has size "len + 1", also when "string" is
36 * shorter.
37 */
38 char_u *
39vim_strnsave(char_u *string, size_t len)
40{
41 char_u *p;
42
43 p = alloc(len + 1);
44 if (p != NULL)
45 {
46 STRNCPY(p, string, len);
47 p[len] = NUL;
48 }
49 return p;
50}
51
52/*
53 * Same as vim_strsave(), but any characters found in esc_chars are preceded
54 * by a backslash.
55 */
56 char_u *
57vim_strsave_escaped(char_u *string, char_u *esc_chars)
58{
59 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
60}
61
62/*
63 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
64 * characters where rem_backslash() would remove the backslash.
65 * Escape the characters with "cc".
66 */
67 char_u *
68vim_strsave_escaped_ext(
69 char_u *string,
70 char_u *esc_chars,
71 int cc,
72 int bsl)
73{
74 char_u *p;
75 char_u *p2;
76 char_u *escaped_string;
77 unsigned length;
78 int l;
79
80 /*
81 * First count the number of backslashes required.
82 * Then allocate the memory and insert them.
83 */
84 length = 1; // count the trailing NUL
85 for (p = string; *p; p++)
86 {
87 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
88 {
89 length += l; // count a multibyte char
90 p += l - 1;
91 continue;
92 }
93 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
94 ++length; // count a backslash
95 ++length; // count an ordinary char
96 }
97 escaped_string = alloc(length);
98 if (escaped_string != NULL)
99 {
100 p2 = escaped_string;
101 for (p = string; *p; p++)
102 {
103 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
104 {
105 mch_memmove(p2, p, (size_t)l);
106 p2 += l;
107 p += l - 1; // skip multibyte char
108 continue;
109 }
110 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
111 *p2++ = cc;
112 *p2++ = *p;
113 }
114 *p2 = NUL;
115 }
116 return escaped_string;
117}
118
119/*
120 * Return TRUE when 'shell' has "csh" in the tail.
121 */
122 int
123csh_like_shell(void)
124{
125 return (strstr((char *)gettail(p_sh), "csh") != NULL);
126}
127
128/*
129 * Escape "string" for use as a shell argument with system().
130 * This uses single quotes, except when we know we need to use double quotes
131 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
132 * PowerShell also uses a novel escaping for enclosed single quotes - double
133 * them up.
134 * Escape a newline, depending on the 'shell' option.
135 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
136 * with "<" like "<cfile>".
137 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
138 * Returns the result in allocated memory, NULL if we have run out.
139 */
140 char_u *
141vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
142{
143 unsigned length;
144 char_u *p;
145 char_u *d;
146 char_u *escaped_string;
147 int l;
148 int csh_like;
149 char_u *shname;
150 int powershell;
151# ifdef MSWIN
152 int double_quotes;
153# endif
154
155 // Only csh and similar shells expand '!' within single quotes. For sh and
156 // the like we must not put a backslash before it, it will be taken
157 // literally. If do_special is set the '!' will be escaped twice.
158 // Csh also needs to have "\n" escaped twice when do_special is set.
159 csh_like = csh_like_shell();
160
161 // PowerShell uses it's own version for quoting single quotes
162 shname = gettail(p_sh);
163 powershell = strstr((char *)shname, "pwsh") != NULL;
164# ifdef MSWIN
165 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
166 // PowerShell only accepts single quotes so override shellslash.
167 double_quotes = !powershell && !p_ssl;
168# endif
169
170 // First count the number of extra bytes required.
171 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
172 for (p = string; *p != NUL; MB_PTR_ADV(p))
173 {
174# ifdef MSWIN
175 if (double_quotes)
176 {
177 if (*p == '"')
178 ++length; // " -> ""
179 }
180 else
181# endif
182 if (*p == '\'')
183 {
184 if (powershell)
185 length +=2; // ' => ''
186 else
187 length += 3; // ' => '\''
188 }
189 if ((*p == '\n' && (csh_like || do_newline))
190 || (*p == '!' && (csh_like || do_special)))
191 {
192 ++length; // insert backslash
193 if (csh_like && do_special)
194 ++length; // insert backslash
195 }
196 if (do_special && find_cmdline_var(p, &l) >= 0)
197 {
198 ++length; // insert backslash
199 p += l - 1;
200 }
201 }
202
203 // Allocate memory for the result and fill it.
204 escaped_string = alloc(length);
205 if (escaped_string != NULL)
206 {
207 d = escaped_string;
208
209 // add opening quote
210# ifdef MSWIN
211 if (double_quotes)
212 *d++ = '"';
213 else
214# endif
215 *d++ = '\'';
216
217 for (p = string; *p != NUL; )
218 {
219# ifdef MSWIN
220 if (double_quotes)
221 {
222 if (*p == '"')
223 {
224 *d++ = '"';
225 *d++ = '"';
226 ++p;
227 continue;
228 }
229 }
230 else
231# endif
232 if (*p == '\'')
233 {
234 if (powershell)
235 {
236 *d++ = '\'';
237 *d++ = '\'';
238 }
239 else
240 {
241 *d++ = '\'';
242 *d++ = '\\';
243 *d++ = '\'';
244 *d++ = '\'';
245 }
246 ++p;
247 continue;
248 }
249 if ((*p == '\n' && (csh_like || do_newline))
250 || (*p == '!' && (csh_like || do_special)))
251 {
252 *d++ = '\\';
253 if (csh_like && do_special)
254 *d++ = '\\';
255 *d++ = *p++;
256 continue;
257 }
258 if (do_special && find_cmdline_var(p, &l) >= 0)
259 {
260 *d++ = '\\'; // insert backslash
261 while (--l >= 0) // copy the var
262 *d++ = *p++;
263 continue;
264 }
265
266 MB_COPY_CHAR(p, d);
267 }
268
269 // add terminating quote and finish with a NUL
270# ifdef MSWIN
271 if (double_quotes)
272 *d++ = '"';
273 else
274# endif
275 *d++ = '\'';
276 *d = NUL;
277 }
278
279 return escaped_string;
280}
281
282/*
283 * Like vim_strsave(), but make all characters uppercase.
284 * This uses ASCII lower-to-upper case translation, language independent.
285 */
286 char_u *
287vim_strsave_up(char_u *string)
288{
289 char_u *p1;
290
291 p1 = vim_strsave(string);
292 vim_strup(p1);
293 return p1;
294}
295
296/*
297 * Like vim_strnsave(), but make all characters uppercase.
298 * This uses ASCII lower-to-upper case translation, language independent.
299 */
300 char_u *
301vim_strnsave_up(char_u *string, size_t len)
302{
303 char_u *p1;
304
305 p1 = vim_strnsave(string, len);
306 vim_strup(p1);
307 return p1;
308}
309
310/*
311 * ASCII lower-to-upper case translation, language independent.
312 */
313 void
314vim_strup(
315 char_u *p)
316{
317 char_u *p2;
318 int c;
319
320 if (p != NULL)
321 {
322 p2 = p;
323 while ((c = *p2) != NUL)
324#ifdef EBCDIC
325 *p2++ = isalpha(c) ? toupper(c) : c;
326#else
327 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
328#endif
329 }
330}
331
332#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
333/*
334 * Make string "s" all upper-case and return it in allocated memory.
335 * Handles multi-byte characters as well as possible.
336 * Returns NULL when out of memory.
337 */
338 static char_u *
339strup_save(char_u *orig)
340{
341 char_u *p;
342 char_u *res;
343
344 res = p = vim_strsave(orig);
345
346 if (res != NULL)
347 while (*p != NUL)
348 {
349 int l;
350
351 if (enc_utf8)
352 {
353 int c, uc;
354 int newl;
355 char_u *s;
356
357 c = utf_ptr2char(p);
358 l = utf_ptr2len(p);
359 if (c == 0)
360 {
361 // overlong sequence, use only the first byte
362 c = *p;
363 l = 1;
364 }
365 uc = utf_toupper(c);
366
367 // Reallocate string when byte count changes. This is rare,
368 // thus it's OK to do another malloc()/free().
369 newl = utf_char2len(uc);
370 if (newl != l)
371 {
372 s = alloc(STRLEN(res) + 1 + newl - l);
373 if (s == NULL)
374 {
375 vim_free(res);
376 return NULL;
377 }
378 mch_memmove(s, res, p - res);
379 STRCPY(s + (p - res) + newl, p + l);
380 p = s + (p - res);
381 vim_free(res);
382 res = s;
383 }
384
385 utf_char2bytes(uc, p);
386 p += newl;
387 }
388 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
389 p += l; // skip multi-byte character
390 else
391 {
392 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
393 p++;
394 }
395 }
396
397 return res;
398}
399
400/*
401 * Make string "s" all lower-case and return it in allocated memory.
402 * Handles multi-byte characters as well as possible.
403 * Returns NULL when out of memory.
404 */
405 char_u *
406strlow_save(char_u *orig)
407{
408 char_u *p;
409 char_u *res;
410
411 res = p = vim_strsave(orig);
412
413 if (res != NULL)
414 while (*p != NUL)
415 {
416 int l;
417
418 if (enc_utf8)
419 {
420 int c, lc;
421 int newl;
422 char_u *s;
423
424 c = utf_ptr2char(p);
425 l = utf_ptr2len(p);
426 if (c == 0)
427 {
428 // overlong sequence, use only the first byte
429 c = *p;
430 l = 1;
431 }
432 lc = utf_tolower(c);
433
434 // Reallocate string when byte count changes. This is rare,
435 // thus it's OK to do another malloc()/free().
436 newl = utf_char2len(lc);
437 if (newl != l)
438 {
439 s = alloc(STRLEN(res) + 1 + newl - l);
440 if (s == NULL)
441 {
442 vim_free(res);
443 return NULL;
444 }
445 mch_memmove(s, res, p - res);
446 STRCPY(s + (p - res) + newl, p + l);
447 p = s + (p - res);
448 vim_free(res);
449 res = s;
450 }
451
452 utf_char2bytes(lc, p);
453 p += newl;
454 }
455 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
456 p += l; // skip multi-byte character
457 else
458 {
459 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
460 p++;
461 }
462 }
463
464 return res;
465}
466#endif
467
468/*
469 * delete spaces at the end of a string
470 */
471 void
472del_trailing_spaces(char_u *ptr)
473{
474 char_u *q;
475
476 q = ptr + STRLEN(ptr);
477 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
478 *q = NUL;
479}
480
481/*
482 * Like strncpy(), but always terminate the result with one NUL.
483 * "to" must be "len + 1" long!
484 */
485 void
486vim_strncpy(char_u *to, char_u *from, size_t len)
487{
488 STRNCPY(to, from, len);
489 to[len] = NUL;
490}
491
492/*
493 * Like strcat(), but make sure the result fits in "tosize" bytes and is
494 * always NUL terminated. "from" and "to" may overlap.
495 */
496 void
497vim_strcat(char_u *to, char_u *from, size_t tosize)
498{
499 size_t tolen = STRLEN(to);
500 size_t fromlen = STRLEN(from);
501
502 if (tolen + fromlen + 1 > tosize)
503 {
504 mch_memmove(to + tolen, from, tosize - tolen - 1);
505 to[tosize - 1] = NUL;
506 }
507 else
508 mch_memmove(to + tolen, from, fromlen + 1);
509}
510
511#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
512/*
513 * Compare two strings, ignoring case, using current locale.
514 * Doesn't work for multi-byte characters.
515 * return 0 for match, < 0 for smaller, > 0 for bigger
516 */
517 int
518vim_stricmp(char *s1, char *s2)
519{
520 int i;
521
522 for (;;)
523 {
524 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
525 if (i != 0)
526 return i; // this character different
527 if (*s1 == NUL)
528 break; // strings match until NUL
529 ++s1;
530 ++s2;
531 }
532 return 0; // strings match
533}
534#endif
535
536#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
537/*
538 * Compare two strings, for length "len", ignoring case, using current locale.
539 * Doesn't work for multi-byte characters.
540 * return 0 for match, < 0 for smaller, > 0 for bigger
541 */
542 int
543vim_strnicmp(char *s1, char *s2, size_t len)
544{
545 int i;
546
547 while (len > 0)
548 {
549 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
550 if (i != 0)
551 return i; // this character different
552 if (*s1 == NUL)
553 break; // strings match until NUL
554 ++s1;
555 ++s2;
556 --len;
557 }
558 return 0; // strings match
559}
560#endif
561
562/*
563 * Search for first occurrence of "c" in "string".
564 * Version of strchr() that handles unsigned char strings with characters from
565 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
566 * end of the string.
567 */
568 char_u *
569vim_strchr(char_u *string, int c)
570{
571 char_u *p;
572 int b;
573
574 p = string;
575 if (enc_utf8 && c >= 0x80)
576 {
577 while (*p != NUL)
578 {
579 int l = utfc_ptr2len(p);
580
581 // Avoid matching an illegal byte here.
582 if (utf_ptr2char(p) == c && l > 1)
583 return p;
584 p += l;
585 }
586 return NULL;
587 }
588 if (enc_dbcs != 0 && c > 255)
589 {
590 int n2 = c & 0xff;
591
592 c = ((unsigned)c >> 8) & 0xff;
593 while ((b = *p) != NUL)
594 {
595 if (b == c && p[1] == n2)
596 return p;
597 p += (*mb_ptr2len)(p);
598 }
599 return NULL;
600 }
601 if (has_mbyte)
602 {
603 while ((b = *p) != NUL)
604 {
605 if (b == c)
606 return p;
607 p += (*mb_ptr2len)(p);
608 }
609 return NULL;
610 }
611 while ((b = *p) != NUL)
612 {
613 if (b == c)
614 return p;
615 ++p;
616 }
617 return NULL;
618}
619
620/*
621 * Version of strchr() that only works for bytes and handles unsigned char
622 * strings with characters above 128 correctly. It also doesn't return a
623 * pointer to the NUL at the end of the string.
624 */
625 char_u *
626vim_strbyte(char_u *string, int c)
627{
628 char_u *p = string;
629
630 while (*p != NUL)
631 {
632 if (*p == c)
633 return p;
634 ++p;
635 }
636 return NULL;
637}
638
639/*
640 * Search for last occurrence of "c" in "string".
641 * Version of strrchr() that handles unsigned char strings with characters from
642 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
643 * end of the string.
644 * Return NULL if not found.
645 * Does not handle multi-byte char for "c"!
646 */
647 char_u *
648vim_strrchr(char_u *string, int c)
649{
650 char_u *retval = NULL;
651 char_u *p = string;
652
653 while (*p)
654 {
655 if (*p == c)
656 retval = p;
657 MB_PTR_ADV(p);
658 }
659 return retval;
660}
661
662/*
663 * Vim's version of strpbrk(), in case it's missing.
664 * Don't generate a prototype for this, causes problems when it's not used.
665 */
666#ifndef PROTO
667# ifndef HAVE_STRPBRK
668# ifdef vim_strpbrk
669# undef vim_strpbrk
670# endif
671 char_u *
672vim_strpbrk(char_u *s, char_u *charset)
673{
674 while (*s)
675 {
676 if (vim_strchr(charset, *s) != NULL)
677 return s;
678 MB_PTR_ADV(s);
679 }
680 return NULL;
681}
682# endif
683#endif
684
685/*
686 * Sort an array of strings.
687 */
688static int sort_compare(const void *s1, const void *s2);
689
690 static int
691sort_compare(const void *s1, const void *s2)
692{
693 return STRCMP(*(char **)s1, *(char **)s2);
694}
695
696 void
697sort_strings(
698 char_u **files,
699 int count)
700{
701 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
702}
703
#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
/*
 * Return TRUE if string "s" contains a non-ASCII byte (128 or higher).
 * When "s" is NULL FALSE is returned.
 */
    int
has_non_ascii(char_u *s)
{
    if (s == NULL)
        return FALSE;

    while (*s != NUL)
    {
        if (*s >= 128)
            return TRUE;
        ++s;
    }
    return FALSE;
}
#endif
721
722/*
723 * Concatenate two strings and return the result in allocated memory.
724 * Returns NULL when out of memory.
725 */
726 char_u *
727concat_str(char_u *str1, char_u *str2)
728{
729 char_u *dest;
730 size_t l = str1 == NULL ? 0 : STRLEN(str1);
731
732 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
733 if (dest != NULL)
734 {
735 if (str1 == NULL)
736 *dest = NUL;
737 else
738 STRCPY(dest, str1);
739 if (str2 != NULL)
740 STRCPY(dest + l, str2);
741 }
742 return dest;
743}
744
745#if defined(FEAT_EVAL) || defined(PROTO)
746
747/*
748 * Return string "str" in ' quotes, doubling ' characters.
749 * If "str" is NULL an empty string is assumed.
750 * If "function" is TRUE make it function('string').
751 */
752 char_u *
753string_quote(char_u *str, int function)
754{
755 unsigned len;
756 char_u *p, *r, *s;
757
758 len = (function ? 13 : 3);
759 if (str != NULL)
760 {
761 len += (unsigned)STRLEN(str);
762 for (p = str; *p != NUL; MB_PTR_ADV(p))
763 if (*p == '\'')
764 ++len;
765 }
766 s = r = alloc(len);
767 if (r != NULL)
768 {
769 if (function)
770 {
771 STRCPY(r, "function('");
772 r += 10;
773 }
774 else
775 *r++ = '\'';
776 if (str != NULL)
777 for (p = str; *p != NUL; )
778 {
779 if (*p == '\'')
780 *r++ = '\'';
781 MB_COPY_CHAR(p, r);
782 }
783 *r++ = '\'';
784 if (function)
785 *r++ = ')';
786 *r++ = NUL;
787 }
788 return s;
789}
790
791 static void
792byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
793{
794 char_u *t;
795 char_u *str;
796 varnumber_T idx;
797
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200798 rettv->vval.v_number = -1;
799
800 if (in_vim9script()
801 && (check_for_string_arg(argvars, 0) == FAIL
802 || check_for_number_arg(argvars, 1) == FAIL))
803 return;
804
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200805 str = tv_get_string_chk(&argvars[0]);
806 idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200807 if (str == NULL || idx < 0)
808 return;
809
810 t = str;
811 for ( ; idx > 0; idx--)
812 {
813 if (*t == NUL) // EOL reached
814 return;
815 if (enc_utf8 && comp)
816 t += utf_ptr2len(t);
817 else
818 t += (*mb_ptr2len)(t);
819 }
820 rettv->vval.v_number = (varnumber_T)(t - str);
821}
822
823/*
824 * "byteidx()" function
825 */
826 void
827f_byteidx(typval_T *argvars, typval_T *rettv)
828{
829 byteidx(argvars, rettv, FALSE);
830}
831
832/*
833 * "byteidxcomp()" function
834 */
835 void
836f_byteidxcomp(typval_T *argvars, typval_T *rettv)
837{
838 byteidx(argvars, rettv, TRUE);
839}
840
841/*
842 * "charidx()" function
843 */
844 void
845f_charidx(typval_T *argvars, typval_T *rettv)
846{
847 char_u *str;
848 varnumber_T idx;
849 varnumber_T countcc = FALSE;
850 char_u *p;
851 int len;
852 int (*ptr2len)(char_u *);
853
854 rettv->vval.v_number = -1;
855
856 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
857 || (argvars[2].v_type != VAR_UNKNOWN
858 && argvars[2].v_type != VAR_NUMBER
859 && argvars[2].v_type != VAR_BOOL))
860 {
861 emsg(_(e_invarg));
862 return;
863 }
864
865 str = tv_get_string_chk(&argvars[0]);
866 idx = tv_get_number_chk(&argvars[1], NULL);
867 if (str == NULL || idx < 0)
868 return;
869
870 if (argvars[2].v_type != VAR_UNKNOWN)
871 countcc = tv_get_bool(&argvars[2]);
872 if (countcc < 0 || countcc > 1)
873 {
874 semsg(_(e_using_number_as_bool_nr), countcc);
875 return;
876 }
877
878 if (enc_utf8 && countcc)
879 ptr2len = utf_ptr2len;
880 else
881 ptr2len = mb_ptr2len;
882
883 for (p = str, len = 0; p <= str + idx; len++)
884 {
885 if (*p == NUL)
886 return;
887 p += ptr2len(p);
888 }
889
890 rettv->vval.v_number = len > 0 ? len - 1 : 0;
891}
892
893/*
894 * "str2list()" function
895 */
896 void
897f_str2list(typval_T *argvars, typval_T *rettv)
898{
899 char_u *p;
900 int utf8 = FALSE;
901
902 if (rettv_list_alloc(rettv) == FAIL)
903 return;
904
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +0200905 if (in_vim9script()
906 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +0200907 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +0200908 return;
909
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200910 if (argvars[1].v_type != VAR_UNKNOWN)
911 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
912
913 p = tv_get_string(&argvars[0]);
914
915 if (has_mbyte || utf8)
916 {
917 int (*ptr2len)(char_u *);
918 int (*ptr2char)(char_u *);
919
920 if (utf8 || enc_utf8)
921 {
922 ptr2len = utf_ptr2len;
923 ptr2char = utf_ptr2char;
924 }
925 else
926 {
927 ptr2len = mb_ptr2len;
928 ptr2char = mb_ptr2char;
929 }
930
931 for ( ; *p != NUL; p += (*ptr2len)(p))
932 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
933 }
934 else
935 for ( ; *p != NUL; ++p)
936 list_append_number(rettv->vval.v_list, *p);
937}
938
939/*
940 * "str2nr()" function
941 */
942 void
943f_str2nr(typval_T *argvars, typval_T *rettv)
944{
945 int base = 10;
946 char_u *p;
947 varnumber_T n;
948 int what = 0;
949 int isneg;
950
951 if (argvars[1].v_type != VAR_UNKNOWN)
952 {
953 base = (int)tv_get_number(&argvars[1]);
954 if (base != 2 && base != 8 && base != 10 && base != 16)
955 {
956 emsg(_(e_invarg));
957 return;
958 }
959 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
960 what |= STR2NR_QUOTE;
961 }
962
963 p = skipwhite(tv_get_string_strict(&argvars[0]));
964 isneg = (*p == '-');
965 if (*p == '+' || *p == '-')
966 p = skipwhite(p + 1);
967 switch (base)
968 {
969 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
970 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
971 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
972 }
973 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
974 // Text after the number is silently ignored.
975 if (isneg)
976 rettv->vval.v_number = -n;
977 else
978 rettv->vval.v_number = n;
979
980}
981
982/*
983 * "strgetchar()" function
984 */
985 void
986f_strgetchar(typval_T *argvars, typval_T *rettv)
987{
988 char_u *str;
989 int len;
990 int error = FALSE;
991 int charidx;
992 int byteidx = 0;
993
994 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200995
996 if (in_vim9script()
997 && (check_for_string_arg(argvars, 0) == FAIL
998 || check_for_number_arg(argvars, 1) == FAIL))
999 return;
1000
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001001 str = tv_get_string_chk(&argvars[0]);
1002 if (str == NULL)
1003 return;
1004 len = (int)STRLEN(str);
1005 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1006 if (error)
1007 return;
1008
1009 while (charidx >= 0 && byteidx < len)
1010 {
1011 if (charidx == 0)
1012 {
1013 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1014 break;
1015 }
1016 --charidx;
1017 byteidx += MB_CPTR2LEN(str + byteidx);
1018 }
1019}
1020
1021/*
1022 * "stridx()" function
1023 */
1024 void
1025f_stridx(typval_T *argvars, typval_T *rettv)
1026{
1027 char_u buf[NUMBUFLEN];
1028 char_u *needle;
1029 char_u *haystack;
1030 char_u *save_haystack;
1031 char_u *pos;
1032 int start_idx;
1033
1034 needle = tv_get_string_chk(&argvars[1]);
1035 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1036 rettv->vval.v_number = -1;
1037 if (needle == NULL || haystack == NULL)
1038 return; // type error; errmsg already given
1039
1040 if (argvars[2].v_type != VAR_UNKNOWN)
1041 {
1042 int error = FALSE;
1043
1044 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1045 if (error || start_idx >= (int)STRLEN(haystack))
1046 return;
1047 if (start_idx >= 0)
1048 haystack += start_idx;
1049 }
1050
1051 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1052 if (pos != NULL)
1053 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1054}
1055
1056/*
1057 * "string()" function
1058 */
1059 void
1060f_string(typval_T *argvars, typval_T *rettv)
1061{
1062 char_u *tofree;
1063 char_u numbuf[NUMBUFLEN];
1064
1065 rettv->v_type = VAR_STRING;
1066 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1067 get_copyID());
1068 // Make a copy if we have a value but it's not in allocated memory.
1069 if (rettv->vval.v_string != NULL && tofree == NULL)
1070 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1071}
1072
1073/*
1074 * "strlen()" function
1075 */
1076 void
1077f_strlen(typval_T *argvars, typval_T *rettv)
1078{
1079 rettv->vval.v_number = (varnumber_T)(STRLEN(
1080 tv_get_string(&argvars[0])));
1081}
1082
1083 static void
1084strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1085{
1086 char_u *s = tv_get_string(&argvars[0]);
1087 varnumber_T len = 0;
1088 int (*func_mb_ptr2char_adv)(char_u **pp);
1089
1090 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1091 while (*s != NUL)
1092 {
1093 func_mb_ptr2char_adv(&s);
1094 ++len;
1095 }
1096 rettv->vval.v_number = len;
1097}
1098
1099/*
1100 * "strcharlen()" function
1101 */
1102 void
1103f_strcharlen(typval_T *argvars, typval_T *rettv)
1104{
1105 strchar_common(argvars, rettv, TRUE);
1106}
1107
1108/*
1109 * "strchars()" function
1110 */
1111 void
1112f_strchars(typval_T *argvars, typval_T *rettv)
1113{
1114 varnumber_T skipcc = FALSE;
1115
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001116 if (in_vim9script()
1117 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001118 || check_for_opt_bool_arg(argvars, 1) == FAIL))
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001119 return;
1120
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001121 if (argvars[1].v_type != VAR_UNKNOWN)
1122 skipcc = tv_get_bool(&argvars[1]);
1123 if (skipcc < 0 || skipcc > 1)
1124 semsg(_(e_using_number_as_bool_nr), skipcc);
1125 else
1126 strchar_common(argvars, rettv, skipcc);
1127}
1128
1129/*
1130 * "strdisplaywidth()" function
1131 */
1132 void
1133f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1134{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001135 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001136 int col = 0;
1137
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001138 rettv->vval.v_number = -1;
1139
1140 if (in_vim9script()
1141 && (check_for_string_arg(argvars, 0) == FAIL
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001142 || check_for_opt_number_arg(argvars, 1) == FAIL))
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001143 return;
1144
1145 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001146 if (argvars[1].v_type != VAR_UNKNOWN)
1147 col = (int)tv_get_number(&argvars[1]);
1148
1149 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1150}
1151
1152/*
1153 * "strwidth()" function
1154 */
1155 void
1156f_strwidth(typval_T *argvars, typval_T *rettv)
1157{
1158 char_u *s = tv_get_string_strict(&argvars[0]);
1159
1160 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1161}
1162
1163/*
1164 * "strcharpart()" function
1165 */
1166 void
1167f_strcharpart(typval_T *argvars, typval_T *rettv)
1168{
1169 char_u *p;
1170 int nchar;
1171 int nbyte = 0;
1172 int charlen;
1173 int skipcc = FALSE;
1174 int len = 0;
1175 int slen;
1176 int error = FALSE;
1177
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001178 if (in_vim9script()
1179 && (check_for_string_arg(argvars, 0) == FAIL
1180 || check_for_number_arg(argvars, 1) == FAIL
1181 || check_for_opt_number_arg(argvars, 2) == FAIL
1182 || (argvars[2].v_type != VAR_UNKNOWN
1183 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1184 return;
1185
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001186 p = tv_get_string(&argvars[0]);
1187 slen = (int)STRLEN(p);
1188
1189 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1190 if (!error)
1191 {
1192 if (argvars[2].v_type != VAR_UNKNOWN
1193 && argvars[3].v_type != VAR_UNKNOWN)
1194 {
1195 skipcc = tv_get_bool(&argvars[3]);
1196 if (skipcc < 0 || skipcc > 1)
1197 {
1198 semsg(_(e_using_number_as_bool_nr), skipcc);
1199 return;
1200 }
1201 }
1202
1203 if (nchar > 0)
1204 while (nchar > 0 && nbyte < slen)
1205 {
1206 if (skipcc)
1207 nbyte += mb_ptr2len(p + nbyte);
1208 else
1209 nbyte += MB_CPTR2LEN(p + nbyte);
1210 --nchar;
1211 }
1212 else
1213 nbyte = nchar;
1214 if (argvars[2].v_type != VAR_UNKNOWN)
1215 {
1216 charlen = (int)tv_get_number(&argvars[2]);
1217 while (charlen > 0 && nbyte + len < slen)
1218 {
1219 int off = nbyte + len;
1220
1221 if (off < 0)
1222 len += 1;
1223 else
1224 {
1225 if (skipcc)
1226 len += mb_ptr2len(p + off);
1227 else
1228 len += MB_CPTR2LEN(p + off);
1229 }
1230 --charlen;
1231 }
1232 }
1233 else
1234 len = slen - nbyte; // default: all bytes that are available.
1235 }
1236
1237 /*
1238 * Only return the overlap between the specified part and the actual
1239 * string.
1240 */
1241 if (nbyte < 0)
1242 {
1243 len += nbyte;
1244 nbyte = 0;
1245 }
1246 else if (nbyte > slen)
1247 nbyte = slen;
1248 if (len < 0)
1249 len = 0;
1250 else if (nbyte + len > slen)
1251 len = slen - nbyte;
1252
1253 rettv->v_type = VAR_STRING;
1254 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1255}
1256
1257/*
1258 * "strpart()" function
1259 */
1260 void
1261f_strpart(typval_T *argvars, typval_T *rettv)
1262{
1263 char_u *p;
1264 int n;
1265 int len;
1266 int slen;
1267 int error = FALSE;
1268
Yegappan Lakshmanan83494b42021-07-20 17:51:51 +02001269 if (in_vim9script()
1270 && (check_for_string_arg(argvars, 0) == FAIL
1271 || check_for_number_arg(argvars, 1) == FAIL
1272 || check_for_opt_number_arg(argvars, 2) == FAIL
1273 || (argvars[2].v_type != VAR_UNKNOWN
1274 && check_for_opt_bool_arg(argvars, 3) == FAIL)))
1275 return;
1276
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001277 p = tv_get_string(&argvars[0]);
1278 slen = (int)STRLEN(p);
1279
1280 n = (int)tv_get_number_chk(&argvars[1], &error);
1281 if (error)
1282 len = 0;
1283 else if (argvars[2].v_type != VAR_UNKNOWN)
1284 len = (int)tv_get_number(&argvars[2]);
1285 else
1286 len = slen - n; // default len: all bytes that are available.
1287
1288 // Only return the overlap between the specified part and the actual
1289 // string.
1290 if (n < 0)
1291 {
1292 len += n;
1293 n = 0;
1294 }
1295 else if (n > slen)
1296 n = slen;
1297 if (len < 0)
1298 len = 0;
1299 else if (n + len > slen)
1300 len = slen - n;
1301
1302 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1303 {
1304 int off;
1305
1306 // length in characters
1307 for (off = n; off < slen && len > 0; --len)
1308 off += mb_ptr2len(p + off);
1309 len = off - n;
1310 }
1311
1312 rettv->v_type = VAR_STRING;
1313 rettv->vval.v_string = vim_strnsave(p + n, len);
1314}
1315
1316/*
1317 * "strridx()" function
1318 */
1319 void
1320f_strridx(typval_T *argvars, typval_T *rettv)
1321{
1322 char_u buf[NUMBUFLEN];
1323 char_u *needle;
1324 char_u *haystack;
1325 char_u *rest;
1326 char_u *lastmatch = NULL;
1327 int haystack_len, end_idx;
1328
1329 needle = tv_get_string_chk(&argvars[1]);
1330 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1331
1332 rettv->vval.v_number = -1;
1333 if (needle == NULL || haystack == NULL)
1334 return; // type error; errmsg already given
1335
1336 haystack_len = (int)STRLEN(haystack);
1337 if (argvars[2].v_type != VAR_UNKNOWN)
1338 {
1339 // Third argument: upper limit for index
1340 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1341 if (end_idx < 0)
1342 return; // can never find a match
1343 }
1344 else
1345 end_idx = haystack_len;
1346
1347 if (*needle == NUL)
1348 {
1349 // Empty string matches past the end.
1350 lastmatch = haystack + end_idx;
1351 }
1352 else
1353 {
1354 for (rest = haystack; *rest != '\0'; ++rest)
1355 {
1356 rest = (char_u *)strstr((char *)rest, (char *)needle);
1357 if (rest == NULL || rest > haystack + end_idx)
1358 break;
1359 lastmatch = rest;
1360 }
1361 }
1362
1363 if (lastmatch == NULL)
1364 rettv->vval.v_number = -1;
1365 else
1366 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1367}
1368
1369/*
1370 * "strtrans()" function
1371 */
1372 void
1373f_strtrans(typval_T *argvars, typval_T *rettv)
1374{
1375 rettv->v_type = VAR_STRING;
1376 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1377}
1378
1379/*
1380 * "tolower(string)" function
1381 */
1382 void
1383f_tolower(typval_T *argvars, typval_T *rettv)
1384{
1385 rettv->v_type = VAR_STRING;
1386 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1387}
1388
1389/*
1390 * "toupper(string)" function
1391 */
1392 void
1393f_toupper(typval_T *argvars, typval_T *rettv)
1394{
1395 rettv->v_type = VAR_STRING;
1396 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1397}
1398
1399/*
1400 * "tr(string, fromstr, tostr)" function
1401 */
1402 void
1403f_tr(typval_T *argvars, typval_T *rettv)
1404{
1405 char_u *in_str;
1406 char_u *fromstr;
1407 char_u *tostr;
1408 char_u *p;
1409 int inlen;
1410 int fromlen;
1411 int tolen;
1412 int idx;
1413 char_u *cpstr;
1414 int cplen;
1415 int first = TRUE;
1416 char_u buf[NUMBUFLEN];
1417 char_u buf2[NUMBUFLEN];
1418 garray_T ga;
1419
1420 in_str = tv_get_string(&argvars[0]);
1421 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1422 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1423
1424 // Default return value: empty string.
1425 rettv->v_type = VAR_STRING;
1426 rettv->vval.v_string = NULL;
1427 if (fromstr == NULL || tostr == NULL)
1428 return; // type error; errmsg already given
1429 ga_init2(&ga, (int)sizeof(char), 80);
1430
1431 if (!has_mbyte)
1432 // not multi-byte: fromstr and tostr must be the same length
1433 if (STRLEN(fromstr) != STRLEN(tostr))
1434 {
1435error:
1436 semsg(_(e_invarg2), fromstr);
1437 ga_clear(&ga);
1438 return;
1439 }
1440
1441 // fromstr and tostr have to contain the same number of chars
1442 while (*in_str != NUL)
1443 {
1444 if (has_mbyte)
1445 {
1446 inlen = (*mb_ptr2len)(in_str);
1447 cpstr = in_str;
1448 cplen = inlen;
1449 idx = 0;
1450 for (p = fromstr; *p != NUL; p += fromlen)
1451 {
1452 fromlen = (*mb_ptr2len)(p);
1453 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1454 {
1455 for (p = tostr; *p != NUL; p += tolen)
1456 {
1457 tolen = (*mb_ptr2len)(p);
1458 if (idx-- == 0)
1459 {
1460 cplen = tolen;
1461 cpstr = p;
1462 break;
1463 }
1464 }
1465 if (*p == NUL) // tostr is shorter than fromstr
1466 goto error;
1467 break;
1468 }
1469 ++idx;
1470 }
1471
1472 if (first && cpstr == in_str)
1473 {
1474 // Check that fromstr and tostr have the same number of
1475 // (multi-byte) characters. Done only once when a character
1476 // of in_str doesn't appear in fromstr.
1477 first = FALSE;
1478 for (p = tostr; *p != NUL; p += tolen)
1479 {
1480 tolen = (*mb_ptr2len)(p);
1481 --idx;
1482 }
1483 if (idx != 0)
1484 goto error;
1485 }
1486
1487 (void)ga_grow(&ga, cplen);
1488 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1489 ga.ga_len += cplen;
1490
1491 in_str += inlen;
1492 }
1493 else
1494 {
1495 // When not using multi-byte chars we can do it faster.
1496 p = vim_strchr(fromstr, *in_str);
1497 if (p != NULL)
1498 ga_append(&ga, tostr[p - fromstr]);
1499 else
1500 ga_append(&ga, *in_str);
1501 ++in_str;
1502 }
1503 }
1504
1505 // add a terminating NUL
1506 (void)ga_grow(&ga, 1);
1507 ga_append(&ga, NUL);
1508
1509 rettv->vval.v_string = ga.ga_data;
1510}
1511
1512/*
1513 * "trim({expr})" function
1514 */
1515 void
1516f_trim(typval_T *argvars, typval_T *rettv)
1517{
1518 char_u buf1[NUMBUFLEN];
1519 char_u buf2[NUMBUFLEN];
1520 char_u *head = tv_get_string_buf_chk(&argvars[0], buf1);
1521 char_u *mask = NULL;
1522 char_u *tail;
1523 char_u *prev;
1524 char_u *p;
1525 int c1;
1526 int dir = 0;
1527
1528 rettv->v_type = VAR_STRING;
1529 rettv->vval.v_string = NULL;
1530 if (head == NULL)
1531 return;
1532
1533 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
1534 {
1535 semsg(_(e_invarg2), tv_get_string(&argvars[1]));
1536 return;
1537 }
1538
1539 if (argvars[1].v_type == VAR_STRING)
1540 {
1541 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1542
1543 if (argvars[2].v_type != VAR_UNKNOWN)
1544 {
1545 int error = 0;
1546
1547 // leading or trailing characters to trim
1548 dir = (int)tv_get_number_chk(&argvars[2], &error);
1549 if (error)
1550 return;
1551 if (dir < 0 || dir > 2)
1552 {
1553 semsg(_(e_invarg2), tv_get_string(&argvars[2]));
1554 return;
1555 }
1556 }
1557 }
1558
1559 if (dir == 0 || dir == 1)
1560 {
1561 // Trim leading characters
1562 while (*head != NUL)
1563 {
1564 c1 = PTR2CHAR(head);
1565 if (mask == NULL)
1566 {
1567 if (c1 > ' ' && c1 != 0xa0)
1568 break;
1569 }
1570 else
1571 {
1572 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1573 if (c1 == PTR2CHAR(p))
1574 break;
1575 if (*p == NUL)
1576 break;
1577 }
1578 MB_PTR_ADV(head);
1579 }
1580 }
1581
1582 tail = head + STRLEN(head);
1583 if (dir == 0 || dir == 2)
1584 {
1585 // Trim trailing characters
1586 for (; tail > head; tail = prev)
1587 {
1588 prev = tail;
1589 MB_PTR_BACK(head, prev);
1590 c1 = PTR2CHAR(prev);
1591 if (mask == NULL)
1592 {
1593 if (c1 > ' ' && c1 != 0xa0)
1594 break;
1595 }
1596 else
1597 {
1598 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1599 if (c1 == PTR2CHAR(p))
1600 break;
1601 if (*p == NUL)
1602 break;
1603 }
1604 }
1605 }
1606 rettv->vval.v_string = vim_strnsave(head, tail - head);
1607}
1608
1609#endif