/* vi:set ts=8 sts=4 sw=4 noet:
 *
 * VIM - Vi IMproved	by Bram Moolenaar
 *
 * Do ":help uganda"  in Vim to read copying and usage conditions.
 * Do ":help credits" in Vim to see a list of people who contributed.
 * See README.txt for an overview of the Vim source code.
 */

/*
 * strings.c: string manipulation functions
 */
13
14#include "vim.h"
15
16/*
17 * Copy "string" into newly allocated memory.
18 */
19 char_u *
20vim_strsave(char_u *string)
21{
22 char_u *p;
23 size_t len;
24
25 len = STRLEN(string) + 1;
26 p = alloc(len);
27 if (p != NULL)
28 mch_memmove(p, string, len);
29 return p;
30}
31
32/*
33 * Copy up to "len" bytes of "string" into newly allocated memory and
34 * terminate with a NUL.
35 * The allocated memory always has size "len + 1", also when "string" is
36 * shorter.
37 */
38 char_u *
39vim_strnsave(char_u *string, size_t len)
40{
41 char_u *p;
42
43 p = alloc(len + 1);
44 if (p != NULL)
45 {
46 STRNCPY(p, string, len);
47 p[len] = NUL;
48 }
49 return p;
50}
51
52/*
53 * Same as vim_strsave(), but any characters found in esc_chars are preceded
54 * by a backslash.
55 */
56 char_u *
57vim_strsave_escaped(char_u *string, char_u *esc_chars)
58{
59 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
60}
61
62/*
63 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
64 * characters where rem_backslash() would remove the backslash.
65 * Escape the characters with "cc".
66 */
67 char_u *
68vim_strsave_escaped_ext(
69 char_u *string,
70 char_u *esc_chars,
71 int cc,
72 int bsl)
73{
74 char_u *p;
75 char_u *p2;
76 char_u *escaped_string;
77 unsigned length;
78 int l;
79
80 /*
81 * First count the number of backslashes required.
82 * Then allocate the memory and insert them.
83 */
84 length = 1; // count the trailing NUL
85 for (p = string; *p; p++)
86 {
87 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
88 {
89 length += l; // count a multibyte char
90 p += l - 1;
91 continue;
92 }
93 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
94 ++length; // count a backslash
95 ++length; // count an ordinary char
96 }
97 escaped_string = alloc(length);
98 if (escaped_string != NULL)
99 {
100 p2 = escaped_string;
101 for (p = string; *p; p++)
102 {
103 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
104 {
105 mch_memmove(p2, p, (size_t)l);
106 p2 += l;
107 p += l - 1; // skip multibyte char
108 continue;
109 }
110 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
111 *p2++ = cc;
112 *p2++ = *p;
113 }
114 *p2 = NUL;
115 }
116 return escaped_string;
117}
118
119/*
120 * Return TRUE when 'shell' has "csh" in the tail.
121 */
122 int
123csh_like_shell(void)
124{
125 return (strstr((char *)gettail(p_sh), "csh") != NULL);
126}
127
128/*
129 * Escape "string" for use as a shell argument with system().
130 * This uses single quotes, except when we know we need to use double quotes
131 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
132 * PowerShell also uses a novel escaping for enclosed single quotes - double
133 * them up.
134 * Escape a newline, depending on the 'shell' option.
135 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
136 * with "<" like "<cfile>".
137 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
138 * Returns the result in allocated memory, NULL if we have run out.
139 */
140 char_u *
141vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
142{
143 unsigned length;
144 char_u *p;
145 char_u *d;
146 char_u *escaped_string;
147 int l;
148 int csh_like;
149 char_u *shname;
150 int powershell;
151# ifdef MSWIN
152 int double_quotes;
153# endif
154
155 // Only csh and similar shells expand '!' within single quotes. For sh and
156 // the like we must not put a backslash before it, it will be taken
157 // literally. If do_special is set the '!' will be escaped twice.
158 // Csh also needs to have "\n" escaped twice when do_special is set.
159 csh_like = csh_like_shell();
160
161 // PowerShell uses it's own version for quoting single quotes
162 shname = gettail(p_sh);
163 powershell = strstr((char *)shname, "pwsh") != NULL;
164# ifdef MSWIN
165 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
166 // PowerShell only accepts single quotes so override shellslash.
167 double_quotes = !powershell && !p_ssl;
168# endif
169
170 // First count the number of extra bytes required.
171 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
172 for (p = string; *p != NUL; MB_PTR_ADV(p))
173 {
174# ifdef MSWIN
175 if (double_quotes)
176 {
177 if (*p == '"')
178 ++length; // " -> ""
179 }
180 else
181# endif
182 if (*p == '\'')
183 {
184 if (powershell)
185 length +=2; // ' => ''
186 else
187 length += 3; // ' => '\''
188 }
189 if ((*p == '\n' && (csh_like || do_newline))
190 || (*p == '!' && (csh_like || do_special)))
191 {
192 ++length; // insert backslash
193 if (csh_like && do_special)
194 ++length; // insert backslash
195 }
196 if (do_special && find_cmdline_var(p, &l) >= 0)
197 {
198 ++length; // insert backslash
199 p += l - 1;
200 }
201 }
202
203 // Allocate memory for the result and fill it.
204 escaped_string = alloc(length);
205 if (escaped_string != NULL)
206 {
207 d = escaped_string;
208
209 // add opening quote
210# ifdef MSWIN
211 if (double_quotes)
212 *d++ = '"';
213 else
214# endif
215 *d++ = '\'';
216
217 for (p = string; *p != NUL; )
218 {
219# ifdef MSWIN
220 if (double_quotes)
221 {
222 if (*p == '"')
223 {
224 *d++ = '"';
225 *d++ = '"';
226 ++p;
227 continue;
228 }
229 }
230 else
231# endif
232 if (*p == '\'')
233 {
234 if (powershell)
235 {
236 *d++ = '\'';
237 *d++ = '\'';
238 }
239 else
240 {
241 *d++ = '\'';
242 *d++ = '\\';
243 *d++ = '\'';
244 *d++ = '\'';
245 }
246 ++p;
247 continue;
248 }
249 if ((*p == '\n' && (csh_like || do_newline))
250 || (*p == '!' && (csh_like || do_special)))
251 {
252 *d++ = '\\';
253 if (csh_like && do_special)
254 *d++ = '\\';
255 *d++ = *p++;
256 continue;
257 }
258 if (do_special && find_cmdline_var(p, &l) >= 0)
259 {
260 *d++ = '\\'; // insert backslash
261 while (--l >= 0) // copy the var
262 *d++ = *p++;
263 continue;
264 }
265
266 MB_COPY_CHAR(p, d);
267 }
268
269 // add terminating quote and finish with a NUL
270# ifdef MSWIN
271 if (double_quotes)
272 *d++ = '"';
273 else
274# endif
275 *d++ = '\'';
276 *d = NUL;
277 }
278
279 return escaped_string;
280}
281
282/*
283 * Like vim_strsave(), but make all characters uppercase.
284 * This uses ASCII lower-to-upper case translation, language independent.
285 */
286 char_u *
287vim_strsave_up(char_u *string)
288{
289 char_u *p1;
290
291 p1 = vim_strsave(string);
292 vim_strup(p1);
293 return p1;
294}
295
296/*
297 * Like vim_strnsave(), but make all characters uppercase.
298 * This uses ASCII lower-to-upper case translation, language independent.
299 */
300 char_u *
301vim_strnsave_up(char_u *string, size_t len)
302{
303 char_u *p1;
304
305 p1 = vim_strnsave(string, len);
306 vim_strup(p1);
307 return p1;
308}
309
310/*
311 * ASCII lower-to-upper case translation, language independent.
312 */
313 void
314vim_strup(
315 char_u *p)
316{
317 char_u *p2;
318 int c;
319
320 if (p != NULL)
321 {
322 p2 = p;
323 while ((c = *p2) != NUL)
324#ifdef EBCDIC
325 *p2++ = isalpha(c) ? toupper(c) : c;
326#else
327 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
328#endif
329 }
330}
331
332#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
333/*
334 * Make string "s" all upper-case and return it in allocated memory.
335 * Handles multi-byte characters as well as possible.
336 * Returns NULL when out of memory.
337 */
338 static char_u *
339strup_save(char_u *orig)
340{
341 char_u *p;
342 char_u *res;
343
344 res = p = vim_strsave(orig);
345
346 if (res != NULL)
347 while (*p != NUL)
348 {
349 int l;
350
351 if (enc_utf8)
352 {
353 int c, uc;
354 int newl;
355 char_u *s;
356
357 c = utf_ptr2char(p);
358 l = utf_ptr2len(p);
359 if (c == 0)
360 {
361 // overlong sequence, use only the first byte
362 c = *p;
363 l = 1;
364 }
365 uc = utf_toupper(c);
366
367 // Reallocate string when byte count changes. This is rare,
368 // thus it's OK to do another malloc()/free().
369 newl = utf_char2len(uc);
370 if (newl != l)
371 {
372 s = alloc(STRLEN(res) + 1 + newl - l);
373 if (s == NULL)
374 {
375 vim_free(res);
376 return NULL;
377 }
378 mch_memmove(s, res, p - res);
379 STRCPY(s + (p - res) + newl, p + l);
380 p = s + (p - res);
381 vim_free(res);
382 res = s;
383 }
384
385 utf_char2bytes(uc, p);
386 p += newl;
387 }
388 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
389 p += l; // skip multi-byte character
390 else
391 {
392 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
393 p++;
394 }
395 }
396
397 return res;
398}
399
400/*
401 * Make string "s" all lower-case and return it in allocated memory.
402 * Handles multi-byte characters as well as possible.
403 * Returns NULL when out of memory.
404 */
405 char_u *
406strlow_save(char_u *orig)
407{
408 char_u *p;
409 char_u *res;
410
411 res = p = vim_strsave(orig);
412
413 if (res != NULL)
414 while (*p != NUL)
415 {
416 int l;
417
418 if (enc_utf8)
419 {
420 int c, lc;
421 int newl;
422 char_u *s;
423
424 c = utf_ptr2char(p);
425 l = utf_ptr2len(p);
426 if (c == 0)
427 {
428 // overlong sequence, use only the first byte
429 c = *p;
430 l = 1;
431 }
432 lc = utf_tolower(c);
433
434 // Reallocate string when byte count changes. This is rare,
435 // thus it's OK to do another malloc()/free().
436 newl = utf_char2len(lc);
437 if (newl != l)
438 {
439 s = alloc(STRLEN(res) + 1 + newl - l);
440 if (s == NULL)
441 {
442 vim_free(res);
443 return NULL;
444 }
445 mch_memmove(s, res, p - res);
446 STRCPY(s + (p - res) + newl, p + l);
447 p = s + (p - res);
448 vim_free(res);
449 res = s;
450 }
451
452 utf_char2bytes(lc, p);
453 p += newl;
454 }
455 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
456 p += l; // skip multi-byte character
457 else
458 {
459 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
460 p++;
461 }
462 }
463
464 return res;
465}
466#endif
467
468/*
469 * delete spaces at the end of a string
470 */
471 void
472del_trailing_spaces(char_u *ptr)
473{
474 char_u *q;
475
476 q = ptr + STRLEN(ptr);
477 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
478 *q = NUL;
479}
480
481/*
482 * Like strncpy(), but always terminate the result with one NUL.
483 * "to" must be "len + 1" long!
484 */
485 void
486vim_strncpy(char_u *to, char_u *from, size_t len)
487{
488 STRNCPY(to, from, len);
489 to[len] = NUL;
490}
491
492/*
493 * Like strcat(), but make sure the result fits in "tosize" bytes and is
494 * always NUL terminated. "from" and "to" may overlap.
495 */
496 void
497vim_strcat(char_u *to, char_u *from, size_t tosize)
498{
499 size_t tolen = STRLEN(to);
500 size_t fromlen = STRLEN(from);
501
502 if (tolen + fromlen + 1 > tosize)
503 {
504 mch_memmove(to + tolen, from, tosize - tolen - 1);
505 to[tosize - 1] = NUL;
506 }
507 else
508 mch_memmove(to + tolen, from, fromlen + 1);
509}
510
511#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
512/*
513 * Compare two strings, ignoring case, using current locale.
514 * Doesn't work for multi-byte characters.
515 * return 0 for match, < 0 for smaller, > 0 for bigger
516 */
517 int
518vim_stricmp(char *s1, char *s2)
519{
520 int i;
521
522 for (;;)
523 {
524 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
525 if (i != 0)
526 return i; // this character different
527 if (*s1 == NUL)
528 break; // strings match until NUL
529 ++s1;
530 ++s2;
531 }
532 return 0; // strings match
533}
534#endif
535
536#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
537/*
538 * Compare two strings, for length "len", ignoring case, using current locale.
539 * Doesn't work for multi-byte characters.
540 * return 0 for match, < 0 for smaller, > 0 for bigger
541 */
542 int
543vim_strnicmp(char *s1, char *s2, size_t len)
544{
545 int i;
546
547 while (len > 0)
548 {
549 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
550 if (i != 0)
551 return i; // this character different
552 if (*s1 == NUL)
553 break; // strings match until NUL
554 ++s1;
555 ++s2;
556 --len;
557 }
558 return 0; // strings match
559}
560#endif
561
562/*
563 * Search for first occurrence of "c" in "string".
564 * Version of strchr() that handles unsigned char strings with characters from
565 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
566 * end of the string.
567 */
568 char_u *
569vim_strchr(char_u *string, int c)
570{
571 char_u *p;
572 int b;
573
574 p = string;
575 if (enc_utf8 && c >= 0x80)
576 {
577 while (*p != NUL)
578 {
579 int l = utfc_ptr2len(p);
580
581 // Avoid matching an illegal byte here.
582 if (utf_ptr2char(p) == c && l > 1)
583 return p;
584 p += l;
585 }
586 return NULL;
587 }
588 if (enc_dbcs != 0 && c > 255)
589 {
590 int n2 = c & 0xff;
591
592 c = ((unsigned)c >> 8) & 0xff;
593 while ((b = *p) != NUL)
594 {
595 if (b == c && p[1] == n2)
596 return p;
597 p += (*mb_ptr2len)(p);
598 }
599 return NULL;
600 }
601 if (has_mbyte)
602 {
603 while ((b = *p) != NUL)
604 {
605 if (b == c)
606 return p;
607 p += (*mb_ptr2len)(p);
608 }
609 return NULL;
610 }
611 while ((b = *p) != NUL)
612 {
613 if (b == c)
614 return p;
615 ++p;
616 }
617 return NULL;
618}
619
620/*
621 * Version of strchr() that only works for bytes and handles unsigned char
622 * strings with characters above 128 correctly. It also doesn't return a
623 * pointer to the NUL at the end of the string.
624 */
625 char_u *
626vim_strbyte(char_u *string, int c)
627{
628 char_u *p = string;
629
630 while (*p != NUL)
631 {
632 if (*p == c)
633 return p;
634 ++p;
635 }
636 return NULL;
637}
638
639/*
640 * Search for last occurrence of "c" in "string".
641 * Version of strrchr() that handles unsigned char strings with characters from
642 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
643 * end of the string.
644 * Return NULL if not found.
645 * Does not handle multi-byte char for "c"!
646 */
647 char_u *
648vim_strrchr(char_u *string, int c)
649{
650 char_u *retval = NULL;
651 char_u *p = string;
652
653 while (*p)
654 {
655 if (*p == c)
656 retval = p;
657 MB_PTR_ADV(p);
658 }
659 return retval;
660}
661
662/*
663 * Vim's version of strpbrk(), in case it's missing.
664 * Don't generate a prototype for this, causes problems when it's not used.
665 */
666#ifndef PROTO
667# ifndef HAVE_STRPBRK
668# ifdef vim_strpbrk
669# undef vim_strpbrk
670# endif
671 char_u *
672vim_strpbrk(char_u *s, char_u *charset)
673{
674 while (*s)
675 {
676 if (vim_strchr(charset, *s) != NULL)
677 return s;
678 MB_PTR_ADV(s);
679 }
680 return NULL;
681}
682# endif
683#endif
684
/*
 * Sort an array of strings.
 * sort_compare() is the qsort() callback: "s1" and "s2" point to string
 * pointers, the strings are compared with STRCMP().
 */
static int sort_compare(const void *s1, const void *s2);

    static int
sort_compare(const void *s1, const void *s2)
{
    char	*left = *(char **)s1;
    char	*right = *(char **)s2;

    return STRCMP(left, right);
}
695
696 void
697sort_strings(
698 char_u **files,
699 int count)
700{
701 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
702}
703
704#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
705/*
706 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
707 * When "s" is NULL FALSE is returned.
708 */
709 int
710has_non_ascii(char_u *s)
711{
712 char_u *p;
713
714 if (s != NULL)
715 for (p = s; *p != NUL; ++p)
716 if (*p >= 128)
717 return TRUE;
718 return FALSE;
719}
720#endif
721
722/*
723 * Concatenate two strings and return the result in allocated memory.
724 * Returns NULL when out of memory.
725 */
726 char_u *
727concat_str(char_u *str1, char_u *str2)
728{
729 char_u *dest;
730 size_t l = str1 == NULL ? 0 : STRLEN(str1);
731
732 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
733 if (dest != NULL)
734 {
735 if (str1 == NULL)
736 *dest = NUL;
737 else
738 STRCPY(dest, str1);
739 if (str2 != NULL)
740 STRCPY(dest + l, str2);
741 }
742 return dest;
743}
744
745#if defined(FEAT_EVAL) || defined(PROTO)
746
747/*
748 * Return string "str" in ' quotes, doubling ' characters.
749 * If "str" is NULL an empty string is assumed.
750 * If "function" is TRUE make it function('string').
751 */
752 char_u *
753string_quote(char_u *str, int function)
754{
755 unsigned len;
756 char_u *p, *r, *s;
757
758 len = (function ? 13 : 3);
759 if (str != NULL)
760 {
761 len += (unsigned)STRLEN(str);
762 for (p = str; *p != NUL; MB_PTR_ADV(p))
763 if (*p == '\'')
764 ++len;
765 }
766 s = r = alloc(len);
767 if (r != NULL)
768 {
769 if (function)
770 {
771 STRCPY(r, "function('");
772 r += 10;
773 }
774 else
775 *r++ = '\'';
776 if (str != NULL)
777 for (p = str; *p != NUL; )
778 {
779 if (*p == '\'')
780 *r++ = '\'';
781 MB_COPY_CHAR(p, r);
782 }
783 *r++ = '\'';
784 if (function)
785 *r++ = ')';
786 *r++ = NUL;
787 }
788 return s;
789}
790
791 static void
792byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
793{
794 char_u *t;
795 char_u *str;
796 varnumber_T idx;
797
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200798 rettv->vval.v_number = -1;
799
800 if (in_vim9script()
801 && (check_for_string_arg(argvars, 0) == FAIL
802 || check_for_number_arg(argvars, 1) == FAIL))
803 return;
804
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200805 str = tv_get_string_chk(&argvars[0]);
806 idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200807 if (str == NULL || idx < 0)
808 return;
809
810 t = str;
811 for ( ; idx > 0; idx--)
812 {
813 if (*t == NUL) // EOL reached
814 return;
815 if (enc_utf8 && comp)
816 t += utf_ptr2len(t);
817 else
818 t += (*mb_ptr2len)(t);
819 }
820 rettv->vval.v_number = (varnumber_T)(t - str);
821}
822
823/*
824 * "byteidx()" function
825 */
826 void
827f_byteidx(typval_T *argvars, typval_T *rettv)
828{
829 byteidx(argvars, rettv, FALSE);
830}
831
832/*
833 * "byteidxcomp()" function
834 */
835 void
836f_byteidxcomp(typval_T *argvars, typval_T *rettv)
837{
838 byteidx(argvars, rettv, TRUE);
839}
840
841/*
842 * "charidx()" function
843 */
844 void
845f_charidx(typval_T *argvars, typval_T *rettv)
846{
847 char_u *str;
848 varnumber_T idx;
849 varnumber_T countcc = FALSE;
850 char_u *p;
851 int len;
852 int (*ptr2len)(char_u *);
853
854 rettv->vval.v_number = -1;
855
856 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
857 || (argvars[2].v_type != VAR_UNKNOWN
858 && argvars[2].v_type != VAR_NUMBER
859 && argvars[2].v_type != VAR_BOOL))
860 {
861 emsg(_(e_invarg));
862 return;
863 }
864
865 str = tv_get_string_chk(&argvars[0]);
866 idx = tv_get_number_chk(&argvars[1], NULL);
867 if (str == NULL || idx < 0)
868 return;
869
870 if (argvars[2].v_type != VAR_UNKNOWN)
871 countcc = tv_get_bool(&argvars[2]);
872 if (countcc < 0 || countcc > 1)
873 {
874 semsg(_(e_using_number_as_bool_nr), countcc);
875 return;
876 }
877
878 if (enc_utf8 && countcc)
879 ptr2len = utf_ptr2len;
880 else
881 ptr2len = mb_ptr2len;
882
883 for (p = str, len = 0; p <= str + idx; len++)
884 {
885 if (*p == NUL)
886 return;
887 p += ptr2len(p);
888 }
889
890 rettv->vval.v_number = len > 0 ? len - 1 : 0;
891}
892
893/*
894 * "str2list()" function
895 */
896 void
897f_str2list(typval_T *argvars, typval_T *rettv)
898{
899 char_u *p;
900 int utf8 = FALSE;
901
902 if (rettv_list_alloc(rettv) == FAIL)
903 return;
904
905 if (argvars[1].v_type != VAR_UNKNOWN)
906 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
907
908 p = tv_get_string(&argvars[0]);
909
910 if (has_mbyte || utf8)
911 {
912 int (*ptr2len)(char_u *);
913 int (*ptr2char)(char_u *);
914
915 if (utf8 || enc_utf8)
916 {
917 ptr2len = utf_ptr2len;
918 ptr2char = utf_ptr2char;
919 }
920 else
921 {
922 ptr2len = mb_ptr2len;
923 ptr2char = mb_ptr2char;
924 }
925
926 for ( ; *p != NUL; p += (*ptr2len)(p))
927 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
928 }
929 else
930 for ( ; *p != NUL; ++p)
931 list_append_number(rettv->vval.v_list, *p);
932}
933
934/*
935 * "str2nr()" function
936 */
937 void
938f_str2nr(typval_T *argvars, typval_T *rettv)
939{
940 int base = 10;
941 char_u *p;
942 varnumber_T n;
943 int what = 0;
944 int isneg;
945
946 if (argvars[1].v_type != VAR_UNKNOWN)
947 {
948 base = (int)tv_get_number(&argvars[1]);
949 if (base != 2 && base != 8 && base != 10 && base != 16)
950 {
951 emsg(_(e_invarg));
952 return;
953 }
954 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
955 what |= STR2NR_QUOTE;
956 }
957
958 p = skipwhite(tv_get_string_strict(&argvars[0]));
959 isneg = (*p == '-');
960 if (*p == '+' || *p == '-')
961 p = skipwhite(p + 1);
962 switch (base)
963 {
964 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
965 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
966 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
967 }
968 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
969 // Text after the number is silently ignored.
970 if (isneg)
971 rettv->vval.v_number = -n;
972 else
973 rettv->vval.v_number = n;
974
975}
976
977/*
978 * "strgetchar()" function
979 */
980 void
981f_strgetchar(typval_T *argvars, typval_T *rettv)
982{
983 char_u *str;
984 int len;
985 int error = FALSE;
986 int charidx;
987 int byteidx = 0;
988
989 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200990
991 if (in_vim9script()
992 && (check_for_string_arg(argvars, 0) == FAIL
993 || check_for_number_arg(argvars, 1) == FAIL))
994 return;
995
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200996 str = tv_get_string_chk(&argvars[0]);
997 if (str == NULL)
998 return;
999 len = (int)STRLEN(str);
1000 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1001 if (error)
1002 return;
1003
1004 while (charidx >= 0 && byteidx < len)
1005 {
1006 if (charidx == 0)
1007 {
1008 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1009 break;
1010 }
1011 --charidx;
1012 byteidx += MB_CPTR2LEN(str + byteidx);
1013 }
1014}
1015
1016/*
1017 * "stridx()" function
1018 */
1019 void
1020f_stridx(typval_T *argvars, typval_T *rettv)
1021{
1022 char_u buf[NUMBUFLEN];
1023 char_u *needle;
1024 char_u *haystack;
1025 char_u *save_haystack;
1026 char_u *pos;
1027 int start_idx;
1028
1029 needle = tv_get_string_chk(&argvars[1]);
1030 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1031 rettv->vval.v_number = -1;
1032 if (needle == NULL || haystack == NULL)
1033 return; // type error; errmsg already given
1034
1035 if (argvars[2].v_type != VAR_UNKNOWN)
1036 {
1037 int error = FALSE;
1038
1039 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1040 if (error || start_idx >= (int)STRLEN(haystack))
1041 return;
1042 if (start_idx >= 0)
1043 haystack += start_idx;
1044 }
1045
1046 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1047 if (pos != NULL)
1048 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1049}
1050
1051/*
1052 * "string()" function
1053 */
1054 void
1055f_string(typval_T *argvars, typval_T *rettv)
1056{
1057 char_u *tofree;
1058 char_u numbuf[NUMBUFLEN];
1059
1060 rettv->v_type = VAR_STRING;
1061 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1062 get_copyID());
1063 // Make a copy if we have a value but it's not in allocated memory.
1064 if (rettv->vval.v_string != NULL && tofree == NULL)
1065 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1066}
1067
1068/*
1069 * "strlen()" function
1070 */
1071 void
1072f_strlen(typval_T *argvars, typval_T *rettv)
1073{
1074 rettv->vval.v_number = (varnumber_T)(STRLEN(
1075 tv_get_string(&argvars[0])));
1076}
1077
1078 static void
1079strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1080{
1081 char_u *s = tv_get_string(&argvars[0]);
1082 varnumber_T len = 0;
1083 int (*func_mb_ptr2char_adv)(char_u **pp);
1084
1085 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1086 while (*s != NUL)
1087 {
1088 func_mb_ptr2char_adv(&s);
1089 ++len;
1090 }
1091 rettv->vval.v_number = len;
1092}
1093
1094/*
1095 * "strcharlen()" function
1096 */
1097 void
1098f_strcharlen(typval_T *argvars, typval_T *rettv)
1099{
1100 strchar_common(argvars, rettv, TRUE);
1101}
1102
1103/*
1104 * "strchars()" function
1105 */
1106 void
1107f_strchars(typval_T *argvars, typval_T *rettv)
1108{
1109 varnumber_T skipcc = FALSE;
1110
1111 if (argvars[1].v_type != VAR_UNKNOWN)
1112 skipcc = tv_get_bool(&argvars[1]);
1113 if (skipcc < 0 || skipcc > 1)
1114 semsg(_(e_using_number_as_bool_nr), skipcc);
1115 else
1116 strchar_common(argvars, rettv, skipcc);
1117}
1118
1119/*
1120 * "strdisplaywidth()" function
1121 */
1122 void
1123f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1124{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001125 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001126 int col = 0;
1127
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001128 rettv->vval.v_number = -1;
1129
1130 if (in_vim9script()
1131 && (check_for_string_arg(argvars, 0) == FAIL
1132 || (argvars[1].v_type != VAR_UNKNOWN
1133 && check_for_number_arg(argvars, 1) == FAIL)))
1134 return;
1135
1136 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001137 if (argvars[1].v_type != VAR_UNKNOWN)
1138 col = (int)tv_get_number(&argvars[1]);
1139
1140 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1141}
1142
1143/*
1144 * "strwidth()" function
1145 */
1146 void
1147f_strwidth(typval_T *argvars, typval_T *rettv)
1148{
1149 char_u *s = tv_get_string_strict(&argvars[0]);
1150
1151 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1152}
1153
1154/*
1155 * "strcharpart()" function
1156 */
1157 void
1158f_strcharpart(typval_T *argvars, typval_T *rettv)
1159{
1160 char_u *p;
1161 int nchar;
1162 int nbyte = 0;
1163 int charlen;
1164 int skipcc = FALSE;
1165 int len = 0;
1166 int slen;
1167 int error = FALSE;
1168
1169 p = tv_get_string(&argvars[0]);
1170 slen = (int)STRLEN(p);
1171
1172 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1173 if (!error)
1174 {
1175 if (argvars[2].v_type != VAR_UNKNOWN
1176 && argvars[3].v_type != VAR_UNKNOWN)
1177 {
1178 skipcc = tv_get_bool(&argvars[3]);
1179 if (skipcc < 0 || skipcc > 1)
1180 {
1181 semsg(_(e_using_number_as_bool_nr), skipcc);
1182 return;
1183 }
1184 }
1185
1186 if (nchar > 0)
1187 while (nchar > 0 && nbyte < slen)
1188 {
1189 if (skipcc)
1190 nbyte += mb_ptr2len(p + nbyte);
1191 else
1192 nbyte += MB_CPTR2LEN(p + nbyte);
1193 --nchar;
1194 }
1195 else
1196 nbyte = nchar;
1197 if (argvars[2].v_type != VAR_UNKNOWN)
1198 {
1199 charlen = (int)tv_get_number(&argvars[2]);
1200 while (charlen > 0 && nbyte + len < slen)
1201 {
1202 int off = nbyte + len;
1203
1204 if (off < 0)
1205 len += 1;
1206 else
1207 {
1208 if (skipcc)
1209 len += mb_ptr2len(p + off);
1210 else
1211 len += MB_CPTR2LEN(p + off);
1212 }
1213 --charlen;
1214 }
1215 }
1216 else
1217 len = slen - nbyte; // default: all bytes that are available.
1218 }
1219
1220 /*
1221 * Only return the overlap between the specified part and the actual
1222 * string.
1223 */
1224 if (nbyte < 0)
1225 {
1226 len += nbyte;
1227 nbyte = 0;
1228 }
1229 else if (nbyte > slen)
1230 nbyte = slen;
1231 if (len < 0)
1232 len = 0;
1233 else if (nbyte + len > slen)
1234 len = slen - nbyte;
1235
1236 rettv->v_type = VAR_STRING;
1237 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1238}
1239
1240/*
1241 * "strpart()" function
1242 */
1243 void
1244f_strpart(typval_T *argvars, typval_T *rettv)
1245{
1246 char_u *p;
1247 int n;
1248 int len;
1249 int slen;
1250 int error = FALSE;
1251
1252 p = tv_get_string(&argvars[0]);
1253 slen = (int)STRLEN(p);
1254
1255 n = (int)tv_get_number_chk(&argvars[1], &error);
1256 if (error)
1257 len = 0;
1258 else if (argvars[2].v_type != VAR_UNKNOWN)
1259 len = (int)tv_get_number(&argvars[2]);
1260 else
1261 len = slen - n; // default len: all bytes that are available.
1262
1263 // Only return the overlap between the specified part and the actual
1264 // string.
1265 if (n < 0)
1266 {
1267 len += n;
1268 n = 0;
1269 }
1270 else if (n > slen)
1271 n = slen;
1272 if (len < 0)
1273 len = 0;
1274 else if (n + len > slen)
1275 len = slen - n;
1276
1277 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1278 {
1279 int off;
1280
1281 // length in characters
1282 for (off = n; off < slen && len > 0; --len)
1283 off += mb_ptr2len(p + off);
1284 len = off - n;
1285 }
1286
1287 rettv->v_type = VAR_STRING;
1288 rettv->vval.v_string = vim_strnsave(p + n, len);
1289}
1290
1291/*
1292 * "strridx()" function
1293 */
1294 void
1295f_strridx(typval_T *argvars, typval_T *rettv)
1296{
1297 char_u buf[NUMBUFLEN];
1298 char_u *needle;
1299 char_u *haystack;
1300 char_u *rest;
1301 char_u *lastmatch = NULL;
1302 int haystack_len, end_idx;
1303
1304 needle = tv_get_string_chk(&argvars[1]);
1305 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1306
1307 rettv->vval.v_number = -1;
1308 if (needle == NULL || haystack == NULL)
1309 return; // type error; errmsg already given
1310
1311 haystack_len = (int)STRLEN(haystack);
1312 if (argvars[2].v_type != VAR_UNKNOWN)
1313 {
1314 // Third argument: upper limit for index
1315 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1316 if (end_idx < 0)
1317 return; // can never find a match
1318 }
1319 else
1320 end_idx = haystack_len;
1321
1322 if (*needle == NUL)
1323 {
1324 // Empty string matches past the end.
1325 lastmatch = haystack + end_idx;
1326 }
1327 else
1328 {
1329 for (rest = haystack; *rest != '\0'; ++rest)
1330 {
1331 rest = (char_u *)strstr((char *)rest, (char *)needle);
1332 if (rest == NULL || rest > haystack + end_idx)
1333 break;
1334 lastmatch = rest;
1335 }
1336 }
1337
1338 if (lastmatch == NULL)
1339 rettv->vval.v_number = -1;
1340 else
1341 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1342}
1343
1344/*
1345 * "strtrans()" function
1346 */
1347 void
1348f_strtrans(typval_T *argvars, typval_T *rettv)
1349{
1350 rettv->v_type = VAR_STRING;
1351 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1352}
1353
1354/*
1355 * "tolower(string)" function
1356 */
1357 void
1358f_tolower(typval_T *argvars, typval_T *rettv)
1359{
1360 rettv->v_type = VAR_STRING;
1361 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1362}
1363
1364/*
1365 * "toupper(string)" function
1366 */
1367 void
1368f_toupper(typval_T *argvars, typval_T *rettv)
1369{
1370 rettv->v_type = VAR_STRING;
1371 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1372}
1373
1374/*
1375 * "tr(string, fromstr, tostr)" function
1376 */
1377 void
1378f_tr(typval_T *argvars, typval_T *rettv)
1379{
1380 char_u *in_str;
1381 char_u *fromstr;
1382 char_u *tostr;
1383 char_u *p;
1384 int inlen;
1385 int fromlen;
1386 int tolen;
1387 int idx;
1388 char_u *cpstr;
1389 int cplen;
1390 int first = TRUE;
1391 char_u buf[NUMBUFLEN];
1392 char_u buf2[NUMBUFLEN];
1393 garray_T ga;
1394
1395 in_str = tv_get_string(&argvars[0]);
1396 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1397 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1398
1399 // Default return value: empty string.
1400 rettv->v_type = VAR_STRING;
1401 rettv->vval.v_string = NULL;
1402 if (fromstr == NULL || tostr == NULL)
1403 return; // type error; errmsg already given
1404 ga_init2(&ga, (int)sizeof(char), 80);
1405
1406 if (!has_mbyte)
1407 // not multi-byte: fromstr and tostr must be the same length
1408 if (STRLEN(fromstr) != STRLEN(tostr))
1409 {
1410error:
1411 semsg(_(e_invarg2), fromstr);
1412 ga_clear(&ga);
1413 return;
1414 }
1415
1416 // fromstr and tostr have to contain the same number of chars
1417 while (*in_str != NUL)
1418 {
1419 if (has_mbyte)
1420 {
1421 inlen = (*mb_ptr2len)(in_str);
1422 cpstr = in_str;
1423 cplen = inlen;
1424 idx = 0;
1425 for (p = fromstr; *p != NUL; p += fromlen)
1426 {
1427 fromlen = (*mb_ptr2len)(p);
1428 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1429 {
1430 for (p = tostr; *p != NUL; p += tolen)
1431 {
1432 tolen = (*mb_ptr2len)(p);
1433 if (idx-- == 0)
1434 {
1435 cplen = tolen;
1436 cpstr = p;
1437 break;
1438 }
1439 }
1440 if (*p == NUL) // tostr is shorter than fromstr
1441 goto error;
1442 break;
1443 }
1444 ++idx;
1445 }
1446
1447 if (first && cpstr == in_str)
1448 {
1449 // Check that fromstr and tostr have the same number of
1450 // (multi-byte) characters. Done only once when a character
1451 // of in_str doesn't appear in fromstr.
1452 first = FALSE;
1453 for (p = tostr; *p != NUL; p += tolen)
1454 {
1455 tolen = (*mb_ptr2len)(p);
1456 --idx;
1457 }
1458 if (idx != 0)
1459 goto error;
1460 }
1461
1462 (void)ga_grow(&ga, cplen);
1463 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1464 ga.ga_len += cplen;
1465
1466 in_str += inlen;
1467 }
1468 else
1469 {
1470 // When not using multi-byte chars we can do it faster.
1471 p = vim_strchr(fromstr, *in_str);
1472 if (p != NULL)
1473 ga_append(&ga, tostr[p - fromstr]);
1474 else
1475 ga_append(&ga, *in_str);
1476 ++in_str;
1477 }
1478 }
1479
1480 // add a terminating NUL
1481 (void)ga_grow(&ga, 1);
1482 ga_append(&ga, NUL);
1483
1484 rettv->vval.v_string = ga.ga_data;
1485}
1486
1487/*
1488 * "trim({expr})" function
1489 */
1490 void
1491f_trim(typval_T *argvars, typval_T *rettv)
1492{
1493 char_u buf1[NUMBUFLEN];
1494 char_u buf2[NUMBUFLEN];
1495 char_u *head = tv_get_string_buf_chk(&argvars[0], buf1);
1496 char_u *mask = NULL;
1497 char_u *tail;
1498 char_u *prev;
1499 char_u *p;
1500 int c1;
1501 int dir = 0;
1502
1503 rettv->v_type = VAR_STRING;
1504 rettv->vval.v_string = NULL;
1505 if (head == NULL)
1506 return;
1507
1508 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
1509 {
1510 semsg(_(e_invarg2), tv_get_string(&argvars[1]));
1511 return;
1512 }
1513
1514 if (argvars[1].v_type == VAR_STRING)
1515 {
1516 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1517
1518 if (argvars[2].v_type != VAR_UNKNOWN)
1519 {
1520 int error = 0;
1521
1522 // leading or trailing characters to trim
1523 dir = (int)tv_get_number_chk(&argvars[2], &error);
1524 if (error)
1525 return;
1526 if (dir < 0 || dir > 2)
1527 {
1528 semsg(_(e_invarg2), tv_get_string(&argvars[2]));
1529 return;
1530 }
1531 }
1532 }
1533
1534 if (dir == 0 || dir == 1)
1535 {
1536 // Trim leading characters
1537 while (*head != NUL)
1538 {
1539 c1 = PTR2CHAR(head);
1540 if (mask == NULL)
1541 {
1542 if (c1 > ' ' && c1 != 0xa0)
1543 break;
1544 }
1545 else
1546 {
1547 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1548 if (c1 == PTR2CHAR(p))
1549 break;
1550 if (*p == NUL)
1551 break;
1552 }
1553 MB_PTR_ADV(head);
1554 }
1555 }
1556
1557 tail = head + STRLEN(head);
1558 if (dir == 0 || dir == 2)
1559 {
1560 // Trim trailing characters
1561 for (; tail > head; tail = prev)
1562 {
1563 prev = tail;
1564 MB_PTR_BACK(head, prev);
1565 c1 = PTR2CHAR(prev);
1566 if (mask == NULL)
1567 {
1568 if (c1 > ' ' && c1 != 0xa0)
1569 break;
1570 }
1571 else
1572 {
1573 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1574 if (c1 == PTR2CHAR(p))
1575 break;
1576 if (*p == NUL)
1577 break;
1578 }
1579 }
1580 }
1581 rettv->vval.v_string = vim_strnsave(head, tail - head);
1582}
1583
1584#endif