blob: 98d420c6aa878e472bf233bbfc4d4813d21f327c [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
14#include "vim.h"
15
16/*
17 * Copy "string" into newly allocated memory.
18 */
19 char_u *
20vim_strsave(char_u *string)
21{
22 char_u *p;
23 size_t len;
24
25 len = STRLEN(string) + 1;
26 p = alloc(len);
27 if (p != NULL)
28 mch_memmove(p, string, len);
29 return p;
30}
31
32/*
33 * Copy up to "len" bytes of "string" into newly allocated memory and
34 * terminate with a NUL.
35 * The allocated memory always has size "len + 1", also when "string" is
36 * shorter.
37 */
38 char_u *
39vim_strnsave(char_u *string, size_t len)
40{
41 char_u *p;
42
43 p = alloc(len + 1);
44 if (p != NULL)
45 {
46 STRNCPY(p, string, len);
47 p[len] = NUL;
48 }
49 return p;
50}
51
52/*
53 * Same as vim_strsave(), but any characters found in esc_chars are preceded
54 * by a backslash.
55 */
56 char_u *
57vim_strsave_escaped(char_u *string, char_u *esc_chars)
58{
59 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
60}
61
62/*
63 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
64 * characters where rem_backslash() would remove the backslash.
65 * Escape the characters with "cc".
66 */
67 char_u *
68vim_strsave_escaped_ext(
69 char_u *string,
70 char_u *esc_chars,
71 int cc,
72 int bsl)
73{
74 char_u *p;
75 char_u *p2;
76 char_u *escaped_string;
77 unsigned length;
78 int l;
79
80 /*
81 * First count the number of backslashes required.
82 * Then allocate the memory and insert them.
83 */
84 length = 1; // count the trailing NUL
85 for (p = string; *p; p++)
86 {
87 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
88 {
89 length += l; // count a multibyte char
90 p += l - 1;
91 continue;
92 }
93 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
94 ++length; // count a backslash
95 ++length; // count an ordinary char
96 }
97 escaped_string = alloc(length);
98 if (escaped_string != NULL)
99 {
100 p2 = escaped_string;
101 for (p = string; *p; p++)
102 {
103 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
104 {
105 mch_memmove(p2, p, (size_t)l);
106 p2 += l;
107 p += l - 1; // skip multibyte char
108 continue;
109 }
110 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
111 *p2++ = cc;
112 *p2++ = *p;
113 }
114 *p2 = NUL;
115 }
116 return escaped_string;
117}
118
119/*
120 * Return TRUE when 'shell' has "csh" in the tail.
121 */
122 int
123csh_like_shell(void)
124{
125 return (strstr((char *)gettail(p_sh), "csh") != NULL);
126}
127
128/*
129 * Escape "string" for use as a shell argument with system().
130 * This uses single quotes, except when we know we need to use double quotes
131 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
132 * PowerShell also uses a novel escaping for enclosed single quotes - double
133 * them up.
134 * Escape a newline, depending on the 'shell' option.
135 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
136 * with "<" like "<cfile>".
137 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
138 * Returns the result in allocated memory, NULL if we have run out.
139 */
140 char_u *
141vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
142{
143 unsigned length;
144 char_u *p;
145 char_u *d;
146 char_u *escaped_string;
147 int l;
148 int csh_like;
149 char_u *shname;
150 int powershell;
151# ifdef MSWIN
152 int double_quotes;
153# endif
154
155 // Only csh and similar shells expand '!' within single quotes. For sh and
156 // the like we must not put a backslash before it, it will be taken
157 // literally. If do_special is set the '!' will be escaped twice.
158 // Csh also needs to have "\n" escaped twice when do_special is set.
159 csh_like = csh_like_shell();
160
161 // PowerShell uses it's own version for quoting single quotes
162 shname = gettail(p_sh);
163 powershell = strstr((char *)shname, "pwsh") != NULL;
164# ifdef MSWIN
165 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
166 // PowerShell only accepts single quotes so override shellslash.
167 double_quotes = !powershell && !p_ssl;
168# endif
169
170 // First count the number of extra bytes required.
171 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
172 for (p = string; *p != NUL; MB_PTR_ADV(p))
173 {
174# ifdef MSWIN
175 if (double_quotes)
176 {
177 if (*p == '"')
178 ++length; // " -> ""
179 }
180 else
181# endif
182 if (*p == '\'')
183 {
184 if (powershell)
185 length +=2; // ' => ''
186 else
187 length += 3; // ' => '\''
188 }
189 if ((*p == '\n' && (csh_like || do_newline))
190 || (*p == '!' && (csh_like || do_special)))
191 {
192 ++length; // insert backslash
193 if (csh_like && do_special)
194 ++length; // insert backslash
195 }
196 if (do_special && find_cmdline_var(p, &l) >= 0)
197 {
198 ++length; // insert backslash
199 p += l - 1;
200 }
201 }
202
203 // Allocate memory for the result and fill it.
204 escaped_string = alloc(length);
205 if (escaped_string != NULL)
206 {
207 d = escaped_string;
208
209 // add opening quote
210# ifdef MSWIN
211 if (double_quotes)
212 *d++ = '"';
213 else
214# endif
215 *d++ = '\'';
216
217 for (p = string; *p != NUL; )
218 {
219# ifdef MSWIN
220 if (double_quotes)
221 {
222 if (*p == '"')
223 {
224 *d++ = '"';
225 *d++ = '"';
226 ++p;
227 continue;
228 }
229 }
230 else
231# endif
232 if (*p == '\'')
233 {
234 if (powershell)
235 {
236 *d++ = '\'';
237 *d++ = '\'';
238 }
239 else
240 {
241 *d++ = '\'';
242 *d++ = '\\';
243 *d++ = '\'';
244 *d++ = '\'';
245 }
246 ++p;
247 continue;
248 }
249 if ((*p == '\n' && (csh_like || do_newline))
250 || (*p == '!' && (csh_like || do_special)))
251 {
252 *d++ = '\\';
253 if (csh_like && do_special)
254 *d++ = '\\';
255 *d++ = *p++;
256 continue;
257 }
258 if (do_special && find_cmdline_var(p, &l) >= 0)
259 {
260 *d++ = '\\'; // insert backslash
261 while (--l >= 0) // copy the var
262 *d++ = *p++;
263 continue;
264 }
265
266 MB_COPY_CHAR(p, d);
267 }
268
269 // add terminating quote and finish with a NUL
270# ifdef MSWIN
271 if (double_quotes)
272 *d++ = '"';
273 else
274# endif
275 *d++ = '\'';
276 *d = NUL;
277 }
278
279 return escaped_string;
280}
281
282/*
283 * Like vim_strsave(), but make all characters uppercase.
284 * This uses ASCII lower-to-upper case translation, language independent.
285 */
286 char_u *
287vim_strsave_up(char_u *string)
288{
289 char_u *p1;
290
291 p1 = vim_strsave(string);
292 vim_strup(p1);
293 return p1;
294}
295
296/*
297 * Like vim_strnsave(), but make all characters uppercase.
298 * This uses ASCII lower-to-upper case translation, language independent.
299 */
300 char_u *
301vim_strnsave_up(char_u *string, size_t len)
302{
303 char_u *p1;
304
305 p1 = vim_strnsave(string, len);
306 vim_strup(p1);
307 return p1;
308}
309
310/*
311 * ASCII lower-to-upper case translation, language independent.
312 */
313 void
314vim_strup(
315 char_u *p)
316{
317 char_u *p2;
318 int c;
319
320 if (p != NULL)
321 {
322 p2 = p;
323 while ((c = *p2) != NUL)
324#ifdef EBCDIC
325 *p2++ = isalpha(c) ? toupper(c) : c;
326#else
327 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
328#endif
329 }
330}
331
332#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
333/*
334 * Make string "s" all upper-case and return it in allocated memory.
335 * Handles multi-byte characters as well as possible.
336 * Returns NULL when out of memory.
337 */
338 static char_u *
339strup_save(char_u *orig)
340{
341 char_u *p;
342 char_u *res;
343
344 res = p = vim_strsave(orig);
345
346 if (res != NULL)
347 while (*p != NUL)
348 {
349 int l;
350
351 if (enc_utf8)
352 {
353 int c, uc;
354 int newl;
355 char_u *s;
356
357 c = utf_ptr2char(p);
358 l = utf_ptr2len(p);
359 if (c == 0)
360 {
361 // overlong sequence, use only the first byte
362 c = *p;
363 l = 1;
364 }
365 uc = utf_toupper(c);
366
367 // Reallocate string when byte count changes. This is rare,
368 // thus it's OK to do another malloc()/free().
369 newl = utf_char2len(uc);
370 if (newl != l)
371 {
372 s = alloc(STRLEN(res) + 1 + newl - l);
373 if (s == NULL)
374 {
375 vim_free(res);
376 return NULL;
377 }
378 mch_memmove(s, res, p - res);
379 STRCPY(s + (p - res) + newl, p + l);
380 p = s + (p - res);
381 vim_free(res);
382 res = s;
383 }
384
385 utf_char2bytes(uc, p);
386 p += newl;
387 }
388 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
389 p += l; // skip multi-byte character
390 else
391 {
392 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
393 p++;
394 }
395 }
396
397 return res;
398}
399
400/*
401 * Make string "s" all lower-case and return it in allocated memory.
402 * Handles multi-byte characters as well as possible.
403 * Returns NULL when out of memory.
404 */
405 char_u *
406strlow_save(char_u *orig)
407{
408 char_u *p;
409 char_u *res;
410
411 res = p = vim_strsave(orig);
412
413 if (res != NULL)
414 while (*p != NUL)
415 {
416 int l;
417
418 if (enc_utf8)
419 {
420 int c, lc;
421 int newl;
422 char_u *s;
423
424 c = utf_ptr2char(p);
425 l = utf_ptr2len(p);
426 if (c == 0)
427 {
428 // overlong sequence, use only the first byte
429 c = *p;
430 l = 1;
431 }
432 lc = utf_tolower(c);
433
434 // Reallocate string when byte count changes. This is rare,
435 // thus it's OK to do another malloc()/free().
436 newl = utf_char2len(lc);
437 if (newl != l)
438 {
439 s = alloc(STRLEN(res) + 1 + newl - l);
440 if (s == NULL)
441 {
442 vim_free(res);
443 return NULL;
444 }
445 mch_memmove(s, res, p - res);
446 STRCPY(s + (p - res) + newl, p + l);
447 p = s + (p - res);
448 vim_free(res);
449 res = s;
450 }
451
452 utf_char2bytes(lc, p);
453 p += newl;
454 }
455 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
456 p += l; // skip multi-byte character
457 else
458 {
459 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
460 p++;
461 }
462 }
463
464 return res;
465}
466#endif
467
468/*
469 * delete spaces at the end of a string
470 */
471 void
472del_trailing_spaces(char_u *ptr)
473{
474 char_u *q;
475
476 q = ptr + STRLEN(ptr);
477 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
478 *q = NUL;
479}
480
481/*
482 * Like strncpy(), but always terminate the result with one NUL.
483 * "to" must be "len + 1" long!
484 */
485 void
486vim_strncpy(char_u *to, char_u *from, size_t len)
487{
488 STRNCPY(to, from, len);
489 to[len] = NUL;
490}
491
492/*
493 * Like strcat(), but make sure the result fits in "tosize" bytes and is
494 * always NUL terminated. "from" and "to" may overlap.
495 */
496 void
497vim_strcat(char_u *to, char_u *from, size_t tosize)
498{
499 size_t tolen = STRLEN(to);
500 size_t fromlen = STRLEN(from);
501
502 if (tolen + fromlen + 1 > tosize)
503 {
504 mch_memmove(to + tolen, from, tosize - tolen - 1);
505 to[tosize - 1] = NUL;
506 }
507 else
508 mch_memmove(to + tolen, from, fromlen + 1);
509}
510
511#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
512/*
513 * Compare two strings, ignoring case, using current locale.
514 * Doesn't work for multi-byte characters.
515 * return 0 for match, < 0 for smaller, > 0 for bigger
516 */
517 int
518vim_stricmp(char *s1, char *s2)
519{
520 int i;
521
522 for (;;)
523 {
524 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
525 if (i != 0)
526 return i; // this character different
527 if (*s1 == NUL)
528 break; // strings match until NUL
529 ++s1;
530 ++s2;
531 }
532 return 0; // strings match
533}
534#endif
535
536#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
537/*
538 * Compare two strings, for length "len", ignoring case, using current locale.
539 * Doesn't work for multi-byte characters.
540 * return 0 for match, < 0 for smaller, > 0 for bigger
541 */
542 int
543vim_strnicmp(char *s1, char *s2, size_t len)
544{
545 int i;
546
547 while (len > 0)
548 {
549 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
550 if (i != 0)
551 return i; // this character different
552 if (*s1 == NUL)
553 break; // strings match until NUL
554 ++s1;
555 ++s2;
556 --len;
557 }
558 return 0; // strings match
559}
560#endif
561
562/*
563 * Search for first occurrence of "c" in "string".
564 * Version of strchr() that handles unsigned char strings with characters from
565 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
566 * end of the string.
567 */
568 char_u *
569vim_strchr(char_u *string, int c)
570{
571 char_u *p;
572 int b;
573
574 p = string;
575 if (enc_utf8 && c >= 0x80)
576 {
577 while (*p != NUL)
578 {
579 int l = utfc_ptr2len(p);
580
581 // Avoid matching an illegal byte here.
582 if (utf_ptr2char(p) == c && l > 1)
583 return p;
584 p += l;
585 }
586 return NULL;
587 }
588 if (enc_dbcs != 0 && c > 255)
589 {
590 int n2 = c & 0xff;
591
592 c = ((unsigned)c >> 8) & 0xff;
593 while ((b = *p) != NUL)
594 {
595 if (b == c && p[1] == n2)
596 return p;
597 p += (*mb_ptr2len)(p);
598 }
599 return NULL;
600 }
601 if (has_mbyte)
602 {
603 while ((b = *p) != NUL)
604 {
605 if (b == c)
606 return p;
607 p += (*mb_ptr2len)(p);
608 }
609 return NULL;
610 }
611 while ((b = *p) != NUL)
612 {
613 if (b == c)
614 return p;
615 ++p;
616 }
617 return NULL;
618}
619
620/*
621 * Version of strchr() that only works for bytes and handles unsigned char
622 * strings with characters above 128 correctly. It also doesn't return a
623 * pointer to the NUL at the end of the string.
624 */
625 char_u *
626vim_strbyte(char_u *string, int c)
627{
628 char_u *p = string;
629
630 while (*p != NUL)
631 {
632 if (*p == c)
633 return p;
634 ++p;
635 }
636 return NULL;
637}
638
639/*
640 * Search for last occurrence of "c" in "string".
641 * Version of strrchr() that handles unsigned char strings with characters from
642 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
643 * end of the string.
644 * Return NULL if not found.
645 * Does not handle multi-byte char for "c"!
646 */
647 char_u *
648vim_strrchr(char_u *string, int c)
649{
650 char_u *retval = NULL;
651 char_u *p = string;
652
653 while (*p)
654 {
655 if (*p == c)
656 retval = p;
657 MB_PTR_ADV(p);
658 }
659 return retval;
660}
661
662/*
663 * Vim's version of strpbrk(), in case it's missing.
664 * Don't generate a prototype for this, causes problems when it's not used.
665 */
666#ifndef PROTO
667# ifndef HAVE_STRPBRK
668# ifdef vim_strpbrk
669# undef vim_strpbrk
670# endif
671 char_u *
672vim_strpbrk(char_u *s, char_u *charset)
673{
674 while (*s)
675 {
676 if (vim_strchr(charset, *s) != NULL)
677 return s;
678 MB_PTR_ADV(s);
679 }
680 return NULL;
681}
682# endif
683#endif
684
685/*
686 * Sort an array of strings.
687 */
/*
 * Comparison function for qsort(): "s1" and "s2" each point to a string
 * pointer; the two strings are compared with STRCMP().
 */
static int sort_compare(const void *s1, const void *s2);

    static int
sort_compare(const void *s1, const void *s2)
{
    char *left = *(char **)s1;
    char *right = *(char **)s2;

    return STRCMP(left, right);
}
695
696 void
697sort_strings(
698 char_u **files,
699 int count)
700{
701 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
702}
703
704#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
705/*
706 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
707 * When "s" is NULL FALSE is returned.
708 */
709 int
710has_non_ascii(char_u *s)
711{
712 char_u *p;
713
714 if (s != NULL)
715 for (p = s; *p != NUL; ++p)
716 if (*p >= 128)
717 return TRUE;
718 return FALSE;
719}
720#endif
721
722/*
723 * Concatenate two strings and return the result in allocated memory.
724 * Returns NULL when out of memory.
725 */
726 char_u *
727concat_str(char_u *str1, char_u *str2)
728{
729 char_u *dest;
730 size_t l = str1 == NULL ? 0 : STRLEN(str1);
731
732 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
733 if (dest != NULL)
734 {
735 if (str1 == NULL)
736 *dest = NUL;
737 else
738 STRCPY(dest, str1);
739 if (str2 != NULL)
740 STRCPY(dest + l, str2);
741 }
742 return dest;
743}
744
745#if defined(FEAT_EVAL) || defined(PROTO)
746
747/*
748 * Return string "str" in ' quotes, doubling ' characters.
749 * If "str" is NULL an empty string is assumed.
750 * If "function" is TRUE make it function('string').
751 */
752 char_u *
753string_quote(char_u *str, int function)
754{
755 unsigned len;
756 char_u *p, *r, *s;
757
758 len = (function ? 13 : 3);
759 if (str != NULL)
760 {
761 len += (unsigned)STRLEN(str);
762 for (p = str; *p != NUL; MB_PTR_ADV(p))
763 if (*p == '\'')
764 ++len;
765 }
766 s = r = alloc(len);
767 if (r != NULL)
768 {
769 if (function)
770 {
771 STRCPY(r, "function('");
772 r += 10;
773 }
774 else
775 *r++ = '\'';
776 if (str != NULL)
777 for (p = str; *p != NUL; )
778 {
779 if (*p == '\'')
780 *r++ = '\'';
781 MB_COPY_CHAR(p, r);
782 }
783 *r++ = '\'';
784 if (function)
785 *r++ = ')';
786 *r++ = NUL;
787 }
788 return s;
789}
790
791 static void
792byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
793{
794 char_u *t;
795 char_u *str;
796 varnumber_T idx;
797
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200798 rettv->vval.v_number = -1;
799
800 if (in_vim9script()
801 && (check_for_string_arg(argvars, 0) == FAIL
802 || check_for_number_arg(argvars, 1) == FAIL))
803 return;
804
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200805 str = tv_get_string_chk(&argvars[0]);
806 idx = tv_get_number_chk(&argvars[1], NULL);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200807 if (str == NULL || idx < 0)
808 return;
809
810 t = str;
811 for ( ; idx > 0; idx--)
812 {
813 if (*t == NUL) // EOL reached
814 return;
815 if (enc_utf8 && comp)
816 t += utf_ptr2len(t);
817 else
818 t += (*mb_ptr2len)(t);
819 }
820 rettv->vval.v_number = (varnumber_T)(t - str);
821}
822
823/*
824 * "byteidx()" function
825 */
826 void
827f_byteidx(typval_T *argvars, typval_T *rettv)
828{
829 byteidx(argvars, rettv, FALSE);
830}
831
832/*
833 * "byteidxcomp()" function
834 */
835 void
836f_byteidxcomp(typval_T *argvars, typval_T *rettv)
837{
838 byteidx(argvars, rettv, TRUE);
839}
840
841/*
842 * "charidx()" function
843 */
844 void
845f_charidx(typval_T *argvars, typval_T *rettv)
846{
847 char_u *str;
848 varnumber_T idx;
849 varnumber_T countcc = FALSE;
850 char_u *p;
851 int len;
852 int (*ptr2len)(char_u *);
853
854 rettv->vval.v_number = -1;
855
856 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
857 || (argvars[2].v_type != VAR_UNKNOWN
858 && argvars[2].v_type != VAR_NUMBER
859 && argvars[2].v_type != VAR_BOOL))
860 {
861 emsg(_(e_invarg));
862 return;
863 }
864
865 str = tv_get_string_chk(&argvars[0]);
866 idx = tv_get_number_chk(&argvars[1], NULL);
867 if (str == NULL || idx < 0)
868 return;
869
870 if (argvars[2].v_type != VAR_UNKNOWN)
871 countcc = tv_get_bool(&argvars[2]);
872 if (countcc < 0 || countcc > 1)
873 {
874 semsg(_(e_using_number_as_bool_nr), countcc);
875 return;
876 }
877
878 if (enc_utf8 && countcc)
879 ptr2len = utf_ptr2len;
880 else
881 ptr2len = mb_ptr2len;
882
883 for (p = str, len = 0; p <= str + idx; len++)
884 {
885 if (*p == NUL)
886 return;
887 p += ptr2len(p);
888 }
889
890 rettv->vval.v_number = len > 0 ? len - 1 : 0;
891}
892
893/*
894 * "str2list()" function
895 */
896 void
897f_str2list(typval_T *argvars, typval_T *rettv)
898{
899 char_u *p;
900 int utf8 = FALSE;
901
902 if (rettv_list_alloc(rettv) == FAIL)
903 return;
904
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +0200905 if (in_vim9script()
906 && (check_for_string_arg(argvars, 0) == FAIL
907 || (argvars[1].v_type != VAR_UNKNOWN
908 && check_for_bool_arg(argvars, 1) == FAIL)))
909 return;
910
Yegappan Lakshmanana2438132021-07-10 21:29:18 +0200911 if (argvars[1].v_type != VAR_UNKNOWN)
912 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
913
914 p = tv_get_string(&argvars[0]);
915
916 if (has_mbyte || utf8)
917 {
918 int (*ptr2len)(char_u *);
919 int (*ptr2char)(char_u *);
920
921 if (utf8 || enc_utf8)
922 {
923 ptr2len = utf_ptr2len;
924 ptr2char = utf_ptr2char;
925 }
926 else
927 {
928 ptr2len = mb_ptr2len;
929 ptr2char = mb_ptr2char;
930 }
931
932 for ( ; *p != NUL; p += (*ptr2len)(p))
933 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
934 }
935 else
936 for ( ; *p != NUL; ++p)
937 list_append_number(rettv->vval.v_list, *p);
938}
939
940/*
941 * "str2nr()" function
942 */
943 void
944f_str2nr(typval_T *argvars, typval_T *rettv)
945{
946 int base = 10;
947 char_u *p;
948 varnumber_T n;
949 int what = 0;
950 int isneg;
951
952 if (argvars[1].v_type != VAR_UNKNOWN)
953 {
954 base = (int)tv_get_number(&argvars[1]);
955 if (base != 2 && base != 8 && base != 10 && base != 16)
956 {
957 emsg(_(e_invarg));
958 return;
959 }
960 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
961 what |= STR2NR_QUOTE;
962 }
963
964 p = skipwhite(tv_get_string_strict(&argvars[0]));
965 isneg = (*p == '-');
966 if (*p == '+' || *p == '-')
967 p = skipwhite(p + 1);
968 switch (base)
969 {
970 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
971 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
972 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
973 }
974 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
975 // Text after the number is silently ignored.
976 if (isneg)
977 rettv->vval.v_number = -n;
978 else
979 rettv->vval.v_number = n;
980
981}
982
983/*
984 * "strgetchar()" function
985 */
986 void
987f_strgetchar(typval_T *argvars, typval_T *rettv)
988{
989 char_u *str;
990 int len;
991 int error = FALSE;
992 int charidx;
993 int byteidx = 0;
994
995 rettv->vval.v_number = -1;
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +0200996
997 if (in_vim9script()
998 && (check_for_string_arg(argvars, 0) == FAIL
999 || check_for_number_arg(argvars, 1) == FAIL))
1000 return;
1001
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001002 str = tv_get_string_chk(&argvars[0]);
1003 if (str == NULL)
1004 return;
1005 len = (int)STRLEN(str);
1006 charidx = (int)tv_get_number_chk(&argvars[1], &error);
1007 if (error)
1008 return;
1009
1010 while (charidx >= 0 && byteidx < len)
1011 {
1012 if (charidx == 0)
1013 {
1014 rettv->vval.v_number = mb_ptr2char(str + byteidx);
1015 break;
1016 }
1017 --charidx;
1018 byteidx += MB_CPTR2LEN(str + byteidx);
1019 }
1020}
1021
1022/*
1023 * "stridx()" function
1024 */
1025 void
1026f_stridx(typval_T *argvars, typval_T *rettv)
1027{
1028 char_u buf[NUMBUFLEN];
1029 char_u *needle;
1030 char_u *haystack;
1031 char_u *save_haystack;
1032 char_u *pos;
1033 int start_idx;
1034
1035 needle = tv_get_string_chk(&argvars[1]);
1036 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1037 rettv->vval.v_number = -1;
1038 if (needle == NULL || haystack == NULL)
1039 return; // type error; errmsg already given
1040
1041 if (argvars[2].v_type != VAR_UNKNOWN)
1042 {
1043 int error = FALSE;
1044
1045 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1046 if (error || start_idx >= (int)STRLEN(haystack))
1047 return;
1048 if (start_idx >= 0)
1049 haystack += start_idx;
1050 }
1051
1052 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1053 if (pos != NULL)
1054 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1055}
1056
1057/*
1058 * "string()" function
1059 */
1060 void
1061f_string(typval_T *argvars, typval_T *rettv)
1062{
1063 char_u *tofree;
1064 char_u numbuf[NUMBUFLEN];
1065
1066 rettv->v_type = VAR_STRING;
1067 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1068 get_copyID());
1069 // Make a copy if we have a value but it's not in allocated memory.
1070 if (rettv->vval.v_string != NULL && tofree == NULL)
1071 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1072}
1073
1074/*
1075 * "strlen()" function
1076 */
1077 void
1078f_strlen(typval_T *argvars, typval_T *rettv)
1079{
1080 rettv->vval.v_number = (varnumber_T)(STRLEN(
1081 tv_get_string(&argvars[0])));
1082}
1083
1084 static void
1085strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1086{
1087 char_u *s = tv_get_string(&argvars[0]);
1088 varnumber_T len = 0;
1089 int (*func_mb_ptr2char_adv)(char_u **pp);
1090
1091 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1092 while (*s != NUL)
1093 {
1094 func_mb_ptr2char_adv(&s);
1095 ++len;
1096 }
1097 rettv->vval.v_number = len;
1098}
1099
1100/*
1101 * "strcharlen()" function
1102 */
1103 void
1104f_strcharlen(typval_T *argvars, typval_T *rettv)
1105{
1106 strchar_common(argvars, rettv, TRUE);
1107}
1108
1109/*
1110 * "strchars()" function
1111 */
1112 void
1113f_strchars(typval_T *argvars, typval_T *rettv)
1114{
1115 varnumber_T skipcc = FALSE;
1116
Yegappan Lakshmanana9a7c0c2021-07-17 19:11:07 +02001117 if (in_vim9script()
1118 && (check_for_string_arg(argvars, 0) == FAIL
1119 || (argvars[1].v_type != VAR_UNKNOWN
1120 && check_for_bool_arg(argvars, 1) == FAIL)))
1121 return;
1122
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001123 if (argvars[1].v_type != VAR_UNKNOWN)
1124 skipcc = tv_get_bool(&argvars[1]);
1125 if (skipcc < 0 || skipcc > 1)
1126 semsg(_(e_using_number_as_bool_nr), skipcc);
1127 else
1128 strchar_common(argvars, rettv, skipcc);
1129}
1130
1131/*
1132 * "strdisplaywidth()" function
1133 */
1134 void
1135f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1136{
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001137 char_u *s;
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001138 int col = 0;
1139
Yegappan Lakshmanan1a71d312021-07-15 12:49:58 +02001140 rettv->vval.v_number = -1;
1141
1142 if (in_vim9script()
1143 && (check_for_string_arg(argvars, 0) == FAIL
1144 || (argvars[1].v_type != VAR_UNKNOWN
1145 && check_for_number_arg(argvars, 1) == FAIL)))
1146 return;
1147
1148 s = tv_get_string(&argvars[0]);
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001149 if (argvars[1].v_type != VAR_UNKNOWN)
1150 col = (int)tv_get_number(&argvars[1]);
1151
1152 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1153}
1154
1155/*
1156 * "strwidth()" function
1157 */
1158 void
1159f_strwidth(typval_T *argvars, typval_T *rettv)
1160{
1161 char_u *s = tv_get_string_strict(&argvars[0]);
1162
1163 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1164}
1165
1166/*
1167 * "strcharpart()" function
1168 */
1169 void
1170f_strcharpart(typval_T *argvars, typval_T *rettv)
1171{
1172 char_u *p;
1173 int nchar;
1174 int nbyte = 0;
1175 int charlen;
1176 int skipcc = FALSE;
1177 int len = 0;
1178 int slen;
1179 int error = FALSE;
1180
1181 p = tv_get_string(&argvars[0]);
1182 slen = (int)STRLEN(p);
1183
1184 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1185 if (!error)
1186 {
1187 if (argvars[2].v_type != VAR_UNKNOWN
1188 && argvars[3].v_type != VAR_UNKNOWN)
1189 {
1190 skipcc = tv_get_bool(&argvars[3]);
1191 if (skipcc < 0 || skipcc > 1)
1192 {
1193 semsg(_(e_using_number_as_bool_nr), skipcc);
1194 return;
1195 }
1196 }
1197
1198 if (nchar > 0)
1199 while (nchar > 0 && nbyte < slen)
1200 {
1201 if (skipcc)
1202 nbyte += mb_ptr2len(p + nbyte);
1203 else
1204 nbyte += MB_CPTR2LEN(p + nbyte);
1205 --nchar;
1206 }
1207 else
1208 nbyte = nchar;
1209 if (argvars[2].v_type != VAR_UNKNOWN)
1210 {
1211 charlen = (int)tv_get_number(&argvars[2]);
1212 while (charlen > 0 && nbyte + len < slen)
1213 {
1214 int off = nbyte + len;
1215
1216 if (off < 0)
1217 len += 1;
1218 else
1219 {
1220 if (skipcc)
1221 len += mb_ptr2len(p + off);
1222 else
1223 len += MB_CPTR2LEN(p + off);
1224 }
1225 --charlen;
1226 }
1227 }
1228 else
1229 len = slen - nbyte; // default: all bytes that are available.
1230 }
1231
1232 /*
1233 * Only return the overlap between the specified part and the actual
1234 * string.
1235 */
1236 if (nbyte < 0)
1237 {
1238 len += nbyte;
1239 nbyte = 0;
1240 }
1241 else if (nbyte > slen)
1242 nbyte = slen;
1243 if (len < 0)
1244 len = 0;
1245 else if (nbyte + len > slen)
1246 len = slen - nbyte;
1247
1248 rettv->v_type = VAR_STRING;
1249 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1250}
1251
1252/*
1253 * "strpart()" function
1254 */
1255 void
1256f_strpart(typval_T *argvars, typval_T *rettv)
1257{
1258 char_u *p;
1259 int n;
1260 int len;
1261 int slen;
1262 int error = FALSE;
1263
1264 p = tv_get_string(&argvars[0]);
1265 slen = (int)STRLEN(p);
1266
1267 n = (int)tv_get_number_chk(&argvars[1], &error);
1268 if (error)
1269 len = 0;
1270 else if (argvars[2].v_type != VAR_UNKNOWN)
1271 len = (int)tv_get_number(&argvars[2]);
1272 else
1273 len = slen - n; // default len: all bytes that are available.
1274
1275 // Only return the overlap between the specified part and the actual
1276 // string.
1277 if (n < 0)
1278 {
1279 len += n;
1280 n = 0;
1281 }
1282 else if (n > slen)
1283 n = slen;
1284 if (len < 0)
1285 len = 0;
1286 else if (n + len > slen)
1287 len = slen - n;
1288
1289 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1290 {
1291 int off;
1292
1293 // length in characters
1294 for (off = n; off < slen && len > 0; --len)
1295 off += mb_ptr2len(p + off);
1296 len = off - n;
1297 }
1298
1299 rettv->v_type = VAR_STRING;
1300 rettv->vval.v_string = vim_strnsave(p + n, len);
1301}
1302
1303/*
1304 * "strridx()" function
1305 */
1306 void
1307f_strridx(typval_T *argvars, typval_T *rettv)
1308{
1309 char_u buf[NUMBUFLEN];
1310 char_u *needle;
1311 char_u *haystack;
1312 char_u *rest;
1313 char_u *lastmatch = NULL;
1314 int haystack_len, end_idx;
1315
1316 needle = tv_get_string_chk(&argvars[1]);
1317 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1318
1319 rettv->vval.v_number = -1;
1320 if (needle == NULL || haystack == NULL)
1321 return; // type error; errmsg already given
1322
1323 haystack_len = (int)STRLEN(haystack);
1324 if (argvars[2].v_type != VAR_UNKNOWN)
1325 {
1326 // Third argument: upper limit for index
1327 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1328 if (end_idx < 0)
1329 return; // can never find a match
1330 }
1331 else
1332 end_idx = haystack_len;
1333
1334 if (*needle == NUL)
1335 {
1336 // Empty string matches past the end.
1337 lastmatch = haystack + end_idx;
1338 }
1339 else
1340 {
1341 for (rest = haystack; *rest != '\0'; ++rest)
1342 {
1343 rest = (char_u *)strstr((char *)rest, (char *)needle);
1344 if (rest == NULL || rest > haystack + end_idx)
1345 break;
1346 lastmatch = rest;
1347 }
1348 }
1349
1350 if (lastmatch == NULL)
1351 rettv->vval.v_number = -1;
1352 else
1353 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1354}
1355
1356/*
1357 * "strtrans()" function
1358 */
1359 void
1360f_strtrans(typval_T *argvars, typval_T *rettv)
1361{
1362 rettv->v_type = VAR_STRING;
1363 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1364}
1365
1366/*
1367 * "tolower(string)" function
1368 */
1369 void
1370f_tolower(typval_T *argvars, typval_T *rettv)
1371{
1372 rettv->v_type = VAR_STRING;
1373 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1374}
1375
1376/*
1377 * "toupper(string)" function
1378 */
1379 void
1380f_toupper(typval_T *argvars, typval_T *rettv)
1381{
1382 rettv->v_type = VAR_STRING;
1383 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1384}
1385
1386/*
1387 * "tr(string, fromstr, tostr)" function
1388 */
1389 void
1390f_tr(typval_T *argvars, typval_T *rettv)
1391{
1392 char_u *in_str;
1393 char_u *fromstr;
1394 char_u *tostr;
1395 char_u *p;
1396 int inlen;
1397 int fromlen;
1398 int tolen;
1399 int idx;
1400 char_u *cpstr;
1401 int cplen;
1402 int first = TRUE;
1403 char_u buf[NUMBUFLEN];
1404 char_u buf2[NUMBUFLEN];
1405 garray_T ga;
1406
1407 in_str = tv_get_string(&argvars[0]);
1408 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1409 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1410
1411 // Default return value: empty string.
1412 rettv->v_type = VAR_STRING;
1413 rettv->vval.v_string = NULL;
1414 if (fromstr == NULL || tostr == NULL)
1415 return; // type error; errmsg already given
1416 ga_init2(&ga, (int)sizeof(char), 80);
1417
1418 if (!has_mbyte)
1419 // not multi-byte: fromstr and tostr must be the same length
1420 if (STRLEN(fromstr) != STRLEN(tostr))
1421 {
1422error:
1423 semsg(_(e_invarg2), fromstr);
1424 ga_clear(&ga);
1425 return;
1426 }
1427
1428 // fromstr and tostr have to contain the same number of chars
1429 while (*in_str != NUL)
1430 {
1431 if (has_mbyte)
1432 {
1433 inlen = (*mb_ptr2len)(in_str);
1434 cpstr = in_str;
1435 cplen = inlen;
1436 idx = 0;
1437 for (p = fromstr; *p != NUL; p += fromlen)
1438 {
1439 fromlen = (*mb_ptr2len)(p);
1440 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1441 {
1442 for (p = tostr; *p != NUL; p += tolen)
1443 {
1444 tolen = (*mb_ptr2len)(p);
1445 if (idx-- == 0)
1446 {
1447 cplen = tolen;
1448 cpstr = p;
1449 break;
1450 }
1451 }
1452 if (*p == NUL) // tostr is shorter than fromstr
1453 goto error;
1454 break;
1455 }
1456 ++idx;
1457 }
1458
1459 if (first && cpstr == in_str)
1460 {
1461 // Check that fromstr and tostr have the same number of
1462 // (multi-byte) characters. Done only once when a character
1463 // of in_str doesn't appear in fromstr.
1464 first = FALSE;
1465 for (p = tostr; *p != NUL; p += tolen)
1466 {
1467 tolen = (*mb_ptr2len)(p);
1468 --idx;
1469 }
1470 if (idx != 0)
1471 goto error;
1472 }
1473
1474 (void)ga_grow(&ga, cplen);
1475 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1476 ga.ga_len += cplen;
1477
1478 in_str += inlen;
1479 }
1480 else
1481 {
1482 // When not using multi-byte chars we can do it faster.
1483 p = vim_strchr(fromstr, *in_str);
1484 if (p != NULL)
1485 ga_append(&ga, tostr[p - fromstr]);
1486 else
1487 ga_append(&ga, *in_str);
1488 ++in_str;
1489 }
1490 }
1491
1492 // add a terminating NUL
1493 (void)ga_grow(&ga, 1);
1494 ga_append(&ga, NUL);
1495
1496 rettv->vval.v_string = ga.ga_data;
1497}
1498
1499/*
1500 * "trim({expr})" function
1501 */
1502 void
1503f_trim(typval_T *argvars, typval_T *rettv)
1504{
1505 char_u buf1[NUMBUFLEN];
1506 char_u buf2[NUMBUFLEN];
1507 char_u *head = tv_get_string_buf_chk(&argvars[0], buf1);
1508 char_u *mask = NULL;
1509 char_u *tail;
1510 char_u *prev;
1511 char_u *p;
1512 int c1;
1513 int dir = 0;
1514
1515 rettv->v_type = VAR_STRING;
1516 rettv->vval.v_string = NULL;
1517 if (head == NULL)
1518 return;
1519
1520 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
1521 {
1522 semsg(_(e_invarg2), tv_get_string(&argvars[1]));
1523 return;
1524 }
1525
1526 if (argvars[1].v_type == VAR_STRING)
1527 {
1528 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1529
1530 if (argvars[2].v_type != VAR_UNKNOWN)
1531 {
1532 int error = 0;
1533
1534 // leading or trailing characters to trim
1535 dir = (int)tv_get_number_chk(&argvars[2], &error);
1536 if (error)
1537 return;
1538 if (dir < 0 || dir > 2)
1539 {
1540 semsg(_(e_invarg2), tv_get_string(&argvars[2]));
1541 return;
1542 }
1543 }
1544 }
1545
1546 if (dir == 0 || dir == 1)
1547 {
1548 // Trim leading characters
1549 while (*head != NUL)
1550 {
1551 c1 = PTR2CHAR(head);
1552 if (mask == NULL)
1553 {
1554 if (c1 > ' ' && c1 != 0xa0)
1555 break;
1556 }
1557 else
1558 {
1559 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1560 if (c1 == PTR2CHAR(p))
1561 break;
1562 if (*p == NUL)
1563 break;
1564 }
1565 MB_PTR_ADV(head);
1566 }
1567 }
1568
1569 tail = head + STRLEN(head);
1570 if (dir == 0 || dir == 2)
1571 {
1572 // Trim trailing characters
1573 for (; tail > head; tail = prev)
1574 {
1575 prev = tail;
1576 MB_PTR_BACK(head, prev);
1577 c1 = PTR2CHAR(prev);
1578 if (mask == NULL)
1579 {
1580 if (c1 > ' ' && c1 != 0xa0)
1581 break;
1582 }
1583 else
1584 {
1585 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1586 if (c1 == PTR2CHAR(p))
1587 break;
1588 if (*p == NUL)
1589 break;
1590 }
1591 }
1592 }
1593 rettv->vval.v_string = vim_strnsave(head, tail - head);
1594}
1595
1596#endif