blob: 61ffd84fe96dedfb23a03bc0391fc258c7870ee2 [file] [log] [blame]
Yegappan Lakshmanana2438132021-07-10 21:29:18 +02001/* vi:set ts=8 sts=4 sw=4 noet:
2 *
3 * VIM - Vi IMproved by Bram Moolenaar
4 *
5 * Do ":help uganda" in Vim to read copying and usage conditions.
6 * Do ":help credits" in Vim to see a list of people who contributed.
7 * See README.txt for an overview of the Vim source code.
8 */
9
10/*
11 * strings.c: string manipulation functions
12 */
13
14#include "vim.h"
15
16/*
17 * Copy "string" into newly allocated memory.
18 */
19 char_u *
20vim_strsave(char_u *string)
21{
22 char_u *p;
23 size_t len;
24
25 len = STRLEN(string) + 1;
26 p = alloc(len);
27 if (p != NULL)
28 mch_memmove(p, string, len);
29 return p;
30}
31
32/*
33 * Copy up to "len" bytes of "string" into newly allocated memory and
34 * terminate with a NUL.
35 * The allocated memory always has size "len + 1", also when "string" is
36 * shorter.
37 */
38 char_u *
39vim_strnsave(char_u *string, size_t len)
40{
41 char_u *p;
42
43 p = alloc(len + 1);
44 if (p != NULL)
45 {
46 STRNCPY(p, string, len);
47 p[len] = NUL;
48 }
49 return p;
50}
51
52/*
53 * Same as vim_strsave(), but any characters found in esc_chars are preceded
54 * by a backslash.
55 */
56 char_u *
57vim_strsave_escaped(char_u *string, char_u *esc_chars)
58{
59 return vim_strsave_escaped_ext(string, esc_chars, '\\', FALSE);
60}
61
62/*
63 * Same as vim_strsave_escaped(), but when "bsl" is TRUE also escape
64 * characters where rem_backslash() would remove the backslash.
65 * Escape the characters with "cc".
66 */
67 char_u *
68vim_strsave_escaped_ext(
69 char_u *string,
70 char_u *esc_chars,
71 int cc,
72 int bsl)
73{
74 char_u *p;
75 char_u *p2;
76 char_u *escaped_string;
77 unsigned length;
78 int l;
79
80 /*
81 * First count the number of backslashes required.
82 * Then allocate the memory and insert them.
83 */
84 length = 1; // count the trailing NUL
85 for (p = string; *p; p++)
86 {
87 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
88 {
89 length += l; // count a multibyte char
90 p += l - 1;
91 continue;
92 }
93 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
94 ++length; // count a backslash
95 ++length; // count an ordinary char
96 }
97 escaped_string = alloc(length);
98 if (escaped_string != NULL)
99 {
100 p2 = escaped_string;
101 for (p = string; *p; p++)
102 {
103 if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
104 {
105 mch_memmove(p2, p, (size_t)l);
106 p2 += l;
107 p += l - 1; // skip multibyte char
108 continue;
109 }
110 if (vim_strchr(esc_chars, *p) != NULL || (bsl && rem_backslash(p)))
111 *p2++ = cc;
112 *p2++ = *p;
113 }
114 *p2 = NUL;
115 }
116 return escaped_string;
117}
118
119/*
120 * Return TRUE when 'shell' has "csh" in the tail.
121 */
122 int
123csh_like_shell(void)
124{
125 return (strstr((char *)gettail(p_sh), "csh") != NULL);
126}
127
128/*
129 * Escape "string" for use as a shell argument with system().
130 * This uses single quotes, except when we know we need to use double quotes
131 * (MS-DOS and MS-Windows not using PowerShell and without 'shellslash' set).
132 * PowerShell also uses a novel escaping for enclosed single quotes - double
133 * them up.
134 * Escape a newline, depending on the 'shell' option.
135 * When "do_special" is TRUE also replace "!", "%", "#" and things starting
136 * with "<" like "<cfile>".
137 * When "do_newline" is FALSE do not escape newline unless it is csh shell.
138 * Returns the result in allocated memory, NULL if we have run out.
139 */
140 char_u *
141vim_strsave_shellescape(char_u *string, int do_special, int do_newline)
142{
143 unsigned length;
144 char_u *p;
145 char_u *d;
146 char_u *escaped_string;
147 int l;
148 int csh_like;
149 char_u *shname;
150 int powershell;
151# ifdef MSWIN
152 int double_quotes;
153# endif
154
155 // Only csh and similar shells expand '!' within single quotes. For sh and
156 // the like we must not put a backslash before it, it will be taken
157 // literally. If do_special is set the '!' will be escaped twice.
158 // Csh also needs to have "\n" escaped twice when do_special is set.
159 csh_like = csh_like_shell();
160
161 // PowerShell uses it's own version for quoting single quotes
162 shname = gettail(p_sh);
163 powershell = strstr((char *)shname, "pwsh") != NULL;
164# ifdef MSWIN
165 powershell = powershell || strstr((char *)shname, "powershell") != NULL;
166 // PowerShell only accepts single quotes so override shellslash.
167 double_quotes = !powershell && !p_ssl;
168# endif
169
170 // First count the number of extra bytes required.
171 length = (unsigned)STRLEN(string) + 3; // two quotes and a trailing NUL
172 for (p = string; *p != NUL; MB_PTR_ADV(p))
173 {
174# ifdef MSWIN
175 if (double_quotes)
176 {
177 if (*p == '"')
178 ++length; // " -> ""
179 }
180 else
181# endif
182 if (*p == '\'')
183 {
184 if (powershell)
185 length +=2; // ' => ''
186 else
187 length += 3; // ' => '\''
188 }
189 if ((*p == '\n' && (csh_like || do_newline))
190 || (*p == '!' && (csh_like || do_special)))
191 {
192 ++length; // insert backslash
193 if (csh_like && do_special)
194 ++length; // insert backslash
195 }
196 if (do_special && find_cmdline_var(p, &l) >= 0)
197 {
198 ++length; // insert backslash
199 p += l - 1;
200 }
201 }
202
203 // Allocate memory for the result and fill it.
204 escaped_string = alloc(length);
205 if (escaped_string != NULL)
206 {
207 d = escaped_string;
208
209 // add opening quote
210# ifdef MSWIN
211 if (double_quotes)
212 *d++ = '"';
213 else
214# endif
215 *d++ = '\'';
216
217 for (p = string; *p != NUL; )
218 {
219# ifdef MSWIN
220 if (double_quotes)
221 {
222 if (*p == '"')
223 {
224 *d++ = '"';
225 *d++ = '"';
226 ++p;
227 continue;
228 }
229 }
230 else
231# endif
232 if (*p == '\'')
233 {
234 if (powershell)
235 {
236 *d++ = '\'';
237 *d++ = '\'';
238 }
239 else
240 {
241 *d++ = '\'';
242 *d++ = '\\';
243 *d++ = '\'';
244 *d++ = '\'';
245 }
246 ++p;
247 continue;
248 }
249 if ((*p == '\n' && (csh_like || do_newline))
250 || (*p == '!' && (csh_like || do_special)))
251 {
252 *d++ = '\\';
253 if (csh_like && do_special)
254 *d++ = '\\';
255 *d++ = *p++;
256 continue;
257 }
258 if (do_special && find_cmdline_var(p, &l) >= 0)
259 {
260 *d++ = '\\'; // insert backslash
261 while (--l >= 0) // copy the var
262 *d++ = *p++;
263 continue;
264 }
265
266 MB_COPY_CHAR(p, d);
267 }
268
269 // add terminating quote and finish with a NUL
270# ifdef MSWIN
271 if (double_quotes)
272 *d++ = '"';
273 else
274# endif
275 *d++ = '\'';
276 *d = NUL;
277 }
278
279 return escaped_string;
280}
281
282/*
283 * Like vim_strsave(), but make all characters uppercase.
284 * This uses ASCII lower-to-upper case translation, language independent.
285 */
286 char_u *
287vim_strsave_up(char_u *string)
288{
289 char_u *p1;
290
291 p1 = vim_strsave(string);
292 vim_strup(p1);
293 return p1;
294}
295
296/*
297 * Like vim_strnsave(), but make all characters uppercase.
298 * This uses ASCII lower-to-upper case translation, language independent.
299 */
300 char_u *
301vim_strnsave_up(char_u *string, size_t len)
302{
303 char_u *p1;
304
305 p1 = vim_strnsave(string, len);
306 vim_strup(p1);
307 return p1;
308}
309
310/*
311 * ASCII lower-to-upper case translation, language independent.
312 */
313 void
314vim_strup(
315 char_u *p)
316{
317 char_u *p2;
318 int c;
319
320 if (p != NULL)
321 {
322 p2 = p;
323 while ((c = *p2) != NUL)
324#ifdef EBCDIC
325 *p2++ = isalpha(c) ? toupper(c) : c;
326#else
327 *p2++ = (c < 'a' || c > 'z') ? c : (c - 0x20);
328#endif
329 }
330}
331
332#if defined(FEAT_EVAL) || defined(FEAT_SPELL) || defined(PROTO)
333/*
334 * Make string "s" all upper-case and return it in allocated memory.
335 * Handles multi-byte characters as well as possible.
336 * Returns NULL when out of memory.
337 */
338 static char_u *
339strup_save(char_u *orig)
340{
341 char_u *p;
342 char_u *res;
343
344 res = p = vim_strsave(orig);
345
346 if (res != NULL)
347 while (*p != NUL)
348 {
349 int l;
350
351 if (enc_utf8)
352 {
353 int c, uc;
354 int newl;
355 char_u *s;
356
357 c = utf_ptr2char(p);
358 l = utf_ptr2len(p);
359 if (c == 0)
360 {
361 // overlong sequence, use only the first byte
362 c = *p;
363 l = 1;
364 }
365 uc = utf_toupper(c);
366
367 // Reallocate string when byte count changes. This is rare,
368 // thus it's OK to do another malloc()/free().
369 newl = utf_char2len(uc);
370 if (newl != l)
371 {
372 s = alloc(STRLEN(res) + 1 + newl - l);
373 if (s == NULL)
374 {
375 vim_free(res);
376 return NULL;
377 }
378 mch_memmove(s, res, p - res);
379 STRCPY(s + (p - res) + newl, p + l);
380 p = s + (p - res);
381 vim_free(res);
382 res = s;
383 }
384
385 utf_char2bytes(uc, p);
386 p += newl;
387 }
388 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
389 p += l; // skip multi-byte character
390 else
391 {
392 *p = TOUPPER_LOC(*p); // note that toupper() can be a macro
393 p++;
394 }
395 }
396
397 return res;
398}
399
400/*
401 * Make string "s" all lower-case and return it in allocated memory.
402 * Handles multi-byte characters as well as possible.
403 * Returns NULL when out of memory.
404 */
405 char_u *
406strlow_save(char_u *orig)
407{
408 char_u *p;
409 char_u *res;
410
411 res = p = vim_strsave(orig);
412
413 if (res != NULL)
414 while (*p != NUL)
415 {
416 int l;
417
418 if (enc_utf8)
419 {
420 int c, lc;
421 int newl;
422 char_u *s;
423
424 c = utf_ptr2char(p);
425 l = utf_ptr2len(p);
426 if (c == 0)
427 {
428 // overlong sequence, use only the first byte
429 c = *p;
430 l = 1;
431 }
432 lc = utf_tolower(c);
433
434 // Reallocate string when byte count changes. This is rare,
435 // thus it's OK to do another malloc()/free().
436 newl = utf_char2len(lc);
437 if (newl != l)
438 {
439 s = alloc(STRLEN(res) + 1 + newl - l);
440 if (s == NULL)
441 {
442 vim_free(res);
443 return NULL;
444 }
445 mch_memmove(s, res, p - res);
446 STRCPY(s + (p - res) + newl, p + l);
447 p = s + (p - res);
448 vim_free(res);
449 res = s;
450 }
451
452 utf_char2bytes(lc, p);
453 p += newl;
454 }
455 else if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
456 p += l; // skip multi-byte character
457 else
458 {
459 *p = TOLOWER_LOC(*p); // note that tolower() can be a macro
460 p++;
461 }
462 }
463
464 return res;
465}
466#endif
467
468/*
469 * delete spaces at the end of a string
470 */
471 void
472del_trailing_spaces(char_u *ptr)
473{
474 char_u *q;
475
476 q = ptr + STRLEN(ptr);
477 while (--q > ptr && VIM_ISWHITE(q[0]) && q[-1] != '\\' && q[-1] != Ctrl_V)
478 *q = NUL;
479}
480
481/*
482 * Like strncpy(), but always terminate the result with one NUL.
483 * "to" must be "len + 1" long!
484 */
485 void
486vim_strncpy(char_u *to, char_u *from, size_t len)
487{
488 STRNCPY(to, from, len);
489 to[len] = NUL;
490}
491
492/*
493 * Like strcat(), but make sure the result fits in "tosize" bytes and is
494 * always NUL terminated. "from" and "to" may overlap.
495 */
496 void
497vim_strcat(char_u *to, char_u *from, size_t tosize)
498{
499 size_t tolen = STRLEN(to);
500 size_t fromlen = STRLEN(from);
501
502 if (tolen + fromlen + 1 > tosize)
503 {
504 mch_memmove(to + tolen, from, tosize - tolen - 1);
505 to[tosize - 1] = NUL;
506 }
507 else
508 mch_memmove(to + tolen, from, fromlen + 1);
509}
510
511#if (!defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)) || defined(PROTO)
512/*
513 * Compare two strings, ignoring case, using current locale.
514 * Doesn't work for multi-byte characters.
515 * return 0 for match, < 0 for smaller, > 0 for bigger
516 */
517 int
518vim_stricmp(char *s1, char *s2)
519{
520 int i;
521
522 for (;;)
523 {
524 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
525 if (i != 0)
526 return i; // this character different
527 if (*s1 == NUL)
528 break; // strings match until NUL
529 ++s1;
530 ++s2;
531 }
532 return 0; // strings match
533}
534#endif
535
536#if (!defined(HAVE_STRNCASECMP) && !defined(HAVE_STRNICMP)) || defined(PROTO)
537/*
538 * Compare two strings, for length "len", ignoring case, using current locale.
539 * Doesn't work for multi-byte characters.
540 * return 0 for match, < 0 for smaller, > 0 for bigger
541 */
542 int
543vim_strnicmp(char *s1, char *s2, size_t len)
544{
545 int i;
546
547 while (len > 0)
548 {
549 i = (int)TOLOWER_LOC(*s1) - (int)TOLOWER_LOC(*s2);
550 if (i != 0)
551 return i; // this character different
552 if (*s1 == NUL)
553 break; // strings match until NUL
554 ++s1;
555 ++s2;
556 --len;
557 }
558 return 0; // strings match
559}
560#endif
561
562/*
563 * Search for first occurrence of "c" in "string".
564 * Version of strchr() that handles unsigned char strings with characters from
565 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
566 * end of the string.
567 */
568 char_u *
569vim_strchr(char_u *string, int c)
570{
571 char_u *p;
572 int b;
573
574 p = string;
575 if (enc_utf8 && c >= 0x80)
576 {
577 while (*p != NUL)
578 {
579 int l = utfc_ptr2len(p);
580
581 // Avoid matching an illegal byte here.
582 if (utf_ptr2char(p) == c && l > 1)
583 return p;
584 p += l;
585 }
586 return NULL;
587 }
588 if (enc_dbcs != 0 && c > 255)
589 {
590 int n2 = c & 0xff;
591
592 c = ((unsigned)c >> 8) & 0xff;
593 while ((b = *p) != NUL)
594 {
595 if (b == c && p[1] == n2)
596 return p;
597 p += (*mb_ptr2len)(p);
598 }
599 return NULL;
600 }
601 if (has_mbyte)
602 {
603 while ((b = *p) != NUL)
604 {
605 if (b == c)
606 return p;
607 p += (*mb_ptr2len)(p);
608 }
609 return NULL;
610 }
611 while ((b = *p) != NUL)
612 {
613 if (b == c)
614 return p;
615 ++p;
616 }
617 return NULL;
618}
619
620/*
621 * Version of strchr() that only works for bytes and handles unsigned char
622 * strings with characters above 128 correctly. It also doesn't return a
623 * pointer to the NUL at the end of the string.
624 */
625 char_u *
626vim_strbyte(char_u *string, int c)
627{
628 char_u *p = string;
629
630 while (*p != NUL)
631 {
632 if (*p == c)
633 return p;
634 ++p;
635 }
636 return NULL;
637}
638
639/*
640 * Search for last occurrence of "c" in "string".
641 * Version of strrchr() that handles unsigned char strings with characters from
642 * 128 to 255 correctly. It also doesn't return a pointer to the NUL at the
643 * end of the string.
644 * Return NULL if not found.
645 * Does not handle multi-byte char for "c"!
646 */
647 char_u *
648vim_strrchr(char_u *string, int c)
649{
650 char_u *retval = NULL;
651 char_u *p = string;
652
653 while (*p)
654 {
655 if (*p == c)
656 retval = p;
657 MB_PTR_ADV(p);
658 }
659 return retval;
660}
661
662/*
663 * Vim's version of strpbrk(), in case it's missing.
664 * Don't generate a prototype for this, causes problems when it's not used.
665 */
666#ifndef PROTO
667# ifndef HAVE_STRPBRK
668# ifdef vim_strpbrk
669# undef vim_strpbrk
670# endif
671 char_u *
672vim_strpbrk(char_u *s, char_u *charset)
673{
674 while (*s)
675 {
676 if (vim_strchr(charset, *s) != NULL)
677 return s;
678 MB_PTR_ADV(s);
679 }
680 return NULL;
681}
682# endif
683#endif
684
/*
 * Comparison function for qsort() on an array of string pointers: "s1" and
 * "s2" each point to an array element, the strings are compared with
 * STRCMP().
 */
static int sort_compare(const void *s1, const void *s2);

    static int
sort_compare(const void *s1, const void *s2)
{
    char *left = *(char **)s1;
    char *right = *(char **)s2;

    return STRCMP(left, right);
}
695
696 void
697sort_strings(
698 char_u **files,
699 int count)
700{
701 qsort((void *)files, (size_t)count, sizeof(char_u *), sort_compare);
702}
703
704#if defined(FEAT_QUICKFIX) || defined(FEAT_SPELL) || defined(PROTO)
705/*
706 * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
707 * When "s" is NULL FALSE is returned.
708 */
709 int
710has_non_ascii(char_u *s)
711{
712 char_u *p;
713
714 if (s != NULL)
715 for (p = s; *p != NUL; ++p)
716 if (*p >= 128)
717 return TRUE;
718 return FALSE;
719}
720#endif
721
722/*
723 * Concatenate two strings and return the result in allocated memory.
724 * Returns NULL when out of memory.
725 */
726 char_u *
727concat_str(char_u *str1, char_u *str2)
728{
729 char_u *dest;
730 size_t l = str1 == NULL ? 0 : STRLEN(str1);
731
732 dest = alloc(l + (str2 == NULL ? 0 : STRLEN(str2)) + 1L);
733 if (dest != NULL)
734 {
735 if (str1 == NULL)
736 *dest = NUL;
737 else
738 STRCPY(dest, str1);
739 if (str2 != NULL)
740 STRCPY(dest + l, str2);
741 }
742 return dest;
743}
744
745#if defined(FEAT_EVAL) || defined(PROTO)
746
747/*
748 * Return string "str" in ' quotes, doubling ' characters.
749 * If "str" is NULL an empty string is assumed.
750 * If "function" is TRUE make it function('string').
751 */
752 char_u *
753string_quote(char_u *str, int function)
754{
755 unsigned len;
756 char_u *p, *r, *s;
757
758 len = (function ? 13 : 3);
759 if (str != NULL)
760 {
761 len += (unsigned)STRLEN(str);
762 for (p = str; *p != NUL; MB_PTR_ADV(p))
763 if (*p == '\'')
764 ++len;
765 }
766 s = r = alloc(len);
767 if (r != NULL)
768 {
769 if (function)
770 {
771 STRCPY(r, "function('");
772 r += 10;
773 }
774 else
775 *r++ = '\'';
776 if (str != NULL)
777 for (p = str; *p != NUL; )
778 {
779 if (*p == '\'')
780 *r++ = '\'';
781 MB_COPY_CHAR(p, r);
782 }
783 *r++ = '\'';
784 if (function)
785 *r++ = ')';
786 *r++ = NUL;
787 }
788 return s;
789}
790
791 static void
792byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
793{
794 char_u *t;
795 char_u *str;
796 varnumber_T idx;
797
798 str = tv_get_string_chk(&argvars[0]);
799 idx = tv_get_number_chk(&argvars[1], NULL);
800 rettv->vval.v_number = -1;
801 if (str == NULL || idx < 0)
802 return;
803
804 t = str;
805 for ( ; idx > 0; idx--)
806 {
807 if (*t == NUL) // EOL reached
808 return;
809 if (enc_utf8 && comp)
810 t += utf_ptr2len(t);
811 else
812 t += (*mb_ptr2len)(t);
813 }
814 rettv->vval.v_number = (varnumber_T)(t - str);
815}
816
817/*
818 * "byteidx()" function
819 */
820 void
821f_byteidx(typval_T *argvars, typval_T *rettv)
822{
823 byteidx(argvars, rettv, FALSE);
824}
825
826/*
827 * "byteidxcomp()" function
828 */
829 void
830f_byteidxcomp(typval_T *argvars, typval_T *rettv)
831{
832 byteidx(argvars, rettv, TRUE);
833}
834
835/*
836 * "charidx()" function
837 */
838 void
839f_charidx(typval_T *argvars, typval_T *rettv)
840{
841 char_u *str;
842 varnumber_T idx;
843 varnumber_T countcc = FALSE;
844 char_u *p;
845 int len;
846 int (*ptr2len)(char_u *);
847
848 rettv->vval.v_number = -1;
849
850 if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
851 || (argvars[2].v_type != VAR_UNKNOWN
852 && argvars[2].v_type != VAR_NUMBER
853 && argvars[2].v_type != VAR_BOOL))
854 {
855 emsg(_(e_invarg));
856 return;
857 }
858
859 str = tv_get_string_chk(&argvars[0]);
860 idx = tv_get_number_chk(&argvars[1], NULL);
861 if (str == NULL || idx < 0)
862 return;
863
864 if (argvars[2].v_type != VAR_UNKNOWN)
865 countcc = tv_get_bool(&argvars[2]);
866 if (countcc < 0 || countcc > 1)
867 {
868 semsg(_(e_using_number_as_bool_nr), countcc);
869 return;
870 }
871
872 if (enc_utf8 && countcc)
873 ptr2len = utf_ptr2len;
874 else
875 ptr2len = mb_ptr2len;
876
877 for (p = str, len = 0; p <= str + idx; len++)
878 {
879 if (*p == NUL)
880 return;
881 p += ptr2len(p);
882 }
883
884 rettv->vval.v_number = len > 0 ? len - 1 : 0;
885}
886
887/*
888 * "str2list()" function
889 */
890 void
891f_str2list(typval_T *argvars, typval_T *rettv)
892{
893 char_u *p;
894 int utf8 = FALSE;
895
896 if (rettv_list_alloc(rettv) == FAIL)
897 return;
898
899 if (argvars[1].v_type != VAR_UNKNOWN)
900 utf8 = (int)tv_get_bool_chk(&argvars[1], NULL);
901
902 p = tv_get_string(&argvars[0]);
903
904 if (has_mbyte || utf8)
905 {
906 int (*ptr2len)(char_u *);
907 int (*ptr2char)(char_u *);
908
909 if (utf8 || enc_utf8)
910 {
911 ptr2len = utf_ptr2len;
912 ptr2char = utf_ptr2char;
913 }
914 else
915 {
916 ptr2len = mb_ptr2len;
917 ptr2char = mb_ptr2char;
918 }
919
920 for ( ; *p != NUL; p += (*ptr2len)(p))
921 list_append_number(rettv->vval.v_list, (*ptr2char)(p));
922 }
923 else
924 for ( ; *p != NUL; ++p)
925 list_append_number(rettv->vval.v_list, *p);
926}
927
928/*
929 * "str2nr()" function
930 */
931 void
932f_str2nr(typval_T *argvars, typval_T *rettv)
933{
934 int base = 10;
935 char_u *p;
936 varnumber_T n;
937 int what = 0;
938 int isneg;
939
940 if (argvars[1].v_type != VAR_UNKNOWN)
941 {
942 base = (int)tv_get_number(&argvars[1]);
943 if (base != 2 && base != 8 && base != 10 && base != 16)
944 {
945 emsg(_(e_invarg));
946 return;
947 }
948 if (argvars[2].v_type != VAR_UNKNOWN && tv_get_bool(&argvars[2]))
949 what |= STR2NR_QUOTE;
950 }
951
952 p = skipwhite(tv_get_string_strict(&argvars[0]));
953 isneg = (*p == '-');
954 if (*p == '+' || *p == '-')
955 p = skipwhite(p + 1);
956 switch (base)
957 {
958 case 2: what |= STR2NR_BIN + STR2NR_FORCE; break;
959 case 8: what |= STR2NR_OCT + STR2NR_OOCT + STR2NR_FORCE; break;
960 case 16: what |= STR2NR_HEX + STR2NR_FORCE; break;
961 }
962 vim_str2nr(p, NULL, NULL, what, &n, NULL, 0, FALSE);
963 // Text after the number is silently ignored.
964 if (isneg)
965 rettv->vval.v_number = -n;
966 else
967 rettv->vval.v_number = n;
968
969}
970
971/*
972 * "strgetchar()" function
973 */
974 void
975f_strgetchar(typval_T *argvars, typval_T *rettv)
976{
977 char_u *str;
978 int len;
979 int error = FALSE;
980 int charidx;
981 int byteidx = 0;
982
983 rettv->vval.v_number = -1;
984 str = tv_get_string_chk(&argvars[0]);
985 if (str == NULL)
986 return;
987 len = (int)STRLEN(str);
988 charidx = (int)tv_get_number_chk(&argvars[1], &error);
989 if (error)
990 return;
991
992 while (charidx >= 0 && byteidx < len)
993 {
994 if (charidx == 0)
995 {
996 rettv->vval.v_number = mb_ptr2char(str + byteidx);
997 break;
998 }
999 --charidx;
1000 byteidx += MB_CPTR2LEN(str + byteidx);
1001 }
1002}
1003
1004/*
1005 * "stridx()" function
1006 */
1007 void
1008f_stridx(typval_T *argvars, typval_T *rettv)
1009{
1010 char_u buf[NUMBUFLEN];
1011 char_u *needle;
1012 char_u *haystack;
1013 char_u *save_haystack;
1014 char_u *pos;
1015 int start_idx;
1016
1017 needle = tv_get_string_chk(&argvars[1]);
1018 save_haystack = haystack = tv_get_string_buf_chk(&argvars[0], buf);
1019 rettv->vval.v_number = -1;
1020 if (needle == NULL || haystack == NULL)
1021 return; // type error; errmsg already given
1022
1023 if (argvars[2].v_type != VAR_UNKNOWN)
1024 {
1025 int error = FALSE;
1026
1027 start_idx = (int)tv_get_number_chk(&argvars[2], &error);
1028 if (error || start_idx >= (int)STRLEN(haystack))
1029 return;
1030 if (start_idx >= 0)
1031 haystack += start_idx;
1032 }
1033
1034 pos = (char_u *)strstr((char *)haystack, (char *)needle);
1035 if (pos != NULL)
1036 rettv->vval.v_number = (varnumber_T)(pos - save_haystack);
1037}
1038
1039/*
1040 * "string()" function
1041 */
1042 void
1043f_string(typval_T *argvars, typval_T *rettv)
1044{
1045 char_u *tofree;
1046 char_u numbuf[NUMBUFLEN];
1047
1048 rettv->v_type = VAR_STRING;
1049 rettv->vval.v_string = tv2string(&argvars[0], &tofree, numbuf,
1050 get_copyID());
1051 // Make a copy if we have a value but it's not in allocated memory.
1052 if (rettv->vval.v_string != NULL && tofree == NULL)
1053 rettv->vval.v_string = vim_strsave(rettv->vval.v_string);
1054}
1055
1056/*
1057 * "strlen()" function
1058 */
1059 void
1060f_strlen(typval_T *argvars, typval_T *rettv)
1061{
1062 rettv->vval.v_number = (varnumber_T)(STRLEN(
1063 tv_get_string(&argvars[0])));
1064}
1065
1066 static void
1067strchar_common(typval_T *argvars, typval_T *rettv, int skipcc)
1068{
1069 char_u *s = tv_get_string(&argvars[0]);
1070 varnumber_T len = 0;
1071 int (*func_mb_ptr2char_adv)(char_u **pp);
1072
1073 func_mb_ptr2char_adv = skipcc ? mb_ptr2char_adv : mb_cptr2char_adv;
1074 while (*s != NUL)
1075 {
1076 func_mb_ptr2char_adv(&s);
1077 ++len;
1078 }
1079 rettv->vval.v_number = len;
1080}
1081
1082/*
1083 * "strcharlen()" function
1084 */
1085 void
1086f_strcharlen(typval_T *argvars, typval_T *rettv)
1087{
1088 strchar_common(argvars, rettv, TRUE);
1089}
1090
1091/*
1092 * "strchars()" function
1093 */
1094 void
1095f_strchars(typval_T *argvars, typval_T *rettv)
1096{
1097 varnumber_T skipcc = FALSE;
1098
1099 if (argvars[1].v_type != VAR_UNKNOWN)
1100 skipcc = tv_get_bool(&argvars[1]);
1101 if (skipcc < 0 || skipcc > 1)
1102 semsg(_(e_using_number_as_bool_nr), skipcc);
1103 else
1104 strchar_common(argvars, rettv, skipcc);
1105}
1106
1107/*
1108 * "strdisplaywidth()" function
1109 */
1110 void
1111f_strdisplaywidth(typval_T *argvars, typval_T *rettv)
1112{
1113 char_u *s = tv_get_string(&argvars[0]);
1114 int col = 0;
1115
1116 if (argvars[1].v_type != VAR_UNKNOWN)
1117 col = (int)tv_get_number(&argvars[1]);
1118
1119 rettv->vval.v_number = (varnumber_T)(linetabsize_col(col, s) - col);
1120}
1121
1122/*
1123 * "strwidth()" function
1124 */
1125 void
1126f_strwidth(typval_T *argvars, typval_T *rettv)
1127{
1128 char_u *s = tv_get_string_strict(&argvars[0]);
1129
1130 rettv->vval.v_number = (varnumber_T)(mb_string2cells(s, -1));
1131}
1132
1133/*
1134 * "strcharpart()" function
1135 */
1136 void
1137f_strcharpart(typval_T *argvars, typval_T *rettv)
1138{
1139 char_u *p;
1140 int nchar;
1141 int nbyte = 0;
1142 int charlen;
1143 int skipcc = FALSE;
1144 int len = 0;
1145 int slen;
1146 int error = FALSE;
1147
1148 p = tv_get_string(&argvars[0]);
1149 slen = (int)STRLEN(p);
1150
1151 nchar = (int)tv_get_number_chk(&argvars[1], &error);
1152 if (!error)
1153 {
1154 if (argvars[2].v_type != VAR_UNKNOWN
1155 && argvars[3].v_type != VAR_UNKNOWN)
1156 {
1157 skipcc = tv_get_bool(&argvars[3]);
1158 if (skipcc < 0 || skipcc > 1)
1159 {
1160 semsg(_(e_using_number_as_bool_nr), skipcc);
1161 return;
1162 }
1163 }
1164
1165 if (nchar > 0)
1166 while (nchar > 0 && nbyte < slen)
1167 {
1168 if (skipcc)
1169 nbyte += mb_ptr2len(p + nbyte);
1170 else
1171 nbyte += MB_CPTR2LEN(p + nbyte);
1172 --nchar;
1173 }
1174 else
1175 nbyte = nchar;
1176 if (argvars[2].v_type != VAR_UNKNOWN)
1177 {
1178 charlen = (int)tv_get_number(&argvars[2]);
1179 while (charlen > 0 && nbyte + len < slen)
1180 {
1181 int off = nbyte + len;
1182
1183 if (off < 0)
1184 len += 1;
1185 else
1186 {
1187 if (skipcc)
1188 len += mb_ptr2len(p + off);
1189 else
1190 len += MB_CPTR2LEN(p + off);
1191 }
1192 --charlen;
1193 }
1194 }
1195 else
1196 len = slen - nbyte; // default: all bytes that are available.
1197 }
1198
1199 /*
1200 * Only return the overlap between the specified part and the actual
1201 * string.
1202 */
1203 if (nbyte < 0)
1204 {
1205 len += nbyte;
1206 nbyte = 0;
1207 }
1208 else if (nbyte > slen)
1209 nbyte = slen;
1210 if (len < 0)
1211 len = 0;
1212 else if (nbyte + len > slen)
1213 len = slen - nbyte;
1214
1215 rettv->v_type = VAR_STRING;
1216 rettv->vval.v_string = vim_strnsave(p + nbyte, len);
1217}
1218
1219/*
1220 * "strpart()" function
1221 */
1222 void
1223f_strpart(typval_T *argvars, typval_T *rettv)
1224{
1225 char_u *p;
1226 int n;
1227 int len;
1228 int slen;
1229 int error = FALSE;
1230
1231 p = tv_get_string(&argvars[0]);
1232 slen = (int)STRLEN(p);
1233
1234 n = (int)tv_get_number_chk(&argvars[1], &error);
1235 if (error)
1236 len = 0;
1237 else if (argvars[2].v_type != VAR_UNKNOWN)
1238 len = (int)tv_get_number(&argvars[2]);
1239 else
1240 len = slen - n; // default len: all bytes that are available.
1241
1242 // Only return the overlap between the specified part and the actual
1243 // string.
1244 if (n < 0)
1245 {
1246 len += n;
1247 n = 0;
1248 }
1249 else if (n > slen)
1250 n = slen;
1251 if (len < 0)
1252 len = 0;
1253 else if (n + len > slen)
1254 len = slen - n;
1255
1256 if (argvars[2].v_type != VAR_UNKNOWN && argvars[3].v_type != VAR_UNKNOWN)
1257 {
1258 int off;
1259
1260 // length in characters
1261 for (off = n; off < slen && len > 0; --len)
1262 off += mb_ptr2len(p + off);
1263 len = off - n;
1264 }
1265
1266 rettv->v_type = VAR_STRING;
1267 rettv->vval.v_string = vim_strnsave(p + n, len);
1268}
1269
1270/*
1271 * "strridx()" function
1272 */
1273 void
1274f_strridx(typval_T *argvars, typval_T *rettv)
1275{
1276 char_u buf[NUMBUFLEN];
1277 char_u *needle;
1278 char_u *haystack;
1279 char_u *rest;
1280 char_u *lastmatch = NULL;
1281 int haystack_len, end_idx;
1282
1283 needle = tv_get_string_chk(&argvars[1]);
1284 haystack = tv_get_string_buf_chk(&argvars[0], buf);
1285
1286 rettv->vval.v_number = -1;
1287 if (needle == NULL || haystack == NULL)
1288 return; // type error; errmsg already given
1289
1290 haystack_len = (int)STRLEN(haystack);
1291 if (argvars[2].v_type != VAR_UNKNOWN)
1292 {
1293 // Third argument: upper limit for index
1294 end_idx = (int)tv_get_number_chk(&argvars[2], NULL);
1295 if (end_idx < 0)
1296 return; // can never find a match
1297 }
1298 else
1299 end_idx = haystack_len;
1300
1301 if (*needle == NUL)
1302 {
1303 // Empty string matches past the end.
1304 lastmatch = haystack + end_idx;
1305 }
1306 else
1307 {
1308 for (rest = haystack; *rest != '\0'; ++rest)
1309 {
1310 rest = (char_u *)strstr((char *)rest, (char *)needle);
1311 if (rest == NULL || rest > haystack + end_idx)
1312 break;
1313 lastmatch = rest;
1314 }
1315 }
1316
1317 if (lastmatch == NULL)
1318 rettv->vval.v_number = -1;
1319 else
1320 rettv->vval.v_number = (varnumber_T)(lastmatch - haystack);
1321}
1322
1323/*
1324 * "strtrans()" function
1325 */
1326 void
1327f_strtrans(typval_T *argvars, typval_T *rettv)
1328{
1329 rettv->v_type = VAR_STRING;
1330 rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
1331}
1332
1333/*
1334 * "tolower(string)" function
1335 */
1336 void
1337f_tolower(typval_T *argvars, typval_T *rettv)
1338{
1339 rettv->v_type = VAR_STRING;
1340 rettv->vval.v_string = strlow_save(tv_get_string(&argvars[0]));
1341}
1342
1343/*
1344 * "toupper(string)" function
1345 */
1346 void
1347f_toupper(typval_T *argvars, typval_T *rettv)
1348{
1349 rettv->v_type = VAR_STRING;
1350 rettv->vval.v_string = strup_save(tv_get_string(&argvars[0]));
1351}
1352
1353/*
1354 * "tr(string, fromstr, tostr)" function
1355 */
1356 void
1357f_tr(typval_T *argvars, typval_T *rettv)
1358{
1359 char_u *in_str;
1360 char_u *fromstr;
1361 char_u *tostr;
1362 char_u *p;
1363 int inlen;
1364 int fromlen;
1365 int tolen;
1366 int idx;
1367 char_u *cpstr;
1368 int cplen;
1369 int first = TRUE;
1370 char_u buf[NUMBUFLEN];
1371 char_u buf2[NUMBUFLEN];
1372 garray_T ga;
1373
1374 in_str = tv_get_string(&argvars[0]);
1375 fromstr = tv_get_string_buf_chk(&argvars[1], buf);
1376 tostr = tv_get_string_buf_chk(&argvars[2], buf2);
1377
1378 // Default return value: empty string.
1379 rettv->v_type = VAR_STRING;
1380 rettv->vval.v_string = NULL;
1381 if (fromstr == NULL || tostr == NULL)
1382 return; // type error; errmsg already given
1383 ga_init2(&ga, (int)sizeof(char), 80);
1384
1385 if (!has_mbyte)
1386 // not multi-byte: fromstr and tostr must be the same length
1387 if (STRLEN(fromstr) != STRLEN(tostr))
1388 {
1389error:
1390 semsg(_(e_invarg2), fromstr);
1391 ga_clear(&ga);
1392 return;
1393 }
1394
1395 // fromstr and tostr have to contain the same number of chars
1396 while (*in_str != NUL)
1397 {
1398 if (has_mbyte)
1399 {
1400 inlen = (*mb_ptr2len)(in_str);
1401 cpstr = in_str;
1402 cplen = inlen;
1403 idx = 0;
1404 for (p = fromstr; *p != NUL; p += fromlen)
1405 {
1406 fromlen = (*mb_ptr2len)(p);
1407 if (fromlen == inlen && STRNCMP(in_str, p, inlen) == 0)
1408 {
1409 for (p = tostr; *p != NUL; p += tolen)
1410 {
1411 tolen = (*mb_ptr2len)(p);
1412 if (idx-- == 0)
1413 {
1414 cplen = tolen;
1415 cpstr = p;
1416 break;
1417 }
1418 }
1419 if (*p == NUL) // tostr is shorter than fromstr
1420 goto error;
1421 break;
1422 }
1423 ++idx;
1424 }
1425
1426 if (first && cpstr == in_str)
1427 {
1428 // Check that fromstr and tostr have the same number of
1429 // (multi-byte) characters. Done only once when a character
1430 // of in_str doesn't appear in fromstr.
1431 first = FALSE;
1432 for (p = tostr; *p != NUL; p += tolen)
1433 {
1434 tolen = (*mb_ptr2len)(p);
1435 --idx;
1436 }
1437 if (idx != 0)
1438 goto error;
1439 }
1440
1441 (void)ga_grow(&ga, cplen);
1442 mch_memmove((char *)ga.ga_data + ga.ga_len, cpstr, (size_t)cplen);
1443 ga.ga_len += cplen;
1444
1445 in_str += inlen;
1446 }
1447 else
1448 {
1449 // When not using multi-byte chars we can do it faster.
1450 p = vim_strchr(fromstr, *in_str);
1451 if (p != NULL)
1452 ga_append(&ga, tostr[p - fromstr]);
1453 else
1454 ga_append(&ga, *in_str);
1455 ++in_str;
1456 }
1457 }
1458
1459 // add a terminating NUL
1460 (void)ga_grow(&ga, 1);
1461 ga_append(&ga, NUL);
1462
1463 rettv->vval.v_string = ga.ga_data;
1464}
1465
1466/*
1467 * "trim({expr})" function
1468 */
1469 void
1470f_trim(typval_T *argvars, typval_T *rettv)
1471{
1472 char_u buf1[NUMBUFLEN];
1473 char_u buf2[NUMBUFLEN];
1474 char_u *head = tv_get_string_buf_chk(&argvars[0], buf1);
1475 char_u *mask = NULL;
1476 char_u *tail;
1477 char_u *prev;
1478 char_u *p;
1479 int c1;
1480 int dir = 0;
1481
1482 rettv->v_type = VAR_STRING;
1483 rettv->vval.v_string = NULL;
1484 if (head == NULL)
1485 return;
1486
1487 if (argvars[1].v_type != VAR_UNKNOWN && argvars[1].v_type != VAR_STRING)
1488 {
1489 semsg(_(e_invarg2), tv_get_string(&argvars[1]));
1490 return;
1491 }
1492
1493 if (argvars[1].v_type == VAR_STRING)
1494 {
1495 mask = tv_get_string_buf_chk(&argvars[1], buf2);
1496
1497 if (argvars[2].v_type != VAR_UNKNOWN)
1498 {
1499 int error = 0;
1500
1501 // leading or trailing characters to trim
1502 dir = (int)tv_get_number_chk(&argvars[2], &error);
1503 if (error)
1504 return;
1505 if (dir < 0 || dir > 2)
1506 {
1507 semsg(_(e_invarg2), tv_get_string(&argvars[2]));
1508 return;
1509 }
1510 }
1511 }
1512
1513 if (dir == 0 || dir == 1)
1514 {
1515 // Trim leading characters
1516 while (*head != NUL)
1517 {
1518 c1 = PTR2CHAR(head);
1519 if (mask == NULL)
1520 {
1521 if (c1 > ' ' && c1 != 0xa0)
1522 break;
1523 }
1524 else
1525 {
1526 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1527 if (c1 == PTR2CHAR(p))
1528 break;
1529 if (*p == NUL)
1530 break;
1531 }
1532 MB_PTR_ADV(head);
1533 }
1534 }
1535
1536 tail = head + STRLEN(head);
1537 if (dir == 0 || dir == 2)
1538 {
1539 // Trim trailing characters
1540 for (; tail > head; tail = prev)
1541 {
1542 prev = tail;
1543 MB_PTR_BACK(head, prev);
1544 c1 = PTR2CHAR(prev);
1545 if (mask == NULL)
1546 {
1547 if (c1 > ' ' && c1 != 0xa0)
1548 break;
1549 }
1550 else
1551 {
1552 for (p = mask; *p != NUL; MB_PTR_ADV(p))
1553 if (c1 == PTR2CHAR(p))
1554 break;
1555 if (*p == NUL)
1556 break;
1557 }
1558 }
1559 }
1560 rettv->vval.v_string = vim_strnsave(head, tail - head);
1561}
1562
1563#endif