patch 9.1.1016: Not possible to convert string2blob and blob2string
Problem: Not possible to convert a string to a blob or a blob to a string
Solution: add support for the blob2str() and str2blob() functions
closes: #16373
Signed-off-by: Yegappan Lakshmanan <yegappan@yahoo.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/src/errors.h b/src/errors.h
index 2811a32..6bab826 100644
--- a/src/errors.h
+++ b/src/errors.h
@@ -3657,3 +3657,5 @@
INIT(= N_("E1513: Cannot switch buffer. 'winfixbuf' is enabled"));
EXTERN char e_invalid_return_type_from_findfunc[]
INIT(= N_("E1514: 'findfunc' did not return a List type"));
+EXTERN char e_str_encoding_failed[]
+ INIT(= N_("E1515: Unable to convert %s '%s' encoding")); // args: "from" or "to", then the encoding name
diff --git a/src/evalfunc.c b/src/evalfunc.c
index ef30792..6314252 100644
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -1151,6 +1151,7 @@
static argcheck_T arg2_string_or_list_dict[] = {arg_string_or_list_any, arg_dict_any};
static argcheck_T arg2_string_or_list_number[] = {arg_string_or_list_any, arg_number};
static argcheck_T arg2_string_string_or_number[] = {arg_string, arg_string_or_nr};
+static argcheck_T arg2_blob_dict[] = {arg_blob, arg_dict_any}; // argument types of blob2str()
static argcheck_T arg3_any_list_dict[] = {arg_any, arg_list_any, arg_dict_any};
static argcheck_T arg3_buffer_lnum_lnum[] = {arg_buffer, arg_lnum, arg_lnum};
static argcheck_T arg3_buffer_number_number[] = {arg_buffer, arg_number, arg_number};
@@ -1844,6 +1845,8 @@
ret_bool, f_bindtextdomain},
{"blob2list", 1, 1, FEARG_1, arg1_blob,
ret_list_number, f_blob2list},
+ {"blob2str", 1, 2, FEARG_1, arg2_blob_dict,
+ ret_string, f_blob2str},
{"browse", 4, 4, 0, arg4_browse,
ret_string, f_browse},
{"browsedir", 2, 2, 0, arg2_string,
@@ -2710,6 +2713,8 @@
ret_list_number, f_srand},
{"state", 0, 1, FEARG_1, arg1_string,
ret_string, f_state},
+ {"str2blob", 1, 2, FEARG_1, arg2_string_dict,
+ ret_blob, f_str2blob},
{"str2float", 1, 2, FEARG_1, arg2_string_bool,
ret_float, f_str2float},
{"str2list", 1, 2, FEARG_1, arg2_string_bool,
diff --git a/src/mbyte.c b/src/mbyte.c
index 4a7eada..1570182 100644
--- a/src/mbyte.c
+++ b/src/mbyte.c
@@ -2107,6 +2107,17 @@
}
/*
+ * Return length of UTF-8 character, obtained from the first byte.
+ * "b" must be between 0 and 255!
+ * Returns 0 for an invalid first byte value.
+ */
+ int
+utf_byte2len_zero(int b)
+{
+ return utf8len_tab_zero[b]; // plain table lookup, "b" is not range checked here
+}
+
+/*
* Get the length of UTF-8 byte sequence "p[size]". Does not include any
* following composing characters.
* Returns 1 for "".
diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro
index bb976e3..7061bab 100644
--- a/src/proto/mbyte.pro
+++ b/src/proto/mbyte.pro
@@ -31,6 +31,7 @@
int utfc_char2bytes(int off, char_u *buf);
int utf_ptr2len(char_u *p);
int utf_byte2len(int b);
+int utf_byte2len_zero(int b);
int utf_ptr2len_len(char_u *p, int size);
int utfc_ptr2len(char_u *p);
int utfc_ptr2len_len(char_u *p, int size);
diff --git a/src/proto/strings.pro b/src/proto/strings.pro
index c25555f..ddc53cc 100644
--- a/src/proto/strings.pro
+++ b/src/proto/strings.pro
@@ -31,6 +31,8 @@
void f_byteidx(typval_T *argvars, typval_T *rettv);
void f_byteidxcomp(typval_T *argvars, typval_T *rettv);
void f_charidx(typval_T *argvars, typval_T *rettv);
+void f_blob2str(typval_T *argvars, typval_T *rettv);
+void f_str2blob(typval_T *argvars, typval_T *rettv);
void f_str2list(typval_T *argvars, typval_T *rettv);
void f_str2nr(typval_T *argvars, typval_T *rettv);
void f_strgetchar(typval_T *argvars, typval_T *rettv);
diff --git a/src/strings.c b/src/strings.c
index 54ac178..c26914d 100644
--- a/src/strings.c
+++ b/src/strings.c
@@ -1214,6 +1214,146 @@
}
/*
+ * Convert the string "str", from encoding "from" to encoding "to".
+ */
+ static char_u *
+convert_string(char_u *str, char_u *from, char_u *to)
+{
+ vimconv_T vimconv;
+
+ vimconv.vc_type = CONV_NONE;
+ if (convert_setup(&vimconv, from, to) == FAIL)
+ return NULL; // no conversion from "from" to "to" could be set up
+ vimconv.vc_fail = TRUE; // presumably: unconvertible text is an error - confirm in vimconv_T
+ if (vimconv.vc_type == CONV_NONE)
+ str = vim_strsave(str); // nothing to convert: hand back a copy, callers free the result
+ else
+ str = string_convert(&vimconv, str, NULL);
+ convert_setup(&vimconv, NULL, NULL); // presumably releases the conversion state - confirm
+
+ return str; // copy or converted text; callers treat NULL as failure
+}
+
+/*
+ * "blob2str()" function
+ * Converts a blob to a string; with 'encoding' utf-8 the result must be valid.
+ */
+ void
+f_blob2str(typval_T *argvars, typval_T *rettv)
+{
+ blob_T *blob;
+ char_u *str; // copy of the blob bytes with a terminating NUL
+ char_u *p; // string to return: "str" itself or its converted copy
+ int blen;
+
+ if (check_for_blob_arg(argvars, 0) == FAIL
+ || check_for_opt_dict_arg(argvars, 1) == FAIL)
+ return;
+
+ blob = argvars->vval.v_blob;
+ blen = blob_len(blob);
+
+ rettv->v_type = VAR_STRING;
+
+ str = alloc(blen + 1); // one extra byte for the terminating NUL
+ if (str == NULL)
+ return;
+
+ for (int i = 0; i < blen; i++)
+ str[i] = (char_u)blob_get(blob, i);
+ str[blen] = NUL; // NOTE(review): a 0x00 byte inside the blob would end the string early - confirm
+
+ p = str;
+ if (argvars[1].v_type != VAR_UNKNOWN) // the optional dict argument is present
+ {
+ dict_T *d = argvars[1].vval.v_dict;
+ if (d != NULL)
+ {
+ char_u *enc = dict_get_string(d, "encoding", FALSE);
+ if (enc != NULL)
+ {
+ char_u *from = enc_canonize(enc_skip(enc));
+ p = convert_string(str, from, p_enc); // from the given encoding to 'encoding'
+ vim_free(str); // "p" is now a separate allocation, or NULL
+ if (p == NULL)
+ {
+ semsg(_(e_str_encoding_failed), "from", from);
+ vim_free(from);
+ return;
+ }
+ vim_free(from);
+ }
+ }
+ }
+
+ if (STRCMP(p_enc, "utf-8") == 0 || STRCMP(p_enc, "utf8") == 0)
+ {
+ if (!utf_valid_string(p, NULL)) // refuse to return text that is not valid UTF-8
+ {
+ semsg(_(e_str_encoding_failed), "from", p_enc); // names 'encoding', also after a conversion
+ vim_free(p);
+ return;
+ }
+ }
+
+ rettv->vval.v_string = p; // ownership of "p" passes to the return value
+}
+
+/*
+ * "str2blob()" function: blob with the bytes of a string, optionally converted.
+ */
+ void
+f_str2blob(typval_T *argvars, typval_T *rettv)
+{
+ blob_T *blob;
+ char_u *p; // the string argument, or its converted copy
+ size_t len;
+
+ if (check_for_string_arg(argvars, 0) == FAIL
+ || check_for_opt_dict_arg(argvars, 1) == FAIL)
+ return;
+
+ if (rettv_blob_alloc(rettv) == FAIL)
+ return;
+
+ blob = rettv->vval.v_blob; // returned with nothing appended when an error occurs below
+
+ p = tv_get_string_chk(&argvars[0]);
+ if (p == NULL)
+ return;
+
+ int free_str = FALSE; // TRUE once "p" points to an allocated conversion result
+ if (argvars[1].v_type != VAR_UNKNOWN) // the optional dict argument is present
+ {
+ dict_T *d = argvars[1].vval.v_dict;
+ if (d != NULL)
+ {
+ char_u *enc = dict_get_string(d, "encoding", FALSE);
+ if (enc != NULL)
+ {
+ char_u *to = enc_canonize(enc_skip(enc));
+ p = convert_string(p, p_enc, to); // from 'encoding' to the requested encoding
+ if (p == NULL)
+ {
+ semsg(_(e_str_encoding_failed), "to", to);
+ vim_free(to);
+ return;
+ }
+ vim_free(to);
+ free_str = TRUE;
+ }
+ }
+ }
+
+ len = STRLEN(p);
+ for (size_t i = 0; i < len; i++)
+ ga_append(&blob->bv_ga, (int)p[i]); // one blob byte per string byte; NOTE(review): if ga_append() can fail that is not detected - confirm
+
+ if (free_str)
+ vim_free(p);
+}
+
+/*
* "str2list()" function
*/
void
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index f80754f..8b0af91 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -4257,4 +4257,73 @@
call v9.CheckLegacyAndVim9Success(lines)
endfunc
+" Tests for str2blob(): plain strings, the "encoding" option and error cases
+func Test_str2blob()
+ let lines =<< trim END
+ call assert_equal(0z, str2blob(""))
+ call assert_fails("call str2blob([])", 'E1174: String required for argument 1')
+ call assert_equal(0z6162, str2blob("ab"))
+ call assert_equal(0zC2ABC2BB, str2blob("«»"))
+ call assert_equal(0zC59DC59F, str2blob("ŝş"))
+ call assert_equal(0zE0AE85E0.AE87, str2blob("அஇ"))
+ call assert_equal(0zF09F81B0.F09F81B3, str2blob("🁰🁳"))
+ call assert_equal(0z616263, str2blob('abc', {}))
+ call assert_equal(0zABBB, str2blob('«»', {'encoding': 'latin1'}))
+ call assert_equal(0zC2ABC2BB, str2blob('«»', {'encoding': 'utf8'}))
+
+ call assert_fails("call str2blob('abc', [])", 'E1206: Dictionary required for argument 2')
+ call assert_fails("call str2blob('abc', {'encoding': []})", 'E730: Using a List as a String')
+ call assert_fails("call str2blob('abc', {'encoding': 'ab12xy'})", 'E1515: Unable to convert to ''ab12xy'' encoding')
+ call assert_fails("call str2blob('ŝş', {'encoding': 'latin1'})", 'E1515: Unable to convert to ''latin1'' encoding')
+ call assert_fails("call str2blob('அஇ', {'encoding': 'latin1'})", 'E1515: Unable to convert to ''latin1'' encoding')
+ call assert_fails("call str2blob('🁰🁳', {'encoding': 'latin1'})", 'E1515: Unable to convert to ''latin1'' encoding')
+ END
+ call v9.CheckLegacyAndVim9Success(lines)
+endfunc
+
+" Tests for blob2str(): valid input, the "encoding" option and error cases
+func Test_blob2str()
+ let lines =<< trim END
+ call assert_equal("", blob2str(0z))
+ call assert_fails("call blob2str([])", 'E1238: Blob required for argument 1')
+ call assert_equal("ab", blob2str(0z6162))
+ call assert_equal("«»", blob2str(0zC2ABC2BB))
+ call assert_equal("ŝş", blob2str(0zC59DC59F))
+ call assert_equal("அஇ", blob2str(0zE0AE85E0.AE87))
+ call assert_equal("🁰🁳", blob2str(0zF09F81B0.F09F81B3))
+ call assert_equal('«»', blob2str(0zABBB, {'encoding': 'latin1'}))
+ call assert_equal('«»', blob2str(0zC2ABC2BB, {'encoding': 'utf8'}))
+
+ #" Invalid encoding
+ call assert_fails("call blob2str(0z80)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zC0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zE0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zF0)", "E1515: Unable to convert from 'utf-8' encoding")
+
+ call assert_fails("call blob2str(0z6180)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0z61C0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0z61E0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0z61F0)", "E1515: Unable to convert from 'utf-8' encoding")
+
+ call assert_fails("call blob2str(0zC0C0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0z61C0C0)", "E1515: Unable to convert from 'utf-8' encoding")
+
+ call assert_fails("call blob2str(0zE0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zE080)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zE080C0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0z61E080C0)", "E1515: Unable to convert from 'utf-8' encoding")
+
+ call assert_fails("call blob2str(0zF08080C0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0z61F08080C0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zF0)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zF080)", "E1515: Unable to convert from 'utf-8' encoding")
+ call assert_fails("call blob2str(0zF08080)", "E1515: Unable to convert from 'utf-8' encoding")
+
+ call assert_fails("call blob2str(0z6162, [])", 'E1206: Dictionary required for argument 2')
+ call assert_fails("call blob2str(0z6162, {'encoding': []})", 'E730: Using a List as a String')
+ call assert_fails("call blob2str(0z6162, {'encoding': 'ab12xy'})", 'E1515: Unable to convert from ''ab12xy'' encoding')
+ END
+ call v9.CheckLegacyAndVim9Success(lines)
+endfunc
+
" vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index 96a51da..13ffc5d 100644
--- a/src/version.c
+++ b/src/version.c
@@ -705,6 +705,8 @@
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 1016,
+/**/
1015,
/**/
1014,