patch 9.0.1485: no functions for converting from/to UTF-16 index
Problem: no functions for converting from/to UTF-16 index.
Solution: Add UTF-16 flag to existing functions and add strutf16len() and
utf16idx(). (Yegappan Lakshmanan, closes #12216)
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index 3bea88d..e32c4f5 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -1192,19 +1192,14 @@
bw!
endfunc
-" Test for byteidx() and byteidxcomp() functions
+" Test for byteidx() using a character index
func Test_byteidx()
let a = '.é.' " one char of two bytes
call assert_equal(0, byteidx(a, 0))
- call assert_equal(0, byteidxcomp(a, 0))
call assert_equal(1, byteidx(a, 1))
- call assert_equal(1, byteidxcomp(a, 1))
call assert_equal(3, byteidx(a, 2))
- call assert_equal(3, byteidxcomp(a, 2))
call assert_equal(4, byteidx(a, 3))
- call assert_equal(4, byteidxcomp(a, 3))
call assert_equal(-1, byteidx(a, 4))
- call assert_equal(-1, byteidxcomp(a, 4))
let b = '.é.' " normal e with composing char
call assert_equal(0, b->byteidx(0))
@@ -1212,18 +1207,184 @@
call assert_equal(4, b->byteidx(2))
call assert_equal(5, b->byteidx(3))
call assert_equal(-1, b->byteidx(4))
- call assert_fails("call byteidx([], 0)", 'E730:')
+ " string with multiple composing characters
+ let str = '-ą́-ą́'
+ call assert_equal(0, byteidx(str, 0))
+ call assert_equal(1, byteidx(str, 1))
+ call assert_equal(6, byteidx(str, 2))
+ call assert_equal(7, byteidx(str, 3))
+ call assert_equal(12, byteidx(str, 4))
+ call assert_equal(-1, byteidx(str, 5))
+
+ " empty string
+ call assert_equal(0, byteidx('', 0))
+ call assert_equal(-1, byteidx('', 1))
+
+ " error cases
+ call assert_fails("call byteidx([], 0)", 'E730:')
+ call assert_fails("call byteidx('abc', [])", 'E745:')
+endfunc
+
+" Test for byteidxcomp() using a character index
+func Test_byteidxcomp()
+ let a = '.é.' " one char of two bytes
+ call assert_equal(0, byteidxcomp(a, 0))
+ call assert_equal(1, byteidxcomp(a, 1))
+ call assert_equal(3, byteidxcomp(a, 2))
+ call assert_equal(4, byteidxcomp(a, 3))
+ call assert_equal(-1, byteidxcomp(a, 4))
+
+ let b = '.é.' " normal e with composing char
call assert_equal(0, b->byteidxcomp(0))
call assert_equal(1, b->byteidxcomp(1))
call assert_equal(2, b->byteidxcomp(2))
call assert_equal(4, b->byteidxcomp(3))
call assert_equal(5, b->byteidxcomp(4))
call assert_equal(-1, b->byteidxcomp(5))
+
+ " string with multiple composing characters
+ let str = '-ą́-ą́'
+ call assert_equal(0, byteidxcomp(str, 0))
+ call assert_equal(1, byteidxcomp(str, 1))
+ call assert_equal(2, byteidxcomp(str, 2))
+ call assert_equal(4, byteidxcomp(str, 3))
+ call assert_equal(6, byteidxcomp(str, 4))
+ call assert_equal(7, byteidxcomp(str, 5))
+ call assert_equal(8, byteidxcomp(str, 6))
+ call assert_equal(10, byteidxcomp(str, 7))
+ call assert_equal(12, byteidxcomp(str, 8))
+ call assert_equal(-1, byteidxcomp(str, 9))
+
+ " empty string
+ call assert_equal(0, byteidxcomp('', 0))
+ call assert_equal(-1, byteidxcomp('', 1))
+
+ " error cases
call assert_fails("call byteidxcomp([], 0)", 'E730:')
+ call assert_fails("call byteidxcomp('abc', [])", 'E745:')
endfunc
-" Test for charidx()
+" Test for byteidx() using a UTF-16 index
+func Test_byteidx_from_utf16_index()
+ " string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, byteidx(str, i, v:true))
+ endfor
+ call assert_equal(3, byteidx(str, 3, v:true))
+ call assert_equal(-1, byteidx(str, 4, v:true))
+
+ " string with two byte characters
+ let str = "a©©b"
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(3, byteidx(str, 2, v:true))
+ call assert_equal(5, byteidx(str, 3, v:true))
+ call assert_equal(6, byteidx(str, 4, v:true))
+ call assert_equal(-1, byteidx(str, 5, v:true))
+
+ " string with four byte characters
+ let str = "a😊😊b"
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(1, byteidx(str, 2, v:true))
+ call assert_equal(5, byteidx(str, 3, v:true))
+ call assert_equal(5, byteidx(str, 4, v:true))
+ call assert_equal(9, byteidx(str, 5, v:true))
+ call assert_equal(10, byteidx(str, 6, v:true))
+ call assert_equal(-1, byteidx(str, 7, v:true))
+
+ " string with composing characters
+ let str = '-á-b́'
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(4, byteidx(str, 2, v:true))
+ call assert_equal(5, byteidx(str, 3, v:true))
+ call assert_equal(8, byteidx(str, 4, v:true))
+ call assert_equal(-1, byteidx(str, 5, v:true))
+
+ " string with multiple composing characters
+ let str = '-ą́-ą́'
+ call assert_equal(0, byteidx(str, 0, v:true))
+ call assert_equal(1, byteidx(str, 1, v:true))
+ call assert_equal(6, byteidx(str, 2, v:true))
+ call assert_equal(7, byteidx(str, 3, v:true))
+ call assert_equal(12, byteidx(str, 4, v:true))
+ call assert_equal(-1, byteidx(str, 5, v:true))
+
+ " empty string
+ call assert_equal(0, byteidx('', 0, v:true))
+ call assert_equal(-1, byteidx('', 1, v:true))
+
+ " error cases
+ call assert_fails('call byteidx(str, 0, [])', 'E745:')
+endfunc
+
+" Test for byteidxcomp() using a UTF-16 index
+func Test_byteidxcomp_from_utf16_index()
+ " string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, byteidxcomp(str, i, v:true))
+ endfor
+ call assert_equal(3, byteidxcomp(str, 3, v:true))
+ call assert_equal(-1, byteidxcomp(str, 4, v:true))
+
+ " string with two byte characters
+ let str = "a©©b"
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(3, byteidxcomp(str, 2, v:true))
+ call assert_equal(5, byteidxcomp(str, 3, v:true))
+ call assert_equal(6, byteidxcomp(str, 4, v:true))
+ call assert_equal(-1, byteidxcomp(str, 5, v:true))
+
+ " string with four byte characters
+ let str = "a😊😊b"
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(1, byteidxcomp(str, 2, v:true))
+ call assert_equal(5, byteidxcomp(str, 3, v:true))
+ call assert_equal(5, byteidxcomp(str, 4, v:true))
+ call assert_equal(9, byteidxcomp(str, 5, v:true))
+ call assert_equal(10, byteidxcomp(str, 6, v:true))
+ call assert_equal(-1, byteidxcomp(str, 7, v:true))
+
+ " string with composing characters
+ let str = '-á-b́'
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(2, byteidxcomp(str, 2, v:true))
+ call assert_equal(4, byteidxcomp(str, 3, v:true))
+ call assert_equal(5, byteidxcomp(str, 4, v:true))
+ call assert_equal(6, byteidxcomp(str, 5, v:true))
+ call assert_equal(8, byteidxcomp(str, 6, v:true))
+ call assert_equal(-1, byteidxcomp(str, 7, v:true))
+ call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
+
+ " string with multiple composing characters
+ let str = '-ą́-ą́'
+ call assert_equal(0, byteidxcomp(str, 0, v:true))
+ call assert_equal(1, byteidxcomp(str, 1, v:true))
+ call assert_equal(2, byteidxcomp(str, 2, v:true))
+ call assert_equal(4, byteidxcomp(str, 3, v:true))
+ call assert_equal(6, byteidxcomp(str, 4, v:true))
+ call assert_equal(7, byteidxcomp(str, 5, v:true))
+ call assert_equal(8, byteidxcomp(str, 6, v:true))
+ call assert_equal(10, byteidxcomp(str, 7, v:true))
+ call assert_equal(12, byteidxcomp(str, 8, v:true))
+ call assert_equal(-1, byteidxcomp(str, 9, v:true))
+
+ " empty string
+ call assert_equal(0, byteidxcomp('', 0, v:true))
+ call assert_equal(-1, byteidxcomp('', 1, v:true))
+
+ " error cases
+ call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
+endfunc
+
+" Test for charidx() using a byte index
func Test_charidx()
let a = 'xáb́y'
call assert_equal(0, charidx(a, 0))
@@ -1232,17 +1393,20 @@
call assert_equal(3, charidx(a, 7))
call assert_equal(-1, charidx(a, 8))
call assert_equal(-1, charidx(a, -1))
- call assert_equal(-1, charidx('', 0))
- call assert_equal(-1, charidx(test_null_string(), 0))
" count composing characters
- call assert_equal(0, charidx(a, 0, 1))
- call assert_equal(2, charidx(a, 2, 1))
- call assert_equal(3, charidx(a, 4, 1))
- call assert_equal(5, charidx(a, 7, 1))
- call assert_equal(-1, charidx(a, 8, 1))
+ call assert_equal(0, a->charidx(0, 1))
+ call assert_equal(2, a->charidx(2, 1))
+ call assert_equal(3, a->charidx(4, 1))
+ call assert_equal(5, a->charidx(7, 1))
+ call assert_equal(-1, a->charidx(8, 1))
+
+ " empty string
+ call assert_equal(-1, charidx('', 0))
call assert_equal(-1, charidx('', 0, 1))
+ " error cases
+ call assert_equal(-1, charidx(test_null_string(), 0))
call assert_fails('let x = charidx([], 1)', 'E1174:')
call assert_fails('let x = charidx("abc", [])', 'E1210:')
call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
@@ -1250,6 +1414,237 @@
call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:')
endfunc
+" Test for charidx() using a UTF-16 index
+func Test_charidx_from_utf16_index()
+ " string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, charidx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 3, v:false, v:true))
+
+ " string with two byte characters
+ let str = "a©©b"
+ call assert_equal(0, charidx(str, 0, v:false, v:true))
+ call assert_equal(1, charidx(str, 1, v:false, v:true))
+ call assert_equal(2, charidx(str, 2, v:false, v:true))
+ call assert_equal(3, charidx(str, 3, v:false, v:true))
+ call assert_equal(-1, charidx(str, 4, v:false, v:true))
+
+ " string with four byte characters
+ let str = "a😊😊b"
+ call assert_equal(0, charidx(str, 0, v:false, v:true))
+ call assert_equal(1, charidx(str, 1, v:false, v:true))
+ call assert_equal(1, charidx(str, 2, v:false, v:true))
+ call assert_equal(2, charidx(str, 3, v:false, v:true))
+ call assert_equal(2, charidx(str, 4, v:false, v:true))
+ call assert_equal(3, charidx(str, 5, v:false, v:true))
+ call assert_equal(-1, charidx(str, 6, v:false, v:true))
+
+ " string with composing characters
+ let str = '-á-b́'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, charidx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, charidx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 6, v:true, v:true))
+
+ " string with multiple composing characters
+ let str = '-ą́-ą́'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, charidx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, charidx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, charidx(str, 8, v:true, v:true))
+
+ " empty string
+ call assert_equal(-1, charidx('', 0, v:false, v:true))
+ call assert_equal(-1, charidx('', 0, v:true, v:true))
+
+ " error cases
+ call assert_equal(-1, charidx('', 0, v:false, v:true))
+ call assert_equal(-1, charidx('', 0, v:true, v:true))
+ call assert_equal(-1, charidx(test_null_string(), 0, v:false, v:true))
+ call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:')
+ call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:')
+endfunc
+
+" Test for utf16idx() using a byte index
+func Test_utf16idx_from_byteidx()
+ " UTF-16 index of a string with single byte characters
+ let str = "abc"
+ for i in range(3)
+ call assert_equal(i, utf16idx(str, i))
+ endfor
+ call assert_equal(-1, utf16idx(str, 3))
+
+ " UTF-16 index of a string with two byte characters
+ let str = 'a©©b'
+ call assert_equal(0, str->utf16idx(0))
+ call assert_equal(1, str->utf16idx(1))
+ call assert_equal(1, str->utf16idx(2))
+ call assert_equal(2, str->utf16idx(3))
+ call assert_equal(2, str->utf16idx(4))
+ call assert_equal(3, str->utf16idx(5))
+ call assert_equal(-1, str->utf16idx(6))
+
+ " UTF-16 index of a string with four byte characters
+ let str = 'a😊😊b'
+ call assert_equal(0, utf16idx(str, 0))
+ call assert_equal(2, utf16idx(str, 1))
+ call assert_equal(2, utf16idx(str, 2))
+ call assert_equal(2, utf16idx(str, 3))
+ call assert_equal(2, utf16idx(str, 4))
+ call assert_equal(4, utf16idx(str, 5))
+ call assert_equal(4, utf16idx(str, 6))
+ call assert_equal(4, utf16idx(str, 7))
+ call assert_equal(4, utf16idx(str, 8))
+ call assert_equal(5, utf16idx(str, 9))
+ call assert_equal(-1, utf16idx(str, 10))
+
+ " UTF-16 index of a string with composing characters
+ let str = '-á-b́'
+ call assert_equal(0, utf16idx(str, 0))
+ call assert_equal(1, utf16idx(str, 1))
+ call assert_equal(1, utf16idx(str, 2))
+ call assert_equal(1, utf16idx(str, 3))
+ call assert_equal(2, utf16idx(str, 4))
+ call assert_equal(3, utf16idx(str, 5))
+ call assert_equal(3, utf16idx(str, 6))
+ call assert_equal(3, utf16idx(str, 7))
+ call assert_equal(-1, utf16idx(str, 8))
+ call assert_equal(0, utf16idx(str, 0, v:true))
+ call assert_equal(1, utf16idx(str, 1, v:true))
+ call assert_equal(2, utf16idx(str, 2, v:true))
+ call assert_equal(2, utf16idx(str, 3, v:true))
+ call assert_equal(3, utf16idx(str, 4, v:true))
+ call assert_equal(4, utf16idx(str, 5, v:true))
+ call assert_equal(5, utf16idx(str, 6, v:true))
+ call assert_equal(5, utf16idx(str, 7, v:true))
+ call assert_equal(-1, utf16idx(str, 8, v:true))
+
+ " string with multiple composing characters
+ let str = '-ą́-ą́'
+ call assert_equal(0, utf16idx(str, 0))
+ call assert_equal(1, utf16idx(str, 1))
+ call assert_equal(1, utf16idx(str, 2))
+ call assert_equal(1, utf16idx(str, 3))
+ call assert_equal(1, utf16idx(str, 4))
+ call assert_equal(1, utf16idx(str, 5))
+ call assert_equal(2, utf16idx(str, 6))
+ call assert_equal(3, utf16idx(str, 7))
+ call assert_equal(3, utf16idx(str, 8))
+ call assert_equal(3, utf16idx(str, 9))
+ call assert_equal(3, utf16idx(str, 10))
+ call assert_equal(3, utf16idx(str, 11))
+ call assert_equal(-1, utf16idx(str, 12))
+ call assert_equal(0, utf16idx(str, 0, v:true))
+ call assert_equal(1, utf16idx(str, 1, v:true))
+ call assert_equal(2, utf16idx(str, 2, v:true))
+ call assert_equal(2, utf16idx(str, 3, v:true))
+ call assert_equal(3, utf16idx(str, 4, v:true))
+ call assert_equal(3, utf16idx(str, 5, v:true))
+ call assert_equal(4, utf16idx(str, 6, v:true))
+ call assert_equal(5, utf16idx(str, 7, v:true))
+ call assert_equal(6, utf16idx(str, 8, v:true))
+ call assert_equal(6, utf16idx(str, 9, v:true))
+ call assert_equal(7, utf16idx(str, 10, v:true))
+ call assert_equal(7, utf16idx(str, 11, v:true))
+ call assert_equal(-1, utf16idx(str, 12, v:true))
+
+ " empty string
+ call assert_equal(-1, utf16idx('', 0))
+ call assert_equal(-1, utf16idx('', 0, v:true))
+
+ " error cases
+ call assert_equal(-1, utf16idx("", 0))
+ call assert_equal(-1, utf16idx("abc", -1))
+ call assert_equal(-1, utf16idx(test_null_string(), 0))
+ call assert_fails('let l = utf16idx([], 0)', 'E1174:')
+ call assert_fails('let l = utf16idx("ab", [])', 'E1210:')
+ call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:')
+endfunc
+
+" Test for utf16idx() using a character index
+func Test_utf16idx_from_charidx()
+ let str = "abc"
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 3, v:false, v:true))
+
+ " UTF-16 index of a string with two byte characters
+ let str = "a©©b"
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+
+ " UTF-16 index of a string with four byte characters
+ let str = "a😊😊b"
+ call assert_equal(0, utf16idx(str, 0, v:false, v:true))
+ call assert_equal(2, utf16idx(str, 1, v:false, v:true))
+ call assert_equal(4, utf16idx(str, 2, v:false, v:true))
+ call assert_equal(5, utf16idx(str, 3, v:false, v:true))
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+
+ " UTF-16 index of a string with composing characters
+ let str = '-á-b́'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, utf16idx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 6, v:true, v:true))
+
+ " string with multiple composing characters
+ let str = '-ą́-ą́'
+ for i in str->strcharlen()->range()
+ call assert_equal(i, utf16idx(str, i, v:false, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+ for i in str->strchars()->range()
+ call assert_equal(i, utf16idx(str, i, v:true, v:true))
+ endfor
+ call assert_equal(-1, utf16idx(str, 8, v:true, v:true))
+
+ " empty string
+ call assert_equal(-1, utf16idx('', 0, v:false, v:true))
+ call assert_equal(-1, utf16idx('', 0, v:true, v:true))
+
+ " error cases
+ call assert_equal(-1, utf16idx(test_null_string(), 0, v:true, v:true))
+ call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:')
+endfunc
+
+" Test for strutf16len()
+func Test_strutf16len()
+ call assert_equal(3, strutf16len('abc'))
+ call assert_equal(3, 'abc'->strutf16len(v:true))
+ call assert_equal(4, strutf16len('a©©b'))
+ call assert_equal(4, strutf16len('a©©b', v:true))
+ call assert_equal(6, strutf16len('a😊😊b'))
+ call assert_equal(6, strutf16len('a😊😊b', v:true))
+ call assert_equal(4, strutf16len('-á-b́'))
+ call assert_equal(6, strutf16len('-á-b́', v:true))
+ call assert_equal(4, strutf16len('-ą́-ą́'))
+ call assert_equal(8, strutf16len('-ą́-ą́', v:true))
+ call assert_equal(0, strutf16len(''))
+
+ " error cases
+ call assert_fails('let l = strutf16len([])', 'E1174:')
+ call assert_fails('let l = strutf16len("a", [])', 'E1212:')
+ call assert_equal(0, strutf16len(test_null_string()))
+endfunc
+
func Test_count()
let l = ['a', 'a', 'A', 'b']
call assert_equal(2, count(l, 'a'))
@@ -3074,5 +3469,4 @@
call StopVimInTerminal(buf)
endfunc
-
" vim: shiftwidth=2 sts=2 expandtab