From 17793ef23aae0bc94539390ccfe5e63b0ad39ff2 Mon Sep 17 00:00:00 2001 From: Bram Moolenaar Date: Mon, 28 Dec 2020 12:56:58 +0100 Subject: [PATCH] patch 8.2.2233: cannot convert a byte index into a character index Problem: Cannot convert a byte index into a character index. Solution: Add charidx(). (Yegappan Lakshmanan, closes #7561) --- runtime/doc/eval.txt | 27 +++++++++++++++++ runtime/doc/usr_41.txt | 1 + src/evalfunc.c | 54 ++++++++++++++++++++++++++++++++++ src/testdir/test_functions.vim | 25 ++++++++++++++++ src/version.c | 2 ++ 5 files changed, 109 insertions(+) diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index 4c6636c614..8b85dad9cf 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -2475,6 +2475,8 @@ ch_status({handle} [, {options}]) changenr() Number current change number char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr} charclass({string}) Number character class of {string} +charidx({string}, {idx} [, {countcc}]) + Number char index of byte {idx} in {string} chdir({dir}) String change current working directory cindent({lnum}) Number C indent for line {lnum} clearmatches([{win}]) none clear all matches @@ -3588,6 +3590,31 @@ charclass({string}) *charclass()* other specific Unicode class The class is used in patterns and word motions. + *charidx()* +charidx({string}, {idx} [, {countcc}]) + Return the character index of the byte at {idx} in {string}. + The index of the first character is zero. + If there are no multibyte characters the returned value is + equal to {idx}. + When {countcc} is omitted or zero, then composing characters + are not counted separately, their byte length is added to the + preceding base character. + When {countcc} is set to 1, then composing characters are + counted as separate characters. + Returns -1 if the arguments are invalid or if {idx} is greater + than the index of the last byte in {string}. 
An error is + given if the first argument is not a string, the second + argument is not a number or when the third argument is present + and is not zero or one. + See |byteidx()| and |byteidxcomp()| for getting the byte index + from the character index. + Examples: > + echo charidx('áb́ć', 3) returns 1 + echo charidx('áb́ć', 6, 1) returns 4 + echo charidx('áb́ć', 16) returns -1 +< + Can also be used as a |method|: > + GetName()->charidx(idx) chdir({dir}) *chdir()* Change the current working directory to {dir}. The scope of diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt index a19d005631..a035038e47 100644 --- a/runtime/doc/usr_41.txt +++ b/runtime/doc/usr_41.txt @@ -625,6 +625,7 @@ String manipulation: *string-functions* iconv() convert text from one encoding to another byteidx() byte index of a character in a string byteidxcomp() like byteidx() but count composing characters + charidx() character index of a byte in a string repeat() repeat a string multiple times eval() evaluate a string expression execute() execute an Ex command and get the output diff --git a/src/evalfunc.c b/src/evalfunc.c index cf9c2c45ca..9b3b5beb66 100644 --- a/src/evalfunc.c +++ b/src/evalfunc.c @@ -47,6 +47,7 @@ static void f_ceil(typval_T *argvars, typval_T *rettv); #endif static void f_changenr(typval_T *argvars, typval_T *rettv); static void f_char2nr(typval_T *argvars, typval_T *rettv); +static void f_charidx(typval_T *argvars, typval_T *rettv); static void f_col(typval_T *argvars, typval_T *rettv); static void f_confirm(typval_T *argvars, typval_T *rettv); static void f_copy(typval_T *argvars, typval_T *rettv); @@ -789,6 +790,8 @@ static funcentry_T global_functions[] = ret_number, f_char2nr}, {"charclass", 1, 1, FEARG_1, NULL, ret_number, f_charclass}, + {"charidx", 2, 3, FEARG_1, NULL, + ret_number, f_charidx}, {"chdir", 1, 1, FEARG_1, NULL, ret_string, f_chdir}, {"cindent", 1, 1, FEARG_1, NULL, @@ -2420,6 +2423,57 @@ f_char2nr(typval_T *argvars, typval_T *rettv) 
rettv->vval.v_number = tv_get_string(&argvars[0])[0];
 }
 
+/*
+ * "charidx()" function: set rettv to the character index of the byte at
+ * index argvars[1] in the string argvars[0].  The result is -1 when an
+ * argument is invalid or the byte index is beyond the last byte.  With the
+ * optional argvars[2] set to 1 composing characters are counted separately.
+ */
+    static void
+f_charidx(typval_T *argvars, typval_T *rettv)
+{
+    char_u	*str;
+    varnumber_T	idx;
+    int		countcc = FALSE;
+    char_u	*p;
+    int		len;
+    int		(*ptr2len)(char_u *);
+
+    rettv->vval.v_number = -1;	    // result for every early return below
+    if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
+	    || (argvars[2].v_type != VAR_UNKNOWN
+		&& argvars[2].v_type != VAR_NUMBER))
+    {
+	emsg(_(e_invarg));
+	return;
+    }
+
+    str = tv_get_string_chk(&argvars[0]);
+    idx = tv_get_number_chk(&argvars[1], NULL);
+    if (str == NULL || idx < 0)
+	return;
+    if (argvars[2].v_type != VAR_UNKNOWN)
+	countcc = (int)tv_get_bool(&argvars[2]);
+    if (countcc < 0 || countcc > 1)
+    {
+	semsg(_(e_using_number_as_bool_nr), countcc);
+	return;
+    }
+
+    // Step with utf_ptr2len() only when enc_utf8 and {countcc} are both set,
+    // so that composing characters are counted; otherwise use mb_ptr2len().
+    ptr2len = (enc_utf8 && countcc) ? utf_ptr2len : mb_ptr2len;
+    // Compare byte offsets instead of computing "str + idx": that pointer can
+    // lie far beyond the string, which is undefined and may wrap around.
+    for (p = str, len = 0; (varnumber_T)(p - str) <= idx; len++)
+    {
+	if (*p == NUL)
+	    return;		    // "idx" is beyond the last byte
+	p += ptr2len(p);
+    }
+    rettv->vval.v_number = len > 0 ? len - 1 : 0;
+}
+
     win_T *
 get_optional_window(typval_T *argvars, int idx)
 {
diff --git a/src/testdir/test_functions.vim b/src/testdir/test_functions.vim
index dd4429e709..89db161802 100644
--- a/src/testdir/test_functions.vim
+++ b/src/testdir/test_functions.vim
@@ -1132,6 +1132,31 @@ func Test_byteidx()
   call assert_fails("call byteidxcomp([], 0)", 'E730:')
 endfunc
 
+" Test for charidx()
+func Test_charidx()
+  let a = 'xáb́y'
+  call assert_equal(0, charidx(a, 0))
+  call assert_equal(1, charidx(a, 3))
+  call assert_equal(2, charidx(a, 4))
+  call assert_equal(3, charidx(a, 7))
+  call assert_equal(-1, charidx(a, 8))
+  call assert_equal(-1, charidx('', 0))
+
+  " count composing characters
+  call assert_equal(0, charidx(a, 0, 1))
+  call assert_equal(2, charidx(a, 2, 1))
+  call assert_equal(3, charidx(a, 4, 1))
+  call assert_equal(5, charidx(a, 7, 1))
+  call assert_equal(-1, charidx(a, 8, 1))
+  call assert_equal(-1, charidx('', 0, 1))
+
+  call assert_fails('let x = charidx([], 1)', 'E474:')
+  call assert_fails('let x = charidx("abc", [])', 'E474:')
+  call assert_fails('let x = charidx("abc", 1, [])', 'E474:')
+  call assert_fails('let x = charidx("abc", 1, -1)', 'E1023:')
+  call assert_fails('let x = charidx("abc", 1, 2)', 'E1023:')
+endfunc
+
 func Test_count()
   let l = ['a', 'a', 'A', 'b']
   call assert_equal(2, count(l, 'a'))
diff --git a/src/version.c b/src/version.c
index 35f1defe31..71bc47d043 100644
--- a/src/version.c
+++ b/src/version.c
@@ -750,6 +750,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    2233,
 /**/
     2232,
 /**/