patch 8.2.2605: Vim9: string index and slice do not include composing chars

Problem:    Vim9: string index and slice do not include composing chars.
Solution:   Include composing characters. (issue #6563)
This commit is contained in:
Bram Moolenaar
2021-03-14 18:40:19 +01:00
parent 240309c9bf
commit 0289a093a4
4 changed files with 54 additions and 16 deletions

View File

@ -96,8 +96,8 @@ script and `:def` functions; details are below:
def CallMe(count: number, message: string): bool
- Call functions without `:call`: >
writefile(['done'], 'file.txt')
- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert` or
curly-braces names.
- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert`, `:open`
or curly-braces names.
- A range before a command must be prefixed with a colon: >
:%s/this/that
- Unless mentioned specifically, the highest |scriptversion| is used.
@ -341,7 +341,8 @@ Functions can be called without `:call`: >
Using `:call` is still possible, but this is discouraged.
A method call without `eval` is possible, so long as the start is an
identifier or can't be an Ex command. Examples: >
identifier or can't be an Ex command. For a function, either "(" or "->" must
follow, without a line break. Examples: >
myList->add(123)
g:myList->add(123)
[1, 2, 3]->Process()
@ -696,8 +697,9 @@ for v:null. When converting a boolean to a string "false" and "true" are
used, not "v:false" and "v:true" like in legacy script. "v:none" is not
changed, it is only used in JSON and has no equivalent in other languages.
Indexing a string with [idx] or [idx : idx] uses character indexes instead of
byte indexes. Example: >
Indexing a string with [idx] or taking a slice with [idx : idx] uses character
indexes instead of byte indexes. Composing characters are included.
Example: >
echo 'bár'[1]
In legacy script this results in the character 0xc3 (an illegal byte), in Vim9
script this results in the string 'á'.
@ -845,6 +847,8 @@ THIS IS STILL UNDER DEVELOPMENT - ANYTHING CAN BREAK - ANYTHING CAN CHANGE
:enddef End of a function defined with `:def`. It should be on
a line of its own.
You may also find this wiki useful. It was written by an early adopter of
Vim9 script: https://github.com/lacygoill/wiki/blob/master/vim/vim9.md
If the script the function is defined in is Vim9 script, then script-local
variables can be accessed without the "s:" prefix. They must be defined

View File

@ -2367,6 +2367,35 @@ def Test_expr7_any_index_slice()
assert_equal('abcd', g:teststring[: -3])
assert_equal('', g:teststring[: -9])
# composing characters are included
g:teststring = 'àéû'
assert_equal('à', g:teststring[0])
assert_equal('é', g:teststring[1])
assert_equal('û', g:teststring[2])
assert_equal('', g:teststring[3])
assert_equal('', g:teststring[4])
assert_equal('û', g:teststring[-1])
assert_equal('é', g:teststring[-2])
assert_equal('à', g:teststring[-3])
assert_equal('', g:teststring[-4])
assert_equal('', g:teststring[-5])
assert_equal('à', g:teststring[0 : 0])
assert_equal('é', g:teststring[1 : 1])
assert_equal('àé', g:teststring[0 : 1])
assert_equal('àéû', g:teststring[0 : -1])
assert_equal('àé', g:teststring[0 : -2])
assert_equal('à', g:teststring[0 : -3])
assert_equal('', g:teststring[0 : -4])
assert_equal('', g:teststring[0 : -5])
assert_equal('àéû', g:teststring[ : ])
assert_equal('àéû', g:teststring[0 : ])
assert_equal('éû', g:teststring[1 : ])
assert_equal('û', g:teststring[2 : ])
assert_equal('', g:teststring[3 : ])
assert_equal('', g:teststring[4 : ])
# blob index cannot be out of range
g:testblob = 0z01ab
assert_equal(0x01, g:testblob[0])

View File

@ -750,6 +750,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
2605,
/**/
2604,
/**/

View File

@ -985,8 +985,9 @@ allocate_if_null(typval_T *tv)
}
/*
* Return the character "str[index]" where "index" is the character index. If
* "index" is out of range NULL is returned.
* Return the character "str[index]" where "index" is the character index,
* including composing characters.
* If "index" is out of range NULL is returned.
*/
char_u *
char_from_string(char_u *str, varnumber_T index)
@ -1005,7 +1006,7 @@ char_from_string(char_u *str, varnumber_T index)
int clen = 0;
for (nbyte = 0; nbyte < slen; ++clen)
nbyte += MB_CPTR2LEN(str + nbyte);
nbyte += mb_ptr2len(str + nbyte);
nchar = clen + index;
if (nchar < 0)
// unlike list: index out of range results in empty string
@ -1013,15 +1014,15 @@ char_from_string(char_u *str, varnumber_T index)
}
for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar)
nbyte += MB_CPTR2LEN(str + nbyte);
nbyte += mb_ptr2len(str + nbyte);
if (nbyte >= slen)
return NULL;
return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte));
return vim_strnsave(str + nbyte, mb_ptr2len(str + nbyte));
}
/*
* Get the byte index for character index "idx" in string "str" with length
* "str_len".
* "str_len". Composing characters are included.
* If going over the end return "str_len".
* If "idx" is negative count from the end, -1 is the last character.
* When going over the start return -1.
@ -1036,7 +1037,7 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx)
{
while (nchar > 0 && nbyte < str_len)
{
nbyte += MB_CPTR2LEN(str + nbyte);
nbyte += mb_ptr2len(str + nbyte);
--nchar;
}
}
@ -1056,7 +1057,8 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx)
}
/*
* Return the slice "str[first:last]" using character indexes.
* Return the slice "str[first : last]" using character indexes. Composing
* characters are included.
* "exclusive" is TRUE for slice().
* Return NULL when the result is empty.
*/
@ -1079,7 +1081,7 @@ string_slice(char_u *str, varnumber_T first, varnumber_T last, int exclusive)
end_byte = char_idx2byte(str, slen, last);
if (!exclusive && end_byte >= 0 && end_byte < (long)slen)
// end index is inclusive
end_byte += MB_CPTR2LEN(str + end_byte);
end_byte += mb_ptr2len(str + end_byte);
}
if (start_byte >= (long)slen || end_byte <= start_byte)
@ -3249,8 +3251,9 @@ call_def_function(
res = string_slice(tv->vval.v_string, n1, n2, FALSE);
else
// Index: The resulting variable is a string of a
// single character. If the index is too big or
// negative the result is empty.
// single character (including composing characters).
// If the index is too big or negative the result is
// empty.
res = char_from_string(tv->vval.v_string, n2);
vim_free(tv->vval.v_string);
tv->vval.v_string = res;