patch 8.2.2605: Vim9: string index and slice does not include composing chars
Problem: Vim9: string index and slice does not include composing chars. Solution: Include composing characters. (issue #6563)
This commit is contained in:
		| @ -96,8 +96,8 @@ script and `:def` functions; details are below: | |||||||
| 	def CallMe(count: number, message: string): bool | 	def CallMe(count: number, message: string): bool | ||||||
| - Call functions without `:call`: > | - Call functions without `:call`: > | ||||||
| 	writefile(['done'], 'file.txt') | 	writefile(['done'], 'file.txt') | ||||||
| - You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert` or | - You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert`, `:open` | ||||||
|   curly-braces names. |   or curly-braces names. | ||||||
| - A range before a command must be prefixed with a colon: > | - A range before a command must be prefixed with a colon: > | ||||||
| 	:%s/this/that | 	:%s/this/that | ||||||
| - Unless mentioned specifically, the highest |scriptversion| is used. | - Unless mentioned specifically, the highest |scriptversion| is used. | ||||||
| @ -341,7 +341,8 @@ Functions can be called without `:call`: > | |||||||
| Using `:call` is still possible, but this is discouraged. | Using `:call` is still possible, but this is discouraged. | ||||||
|  |  | ||||||
| A method call without `eval` is possible, so long as the start is an | A method call without `eval` is possible, so long as the start is an | ||||||
| identifier or can't be an Ex command.  Examples: > | identifier or can't be an Ex command.  For a function either "(" or "->" must | ||||||
|  | follow, without a line break.  Examples: > | ||||||
| 	myList->add(123) | 	myList->add(123) | ||||||
| 	g:myList->add(123) | 	g:myList->add(123) | ||||||
| 	[1, 2, 3]->Process() | 	[1, 2, 3]->Process() | ||||||
| @ -696,8 +697,9 @@ for v:null.  When converting a boolean to a string "false" and "true" are | |||||||
| used, not "v:false" and "v:true" like in legacy script.  "v:none" is not | used, not "v:false" and "v:true" like in legacy script.  "v:none" is not | ||||||
| changed, it is only used in JSON and has no equivalent in other languages. | changed, it is only used in JSON and has no equivalent in other languages. | ||||||
|  |  | ||||||
| Indexing a string with [idx] or [idx : idx] uses character indexes instead of | Indexing a string with [idx] or taking a slice with [idx : idx] uses character | ||||||
| byte indexes. Example: > | indexes instead of byte indexes.  Composing characters are included. | ||||||
|  | Example: > | ||||||
| 	echo 'bár'[1] | 	echo 'bár'[1] | ||||||
| In legacy script this results in the character 0xc3 (an illegal byte), in Vim9 | In legacy script this results in the character 0xc3 (an illegal byte), in Vim9 | ||||||
| script this results in the string 'á'. | script this results in the string 'á'. | ||||||
| @ -845,6 +847,8 @@ THIS IS STILL UNDER DEVELOPMENT - ANYTHING CAN BREAK - ANYTHING CAN CHANGE | |||||||
| :enddef			End of a function defined with `:def`. It should be on | :enddef			End of a function defined with `:def`. It should be on | ||||||
| 			a line of its own. | 			a line of its own. | ||||||
|  |  | ||||||
|  | You may also find this wiki useful.  It was written by an early adopter of | ||||||
|  | Vim9 script: https://github.com/lacygoill/wiki/blob/master/vim/vim9.md | ||||||
|  |  | ||||||
| If the script the function is defined in is Vim9 script, then script-local | If the script the function is defined in is Vim9 script, then script-local | ||||||
| variables can be accessed without the "s:" prefix.  They must be defined | variables can be accessed without the "s:" prefix.  They must be defined | ||||||
|  | |||||||
| @ -2367,6 +2367,35 @@ def Test_expr7_any_index_slice() | |||||||
|     assert_equal('abcd', g:teststring[: -3]) |     assert_equal('abcd', g:teststring[: -3]) | ||||||
|     assert_equal('', g:teststring[: -9]) |     assert_equal('', g:teststring[: -9]) | ||||||
|  |  | ||||||
|  |     # composing characters are included | ||||||
|  |     g:teststring = 'àéû' | ||||||
|  |     assert_equal('à', g:teststring[0]) | ||||||
|  |     assert_equal('é', g:teststring[1]) | ||||||
|  |     assert_equal('û', g:teststring[2]) | ||||||
|  |     assert_equal('', g:teststring[3]) | ||||||
|  |     assert_equal('', g:teststring[4]) | ||||||
|  |  | ||||||
|  |     assert_equal('û', g:teststring[-1]) | ||||||
|  |     assert_equal('é', g:teststring[-2]) | ||||||
|  |     assert_equal('à', g:teststring[-3]) | ||||||
|  |     assert_equal('', g:teststring[-4]) | ||||||
|  |     assert_equal('', g:teststring[-5]) | ||||||
|  |  | ||||||
|  |     assert_equal('à', g:teststring[0 : 0]) | ||||||
|  |     assert_equal('é', g:teststring[1 : 1]) | ||||||
|  |     assert_equal('àé', g:teststring[0 : 1]) | ||||||
|  |     assert_equal('àéû', g:teststring[0 : -1]) | ||||||
|  |     assert_equal('àé', g:teststring[0 : -2]) | ||||||
|  |     assert_equal('à', g:teststring[0 : -3]) | ||||||
|  |     assert_equal('', g:teststring[0 : -4]) | ||||||
|  |     assert_equal('', g:teststring[0 : -5]) | ||||||
|  |     assert_equal('àéû', g:teststring[ : ]) | ||||||
|  |     assert_equal('àéû', g:teststring[0 : ]) | ||||||
|  |     assert_equal('éû', g:teststring[1 : ]) | ||||||
|  |     assert_equal('û', g:teststring[2 : ]) | ||||||
|  |     assert_equal('', g:teststring[3 : ]) | ||||||
|  |     assert_equal('', g:teststring[4 : ]) | ||||||
|  |  | ||||||
|     # blob index cannot be out of range |     # blob index cannot be out of range | ||||||
|     g:testblob = 0z01ab |     g:testblob = 0z01ab | ||||||
|     assert_equal(0x01, g:testblob[0]) |     assert_equal(0x01, g:testblob[0]) | ||||||
|  | |||||||
| @ -750,6 +750,8 @@ static char *(features[]) = | |||||||
|  |  | ||||||
| static int included_patches[] = | static int included_patches[] = | ||||||
| {   /* Add new patch number below this line */ | {   /* Add new patch number below this line */ | ||||||
|  | /**/ | ||||||
|  |     2605, | ||||||
| /**/ | /**/ | ||||||
|     2604, |     2604, | ||||||
| /**/ | /**/ | ||||||
|  | |||||||
| @ -985,8 +985,9 @@ allocate_if_null(typval_T *tv) | |||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Return the character "str[index]" where "index" is the character index.  If |  * Return the character "str[index]" where "index" is the character index, | ||||||
|  * "index" is out of range NULL is returned. |  * including composing characters. | ||||||
|  |  * If "index" is out of range NULL is returned. | ||||||
|  */ |  */ | ||||||
|     char_u * |     char_u * | ||||||
| char_from_string(char_u *str, varnumber_T index) | char_from_string(char_u *str, varnumber_T index) | ||||||
| @ -1005,7 +1006,7 @@ char_from_string(char_u *str, varnumber_T index) | |||||||
| 	int	clen = 0; | 	int	clen = 0; | ||||||
|  |  | ||||||
| 	for (nbyte = 0; nbyte < slen; ++clen) | 	for (nbyte = 0; nbyte < slen; ++clen) | ||||||
| 	    nbyte += MB_CPTR2LEN(str + nbyte); | 	    nbyte += mb_ptr2len(str + nbyte); | ||||||
| 	nchar = clen + index; | 	nchar = clen + index; | ||||||
| 	if (nchar < 0) | 	if (nchar < 0) | ||||||
| 	    // unlike list: index out of range results in empty string | 	    // unlike list: index out of range results in empty string | ||||||
| @ -1013,15 +1014,15 @@ char_from_string(char_u *str, varnumber_T index) | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar) |     for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar) | ||||||
| 	nbyte += MB_CPTR2LEN(str + nbyte); | 	nbyte += mb_ptr2len(str + nbyte); | ||||||
|     if (nbyte >= slen) |     if (nbyte >= slen) | ||||||
| 	return NULL; | 	return NULL; | ||||||
|     return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte)); |     return vim_strnsave(str + nbyte, mb_ptr2len(str + nbyte)); | ||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Get the byte index for character index "idx" in string "str" with length |  * Get the byte index for character index "idx" in string "str" with length | ||||||
|  * "str_len". |  * "str_len".  Composing characters are included. | ||||||
|  * If going over the end return "str_len". |  * If going over the end return "str_len". | ||||||
|  * If "idx" is negative count from the end, -1 is the last character. |  * If "idx" is negative count from the end, -1 is the last character. | ||||||
|  * When going over the start return -1. |  * When going over the start return -1. | ||||||
| @ -1036,7 +1037,7 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx) | |||||||
|     { |     { | ||||||
| 	while (nchar > 0 && nbyte < str_len) | 	while (nchar > 0 && nbyte < str_len) | ||||||
| 	{ | 	{ | ||||||
| 	    nbyte += MB_CPTR2LEN(str + nbyte); | 	    nbyte += mb_ptr2len(str + nbyte); | ||||||
| 	    --nchar; | 	    --nchar; | ||||||
| 	} | 	} | ||||||
|     } |     } | ||||||
| @ -1056,7 +1057,8 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx) | |||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Return the slice "str[first:last]" using character indexes. |  * Return the slice "str[first : last]" using character indexes.  Composing | ||||||
|  |  * characters are included. | ||||||
|  * "exclusive" is TRUE for slice(). |  * "exclusive" is TRUE for slice(). | ||||||
|  * Return NULL when the result is empty. |  * Return NULL when the result is empty. | ||||||
|  */ |  */ | ||||||
| @ -1079,7 +1081,7 @@ string_slice(char_u *str, varnumber_T first, varnumber_T last, int exclusive) | |||||||
| 	end_byte = char_idx2byte(str, slen, last); | 	end_byte = char_idx2byte(str, slen, last); | ||||||
| 	if (!exclusive && end_byte >= 0 && end_byte < (long)slen) | 	if (!exclusive && end_byte >= 0 && end_byte < (long)slen) | ||||||
| 	    // end index is inclusive | 	    // end index is inclusive | ||||||
| 	    end_byte += MB_CPTR2LEN(str + end_byte); | 	    end_byte += mb_ptr2len(str + end_byte); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (start_byte >= (long)slen || end_byte <= start_byte) |     if (start_byte >= (long)slen || end_byte <= start_byte) | ||||||
| @ -3249,8 +3251,9 @@ call_def_function( | |||||||
| 			res = string_slice(tv->vval.v_string, n1, n2, FALSE); | 			res = string_slice(tv->vval.v_string, n1, n2, FALSE); | ||||||
| 		    else | 		    else | ||||||
| 			// Index: The resulting variable is a string of a | 			// Index: The resulting variable is a string of a | ||||||
| 			// single character.  If the index is too big or | 			// single character (including composing characters). | ||||||
| 			// negative the result is empty. | 			// If the index is too big or negative the result is | ||||||
|  | 			// empty. | ||||||
| 			res = char_from_string(tv->vval.v_string, n2); | 			res = char_from_string(tv->vval.v_string, n2); | ||||||
| 		    vim_free(tv->vval.v_string); | 		    vim_free(tv->vval.v_string); | ||||||
| 		    tv->vval.v_string = res; | 		    tv->vval.v_string = res; | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user