patch 8.2.1461: Vim9: string indexes are counted in bytes
Problem: Vim9: string indexes are counted in bytes. Solution: Use character indexes. (closes #6574)
This commit is contained in:
		| @ -1131,19 +1131,25 @@ Evaluation is always from left to right. | |||||||
|  |  | ||||||
| expr8[expr1]		item of String or |List|	*expr-[]* *E111* | expr8[expr1]		item of String or |List|	*expr-[]* *E111* | ||||||
| 							*E909* *subscript* | 							*E909* *subscript* | ||||||
|  | In legacy Vim script: | ||||||
| If expr8 is a Number or String this results in a String that contains the | If expr8 is a Number or String this results in a String that contains the | ||||||
| expr1'th single byte from expr8.  expr8 is used as a String, expr1 as a | expr1'th single byte from expr8.  expr8 is used as a String (a number is | ||||||
| Number.  This doesn't recognize multi-byte encodings, see `byteidx()` for | automatically converted to a String), expr1 as a Number.  This doesn't | ||||||
| an alternative, or use `split()` to turn the string into a list of characters. | recognize multi-byte encodings, see `byteidx()` for an alternative, or use | ||||||
|  | `split()` to turn the string into a list of characters.  Example, to get the | ||||||
| Index zero gives the first byte.  This is like it works in C.  Careful: | byte under the cursor: > | ||||||
| text column numbers start with one!  Example, to get the byte under the |  | ||||||
| cursor: > |  | ||||||
| 	:let c = getline(".")[col(".") - 1] | 	:let c = getline(".")[col(".") - 1] | ||||||
|  |  | ||||||
|  | In Vim9 script: | ||||||
|  | If expr8 is a String this results in a String that contains the expr1'th | ||||||
|  | single character from expr8.  To use byte indexes use |strpart()|. | ||||||
|  |  | ||||||
|  | Index zero gives the first byte or character.  Careful: text column numbers | ||||||
|  | start with one! | ||||||
|  |  | ||||||
| If the length of the String is less than the index, the result is an empty | If the length of the String is less than the index, the result is an empty | ||||||
| String.  A negative index always results in an empty string (reason: backward | String.  A negative index always results in an empty string (reason: backward | ||||||
| compatibility).  Use [-1:] to get the last byte. | compatibility).  Use [-1:] to get the last byte or character. | ||||||
|  |  | ||||||
| If expr8 is a |List| then it results in the item at index expr1.  See |list-index| | If expr8 is a |List| then it results in the item at index expr1.  See |list-index| | ||||||
| for possible index values.  If the index is out of range this results in an | for possible index values.  If the index is out of range this results in an | ||||||
| @ -1157,10 +1163,16 @@ error. | |||||||
|  |  | ||||||
| expr8[expr1a : expr1b]	substring or sublist		*expr-[:]* | expr8[expr1a : expr1b]	substring or sublist		*expr-[:]* | ||||||
|  |  | ||||||
| If expr8 is a Number or String this results in the substring with the bytes | If expr8 is a String this results in the substring with the bytes or characters from expr1a | ||||||
| from expr1a to and including expr1b.  expr8 is used as a String, expr1a and | to and including expr1b.  expr8 is used as a String, expr1a and expr1b are | ||||||
| expr1b are used as a Number.  This doesn't recognize multi-byte encodings, see | used as a Number. | ||||||
| |byteidx()| for computing the indexes. |  | ||||||
|  | In legacy Vim script the indexes are byte indexes.  This doesn't recognize | ||||||
|  | multi-byte encodings, see |byteidx()| for computing the indexes.  If expr8 is | ||||||
|  | a Number it is first converted to a String. | ||||||
|  |  | ||||||
|  | In Vim9 script the indexes are character indexes.  To use byte indexes use | ||||||
|  | |strpart()|. | ||||||
|  |  | ||||||
| If expr1a is omitted zero is used.  If expr1b is omitted the length of the | If expr1a is omitted zero is used.  If expr1b is omitted the length of the | ||||||
| string minus one is used. | string minus one is used. | ||||||
|  | |||||||
							
								
								
									
										28
									
								
								src/eval.c
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								src/eval.c
									
									
									
									
									
								
							| @ -3718,6 +3718,10 @@ eval_index( | |||||||
| 		    else | 		    else | ||||||
| 			s = vim_strnsave(s + n1, n2 - n1 + 1); | 			s = vim_strnsave(s + n1, n2 - n1 + 1); | ||||||
| 		} | 		} | ||||||
|  | 		else if (in_vim9script()) | ||||||
|  | 		{ | ||||||
|  | 		    s = char_from_string(s, n1); | ||||||
|  | 		} | ||||||
| 		else | 		else | ||||||
| 		{ | 		{ | ||||||
| 		    // The resulting variable is a string of a single | 		    // The resulting variable is a string of a single | ||||||
| @ -5284,6 +5288,30 @@ eval_isdictc(int c) | |||||||
|     return ASCII_ISALNUM(c) || c == '_'; |     return ASCII_ISALNUM(c) || c == '_'; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Return the character "str[index]" where "index" is the character index.  If | ||||||
|  |  * "index" is out of range NULL is returned. | ||||||
|  |  */ | ||||||
|  |     char_u * | ||||||
|  | char_from_string(char_u *str, varnumber_T index) | ||||||
|  | { | ||||||
|  |     size_t	    nbyte = 0; | ||||||
|  |     varnumber_T	    nchar = index; | ||||||
|  |     size_t	    slen; | ||||||
|  |  | ||||||
|  |     if (str == NULL || index < 0) | ||||||
|  | 	return NULL; | ||||||
|  |     slen = STRLEN(str); | ||||||
|  |     while (nchar > 0 && nbyte < slen) | ||||||
|  |     { | ||||||
|  | 	nbyte += MB_CPTR2LEN(str + nbyte); | ||||||
|  | 	--nchar; | ||||||
|  |     } | ||||||
|  |     if (nbyte >= slen) | ||||||
|  | 	return NULL; | ||||||
|  |     return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte)); | ||||||
|  | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * Handle: |  * Handle: | ||||||
|  * - expr[expr], expr[expr:expr] subscript |  * - expr[expr], expr[expr:expr] subscript | ||||||
|  | |||||||
| @ -59,6 +59,7 @@ char_u *find_name_end(char_u *arg, char_u **expr_start, char_u **expr_end, int f | |||||||
| int eval_isnamec(int c); | int eval_isnamec(int c); | ||||||
| int eval_isnamec1(int c); | int eval_isnamec1(int c); | ||||||
| int eval_isdictc(int c); | int eval_isdictc(int c); | ||||||
|  | char_u *char_from_string(char_u *str, varnumber_T index); | ||||||
| int handle_subscript(char_u **arg, typval_T *rettv, evalarg_T *evalarg, int verbose); | int handle_subscript(char_u **arg, typval_T *rettv, evalarg_T *evalarg, int verbose); | ||||||
| int item_copy(typval_T *from, typval_T *to, int deep, int copyID); | int item_copy(typval_T *from, typval_T *to, int deep, int copyID); | ||||||
| void echo_one(typval_T *rettv, int with_space, int *atstart, int *needclr); | void echo_one(typval_T *rettv, int with_space, int *atstart, int *needclr); | ||||||
|  | |||||||
| @ -2075,12 +2075,28 @@ def Test_expr7_trailing() | |||||||
| enddef | enddef | ||||||
|  |  | ||||||
| def Test_expr7_subscript() | def Test_expr7_subscript() | ||||||
|   let text = 'abcdef' |   let lines =<< trim END | ||||||
|   assert_equal('', text[-1]) |     let text = 'abcdef' | ||||||
|   assert_equal('a', text[0]) |     assert_equal('', text[-1]) | ||||||
|   assert_equal('e', text[4]) |     assert_equal('a', text[0]) | ||||||
|   assert_equal('f', text[5]) |     assert_equal('e', text[4]) | ||||||
|   assert_equal('', text[6]) |     assert_equal('f', text[5]) | ||||||
|  |     assert_equal('', text[6]) | ||||||
|  |  | ||||||
|  |     text = 'ábçdëf' | ||||||
|  |     assert_equal('', text[-999]) | ||||||
|  |     assert_equal('', text[-1]) | ||||||
|  |     assert_equal('á', text[0]) | ||||||
|  |     assert_equal('b', text[1]) | ||||||
|  |     assert_equal('ç', text[2]) | ||||||
|  |     assert_equal('d', text[3]) | ||||||
|  |     assert_equal('ë', text[4]) | ||||||
|  |     assert_equal('f', text[5]) | ||||||
|  |     assert_equal('', text[6]) | ||||||
|  |     assert_equal('', text[999]) | ||||||
|  |   END | ||||||
|  |   CheckDefSuccess(lines) | ||||||
|  |   CheckScriptSuccess(['vim9script'] + lines) | ||||||
| enddef | enddef | ||||||
|  |  | ||||||
| def Test_expr7_subscript_linebreak() | def Test_expr7_subscript_linebreak() | ||||||
|  | |||||||
| @ -754,6 +754,8 @@ static char *(features[]) = | |||||||
|  |  | ||||||
| static int included_patches[] = | static int included_patches[] = | ||||||
| {   /* Add new patch number below this line */ | {   /* Add new patch number below this line */ | ||||||
|  | /**/ | ||||||
|  |     1461, | ||||||
| /**/ | /**/ | ||||||
|     1460, |     1460, | ||||||
| /**/ | /**/ | ||||||
|  | |||||||
| @ -2233,7 +2233,6 @@ call_def_function( | |||||||
|  |  | ||||||
| 	    case ISN_STRINDEX: | 	    case ISN_STRINDEX: | ||||||
| 		{ | 		{ | ||||||
| 		    char_u	*s; |  | ||||||
| 		    varnumber_T	n; | 		    varnumber_T	n; | ||||||
| 		    char_u	*res; | 		    char_u	*res; | ||||||
|  |  | ||||||
| @ -2245,7 +2244,6 @@ call_def_function( | |||||||
| 			emsg(_(e_stringreq)); | 			emsg(_(e_stringreq)); | ||||||
| 			goto on_error; | 			goto on_error; | ||||||
| 		    } | 		    } | ||||||
| 		    s = tv->vval.v_string; |  | ||||||
|  |  | ||||||
| 		    tv = STACK_TV_BOT(-1); | 		    tv = STACK_TV_BOT(-1); | ||||||
| 		    if (tv->v_type != VAR_NUMBER) | 		    if (tv->v_type != VAR_NUMBER) | ||||||
| @ -2259,12 +2257,9 @@ call_def_function( | |||||||
| 		    // The resulting variable is a string of a single | 		    // The resulting variable is a string of a single | ||||||
| 		    // character.  If the index is too big or negative the | 		    // character.  If the index is too big or negative the | ||||||
| 		    // result is empty. | 		    // result is empty. | ||||||
| 		    if (n < 0 || n >= (varnumber_T)STRLEN(s)) |  | ||||||
| 			res = NULL; |  | ||||||
| 		    else |  | ||||||
| 			res = vim_strnsave(s + n, 1); |  | ||||||
| 		    --ectx.ec_stack.ga_len; | 		    --ectx.ec_stack.ga_len; | ||||||
| 		    tv = STACK_TV_BOT(-1); | 		    tv = STACK_TV_BOT(-1); | ||||||
|  | 		    res = char_from_string(tv->vval.v_string, n); | ||||||
| 		    vim_free(tv->vval.v_string); | 		    vim_free(tv->vval.v_string); | ||||||
| 		    tv->vval.v_string = res; | 		    tv->vval.v_string = res; | ||||||
| 		} | 		} | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user