patch 9.0.1485: no functions for converting from/to UTF-16 index
Problem:    no functions for converting from/to UTF-16 index.
Solution:   Add UTF-16 flag to existing functions and add strutf16len() and
            utf16idx(). (Yegappan Lakshmanan, closes #12216)
			
			
This commit is contained in:
		
				
					committed by
					
						 Bram Moolenaar
						Bram Moolenaar
					
				
			
			
				
	
			
			
			
						parent
						
							e1b4822137
						
					
				
				
					commit
					67672ef097
				
			| @ -81,8 +81,10 @@ bufnr([{buf} [, {create}]])	Number	Number of the buffer {buf} | |||||||
| bufwinid({buf})			Number	window ID of buffer {buf} | bufwinid({buf})			Number	window ID of buffer {buf} | ||||||
| bufwinnr({buf})			Number	window number of buffer {buf} | bufwinnr({buf})			Number	window number of buffer {buf} | ||||||
| byte2line({byte})		Number	line number at byte count {byte} | byte2line({byte})		Number	line number at byte count {byte} | ||||||
| byteidx({expr}, {nr})		Number	byte index of {nr}'th char in {expr} | byteidx({expr}, {nr} [, {utf16}]) | ||||||
| byteidxcomp({expr}, {nr})	Number	byte index of {nr}'th char in {expr} | 				Number	byte index of {nr}'th char in {expr} | ||||||
|  | byteidxcomp({expr}, {nr} [, {utf16}]) | ||||||
|  | 				Number	byte index of {nr}'th char in {expr} | ||||||
| call({func}, {arglist} [, {dict}]) | call({func}, {arglist} [, {dict}]) | ||||||
| 				any	call {func} with arguments {arglist} | 				any	call {func} with arguments {arglist} | ||||||
| ceil({expr})			Float	round {expr} up | ceil({expr})			Float	round {expr} up | ||||||
| @ -117,7 +119,7 @@ changenr()			Number	current change number | |||||||
| char2nr({expr} [, {utf8}])	Number	ASCII/UTF-8 value of first char in {expr} | char2nr({expr} [, {utf8}])	Number	ASCII/UTF-8 value of first char in {expr} | ||||||
| charclass({string})		Number	character class of {string} | charclass({string})		Number	character class of {string} | ||||||
| charcol({expr} [, {winid}])	Number	column number of cursor or mark | charcol({expr} [, {winid}])	Number	column number of cursor or mark | ||||||
| charidx({string}, {idx} [, {countcc}]) | charidx({string}, {idx} [, {countcc} [, {utf16}]]) | ||||||
| 				Number	char index of byte {idx} in {string} | 				Number	char index of byte {idx} in {string} | ||||||
| chdir({dir})			String	change current working directory | chdir({dir})			String	change current working directory | ||||||
| cindent({lnum})			Number	C indent for line {lnum} | cindent({lnum})			Number	C indent for line {lnum} | ||||||
| @ -604,6 +606,8 @@ strptime({format}, {timestring}) | |||||||
| strridx({haystack}, {needle} [, {start}]) | strridx({haystack}, {needle} [, {start}]) | ||||||
| 				Number	last index of {needle} in {haystack} | 				Number	last index of {needle} in {haystack} | ||||||
| strtrans({expr})		String	translate string to make it printable | strtrans({expr})		String	translate string to make it printable | ||||||
|  | strutf16len({string} [, {countcc}]) | ||||||
|  | 				Number	number of UTF-16 code units in {string} | ||||||
| strwidth({expr})		Number	display cell length of the String {expr} | strwidth({expr})		Number	display cell length of the String {expr} | ||||||
| submatch({nr} [, {list}])	String or List | submatch({nr} [, {list}])	String or List | ||||||
| 					specific match in ":s" or substitute() | 					specific match in ":s" or substitute() | ||||||
| @ -704,6 +708,8 @@ undofile({name})		String	undo file name for {name} | |||||||
| undotree()			List	undo file tree | undotree()			List	undo file tree | ||||||
| uniq({list} [, {func} [, {dict}]]) | uniq({list} [, {func} [, {dict}]]) | ||||||
| 				List	remove adjacent duplicates from a list | 				List	remove adjacent duplicates from a list | ||||||
|  | utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) | ||||||
|  | 				Number	UTF-16 index of byte {idx} in {string} | ||||||
| values({dict})			List	values in {dict} | values({dict})			List	values in {dict} | ||||||
| virtcol({expr} [, {list}])	Number or List | virtcol({expr} [, {list}])	Number or List | ||||||
| 					screen column of cursor or mark | 					screen column of cursor or mark | ||||||
| @ -1363,7 +1369,7 @@ byte2line({byte})					*byte2line()* | |||||||
| <		{not available when compiled without the |+byte_offset| | <		{not available when compiled without the |+byte_offset| | ||||||
| 		feature} | 		feature} | ||||||
|  |  | ||||||
| byteidx({expr}, {nr})					*byteidx()* | byteidx({expr}, {nr} [, {utf16}])			*byteidx()* | ||||||
| 		Return byte index of the {nr}'th character in the String | 		Return byte index of the {nr}'th character in the String | ||||||
| 		{expr}.  Use zero for the first character, it then returns | 		{expr}.  Use zero for the first character, it then returns | ||||||
| 		zero. | 		zero. | ||||||
| @ -1373,6 +1379,13 @@ byteidx({expr}, {nr})					*byteidx()* | |||||||
| 		length is added to the preceding base character.  See | 		length is added to the preceding base character.  See | ||||||
| 		|byteidxcomp()| below for counting composing characters | 		|byteidxcomp()| below for counting composing characters | ||||||
| 		separately. | 		separately. | ||||||
|  | 		When {utf16} is present and TRUE, {nr} is used as the UTF-16 | ||||||
|  | 		index in the String {expr} instead of as the character index. | ||||||
|  | 		The UTF-16 index is the index in the string when it is encoded | ||||||
|  | 		with 16-bit words.  If the specified UTF-16 index is in the | ||||||
|  | 		middle of a character (e.g. in a 4-byte character), then the | ||||||
|  | 		byte index of the first byte in the character is returned. | ||||||
|  | 		Refer to |string-offset-encoding| for more information. | ||||||
| 		Example : > | 		Example : > | ||||||
| 			echo matchstr(str, ".", byteidx(str, 3)) | 			echo matchstr(str, ".", byteidx(str, 3)) | ||||||
| <		will display the fourth character.  Another way to do the | <		will display the fourth character.  Another way to do the | ||||||
| @ -1384,11 +1397,17 @@ byteidx({expr}, {nr})					*byteidx()* | |||||||
| 		If there are less than {nr} characters -1 is returned. | 		If there are less than {nr} characters -1 is returned. | ||||||
| 		If there are exactly {nr} characters the length of the string | 		If there are exactly {nr} characters the length of the string | ||||||
| 		in bytes is returned. | 		in bytes is returned. | ||||||
|  | 		See |charidx()| and |utf16idx()| for getting the character and | ||||||
|  | 		UTF-16 index respectively from the byte index. | ||||||
|  | 		Examples: > | ||||||
|  | 			echo byteidx('a😊😊', 2)	returns 5 | ||||||
|  | 			echo byteidx('a😊😊', 2, 1)	returns 1 | ||||||
|  | 			echo byteidx('a😊😊', 3, 1)	returns 5 | ||||||
|  | < | ||||||
| 		Can also be used as a |method|: > | 		Can also be used as a |method|: > | ||||||
| 			GetName()->byteidx(idx) | 			GetName()->byteidx(idx) | ||||||
|  |  | ||||||
| byteidxcomp({expr}, {nr})					*byteidxcomp()* | byteidxcomp({expr}, {nr} [, {utf16}])			*byteidxcomp()* | ||||||
| 		Like byteidx(), except that a composing character is counted | 		Like byteidx(), except that a composing character is counted | ||||||
| 		as a separate character.  Example: > | 		as a separate character.  Example: > | ||||||
| 			let s = 'e' .. nr2char(0x301) | 			let s = 'e' .. nr2char(0x301) | ||||||
| @ -1493,27 +1512,36 @@ charcol({expr} [, {winid}])				*charcol()* | |||||||
| 			GetPos()->col() | 			GetPos()->col() | ||||||
| < | < | ||||||
| 							*charidx()* | 							*charidx()* | ||||||
| charidx({string}, {idx} [, {countcc}]) | charidx({string}, {idx} [, {countcc} [, {utf16}]]) | ||||||
| 		Return the character index of the byte at {idx} in {string}. | 		Return the character index of the byte at {idx} in {string}. | ||||||
| 		The index of the first character is zero. | 		The index of the first character is zero. | ||||||
| 		If there are no multibyte characters the returned value is | 		If there are no multibyte characters the returned value is | ||||||
| 		equal to {idx}. | 		equal to {idx}. | ||||||
|  |  | ||||||
| 		When {countcc} is omitted or |FALSE|, then composing characters | 		When {countcc} is omitted or |FALSE|, then composing characters | ||||||
| 		are not counted separately, their byte length is | 		are not counted separately, their byte length is added to the | ||||||
| 		added to the preceding base character. | 		preceding base character. | ||||||
| 		When {countcc} is |TRUE|, then composing characters are | 		When {countcc} is |TRUE|, then composing characters are | ||||||
| 		counted as separate characters. | 		counted as separate characters. | ||||||
|  |  | ||||||
|  | 		When {utf16} is present and TRUE, {idx} is used as the UTF-16 | ||||||
|  | 		index in the String {string} instead of as the byte index. | ||||||
|  |  | ||||||
| 		Returns -1 if the arguments are invalid or if {idx} is greater | 		Returns -1 if the arguments are invalid or if {idx} is greater | ||||||
| 		than the index of the last byte in {string}.  An error is | 		than the index of the last byte in {string}.  An error is | ||||||
| 		given if the first argument is not a string, the second | 		given if the first argument is not a string, the second | ||||||
| 		argument is not a number or when the third argument is present | 		argument is not a number or when the third argument is present | ||||||
| 		and is not zero or one. | 		and is not zero or one. | ||||||
|  |  | ||||||
| 		See |byteidx()| and |byteidxcomp()| for getting the byte index | 		See |byteidx()| and |byteidxcomp()| for getting the byte index | ||||||
| 		from the character index. | 		from the character index and |utf16idx()| for getting the | ||||||
|  | 		UTF-16 index from the character index. | ||||||
|  | 		Refer to |string-offset-encoding| for more information. | ||||||
| 		Examples: > | 		Examples: > | ||||||
| 			echo charidx('áb́ć', 3)		returns 1 | 			echo charidx('áb́ć', 3)		returns 1 | ||||||
| 			echo charidx('áb́ć', 6, 1)	returns 4 | 			echo charidx('áb́ć', 6, 1)	returns 4 | ||||||
| 			echo charidx('áb́ć', 16)		returns -1 | 			echo charidx('áb́ć', 16)		returns -1 | ||||||
|  | 			echo charidx('a😊😊', 4, 0, 1)	returns 2 | ||||||
| < | < | ||||||
| 		Can also be used as a |method|: > | 		Can also be used as a |method|: > | ||||||
| 			GetName()->charidx(idx) | 			GetName()->charidx(idx) | ||||||
| @ -9244,6 +9272,28 @@ strtrans({string})					*strtrans()* | |||||||
| 		Can also be used as a |method|: > | 		Can also be used as a |method|: > | ||||||
| 			GetString()->strtrans() | 			GetString()->strtrans() | ||||||
|  |  | ||||||
|  | strutf16len({string} [, {countcc}])			*strutf16len()* | ||||||
|  | 		The result is a Number, which is the number of UTF-16 code | ||||||
|  | 		units in String {string} (after converting it to UTF-16). | ||||||
|  |  | ||||||
|  | 		When {countcc} is TRUE, composing characters are counted | ||||||
|  | 		separately. | ||||||
|  | 		When {countcc} is omitted or FALSE, composing characters are | ||||||
|  | 		ignored. | ||||||
|  |  | ||||||
|  | 		Returns zero on error. | ||||||
|  |  | ||||||
|  | 		Also see |strlen()| and |strcharlen()|. | ||||||
|  | 		Examples: > | ||||||
|  | 		    echo strutf16len('a')		returns 1 | ||||||
|  | 		    echo strutf16len('©')		returns 1 | ||||||
|  | 		    echo strutf16len('😊')		returns 2 | ||||||
|  | 		    echo strutf16len('ą́')		returns 1 | ||||||
|  | 		    echo strutf16len('ą́', v:true)	returns 3 | ||||||
|  | < | ||||||
|  | 		Can also be used as a |method|: > | ||||||
|  | 			GetText()->strutf16len() | ||||||
|  | < | ||||||
| strwidth({string})					*strwidth()* | strwidth({string})					*strwidth()* | ||||||
| 		The result is a Number, which is the number of display cells | 		The result is a Number, which is the number of display cells | ||||||
| 		String {string} occupies.  A Tab character is counted as one | 		String {string} occupies.  A Tab character is counted as one | ||||||
| @ -10059,6 +10109,34 @@ uniq({list} [, {func} [, {dict}]])			*uniq()* *E882* | |||||||
|  |  | ||||||
| 		Can also be used as a |method|: > | 		Can also be used as a |method|: > | ||||||
| 			mylist->uniq() | 			mylist->uniq() | ||||||
|  | < | ||||||
|  | 							*utf16idx()* | ||||||
|  | utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) | ||||||
|  | 		Same as |charidx()| but returns the UTF-16 index of the byte | ||||||
|  | 		at {idx} in {string} (after converting it to UTF-16). | ||||||
|  |  | ||||||
|  | 		When {charidx} is present and TRUE, {idx} is used as the | ||||||
|  | 		character index in the String {string} instead of as the byte | ||||||
|  | 		index. | ||||||
|  | 		An {idx} in the middle of a UTF-8 sequence is rounded upwards | ||||||
|  | 		to the end of that sequence. | ||||||
|  |  | ||||||
|  | 		See |byteidx()| and |byteidxcomp()| for getting the byte index | ||||||
|  | 		from the UTF-16 index and |charidx()| for getting the | ||||||
|  | 		character index from the UTF-16 index. | ||||||
|  | 		Refer to |string-offset-encoding| for more information. | ||||||
|  | 		Examples: > | ||||||
|  | 			echo utf16idx('a😊😊', 3)	returns 2 | ||||||
|  | 			echo utf16idx('a😊😊', 7)	returns 4 | ||||||
|  | 			echo utf16idx('a😊😊', 1, 0, 1)	returns 2 | ||||||
|  | 			echo utf16idx('a😊😊', 2, 0, 1)	returns 4 | ||||||
|  | 			echo utf16idx('aą́c', 6)		returns 2 | ||||||
|  | 			echo utf16idx('aą́c', 6, 1)	returns 4 | ||||||
|  | 			echo utf16idx('a😊😊', 9)	returns -1 | ||||||
|  | < | ||||||
|  | 		Can also be used as a |method|: > | ||||||
|  | 			GetName()->utf16idx(idx) | ||||||
|  |  | ||||||
|  |  | ||||||
| values({dict})						*values()* | values({dict})						*values()* | ||||||
| 		Return a |List| with all the values of {dict}.  The |List| is | 		Return a |List| with all the values of {dict}.  The |List| is | ||||||
|  | |||||||
| @ -1580,6 +1580,33 @@ Examples: > | |||||||
| 	echo $"The square root of {{9}} is {sqrt(9)}" | 	echo $"The square root of {{9}} is {sqrt(9)}" | ||||||
| <	The square root of {9} is 3.0 ~ | <	The square root of {9} is 3.0 ~ | ||||||
|  |  | ||||||
|  | 						*string-offset-encoding* | ||||||
|  | A string consists of multiple characters.  How the characters are stored | ||||||
|  | depends on 'encoding'.  Most common is UTF-8, which uses one byte for ASCII | ||||||
|  | characters, two bytes for other Latin characters and more bytes for other | ||||||
|  | characters. | ||||||
|  |  | ||||||
|  | A string offset can count characters or bytes.  Other programs may use | ||||||
|  | UTF-16 encoding (16-bit words) and an offset of UTF-16 words.  Some functions | ||||||
|  | use byte offsets, usually for UTF-8 encoding.  Other functions use character | ||||||
|  | offsets, in which case the encoding doesn't matter. | ||||||
|  |  | ||||||
|  | The different offsets for the string "a©😊" are below: | ||||||
|  |  | ||||||
|  |   UTF-8 offsets: | ||||||
|  |       [0]: 61, [1]: C2, [2]: A9, [3]: F0, [4]: 9F, [5]: 98, [6]: 8A | ||||||
|  |   UTF-16 offsets: | ||||||
|  |       [0]: 0061, [1]: 00A9, [2]: D83D, [3]: DE0A | ||||||
|  |   UTF-32 (character) offsets: | ||||||
|  |       [0]: 00000061, [1]: 000000A9, [2]: 0001F60A | ||||||
|  |  | ||||||
|  | You can use the "g8" and "ga" commands on a character to see the | ||||||
|  | decimal/hex/octal values. | ||||||
|  |  | ||||||
|  | The functions |byteidx()|, |utf16idx()| and |charidx()| can be used to convert | ||||||
|  | between these indices.  The functions |strlen()|, |strutf16len()| and | ||||||
|  | |strcharlen()| return the number of bytes, UTF-16 code units and characters in | ||||||
|  | a string respectively. | ||||||
|  |  | ||||||
| option						*expr-option* *E112* *E113* | option						*expr-option* *E112* *E113* | ||||||
| ------ | ------ | ||||||
|  | |||||||
| @ -754,6 +754,7 @@ String manipulation:					*string-functions* | |||||||
| 	strlen()		length of a string in bytes | 	strlen()		length of a string in bytes | ||||||
| 	strcharlen()		length of a string in characters | 	strcharlen()		length of a string in characters | ||||||
| 	strchars()		number of characters in a string | 	strchars()		number of characters in a string | ||||||
|  | 	strutf16len()		number of UTF-16 code units in a string | ||||||
| 	strwidth()		size of string when displayed | 	strwidth()		size of string when displayed | ||||||
| 	strdisplaywidth()	size of string when displayed, deals with tabs | 	strdisplaywidth()	size of string when displayed, deals with tabs | ||||||
| 	setcellwidths()		set character cell width overrides | 	setcellwidths()		set character cell width overrides | ||||||
| @ -771,6 +772,7 @@ String manipulation:					*string-functions* | |||||||
| 	byteidx()		byte index of a character in a string | 	byteidx()		byte index of a character in a string | ||||||
| 	byteidxcomp()		like byteidx() but count composing characters | 	byteidxcomp()		like byteidx() but count composing characters | ||||||
| 	charidx()		character index of a byte in a string | 	charidx()		character index of a byte in a string | ||||||
|  | 	utf16idx()		UTF-16 index of a byte in a string | ||||||
| 	repeat()		repeat a string multiple times | 	repeat()		repeat a string multiple times | ||||||
| 	eval()			evaluate a string expression | 	eval()			evaluate a string expression | ||||||
| 	execute()		execute an Ex command and get the output | 	execute()		execute an Ex command and get the output | ||||||
|  | |||||||
| @ -1751,9 +1751,9 @@ static funcentry_T global_functions[] = | |||||||
| 			ret_number,	    f_bufwinnr}, | 			ret_number,	    f_bufwinnr}, | ||||||
|     {"byte2line",	1, 1, FEARG_1,	    arg1_number, |     {"byte2line",	1, 1, FEARG_1,	    arg1_number, | ||||||
| 			ret_number,	    f_byte2line}, | 			ret_number,	    f_byte2line}, | ||||||
|     {"byteidx",		2, 2, FEARG_1,	    arg2_string_number, |     {"byteidx",		2, 3, FEARG_1,	    arg3_string_number_bool, | ||||||
| 			ret_number,	    f_byteidx}, | 			ret_number,	    f_byteidx}, | ||||||
|     {"byteidxcomp",	2, 2, FEARG_1,	    arg2_string_number, |     {"byteidxcomp",	2, 3, FEARG_1,	    arg3_string_number_bool, | ||||||
| 			ret_number,	    f_byteidxcomp}, | 			ret_number,	    f_byteidxcomp}, | ||||||
|     {"call",		2, 3, FEARG_1,	    arg3_any_list_dict, |     {"call",		2, 3, FEARG_1,	    arg3_any_list_dict, | ||||||
| 			ret_any,	    f_call}, | 			ret_any,	    f_call}, | ||||||
| @ -1803,7 +1803,7 @@ static funcentry_T global_functions[] = | |||||||
| 			ret_number,	    f_charclass}, | 			ret_number,	    f_charclass}, | ||||||
|     {"charcol",		1, 2, FEARG_1,	    arg2_string_or_list_number, |     {"charcol",		1, 2, FEARG_1,	    arg2_string_or_list_number, | ||||||
| 			ret_number,	    f_charcol}, | 			ret_number,	    f_charcol}, | ||||||
|     {"charidx",		2, 3, FEARG_1,	    arg3_string_number_bool, |     {"charidx",		2, 4, FEARG_1,	    arg3_string_number_bool, | ||||||
| 			ret_number,	    f_charidx}, | 			ret_number,	    f_charidx}, | ||||||
|     {"chdir",		1, 1, FEARG_1,	    arg1_string, |     {"chdir",		1, 1, FEARG_1,	    arg1_string, | ||||||
| 			ret_string,	    f_chdir}, | 			ret_string,	    f_chdir}, | ||||||
| @ -2601,6 +2601,8 @@ static funcentry_T global_functions[] = | |||||||
| 			ret_number,	    f_strridx}, | 			ret_number,	    f_strridx}, | ||||||
|     {"strtrans",	1, 1, FEARG_1,	    arg1_string, |     {"strtrans",	1, 1, FEARG_1,	    arg1_string, | ||||||
| 			ret_string,	    f_strtrans}, | 			ret_string,	    f_strtrans}, | ||||||
|  |     {"strutf16len",	1, 2, FEARG_1,	    arg2_string_bool, | ||||||
|  | 			ret_number,	    f_strutf16len}, | ||||||
|     {"strwidth",	1, 1, FEARG_1,	    arg1_string, |     {"strwidth",	1, 1, FEARG_1,	    arg1_string, | ||||||
| 			ret_number,	    f_strwidth}, | 			ret_number,	    f_strwidth}, | ||||||
|     {"submatch",	1, 2, FEARG_1,	    arg2_number_bool, |     {"submatch",	1, 2, FEARG_1,	    arg2_number_bool, | ||||||
| @ -2785,6 +2787,8 @@ static funcentry_T global_functions[] = | |||||||
| 			ret_dict_any,	    f_undotree}, | 			ret_dict_any,	    f_undotree}, | ||||||
|     {"uniq",		1, 3, FEARG_1,	    arg13_sortuniq, |     {"uniq",		1, 3, FEARG_1,	    arg13_sortuniq, | ||||||
| 			ret_first_arg,	    f_uniq}, | 			ret_first_arg,	    f_uniq}, | ||||||
|  |     {"utf16idx",	2, 4, FEARG_1,	    arg3_string_number_bool, | ||||||
|  | 			ret_number,	    f_utf16idx}, | ||||||
|     {"values",		1, 1, FEARG_1,	    arg1_dict_any, |     {"values",		1, 1, FEARG_1,	    arg1_dict_any, | ||||||
| 			ret_list_member,    f_values}, | 			ret_list_member,    f_values}, | ||||||
|     {"virtcol",		1, 2, FEARG_1,	    arg2_string_or_list_bool, |     {"virtcol",		1, 2, FEARG_1,	    arg2_string_or_list_bool, | ||||||
|  | |||||||
| @ -36,12 +36,14 @@ void f_string(typval_T *argvars, typval_T *rettv); | |||||||
| void f_strlen(typval_T *argvars, typval_T *rettv); | void f_strlen(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strcharlen(typval_T *argvars, typval_T *rettv); | void f_strcharlen(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strchars(typval_T *argvars, typval_T *rettv); | void f_strchars(typval_T *argvars, typval_T *rettv); | ||||||
|  | void f_strutf16len(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strdisplaywidth(typval_T *argvars, typval_T *rettv); | void f_strdisplaywidth(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strwidth(typval_T *argvars, typval_T *rettv); | void f_strwidth(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strcharpart(typval_T *argvars, typval_T *rettv); | void f_strcharpart(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strpart(typval_T *argvars, typval_T *rettv); | void f_strpart(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strridx(typval_T *argvars, typval_T *rettv); | void f_strridx(typval_T *argvars, typval_T *rettv); | ||||||
| void f_strtrans(typval_T *argvars, typval_T *rettv); | void f_strtrans(typval_T *argvars, typval_T *rettv); | ||||||
|  | void f_utf16idx(typval_T *argvars, typval_T *rettv); | ||||||
| void f_tolower(typval_T *argvars, typval_T *rettv); | void f_tolower(typval_T *argvars, typval_T *rettv); | ||||||
| void f_toupper(typval_T *argvars, typval_T *rettv); | void f_toupper(typval_T *argvars, typval_T *rettv); | ||||||
| void f_tr(typval_T *argvars, typval_T *rettv); | void f_tr(typval_T *argvars, typval_T *rettv); | ||||||
|  | |||||||
							
								
								
									
										166
									
								
								src/strings.c
									
									
									
									
									
								
							
							
						
						
									
										166
									
								
								src/strings.c
									
									
									
									
									
								
							| @ -1006,10 +1006,6 @@ string_reduce( | |||||||
|     static void |     static void | ||||||
| byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) | byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) | ||||||
| { | { | ||||||
|     char_u	*t; |  | ||||||
|     char_u	*str; |  | ||||||
|     varnumber_T	idx; |  | ||||||
|  |  | ||||||
|     rettv->vval.v_number = -1; |     rettv->vval.v_number = -1; | ||||||
|  |  | ||||||
|     if (in_vim9script() |     if (in_vim9script() | ||||||
| @ -1017,20 +1013,42 @@ byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED) | |||||||
| 		|| check_for_number_arg(argvars, 1) == FAIL)) | 		|| check_for_number_arg(argvars, 1) == FAIL)) | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|     str = tv_get_string_chk(&argvars[0]); |     char_u *str = tv_get_string_chk(&argvars[0]); | ||||||
|     idx = tv_get_number_chk(&argvars[1], NULL); |     varnumber_T	idx = tv_get_number_chk(&argvars[1], NULL); | ||||||
|     if (str == NULL || idx < 0) |     if (str == NULL || idx < 0) | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|     t = str; |     varnumber_T	utf16idx = FALSE; | ||||||
|  |     if (argvars[2].v_type != VAR_UNKNOWN) | ||||||
|  |     { | ||||||
|  | 	utf16idx = tv_get_bool(&argvars[2]); | ||||||
|  | 	if (utf16idx < 0 || utf16idx > 1) | ||||||
|  | 	{ | ||||||
|  | 	    semsg(_(e_using_number_as_bool_nr), utf16idx); | ||||||
|  | 	    return; | ||||||
|  | 	} | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     int (*ptr2len)(char_u *); | ||||||
|  |     if (enc_utf8 && comp) | ||||||
|  | 	ptr2len = utf_ptr2len; | ||||||
|  |     else | ||||||
|  | 	ptr2len = mb_ptr2len; | ||||||
|  |  | ||||||
|  |     char_u *t = str; | ||||||
|     for ( ; idx > 0; idx--) |     for ( ; idx > 0; idx--) | ||||||
|     { |     { | ||||||
| 	if (*t == NUL)		// EOL reached | 	if (*t == NUL)		// EOL reached | ||||||
| 	    return; | 	    return; | ||||||
| 	if (enc_utf8 && comp) | 	if (utf16idx) | ||||||
| 	    t += utf_ptr2len(t); | 	{ | ||||||
| 	else | 	    int clen = ptr2len(t); | ||||||
| 	    t += (*mb_ptr2len)(t); | 	    int c = (clen > 1) ? utf_ptr2char(t) : *t; | ||||||
|  | 	    if (c > 0xFFFF) | ||||||
|  | 		idx--; | ||||||
|  | 	} | ||||||
|  | 	if (idx > 0) | ||||||
|  | 	    t += ptr2len(t); | ||||||
|     } |     } | ||||||
|     rettv->vval.v_number = (varnumber_T)(t - str); |     rettv->vval.v_number = (varnumber_T)(t - str); | ||||||
| } | } | ||||||
| @ -1059,42 +1077,49 @@ f_byteidxcomp(typval_T *argvars, typval_T *rettv) | |||||||
|     void |     void | ||||||
| f_charidx(typval_T *argvars, typval_T *rettv) | f_charidx(typval_T *argvars, typval_T *rettv) | ||||||
| { | { | ||||||
|     char_u	*str; |  | ||||||
|     varnumber_T	idx; |  | ||||||
|     varnumber_T	countcc = FALSE; |  | ||||||
|     char_u	*p; |  | ||||||
|     int		len; |  | ||||||
|     int		(*ptr2len)(char_u *); |  | ||||||
|  |  | ||||||
|     rettv->vval.v_number = -1; |     rettv->vval.v_number = -1; | ||||||
|  |  | ||||||
|     if ((check_for_string_arg(argvars, 0) == FAIL |     if (check_for_string_arg(argvars, 0) == FAIL | ||||||
| 		|| check_for_number_arg(argvars, 1) == FAIL | 		|| check_for_number_arg(argvars, 1) == FAIL | ||||||
| 		|| check_for_opt_bool_arg(argvars, 2) == FAIL)) | 		|| check_for_opt_bool_arg(argvars, 2) == FAIL | ||||||
|  | 		|| (argvars[2].v_type != VAR_UNKNOWN | ||||||
|  | 		    && check_for_opt_bool_arg(argvars, 3) == FAIL)) | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|     str = tv_get_string_chk(&argvars[0]); |     char_u *str = tv_get_string_chk(&argvars[0]); | ||||||
|     idx = tv_get_number_chk(&argvars[1], NULL); |     varnumber_T	idx = tv_get_number_chk(&argvars[1], NULL); | ||||||
|     if (str == NULL || idx < 0) |     if (str == NULL || idx < 0) | ||||||
| 	return; | 	return; | ||||||
|  |  | ||||||
|  |     varnumber_T	countcc = FALSE; | ||||||
|  |     varnumber_T	utf16idx = FALSE; | ||||||
|     if (argvars[2].v_type != VAR_UNKNOWN) |     if (argvars[2].v_type != VAR_UNKNOWN) | ||||||
| 	countcc = tv_get_bool(&argvars[2]); |  | ||||||
|     if (countcc < 0 || countcc > 1) |  | ||||||
|     { |     { | ||||||
| 	semsg(_(e_using_number_as_bool_nr), countcc); | 	countcc = tv_get_bool(&argvars[2]); | ||||||
| 	return; | 	if (argvars[3].v_type != VAR_UNKNOWN) | ||||||
|  | 	    utf16idx = tv_get_bool(&argvars[3]); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     int (*ptr2len)(char_u *); | ||||||
|     if (enc_utf8 && countcc) |     if (enc_utf8 && countcc) | ||||||
| 	ptr2len = utf_ptr2len; | 	ptr2len = utf_ptr2len; | ||||||
|     else |     else | ||||||
| 	ptr2len = mb_ptr2len; | 	ptr2len = mb_ptr2len; | ||||||
|  |  | ||||||
|     for (p = str, len = 0; p <= str + idx; len++) |     char_u	*p; | ||||||
|  |     int		len; | ||||||
|  |     for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++) | ||||||
|     { |     { | ||||||
| 	if (*p == NUL) | 	if (*p == NUL) | ||||||
| 	    return; | 	    return; | ||||||
|  | 	if (utf16idx) | ||||||
|  | 	{ | ||||||
|  | 	    idx--; | ||||||
|  | 	    int clen = ptr2len(p); | ||||||
|  | 	    int c = (clen > 1) ? utf_ptr2char(p) : *p; | ||||||
|  | 	    if (c > 0xFFFF) | ||||||
|  | 		idx--; | ||||||
|  | 	} | ||||||
| 	p += ptr2len(p); | 	p += ptr2len(p); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @ -1358,6 +1383,38 @@ f_strchars(typval_T *argvars, typval_T *rettv) | |||||||
| 	strchar_common(argvars, rettv, skipcc); | 	strchar_common(argvars, rettv, skipcc); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * "strutf16len()" function | ||||||
|  |  */ | ||||||
|  |     void | ||||||
|  | f_strutf16len(typval_T *argvars, typval_T *rettv) | ||||||
|  | { | ||||||
|  |     rettv->vval.v_number = -1; | ||||||
|  |  | ||||||
|  |     if (check_for_string_arg(argvars, 0) == FAIL | ||||||
|  | 	    || check_for_opt_bool_arg(argvars, 1) == FAIL) | ||||||
|  | 	return; | ||||||
|  |  | ||||||
|  |     varnumber_T countcc = FALSE; | ||||||
|  |     if (argvars[1].v_type != VAR_UNKNOWN) | ||||||
|  | 	countcc = tv_get_bool(&argvars[1]); | ||||||
|  |  | ||||||
|  |     char_u		*s = tv_get_string(&argvars[0]); | ||||||
|  |     varnumber_T		len = 0; | ||||||
|  |     int			(*func_mb_ptr2char_adv)(char_u **pp); | ||||||
|  |     int			ch; | ||||||
|  |  | ||||||
|  |     func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv; | ||||||
|  |     while (*s != NUL) | ||||||
|  |     { | ||||||
|  | 	ch = func_mb_ptr2char_adv(&s); | ||||||
|  | 	if (ch > 0xFFFF) | ||||||
|  | 	    ++len; | ||||||
|  | 	++len; | ||||||
|  |     } | ||||||
|  |     rettv->vval.v_number = len; | ||||||
|  | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * "strdisplaywidth()" function |  * "strdisplaywidth()" function | ||||||
|  */ |  */ | ||||||
| @ -1619,6 +1676,61 @@ f_strtrans(typval_T *argvars, typval_T *rettv) | |||||||
|     rettv->vval.v_string = transstr(tv_get_string(&argvars[0])); |     rettv->vval.v_string = transstr(tv_get_string(&argvars[0])); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * | ||||||
|  |  * "utf16idx()" function | ||||||
|  |  */ | ||||||
|  |     void | ||||||
|  | f_utf16idx(typval_T *argvars, typval_T *rettv) | ||||||
|  | { | ||||||
|  |     rettv->vval.v_number = -1; | ||||||
|  |  | ||||||
|  |     if (check_for_string_arg(argvars, 0) == FAIL | ||||||
|  | 	    || check_for_opt_number_arg(argvars, 1) == FAIL | ||||||
|  | 	    || check_for_opt_bool_arg(argvars, 2) == FAIL | ||||||
|  | 	    || (argvars[2].v_type != VAR_UNKNOWN | ||||||
|  | 		    && check_for_opt_bool_arg(argvars, 3) == FAIL)) | ||||||
|  | 	    return; | ||||||
|  |  | ||||||
|  |     char_u *str = tv_get_string_chk(&argvars[0]); | ||||||
|  |     varnumber_T	idx = tv_get_number_chk(&argvars[1], NULL); | ||||||
|  |     if (str == NULL || idx < 0) | ||||||
|  | 	return; | ||||||
|  |  | ||||||
|  |     varnumber_T	countcc = FALSE; | ||||||
|  |     varnumber_T	charidx = FALSE; | ||||||
|  |     if (argvars[2].v_type != VAR_UNKNOWN) | ||||||
|  |     { | ||||||
|  | 	countcc = tv_get_bool(&argvars[2]); | ||||||
|  | 	if (argvars[3].v_type != VAR_UNKNOWN) | ||||||
|  | 	    charidx = tv_get_bool(&argvars[3]); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     int (*ptr2len)(char_u *); | ||||||
|  |     if (enc_utf8 && countcc) | ||||||
|  | 	ptr2len = utf_ptr2len; | ||||||
|  |     else | ||||||
|  | 	ptr2len = mb_ptr2len; | ||||||
|  |  | ||||||
|  |     char_u	*p; | ||||||
|  |     int		len; | ||||||
|  |     for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) | ||||||
|  |     { | ||||||
|  | 	if (*p == NUL) | ||||||
|  | 	    return; | ||||||
|  | 	int clen = ptr2len(p); | ||||||
|  | 	int c = (clen > 1) ? utf_ptr2char(p) : *p; | ||||||
|  | 	if (c > 0xFFFF) | ||||||
|  | 	    len++; | ||||||
|  | 	p += ptr2len(p); | ||||||
|  | 	if (charidx) | ||||||
|  | 	    idx--; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     rettv->vval.v_number = len > 0 ? len - 1 : 0; | ||||||
|  | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  * "tolower(string)" function |  * "tolower(string)" function | ||||||
|  */ |  */ | ||||||
|  | |||||||
| @ -1192,19 +1192,14 @@ func Test_byte2line_line2byte() | |||||||
|   bw! |   bw! | ||||||
| endfunc | endfunc | ||||||
|  |  | ||||||
| " Test for byteidx() and byteidxcomp() functions | " Test for byteidx() using a character index | ||||||
| func Test_byteidx() | func Test_byteidx() | ||||||
|   let a = '.é.' " one char of two bytes |   let a = '.é.' " one char of two bytes | ||||||
|   call assert_equal(0, byteidx(a, 0)) |   call assert_equal(0, byteidx(a, 0)) | ||||||
|   call assert_equal(0, byteidxcomp(a, 0)) |  | ||||||
|   call assert_equal(1, byteidx(a, 1)) |   call assert_equal(1, byteidx(a, 1)) | ||||||
|   call assert_equal(1, byteidxcomp(a, 1)) |  | ||||||
|   call assert_equal(3, byteidx(a, 2)) |   call assert_equal(3, byteidx(a, 2)) | ||||||
|   call assert_equal(3, byteidxcomp(a, 2)) |  | ||||||
|   call assert_equal(4, byteidx(a, 3)) |   call assert_equal(4, byteidx(a, 3)) | ||||||
|   call assert_equal(4, byteidxcomp(a, 3)) |  | ||||||
|   call assert_equal(-1, byteidx(a, 4)) |   call assert_equal(-1, byteidx(a, 4)) | ||||||
|   call assert_equal(-1, byteidxcomp(a, 4)) |  | ||||||
|  |  | ||||||
|   let b = '.é.' " normal e with composing char |   let b = '.é.' " normal e with composing char | ||||||
|   call assert_equal(0, b->byteidx(0)) |   call assert_equal(0, b->byteidx(0)) | ||||||
| @ -1212,18 +1207,184 @@ func Test_byteidx() | |||||||
|   call assert_equal(4, b->byteidx(2)) |   call assert_equal(4, b->byteidx(2)) | ||||||
|   call assert_equal(5, b->byteidx(3)) |   call assert_equal(5, b->byteidx(3)) | ||||||
|   call assert_equal(-1, b->byteidx(4)) |   call assert_equal(-1, b->byteidx(4)) | ||||||
|   call assert_fails("call byteidx([], 0)", 'E730:') |  | ||||||
|  |  | ||||||
|  |   " string with multiple composing characters | ||||||
|  |   let str = '-ą́-ą́' | ||||||
|  |   call assert_equal(0, byteidx(str, 0)) | ||||||
|  |   call assert_equal(1, byteidx(str, 1)) | ||||||
|  |   call assert_equal(6, byteidx(str, 2)) | ||||||
|  |   call assert_equal(7, byteidx(str, 3)) | ||||||
|  |   call assert_equal(12, byteidx(str, 4)) | ||||||
|  |   call assert_equal(-1, byteidx(str, 5)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(0, byteidx('', 0)) | ||||||
|  |   call assert_equal(-1, byteidx('', 1)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_fails("call byteidx([], 0)", 'E730:') | ||||||
|  |   call assert_fails("call byteidx('abc', [])", 'E745:') | ||||||
|  | endfunc | ||||||
|  |  | ||||||
|  | " Test for byteidxcomp() using a character index | ||||||
|  | func Test_byteidxcomp() | ||||||
|  |   let a = '.é.' " one char of two bytes | ||||||
|  |   call assert_equal(0, byteidxcomp(a, 0)) | ||||||
|  |   call assert_equal(1, byteidxcomp(a, 1)) | ||||||
|  |   call assert_equal(3, byteidxcomp(a, 2)) | ||||||
|  |   call assert_equal(4, byteidxcomp(a, 3)) | ||||||
|  |   call assert_equal(-1, byteidxcomp(a, 4)) | ||||||
|  |  | ||||||
|  |   let b = '.é.' " normal e with composing char | ||||||
|   call assert_equal(0, b->byteidxcomp(0)) |   call assert_equal(0, b->byteidxcomp(0)) | ||||||
|   call assert_equal(1, b->byteidxcomp(1)) |   call assert_equal(1, b->byteidxcomp(1)) | ||||||
|   call assert_equal(2, b->byteidxcomp(2)) |   call assert_equal(2, b->byteidxcomp(2)) | ||||||
|   call assert_equal(4, b->byteidxcomp(3)) |   call assert_equal(4, b->byteidxcomp(3)) | ||||||
|   call assert_equal(5, b->byteidxcomp(4)) |   call assert_equal(5, b->byteidxcomp(4)) | ||||||
|   call assert_equal(-1, b->byteidxcomp(5)) |   call assert_equal(-1, b->byteidxcomp(5)) | ||||||
|  |  | ||||||
|  |   " string with multiple composing characters | ||||||
|  |   let str = '-ą́-ą́' | ||||||
|  |   call assert_equal(0, byteidxcomp(str, 0)) | ||||||
|  |   call assert_equal(1, byteidxcomp(str, 1)) | ||||||
|  |   call assert_equal(2, byteidxcomp(str, 2)) | ||||||
|  |   call assert_equal(4, byteidxcomp(str, 3)) | ||||||
|  |   call assert_equal(6, byteidxcomp(str, 4)) | ||||||
|  |   call assert_equal(7, byteidxcomp(str, 5)) | ||||||
|  |   call assert_equal(8, byteidxcomp(str, 6)) | ||||||
|  |   call assert_equal(10, byteidxcomp(str, 7)) | ||||||
|  |   call assert_equal(12, byteidxcomp(str, 8)) | ||||||
|  |   call assert_equal(-1, byteidxcomp(str, 9)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(0, byteidxcomp('', 0)) | ||||||
|  |   call assert_equal(-1, byteidxcomp('', 1)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|   call assert_fails("call byteidxcomp([], 0)", 'E730:') |   call assert_fails("call byteidxcomp([], 0)", 'E730:') | ||||||
|  |   call assert_fails("call byteidxcomp('abc', [])", 'E745:') | ||||||
| endfunc | endfunc | ||||||
|  |  | ||||||
| " Test for charidx() | " Test for byteidx() using a UTF-16 index | ||||||
|  | func Test_byteidx_from_utf16_index() | ||||||
|  |   " string with single byte characters | ||||||
|  |   let str = "abc" | ||||||
|  |   for i in range(3) | ||||||
|  |     call assert_equal(i, byteidx(str, i, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(3, byteidx(str, 3, v:true)) | ||||||
|  |   call assert_equal(-1, byteidx(str, 4, v:true)) | ||||||
|  |  | ||||||
|  |   " string with two byte characters | ||||||
|  |   let str = "a©©b" | ||||||
|  |   call assert_equal(0, byteidx(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidx(str, 1, v:true)) | ||||||
|  |   call assert_equal(3, byteidx(str, 2, v:true)) | ||||||
|  |   call assert_equal(5, byteidx(str, 3, v:true)) | ||||||
|  |   call assert_equal(6, byteidx(str, 4, v:true)) | ||||||
|  |   call assert_equal(-1, byteidx(str, 5, v:true)) | ||||||
|  |  | ||||||
|  |   " string with four byte characters | ||||||
|  |   let str = "a😊😊b" | ||||||
|  |   call assert_equal(0, byteidx(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidx(str, 1, v:true)) | ||||||
|  |   call assert_equal(1, byteidx(str, 2, v:true)) | ||||||
|  |   call assert_equal(5, byteidx(str, 3, v:true)) | ||||||
|  |   call assert_equal(5, byteidx(str, 4, v:true)) | ||||||
|  |   call assert_equal(9, byteidx(str, 5, v:true)) | ||||||
|  |   call assert_equal(10, byteidx(str, 6, v:true)) | ||||||
|  |   call assert_equal(-1, byteidx(str, 7, v:true)) | ||||||
|  |  | ||||||
|  |   " string with composing characters | ||||||
|  |   let str = '-á-b́' | ||||||
|  |   call assert_equal(0, byteidx(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidx(str, 1, v:true)) | ||||||
|  |   call assert_equal(4, byteidx(str, 2, v:true)) | ||||||
|  |   call assert_equal(5, byteidx(str, 3, v:true)) | ||||||
|  |   call assert_equal(8, byteidx(str, 4, v:true)) | ||||||
|  |   call assert_equal(-1, byteidx(str, 5, v:true)) | ||||||
|  |  | ||||||
|  |   " string with multiple composing characters | ||||||
|  |   let str = '-ą́-ą́' | ||||||
|  |   call assert_equal(0, byteidx(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidx(str, 1, v:true)) | ||||||
|  |   call assert_equal(6, byteidx(str, 2, v:true)) | ||||||
|  |   call assert_equal(7, byteidx(str, 3, v:true)) | ||||||
|  |   call assert_equal(12, byteidx(str, 4, v:true)) | ||||||
|  |   call assert_equal(-1, byteidx(str, 5, v:true)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(0, byteidx('', 0, v:true)) | ||||||
|  |   call assert_equal(-1, byteidx('', 1, v:true)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_fails('call byteidx(str, 0, [])', 'E745:') | ||||||
|  | endfunc | ||||||
|  |  | ||||||
|  | " Test for byteidxcomp() using a UTF-16 index | ||||||
|  | func Test_byteidxcomp_from_utf16_index() | ||||||
|  |   " string with single byte characters | ||||||
|  |   let str = "abc" | ||||||
|  |   for i in range(3) | ||||||
|  |     call assert_equal(i, byteidxcomp(str, i, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(3, byteidxcomp(str, 3, v:true)) | ||||||
|  |   call assert_equal(-1, byteidxcomp(str, 4, v:true)) | ||||||
|  |  | ||||||
|  |   " string with two byte characters | ||||||
|  |   let str = "a©©b" | ||||||
|  |   call assert_equal(0, byteidxcomp(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidxcomp(str, 1, v:true)) | ||||||
|  |   call assert_equal(3, byteidxcomp(str, 2, v:true)) | ||||||
|  |   call assert_equal(5, byteidxcomp(str, 3, v:true)) | ||||||
|  |   call assert_equal(6, byteidxcomp(str, 4, v:true)) | ||||||
|  |   call assert_equal(-1, byteidxcomp(str, 5, v:true)) | ||||||
|  |  | ||||||
|  |   " string with four byte characters | ||||||
|  |   let str = "a😊😊b" | ||||||
|  |   call assert_equal(0, byteidxcomp(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidxcomp(str, 1, v:true)) | ||||||
|  |   call assert_equal(1, byteidxcomp(str, 2, v:true)) | ||||||
|  |   call assert_equal(5, byteidxcomp(str, 3, v:true)) | ||||||
|  |   call assert_equal(5, byteidxcomp(str, 4, v:true)) | ||||||
|  |   call assert_equal(9, byteidxcomp(str, 5, v:true)) | ||||||
|  |   call assert_equal(10, byteidxcomp(str, 6, v:true)) | ||||||
|  |   call assert_equal(-1, byteidxcomp(str, 7, v:true)) | ||||||
|  |  | ||||||
|  |   " string with composing characters | ||||||
|  |   let str = '-á-b́' | ||||||
|  |   call assert_equal(0, byteidxcomp(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidxcomp(str, 1, v:true)) | ||||||
|  |   call assert_equal(2, byteidxcomp(str, 2, v:true)) | ||||||
|  |   call assert_equal(4, byteidxcomp(str, 3, v:true)) | ||||||
|  |   call assert_equal(5, byteidxcomp(str, 4, v:true)) | ||||||
|  |   call assert_equal(6, byteidxcomp(str, 5, v:true)) | ||||||
|  |   call assert_equal(8, byteidxcomp(str, 6, v:true)) | ||||||
|  |   call assert_equal(-1, byteidxcomp(str, 7, v:true)) | ||||||
|  |   call assert_fails('call byteidxcomp(str, 0, [])', 'E745:') | ||||||
|  |  | ||||||
|  |   " string with multiple composing characters | ||||||
|  |   let str = '-ą́-ą́' | ||||||
|  |   call assert_equal(0, byteidxcomp(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, byteidxcomp(str, 1, v:true)) | ||||||
|  |   call assert_equal(2, byteidxcomp(str, 2, v:true)) | ||||||
|  |   call assert_equal(4, byteidxcomp(str, 3, v:true)) | ||||||
|  |   call assert_equal(6, byteidxcomp(str, 4, v:true)) | ||||||
|  |   call assert_equal(7, byteidxcomp(str, 5, v:true)) | ||||||
|  |   call assert_equal(8, byteidxcomp(str, 6, v:true)) | ||||||
|  |   call assert_equal(10, byteidxcomp(str, 7, v:true)) | ||||||
|  |   call assert_equal(12, byteidxcomp(str, 8, v:true)) | ||||||
|  |   call assert_equal(-1, byteidxcomp(str, 9, v:true)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(0, byteidxcomp('', 0, v:true)) | ||||||
|  |   call assert_equal(-1, byteidxcomp('', 1, v:true)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_fails('call byteidxcomp(str, 0, [])', 'E745:') | ||||||
|  | endfunc | ||||||
|  |  | ||||||
|  | " Test for charidx() using a byte index | ||||||
| func Test_charidx() | func Test_charidx() | ||||||
|   let a = 'xáb́y' |   let a = 'xáb́y' | ||||||
|   call assert_equal(0, charidx(a, 0)) |   call assert_equal(0, charidx(a, 0)) | ||||||
| @ -1232,17 +1393,20 @@ func Test_charidx() | |||||||
|   call assert_equal(3, charidx(a, 7)) |   call assert_equal(3, charidx(a, 7)) | ||||||
|   call assert_equal(-1, charidx(a, 8)) |   call assert_equal(-1, charidx(a, 8)) | ||||||
|   call assert_equal(-1, charidx(a, -1)) |   call assert_equal(-1, charidx(a, -1)) | ||||||
|   call assert_equal(-1, charidx('', 0)) |  | ||||||
|   call assert_equal(-1, charidx(test_null_string(), 0)) |  | ||||||
|  |  | ||||||
|   " count composing characters |   " count composing characters | ||||||
|   call assert_equal(0, charidx(a, 0, 1)) |   call assert_equal(0, a->charidx(0, 1)) | ||||||
|   call assert_equal(2, charidx(a, 2, 1)) |   call assert_equal(2, a->charidx(2, 1)) | ||||||
|   call assert_equal(3, charidx(a, 4, 1)) |   call assert_equal(3, a->charidx(4, 1)) | ||||||
|   call assert_equal(5, charidx(a, 7, 1)) |   call assert_equal(5, a->charidx(7, 1)) | ||||||
|   call assert_equal(-1, charidx(a, 8, 1)) |   call assert_equal(-1, a->charidx(8, 1)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(-1, charidx('', 0)) | ||||||
|   call assert_equal(-1, charidx('', 0, 1)) |   call assert_equal(-1, charidx('', 0, 1)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_equal(-1, charidx(test_null_string(), 0)) | ||||||
|   call assert_fails('let x = charidx([], 1)', 'E1174:') |   call assert_fails('let x = charidx([], 1)', 'E1174:') | ||||||
|   call assert_fails('let x = charidx("abc", [])', 'E1210:') |   call assert_fails('let x = charidx("abc", [])', 'E1210:') | ||||||
|   call assert_fails('let x = charidx("abc", 1, [])', 'E1212:') |   call assert_fails('let x = charidx("abc", 1, [])', 'E1212:') | ||||||
| @ -1250,6 +1414,237 @@ func Test_charidx() | |||||||
|   call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:') |   call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:') | ||||||
| endfunc | endfunc | ||||||
|  |  | ||||||
|  | " Test for charidx() using a UTF-16 index | ||||||
|  | func Test_charidx_from_utf16_index() | ||||||
|  |   " string with single byte characters | ||||||
|  |   let str = "abc" | ||||||
|  |   for i in range(3) | ||||||
|  |     call assert_equal(i, charidx(str, i, v:false, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, charidx(str, 3, v:false, v:true)) | ||||||
|  |  | ||||||
|  |   " string with two byte characters | ||||||
|  |   let str = "a©©b" | ||||||
|  |   call assert_equal(0, charidx(str, 0, v:false, v:true)) | ||||||
|  |   call assert_equal(1, charidx(str, 1, v:false, v:true)) | ||||||
|  |   call assert_equal(2, charidx(str, 2, v:false, v:true)) | ||||||
|  |   call assert_equal(3, charidx(str, 3, v:false, v:true)) | ||||||
|  |   call assert_equal(-1, charidx(str, 4, v:false, v:true)) | ||||||
|  |  | ||||||
|  |   " string with four byte characters | ||||||
|  |   let str = "a😊😊b" | ||||||
|  |   call assert_equal(0, charidx(str, 0, v:false, v:true)) | ||||||
|  |   call assert_equal(1, charidx(str, 1, v:false, v:true)) | ||||||
|  |   call assert_equal(1, charidx(str, 2, v:false, v:true)) | ||||||
|  |   call assert_equal(2, charidx(str, 3, v:false, v:true)) | ||||||
|  |   call assert_equal(2, charidx(str, 4, v:false, v:true)) | ||||||
|  |   call assert_equal(3, charidx(str, 5, v:false, v:true)) | ||||||
|  |   call assert_equal(-1, charidx(str, 6, v:false, v:true)) | ||||||
|  |  | ||||||
|  |   " string with composing characters | ||||||
|  |   let str = '-á-b́' | ||||||
|  |   for i in str->strcharlen()->range() | ||||||
|  |     call assert_equal(i, charidx(str, i, v:false, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, charidx(str, 4, v:false, v:true)) | ||||||
|  |   for i in str->strchars()->range() | ||||||
|  |     call assert_equal(i, charidx(str, i, v:true, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, charidx(str, 6, v:true, v:true)) | ||||||
|  |  | ||||||
|  |   " string with multiple composing characters | ||||||
|  |   let str = '-ą́-ą́' | ||||||
|  |   for i in str->strcharlen()->range() | ||||||
|  |     call assert_equal(i, charidx(str, i, v:false, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, charidx(str, 4, v:false, v:true)) | ||||||
|  |   for i in str->strchars()->range() | ||||||
|  |     call assert_equal(i, charidx(str, i, v:true, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, charidx(str, 8, v:true, v:true)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(-1, charidx('', 0, v:false, v:true)) | ||||||
|  |   call assert_equal(-1, charidx('', 0, v:true, v:true)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_equal(-1, charidx('', 0, v:false, v:true)) | ||||||
|  |   call assert_equal(-1, charidx('', 0, v:true, v:true)) | ||||||
|  |   call assert_equal(-1, charidx(test_null_string(), 0, v:false, v:true)) | ||||||
|  |   call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:') | ||||||
|  |   call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:') | ||||||
|  | endfunc | ||||||
|  |  | ||||||
|  | " Test for utf16idx() using a byte index | ||||||
|  | func Test_utf16idx_from_byteidx() | ||||||
|  |   " UTF-16 index of a string with single byte characters | ||||||
|  |   let str = "abc" | ||||||
|  |   for i in range(3) | ||||||
|  |     call assert_equal(i, utf16idx(str, i)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, utf16idx(str, 3)) | ||||||
|  |  | ||||||
|  |   " UTF-16 index of a string with two byte characters | ||||||
|  |   let str = 'a©©b' | ||||||
|  |   call assert_equal(0, str->utf16idx(0)) | ||||||
|  |   call assert_equal(1, str->utf16idx(1)) | ||||||
|  |   call assert_equal(1, str->utf16idx(2)) | ||||||
|  |   call assert_equal(2, str->utf16idx(3)) | ||||||
|  |   call assert_equal(2, str->utf16idx(4)) | ||||||
|  |   call assert_equal(3, str->utf16idx(5)) | ||||||
|  |   call assert_equal(-1, str->utf16idx(6)) | ||||||
|  |  | ||||||
|  |   " UTF-16 index of a string with four byte characters | ||||||
|  |   let str = 'a😊😊b' | ||||||
|  |   call assert_equal(0, utf16idx(str, 0)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 1)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 2)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 3)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 4)) | ||||||
|  |   call assert_equal(4, utf16idx(str, 5)) | ||||||
|  |   call assert_equal(4, utf16idx(str, 6)) | ||||||
|  |   call assert_equal(4, utf16idx(str, 7)) | ||||||
|  |   call assert_equal(4, utf16idx(str, 8)) | ||||||
|  |   call assert_equal(5, utf16idx(str, 9)) | ||||||
|  |   call assert_equal(-1, utf16idx(str, 10)) | ||||||
|  |  | ||||||
|  |   " UTF-16 index of a string with composing characters | ||||||
|  |   let str = '-á-b́' | ||||||
|  |   call assert_equal(0, utf16idx(str, 0)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 1)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 2)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 3)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 4)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 5)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 6)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 7)) | ||||||
|  |   call assert_equal(-1, utf16idx(str, 8)) | ||||||
|  |   call assert_equal(0, utf16idx(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 1, v:true)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 2, v:true)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 3, v:true)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 4, v:true)) | ||||||
|  |   call assert_equal(4, utf16idx(str, 5, v:true)) | ||||||
|  |   call assert_equal(5, utf16idx(str, 6, v:true)) | ||||||
|  |   call assert_equal(5, utf16idx(str, 7, v:true)) | ||||||
|  |   call assert_equal(-1, utf16idx(str, 8, v:true)) | ||||||
|  |  | ||||||
|  |   " string with multiple composing characters | ||||||
|  |   let str = '-ą́-ą́' | ||||||
|  |   call assert_equal(0, utf16idx(str, 0)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 1)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 2)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 3)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 4)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 5)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 6)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 7)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 8)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 9)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 10)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 11)) | ||||||
|  |   call assert_equal(-1, utf16idx(str, 12)) | ||||||
|  |   call assert_equal(0, utf16idx(str, 0, v:true)) | ||||||
|  |   call assert_equal(1, utf16idx(str, 1, v:true)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 2, v:true)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 3, v:true)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 4, v:true)) | ||||||
|  |   call assert_equal(3, utf16idx(str, 5, v:true)) | ||||||
|  |   call assert_equal(4, utf16idx(str, 6, v:true)) | ||||||
|  |   call assert_equal(5, utf16idx(str, 7, v:true)) | ||||||
|  |   call assert_equal(6, utf16idx(str, 8, v:true)) | ||||||
|  |   call assert_equal(6, utf16idx(str, 9, v:true)) | ||||||
|  |   call assert_equal(7, utf16idx(str, 10, v:true)) | ||||||
|  |   call assert_equal(7, utf16idx(str, 11, v:true)) | ||||||
|  |   call assert_equal(-1, utf16idx(str, 12, v:true)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(-1, utf16idx('', 0)) | ||||||
|  |   call assert_equal(-1, utf16idx('', 0, v:true)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_equal(-1, utf16idx("", 0)) | ||||||
|  |   call assert_equal(-1, utf16idx("abc", -1)) | ||||||
|  |   call assert_equal(-1, utf16idx(test_null_string(), 0)) | ||||||
|  |   call assert_fails('let l = utf16idx([], 0)', 'E1174:') | ||||||
|  |   call assert_fails('let l = utf16idx("ab", [])', 'E1210:') | ||||||
|  |   call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:') | ||||||
|  | endfunc | ||||||
|  |  | ||||||
|  | " Test for utf16idx() using a character index | ||||||
|  | func Test_utf16idx_from_charidx() | ||||||
|  |   let str = "abc" | ||||||
|  |   for i in str->strcharlen()->range() | ||||||
|  |     call assert_equal(i, utf16idx(str, i, v:false, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, utf16idx(str, 3, v:false, v:true)) | ||||||
|  |  | ||||||
|  |   " UTF-16 index of a string with two byte characters | ||||||
|  |   let str = "a©©b" | ||||||
|  |   for i in str->strcharlen()->range() | ||||||
|  |     call assert_equal(i, utf16idx(str, i, v:false, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) | ||||||
|  |  | ||||||
|  |   " UTF-16 index of a string with four byte characters | ||||||
|  |   let str = "a😊😊b" | ||||||
|  |   call assert_equal(0, utf16idx(str, 0, v:false, v:true)) | ||||||
|  |   call assert_equal(2, utf16idx(str, 1, v:false, v:true)) | ||||||
|  |   call assert_equal(4, utf16idx(str, 2, v:false, v:true)) | ||||||
|  |   call assert_equal(5, utf16idx(str, 3, v:false, v:true)) | ||||||
|  |   call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) | ||||||
|  |  | ||||||
|  |   " UTF-16 index of a string with composing characters | ||||||
|  |   let str = '-á-b́' | ||||||
|  |   for i in str->strcharlen()->range() | ||||||
|  |     call assert_equal(i, utf16idx(str, i, v:false, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) | ||||||
|  |   for i in str->strchars()->range() | ||||||
|  |     call assert_equal(i, utf16idx(str, i, v:true, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, utf16idx(str, 6, v:true, v:true)) | ||||||
|  |  | ||||||
|  |   " string with multiple composing characters | ||||||
|  |   let str = '-ą́-ą́' | ||||||
|  |   for i in str->strcharlen()->range() | ||||||
|  |     call assert_equal(i, utf16idx(str, i, v:false, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, utf16idx(str, 4, v:false, v:true)) | ||||||
|  |   for i in str->strchars()->range() | ||||||
|  |     call assert_equal(i, utf16idx(str, i, v:true, v:true)) | ||||||
|  |   endfor | ||||||
|  |   call assert_equal(-1, utf16idx(str, 8, v:true, v:true)) | ||||||
|  |  | ||||||
|  |   " empty string | ||||||
|  |   call assert_equal(-1, utf16idx('', 0, v:false, v:true)) | ||||||
|  |   call assert_equal(-1, utf16idx('', 0, v:true, v:true)) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_equal(-1, utf16idx(test_null_string(), 0, v:true, v:true)) | ||||||
|  |   call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:') | ||||||
|  | endfunc | ||||||
|  |  | ||||||
|  | " Test for strutf16len() | ||||||
|  | func Test_strutf16len() | ||||||
|  |   call assert_equal(3, strutf16len('abc')) | ||||||
|  |   call assert_equal(3, 'abc'->strutf16len(v:true)) | ||||||
|  |   call assert_equal(4, strutf16len('a©©b')) | ||||||
|  |   call assert_equal(4, strutf16len('a©©b', v:true)) | ||||||
|  |   call assert_equal(6, strutf16len('a😊😊b')) | ||||||
|  |   call assert_equal(6, strutf16len('a😊😊b', v:true)) | ||||||
|  |   call assert_equal(4, strutf16len('-á-b́')) | ||||||
|  |   call assert_equal(6, strutf16len('-á-b́', v:true)) | ||||||
|  |   call assert_equal(4, strutf16len('-ą́-ą́')) | ||||||
|  |   call assert_equal(8, strutf16len('-ą́-ą́', v:true)) | ||||||
|  |   call assert_equal(0, strutf16len('')) | ||||||
|  |  | ||||||
|  |   " error cases | ||||||
|  |   call assert_fails('let l = strutf16len([])', 'E1174:') | ||||||
|  |   call assert_fails('let l = strutf16len("a", [])', 'E1212:') | ||||||
|  |   call assert_equal(0, strutf16len(test_null_string())) | ||||||
|  | endfunc | ||||||
|  |  | ||||||
| func Test_count() | func Test_count() | ||||||
|   let l = ['a', 'a', 'A', 'b'] |   let l = ['a', 'a', 'A', 'b'] | ||||||
|   call assert_equal(2, count(l, 'a')) |   call assert_equal(2, count(l, 'a')) | ||||||
| @ -3074,5 +3469,4 @@ func Test_delfunc_while_listing() | |||||||
|   call StopVimInTerminal(buf) |   call StopVimInTerminal(buf) | ||||||
| endfunc | endfunc | ||||||
|  |  | ||||||
|  |  | ||||||
| " vim: shiftwidth=2 sts=2 expandtab | " vim: shiftwidth=2 sts=2 expandtab | ||||||
|  | |||||||
| @ -695,6 +695,8 @@ static char *(features[]) = | |||||||
|  |  | ||||||
| static int included_patches[] = | static int included_patches[] = | ||||||
| {   /* Add new patch number below this line */ | {   /* Add new patch number below this line */ | ||||||
|  | /**/ | ||||||
|  |     1485, | ||||||
| /**/ | /**/ | ||||||
|     1484, |     1484, | ||||||
| /**/ | /**/ | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user