patch 9.1.1669: Vim script: no support for URI de-/encoding
Problem:  Vim script: no support for URI de-/encoding
          (ubaldot)
Solution: Add the uri_encode() and uri_decode() functions
          (Yegappan Lakshmanan)
fixes: #17861
closes: #18034
Signed-off-by: Yegappan Lakshmanan <yegappan@yahoo.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
			
			
This commit is contained in:
		
				
					committed by
					
						 Christian Brabandt
						Christian Brabandt
					
				
			
			
				
	
			
			
			
						parent
						
							da34f84847
						
					
				
				
					commit
					454c7ea484
				
			| @ -747,6 +747,8 @@ undofile({name})		String	undo file name for {name} | |||||||
| undotree([{buf}])		List	undo file tree for buffer {buf} | undotree([{buf}])		List	undo file tree for buffer {buf} | ||||||
| uniq({list} [, {func} [, {dict}]]) | uniq({list} [, {func} [, {dict}]]) | ||||||
| 				List	remove adjacent duplicates from a list | 				List	remove adjacent duplicates from a list | ||||||
|  | uri_decode({string})		String	URI-decode a string | ||||||
|  | uri_encode({string})		String	URI-encode a string | ||||||
| utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) | utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) | ||||||
| 				Number	UTF-16 index of byte {idx} in {string} | 				Number	UTF-16 index of byte {idx} in {string} | ||||||
| values({dict})			List	values in {dict} | values({dict})			List	values in {dict} | ||||||
| @ -12187,6 +12189,59 @@ uniq({list} [, {func} [, {dict}]])			*uniq()* *E882* | |||||||
| 		Return type: list<{type}> | 		Return type: list<{type}> | ||||||
|  |  | ||||||
|  |  | ||||||
|  | uri_decode({string})					*uri_decode()* | ||||||
|  | 		Returns the URI-decoded form of {string}, reversing | ||||||
|  | 		percent-encoding (converting sequences like "%3D" back to | ||||||
|  | 		the corresponding character). | ||||||
|  |  | ||||||
|  | 		The decoding follows standard percent-decoding rules: | ||||||
|  | 		    - "%HH" is replaced with the character for the hex value | ||||||
|  | 		      HH. | ||||||
|  | 		    - If the decoded bytes form valid UTF-8, they are combined | ||||||
|  | 		      into the corresponding character(s).  Otherwise, the | ||||||
|  | 		      bytes are kept as-is. | ||||||
|  | 		    - Invalid or incomplete encodings (e.g. "%GZ", "%3", or a | ||||||
|  | 		      trailing "%") are left unchanged. | ||||||
|  |  | ||||||
|  | 		Returns an empty String if {string} is empty. | ||||||
|  |  | ||||||
|  | 		Example: > | ||||||
|  | 			:echo uri_decode('c%3A%5Cmy%5Cdir%5Cfoo%20bar') | ||||||
|  | 			c:\my\dir\foo bar | ||||||
|  | 			:echo uri_decode('%CE%B1%CE%B2%CE%B3') | ||||||
|  | 			αβγ | ||||||
|  | < | ||||||
|  | 		Can also be used as a |method|: > | ||||||
|  | 			mystr->uri_decode() | ||||||
|  | < | ||||||
|  | 		Return type: |String| | ||||||
|  |  | ||||||
|  | uri_encode({string})					*uri_encode()* | ||||||
|  | 		Returns the URI-encoded form of {string}.  URI encoding | ||||||
|  | 		replaces unsafe or reserved characters with percent-encoded | ||||||
|  | 		sequences. | ||||||
|  |  | ||||||
|  | 		The encoding follows standard percent-encoding rules: | ||||||
|  |                     - Alphanumeric characters [0-9A-Za-z] remain unchanged. | ||||||
|  |                     - The characters "-", "_", ".", and "~" also remain | ||||||
|  |                       unchanged. | ||||||
|  |                     - All other characters are replaced with "%HH", where HH | ||||||
|  |                       is the two-digit uppercase hexadecimal value. | ||||||
|  |                     - Existing percent-encoded sequences are not recognized: | ||||||
|  |                       a literal "%" is itself encoded as "%25". | ||||||
|  |  | ||||||
|  | 		Returns an empty String if {string} is empty. | ||||||
|  |  | ||||||
|  | 		Example: > | ||||||
|  | 			:echo uri_encode('c:\my\dir\foo bar') | ||||||
|  | 			c%3A%5Cmy%5Cdir%5Cfoo%20bar | ||||||
|  | 			:echo uri_encode('key=value&name=αβγ') | ||||||
|  | 			key%3Dvalue%26name%3D%CE%B1%CE%B2%CE%B3 | ||||||
|  | < | ||||||
|  | 		Can also be used as a |method|: > | ||||||
|  | 			mystr->uri_encode() | ||||||
|  | < | ||||||
|  | 		Return type: |String| | ||||||
|  |  | ||||||
| 							*utf16idx()* | 							*utf16idx()* | ||||||
| utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) | utf16idx({string}, {idx} [, {countcc} [, {charidx}]]) | ||||||
| 		Same as |charidx()| but returns the UTF-16 code unit index of | 		Same as |charidx()| but returns the UTF-16 code unit index of | ||||||
|  | |||||||
| @ -11101,6 +11101,8 @@ unix	os_unix.txt	/*unix* | |||||||
| unlisted-buffer	windows.txt	/*unlisted-buffer* | unlisted-buffer	windows.txt	/*unlisted-buffer* | ||||||
| up-down-motions	motion.txt	/*up-down-motions* | up-down-motions	motion.txt	/*up-down-motions* | ||||||
| uppercase	change.txt	/*uppercase* | uppercase	change.txt	/*uppercase* | ||||||
|  | uri_decode()	builtin.txt	/*uri_decode()* | ||||||
|  | uri_encode()	builtin.txt	/*uri_encode()* | ||||||
| urxvt-mouse	options.txt	/*urxvt-mouse* | urxvt-mouse	options.txt	/*urxvt-mouse* | ||||||
| use-visual-cmds	version4.txt	/*use-visual-cmds* | use-visual-cmds	version4.txt	/*use-visual-cmds* | ||||||
| useful-mappings	tips.txt	/*useful-mappings* | useful-mappings	tips.txt	/*useful-mappings* | ||||||
|  | |||||||
| @ -807,6 +807,8 @@ String manipulation:					*string-functions* | |||||||
| 	str2blob()		convert a list of strings into a blob | 	str2blob()		convert a list of strings into a blob | ||||||
| 	blob2str()		convert a blob into a list of strings | 	blob2str()		convert a blob into a list of strings | ||||||
| 	items()			get List of String index-character pairs | 	items()			get List of String index-character pairs | ||||||
|  | 	uri_encode()		URI-encode a string | ||||||
|  | 	uri_decode()		URI-decode a string | ||||||
|  |  | ||||||
| List manipulation:					*list-functions* | List manipulation:					*list-functions* | ||||||
| 	get()			get an item without error for wrong index | 	get()			get an item without error for wrong index | ||||||
|  | |||||||
| @ -41788,6 +41788,8 @@ Functions: ~ | |||||||
| |str2blob()|		convert a List of strings into a blob | |str2blob()|		convert a List of strings into a blob | ||||||
| |test_null_tuple()|	return a null tuple | |test_null_tuple()|	return a null tuple | ||||||
| |tuple2list()|		turn a Tuple of items into a List | |tuple2list()|		turn a Tuple of items into a List | ||||||
|  | |uri_decode()|		URI-decode a string | ||||||
|  | |uri_encode()|		URI-encode a string | ||||||
| |wildtrigger()|		trigger wildcard expansion | |wildtrigger()|		trigger wildcard expansion | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -3116,6 +3116,10 @@ static funcentry_T global_functions[] = | |||||||
| 			ret_dict_any,	    f_undotree}, | 			ret_dict_any,	    f_undotree}, | ||||||
|     {"uniq",		1, 3, FEARG_1,	    arg13_sortuniq, |     {"uniq",		1, 3, FEARG_1,	    arg13_sortuniq, | ||||||
| 			ret_first_arg,	    f_uniq}, | 			ret_first_arg,	    f_uniq}, | ||||||
|  |     {"uri_decode",	1, 1, FEARG_1,	    arg1_string, | ||||||
|  | 			ret_string,	    f_uridecode}, | ||||||
|  |     {"uri_encode",	1, 1, FEARG_1,	    arg1_string, | ||||||
|  | 			ret_string,	    f_uriencode}, | ||||||
|     {"utf16idx",	2, 4, FEARG_1,	    arg4_string_number_bool_bool, |     {"utf16idx",	2, 4, FEARG_1,	    arg4_string_number_bool_bool, | ||||||
| 			ret_number,	    f_utf16idx}, | 			ret_number,	    f_utf16idx}, | ||||||
|     {"values",		1, 1, FEARG_1,	    arg1_dict_any, |     {"values",		1, 1, FEARG_1,	    arg1_dict_any, | ||||||
|  | |||||||
| @ -52,4 +52,6 @@ void f_tolower(typval_T *argvars, typval_T *rettv); | |||||||
| void f_toupper(typval_T *argvars, typval_T *rettv); | void f_toupper(typval_T *argvars, typval_T *rettv); | ||||||
| void f_tr(typval_T *argvars, typval_T *rettv); | void f_tr(typval_T *argvars, typval_T *rettv); | ||||||
| void f_trim(typval_T *argvars, typval_T *rettv); | void f_trim(typval_T *argvars, typval_T *rettv); | ||||||
|  | void f_uridecode(typval_T *argvars, typval_T *rettv); | ||||||
|  | void f_uriencode(typval_T *argvars, typval_T *rettv); | ||||||
| /* vim: set ft=c : */ | /* vim: set ft=c : */ | ||||||
|  | |||||||
							
								
								
									
										145
									
								
								src/strings.c
									
									
									
									
									
								
							
							
						
						
									
										145
									
								
								src/strings.c
									
									
									
									
									
								
							| @ -2310,6 +2310,151 @@ f_trim(typval_T *argvars, typval_T *rettv) | |||||||
|     rettv->vval.v_string = vim_strnsave(head, tail - head); |     rettv->vval.v_string = vim_strnsave(head, tail - head); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Decodes a URI-encoded string. | ||||||
|  |  * | ||||||
|  |  * Parameters: | ||||||
|  |  *   str - The URI-encoded input string (may contain %XX sequences and '+'). | ||||||
|  |  * | ||||||
|  |  * Returns: | ||||||
|  |  *   A newly allocated string with URI encoding decoded: | ||||||
|  |  *     - %XX sequences are converted to the corresponding character. | ||||||
|  |  *     - If the input is malformed (e.g., incomplete % sequence), the original | ||||||
|  |  *       characters are copied. | ||||||
|  |  *   The output string will never be longer than the input string. | ||||||
|  |  *   The caller is responsible for freeing the returned string. | ||||||
|  |  * | ||||||
|  |  * Returns NULL if input is NULL or memory allocation fails. | ||||||
|  |  */ | ||||||
|  |     static char_u * | ||||||
|  | uri_decode(char_u *str) | ||||||
|  | { | ||||||
|  |     if (str == NULL) | ||||||
|  | 	return NULL; | ||||||
|  |  | ||||||
|  |     size_t len = STRLEN(str); | ||||||
|  |  | ||||||
|  |     char_u *decoded = alloc(len + 1); | ||||||
|  |     if (!decoded) | ||||||
|  | 	return NULL; | ||||||
|  |  | ||||||
|  |     char_u	*p = decoded; | ||||||
|  |     size_t	i = 0; | ||||||
|  |  | ||||||
|  |     while (i < len) | ||||||
|  |     { | ||||||
|  | 	if (str[i] == '%') | ||||||
|  | 	{ | ||||||
|  | 	    if (i + 2 >= len) | ||||||
|  | 	    { | ||||||
|  | 		// Malformed encoding | ||||||
|  | 		*p++ = str[i++]; | ||||||
|  | 		if (str[i] != NUL) | ||||||
|  | 		    *p++ = str[i++]; | ||||||
|  | 	    } | ||||||
|  | 	    else | ||||||
|  | 	    { | ||||||
|  | 		int val = hexhex2nr(&str[i + 1]); | ||||||
|  | 		if (val != -1) | ||||||
|  | 		{ | ||||||
|  | 		    *p++ = (char_u)val; | ||||||
|  | 		    i += 3; | ||||||
|  | 		} | ||||||
|  | 		else | ||||||
|  | 		{ | ||||||
|  | 		    // invalid hex digits following "%" | ||||||
|  | 		    for (int j = 0; j < 3; j++) | ||||||
|  | 			*p++ = str[i++]; | ||||||
|  | 		} | ||||||
|  | 	    } | ||||||
|  |  | ||||||
|  | 	} | ||||||
|  | 	else | ||||||
|  | 	    *p++ = str[i++]; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     *p = NUL; | ||||||
|  |  | ||||||
|  |     return decoded; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * "uri_decode({str})" function | ||||||
|  |  */ | ||||||
|  |     void | ||||||
|  | f_uridecode(typval_T *argvars, typval_T *rettv) | ||||||
|  | { | ||||||
|  |     rettv->v_type = VAR_STRING; | ||||||
|  |     rettv->vval.v_string = NULL; | ||||||
|  |  | ||||||
|  |     if (check_for_string_arg(argvars, 0) == FAIL) | ||||||
|  | 	return; | ||||||
|  |  | ||||||
|  |     rettv->vval.v_string = uri_decode(tv_get_string(&argvars[0])); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * Encodes a string for safe use in a URI. | ||||||
|  |  * | ||||||
|  |  * Parameters: | ||||||
|  |  *   str - The input string to encode. | ||||||
|  |  * | ||||||
|  |  * Returns: | ||||||
|  |  *   A newly allocated string where: | ||||||
|  |  *     - Alphanumeric characters and '-', '_', '.', '~' are left unchanged. | ||||||
|  |  *     - All other bytes are encoded as %XX (uppercase hex). | ||||||
|  |  *   The caller is responsible for freeing the returned string. | ||||||
|  |  * | ||||||
|  |  *   Returns NULL if input is NULL or memory allocation fails. | ||||||
|  |  */ | ||||||
|  |     static char_u * | ||||||
|  | uri_encode(char_u *str) | ||||||
|  | { | ||||||
|  |     if (str == NULL) | ||||||
|  | 	return NULL; | ||||||
|  |  | ||||||
|  |     size_t len = STRLEN(str); | ||||||
|  |  | ||||||
|  |     // Worst case: every character needs encoding => 3x size + 1 for null | ||||||
|  |     // terminator | ||||||
|  |     char_u *encoded = alloc(len * 3 + 1); | ||||||
|  |     if (encoded == NULL) | ||||||
|  | 	return NULL; | ||||||
|  |  | ||||||
|  |     char_u *p = encoded; | ||||||
|  |  | ||||||
|  |     for (size_t i = 0; i < len; ++i) | ||||||
|  |     { | ||||||
|  | 	char_u c = str[i]; | ||||||
|  | 	if (ASCII_ISALNUM(c) || c == '-' || c == '_' || c == '.' || c == '~') | ||||||
|  | 	    *p++ = c; | ||||||
|  | 	else | ||||||
|  | 	{ | ||||||
|  | 	    sprintf((char *)p, "%%%02X", c); | ||||||
|  | 	    p += 3; | ||||||
|  | 	} | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     *p = NUL; | ||||||
|  |  | ||||||
|  |     return encoded; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /* | ||||||
|  |  * "uri_encode({str})" function | ||||||
|  |  */ | ||||||
|  |     void | ||||||
|  | f_uriencode(typval_T *argvars, typval_T *rettv) | ||||||
|  | { | ||||||
|  |     rettv->v_type = VAR_STRING; | ||||||
|  |     rettv->vval.v_string = NULL; | ||||||
|  |  | ||||||
|  |     if (check_for_string_arg(argvars, 0) == FAIL) | ||||||
|  | 	return; | ||||||
|  |  | ||||||
|  |     rettv->vval.v_string = uri_encode(tv_get_string(&argvars[0])); | ||||||
|  | } | ||||||
|  |  | ||||||
| static char *e_printf = N_(e_insufficient_arguments_for_printf); | static char *e_printf = N_(e_insufficient_arguments_for_printf); | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  | |||||||
| @ -4503,4 +4503,61 @@ func Test_blob2str() | |||||||
|   call v9.CheckLegacyAndVim9Success(lines) |   call v9.CheckLegacyAndVim9Success(lines) | ||||||
| endfunc | endfunc | ||||||
|  |  | ||||||
|  | " Test for uri_encode() and uri_decode() functions | ||||||
|  | " The assertions are collected in a heredoc and passed to | ||||||
|  | " v9.CheckLegacyAndVim9Success().  Covered here: reserved and multibyte | ||||||
|  | " characters, the unreserved set "-_.~", empty and null strings, input that | ||||||
|  | " is already percent-encoded ('%20' encodes to '%2520'), a non-String | ||||||
|  | " argument (E1174), incomplete or invalid "%" sequences on decode, and an | ||||||
|  | " encode/decode round trip of the control characters 0x01-0x1F. | ||||||
|  | func Test_uriencoding() | ||||||
|  |   let lines =<< trim END | ||||||
|  |     #" uri encoding | ||||||
|  |     call assert_equal('a1%20b2', uri_encode('a1 b2')) | ||||||
|  |     call assert_equal('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-', uri_encode('-?&/#+=:[]@-')) | ||||||
|  |     call assert_equal('%22%3C%3E%5E%60%7B%7C%7D', uri_encode('"<>^`{|}')) | ||||||
|  |     call assert_equal('%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5', 'αβγδε'->uri_encode()) | ||||||
|  |     call assert_equal('r%C3%A9sum%C3%A9', uri_encode('résumé')) | ||||||
|  |     call assert_equal('%E4%BD%A0%E5%A5%BD', uri_encode('你好')) | ||||||
|  |     call assert_equal('%F0%9F%98%8A%F0%9F%98%8A', uri_encode('😊😊')) | ||||||
|  |     call assert_equal('-_.~', uri_encode('-_.~')) | ||||||
|  |     call assert_equal('', uri_encode('')) | ||||||
|  |     call assert_equal('%2520%2523', uri_encode('%20%23')) | ||||||
|  |     call assert_equal('', uri_encode(test_null_string())) | ||||||
|  |     call assert_equal('a', uri_encode('a')) | ||||||
|  |     call assert_equal('%20', uri_encode(' ')) | ||||||
|  |     call assert_equal('%CE%B1', uri_encode('α')) | ||||||
|  |     call assert_equal('c%3A%5Cmy%5Cdir%5Ca%20b%20c', uri_encode('c:\my\dir\a b c')) | ||||||
|  |     call assert_fails('call uri_encode([])', 'E1174: String required for argument 1') | ||||||
|  |  | ||||||
|  |     #" uri decoding | ||||||
|  |     call assert_equal('a1 b2', uri_decode('a1%20b2')) | ||||||
|  |     call assert_equal('-?&/#+=:[]@-', uri_decode('-%3F%26%2F%23%2B%3D%3A%5B%5D%40-')) | ||||||
|  |     call assert_equal('"<>^`{|}', uri_decode('%22%3C%3E%5E%60%7B%7C%7D')) | ||||||
|  |     call assert_equal('αβγδε', '%CE%B1%CE%B2%CE%B3%CE%B4%CE%B5'->uri_decode()) | ||||||
|  |     call assert_equal('résumé', uri_decode('r%C3%A9sum%C3%A9')) | ||||||
|  |     call assert_equal('你好', uri_decode('%E4%BD%A0%E5%A5%BD')) | ||||||
|  |     call assert_equal('😊😊', uri_decode('%F0%9F%98%8A%F0%9F%98%8A')) | ||||||
|  |     call assert_equal('a+b', uri_decode('a+b')) | ||||||
|  |     call assert_equal('-_.~', uri_decode('-_.~')) | ||||||
|  |     call assert_equal('', uri_decode('')) | ||||||
|  |     call assert_equal('%20%23', uri_decode('%2520%2523')) | ||||||
|  |     call assert_equal('', uri_decode(test_null_string())) | ||||||
|  |     call assert_equal('a', uri_decode('a')) | ||||||
|  |     call assert_equal(' ', uri_decode('%20')) | ||||||
|  |     call assert_equal('α', uri_decode('%CE%B1')) | ||||||
|  |     call assert_equal('c:\my\dir\a b c', uri_decode('c%3A%5Cmy%5Cdir%5Ca%20b%20c')) | ||||||
|  |     call assert_equal('%', uri_decode('%')) | ||||||
|  |     call assert_equal('%3', uri_decode('%3')) | ||||||
|  |     call assert_equal(';', uri_decode('%3b')) | ||||||
|  |     call assert_equal('a%xyb', uri_decode('a%xyb')) | ||||||
|  |     call assert_fails('call uri_decode([])', 'E1174: String required for argument 1') | ||||||
|  |  | ||||||
|  |     #" control characters | ||||||
|  |     VAR cstr = "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10" | ||||||
|  |     LET cstr ..= "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F" | ||||||
|  |     VAR expected = '' | ||||||
|  |     for i in range(1, 31) | ||||||
|  |       LET expected ..= printf("%%%02X", i) | ||||||
|  |     endfor | ||||||
|  |     call assert_equal(expected, uri_encode(cstr)) | ||||||
|  |     call assert_equal(cstr, uri_decode(expected)) | ||||||
|  |   END | ||||||
|  |   call v9.CheckLegacyAndVim9Success(lines) | ||||||
|  | endfunc | ||||||
|  |  | ||||||
| " vim: shiftwidth=2 sts=2 expandtab | " vim: shiftwidth=2 sts=2 expandtab | ||||||
|  | |||||||
| @ -724,6 +724,8 @@ static char *(features[]) = | |||||||
|  |  | ||||||
| static int included_patches[] = | static int included_patches[] = | ||||||
| {   /* Add new patch number below this line */ | {   /* Add new patch number below this line */ | ||||||
|  | /**/ | ||||||
|  |     1669, | ||||||
| /**/ | /**/ | ||||||
|     1668, |     1668, | ||||||
| /**/ | /**/ | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user