updated for version 7.4.293
Problem:    It is not possible to ignore composing characters at a specific
            point in a pattern.
Solution:   Add the %C item.
			
			
This commit is contained in:
		| @ -545,6 +545,7 @@ Character classes {not in Vi}:				*/character-classes* | ||||
| |/\%u|	\%u	\%u	match specified multibyte character (eg \%u20ac) | ||||
| |/\%U|	\%U	\%U	match specified large multibyte character (eg | ||||
| 			\%U12345678) | ||||
| |/\%C|	\%C	\%C	match any composing characters | ||||
|  | ||||
| Example			matches ~ | ||||
| \<\I\i*		or | ||||
| @ -1207,12 +1208,18 @@ will probably never match. | ||||
| 8. Composing characters					*patterns-composing* | ||||
|  | ||||
| 							*/\Z* | ||||
| When "\Z" appears anywhere in the pattern, composing characters are ignored. | ||||
| Thus only the base characters need to match, the composing characters may be | ||||
| different and the number of composing characters may differ.  Only relevant | ||||
| when 'encoding' is "utf-8". | ||||
| When "\Z" appears anywhere in the pattern, all composing characters are | ||||
| ignored.  Thus only the base characters need to match, the composing | ||||
| characters may be different and the number of composing characters may differ. | ||||
| Only relevant when 'encoding' is "utf-8". | ||||
| Exception: If the pattern starts with one or more composing characters, these | ||||
| must match. | ||||
| 							*/\%C* | ||||
| Use "\%C" to skip any composing characters.  For example, the pattern "a" does | ||||
| not match in "càt" (where the a has the composing character 0x0300), but | ||||
| "a\%C" does.  Note that this does not match "cát" (where the á is character | ||||
| 0xe1, it does not have a composing character).  It does match "cat" (where | ||||
| the a is just an a). | ||||
|  | ||||
| When a composing character appears at the start of the pattern or after an | ||||
| item that doesn't include the composing character, a match is found at any | ||||
|  | ||||
							
								
								
									
										21
									
								
								src/regexp.c
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								src/regexp.c
									
									
									
									
									
								
							| @ -244,6 +244,7 @@ | ||||
|  | ||||
| #define RE_MARK		207	/* mark cmp  Match mark position */ | ||||
| #define RE_VISUAL	208	/*	Match Visual area */ | ||||
| #define RE_COMPOSING	209	/* any composing characters */ | ||||
|  | ||||
| /* | ||||
|  * Magic characters have a special meaning, they don't match literally. | ||||
| @ -2208,6 +2209,10 @@ regatom(flagp) | ||||
| 		    ret = regnode(RE_VISUAL); | ||||
| 		    break; | ||||
|  | ||||
| 		case 'C': | ||||
| 		    ret = regnode(RE_COMPOSING); | ||||
| 		    break; | ||||
|  | ||||
| 		/* \%[abc]: Emit as a list of branches, all ending at the last | ||||
| 		 * branch which matches nothing. */ | ||||
| 		case '[': | ||||
| @ -4710,11 +4715,13 @@ regmatch(scan) | ||||
| 			    status = RA_NOMATCH; | ||||
| 		    } | ||||
| #ifdef FEAT_MBYTE | ||||
| 		    /* Check for following composing character. */ | ||||
| 		    /* Check for following composing character, unless %C | ||||
| 		     * follows (skips over all composing chars). */ | ||||
| 		    if (status != RA_NOMATCH | ||||
| 			    && enc_utf8 | ||||
| 			    && UTF_COMPOSINGLIKE(reginput, reginput + len) | ||||
| 			    && !ireg_icombine) | ||||
| 			    && !ireg_icombine | ||||
| 			    && OP(next) != RE_COMPOSING) | ||||
| 		    { | ||||
| 			/* raaron: This code makes a composing character get | ||||
| 			 * ignored, which is the correct behavior (sometimes) | ||||
| @ -4791,6 +4798,16 @@ regmatch(scan) | ||||
| 		status = RA_NOMATCH; | ||||
| 	    break; | ||||
| #endif | ||||
| 	  case RE_COMPOSING: | ||||
| #ifdef FEAT_MBYTE | ||||
| 	    if (enc_utf8) | ||||
| 	    { | ||||
| 		/* Skip composing characters. */ | ||||
| 		while (utf_iscomposing(utf_ptr2char(reginput))) | ||||
| 		    mb_cptr_adv(reginput); | ||||
| 	    } | ||||
| #endif | ||||
| 	    break; | ||||
|  | ||||
| 	  case NOTHING: | ||||
| 	    break; | ||||
|  | ||||
| @ -81,6 +81,7 @@ enum | ||||
|     NFA_COMPOSING,		    /* Next nodes in NFA are part of the | ||||
| 				       composing multibyte char */ | ||||
|     NFA_END_COMPOSING,		    /* End of a composing char in the NFA */ | ||||
|     NFA_ANY_COMPOSING,		    /* \%C: Any composing characters. */ | ||||
|     NFA_OPT_CHARS,		    /* \%[abc] */ | ||||
|  | ||||
|     /* The following are used only in the postfix form, not in the NFA */ | ||||
| @ -1418,6 +1419,10 @@ nfa_regatom() | ||||
| 		    EMIT(NFA_VISUAL); | ||||
| 		    break; | ||||
|  | ||||
| 		case 'C': | ||||
| 		    EMIT(NFA_ANY_COMPOSING); | ||||
| 		    break; | ||||
|  | ||||
| 		case '[': | ||||
| 		    { | ||||
| 			int	    n; | ||||
| @ -2429,6 +2434,7 @@ nfa_set_code(c) | ||||
| 	case NFA_MARK_LT:	STRCPY(code, "NFA_MARK_LT "); break; | ||||
| 	case NFA_CURSOR:	STRCPY(code, "NFA_CURSOR "); break; | ||||
| 	case NFA_VISUAL:	STRCPY(code, "NFA_VISUAL "); break; | ||||
| 	case NFA_ANY_COMPOSING:	STRCPY(code, "NFA_ANY_COMPOSING "); break; | ||||
|  | ||||
| 	case NFA_STAR:		STRCPY(code, "NFA_STAR "); break; | ||||
| 	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break; | ||||
| @ -2967,6 +2973,7 @@ nfa_max_width(startstate, depth) | ||||
| 	    case NFA_NLOWER_IC: | ||||
| 	    case NFA_UPPER_IC: | ||||
| 	    case NFA_NUPPER_IC: | ||||
| 	    case NFA_ANY_COMPOSING: | ||||
| 		/* possibly non-ascii */ | ||||
| #ifdef FEAT_MBYTE | ||||
| 		if (has_mbyte) | ||||
| @ -4152,6 +4159,7 @@ match_follows(startstate, depth) | ||||
| 		continue; | ||||
|  | ||||
| 	    case NFA_ANY: | ||||
| 	    case NFA_ANY_COMPOSING: | ||||
| 	    case NFA_IDENT: | ||||
| 	    case NFA_SIDENT: | ||||
| 	    case NFA_KWORD: | ||||
| @ -4395,7 +4403,7 @@ skip_add: | ||||
|     switch (state->c) | ||||
|     { | ||||
| 	case NFA_MATCH: | ||||
| 	    nfa_match = TRUE; | ||||
| //	    nfa_match = TRUE; | ||||
| 	    break; | ||||
|  | ||||
| 	case NFA_SPLIT: | ||||
| @ -5151,6 +5159,7 @@ failure_chance(state, depth) | ||||
|  | ||||
| 	case NFA_MATCH: | ||||
| 	case NFA_MCLOSE: | ||||
| 	case NFA_ANY_COMPOSING: | ||||
| 	    /* empty match works always */ | ||||
| 	    return 0; | ||||
|  | ||||
| @ -5573,6 +5582,12 @@ nfa_regmatch(prog, start, submatch, m) | ||||
| 	    { | ||||
| 	    case NFA_MATCH: | ||||
| 	      { | ||||
| #ifdef FEAT_MBYTE | ||||
| 		/* If the match ends before a composing character and | ||||
| 		 * ireg_icombine is not set, that is not really a match. */ | ||||
| 		if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc)) | ||||
| 		    break; | ||||
| #endif | ||||
| 		nfa_match = TRUE; | ||||
| 		copy_sub(&submatch->norm, &t->subs.norm); | ||||
| #ifdef FEAT_SYN_HL | ||||
| @ -6120,6 +6135,23 @@ nfa_regmatch(prog, start, submatch, m) | ||||
| 		} | ||||
| 		break; | ||||
|  | ||||
| 	    case NFA_ANY_COMPOSING: | ||||
| 		/* On a composing character skip over it.  Otherwise do | ||||
| 		 * nothing.  Always matches. */ | ||||
| #ifdef FEAT_MBYTE | ||||
| 		if (enc_utf8 && utf_iscomposing(curc)) | ||||
| 		{ | ||||
| 		    add_off = clen; | ||||
| 		} | ||||
| 		else | ||||
| #endif | ||||
| 		{ | ||||
| 		    add_here = TRUE; | ||||
| 		    add_off = 0; | ||||
| 		} | ||||
| 		add_state = t->state->out; | ||||
| 		break; | ||||
|  | ||||
| 	    /* | ||||
| 	     * Character classes like \a for alpha, \d for digit etc. | ||||
| 	     */ | ||||
| @ -6484,12 +6516,10 @@ nfa_regmatch(prog, start, submatch, m) | ||||
| 		if (!result && ireg_ic) | ||||
| 		    result = MB_TOLOWER(c) == MB_TOLOWER(curc); | ||||
| #ifdef FEAT_MBYTE | ||||
| 		/* If there is a composing character which is not being | ||||
| 		 * ignored there can be no match. Match with composing | ||||
| 		 * character uses NFA_COMPOSING above. */ | ||||
| 		if (result && enc_utf8 && !ireg_icombine | ||||
| 						&& clen != utf_char2len(curc)) | ||||
| 		    result = FALSE; | ||||
| 		/* If ireg_icombine is not set only skip over the character | ||||
| 		 * itself.  When it is set skip over composing characters. */ | ||||
| 		if (result && enc_utf8 && !ireg_icombine) | ||||
| 		    clen = utf_char2len(curc); | ||||
| #endif | ||||
| 		ADD_STATE_IF_MATCH(t->state); | ||||
| 		break; | ||||
|  | ||||
| @ -734,6 +734,8 @@ static char *(features[]) = | ||||
|  | ||||
| static int included_patches[] = | ||||
| {   /* Add new patch number below this line */ | ||||
| /**/ | ||||
|     293, | ||||
| /**/ | ||||
|     292, | ||||
| /**/ | ||||
|  | ||||
		Reference in New Issue
	
	Block a user