1 /* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Copyright (c) 2011 The FreeBSD Foundation 9 * All rights reserved. 10 * Portions of this software were developed by David Chisnall 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)fnmatch.c 8.2 (Berkeley) 4/16/94 38 * $FreeBSD: head/lib/libc/gen/fnmatch.c 254091 2013-08-08 09:04:02Z ache $ 39 */ 40 41 42 /* 43 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 44 * Compares a filename or pathname to a pattern. 45 */ 46 47 /* 48 * Some notes on multibyte character support: 49 * 1. Patterns with illegal byte sequences match nothing. 50 * 2. Illegal byte sequences in the "string" argument are handled by treating 51 * them as single-byte characters with a value of the first byte of the 52 * sequence cast to wchar_t. 53 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 54 * used for most, but not all, conversions. Further work will be required 55 * to support state-dependent encodings. 56 */ 57 58 #include <fnmatch.h> 59 #include <limits.h> 60 #include <string.h> 61 #include <wchar.h> 62 #include <wctype.h> 63 64 #include "collate.h" 65 66 #define EOS '\0' 67 68 #define RANGE_MATCH 1 69 #define RANGE_NOMATCH 0 70 #define RANGE_ERROR (-1) 71 72 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); 73 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 74 mbstate_t); 75 76 int 77 fnmatch(pattern, string, flags) 78 const char *pattern, *string; 79 int flags; 80 { 81 static const mbstate_t initial; 82 83 return (fnmatch1(pattern, string, string, flags, initial, initial)); 84 } 85 86 static int 87 fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs) 88 const char *pattern, *string, *stringstart; 89 int flags; 90 mbstate_t patmbs, strmbs; 91 { 92 char *newp; 93 char c; 94 wchar_t pc, sc; 95 size_t pclen, sclen; 96 97 for (;;) { 98 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 99 if (pclen == (size_t)-1 || pclen == (size_t)-2) 100 return (FNM_NOMATCH); 101 pattern += pclen; 102 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 103 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 104 sc = (unsigned char)*string; 105 sclen = 1; 106 memset(&strmbs, 0, sizeof(strmbs)); 107 } 108 switch (pc) { 109 case EOS: 110 if ((flags & FNM_LEADING_DIR) && sc == '/') 111 return (0); 112 return (sc == EOS ? 0 : FNM_NOMATCH); 113 case '?': 114 if (sc == EOS) 115 return (FNM_NOMATCH); 116 if (sc == '/' && (flags & FNM_PATHNAME)) 117 return (FNM_NOMATCH); 118 if (sc == '.' && (flags & FNM_PERIOD) && 119 (string == stringstart || 120 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 121 return (FNM_NOMATCH); 122 string += sclen; 123 break; 124 case '*': 125 c = *pattern; 126 /* Collapse multiple stars. */ 127 while (c == '*') 128 c = *++pattern; 129 130 if (sc == '.' && (flags & FNM_PERIOD) && 131 (string == stringstart || 132 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 133 return (FNM_NOMATCH); 134 135 /* Optimize for pattern with * at end or before /. */ 136 if (c == EOS) 137 if (flags & FNM_PATHNAME) 138 return ((flags & FNM_LEADING_DIR) || 139 strchr(string, '/') == NULL ? 140 0 : FNM_NOMATCH); 141 else 142 return (0); 143 else if (c == '/' && flags & FNM_PATHNAME) { 144 if ((string = strchr(string, '/')) == NULL) 145 return (FNM_NOMATCH); 146 break; 147 } 148 149 /* General case, use recursion. */ 150 while (sc != EOS) { 151 if (!fnmatch1(pattern, string, stringstart, 152 flags, patmbs, strmbs)) 153 return (0); 154 sclen = mbrtowc(&sc, string, MB_LEN_MAX, 155 &strmbs); 156 if (sclen == (size_t)-1 || 157 sclen == (size_t)-2) { 158 sc = (unsigned char)*string; 159 sclen = 1; 160 memset(&strmbs, 0, sizeof(strmbs)); 161 } 162 if (sc == '/' && flags & FNM_PATHNAME) 163 break; 164 string += sclen; 165 } 166 return (FNM_NOMATCH); 167 case '[': 168 if (sc == EOS) 169 return (FNM_NOMATCH); 170 if (sc == '/' && (flags & FNM_PATHNAME)) 171 return (FNM_NOMATCH); 172 if (sc == '.' && (flags & FNM_PERIOD) && 173 (string == stringstart || 174 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 175 return (FNM_NOMATCH); 176 177 switch (rangematch(pattern, sc, flags, &newp, 178 &patmbs)) { 179 case RANGE_ERROR: 180 goto norm; 181 case RANGE_MATCH: 182 pattern = newp; 183 break; 184 case RANGE_NOMATCH: 185 return (FNM_NOMATCH); 186 } 187 string += sclen; 188 break; 189 case '\\': 190 if (!(flags & FNM_NOESCAPE)) { 191 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 192 &patmbs); 193 if (pclen == (size_t)-1 || pclen == (size_t)-2) 194 return (FNM_NOMATCH); 195 pattern += pclen; 196 } 197 /* FALLTHROUGH */ 198 default: 199 norm: 200 if (pc == sc) 201 ; 202 else if ((flags & FNM_CASEFOLD) && 203 (towlower(pc) == towlower(sc))) 204 ; 205 else 206 return (FNM_NOMATCH); 207 string += sclen; 208 break; 209 } 210 } 211 /* NOTREACHED */ 212 } 213 214 static int 215 rangematch(pattern, test, flags, newp, patmbs) 216 const char *pattern; 217 wchar_t test; 218 int flags; 219 char **newp; 220 mbstate_t *patmbs; 221 { 222 int negate, ok; 223 wchar_t c, c2; 224 size_t pclen; 225 const char *origpat; 226 struct xlocale_collate *table = 227 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 228 229 /* 230 * A bracket expression starting with an unquoted circumflex 231 * character produces unspecified results (IEEE 1003.2-1992, 232 * 3.13.2). This implementation treats it like '!', for 233 * consistency with the regular expression syntax. 234 * J.T. Conklin (conklin@ngai.kaleida.com) 235 */ 236 if ( (negate = (*pattern == '!' || *pattern == '^')) ) 237 ++pattern; 238 239 if (flags & FNM_CASEFOLD) 240 test = towlower(test); 241 242 /* 243 * A right bracket shall lose its special meaning and represent 244 * itself in a bracket expression if it occurs first in the list. 245 * -- POSIX.2 2.8.3.2 246 */ 247 ok = 0; 248 origpat = pattern; 249 for (;;) { 250 if (*pattern == ']' && pattern > origpat) { 251 pattern++; 252 break; 253 } else if (*pattern == '\0') { 254 return (RANGE_ERROR); 255 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 256 return (RANGE_NOMATCH); 257 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) 258 pattern++; 259 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 260 if (pclen == (size_t)-1 || pclen == (size_t)-2) 261 return (RANGE_NOMATCH); 262 pattern += pclen; 263 264 if (flags & FNM_CASEFOLD) 265 c = towlower(c); 266 267 if (*pattern == '-' && *(pattern + 1) != EOS && 268 *(pattern + 1) != ']') { 269 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 270 if (*pattern != EOS) 271 pattern++; 272 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 273 if (pclen == (size_t)-1 || pclen == (size_t)-2) 274 return (RANGE_NOMATCH); 275 pattern += pclen; 276 if (c2 == EOS) 277 return (RANGE_ERROR); 278 279 if (flags & FNM_CASEFOLD) 280 c2 = towlower(c2); 281 282 if (table->__collate_load_error ? 283 c <= test && test <= c2 : 284 __collate_range_cmp(table, c, test) <= 0 285 && __collate_range_cmp(table, test, c2) <= 0 286 ) 287 ok = 1; 288 } else if (c == test) 289 ok = 1; 290 } 291 292 *newp = (char *)pattern; 293 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 294 } 295