1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005
2 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
17 #include <sys/cdefs.h>
18 __RCSID("$NetBSD: fnmatch.c,v 1.2 2016/05/17 14:00:09 christos Exp $");
19
20
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25 /* Enable GNU extensions in fnmatch.h. */
26 #ifndef _GNU_SOURCE
27 # define _GNU_SOURCE 1
28 #endif
29
30 #if ! defined __builtin_expect && __GNUC__ < 3
31 # define __builtin_expect(expr, expected) (expr)
32 #endif
33
34 #include <fnmatch.h>
35
36 #include <alloca.h>
37 #include <assert.h>
38 #include <ctype.h>
39 #include <errno.h>
40 #include <stddef.h>
41 #include <stdbool.h>
42 #include <stdlib.h>
43 #include <string.h>
44
/* Nonzero when every wide-character facility tested below is available:
   <wctype.h>, <wchar.h>, btowc, wmemchr, and at least one of wmemcpy or
   wmempcpy.  The HAVE_* macros are presumably supplied by config.h; any
   that is undefined evaluates to 0 where this macro is used in #if.  */
#define WIDE_CHAR_SUPPORT \
(HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC \
&& HAVE_WMEMCHR && (HAVE_WMEMCPY || HAVE_WMEMPCPY))
48
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
51 #if defined _LIBC || WIDE_CHAR_SUPPORT
52 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
53 # include <wchar.h>
54 # include <wctype.h>
55 #endif
56
57 /* We need some of the locale data (the collation sequence information)
58 but there is no interface to get this information in general. Therefore
59 we support a correct implementation only in glibc. */
60 #ifdef _LIBC
61 # include "../locale/localeinfo.h"
62 # include "../locale/elem-hash.h"
63 # include "../locale/coll-lookup.h"
64 # include <shlib-compat.h>
65
66 # define CONCAT(a,b) __CONCAT(a,b)
67 # define mbsrtowcs __mbsrtowcs
68 # define fnmatch __fnmatch
69 extern int fnmatch (const char *pattern, const char *string, int flags);
70 #endif
71
72 #ifndef SIZE_MAX
73 # define SIZE_MAX ((size_t) -1)
74 #endif
75
/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set.
   Evaluates to nonzero only when both bits are present in FLAGS.  The
   argument is parenthesized so that an arbitrary expression (for example
   `a | b') is masked as a whole rather than having `&' bind to its last
   operand only.  */
#define NO_LEADING_PERIOD(flags) \
  (((flags) & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
79
80 /* Comment out all this code if we are using the GNU C Library, and are not
81 actually compiling the library itself, and have not detected a bug
82 in the library. This code is part of the GNU C
83 Library, but also included in many other GNU distributions. Compiling
84 and linking in this code is a waste when using the GNU C library
85 (especially if it is a shared library). Rather than having every GNU
86 program understand `configure --with-gnu-libc' and omit the object files,
87 it is simpler to just do this in the source for each such file. */
88
89 #if defined _LIBC || !defined __GNU_LIBRARY__ || !HAVE_FNMATCH_GNU
90
91
/* Character classification wrappers around <ctype.h>.

   ISASCII (c) is a guard evaluated before each <ctype.h> call below.  It
   is the constant 1 when STDC_HEADERS is defined or when isascii is not
   available as a macro, and isascii (c) otherwise.  Because the guard may
   expand to a call, the wrappers can evaluate their argument twice.  */
# if defined STDC_HEADERS || !defined isascii
# define ISASCII(c) 1
# else
# define ISASCII(c) isascii(c)
# endif

/* Use isblank and isgraph when they exist as macros; otherwise fall back
   to the definitions spelled out here (space or tab for ISBLANK,
   printable and not white space for ISGRAPH).  */
# ifdef isblank
# define ISBLANK(c) (ISASCII (c) && isblank (c))
# else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
# ifdef isgraph
# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
# else
# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
# endif

/* The remaining predicates are the <ctype.h> function of the same name,
   guarded by ISASCII.  */
# define ISPRINT(c) (ISASCII (c) && isprint (c))
# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
# define ISALNUM(c) (ISASCII (c) && isalnum (c))
# define ISALPHA(c) (ISASCII (c) && isalpha (c))
# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
# define ISLOWER(c) (ISASCII (c) && islower (c))
# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
# define ISSPACE(c) (ISASCII (c) && isspace (c))
# define ISUPPER(c) (ISASCII (c) && isupper (c))
# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))

/* Nonzero when the strings S1 and S2 compare equal under strcmp.  */
# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
121
/* Character class lookup.  Two configurations are provided: one built on
   the wide-character classification functions (wctype / iswctype), and a
   fallback that only recognizes a fixed list of class names.  */
# if defined _LIBC || WIDE_CHAR_SUPPORT
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C amendment 1. */
/* CHAR_CLASS_MAX_LENGTH is the longest class name accepted, not counting
   the terminating null character.  */
# ifdef CHARCLASS_NAME_MAX
# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
# else
/* This shouldn't happen but some implementation might still have this
   problem. Use a reasonable default value. */
# define CHAR_CLASS_MAX_LENGTH 256
# endif

/* IS_CHAR_CLASS (string) looks the class name STRING up with wctype (the
   internal __wctype name is used when building glibc itself).  */
# ifdef _LIBC
# define IS_CHAR_CLASS(string) __wctype (string)
# else
# define IS_CHAR_CLASS(string) wctype (string)
# endif

/* ISWCTYPE (WC, WT) forwards to iswctype, or to __iswctype inside
   glibc.  */
# ifdef _LIBC
# define ISWCTYPE(WC, WT) __iswctype (WC, WT)
# else
# define ISWCTYPE(WC, WT) iswctype (WC, WT)
# endif

/* HANDLE_MULTIBYTE enables the wide-character matcher and the multibyte
   branch of fnmatch further down in this file.  It requires mbstate_t
   and mbsrtowcs, or a glibc build.  */
# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
/* In this case we are implementing the multibyte character handling. */
# define HANDLE_MULTIBYTE 1
# endif

# else
/* Fallback without wide-character support: only the twelve class names
   listed in IS_CHAR_CLASS below are recognized.  */
# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */

# define IS_CHAR_CLASS(string) \
(STREQ (string, "alpha") || STREQ (string, "upper") \
|| STREQ (string, "lower") || STREQ (string, "digit") \
|| STREQ (string, "alnum") || STREQ (string, "xdigit") \
|| STREQ (string, "space") || STREQ (string, "print") \
|| STREQ (string, "punct") || STREQ (string, "graph") \
|| STREQ (string, "cntrl") || STREQ (string, "blank"))
# endif
161
/* Avoid depending on library functions or files
   whose names are inconsistent. */
/* NOTE(review): no declaration follows the comment above in the code
   visible here; it may be left over from an earlier revision.  */

/* Global variable. */
/* File-scope flag, zero-initialized.  It is not referenced anywhere in
   the visible part of this file; presumably it is consulted by the code
   in fnmatch_loop.c -- TODO confirm.  */
static int posixly_correct;

# ifndef internal_function
/* Inside GNU libc we mark some function in a special way. In other
   environments simply ignore the marking. */
# define internal_function
# endif

/* Parameters for the single-byte instantiation of the matcher.  The file
   fnmatch_loop.c is included with the macros below in effect; FCT names
   the entry point (internal_fnmatch) that fnmatch calls for byte-wise
   matching.  EXT and END presumably name helper functions defined by the
   same template -- confirm against fnmatch_loop.c.  */

/* Note that this evaluates C many times. */
/* FOLD also refers to a variable named `flags', which must be in scope
   wherever the macro is used.  Outside glibc, tolower is applied only to
   characters for which ISUPPER holds.  */
# ifdef _LIBC
# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
# else
# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
# endif
/* Character types and literal prefix for this instantiation: plain char,
   and L(CS) leaves a literal unchanged.  */
# define CHAR char
# define UCHAR unsigned char
# define INT int
# define FCT internal_fnmatch
# define EXT ext_match
# define END end_pattern
# define L(CS) CS
# ifdef _LIBC
# define BTOWC(C) __btowc (C)
# else
# define BTOWC(C) btowc (C)
# endif
/* String and memory primitives for char strings.  */
# define STRLEN(S) strlen (S)
# define STRCAT(D, S) strcat (D, S)
# ifdef _LIBC
# define MEMPCPY(D, S, N) __mempcpy (D, S, N)
# else
# if HAVE_MEMPCPY
# define MEMPCPY(D, S, N) mempcpy (D, S, N)
# else
/* Emulation: copy with memcpy, then advance the returned destination
   pointer by N bytes.  */
# define MEMPCPY(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N)))
# endif
# endif
# define MEMCHR(S, C, N) memchr (S, C, N)
# define STRCOLL(S1, S2) strcoll (S1, S2)
# include "fnmatch_loop.c"
206
207
/* Parameters for the wide-character instantiation of the matcher, used
   only when HANDLE_MULTIBYTE is set.  The conditional opened here is
   closed after the second inclusion of fnmatch_loop.c further down.
   NOTE(review): every name defined below was already defined for the
   single-byte pass; presumably fnmatch_loop.c #undefs them at its end --
   confirm.  */
# if HANDLE_MULTIBYTE
/* Like the single-byte FOLD, this refers to a variable named `flags' in
   the scope where it is used.  */
# define FOLD(c) ((flags & FNM_CASEFOLD) ? towlower (c) : (c))
/* Character types and literal prefix: wchar_t strings, and L(CS) pastes
   an L prefix onto a literal.  */
# define CHAR wchar_t
# define UCHAR wint_t
# define INT wint_t
# define FCT internal_fnwmatch
# define EXT ext_wmatch
# define END end_wpattern
# define L(CS) L##CS
/* Characters are already wide here, so no conversion is applied.  */
# define BTOWC(C) (C)
/* String and memory primitives for wchar_t strings.  */
# ifdef _LIBC
# define STRLEN(S) __wcslen (S)
# define STRCAT(D, S) __wcscat (D, S)
# define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
# else
# define STRLEN(S) wcslen (S)
# define STRCAT(D, S) wcscat (D, S)
# if HAVE_WMEMPCPY
# define MEMPCPY(D, S, N) wmempcpy (D, S, N)
# else
/* Emulation: copy with wmemcpy, then advance the returned destination
   pointer by N wide characters.  */
# define MEMPCPY(D, S, N) (wmemcpy (D, S, N) + (N))
# endif
# endif
# define MEMCHR(S, C, N) wmemchr (S, C, N)
# define STRCOLL(S1, S2) wcscoll (S1, S2)
/* Defined only for this pass; presumably tested by fnmatch_loop.c to
   select wide-character specific code -- confirm.  */
# define WIDE_CHAR_VERSION 1
234
235 # undef IS_CHAR_CLASS
236 /* We have to convert the wide character string in a multibyte string. But
237 we know that the character class names consist of alphanumeric characters
238 from the portable character set, and since the wide character encoding
239 for a member of the portable character set is the same code point as
240 its single-byte encoding, we can use a simplified method to convert the
241 string to a multibyte character string. */
242 static wctype_t
is_char_class(const wchar_t * wcs)243 is_char_class (const wchar_t *wcs)
244 {
245 char s[CHAR_CLASS_MAX_LENGTH + 1];
246 char *cp = s;
247
248 do
249 {
250 /* Test for a printable character from the portable character set. */
251 # ifdef _LIBC
252 if (*wcs < 0x20 || *wcs > 0x7e
253 || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60)
254 return (wctype_t) 0;
255 # else
256 switch (*wcs)
257 {
258 case L' ': case L'!': case L'"': case L'#': case L'%':
259 case L'&': case L'\'': case L'(': case L')': case L'*':
260 case L'+': case L',': case L'-': case L'.': case L'/':
261 case L'0': case L'1': case L'2': case L'3': case L'4':
262 case L'5': case L'6': case L'7': case L'8': case L'9':
263 case L':': case L';': case L'<': case L'=': case L'>':
264 case L'?':
265 case L'A': case L'B': case L'C': case L'D': case L'E':
266 case L'F': case L'G': case L'H': case L'I': case L'J':
267 case L'K': case L'L': case L'M': case L'N': case L'O':
268 case L'P': case L'Q': case L'R': case L'S': case L'T':
269 case L'U': case L'V': case L'W': case L'X': case L'Y':
270 case L'Z':
271 case L'[': case L'\\': case L']': case L'^': case L'_':
272 case L'a': case L'b': case L'c': case L'd': case L'e':
273 case L'f': case L'g': case L'h': case L'i': case L'j':
274 case L'k': case L'l': case L'm': case L'n': case L'o':
275 case L'p': case L'q': case L'r': case L's': case L't':
276 case L'u': case L'v': case L'w': case L'x': case L'y':
277 case L'z': case L'{': case L'|': case L'}': case L'~':
278 break;
279 default:
280 return (wctype_t) 0;
281 }
282 # endif
283
284 /* Avoid overrunning the buffer. */
285 if (cp == s + CHAR_CLASS_MAX_LENGTH)
286 return (wctype_t) 0;
287
288 *cp++ = (char) *wcs++;
289 }
290 while (*wcs != L'\0');
291
292 *cp = '\0';
293
294 # ifdef _LIBC
295 return __wctype (s);
296 # else
297 return wctype (s);
298 # endif
299 }
300 # define IS_CHAR_CLASS(string) is_char_class (string)
301
302 # include "fnmatch_loop.c"
303 # endif
304
305
/* Match STRING against the wildcard PATTERN under the control of FLAGS.

   When HANDLE_MULTIBYTE is set and the current locale uses multibyte
   characters (MB_CUR_MAX != 1), both arguments are converted to wide
   strings and matched by internal_fnwmatch.  If either argument cannot
   be converted, or in a single-byte locale, the bytes are matched
   directly by internal_fnmatch.

   Returns the result of the internal matcher.  Returns -1 with errno set
   to ENOMEM when the size of the wide-character buffer would overflow or
   the buffer cannot be allocated.  */
int
fnmatch (const char *pattern, const char *string, int flags)
{
# if HANDLE_MULTIBYTE
  /* Buffers of fewer than this many wide characters are taken from the
     stack; larger ones come from malloc.  */
#  define ALLOCA_LIMIT 2000
  if (__builtin_expect (MB_CUR_MAX, 1) != 1)
    {
      mbstate_t state;
      size_t pat_units;
      size_t str_units;
      size_t all_units;
      bool on_heap;
      wchar_t *wide_pat;
      wchar_t *wide_str;
      int verdict;

      memset (&state, '\0', sizeof (state));

      /* Counting pass.  Adding one makes room for the terminator and
         turns the (size_t) -1 error return of mbsrtowcs into 0.  */
      pat_units = mbsrtowcs (NULL, &pattern, 0, &state) + 1;
      if (__builtin_expect (pat_units == 0, 0))
        goto match_bytes;
      assert (mbsinit (&state));

      str_units = mbsrtowcs (NULL, &string, 0, &state) + 1;
      if (__builtin_expect (str_units == 0, 0))
        goto match_bytes;
      assert (mbsinit (&state));

      /* Refuse sizes whose sum wraps around or whose byte count would
         not fit in a size_t.  */
      all_units = pat_units + str_units;
      if (__builtin_expect (all_units < pat_units
                            || SIZE_MAX / sizeof (wchar_t) < all_units,
                            0))
        {
          errno = ENOMEM;
          return -1;
        }

      /* One buffer holds the wide pattern followed by the wide string.  */
      on_heap = ! (all_units < ALLOCA_LIMIT);
      if (__builtin_expect (on_heap, 0))
        {
          wide_pat = malloc (all_units * sizeof (wchar_t));
          if (__builtin_expect (! wide_pat, 0))
            {
              errno = ENOMEM;
              return -1;
            }
        }
      else
        wide_pat = (wchar_t *) alloca (all_units * sizeof (wchar_t));
      wide_str = wide_pat + pat_units;

      /* Conversion pass.  */
      mbsrtowcs (wide_pat, &pattern, pat_units, &state);
      assert (mbsinit (&state));
      mbsrtowcs (wide_str, &string, str_units, &state);

      /* The third argument points at the terminator of the wide string.  */
      verdict = internal_fnwmatch (wide_pat, wide_str,
                                   wide_str + str_units - 1,
                                   flags & FNM_PERIOD, flags);

      if (__builtin_expect (on_heap, 0))
        free (wide_pat);
      return verdict;
    }

  /* Reached in a single-byte locale, or when a conversion failed.  */
 match_bytes:
# endif /* HANDLE_MULTIBYTE */

  return internal_fnmatch (pattern, string, string + strlen (string),
                           flags & FNM_PERIOD, flags);
}
375
/* glibc build only.  Earlier in this file `fnmatch' is defined as a macro
   for `__fnmatch', so the function above is compiled under that name;
   the macro is dropped here so the public name can be used below.
   NOTE(review): versioned_symbol, strong_alias, compat_symbol and
   libc_hidden_ver come from glibc-internal headers not visible here.
   Presumably they export __fnmatch as fnmatch at version GLIBC_2_2_3 and,
   when compatibility with GLIBC_2_0 is enabled, also under the older
   version through the alias __fnmatch_old -- confirm against
   <shlib-compat.h>.  */
# ifdef _LIBC
# undef fnmatch
versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
strong_alias (__fnmatch, __fnmatch_old)
compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
# endif
libc_hidden_ver (__fnmatch, fnmatch)
# endif
385
386 #endif /* _LIBC or not __GNU_LIBRARY__. */
387