1*75f6d617Schristos /* $NetBSD: regex.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $ */
2*75f6d617Schristos
3*75f6d617Schristos /* Extended regular expression matching and search library,
4*75f6d617Schristos version 0.12.
5*75f6d617Schristos (Implements POSIX draft P1003.2/D11.2, except for some of the
6*75f6d617Schristos internationalization features.)
7*75f6d617Schristos Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc.
8*75f6d617Schristos
9*75f6d617Schristos This program is free software; you can redistribute it and/or modify
10*75f6d617Schristos it under the terms of the GNU General Public License as published by
11*75f6d617Schristos the Free Software Foundation; either version 2, or (at your option)
12*75f6d617Schristos any later version.
13*75f6d617Schristos
14*75f6d617Schristos This program is distributed in the hope that it will be useful,
15*75f6d617Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of
16*75f6d617Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17*75f6d617Schristos GNU General Public License for more details.
18*75f6d617Schristos
19*75f6d617Schristos You should have received a copy of the GNU General Public License
20*75f6d617Schristos along with this program; if not, write to the Free Software Foundation,
21*75f6d617Schristos Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22*75f6d617Schristos
23*75f6d617Schristos /* AIX requires this to be the first thing in the file. */
24*75f6d617Schristos #if defined _AIX && !defined REGEX_MALLOC
25*75f6d617Schristos #pragma alloca
26*75f6d617Schristos #endif
27*75f6d617Schristos
28*75f6d617Schristos #undef _GNU_SOURCE
29*75f6d617Schristos #define _GNU_SOURCE
30*75f6d617Schristos
31*75f6d617Schristos #ifdef HAVE_CONFIG_H
32*75f6d617Schristos # include <config.h>
33*75f6d617Schristos #endif
34*75f6d617Schristos
35*75f6d617Schristos #ifndef PARAMS
36*75f6d617Schristos # if defined __GNUC__ || (defined __STDC__ && __STDC__)
37*75f6d617Schristos # define PARAMS(args) args
38*75f6d617Schristos # else
39*75f6d617Schristos # define PARAMS(args) ()
40*75f6d617Schristos # endif /* GCC. */
41*75f6d617Schristos #endif /* Not PARAMS. */
42*75f6d617Schristos
43*75f6d617Schristos #ifndef INSIDE_RECURSION
44*75f6d617Schristos
45*75f6d617Schristos # if defined STDC_HEADERS && !defined emacs
46*75f6d617Schristos # include <stddef.h>
47*75f6d617Schristos # else
48*75f6d617Schristos /* We need this for `regex.h', and perhaps for the Emacs include files. */
49*75f6d617Schristos # include <sys/types.h>
50*75f6d617Schristos # endif
51*75f6d617Schristos
52*75f6d617Schristos # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
53*75f6d617Schristos
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
56*75f6d617Schristos # if defined _LIBC || WIDE_CHAR_SUPPORT
57*75f6d617Schristos /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
58*75f6d617Schristos # include <wchar.h>
59*75f6d617Schristos # include <wctype.h>
60*75f6d617Schristos # endif
61*75f6d617Schristos
62*75f6d617Schristos # ifdef _LIBC
63*75f6d617Schristos /* We have to keep the namespace clean. */
64*75f6d617Schristos # define regfree(preg) __regfree (preg)
65*75f6d617Schristos # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
66*75f6d617Schristos # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
67*75f6d617Schristos # define regerror(errcode, preg, errbuf, errbuf_size) \
68*75f6d617Schristos __regerror(errcode, preg, errbuf, errbuf_size)
69*75f6d617Schristos # define re_set_registers(bu, re, nu, st, en) \
70*75f6d617Schristos __re_set_registers (bu, re, nu, st, en)
71*75f6d617Schristos # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
72*75f6d617Schristos __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
73*75f6d617Schristos # define re_match(bufp, string, size, pos, regs) \
74*75f6d617Schristos __re_match (bufp, string, size, pos, regs)
75*75f6d617Schristos # define re_search(bufp, string, size, startpos, range, regs) \
76*75f6d617Schristos __re_search (bufp, string, size, startpos, range, regs)
77*75f6d617Schristos # define re_compile_pattern(pattern, length, bufp) \
78*75f6d617Schristos __re_compile_pattern (pattern, length, bufp)
79*75f6d617Schristos # define re_set_syntax(syntax) __re_set_syntax (syntax)
80*75f6d617Schristos # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
81*75f6d617Schristos __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
82*75f6d617Schristos # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
83*75f6d617Schristos
84*75f6d617Schristos # define btowc __btowc
85*75f6d617Schristos # define iswctype __iswctype
86*75f6d617Schristos # define mbrtowc __mbrtowc
87*75f6d617Schristos # define wcslen __wcslen
88*75f6d617Schristos # define wcscoll __wcscoll
89*75f6d617Schristos # define wcrtomb __wcrtomb
90*75f6d617Schristos
91*75f6d617Schristos /* We are also using some library internals. */
92*75f6d617Schristos # include <locale/localeinfo.h>
93*75f6d617Schristos # include <locale/elem-hash.h>
94*75f6d617Schristos # include <langinfo.h>
95*75f6d617Schristos # include <locale/coll-lookup.h>
96*75f6d617Schristos # endif
97*75f6d617Schristos
98*75f6d617Schristos /* This is for other GNU distributions with internationalized messages. */
99*75f6d617Schristos # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
100*75f6d617Schristos # include <libintl.h>
101*75f6d617Schristos # ifdef _LIBC
102*75f6d617Schristos # undef gettext
103*75f6d617Schristos # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
104*75f6d617Schristos # endif
105*75f6d617Schristos # else
106*75f6d617Schristos # define gettext(msgid) (msgid)
107*75f6d617Schristos # endif
108*75f6d617Schristos
109*75f6d617Schristos # ifndef gettext_noop
110*75f6d617Schristos /* This define is so xgettext can find the internationalizable
111*75f6d617Schristos strings. */
112*75f6d617Schristos # define gettext_noop(String) String
113*75f6d617Schristos # endif
114*75f6d617Schristos
115*75f6d617Schristos /* Support for bounded pointers. */
116*75f6d617Schristos # if !defined _LIBC && !defined __BOUNDED_POINTERS__
117*75f6d617Schristos # define __bounded /* nothing */
118*75f6d617Schristos # define __unbounded /* nothing */
119*75f6d617Schristos # define __ptrvalue /* nothing */
120*75f6d617Schristos # endif
121*75f6d617Schristos
122*75f6d617Schristos /* The `emacs' switch turns on certain matching commands
123*75f6d617Schristos that make sense only in Emacs. */
124*75f6d617Schristos # ifdef emacs
125*75f6d617Schristos
126*75f6d617Schristos # include "lisp.h"
127*75f6d617Schristos # include "buffer.h"
128*75f6d617Schristos # include "syntax.h"
129*75f6d617Schristos
130*75f6d617Schristos # else /* not emacs */
131*75f6d617Schristos
132*75f6d617Schristos /* If we are not linking with Emacs proper,
133*75f6d617Schristos we can't use the relocating allocator
134*75f6d617Schristos even if config.h says that we can. */
135*75f6d617Schristos # undef REL_ALLOC
136*75f6d617Schristos
137*75f6d617Schristos # if defined STDC_HEADERS || defined _LIBC
138*75f6d617Schristos # include <stdlib.h>
139*75f6d617Schristos # else
140*75f6d617Schristos char *malloc ();
141*75f6d617Schristos char *realloc ();
142*75f6d617Schristos # endif
143*75f6d617Schristos
144*75f6d617Schristos /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
145*75f6d617Schristos If nothing else has been done, use the method below. */
146*75f6d617Schristos # ifdef INHIBIT_STRING_HEADER
147*75f6d617Schristos # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
148*75f6d617Schristos # if !defined bzero && !defined bcopy
149*75f6d617Schristos # undef INHIBIT_STRING_HEADER
150*75f6d617Schristos # endif
151*75f6d617Schristos # endif
152*75f6d617Schristos # endif
153*75f6d617Schristos
154*75f6d617Schristos /* This is the normal way of making sure we have a bcopy and a bzero.
155*75f6d617Schristos This is used in most programs--a few other programs avoid this
156*75f6d617Schristos by defining INHIBIT_STRING_HEADER. */
157*75f6d617Schristos # ifndef INHIBIT_STRING_HEADER
158*75f6d617Schristos # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
159*75f6d617Schristos # include <string.h>
160*75f6d617Schristos # ifndef bzero
161*75f6d617Schristos # ifndef _LIBC
162*75f6d617Schristos # define bzero(s, n) (memset (s, '\0', n), (s))
163*75f6d617Schristos # else
164*75f6d617Schristos # define bzero(s, n) __bzero (s, n)
165*75f6d617Schristos # endif
166*75f6d617Schristos # endif
167*75f6d617Schristos # else
168*75f6d617Schristos # include <strings.h>
169*75f6d617Schristos # ifndef memcmp
170*75f6d617Schristos # define memcmp(s1, s2, n) bcmp (s1, s2, n)
171*75f6d617Schristos # endif
172*75f6d617Schristos # ifndef memcpy
173*75f6d617Schristos # define memcpy(d, s, n) (bcopy (s, d, n), (d))
174*75f6d617Schristos # endif
175*75f6d617Schristos # endif
176*75f6d617Schristos # endif
177*75f6d617Schristos
178*75f6d617Schristos /* Define the syntax stuff for \<, \>, etc. */
179*75f6d617Schristos
180*75f6d617Schristos /* This must be nonzero for the wordchar and notwordchar pattern
181*75f6d617Schristos commands in re_match_2. */
182*75f6d617Schristos # ifndef Sword
183*75f6d617Schristos # define Sword 1
184*75f6d617Schristos # endif
185*75f6d617Schristos
186*75f6d617Schristos # ifdef SWITCH_ENUM_BUG
187*75f6d617Schristos # define SWITCH_ENUM_CAST(x) ((int)(x))
188*75f6d617Schristos # else
189*75f6d617Schristos # define SWITCH_ENUM_CAST(x) (x)
190*75f6d617Schristos # endif
191*75f6d617Schristos
192*75f6d617Schristos # endif /* not emacs */
193*75f6d617Schristos
194*75f6d617Schristos # if defined _LIBC || HAVE_LIMITS_H
195*75f6d617Schristos # include <limits.h>
196*75f6d617Schristos # endif
197*75f6d617Schristos
198*75f6d617Schristos # ifndef MB_LEN_MAX
199*75f6d617Schristos # define MB_LEN_MAX 1
200*75f6d617Schristos # endif
201*75f6d617Schristos
202*75f6d617Schristos /* Get the interface, including the syntax bits. */
203*75f6d617Schristos # include <regex.h>
204*75f6d617Schristos
205*75f6d617Schristos /* isalpha etc. are used for the character classes. */
206*75f6d617Schristos # include <ctype.h>
207*75f6d617Schristos
208*75f6d617Schristos /* Jim Meyering writes:
209*75f6d617Schristos
210*75f6d617Schristos "... Some ctype macros are valid only for character codes that
211*75f6d617Schristos isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
212*75f6d617Schristos using /bin/cc or gcc but without giving an ansi option). So, all
213*75f6d617Schristos ctype uses should be through macros like ISPRINT... If
214*75f6d617Schristos STDC_HEADERS is defined, then autoconf has verified that the ctype
215*75f6d617Schristos macros don't need to be guarded with references to isascii. ...
216*75f6d617Schristos Defining isascii to 1 should let any compiler worth its salt
217*75f6d617Schristos eliminate the && through constant folding."
218*75f6d617Schristos Solaris defines some of these symbols so we must undefine them first. */
219*75f6d617Schristos
/* IN_CTYPE_DOMAIN (c) guards every <ctype.h> use below.  It is the
   constant 1 when STDC_HEADERS is defined or when no isascii is
   available; otherwise it defers to isascii.  */
# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
#  define IN_CTYPE_DOMAIN(c) 1
# else
#  define IN_CTYPE_DOMAIN(c) isascii(c)
# endif

/* isblank and isgraph are only used when the headers provide them as
   macros; otherwise they are emulated.  Note that several of these
   wrappers mention C more than once, so arguments must be free of side
   effects.  */
# ifdef isblank
#  define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
# else
#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
# ifdef isgraph
#  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
# else
#  define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
# endif

/* Drop any ISPRINT defined earlier before defining our own.  */
# undef ISPRINT
# define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
# define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
# define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
# define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
# define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
# define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
# define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
# define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
# define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
# define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))

/* Use the _tolower macro when the headers define one.  */
# ifdef _tolower
#  define TOLOWER(c) _tolower(c)
# else
#  define TOLOWER(c) tolower(c)
# endif
254*75f6d617Schristos
/* Fallback for environments whose headers did not define NULL.  The
   expansion is fully parenthesised so that it behaves as a single
   operand in any expression (e.g. `sizeof NULL').  */
# ifndef NULL
#  define NULL ((void *)0)
# endif
258*75f6d617Schristos
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
   since ours (we hope) works properly with all combinations of
   machines, compilers, `char' and `unsigned char' argument types.
   (Per Bothner suggested the basic approach.)

   SIGN_EXTEND_CHAR (c) yields the low eight bits of C reinterpreted as
   a signed quantity.  */
# undef SIGN_EXTEND_CHAR
# if __STDC__
#  define SIGN_EXTEND_CHAR(c) ((signed char) (c))
# else  /* not __STDC__ */
/* As in Harbison and Steele: flip the sign bit, then subtract it back
   out, which needs no `signed' keyword.  */
#  define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
# endif
270*75f6d617Schristos
271*75f6d617Schristos # ifndef emacs
272*75f6d617Schristos /* How many characters in the character set. */
273*75f6d617Schristos # define CHAR_SET_SIZE 256
274*75f6d617Schristos
275*75f6d617Schristos # ifdef SYNTAX_TABLE
276*75f6d617Schristos
277*75f6d617Schristos extern char *re_syntax_table;
278*75f6d617Schristos
279*75f6d617Schristos # else /* not SYNTAX_TABLE */
280*75f6d617Schristos
281*75f6d617Schristos static char re_syntax_table[CHAR_SET_SIZE];
282*75f6d617Schristos
283*75f6d617Schristos static void init_syntax_once PARAMS ((void));
284*75f6d617Schristos
285*75f6d617Schristos static void
init_syntax_once()286*75f6d617Schristos init_syntax_once ()
287*75f6d617Schristos {
288*75f6d617Schristos register int c;
289*75f6d617Schristos static int done = 0;
290*75f6d617Schristos
291*75f6d617Schristos if (done)
292*75f6d617Schristos return;
293*75f6d617Schristos bzero (re_syntax_table, sizeof re_syntax_table);
294*75f6d617Schristos
295*75f6d617Schristos for (c = 0; c < CHAR_SET_SIZE; ++c)
296*75f6d617Schristos if (ISALNUM (c))
297*75f6d617Schristos re_syntax_table[c] = Sword;
298*75f6d617Schristos
299*75f6d617Schristos re_syntax_table['_'] = Sword;
300*75f6d617Schristos
301*75f6d617Schristos done = 1;
302*75f6d617Schristos }
303*75f6d617Schristos
304*75f6d617Schristos # endif /* not SYNTAX_TABLE */
305*75f6d617Schristos
306*75f6d617Schristos # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
307*75f6d617Schristos
308*75f6d617Schristos # endif /* emacs */
309*75f6d617Schristos
/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
   use `alloca' instead of `malloc'.  This is because using malloc in
   re_search* or re_match* could cause memory leaks when C-g is used in
   Emacs; also, malloc is slower and causes storage fragmentation.  On
   the other hand, malloc is more portable, and easier to debug.

   Because we sometimes use alloca, some routines have to be macros,
   not functions -- `alloca'-allocated space disappears at the end of the
   function it is called in.  */

# ifdef REGEX_MALLOC

#  define REGEX_ALLOCATE malloc
/* OSIZE is unused here; realloc preserves the old contents itself.  */
#  define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#  define REGEX_FREE free

# else /* not REGEX_MALLOC */

/* Emacs already defines alloca, sometimes.  */
#  ifndef alloca

/* Make alloca work the best possible way.  */
#   ifdef __GNUC__
#    define alloca __builtin_alloca
#   else /* not __GNUC__ */
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    endif /* HAVE_ALLOCA_H */
#   endif /* not __GNUC__ */

#  endif /* not alloca */

#  define REGEX_ALLOCATE alloca

/* Assumes a `char *destination' variable is in scope at the point of
   use.  A fresh NSIZE-byte area is obtained with alloca and the first
   OSIZE bytes of SOURCE are copied into it; the value of the expression
   is the result of memcpy, i.e. the new area.  */
#  define REGEX_REALLOCATE(source, osize, nsize)                        \
  (destination = (char *) alloca (nsize),                               \
   memcpy (destination, source, osize))

/* No need to do anything to free, after alloca.  */
#  define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */

# endif /* not REGEX_MALLOC */
353*75f6d617Schristos
/* Define how to allocate the failure stack.  Three configurations:
   the relocating allocator (REL_ALLOC together with REGEX_MALLOC),
   plain malloc (REGEX_MALLOC alone), or alloca (neither).  */

# if defined REL_ALLOC && defined REGEX_MALLOC

/* These ignore their pointer argument and always operate on the
   variable `failure_stack_ptr', which must be in scope at the point of
   use.  */
#  define REGEX_ALLOCATE_STACK(size)                            \
  r_alloc (&failure_stack_ptr, (size))
#  define REGEX_REALLOCATE_STACK(source, osize, nsize)          \
  r_re_alloc (&failure_stack_ptr, (nsize))
#  define REGEX_FREE_STACK(ptr)                                 \
  r_alloc_free (&failure_stack_ptr)

# else /* not using relocating allocator */

#  ifdef REGEX_MALLOC

#   define REGEX_ALLOCATE_STACK malloc
#   define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#   define REGEX_FREE_STACK free

#  else /* not REGEX_MALLOC */

#   define REGEX_ALLOCATE_STACK alloca

#   define REGEX_REALLOCATE_STACK(source, osize, nsize)         \
   REGEX_REALLOCATE (source, osize, nsize)
/* No need to explicitly free anything.  Expands to a void expression,
   like REGEX_FREE, so that it is valid wherever an expression or a
   statement is expected.  */
#   define REGEX_FREE_STACK(arg) ((void)0)

#  endif /* not REGEX_MALLOC */
# endif /* not using relocating allocator */
384*75f6d617Schristos
385*75f6d617Schristos
/* True if `size1' is nonzero and PTR is pointing anywhere inside
   `string1' or just past its end.  This works if PTR is NULL, which is
   a good thing.  Relies on variables named `string1' and `size1' being
   in scope at the point of use.  */
# define FIRST_STRING_P(ptr)                                    \
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
391*75f6d617Schristos
/* (Re)Allocate N items of type T using malloc, or fail.

   TALLOC yields a fresh block (or a null pointer on failure).
   RETALLOC resizes ADDR in place and stores the realloc result back
   into ADDR, so on failure ADDR becomes null and the caller must check.
   RETALLOC_IF resizes ADDR when it is non-null and allocates it
   otherwise; it is a statement, wrapped in do/while (0) so that it
   behaves as a single statement after `if'/`else'.
   REGEX_TALLOC allocates through REGEX_ALLOCATE instead of malloc.  */
# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
# define RETALLOC_IF(addr, n, t)                                \
  do                                                            \
    {                                                           \
      if (addr)                                                 \
        RETALLOC ((addr), (n), t);                              \
      else                                                      \
        (addr) = TALLOC ((n), t);                               \
    }                                                           \
  while (0)
# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
398*75f6d617Schristos
# define BYTEWIDTH 8 /* In bits.  */

/* Nonzero when the strings S1 and S2 compare equal under strcmp.  */
# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

/* Discard any MAX/MIN that a system header supplied so that ours are
   used.  Each evaluates the selected argument twice, so arguments must
   be free of side effects.  */
# undef MAX
# undef MIN
# define MAX(a, b) ((a) > (b) ? (a) : (b))
# define MIN(a, b) ((a) < (b) ? (a) : (b))

/* Local truth type and constants.  */
typedef char boolean;
# define false 0
# define true 1
411*75f6d617Schristos
412*75f6d617Schristos static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
413*75f6d617Schristos reg_syntax_t syntax,
414*75f6d617Schristos struct re_pattern_buffer *bufp));
415*75f6d617Schristos
416*75f6d617Schristos static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
417*75f6d617Schristos const char *string1, int size1,
418*75f6d617Schristos const char *string2, int size2,
419*75f6d617Schristos int pos,
420*75f6d617Schristos struct re_registers *regs,
421*75f6d617Schristos int stop));
422*75f6d617Schristos static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
423*75f6d617Schristos const char *string1, int size1,
424*75f6d617Schristos const char *string2, int size2,
425*75f6d617Schristos int startpos, int range,
426*75f6d617Schristos struct re_registers *regs, int stop));
427*75f6d617Schristos static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
428*75f6d617Schristos
429*75f6d617Schristos #ifdef MBS_SUPPORT
430*75f6d617Schristos static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
431*75f6d617Schristos reg_syntax_t syntax,
432*75f6d617Schristos struct re_pattern_buffer *bufp));
433*75f6d617Schristos
434*75f6d617Schristos
435*75f6d617Schristos static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
436*75f6d617Schristos const char *cstring1, int csize1,
437*75f6d617Schristos const char *cstring2, int csize2,
438*75f6d617Schristos int pos,
439*75f6d617Schristos struct re_registers *regs,
440*75f6d617Schristos int stop,
441*75f6d617Schristos wchar_t *string1, int size1,
442*75f6d617Schristos wchar_t *string2, int size2,
443*75f6d617Schristos int *mbs_offset1, int *mbs_offset2));
444*75f6d617Schristos static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
445*75f6d617Schristos const char *string1, int size1,
446*75f6d617Schristos const char *string2, int size2,
447*75f6d617Schristos int startpos, int range,
448*75f6d617Schristos struct re_registers *regs, int stop));
449*75f6d617Schristos static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
450*75f6d617Schristos #endif
451*75f6d617Schristos
/* These are the command codes that appear in compiled regular
   expressions.  Some opcodes are followed by argument bytes.  A
   command code can specify any interpretation whatsoever for its
   arguments.  Zero bytes may appear in the compiled regular expression.

   The enumerators are numbered implicitly starting from no_op = 0, so
   their order -- and which conditional members are compiled in --
   determines every opcode's numeric value.  */

typedef enum
{
  no_op = 0,

  /* Succeed right away--no more backtracking.  */
  succeed,

        /* Followed by one byte giving n, then by n literal bytes.  */
  exactn,

# ifdef MBS_SUPPORT
        /* Same as exactn, but contains binary data.  */
  exactn_bin,
# endif

        /* Matches any (more or less) character.  */
  anychar,

        /* Matches any one char belonging to specified set.  First
           following byte is number of bitmap bytes.  Then come bytes
           for a bitmap saying which chars are in.  Bits in each byte
           are ordered low-bit-first.  A character is in the set if its
           bit is 1.  A character too large to have a bit in the map is
           automatically not in the set.  */
        /* ifdef MBS_SUPPORT, following element is length of character
           classes, length of collating symbols, length of equivalence
           classes, length of character ranges, and length of characters.
           Next, character class element, collating symbols elements,
           equivalence class elements, range elements, and character
           elements follow.
           See regex_compile function.  */
  charset,

        /* Same parameters as charset, but match any character that is
           not one of those specified.  */
  charset_not,

        /* Start remembering the text that is matched, for storing in a
           register.  Followed by one byte with the register number, in
           the range 0 to one less than the pattern buffer's re_nsub
           field.  Then followed by one byte with the number of groups
           inner to this one.  (This last has to be part of the
           start_memory only because we need it in the on_failure_jump
           of re_match_2.)  */
  start_memory,

        /* Stop remembering the text that is matched and store it in a
           memory register.  Followed by one byte with the register
           number, in the range 0 to one less than `re_nsub' in the
           pattern buffer, and one byte with the number of inner groups,
           just like `start_memory'.  (We need the number of inner
           groups here because we don't have any easy way of finding the
           corresponding start_memory when we're at a stop_memory.)  */
  stop_memory,

        /* Match a duplicate of something remembered.  Followed by one
           byte containing the register number.  */
  duplicate,

        /* Fail unless at beginning of line.  */
  begline,

        /* Fail unless at end of line.  */
  endline,

        /* Succeeds if at beginning of buffer (if emacs) or at beginning
           of string to be matched (if not).  */
  begbuf,

        /* Analogously, for end of buffer/string.  */
  endbuf,

        /* Followed by two-byte relative address to which to jump.  */
  jump,

        /* Same as jump, but marks the end of an alternative.  */
  jump_past_alt,

        /* Followed by two-byte relative address of place to resume at
           in case of failure.  */
        /* ifdef MBS_SUPPORT, the size of address is 1.  */
  on_failure_jump,

        /* Like on_failure_jump, but pushes a placeholder instead of the
           current string position when executed.  */
  on_failure_keep_string_jump,

        /* Throw away latest failure point and then jump to following
           two-byte relative address.  */
        /* ifdef MBS_SUPPORT, the size of address is 1.  */
  pop_failure_jump,

        /* Change to pop_failure_jump if know won't have to backtrack to
           match; otherwise change to jump.  This is used to jump
           back to the beginning of a repeat.  If what follows this jump
           clearly won't match what the repeat does, such that we can be
           sure that there is no use backtracking out of repetitions
           already matched, then we change it to a pop_failure_jump.
           Followed by two-byte address.  */
        /* ifdef MBS_SUPPORT, the size of address is 1.  */
  maybe_pop_jump,

        /* Jump to following two-byte address, and push a dummy failure
           point.  This failure point will be thrown away if an attempt
           is made to use it for a failure.  A `+' construct makes this
           before the first repeat.  Also used as an intermediary kind
           of jump when compiling an alternative.  */
        /* ifdef MBS_SUPPORT, the size of address is 1.  */
  dummy_failure_jump,

        /* Push a dummy failure point and continue.  Used at the end of
           alternatives.  */
  push_dummy_failure,

        /* Followed by two-byte relative address and two-byte number n.
           After matching N times, jump to the address upon failure.  */
        /* ifdef MBS_SUPPORT, the size of address is 1.  */
  succeed_n,

        /* Followed by two-byte relative address, and two-byte number n.
           Jump to the address N times, then fail.  */
        /* ifdef MBS_SUPPORT, the size of address is 1.  */
  jump_n,

        /* Set the following two-byte relative address to the
           subsequent two-byte number.  The address *includes* the two
           bytes of number.  */
        /* ifdef MBS_SUPPORT, the size of address is 1.  */
  set_number_at,

  wordchar,     /* Matches any word-constituent character.  */
  notwordchar,  /* Matches any char that is not a word-constituent.  */

  wordbeg,      /* Succeeds if at word beginning.  */
  wordend,      /* Succeeds if at word end.  */

  wordbound,    /* Succeeds if at a word boundary.  */
  notwordbound  /* Succeeds if not at a word boundary.  */

# ifdef emacs
  ,before_dot,  /* Succeeds if before point.  */
  at_dot,       /* Succeeds if at point.  */
  after_dot,    /* Succeeds if after point.  */

        /* Matches any character whose syntax is specified.  Followed by
           a byte which contains a syntax code, e.g., Sword.  */
  syntaxspec,

        /* Matches any character whose syntax is not that specified.  */
  notsyntaxspec
# endif /* emacs */
} re_opcode_t;
609*75f6d617Schristos #endif /* not INSIDE_RECURSION */
610*75f6d617Schristos
611*75f6d617Schristos
/* Per-variant configuration.  This file includes itself: on the outer
   pass (neither BYTE nor WCHAR defined) it defines WCHAR (only when
   MBS_SUPPORT is defined) and then BYTE, each time re-including
   "regex.c" with INSIDE_RECURSION set.  On an inner pass the macros
   below select the character type, the buffer holding the compiled
   pattern, the width of a stored offset, and the name prefix
   (byte_ or wcs_) of the generated functions.  */
#ifdef BYTE
# define CHAR_T char
# define UCHAR_T unsigned char
# define COMPILED_BUFFER_VAR bufp->buffer
# define OFFSET_ADDRESS_SIZE 2
# define PREFIX(name) byte_##name
# define ARG_PREFIX(name) name
# define PUT_CHAR(c) putchar (c)
#else
# ifdef WCHAR
#  define CHAR_T wchar_t
#  define UCHAR_T wchar_t
#  define COMPILED_BUFFER_VAR wc_buffer
#  define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */
#  define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
#  define PREFIX(name) wcs_##name
#  define ARG_PREFIX(name) c##name
/* Should we use wide stream??  */
/* No trailing semicolon, matching the byte variant, so that PUT_CHAR is
   an ordinary expression usable before `else'.  */
#  define PUT_CHAR(c) printf ("%C", c)
#  define TRUE 1
#  define FALSE 0
# else
#  ifdef MBS_SUPPORT
#   define WCHAR
#   define INSIDE_RECURSION
#   include "regex.c"
#   undef INSIDE_RECURSION
#  endif
#  define BYTE
#  define INSIDE_RECURSION
#  include "regex.c"
#  undef INSIDE_RECURSION
# endif
#endif
646*75f6d617Schristos #include "unlocked-io.h"
647*75f6d617Schristos
648*75f6d617Schristos #ifdef INSIDE_RECURSION
649*75f6d617Schristos /* Common operations on the compiled pattern. */
650*75f6d617Schristos
/* STORE_NUMBER (DST, NUM): write NUM into the compiled pattern at DST.
   In the BYTE build the value takes two consecutive bytes, low-order
   byte first.  In the WCHAR build it takes a single element.  */

# ifdef WCHAR
#  define STORE_NUMBER(dst, num) \
  do { \
    *(dst) = (UCHAR_T) (num); \
  } while (0)
# else /* BYTE */
#  define STORE_NUMBER(dst, num) \
  do { \
    (dst)[0] = (num) & 0xff; \
    (dst)[1] = (num) >> 8; \
  } while (0)
# endif /* WCHAR */

/* STORE_NUMBER_AND_INCR (DST, NUM): as STORE_NUMBER, then advance DST by
   OFFSET_ADDRESS_SIZE elements so that it points just past the stored
   number.  DST is assigned to, so it must be an lvalue.  */

# define STORE_NUMBER_AND_INCR(dst, num) \
  do { \
    STORE_NUMBER (dst, num); \
    (dst) += OFFSET_ADDRESS_SIZE; \
  } while (0)
677*75f6d617Schristos
/* EXTRACT_NUMBER (DESTINATION, SOURCE): load into DESTINATION the number
   stored at SOURCE.

   In the BYTE build the number is read from two consecutive bytes: the
   first is the low-order byte, the second is the sign-extended
   high-order byte.  In the WCHAR build the number is the single element
   at SOURCE.

   The high byte is scaled with a multiplication rather than `<< 8':
   it is negative for negative numbers, and left-shifting a negative
   value is undefined behaviour in ISO C.  */

# ifdef WCHAR
#  define EXTRACT_NUMBER(destination, source) \
  do { \
    (destination) = *(source); \
  } while (0)
# else /* BYTE */
#  define EXTRACT_NUMBER(destination, source) \
  do { \
    (destination) = *(source) & 0377; \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8); \
  } while (0)
# endif
694*75f6d617Schristos
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER, compiled only when DEBUG is defined.

   Stores into *DEST the number encoded at SOURCE: in the WCHAR build the
   single element at SOURCE, otherwise the low-order byte SOURCE[0]
   combined with the sign-extended high-order byte SOURCE[1].

   The high byte is multiplied by 256 instead of being shifted left,
   because it is negative for negative numbers and left-shifting a
   negative value is undefined behaviour in ISO C.  */
static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
static void
PREFIX(extract_number) (dest, source)
    int *dest;
    UCHAR_T *source;
{
#  ifdef WCHAR
  *dest = *source;
#  else /* BYTE */
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  *dest += temp * (1 << 8);
#  endif
}

/* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER through the
   function above so that it can be stepped through in a debugger.  */
#  ifndef EXTRACT_MACROS /* To debug the macros.  */
#   undef EXTRACT_NUMBER
#   define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
717*75f6d617Schristos
/* EXTRACT_NUMBER_AND_INCR (DEST, SRC): read a number exactly as
   EXTRACT_NUMBER does, then step SRC forward by OFFSET_ADDRESS_SIZE
   elements so that it points just past the number.  SRC is assigned
   to, so it must be an lvalue.  */

# define EXTRACT_NUMBER_AND_INCR(dest, src) \
  do { \
    EXTRACT_NUMBER (dest, src); \
    (src) += OFFSET_ADDRESS_SIZE; \
  } while (0)
726*75f6d617Schristos
# ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR, compiled only when DEBUG is
   defined.  Decodes the number at *SOURCE into *DESTINATION through the
   extract_number helper, then moves *SOURCE forward by
   OFFSET_ADDRESS_SIZE elements.  */
static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination,
                                                       UCHAR_T **source));
static void
PREFIX(extract_number_and_incr) (destination, source)
    int *destination;
    UCHAR_T **source;
{
  UCHAR_T *cursor = *source;

  PREFIX(extract_number) (destination, cursor);
  *source = cursor + OFFSET_ADDRESS_SIZE;
}

/* Unless EXTRACT_MACROS is defined, make the macro call the function
   above instead of expanding inline.  */
#  ifndef EXTRACT_MACROS
#   undef EXTRACT_NUMBER_AND_INCR
#   define EXTRACT_NUMBER_AND_INCR(dest, src) \
  PREFIX(extract_number_and_incr) (&dest, &src)
#  endif /* not EXTRACT_MACROS */

# endif /* DEBUG */
746*75f6d617Schristos
747*75f6d617Schristos
748*75f6d617Schristos
749*75f6d617Schristos /* If DEBUG is defined, Regex prints many voluminous messages about what
750*75f6d617Schristos it is doing (if the variable `debug' is nonzero). If linked with the
751*75f6d617Schristos main program in `iregex.c', you can enter patterns and strings
752*75f6d617Schristos interactively. And if linked with the main program in `main.c' and
753*75f6d617Schristos the other test files, you can run the already-written tests. */
754*75f6d617Schristos
755*75f6d617Schristos # ifdef DEBUG
756*75f6d617Schristos
#  ifndef DEFINED_ONCE

/* We use standard I/O for debugging.  */
#   include <stdio.h>

/* It is useful to test things that ``must'' be true when debugging.  */
#   include <assert.h>

/* Nonzero turns on the output produced by the DEBUG_PRINT* macros.  */
static int debug;

/* DEBUG_STATEMENT (E) keeps E in the program text.

   DEBUG_PRINTn forwards its n arguments to printf when `debug' is
   nonzero.  Each one is wrapped in `do { ... } while (0)' so that it
   behaves as exactly one statement: a bare `if (debug) ...' expansion
   would capture the `else' of an enclosing unbraced `if', making the
   program behave differently from a build in which these macros expand
   to nothing.  */
#   define DEBUG_STATEMENT(e) e
#   define DEBUG_PRINT1(x) \
  do { if (debug) printf (x); } while (0)
#   define DEBUG_PRINT2(x1, x2) \
  do { if (debug) printf (x1, x2); } while (0)
#   define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
#   define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
#  endif /* not DEFINED_ONCE */

/* When `debug' is nonzero, dump the compiled pattern between S and E, or
   the pair of subject strings from position W onward.  The argument P is
   not used by the expansion.  */
#  define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
  do { \
    if (debug) PREFIX(print_partial_compiled_pattern) (s, e); \
  } while (0)
#  define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
  do { \
    if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2); \
  } while (0)
778*75f6d617Schristos
779*75f6d617Schristos
780*75f6d617Schristos /* Print the fastmap in human-readable form. */
781*75f6d617Schristos
782*75f6d617Schristos # ifndef DEFINED_ONCE
783*75f6d617Schristos void
print_fastmap(fastmap)784*75f6d617Schristos print_fastmap (fastmap)
785*75f6d617Schristos char *fastmap;
786*75f6d617Schristos {
787*75f6d617Schristos unsigned was_a_range = 0;
788*75f6d617Schristos unsigned i = 0;
789*75f6d617Schristos
790*75f6d617Schristos while (i < (1 << BYTEWIDTH))
791*75f6d617Schristos {
792*75f6d617Schristos if (fastmap[i++])
793*75f6d617Schristos {
794*75f6d617Schristos was_a_range = 0;
795*75f6d617Schristos putchar (i - 1);
796*75f6d617Schristos while (i < (1 << BYTEWIDTH) && fastmap[i])
797*75f6d617Schristos {
798*75f6d617Schristos was_a_range = 1;
799*75f6d617Schristos i++;
800*75f6d617Schristos }
801*75f6d617Schristos if (was_a_range)
802*75f6d617Schristos {
803*75f6d617Schristos printf ("-");
804*75f6d617Schristos putchar (i - 1);
805*75f6d617Schristos }
806*75f6d617Schristos }
807*75f6d617Schristos }
808*75f6d617Schristos putchar ('\n');
809*75f6d617Schristos }
810*75f6d617Schristos # endif /* not DEFINED_ONCE */
811*75f6d617Schristos
812*75f6d617Schristos
813*75f6d617Schristos /* Print a compiled pattern string in human-readable form, starting at
814*75f6d617Schristos the START pointer into it and ending just before the pointer END. */
815*75f6d617Schristos
816*75f6d617Schristos void
817*75f6d617Schristos PREFIX(print_partial_compiled_pattern) (start, end)
818*75f6d617Schristos UCHAR_T *start;
819*75f6d617Schristos UCHAR_T *end;
820*75f6d617Schristos {
821*75f6d617Schristos int mcnt, mcnt2;
822*75f6d617Schristos UCHAR_T *p1;
823*75f6d617Schristos UCHAR_T *p = start;
824*75f6d617Schristos UCHAR_T *pend = end;
825*75f6d617Schristos
826*75f6d617Schristos if (start == NULL)
827*75f6d617Schristos {
828*75f6d617Schristos printf ("(null)\n");
829*75f6d617Schristos return;
830*75f6d617Schristos }
831*75f6d617Schristos
832*75f6d617Schristos /* Loop over pattern commands. */
833*75f6d617Schristos while (p < pend)
834*75f6d617Schristos {
835*75f6d617Schristos # ifdef _LIBC
836*75f6d617Schristos printf ("%td:\t", p - start);
837*75f6d617Schristos # else
838*75f6d617Schristos printf ("%ld:\t", (long int) (p - start));
839*75f6d617Schristos # endif
840*75f6d617Schristos
841*75f6d617Schristos switch ((re_opcode_t) *p++)
842*75f6d617Schristos {
843*75f6d617Schristos case no_op:
844*75f6d617Schristos printf ("/no_op");
845*75f6d617Schristos break;
846*75f6d617Schristos
847*75f6d617Schristos case exactn:
848*75f6d617Schristos mcnt = *p++;
849*75f6d617Schristos printf ("/exactn/%d", mcnt);
850*75f6d617Schristos do
851*75f6d617Schristos {
852*75f6d617Schristos putchar ('/');
853*75f6d617Schristos PUT_CHAR (*p++);
854*75f6d617Schristos }
855*75f6d617Schristos while (--mcnt);
856*75f6d617Schristos break;
857*75f6d617Schristos
858*75f6d617Schristos # ifdef MBS_SUPPORT
859*75f6d617Schristos case exactn_bin:
860*75f6d617Schristos mcnt = *p++;
861*75f6d617Schristos printf ("/exactn_bin/%d", mcnt);
862*75f6d617Schristos do
863*75f6d617Schristos {
864*75f6d617Schristos printf("/%lx", (long int) *p++);
865*75f6d617Schristos }
866*75f6d617Schristos while (--mcnt);
867*75f6d617Schristos break;
868*75f6d617Schristos # endif /* MBS_SUPPORT */
869*75f6d617Schristos
870*75f6d617Schristos case start_memory:
871*75f6d617Schristos mcnt = *p++;
872*75f6d617Schristos printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
873*75f6d617Schristos break;
874*75f6d617Schristos
875*75f6d617Schristos case stop_memory:
876*75f6d617Schristos mcnt = *p++;
877*75f6d617Schristos printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
878*75f6d617Schristos break;
879*75f6d617Schristos
880*75f6d617Schristos case duplicate:
881*75f6d617Schristos printf ("/duplicate/%ld", (long int) *p++);
882*75f6d617Schristos break;
883*75f6d617Schristos
884*75f6d617Schristos case anychar:
885*75f6d617Schristos printf ("/anychar");
886*75f6d617Schristos break;
887*75f6d617Schristos
888*75f6d617Schristos case charset:
889*75f6d617Schristos case charset_not:
890*75f6d617Schristos {
891*75f6d617Schristos # ifdef WCHAR
892*75f6d617Schristos int i, length;
893*75f6d617Schristos wchar_t *workp = p;
894*75f6d617Schristos printf ("/charset [%s",
895*75f6d617Schristos (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
896*75f6d617Schristos p += 5;
897*75f6d617Schristos length = *workp++; /* the length of char_classes */
898*75f6d617Schristos for (i=0 ; i<length ; i++)
899*75f6d617Schristos printf("[:%lx:]", (long int) *p++);
900*75f6d617Schristos length = *workp++; /* the length of collating_symbol */
901*75f6d617Schristos for (i=0 ; i<length ;)
902*75f6d617Schristos {
903*75f6d617Schristos printf("[.");
904*75f6d617Schristos while(*p != 0)
905*75f6d617Schristos PUT_CHAR((i++,*p++));
906*75f6d617Schristos i++,p++;
907*75f6d617Schristos printf(".]");
908*75f6d617Schristos }
909*75f6d617Schristos length = *workp++; /* the length of equivalence_class */
910*75f6d617Schristos for (i=0 ; i<length ;)
911*75f6d617Schristos {
912*75f6d617Schristos printf("[=");
913*75f6d617Schristos while(*p != 0)
914*75f6d617Schristos PUT_CHAR((i++,*p++));
915*75f6d617Schristos i++,p++;
916*75f6d617Schristos printf("=]");
917*75f6d617Schristos }
918*75f6d617Schristos length = *workp++; /* the length of char_range */
919*75f6d617Schristos for (i=0 ; i<length ; i++)
920*75f6d617Schristos {
921*75f6d617Schristos wchar_t range_start = *p++;
922*75f6d617Schristos wchar_t range_end = *p++;
923*75f6d617Schristos printf("%C-%C", range_start, range_end);
924*75f6d617Schristos }
925*75f6d617Schristos length = *workp++; /* the length of char */
926*75f6d617Schristos for (i=0 ; i<length ; i++)
927*75f6d617Schristos printf("%C", *p++);
928*75f6d617Schristos putchar (']');
929*75f6d617Schristos # else
930*75f6d617Schristos register int c, last = -100;
931*75f6d617Schristos register int in_range = 0;
932*75f6d617Schristos
933*75f6d617Schristos printf ("/charset [%s",
934*75f6d617Schristos (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
935*75f6d617Schristos
936*75f6d617Schristos assert (p + *p < pend);
937*75f6d617Schristos
938*75f6d617Schristos for (c = 0; c < 256; c++)
939*75f6d617Schristos if (c / 8 < *p
940*75f6d617Schristos && (p[1 + (c/8)] & (1 << (c % 8))))
941*75f6d617Schristos {
942*75f6d617Schristos /* Are we starting a range? */
943*75f6d617Schristos if (last + 1 == c && ! in_range)
944*75f6d617Schristos {
945*75f6d617Schristos putchar ('-');
946*75f6d617Schristos in_range = 1;
947*75f6d617Schristos }
948*75f6d617Schristos /* Have we broken a range? */
949*75f6d617Schristos else if (last + 1 != c && in_range)
950*75f6d617Schristos {
951*75f6d617Schristos putchar (last);
952*75f6d617Schristos in_range = 0;
953*75f6d617Schristos }
954*75f6d617Schristos
955*75f6d617Schristos if (! in_range)
956*75f6d617Schristos putchar (c);
957*75f6d617Schristos
958*75f6d617Schristos last = c;
959*75f6d617Schristos }
960*75f6d617Schristos
961*75f6d617Schristos if (in_range)
962*75f6d617Schristos putchar (last);
963*75f6d617Schristos
964*75f6d617Schristos putchar (']');
965*75f6d617Schristos
966*75f6d617Schristos p += 1 + *p;
967*75f6d617Schristos # endif /* WCHAR */
968*75f6d617Schristos }
969*75f6d617Schristos break;
970*75f6d617Schristos
971*75f6d617Schristos case begline:
972*75f6d617Schristos printf ("/begline");
973*75f6d617Schristos break;
974*75f6d617Schristos
975*75f6d617Schristos case endline:
976*75f6d617Schristos printf ("/endline");
977*75f6d617Schristos break;
978*75f6d617Schristos
979*75f6d617Schristos case on_failure_jump:
980*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
981*75f6d617Schristos # ifdef _LIBC
982*75f6d617Schristos printf ("/on_failure_jump to %td", p + mcnt - start);
983*75f6d617Schristos # else
984*75f6d617Schristos printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
985*75f6d617Schristos # endif
986*75f6d617Schristos break;
987*75f6d617Schristos
988*75f6d617Schristos case on_failure_keep_string_jump:
989*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
990*75f6d617Schristos # ifdef _LIBC
991*75f6d617Schristos printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
992*75f6d617Schristos # else
993*75f6d617Schristos printf ("/on_failure_keep_string_jump to %ld",
994*75f6d617Schristos (long int) (p + mcnt - start));
995*75f6d617Schristos # endif
996*75f6d617Schristos break;
997*75f6d617Schristos
998*75f6d617Schristos case dummy_failure_jump:
999*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1000*75f6d617Schristos # ifdef _LIBC
1001*75f6d617Schristos printf ("/dummy_failure_jump to %td", p + mcnt - start);
1002*75f6d617Schristos # else
1003*75f6d617Schristos printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
1004*75f6d617Schristos # endif
1005*75f6d617Schristos break;
1006*75f6d617Schristos
1007*75f6d617Schristos case push_dummy_failure:
1008*75f6d617Schristos printf ("/push_dummy_failure");
1009*75f6d617Schristos break;
1010*75f6d617Schristos
1011*75f6d617Schristos case maybe_pop_jump:
1012*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1013*75f6d617Schristos # ifdef _LIBC
1014*75f6d617Schristos printf ("/maybe_pop_jump to %td", p + mcnt - start);
1015*75f6d617Schristos # else
1016*75f6d617Schristos printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
1017*75f6d617Schristos # endif
1018*75f6d617Schristos break;
1019*75f6d617Schristos
1020*75f6d617Schristos case pop_failure_jump:
1021*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1022*75f6d617Schristos # ifdef _LIBC
1023*75f6d617Schristos printf ("/pop_failure_jump to %td", p + mcnt - start);
1024*75f6d617Schristos # else
1025*75f6d617Schristos printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
1026*75f6d617Schristos # endif
1027*75f6d617Schristos break;
1028*75f6d617Schristos
1029*75f6d617Schristos case jump_past_alt:
1030*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1031*75f6d617Schristos # ifdef _LIBC
1032*75f6d617Schristos printf ("/jump_past_alt to %td", p + mcnt - start);
1033*75f6d617Schristos # else
1034*75f6d617Schristos printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
1035*75f6d617Schristos # endif
1036*75f6d617Schristos break;
1037*75f6d617Schristos
1038*75f6d617Schristos case jump:
1039*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1040*75f6d617Schristos # ifdef _LIBC
1041*75f6d617Schristos printf ("/jump to %td", p + mcnt - start);
1042*75f6d617Schristos # else
1043*75f6d617Schristos printf ("/jump to %ld", (long int) (p + mcnt - start));
1044*75f6d617Schristos # endif
1045*75f6d617Schristos break;
1046*75f6d617Schristos
1047*75f6d617Schristos case succeed_n:
1048*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1049*75f6d617Schristos p1 = p + mcnt;
1050*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt2, &p);
1051*75f6d617Schristos # ifdef _LIBC
1052*75f6d617Schristos printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
1053*75f6d617Schristos # else
1054*75f6d617Schristos printf ("/succeed_n to %ld, %d times",
1055*75f6d617Schristos (long int) (p1 - start), mcnt2);
1056*75f6d617Schristos # endif
1057*75f6d617Schristos break;
1058*75f6d617Schristos
1059*75f6d617Schristos case jump_n:
1060*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1061*75f6d617Schristos p1 = p + mcnt;
1062*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt2, &p);
1063*75f6d617Schristos printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
1064*75f6d617Schristos break;
1065*75f6d617Schristos
1066*75f6d617Schristos case set_number_at:
1067*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt, &p);
1068*75f6d617Schristos p1 = p + mcnt;
1069*75f6d617Schristos PREFIX(extract_number_and_incr) (&mcnt2, &p);
1070*75f6d617Schristos # ifdef _LIBC
1071*75f6d617Schristos printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
1072*75f6d617Schristos # else
1073*75f6d617Schristos printf ("/set_number_at location %ld to %d",
1074*75f6d617Schristos (long int) (p1 - start), mcnt2);
1075*75f6d617Schristos # endif
1076*75f6d617Schristos break;
1077*75f6d617Schristos
1078*75f6d617Schristos case wordbound:
1079*75f6d617Schristos printf ("/wordbound");
1080*75f6d617Schristos break;
1081*75f6d617Schristos
1082*75f6d617Schristos case notwordbound:
1083*75f6d617Schristos printf ("/notwordbound");
1084*75f6d617Schristos break;
1085*75f6d617Schristos
1086*75f6d617Schristos case wordbeg:
1087*75f6d617Schristos printf ("/wordbeg");
1088*75f6d617Schristos break;
1089*75f6d617Schristos
1090*75f6d617Schristos case wordend:
1091*75f6d617Schristos printf ("/wordend");
1092*75f6d617Schristos break;
1093*75f6d617Schristos
1094*75f6d617Schristos # ifdef emacs
1095*75f6d617Schristos case before_dot:
1096*75f6d617Schristos printf ("/before_dot");
1097*75f6d617Schristos break;
1098*75f6d617Schristos
1099*75f6d617Schristos case at_dot:
1100*75f6d617Schristos printf ("/at_dot");
1101*75f6d617Schristos break;
1102*75f6d617Schristos
1103*75f6d617Schristos case after_dot:
1104*75f6d617Schristos printf ("/after_dot");
1105*75f6d617Schristos break;
1106*75f6d617Schristos
1107*75f6d617Schristos case syntaxspec:
1108*75f6d617Schristos printf ("/syntaxspec");
1109*75f6d617Schristos mcnt = *p++;
1110*75f6d617Schristos printf ("/%d", mcnt);
1111*75f6d617Schristos break;
1112*75f6d617Schristos
1113*75f6d617Schristos case notsyntaxspec:
1114*75f6d617Schristos printf ("/notsyntaxspec");
1115*75f6d617Schristos mcnt = *p++;
1116*75f6d617Schristos printf ("/%d", mcnt);
1117*75f6d617Schristos break;
1118*75f6d617Schristos # endif /* emacs */
1119*75f6d617Schristos
1120*75f6d617Schristos case wordchar:
1121*75f6d617Schristos printf ("/wordchar");
1122*75f6d617Schristos break;
1123*75f6d617Schristos
1124*75f6d617Schristos case notwordchar:
1125*75f6d617Schristos printf ("/notwordchar");
1126*75f6d617Schristos break;
1127*75f6d617Schristos
1128*75f6d617Schristos case begbuf:
1129*75f6d617Schristos printf ("/begbuf");
1130*75f6d617Schristos break;
1131*75f6d617Schristos
1132*75f6d617Schristos case endbuf:
1133*75f6d617Schristos printf ("/endbuf");
1134*75f6d617Schristos break;
1135*75f6d617Schristos
1136*75f6d617Schristos default:
1137*75f6d617Schristos printf ("?%ld", (long int) *(p-1));
1138*75f6d617Schristos }
1139*75f6d617Schristos
1140*75f6d617Schristos putchar ('\n');
1141*75f6d617Schristos }
1142*75f6d617Schristos
1143*75f6d617Schristos # ifdef _LIBC
1144*75f6d617Schristos printf ("%td:\tend of pattern.\n", p - start);
1145*75f6d617Schristos # else
1146*75f6d617Schristos printf ("%ld:\tend of pattern.\n", (long int) (p - start));
1147*75f6d617Schristos # endif
1148*75f6d617Schristos }
1149*75f6d617Schristos
1150*75f6d617Schristos
1151*75f6d617Schristos void
1152*75f6d617Schristos PREFIX(print_compiled_pattern) (bufp)
1153*75f6d617Schristos struct re_pattern_buffer *bufp;
1154*75f6d617Schristos {
1155*75f6d617Schristos UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
1156*75f6d617Schristos
1157*75f6d617Schristos PREFIX(print_partial_compiled_pattern) (buffer, buffer
1158*75f6d617Schristos + bufp->used / sizeof(UCHAR_T));
1159*75f6d617Schristos printf ("%ld bytes used/%ld bytes allocated.\n",
1160*75f6d617Schristos bufp->used, bufp->allocated);
1161*75f6d617Schristos
1162*75f6d617Schristos if (bufp->fastmap_accurate && bufp->fastmap)
1163*75f6d617Schristos {
1164*75f6d617Schristos printf ("fastmap: ");
1165*75f6d617Schristos print_fastmap (bufp->fastmap);
1166*75f6d617Schristos }
1167*75f6d617Schristos
1168*75f6d617Schristos # ifdef _LIBC
1169*75f6d617Schristos printf ("re_nsub: %Zd\t", bufp->re_nsub);
1170*75f6d617Schristos # else
1171*75f6d617Schristos printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
1172*75f6d617Schristos # endif
1173*75f6d617Schristos printf ("regs_alloc: %d\t", bufp->regs_allocated);
1174*75f6d617Schristos printf ("can_be_null: %d\t", bufp->can_be_null);
1175*75f6d617Schristos printf ("newline_anchor: %d\n", bufp->newline_anchor);
1176*75f6d617Schristos printf ("no_sub: %d\t", bufp->no_sub);
1177*75f6d617Schristos printf ("not_bol: %d\t", bufp->not_bol);
1178*75f6d617Schristos printf ("not_eol: %d\t", bufp->not_eol);
1179*75f6d617Schristos printf ("syntax: %lx\n", bufp->syntax);
1180*75f6d617Schristos /* Perhaps we should print the translate table? */
1181*75f6d617Schristos }
1182*75f6d617Schristos
1183*75f6d617Schristos
1184*75f6d617Schristos void
1185*75f6d617Schristos PREFIX(print_double_string) (where, string1, size1, string2, size2)
1186*75f6d617Schristos const CHAR_T *where;
1187*75f6d617Schristos const CHAR_T *string1;
1188*75f6d617Schristos const CHAR_T *string2;
1189*75f6d617Schristos int size1;
1190*75f6d617Schristos int size2;
1191*75f6d617Schristos {
1192*75f6d617Schristos int this_char;
1193*75f6d617Schristos
1194*75f6d617Schristos if (where == NULL)
1195*75f6d617Schristos printf ("(null)");
1196*75f6d617Schristos else
1197*75f6d617Schristos {
1198*75f6d617Schristos int cnt;
1199*75f6d617Schristos
1200*75f6d617Schristos if (FIRST_STRING_P (where))
1201*75f6d617Schristos {
1202*75f6d617Schristos for (this_char = where - string1; this_char < size1; this_char++)
1203*75f6d617Schristos PUT_CHAR (string1[this_char]);
1204*75f6d617Schristos
1205*75f6d617Schristos where = string2;
1206*75f6d617Schristos }
1207*75f6d617Schristos
1208*75f6d617Schristos cnt = 0;
1209*75f6d617Schristos for (this_char = where - string2; this_char < size2; this_char++)
1210*75f6d617Schristos {
1211*75f6d617Schristos PUT_CHAR (string2[this_char]);
1212*75f6d617Schristos if (++cnt > 100)
1213*75f6d617Schristos {
1214*75f6d617Schristos fputs ("...", stdout);
1215*75f6d617Schristos break;
1216*75f6d617Schristos }
1217*75f6d617Schristos }
1218*75f6d617Schristos }
1219*75f6d617Schristos }
1220*75f6d617Schristos
#  ifndef DEFINED_ONCE
/* Write the single character C to standard error.  */
void
printchar (c)
    int c;
{
  fputc (c, stderr);
}
#  endif
1229*75f6d617Schristos
1230*75f6d617Schristos # else /* not DEBUG */
1231*75f6d617Schristos
1232*75f6d617Schristos # ifndef DEFINED_ONCE
1233*75f6d617Schristos # undef assert
1234*75f6d617Schristos # define assert(e)
1235*75f6d617Schristos
1236*75f6d617Schristos # define DEBUG_STATEMENT(e)
1237*75f6d617Schristos # define DEBUG_PRINT1(x)
1238*75f6d617Schristos # define DEBUG_PRINT2(x1, x2)
1239*75f6d617Schristos # define DEBUG_PRINT3(x1, x2, x3)
1240*75f6d617Schristos # define DEBUG_PRINT4(x1, x2, x3, x4)
1241*75f6d617Schristos # endif /* not DEFINED_ONCE */
1242*75f6d617Schristos # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1243*75f6d617Schristos # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1244*75f6d617Schristos
1245*75f6d617Schristos # endif /* not DEBUG */
1246*75f6d617Schristos
1247*75f6d617Schristos
1248*75f6d617Schristos
1249*75f6d617Schristos # ifdef WCHAR
1250*75f6d617Schristos /* This convert a multibyte string to a wide character string.
1251*75f6d617Schristos And write their correspondances to offset_buffer(see below)
1252*75f6d617Schristos and write whether each wchar_t is binary data to is_binary.
1253*75f6d617Schristos This assume invalid multibyte sequences as binary data.
1254*75f6d617Schristos We assume offset_buffer and is_binary is already allocated
1255*75f6d617Schristos enough space. */
1256*75f6d617Schristos
1257*75f6d617Schristos static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
1258*75f6d617Schristos size_t len, int *offset_buffer,
1259*75f6d617Schristos char *is_binary);
1260*75f6d617Schristos static size_t
convert_mbs_to_wcs(dest,src,len,offset_buffer,is_binary)1261*75f6d617Schristos convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
1262*75f6d617Schristos CHAR_T *dest;
1263*75f6d617Schristos const unsigned char* src;
1264*75f6d617Schristos size_t len; /* the length of multibyte string. */
1265*75f6d617Schristos
1266*75f6d617Schristos /* It hold correspondances between src(char string) and
1267*75f6d617Schristos dest(wchar_t string) for optimization.
1268*75f6d617Schristos e.g. src = "xxxyzz"
1269*75f6d617Schristos dest = {'X', 'Y', 'Z'}
1270*75f6d617Schristos (each "xxx", "y" and "zz" represent one multibyte character
1271*75f6d617Schristos corresponding to 'X', 'Y' and 'Z'.)
1272*75f6d617Schristos offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")}
1273*75f6d617Schristos = {0, 3, 4, 6}
1274*75f6d617Schristos */
1275*75f6d617Schristos int *offset_buffer;
1276*75f6d617Schristos char *is_binary;
1277*75f6d617Schristos {
1278*75f6d617Schristos wchar_t *pdest = dest;
1279*75f6d617Schristos const unsigned char *psrc = src;
1280*75f6d617Schristos size_t wc_count = 0;
1281*75f6d617Schristos
1282*75f6d617Schristos mbstate_t mbs;
1283*75f6d617Schristos int i, consumed;
1284*75f6d617Schristos size_t mb_remain = len;
1285*75f6d617Schristos size_t mb_count = 0;
1286*75f6d617Schristos
1287*75f6d617Schristos /* Initialize the conversion state. */
1288*75f6d617Schristos memset (&mbs, 0, sizeof (mbstate_t));
1289*75f6d617Schristos
1290*75f6d617Schristos offset_buffer[0] = 0;
1291*75f6d617Schristos for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
1292*75f6d617Schristos psrc += consumed)
1293*75f6d617Schristos {
1294*75f6d617Schristos consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
1295*75f6d617Schristos
1296*75f6d617Schristos if (consumed <= 0)
1297*75f6d617Schristos /* failed to convert. maybe src contains binary data.
1298*75f6d617Schristos So we consume 1 byte manualy. */
1299*75f6d617Schristos {
1300*75f6d617Schristos *pdest = *psrc;
1301*75f6d617Schristos consumed = 1;
1302*75f6d617Schristos is_binary[wc_count] = TRUE;
1303*75f6d617Schristos }
1304*75f6d617Schristos else
1305*75f6d617Schristos is_binary[wc_count] = FALSE;
1306*75f6d617Schristos /* In sjis encoding, we use yen sign as escape character in
1307*75f6d617Schristos place of reverse solidus. So we convert 0x5c(yen sign in
1308*75f6d617Schristos sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse
1309*75f6d617Schristos solidus in UCS2). */
1310*75f6d617Schristos if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
1311*75f6d617Schristos *pdest = (wchar_t) *psrc;
1312*75f6d617Schristos
1313*75f6d617Schristos offset_buffer[wc_count + 1] = mb_count += consumed;
1314*75f6d617Schristos }
1315*75f6d617Schristos
1316*75f6d617Schristos /* Fill remain of the buffer with sentinel. */
1317*75f6d617Schristos for (i = wc_count + 1 ; i <= len ; i++)
1318*75f6d617Schristos offset_buffer[i] = mb_count + 1;
1319*75f6d617Schristos
1320*75f6d617Schristos return wc_count;
1321*75f6d617Schristos }
1322*75f6d617Schristos
1323*75f6d617Schristos # endif /* WCHAR */
1324*75f6d617Schristos
1325*75f6d617Schristos #else /* not INSIDE_RECURSION */
1326*75f6d617Schristos
1327*75f6d617Schristos /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1328*75f6d617Schristos also be assigned to arbitrarily: each pattern buffer stores its own
1329*75f6d617Schristos syntax, so it can be changed between regex compilations. */
1330*75f6d617Schristos /* This has no initializer because initialized variables in Emacs
1331*75f6d617Schristos become read-only after dumping. */
1332*75f6d617Schristos reg_syntax_t re_syntax_options;
1333*75f6d617Schristos
1334*75f6d617Schristos
1335*75f6d617Schristos /* Specify the precise syntax of regexps for compilation. This provides
1336*75f6d617Schristos for compatibility for various utilities which historically have
1337*75f6d617Schristos different, incompatible syntaxes.
1338*75f6d617Schristos
1339*75f6d617Schristos The argument SYNTAX is a bit mask comprised of the various bits
1340*75f6d617Schristos defined in regex.h. We return the old syntax. */
1341*75f6d617Schristos
1342*75f6d617Schristos reg_syntax_t
re_set_syntax(syntax)1343*75f6d617Schristos re_set_syntax (syntax)
1344*75f6d617Schristos reg_syntax_t syntax;
1345*75f6d617Schristos {
1346*75f6d617Schristos reg_syntax_t ret = re_syntax_options;
1347*75f6d617Schristos
1348*75f6d617Schristos re_syntax_options = syntax;
1349*75f6d617Schristos # ifdef DEBUG
1350*75f6d617Schristos if (syntax & RE_DEBUG)
1351*75f6d617Schristos debug = 1;
1352*75f6d617Schristos else if (debug) /* was on but now is not */
1353*75f6d617Schristos debug = 0;
1354*75f6d617Schristos # endif /* DEBUG */
1355*75f6d617Schristos return ret;
1356*75f6d617Schristos }
1357*75f6d617Schristos # ifdef _LIBC
1358*75f6d617Schristos weak_alias (__re_set_syntax, re_set_syntax)
1359*75f6d617Schristos # endif
1360*75f6d617Schristos
1361*75f6d617Schristos /* This table gives an error message for each of the error codes listed
1362*75f6d617Schristos in regex.h. Obviously the order here has to be the same as there.
1363*75f6d617Schristos POSIX doesn't require that we do anything for REG_NOERROR,
1364*75f6d617Schristos but why not be nice?

   Layout: all messages live in a single char array.  Adjacent string
   literals are concatenated, with an explicit "\0" after every message
   except the last.  Each REG_xxx_IDX macro is the offset of its message
   inside the array: the previous offset plus `sizeof' of the previous
   message literal (that size includes the literal's own NUL, which is
   what accounts for the "\0" separator).  The text given to `sizeof'
   must therefore match the message text exactly.
   NOTE(review): this relies on gettext_noop (defined elsewhere) expanding
   to its bare string argument -- confirm.  */
1365*75f6d617Schristos
1366*75f6d617Schristos static const char re_error_msgid[] =
1367*75f6d617Schristos {
1368*75f6d617Schristos # define REG_NOERROR_IDX 0
1369*75f6d617Schristos gettext_noop ("Success") /* REG_NOERROR */
1370*75f6d617Schristos "\0"
1371*75f6d617Schristos # define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
1372*75f6d617Schristos gettext_noop ("No match") /* REG_NOMATCH */
1373*75f6d617Schristos "\0"
1374*75f6d617Schristos # define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
1375*75f6d617Schristos gettext_noop ("Invalid regular expression") /* REG_BADPAT */
1376*75f6d617Schristos "\0"
1377*75f6d617Schristos # define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
1378*75f6d617Schristos gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
1379*75f6d617Schristos "\0"
1380*75f6d617Schristos # define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
1381*75f6d617Schristos gettext_noop ("Invalid character class name") /* REG_ECTYPE */
1382*75f6d617Schristos "\0"
1383*75f6d617Schristos # define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
1384*75f6d617Schristos gettext_noop ("Trailing backslash") /* REG_EESCAPE */
1385*75f6d617Schristos "\0"
1386*75f6d617Schristos # define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
1387*75f6d617Schristos gettext_noop ("Invalid back reference") /* REG_ESUBREG */
1388*75f6d617Schristos "\0"
1389*75f6d617Schristos # define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
1390*75f6d617Schristos gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
1391*75f6d617Schristos "\0"
1392*75f6d617Schristos # define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
1393*75f6d617Schristos gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
1394*75f6d617Schristos "\0"
1395*75f6d617Schristos # define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
1396*75f6d617Schristos gettext_noop ("Unmatched \\{") /* REG_EBRACE */
1397*75f6d617Schristos "\0"
1398*75f6d617Schristos # define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
1399*75f6d617Schristos gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
1400*75f6d617Schristos "\0"
1401*75f6d617Schristos # define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
1402*75f6d617Schristos gettext_noop ("Invalid range end") /* REG_ERANGE */
1403*75f6d617Schristos "\0"
1404*75f6d617Schristos # define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
1405*75f6d617Schristos gettext_noop ("Memory exhausted") /* REG_ESPACE */
1406*75f6d617Schristos "\0"
1407*75f6d617Schristos # define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
1408*75f6d617Schristos gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
1409*75f6d617Schristos "\0"
1410*75f6d617Schristos # define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
1411*75f6d617Schristos gettext_noop ("Premature end of regular expression") /* REG_EEND */
1412*75f6d617Schristos "\0"
1413*75f6d617Schristos # define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
1414*75f6d617Schristos gettext_noop ("Regular expression too big") /* REG_ESIZE */
1415*75f6d617Schristos "\0"
1416*75f6d617Schristos # define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
1417*75f6d617Schristos gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
1418*75f6d617Schristos };
1419*75f6d617Schristos
/* Start offset of each message inside `re_error_msgid', listed in the same
   order as the messages themselves (REG_NOERROR first, REG_ERPAREN last).
   NOTE(review): presumably indexed by the numeric error code from regex.h;
   the lookup code is not in this part of the file -- confirm.  */
1420*75f6d617Schristos static const size_t re_error_msgid_idx[] =
1421*75f6d617Schristos {
1422*75f6d617Schristos REG_NOERROR_IDX,
1423*75f6d617Schristos REG_NOMATCH_IDX,
1424*75f6d617Schristos REG_BADPAT_IDX,
1425*75f6d617Schristos REG_ECOLLATE_IDX,
1426*75f6d617Schristos REG_ECTYPE_IDX,
1427*75f6d617Schristos REG_EESCAPE_IDX,
1428*75f6d617Schristos REG_ESUBREG_IDX,
1429*75f6d617Schristos REG_EBRACK_IDX,
1430*75f6d617Schristos REG_EPAREN_IDX,
1431*75f6d617Schristos REG_EBRACE_IDX,
1432*75f6d617Schristos REG_BADBR_IDX,
1433*75f6d617Schristos REG_ERANGE_IDX,
1434*75f6d617Schristos REG_ESPACE_IDX,
1435*75f6d617Schristos REG_BADRPT_IDX,
1436*75f6d617Schristos REG_EEND_IDX,
1437*75f6d617Schristos REG_ESIZE_IDX,
1438*75f6d617Schristos REG_ERPAREN_IDX
1439*75f6d617Schristos };
1440*75f6d617Schristos
1441*75f6d617Schristos #endif /* INSIDE_RECURSION */
1442*75f6d617Schristos
1443*75f6d617Schristos #ifndef DEFINED_ONCE
1444*75f6d617Schristos /* Avoiding alloca during matching, to placate r_alloc. */
1445*75f6d617Schristos
1446*75f6d617Schristos /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1447*75f6d617Schristos searching and matching functions should not call alloca. On some
1448*75f6d617Schristos systems, alloca is implemented in terms of malloc, and if we're
1449*75f6d617Schristos using the relocating allocator routines, then malloc could cause a
1450*75f6d617Schristos relocation, which might (if the strings being searched are in the
1451*75f6d617Schristos ralloc heap) shift the data out from underneath the regexp
1452*75f6d617Schristos routines.
1453*75f6d617Schristos
1454*75f6d617Schristos Here's another reason to avoid allocation: Emacs
1455*75f6d617Schristos processes input from X in a signal handler; processing X input may
1456*75f6d617Schristos call malloc; if input arrives while a matching routine is calling
1457*75f6d617Schristos malloc, then we're scrod. But Emacs can't just block input while
1458*75f6d617Schristos calling matching routines; then we don't notice interrupts when
1459*75f6d617Schristos they come in. So, Emacs blocks input around all regexp calls
1460*75f6d617Schristos except the matching calls, which it leaves unprotected, in the
1461*75f6d617Schristos faith that they will not malloc. */
1462*75f6d617Schristos
/* Net effect of the directives below: MATCH_MAY_ALLOCATE ends up defined
   unless `emacs' is defined together with C_ALLOCA or REGEX_MALLOC.
   Under GNU C, C_ALLOCA is undefined first, so in that case only
   REGEX_MALLOC (with `emacs') can turn MATCH_MAY_ALLOCATE off.  */
1463*75f6d617Schristos /* Normally, this is fine. */
1464*75f6d617Schristos # define MATCH_MAY_ALLOCATE
1465*75f6d617Schristos
1466*75f6d617Schristos /* When using GNU C, we are not REALLY using the C alloca, no matter
1467*75f6d617Schristos what config.h may say. So don't take precautions for it. */
1468*75f6d617Schristos # ifdef __GNUC__
1469*75f6d617Schristos # undef C_ALLOCA
1470*75f6d617Schristos # endif
1471*75f6d617Schristos
1472*75f6d617Schristos /* The match routines may not allocate if (1) they would do it with malloc
1473*75f6d617Schristos and (2) it's not safe for them to use malloc.
1474*75f6d617Schristos Note that if REL_ALLOC is defined, matching would not use malloc for the
1475*75f6d617Schristos failure stack, but we would still use it for the register vectors;
1476*75f6d617Schristos so REL_ALLOC should not affect this. */
1477*75f6d617Schristos # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1478*75f6d617Schristos # undef MATCH_MAY_ALLOCATE
1479*75f6d617Schristos # endif
1480*75f6d617Schristos #endif /* not DEFINED_ONCE */
1481*75f6d617Schristos
1482*75f6d617Schristos #ifdef INSIDE_RECURSION
1483*75f6d617Schristos /* Failure stack declarations and macros; both re_compile_fastmap and
1484*75f6d617Schristos re_match_2 use a failure stack. These have to be macros because of
1485*75f6d617Schristos REGEX_ALLOCATE_STACK. */
1486*75f6d617Schristos
1487*75f6d617Schristos
1488*75f6d617Schristos /* Number of failure points for which to initially allocate space
1489*75f6d617Schristos when matching. If this number is exceeded, we allocate more
1490*75f6d617Schristos space, so it is not a hard limit.

   Note that INIT_FAIL_STACK uses this value as a count of stack
   elements (it allocates INIT_FAILURE_ALLOC * sizeof (element) and sets
   the stack size to INIT_FAILURE_ALLOC), and one failure point occupies
   several elements.  The #ifndef lets a build supply its own value.  */
1491*75f6d617Schristos # ifndef INIT_FAILURE_ALLOC
1492*75f6d617Schristos # define INIT_FAILURE_ALLOC 5
1493*75f6d617Schristos # endif
1494*75f6d617Schristos
1495*75f6d617Schristos /* Roughly the maximum number of failure points on the stack. Would be
1496*75f6d617Schristos exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1497*75f6d617Schristos This is a variable only so users of regex can assign to it; we never
1498*75f6d617Schristos change it ourselves. */
1499*75f6d617Schristos
/* Two parallel sets of definitions follow.  They differ only in integer
   width: with INT_IS_16BIT, `re_max_failures', the `integer' member and
   the size/avail counters are long / unsigned long; otherwise they are
   int / unsigned.  `re_max_failures' is defined only once (guarded by
   DEFINED_ONCE); the PREFIX'd types are defined on every pass.  */
1500*75f6d617Schristos # ifdef INT_IS_16BIT
1501*75f6d617Schristos
1502*75f6d617Schristos # ifndef DEFINED_ONCE
1503*75f6d617Schristos # if defined MATCH_MAY_ALLOCATE
1504*75f6d617Schristos /* 4400 was enough to cause a crash on Alpha OSF/1,
1505*75f6d617Schristos whose default stack limit is 2mb. */
1506*75f6d617Schristos long int re_max_failures = 4000;
1507*75f6d617Schristos # else
1508*75f6d617Schristos long int re_max_failures = 2000;
1509*75f6d617Schristos # endif
1510*75f6d617Schristos # endif
1511*75f6d617Schristos
/* One slot of the failure stack: holds either a pointer or an integer.  */
PREFIX(fail_stack_elt)1512*75f6d617Schristos union PREFIX(fail_stack_elt)
1513*75f6d617Schristos {
1514*75f6d617Schristos UCHAR_T *pointer;
1515*75f6d617Schristos long int integer;
1516*75f6d617Schristos };
1517*75f6d617Schristos
1518*75f6d617Schristos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1519*75f6d617Schristos
/* The failure stack itself: `size' is the number of allocated slots,
   `avail' the index of the next free slot (avail == size means full).  */
1520*75f6d617Schristos typedef struct
1521*75f6d617Schristos {
1522*75f6d617Schristos PREFIX(fail_stack_elt_t) *stack;
1523*75f6d617Schristos unsigned long int size;
1524*75f6d617Schristos unsigned long int avail; /* Offset of next open position. */
1525*75f6d617Schristos } PREFIX(fail_stack_type);
1526*75f6d617Schristos
1527*75f6d617Schristos # else /* not INT_IS_16BIT */
1528*75f6d617Schristos
1529*75f6d617Schristos # ifndef DEFINED_ONCE
1530*75f6d617Schristos # if defined MATCH_MAY_ALLOCATE
1531*75f6d617Schristos /* 4400 was enough to cause a crash on Alpha OSF/1,
1532*75f6d617Schristos whose default stack limit is 2mb. */
1533*75f6d617Schristos int re_max_failures = 4000;
1534*75f6d617Schristos # else
1535*75f6d617Schristos int re_max_failures = 2000;
1536*75f6d617Schristos # endif
1537*75f6d617Schristos # endif
1538*75f6d617Schristos
/* One slot of the failure stack: holds either a pointer or an integer.  */
PREFIX(fail_stack_elt)1539*75f6d617Schristos union PREFIX(fail_stack_elt)
1540*75f6d617Schristos {
1541*75f6d617Schristos UCHAR_T *pointer;
1542*75f6d617Schristos int integer;
1543*75f6d617Schristos };
1544*75f6d617Schristos
1545*75f6d617Schristos typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
1546*75f6d617Schristos
/* The failure stack itself: `size' is the number of allocated slots,
   `avail' the index of the next free slot (avail == size means full).  */
1547*75f6d617Schristos typedef struct
1548*75f6d617Schristos {
1549*75f6d617Schristos PREFIX(fail_stack_elt_t) *stack;
1550*75f6d617Schristos unsigned size;
1551*75f6d617Schristos unsigned avail; /* Offset of next open position. */
1552*75f6d617Schristos } PREFIX(fail_stack_type);
1553*75f6d617Schristos
1554*75f6d617Schristos # endif /* INT_IS_16BIT */
1555*75f6d617Schristos
/* Predicates on the failure stack.  FAIL_STACK_EMPTY and FAIL_STACK_FULL
   take no argument and refer to a variable named `fail_stack' at the point
   of use; FAIL_STACK_PTR_EMPTY refers to a pointer named `fail_stack_ptr'.
   The stack is full when `avail' (next free slot) equals `size'.  */
1556*75f6d617Schristos # ifndef DEFINED_ONCE
1557*75f6d617Schristos # define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
1558*75f6d617Schristos # define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1559*75f6d617Schristos # define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1560*75f6d617Schristos # endif
1561*75f6d617Schristos
1562*75f6d617Schristos
1563*75f6d617Schristos /* Define macros to initialize and free the failure stack.
1564*75f6d617Schristos Do `return -2' if the alloc fails.

   Both macros operate on a variable named `fail_stack'.  Because
   INIT_FAIL_STACK may execute `return -2', it can only be used inside a
   function whose return type accepts that value.

   With MATCH_MAY_ALLOCATE: INIT_FAIL_STACK obtains INIT_FAILURE_ALLOC
   slots through REGEX_ALLOCATE_STACK and marks the stack empty;
   RESET_FAIL_STACK hands the storage to REGEX_FREE_STACK.

   Without MATCH_MAY_ALLOCATE: INIT_FAIL_STACK only marks the stack empty
   and RESET_FAIL_STACK expands to nothing.
   NOTE(review): the storage is then presumably set up elsewhere ahead of
   time -- confirm.  */
1565*75f6d617Schristos
1566*75f6d617Schristos # ifdef MATCH_MAY_ALLOCATE
1567*75f6d617Schristos # define INIT_FAIL_STACK() \
1568*75f6d617Schristos do { \
1569*75f6d617Schristos fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
1570*75f6d617Schristos REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
1571*75f6d617Schristos \
1572*75f6d617Schristos if (fail_stack.stack == NULL) \
1573*75f6d617Schristos return -2; \
1574*75f6d617Schristos \
1575*75f6d617Schristos fail_stack.size = INIT_FAILURE_ALLOC; \
1576*75f6d617Schristos fail_stack.avail = 0; \
1577*75f6d617Schristos } while (0)
1578*75f6d617Schristos
1579*75f6d617Schristos # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1580*75f6d617Schristos # else
1581*75f6d617Schristos # define INIT_FAIL_STACK() \
1582*75f6d617Schristos do { \
1583*75f6d617Schristos fail_stack.avail = 0; \
1584*75f6d617Schristos } while (0)
1585*75f6d617Schristos
1586*75f6d617Schristos # define RESET_FAIL_STACK()
1587*75f6d617Schristos # endif
1588*75f6d617Schristos
1589*75f6d617Schristos
/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.

   Return 1 if succeeds, and 0 if either ran out of memory
   allocating space for it or it was already too large.

   The limit (re_max_failures * MAX_FAILURE_ITEMS slots) is computed and
   compared as `unsigned long'.  A plain `unsigned' cast truncates the
   product when int is 16 bits (re_max_failures is a long there), and
   multiplying in signed arithmetic can overflow for large settings.

   REGEX_REALLOCATE_STACK requires `destination' be declared.

   NOTE(review): on allocation failure the `stack' member has already been
   overwritten with NULL; whether the previous block is still reachable
   depends on how REGEX_REALLOCATE_STACK is implemented -- confirm.  */

# define DOUBLE_FAIL_STACK(fail_stack)					\
  ((fail_stack).size							\
   > ((unsigned long) re_max_failures * MAX_FAILURE_ITEMS)		\
   ? 0									\
   : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *)			\
        REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
          (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)),	\
          ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size <<= 1,					\
         1)))
1609*75f6d617Schristos
1610*75f6d617Schristos
/* Push pointer POINTER on FAIL_STACK.
   Return 1 if was able to do so and 0 if ran out of memory allocating
   space to do so.

   The fullness test is made on the FAIL_STACK argument itself (not on a
   variable that merely happens to be called `fail_stack'), so the macro
   works for whatever stack object the caller passes.  POINTER is
   parenthesized so that any expression may be given.  If the stack is
   full, DOUBLE_FAIL_STACK is tried first.  */
# define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
  (((FAIL_STACK).avail == (FAIL_STACK).size				\
    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   ? 0									\
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = (POINTER),	\
      1))
1620*75f6d617Schristos
/* Push a pointer value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.

   None of the PUSH_FAILURE_... macros checks for room: the caller must
   have made sure a free slot exists.  Each expansion is fully
   parenthesized so it stays a single operand wherever it is used.  */
# define PUSH_FAILURE_POINTER(item)					\
  (fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item))

/* This pushes an integer-valued item onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_INT(item)						\
  (fail_stack.stack[fail_stack.avail++].integer = (item))

/* Push a fail_stack_elt_t value onto the failure stack.
   Assumes the variable `fail_stack'.  Probably should only
   be called from within `PUSH_FAILURE_POINT'.  */
# define PUSH_FAILURE_ELT(item)						\
  (fail_stack.stack[fail_stack.avail++] = (item))

/* These three POP... operations complement the three PUSH... operations.
   All assume that `fail_stack' is nonempty; each decrements `avail' and
   yields the slot that was on top.  */
# define POP_FAILURE_POINTER() (fail_stack.stack[--fail_stack.avail].pointer)
# define POP_FAILURE_INT() (fail_stack.stack[--fail_stack.avail].integer)
# define POP_FAILURE_ELT() (fail_stack.stack[--fail_stack.avail])
1644*75f6d617Schristos
1645*75f6d617Schristos /* Used to omit pushing failure point id's when we're not debugging.

   With DEBUG, DEBUG_PUSH is PUSH_FAILURE_INT and DEBUG_POP stores the
   popped integer through ITEM_ADDR.  Without DEBUG both expand to nothing,
   so their arguments are not evaluated and no stack slot is used.  */
1646*75f6d617Schristos # ifdef DEBUG
1647*75f6d617Schristos # define DEBUG_PUSH PUSH_FAILURE_INT
1648*75f6d617Schristos # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1649*75f6d617Schristos # else
1650*75f6d617Schristos # define DEBUG_PUSH(item)
1651*75f6d617Schristos # define DEBUG_POP(item_addr)
1652*75f6d617Schristos # endif
1653*75f6d617Schristos
1654*75f6d617Schristos
1655*75f6d617Schristos /* Push the information about the state we will need
1656*75f6d617Schristos if we ever fail back to it.
1657*75f6d617Schristos
1658*75f6d617Schristos Requires variables fail_stack, regstart, regend, reg_info, and
1659*75f6d617Schristos num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination'
1660*75f6d617Schristos be declared.
1661*75f6d617Schristos
1662*75f6d617Schristos Does `return FAILURE_CODE' if DOUBLE_FAIL_STACK fails (out of memory, or
   the stack is already at its size limit).

   Also reads lowest_active_reg and highest_active_reg; the debugging
   statements additionally mention failure_id, nfailure_points_pushed,
   bufp, pend, string1, size1, string2 and size2.  `destination' is
   declared locally at the top of the macro body.

   The stack is first grown until REMAINING_AVAIL_SLOTS is at least
   NUM_FAILURE_ITEMS.  Items are then pushed in this order:
     - for each register from lowest_active_reg to highest_active_reg:
       regstart[reg], regend[reg], reg_info[reg].word;
     - lowest_active_reg, highest_active_reg;
     - PATTERN_PLACE, STRING_PLACE;
     - failure_id (via DEBUG_PUSH, so only when DEBUG is defined).
   POP_FAILURE_POINT pops them in the reverse order.

   The `if (1)' in front of the register loop is always true.  The two
   adjacent comments about the type of `this_reg' contradict each other;
   the declaration that follows them uses active_reg_t.  */
1663*75f6d617Schristos
1664*75f6d617Schristos # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
1665*75f6d617Schristos do { \
1666*75f6d617Schristos char *destination; \
1667*75f6d617Schristos /* Must be int, so when we don't save any registers, the arithmetic \
1668*75f6d617Schristos of 0 + -1 isn't done as unsigned. */ \
1669*75f6d617Schristos /* Can't be int, since there is not a shred of a guarantee that int \
1670*75f6d617Schristos is wide enough to hold a value of something to which pointer can \
1671*75f6d617Schristos be assigned */ \
1672*75f6d617Schristos active_reg_t this_reg; \
1673*75f6d617Schristos \
1674*75f6d617Schristos DEBUG_STATEMENT (failure_id++); \
1675*75f6d617Schristos DEBUG_STATEMENT (nfailure_points_pushed++); \
1676*75f6d617Schristos DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1677*75f6d617Schristos DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1678*75f6d617Schristos DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1679*75f6d617Schristos \
1680*75f6d617Schristos DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
1681*75f6d617Schristos DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1682*75f6d617Schristos \
1683*75f6d617Schristos /* Ensure we have enough space allocated for what we will push. */ \
1684*75f6d617Schristos while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1685*75f6d617Schristos { \
1686*75f6d617Schristos if (!DOUBLE_FAIL_STACK (fail_stack)) \
1687*75f6d617Schristos return failure_code; \
1688*75f6d617Schristos \
1689*75f6d617Schristos DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1690*75f6d617Schristos (fail_stack).size); \
1691*75f6d617Schristos DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1692*75f6d617Schristos } \
1693*75f6d617Schristos \
1694*75f6d617Schristos /* Push the info, starting with the registers. */ \
1695*75f6d617Schristos DEBUG_PRINT1 ("\n"); \
1696*75f6d617Schristos \
1697*75f6d617Schristos if (1) \
1698*75f6d617Schristos for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1699*75f6d617Schristos this_reg++) \
1700*75f6d617Schristos { \
1701*75f6d617Schristos DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
1702*75f6d617Schristos DEBUG_STATEMENT (num_regs_pushed++); \
1703*75f6d617Schristos \
1704*75f6d617Schristos DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
1705*75f6d617Schristos PUSH_FAILURE_POINTER (regstart[this_reg]); \
1706*75f6d617Schristos \
1707*75f6d617Schristos DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
1708*75f6d617Schristos PUSH_FAILURE_POINTER (regend[this_reg]); \
1709*75f6d617Schristos \
1710*75f6d617Schristos DEBUG_PRINT2 (" info: %p\n ", \
1711*75f6d617Schristos reg_info[this_reg].word.pointer); \
1712*75f6d617Schristos DEBUG_PRINT2 (" match_null=%d", \
1713*75f6d617Schristos REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
1714*75f6d617Schristos DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
1715*75f6d617Schristos DEBUG_PRINT2 (" matched_something=%d", \
1716*75f6d617Schristos MATCHED_SOMETHING (reg_info[this_reg])); \
1717*75f6d617Schristos DEBUG_PRINT2 (" ever_matched=%d", \
1718*75f6d617Schristos EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
1719*75f6d617Schristos DEBUG_PRINT1 ("\n"); \
1720*75f6d617Schristos PUSH_FAILURE_ELT (reg_info[this_reg].word); \
1721*75f6d617Schristos } \
1722*75f6d617Schristos \
1723*75f6d617Schristos DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
1724*75f6d617Schristos PUSH_FAILURE_INT (lowest_active_reg); \
1725*75f6d617Schristos \
1726*75f6d617Schristos DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
1727*75f6d617Schristos PUSH_FAILURE_INT (highest_active_reg); \
1728*75f6d617Schristos \
1729*75f6d617Schristos DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
1730*75f6d617Schristos DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1731*75f6d617Schristos PUSH_FAILURE_POINTER (pattern_place); \
1732*75f6d617Schristos \
1733*75f6d617Schristos DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
1734*75f6d617Schristos DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1735*75f6d617Schristos size2); \
1736*75f6d617Schristos DEBUG_PRINT1 ("'\n"); \
1737*75f6d617Schristos PUSH_FAILURE_POINTER (string_place); \
1738*75f6d617Schristos \
1739*75f6d617Schristos DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1740*75f6d617Schristos DEBUG_PUSH (failure_id); \
1741*75f6d617Schristos } while (0)
1742*75f6d617Schristos
# ifndef DEFINED_ONCE
/* This is the number of items that are pushed and popped on the stack
   for each register (start pointer, end pointer, info word).  */
#  define NUM_REG_ITEMS  3

/* Individual items aside from the registers: lowest and highest active
   register, pattern position, string position and, when debugging, the
   failure point id.  */
#  ifdef DEBUG
#   define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
#  else
#   define NUM_NONREG_ITEMS 4
#  endif

/* We push at most this many items on the stack.  */
/* We used to use (num_regs - 1), which is the number of registers
   this regexp will save; but that was changed to 5
   to avoid stack overflow for a regexp with lots of parens.  */
#  define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* We actually push this many items: NUM_REG_ITEMS for every register from
   lowest_active_reg to highest_active_reg inclusive, plus the fixed items.
   Both register variables must be in scope where this is used.  When
   lowest_active_reg is exactly one more than highest_active_reg the
   register count is zero.  */
#  define NUM_FAILURE_ITEMS						\
  (((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS)	\
   + NUM_NONREG_ITEMS)

/* How many items can still be added to the stack without overflowing it.
   Refers to a variable named `fail_stack'.  */
#  define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
# endif /* not DEFINED_ONCE */
1771*75f6d617Schristos
1772*75f6d617Schristos
1773*75f6d617Schristos /* Pops what PUSH_FAILURE_POINT pushes.
1774*75f6d617Schristos
1775*75f6d617Schristos We restore into the parameters, all of which should be lvalues:
1776*75f6d617Schristos STR -- the saved data position.
1777*75f6d617Schristos PAT -- the saved pattern position.
1778*75f6d617Schristos LOW_REG, HIGH_REG -- the lowest and highest active registers.
1779*75f6d617Schristos REGSTART, REGEND -- arrays of string positions.
1780*75f6d617Schristos REG_INFO -- array of information about each subexpression.
1781*75f6d617Schristos
1782*75f6d617Schristos Also assumes the variables `fail_stack' and (if debugging), `bufp',
1783*75f6d617Schristos `pend', `string1', `size1', `string2', and `size2'.

   Further details visible in the body:
     - STR is left untouched when the saved string position is NULL.
     - Registers are restored from HIGH_REG down to LOW_REG, the reverse
       of the order in which they were pushed.
     - `set_regs_matched_done' is reset to 0, so it must be in scope too.
     - The `else' branch belongs to `if (1)' and is therefore never
       executed.
     - The expansion is a bare brace block, not a do/while (0) statement,
       so a following semicolon forms a separate empty statement.  */
1784*75f6d617Schristos # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1785*75f6d617Schristos { \
1786*75f6d617Schristos DEBUG_STATEMENT (unsigned failure_id;) \
1787*75f6d617Schristos active_reg_t this_reg; \
1788*75f6d617Schristos const UCHAR_T *string_temp; \
1789*75f6d617Schristos \
1790*75f6d617Schristos assert (!FAIL_STACK_EMPTY ()); \
1791*75f6d617Schristos \
1792*75f6d617Schristos /* Remove failure points and point to how many regs pushed. */ \
1793*75f6d617Schristos DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1794*75f6d617Schristos DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1795*75f6d617Schristos DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1796*75f6d617Schristos \
1797*75f6d617Schristos assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1798*75f6d617Schristos \
1799*75f6d617Schristos DEBUG_POP (&failure_id); \
1800*75f6d617Schristos DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1801*75f6d617Schristos \
1802*75f6d617Schristos /* If the saved string location is NULL, it came from an \
1803*75f6d617Schristos on_failure_keep_string_jump opcode, and we want to throw away the \
1804*75f6d617Schristos saved NULL, thus retaining our current position in the string. */ \
1805*75f6d617Schristos string_temp = POP_FAILURE_POINTER (); \
1806*75f6d617Schristos if (string_temp != NULL) \
1807*75f6d617Schristos str = (const CHAR_T *) string_temp; \
1808*75f6d617Schristos \
1809*75f6d617Schristos DEBUG_PRINT2 (" Popping string %p: `", str); \
1810*75f6d617Schristos DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1811*75f6d617Schristos DEBUG_PRINT1 ("'\n"); \
1812*75f6d617Schristos \
1813*75f6d617Schristos pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
1814*75f6d617Schristos DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
1815*75f6d617Schristos DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1816*75f6d617Schristos \
1817*75f6d617Schristos /* Restore register info. */ \
1818*75f6d617Schristos high_reg = (active_reg_t) POP_FAILURE_INT (); \
1819*75f6d617Schristos DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
1820*75f6d617Schristos \
1821*75f6d617Schristos low_reg = (active_reg_t) POP_FAILURE_INT (); \
1822*75f6d617Schristos DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
1823*75f6d617Schristos \
1824*75f6d617Schristos if (1) \
1825*75f6d617Schristos for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1826*75f6d617Schristos { \
1827*75f6d617Schristos DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
1828*75f6d617Schristos \
1829*75f6d617Schristos reg_info[this_reg].word = POP_FAILURE_ELT (); \
1830*75f6d617Schristos DEBUG_PRINT2 (" info: %p\n", \
1831*75f6d617Schristos reg_info[this_reg].word.pointer); \
1832*75f6d617Schristos \
1833*75f6d617Schristos regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
1834*75f6d617Schristos DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
1835*75f6d617Schristos \
1836*75f6d617Schristos regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
1837*75f6d617Schristos DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
1838*75f6d617Schristos } \
1839*75f6d617Schristos else \
1840*75f6d617Schristos { \
1841*75f6d617Schristos for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1842*75f6d617Schristos { \
1843*75f6d617Schristos reg_info[this_reg].word.integer = 0; \
1844*75f6d617Schristos regend[this_reg] = 0; \
1845*75f6d617Schristos regstart[this_reg] = 0; \
1846*75f6d617Schristos } \
1847*75f6d617Schristos highest_active_reg = high_reg; \
1848*75f6d617Schristos } \
1849*75f6d617Schristos \
1850*75f6d617Schristos set_regs_matched_done = 0; \
1851*75f6d617Schristos DEBUG_STATEMENT (nfailure_points_popped++); \
1852*75f6d617Schristos } /* POP_FAILURE_POINT */
1853*75f6d617Schristos
1854*75f6d617Schristos /* Structure for per-register (a.k.a. per-group) information.
1855*75f6d617Schristos Other register information, such as the
1856*75f6d617Schristos starting and ending positions (which are addresses), and the list of
1857*75f6d617Schristos inner groups (which is a bits list) are maintained in separate
1858*75f6d617Schristos variables.
1859*75f6d617Schristos
1860*75f6d617Schristos We are making a (strictly speaking) nonportable assumption here: that
1861*75f6d617Schristos the compiler will pack our bit fields into something that fits into
1862*75f6d617Schristos the type of `word', i.e., is something that fits into one item on the
1863*75f6d617Schristos failure stack.

   The union overlays the bit fields with `word' so that the whole record
   can be saved and restored as a single failure-stack element (see the
   uses of reg_info[...].word in PUSH_FAILURE_POINT and
   POP_FAILURE_POINT).  */
1864*75f6d617Schristos
1865*75f6d617Schristos
1866*75f6d617Schristos /* Declarations and macros for re_match_2. */
1867*75f6d617Schristos
1868*75f6d617Schristos typedef union
1869*75f6d617Schristos {
1870*75f6d617Schristos PREFIX(fail_stack_elt_t) word;
1871*75f6d617Schristos struct
1872*75f6d617Schristos {
1873*75f6d617Schristos /* This field is one if this group can match the empty string,
1874*75f6d617Schristos zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'.
   (3 is the largest value the 2-bit field can hold.) */
1875*75f6d617Schristos # define MATCH_NULL_UNSET_VALUE 3
1876*75f6d617Schristos unsigned match_null_string_p : 2;
1877*75f6d617Schristos unsigned is_active : 1;
1878*75f6d617Schristos unsigned matched_something : 1;
1879*75f6d617Schristos unsigned ever_matched_something : 1;
1880*75f6d617Schristos } bits;
1881*75f6d617Schristos } PREFIX(register_info_type);
1882*75f6d617Schristos
/* Accessors for the bit fields of a register_info_type R.  Each expands
   to the bit-field member itself, so it can be read or assigned to.  */
1883*75f6d617Schristos # ifndef DEFINED_ONCE
1884*75f6d617Schristos # define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
1885*75f6d617Schristos # define IS_ACTIVE(R) ((R).bits.is_active)
1886*75f6d617Schristos # define MATCHED_SOMETHING(R) ((R).bits.matched_something)
1887*75f6d617Schristos # define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
1888*75f6d617Schristos
1889*75f6d617Schristos
1890*75f6d617Schristos /* Call this when have matched a real character; it sets `matched' flags
1891*75f6d617Schristos for the subexpressions which we are currently inside. Also records
1892*75f6d617Schristos that those subexprs have matched.

   Requires `set_regs_matched_done', `lowest_active_reg',
   `highest_active_reg' and `reg_info' in scope.  The loop runs only while
   `set_regs_matched_done' is zero and sets it to 1, so repeated calls do
   nothing until the flag is cleared again (POP_FAILURE_POINT clears
   it).  */
1893*75f6d617Schristos # define SET_REGS_MATCHED() \
1894*75f6d617Schristos do \
1895*75f6d617Schristos { \
1896*75f6d617Schristos if (!set_regs_matched_done) \
1897*75f6d617Schristos { \
1898*75f6d617Schristos active_reg_t r; \
1899*75f6d617Schristos set_regs_matched_done = 1; \
1900*75f6d617Schristos for (r = lowest_active_reg; r <= highest_active_reg; r++) \
1901*75f6d617Schristos { \
1902*75f6d617Schristos MATCHED_SOMETHING (reg_info[r]) \
1903*75f6d617Schristos = EVER_MATCHED_SOMETHING (reg_info[r]) \
1904*75f6d617Schristos = 1; \
1905*75f6d617Schristos } \
1906*75f6d617Schristos } \
1907*75f6d617Schristos } \
1908*75f6d617Schristos while (0)
1909*75f6d617Schristos # endif /* not DEFINED_ONCE */
1910*75f6d617Schristos
1911*75f6d617Schristos /* Registers are set to a sentinel when they haven't yet matched.
   The sentinel is the address of a private static dummy object;
   REG_UNSET (e) tests for it by comparing addresses.  */
1912*75f6d617Schristos static CHAR_T PREFIX(reg_unset_dummy);
1913*75f6d617Schristos # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
1914*75f6d617Schristos # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1915*75f6d617Schristos
1916*75f6d617Schristos /* Subroutine declarations and macros for regex_compile.

   These are forward declarations only; the definitions are not in this
   part of the file.  Names wrapped in PREFIX () are declared once per
   inclusion of this section.  When WCHAR is defined the range compiler is
   wcs_compile_range and insert_space is declared as well; otherwise the
   range compiler is byte_compile_range.
   NOTE(review): _RE_ARGS presumably hides the parameter lists from
   pre-ANSI compilers, like PARAMS at the top of the file -- confirm.  */
1917*75f6d617Schristos static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg));
1918*75f6d617Schristos static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1919*75f6d617Schristos int arg1, int arg2));
1920*75f6d617Schristos static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1921*75f6d617Schristos int arg, UCHAR_T *end));
1922*75f6d617Schristos static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
1923*75f6d617Schristos int arg1, int arg2, UCHAR_T *end));
1924*75f6d617Schristos static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern,
1925*75f6d617Schristos const CHAR_T *p,
1926*75f6d617Schristos reg_syntax_t syntax));
1927*75f6d617Schristos static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p,
1928*75f6d617Schristos const CHAR_T *pend,
1929*75f6d617Schristos reg_syntax_t syntax));
1930*75f6d617Schristos # ifdef WCHAR
1931*75f6d617Schristos static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start,
1932*75f6d617Schristos const CHAR_T **p_ptr,
1933*75f6d617Schristos const CHAR_T *pend,
1934*75f6d617Schristos char *translate,
1935*75f6d617Schristos reg_syntax_t syntax,
1936*75f6d617Schristos UCHAR_T *b,
1937*75f6d617Schristos CHAR_T *char_set));
1938*75f6d617Schristos static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end));
1939*75f6d617Schristos # else /* BYTE */
1940*75f6d617Schristos static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start,
1941*75f6d617Schristos const char **p_ptr,
1942*75f6d617Schristos const char *pend,
1943*75f6d617Schristos char *translate,
1944*75f6d617Schristos reg_syntax_t syntax,
1945*75f6d617Schristos unsigned char *b));
1946*75f6d617Schristos # endif /* WCHAR */
1947*75f6d617Schristos
1948*75f6d617Schristos /* Fetch the next character in the uncompiled pattern into C, translating it
1949*75f6d617Schristos if a `translate' table is set. The character is first cast from the (possibly signed) character in the constant
1950*75f6d617Schristos string passed to us by the user to an unsigned type that we can use
1951*75f6d617Schristos as an array index (in, e.g., `translate'). If the pattern is exhausted (p == pend) the macro makes the enclosing function return REG_EEND. */
1952*75f6d617Schristos /* In the WCHAR variant we translate only if the character is <= 0xff,
1953*75f6d617Schristos because it is impossible to allocate a 4GB array for encodings
1954*75f6d617Schristos which have a 4-byte character set such as UCS4. */
1955*75f6d617Schristos # ifndef PATFETCH
1956*75f6d617Schristos # ifdef WCHAR
1957*75f6d617Schristos # define PATFETCH(c) \
1958*75f6d617Schristos do {if (p == pend) return REG_EEND; \
1959*75f6d617Schristos c = (UCHAR_T) *p++; \
1960*75f6d617Schristos if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \
1961*75f6d617Schristos } while (0)
1962*75f6d617Schristos # else /* BYTE */
1963*75f6d617Schristos # define PATFETCH(c) \
1964*75f6d617Schristos do {if (p == pend) return REG_EEND; \
1965*75f6d617Schristos c = (unsigned char) *p++; \
1966*75f6d617Schristos if (translate) c = (unsigned char) translate[c]; \
1967*75f6d617Schristos } while (0)
1968*75f6d617Schristos # endif /* WCHAR */
1969*75f6d617Schristos # endif
1970*75f6d617Schristos
1971*75f6d617Schristos /* Fetch the next character in the uncompiled pattern into C, with no
1972*75f6d617Schristos translation. Like PATFETCH, it makes the enclosing function return REG_EEND when p == pend. */
1973*75f6d617Schristos # define PATFETCH_RAW(c) \
1974*75f6d617Schristos do {if (p == pend) return REG_EEND; \
1975*75f6d617Schristos c = (UCHAR_T) *p++; \
1976*75f6d617Schristos } while (0)
1977*75f6d617Schristos
1978*75f6d617Schristos /* Go backwards one character in the pattern. The macro only decrements p; it does not check that p is past the start of the pattern. */
1979*75f6d617Schristos # define PATUNFETCH p--
1980*75f6d617Schristos
1981*75f6d617Schristos
1982*75f6d617Schristos /* If `translate' is non-null, return translate[D], else just D. We
1983*75f6d617Schristos cast the subscript to translate because some data is declared as
1984*75f6d617Schristos `char *', to avoid warnings when a string constant is passed. But
1985*75f6d617Schristos when we use a character as a subscript we must make it unsigned. The looked-up value is cast to `char'; D is evaluated more than once. */
1986*75f6d617Schristos /* In the WCHAR variant we translate only if the character is <= 0xff,
1987*75f6d617Schristos because it is impossible to allocate a 4GB array for encodings
1988*75f6d617Schristos which have a 4-byte character set such as UCS4. */
1989*75f6d617Schristos
1990*75f6d617Schristos # ifndef TRANSLATE
1991*75f6d617Schristos # ifdef WCHAR
1992*75f6d617Schristos # define TRANSLATE(d) \
1993*75f6d617Schristos ((translate && ((UCHAR_T) (d)) <= 0xff) \
1994*75f6d617Schristos ? (char) translate[(unsigned char) (d)] : (d))
1995*75f6d617Schristos # else /* BYTE */
1996*75f6d617Schristos # define TRANSLATE(d) \
1997*75f6d617Schristos (translate ? (char) translate[(unsigned char) (d)] : (d))
1998*75f6d617Schristos # endif /* WCHAR */
1999*75f6d617Schristos # endif
2000*75f6d617Schristos
2001*75f6d617Schristos
2002*75f6d617Schristos /* Macros for outputting the compiled pattern into `buffer'. */
2003*75f6d617Schristos
2004*75f6d617Schristos /* If the buffer isn't allocated when it comes in, use this size: the number of bytes needed for 32 UCHAR_T elements. */
2005*75f6d617Schristos # define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))
2006*75f6d617Schristos
2007*75f6d617Schristos /* Make sure we have room for at least N more elements in the buffer, calling EXTEND_BUFFER repeatedly until there is. In the WCHAR variant N is scaled by sizeof(CHAR_T); in the BYTE variant N is a byte count. */
2008*75f6d617Schristos # ifdef WCHAR
2009*75f6d617Schristos # define GET_BUFFER_SPACE(n) \
2010*75f6d617Schristos while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
2011*75f6d617Schristos + (n)*sizeof(CHAR_T)) > bufp->allocated) \
2012*75f6d617Schristos EXTEND_BUFFER ()
2013*75f6d617Schristos # else /* BYTE */
2014*75f6d617Schristos # define GET_BUFFER_SPACE(n) \
2015*75f6d617Schristos while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
2016*75f6d617Schristos EXTEND_BUFFER ()
2017*75f6d617Schristos # endif /* WCHAR */
2018*75f6d617Schristos
2019*75f6d617Schristos /* Make sure we have room for one more element in the buffer and then append C (cast to UCHAR_T) to it, advancing `b'. */
2020*75f6d617Schristos # define BUF_PUSH(c) \
2021*75f6d617Schristos do { \
2022*75f6d617Schristos GET_BUFFER_SPACE (1); \
2023*75f6d617Schristos *b++ = (UCHAR_T) (c); \
2024*75f6d617Schristos } while (0)
2025*75f6d617Schristos
2026*75f6d617Schristos
2027*75f6d617Schristos /* Ensure we have room for two more elements in the buffer and then append C1 and C2, in that order. */
2028*75f6d617Schristos # define BUF_PUSH_2(c1, c2) \
2029*75f6d617Schristos do { \
2030*75f6d617Schristos GET_BUFFER_SPACE (2); \
2031*75f6d617Schristos *b++ = (UCHAR_T) (c1); \
2032*75f6d617Schristos *b++ = (UCHAR_T) (c2); \
2033*75f6d617Schristos } while (0)
2034*75f6d617Schristos
2035*75f6d617Schristos
2036*75f6d617Schristos /* As with BUF_PUSH_2, except for three elements. */
2037*75f6d617Schristos # define BUF_PUSH_3(c1, c2, c3) \
2038*75f6d617Schristos do { \
2039*75f6d617Schristos GET_BUFFER_SPACE (3); \
2040*75f6d617Schristos *b++ = (UCHAR_T) (c1); \
2041*75f6d617Schristos *b++ = (UCHAR_T) (c2); \
2042*75f6d617Schristos *b++ = (UCHAR_T) (c3); \
2043*75f6d617Schristos } while (0)
2044*75f6d617Schristos
2045*75f6d617Schristos /* Store a jump with opcode OP at LOC to location TO. We store a
2046*75f6d617Schristos relative address: TO - LOC minus (1 + OFFSET_ADDRESS_SIZE), presumably the opcode plus its address operand, i.e. the space the jump itself occupies. */
2047*75f6d617Schristos # define STORE_JUMP(op, loc, to) \
2048*75f6d617Schristos PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
2049*75f6d617Schristos
2050*75f6d617Schristos /* Likewise, for a two-argument jump; ARG is passed through as the second operand. */
2051*75f6d617Schristos # define STORE_JUMP2(op, loc, to, arg) \
2052*75f6d617Schristos PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
2053*75f6d617Schristos
2054*75f6d617Schristos /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
2055*75f6d617Schristos # define INSERT_JUMP(op, loc, to) \
2056*75f6d617Schristos PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
2057*75f6d617Schristos
2058*75f6d617Schristos /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
2059*75f6d617Schristos # define INSERT_JUMP2(op, loc, to, arg) \
2060*75f6d617Schristos PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
2061*75f6d617Schristos arg, b)
2062*75f6d617Schristos
2063*75f6d617Schristos /* This is not an arbitrary limit: the arguments which represent offsets
2064*75f6d617Schristos into the pattern are two bytes long. So if 2^16 bytes turns out to
2065*75f6d617Schristos be too small, many things would have to change. */
2066*75f6d617Schristos /* Any other compiler which, like MSC, has an allocation limit below 2^16
2067*75f6d617Schristos bytes will have to use an approach similar to what was done below for
2068*75f6d617Schristos MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
2069*75f6d617Schristos reallocating to 0 bytes, which is not going to work too well.
2070*75f6d617Schristos You have been warned!! */
2071*75f6d617Schristos # ifndef DEFINED_ONCE
2072*75f6d617Schristos # if defined _MSC_VER && !defined WIN32
2073*75f6d617Schristos /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
2074*75f6d617Schristos The REALLOC define eliminates a flurry of conversion warnings,
2075*75f6d617Schristos but is not required. */
2076*75f6d617Schristos # define MAX_BUF_SIZE 65500L
2077*75f6d617Schristos # define REALLOC(p,s) realloc ((p), (size_t) (s))
2078*75f6d617Schristos # else
2079*75f6d617Schristos # define MAX_BUF_SIZE (1L << 16)
2080*75f6d617Schristos # define REALLOC(p,s) realloc ((p), (s))
2081*75f6d617Schristos # endif
2082*75f6d617Schristos
2083*75f6d617Schristos /* EXTEND_BUFFER extends the buffer to twice its current size via realloc and
2084*75f6d617Schristos resets the pointers that pointed into the old block to point to the
2085*75f6d617Schristos correct places in the new one. The new size is capped at MAX_BUF_SIZE; if the buffer
2086*75f6d617Schristos is already at that limit, the enclosing function returns REG_ESIZE. The helper macros below adjust one pointer by `incr', the distance the block moved. */
2087*75f6d617Schristos # if __BOUNDED_POINTERS__
2088*75f6d617Schristos # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
2089*75f6d617Schristos # define MOVE_BUFFER_POINTER(P) \
2090*75f6d617Schristos (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
2091*75f6d617Schristos # define ELSE_EXTEND_BUFFER_HIGH_BOUND \
2092*75f6d617Schristos else \
2093*75f6d617Schristos { \
2094*75f6d617Schristos SET_HIGH_BOUND (b); \
2095*75f6d617Schristos SET_HIGH_BOUND (begalt); \
2096*75f6d617Schristos if (fixup_alt_jump) \
2097*75f6d617Schristos SET_HIGH_BOUND (fixup_alt_jump); \
2098*75f6d617Schristos if (laststart) \
2099*75f6d617Schristos SET_HIGH_BOUND (laststart); \
2100*75f6d617Schristos if (pending_exact) \
2101*75f6d617Schristos SET_HIGH_BOUND (pending_exact); \
2102*75f6d617Schristos }
2103*75f6d617Schristos # else
2104*75f6d617Schristos # define MOVE_BUFFER_POINTER(P) (P) += incr
2105*75f6d617Schristos # define ELSE_EXTEND_BUFFER_HIGH_BOUND
2106*75f6d617Schristos # endif
2107*75f6d617Schristos # endif /* not DEFINED_ONCE */
2108*75f6d617Schristos
/* EXTEND_BUFFER () doubles the allocated size of the compiled-pattern
   buffer (capped at MAX_BUF_SIZE) with realloc and, if the block moved,
   relocates every local pointer that points into it: `b', `begalt' and,
   when they are non-null, `fixup_alt_jump', `laststart' and
   `pending_exact'.

   The expansion contains `return' statements, so it may leave the
   enclosing function: with REG_ESIZE when the buffer is already at its
   size limit, with REG_ESPACE when the reallocation fails.

   The distance between the old and the new block is held in a `long',
   not an `int': with 64-bit pointers the two blocks can be further apart
   than an `int' can express, and a truncated distance would leave the
   relocated pointers pointing outside the buffer.  (In the WCHAR variant
   the distance is counted in UCHAR_T elements.)  */
# ifdef WCHAR
#  define EXTEND_BUFFER() \
  do { \
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
    int wchar_count; \
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \
      return REG_ESIZE; \
    bufp->allocated <<= 1; \
    if (bufp->allocated > MAX_BUF_SIZE) \
      bufp->allocated = MAX_BUF_SIZE; \
    /* How many characters can the new buffer hold?  */ \
    wchar_count = bufp->allocated / sizeof(UCHAR_T); \
    if (wchar_count == 0) wchar_count = 1; \
    /* Truncate the buffer size to a multiple of the character size.  */ \
    bufp->allocated = wchar_count * sizeof(UCHAR_T); \
    RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \
    bufp->buffer = (char*)COMPILED_BUFFER_VAR; \
    if (COMPILED_BUFFER_VAR == NULL) \
      return REG_ESPACE; \
    /* If the buffer moved, move all the pointers into it.  */ \
    if (old_buffer != COMPILED_BUFFER_VAR) \
      { \
        long incr = COMPILED_BUFFER_VAR - old_buffer; \
        MOVE_BUFFER_POINTER (b); \
        MOVE_BUFFER_POINTER (begalt); \
        if (fixup_alt_jump) \
          MOVE_BUFFER_POINTER (fixup_alt_jump); \
        if (laststart) \
          MOVE_BUFFER_POINTER (laststart); \
        if (pending_exact) \
          MOVE_BUFFER_POINTER (pending_exact); \
      } \
    ELSE_EXTEND_BUFFER_HIGH_BOUND \
  } while (0)
# else /* BYTE */
#  define EXTEND_BUFFER() \
  do { \
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
    if (bufp->allocated == MAX_BUF_SIZE) \
      return REG_ESIZE; \
    bufp->allocated <<= 1; \
    if (bufp->allocated > MAX_BUF_SIZE) \
      bufp->allocated = MAX_BUF_SIZE; \
    bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \
                                        bufp->allocated); \
    if (COMPILED_BUFFER_VAR == NULL) \
      return REG_ESPACE; \
    /* If the buffer moved, move all the pointers into it.  */ \
    if (old_buffer != COMPILED_BUFFER_VAR) \
      { \
        long incr = COMPILED_BUFFER_VAR - old_buffer; \
        MOVE_BUFFER_POINTER (b); \
        MOVE_BUFFER_POINTER (begalt); \
        if (fixup_alt_jump) \
          MOVE_BUFFER_POINTER (fixup_alt_jump); \
        if (laststart) \
          MOVE_BUFFER_POINTER (laststart); \
        if (pending_exact) \
          MOVE_BUFFER_POINTER (pending_exact); \
      } \
    ELSE_EXTEND_BUFFER_HIGH_BOUND \
  } while (0)
# endif /* WCHAR */
2172*75f6d617Schristos
2173*75f6d617Schristos # ifndef DEFINED_ONCE
2174*75f6d617Schristos /* Since we have one byte reserved for the register number argument to
2175*75f6d617Schristos {start,stop}_memory, the maximum number of groups we can report
2176*75f6d617Schristos things about is what fits in that byte. */
2177*75f6d617Schristos # define MAX_REGNUM 255
2178*75f6d617Schristos
2179*75f6d617Schristos /* But patterns can have more than `MAX_REGNUM' registers. We just
2180*75f6d617Schristos ignore the excess. */
2181*75f6d617Schristos typedef unsigned regnum_t;
2182*75f6d617Schristos
2183*75f6d617Schristos
2184*75f6d617Schristos /* Macros for the compile stack. */
2185*75f6d617Schristos
2186*75f6d617Schristos /* Since offsets can go either forwards or backwards, this type needs to
2187*75f6d617Schristos be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
2188*75f6d617Schristos /* int may not be enough when sizeof(int) == 2, hence long. */
2189*75f6d617Schristos typedef long pattern_offset_t;
2190*75f6d617Schristos
2191*75f6d617Schristos typedef struct /* One compile-stack entry: four pattern offsets and a register number. */
2192*75f6d617Schristos {
2193*75f6d617Schristos pattern_offset_t begalt_offset;
2194*75f6d617Schristos pattern_offset_t fixup_alt_jump;
2195*75f6d617Schristos pattern_offset_t inner_group_offset;
2196*75f6d617Schristos pattern_offset_t laststart_offset;
2197*75f6d617Schristos regnum_t regnum;
2198*75f6d617Schristos } compile_stack_elt_t;
2199*75f6d617Schristos
2200*75f6d617Schristos
2201*75f6d617Schristos typedef struct /* The stack itself: an array of entries plus its capacity (`size') and fill level (`avail'). */
2202*75f6d617Schristos {
2203*75f6d617Schristos compile_stack_elt_t *stack;
2204*75f6d617Schristos unsigned size;
2205*75f6d617Schristos unsigned avail; /* Offset of next open position. */
2206*75f6d617Schristos } compile_stack_type;
2207*75f6d617Schristos
2208*75f6d617Schristos
2209*75f6d617Schristos # define INIT_COMPILE_STACK_SIZE 32
2210*75f6d617Schristos
2211*75f6d617Schristos # define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
2212*75f6d617Schristos # define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
2213*75f6d617Schristos
2214*75f6d617Schristos /* The next available element, i.e. stack[avail]. These macros expect a variable named `compile_stack' in scope. */
2215*75f6d617Schristos # define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
2216*75f6d617Schristos
2217*75f6d617Schristos # endif /* not DEFINED_ONCE */
2218*75f6d617Schristos
/* Set the bit for character C in the bitmap that `b' points to.  C is
   converted to unsigned char before it selects a byte and a bit, so a
   negative `char' value addresses the upper half of the map.  Both uses
   of the argument are parenthesized so that the cast applies to the
   whole argument expression (e.g. a conditional), not just to its first
   operand.  C is evaluated twice; `b' and BYTEWIDTH must be in scope
   where the macro is used.  */
# ifndef DEFINED_ONCE
#  define SET_LIST_BIT(c) \
  (b[((unsigned char) (c)) / BYTEWIDTH] \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif /* DEFINED_ONCE */
2225*75f6d617Schristos
/* Get the next unsigned number in the uncompiled pattern.

   Characters are read with PATFETCH (which advances `p') until a
   non-digit has been fetched or `p' reaches `pend'; the last character
   fetched is left in `c'.  NUM accumulates the decimal value: a negative
   NUM is reset to 0 when the first digit arrives, and digits stop being
   accumulated once NUM has grown beyond RE_DUP_MAX.  If no digit is
   read, NUM is left unchanged.

   The body is wrapped in do { ... } while (0) so that the macro followed
   by a semicolon is exactly one statement and can be used as the body of
   an unbraced `if' that has an `else'.  */
# define GET_UNSIGNED_NUMBER(num) \
  do \
    { \
      while (p != pend) \
        { \
          PATFETCH (c); \
          if (c < '0' || c > '9') \
            break; \
          if (num <= RE_DUP_MAX) \
            { \
              if (num < 0) \
                num = 0; \
              num = num * 10 + c - '0'; \
            } \
        } \
    } \
  while (0)
2242*75f6d617Schristos
2243*75f6d617Schristos # ifndef DEFINED_ONCE
2244*75f6d617Schristos # if defined _LIBC || WIDE_CHAR_SUPPORT
2245*75f6d617Schristos /* The GNU C library provides support for user-defined character classes
2246*75f6d617Schristos and the functions from ISO C amendment 1. */
2247*75f6d617Schristos # ifdef CHARCLASS_NAME_MAX
2248*75f6d617Schristos # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
2249*75f6d617Schristos # else
2250*75f6d617Schristos /* This shouldn't happen but some implementations might still have this
2251*75f6d617Schristos problem. Use a reasonable default value. */
2252*75f6d617Schristos # define CHAR_CLASS_MAX_LENGTH 256
2253*75f6d617Schristos # endif
2254*75f6d617Schristos
2255*75f6d617Schristos # ifdef _LIBC
2256*75f6d617Schristos # define IS_CHAR_CLASS(string) __wctype (string)
2257*75f6d617Schristos # else
2258*75f6d617Schristos # define IS_CHAR_CLASS(string) wctype (string)
2259*75f6d617Schristos # endif
2260*75f6d617Schristos # else
2261*75f6d617Schristos # define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit', the longest of the names tested below. */
2262*75f6d617Schristos
2263*75f6d617Schristos # define IS_CHAR_CLASS(string) \
2264*75f6d617Schristos (STREQ (string, "alpha") || STREQ (string, "upper") \
2265*75f6d617Schristos || STREQ (string, "lower") || STREQ (string, "digit") \
2266*75f6d617Schristos || STREQ (string, "alnum") || STREQ (string, "xdigit") \
2267*75f6d617Schristos || STREQ (string, "space") || STREQ (string, "print") \
2268*75f6d617Schristos || STREQ (string, "punct") || STREQ (string, "graph") \
2269*75f6d617Schristos || STREQ (string, "cntrl") || STREQ (string, "blank"))
2270*75f6d617Schristos # endif
2271*75f6d617Schristos # endif /* DEFINED_ONCE */
2272*75f6d617Schristos
2273*75f6d617Schristos # ifndef MATCH_MAY_ALLOCATE
2274*75f6d617Schristos
2275*75f6d617Schristos /* If we cannot allocate large objects within re_match_2_internal,
2276*75f6d617Schristos we make the fail stack and register vectors global (file-scope statics).
2277*75f6d617Schristos The fail stack, we grow to the maximum size when a regexp
2278*75f6d617Schristos is compiled.
2279*75f6d617Schristos The register vectors, we adjust in size each time we
2280*75f6d617Schristos compile a regexp, according to the number of registers it needs. */
2281*75f6d617Schristos
2282*75f6d617Schristos static PREFIX(fail_stack_type) fail_stack;
2283*75f6d617Schristos
2284*75f6d617Schristos /* Size with which the following vectors are currently allocated.
2285*75f6d617Schristos That is so we can make them bigger as needed,
2286*75f6d617Schristos but never make them smaller. These are declared only when DEFINED_ONCE is set. */
2287*75f6d617Schristos # ifdef DEFINED_ONCE
2288*75f6d617Schristos static int regs_allocated_size;
2289*75f6d617Schristos
2290*75f6d617Schristos static const char ** regstart, ** regend;
2291*75f6d617Schristos static const char ** old_regstart, ** old_regend;
2292*75f6d617Schristos static const char **best_regstart, **best_regend;
2293*75f6d617Schristos static const char **reg_dummy;
2294*75f6d617Schristos # endif /* DEFINED_ONCE */
2295*75f6d617Schristos
2296*75f6d617Schristos static PREFIX(register_info_type) *PREFIX(reg_info);
2297*75f6d617Schristos static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
2298*75f6d617Schristos
2299*75f6d617Schristos /* Make the register vectors big enough for NUM_REGS registers,
2300*75f6d617Schristos but don't make them smaller: nothing happens unless NUM_REGS exceeds regs_allocated_size. The results of the RETALLOC_IF calls are not checked in this function. */
2301*75f6d617Schristos
2302*75f6d617Schristos static void
2303*75f6d617Schristos PREFIX(regex_grow_registers) (num_regs)
2304*75f6d617Schristos int num_regs;
2305*75f6d617Schristos {
2306*75f6d617Schristos if (num_regs > regs_allocated_size)
2307*75f6d617Schristos {
2308*75f6d617Schristos RETALLOC_IF (regstart, num_regs, const char *);
2309*75f6d617Schristos RETALLOC_IF (regend, num_regs, const char *);
2310*75f6d617Schristos RETALLOC_IF (old_regstart, num_regs, const char *);
2311*75f6d617Schristos RETALLOC_IF (old_regend, num_regs, const char *);
2312*75f6d617Schristos RETALLOC_IF (best_regstart, num_regs, const char *);
2313*75f6d617Schristos RETALLOC_IF (best_regend, num_regs, const char *);
2314*75f6d617Schristos RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
2315*75f6d617Schristos RETALLOC_IF (reg_dummy, num_regs, const char *);
2316*75f6d617Schristos RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
2317*75f6d617Schristos
2318*75f6d617Schristos regs_allocated_size = num_regs;
2319*75f6d617Schristos }
2320*75f6d617Schristos }
2321*75f6d617Schristos
2322*75f6d617Schristos # endif /* not MATCH_MAY_ALLOCATE */
2323*75f6d617Schristos
2324*75f6d617Schristos # ifndef DEFINED_ONCE /* Forward declaration; the signature uses neither PREFIX nor CHAR_T. Note that the compile stack structure is passed by value. */
2325*75f6d617Schristos static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2326*75f6d617Schristos compile_stack,
2327*75f6d617Schristos regnum_t regnum));
2328*75f6d617Schristos # endif /* not DEFINED_ONCE */
2329*75f6d617Schristos
2330*75f6d617Schristos /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2331*75f6d617Schristos Returns one of error codes defined in `regex.h', or zero for success.
2332*75f6d617Schristos
2333*75f6d617Schristos Assumes the `allocated' (and perhaps `buffer') and `translate'
2334*75f6d617Schristos fields are set in BUFP on entry.
2335*75f6d617Schristos
2336*75f6d617Schristos If it succeeds, results are put in BUFP (if it returns an error, the
2337*75f6d617Schristos contents of BUFP are undefined):
2338*75f6d617Schristos `buffer' is the compiled pattern;
2339*75f6d617Schristos `syntax' is set to SYNTAX;
2340*75f6d617Schristos `used' is set to the length of the compiled pattern;
2341*75f6d617Schristos `fastmap_accurate' is zero;
2342*75f6d617Schristos `re_nsub' is the number of subexpressions in PATTERN;
2343*75f6d617Schristos `not_bol' and `not_eol' are zero;
2344*75f6d617Schristos
2345*75f6d617Schristos The `fastmap' and `newline_anchor' fields are neither
2346*75f6d617Schristos examined nor set. */
2347*75f6d617Schristos
2348*75f6d617Schristos /* Return VALUE from the enclosing function after freeing the storage we allocated: compile_stack.stack and, in the WCHAR variant, also pattern, mbs_offset and is_binary. */
2349*75f6d617Schristos # ifdef WCHAR
2350*75f6d617Schristos # define FREE_STACK_RETURN(value) \
2351*75f6d617Schristos return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
2352*75f6d617Schristos # else
2353*75f6d617Schristos # define FREE_STACK_RETURN(value) \
2354*75f6d617Schristos return (free (compile_stack.stack), value)
2355*75f6d617Schristos # endif /* WCHAR */
2356*75f6d617Schristos
2357*75f6d617Schristos static reg_errcode_t
2358*75f6d617Schristos PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
2359*75f6d617Schristos const char *ARG_PREFIX(pattern);
2360*75f6d617Schristos size_t ARG_PREFIX(size);
2361*75f6d617Schristos reg_syntax_t syntax;
2362*75f6d617Schristos struct re_pattern_buffer *bufp;
2363*75f6d617Schristos {
2364*75f6d617Schristos /* We fetch characters from PATTERN here. Even though PATTERN is
2365*75f6d617Schristos `char *' (i.e., signed), we declare these variables as unsigned, so
2366*75f6d617Schristos they can be reliably used as array indices. */
2367*75f6d617Schristos register UCHAR_T c, c1;
2368*75f6d617Schristos
2369*75f6d617Schristos #ifdef WCHAR
2370*75f6d617Schristos /* A temporary space to keep wchar_t pattern and compiled pattern. */
2371*75f6d617Schristos CHAR_T *pattern, *COMPILED_BUFFER_VAR;
2372*75f6d617Schristos size_t size;
2373*75f6d617Schristos /* offset buffer for optimization. See convert_mbs_to_wc. */
2374*75f6d617Schristos int *mbs_offset = NULL;
2375*75f6d617Schristos /* It holds whether each wchar_t is binary data or not. */
2376*75f6d617Schristos char *is_binary = NULL;
2377*75f6d617Schristos /* A flag whether exactn is handling binary data or not. */
2378*75f6d617Schristos char is_exactn_bin = FALSE;
2379*75f6d617Schristos #endif /* WCHAR */
2380*75f6d617Schristos
2381*75f6d617Schristos /* A random temporary spot in PATTERN. */
2382*75f6d617Schristos const CHAR_T *p1;
2383*75f6d617Schristos
2384*75f6d617Schristos /* Points to the end of the buffer, where we should append. */
2385*75f6d617Schristos register UCHAR_T *b;
2386*75f6d617Schristos
2387*75f6d617Schristos /* Keeps track of unclosed groups. */
2388*75f6d617Schristos compile_stack_type compile_stack;
2389*75f6d617Schristos
2390*75f6d617Schristos /* Points to the current (ending) position in the pattern. */
2391*75f6d617Schristos #ifdef WCHAR
2392*75f6d617Schristos const CHAR_T *p;
2393*75f6d617Schristos const CHAR_T *pend;
2394*75f6d617Schristos #else /* BYTE */
2395*75f6d617Schristos const CHAR_T *p = pattern;
2396*75f6d617Schristos const CHAR_T *pend = pattern + size;
2397*75f6d617Schristos #endif /* WCHAR */
2398*75f6d617Schristos
2399*75f6d617Schristos /* How to translate the characters in the pattern. */
2400*75f6d617Schristos RE_TRANSLATE_TYPE translate = bufp->translate;
2401*75f6d617Schristos
2402*75f6d617Schristos /* Address of the count-byte of the most recently inserted `exactn'
2403*75f6d617Schristos command. This makes it possible to tell if a new exact-match
2404*75f6d617Schristos character can be added to that command or if the character requires
2405*75f6d617Schristos a new `exactn' command. */
2406*75f6d617Schristos UCHAR_T *pending_exact = 0;
2407*75f6d617Schristos
2408*75f6d617Schristos /* Address of start of the most recently finished expression.
2409*75f6d617Schristos This tells, e.g., postfix * where to find the start of its
2410*75f6d617Schristos operand. Reset at the beginning of groups and alternatives. */
2411*75f6d617Schristos UCHAR_T *laststart = 0;
2412*75f6d617Schristos
2413*75f6d617Schristos /* Address of beginning of regexp, or inside of last group. */
2414*75f6d617Schristos UCHAR_T *begalt;
2415*75f6d617Schristos
2416*75f6d617Schristos /* Address of the place where a forward jump should go to the end of
2417*75f6d617Schristos the containing expression. Each alternative of an `or' -- except the
2418*75f6d617Schristos last -- ends with a forward jump of this sort. */
2419*75f6d617Schristos UCHAR_T *fixup_alt_jump = 0;
2420*75f6d617Schristos
2421*75f6d617Schristos /* Counts open-groups as they are encountered. Remembered for the
2422*75f6d617Schristos matching close-group on the compile stack, so the same register
2423*75f6d617Schristos number is put in the stop_memory as the start_memory. */
2424*75f6d617Schristos regnum_t regnum = 0;
2425*75f6d617Schristos
2426*75f6d617Schristos #ifdef WCHAR
2427*75f6d617Schristos /* Initialize the wchar_t PATTERN and offset_buffer. */
2428*75f6d617Schristos p = pend = pattern = TALLOC(csize + 1, CHAR_T);
2429*75f6d617Schristos mbs_offset = TALLOC(csize + 1, int);
2430*75f6d617Schristos is_binary = TALLOC(csize + 1, char);
2431*75f6d617Schristos if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
2432*75f6d617Schristos {
2433*75f6d617Schristos free(pattern);
2434*75f6d617Schristos free(mbs_offset);
2435*75f6d617Schristos free(is_binary);
2436*75f6d617Schristos return REG_ESPACE;
2437*75f6d617Schristos }
2438*75f6d617Schristos pattern[csize] = L'\0'; /* sentinel */
2439*75f6d617Schristos size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
2440*75f6d617Schristos pend = p + size;
2441*75f6d617Schristos if (size < 0)
2442*75f6d617Schristos {
2443*75f6d617Schristos free(pattern);
2444*75f6d617Schristos free(mbs_offset);
2445*75f6d617Schristos free(is_binary);
2446*75f6d617Schristos return REG_BADPAT;
2447*75f6d617Schristos }
2448*75f6d617Schristos #endif
2449*75f6d617Schristos
2450*75f6d617Schristos #ifdef DEBUG
2451*75f6d617Schristos DEBUG_PRINT1 ("\nCompiling pattern: ");
2452*75f6d617Schristos if (debug)
2453*75f6d617Schristos {
2454*75f6d617Schristos unsigned debug_count;
2455*75f6d617Schristos
2456*75f6d617Schristos for (debug_count = 0; debug_count < size; debug_count++)
2457*75f6d617Schristos PUT_CHAR (pattern[debug_count]);
2458*75f6d617Schristos putchar ('\n');
2459*75f6d617Schristos }
2460*75f6d617Schristos #endif /* DEBUG */
2461*75f6d617Schristos
2462*75f6d617Schristos /* Initialize the compile stack. */
2463*75f6d617Schristos compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2464*75f6d617Schristos if (compile_stack.stack == NULL)
2465*75f6d617Schristos {
2466*75f6d617Schristos #ifdef WCHAR
2467*75f6d617Schristos free(pattern);
2468*75f6d617Schristos free(mbs_offset);
2469*75f6d617Schristos free(is_binary);
2470*75f6d617Schristos #endif
2471*75f6d617Schristos return REG_ESPACE;
2472*75f6d617Schristos }
2473*75f6d617Schristos
2474*75f6d617Schristos compile_stack.size = INIT_COMPILE_STACK_SIZE;
2475*75f6d617Schristos compile_stack.avail = 0;
2476*75f6d617Schristos
2477*75f6d617Schristos /* Initialize the pattern buffer. */
2478*75f6d617Schristos bufp->syntax = syntax;
2479*75f6d617Schristos bufp->fastmap_accurate = 0;
2480*75f6d617Schristos bufp->not_bol = bufp->not_eol = 0;
2481*75f6d617Schristos
2482*75f6d617Schristos /* Set `used' to zero, so that if we return an error, the pattern
2483*75f6d617Schristos printer (for debugging) will think there's no pattern. We reset it
2484*75f6d617Schristos at the end. */
2485*75f6d617Schristos bufp->used = 0;
2486*75f6d617Schristos
2487*75f6d617Schristos /* Always count groups, whether or not bufp->no_sub is set. */
2488*75f6d617Schristos bufp->re_nsub = 0;
2489*75f6d617Schristos
2490*75f6d617Schristos #if !defined emacs && !defined SYNTAX_TABLE
2491*75f6d617Schristos /* Initialize the syntax table. */
2492*75f6d617Schristos init_syntax_once ();
2493*75f6d617Schristos #endif
2494*75f6d617Schristos
2495*75f6d617Schristos if (bufp->allocated == 0)
2496*75f6d617Schristos {
2497*75f6d617Schristos if (bufp->buffer)
2498*75f6d617Schristos { /* If zero allocated, but buffer is non-null, try to realloc
2499*75f6d617Schristos enough space. This loses if buffer's address is bogus, but
2500*75f6d617Schristos that is the user's responsibility. */
2501*75f6d617Schristos #ifdef WCHAR
2502*75f6d617Schristos /* Free bufp->buffer and allocate an array for wchar_t pattern
2503*75f6d617Schristos buffer. */
2504*75f6d617Schristos free(bufp->buffer);
2505*75f6d617Schristos COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
2506*75f6d617Schristos UCHAR_T);
2507*75f6d617Schristos #else
2508*75f6d617Schristos RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
2509*75f6d617Schristos #endif /* WCHAR */
2510*75f6d617Schristos }
2511*75f6d617Schristos else
2512*75f6d617Schristos { /* Caller did not allocate a buffer. Do it for them. */
2513*75f6d617Schristos COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
2514*75f6d617Schristos UCHAR_T);
2515*75f6d617Schristos }
2516*75f6d617Schristos
2517*75f6d617Schristos if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
2518*75f6d617Schristos #ifdef WCHAR
2519*75f6d617Schristos bufp->buffer = (char*)COMPILED_BUFFER_VAR;
2520*75f6d617Schristos #endif /* WCHAR */
2521*75f6d617Schristos bufp->allocated = INIT_BUF_SIZE;
2522*75f6d617Schristos }
2523*75f6d617Schristos #ifdef WCHAR
2524*75f6d617Schristos else
2525*75f6d617Schristos COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
2526*75f6d617Schristos #endif
2527*75f6d617Schristos
2528*75f6d617Schristos begalt = b = COMPILED_BUFFER_VAR;
2529*75f6d617Schristos
2530*75f6d617Schristos /* Loop through the uncompiled pattern until we're at the end. */
2531*75f6d617Schristos while (p != pend)
2532*75f6d617Schristos {
2533*75f6d617Schristos PATFETCH (c);
2534*75f6d617Schristos
2535*75f6d617Schristos switch (c)
2536*75f6d617Schristos {
2537*75f6d617Schristos case '^':
2538*75f6d617Schristos {
2539*75f6d617Schristos if ( /* If at start of pattern, it's an operator. */
2540*75f6d617Schristos p == pattern + 1
2541*75f6d617Schristos /* If context independent, it's an operator. */
2542*75f6d617Schristos || syntax & RE_CONTEXT_INDEP_ANCHORS
2543*75f6d617Schristos /* Otherwise, depends on what's come before. */
2544*75f6d617Schristos || PREFIX(at_begline_loc_p) (pattern, p, syntax))
2545*75f6d617Schristos BUF_PUSH (begline);
2546*75f6d617Schristos else
2547*75f6d617Schristos goto normal_char;
2548*75f6d617Schristos }
2549*75f6d617Schristos break;
2550*75f6d617Schristos
2551*75f6d617Schristos
2552*75f6d617Schristos case '$':
2553*75f6d617Schristos {
2554*75f6d617Schristos if ( /* If at end of pattern, it's an operator. */
2555*75f6d617Schristos p == pend
2556*75f6d617Schristos /* If context independent, it's an operator. */
2557*75f6d617Schristos || syntax & RE_CONTEXT_INDEP_ANCHORS
2558*75f6d617Schristos /* Otherwise, depends on what's next. */
2559*75f6d617Schristos || PREFIX(at_endline_loc_p) (p, pend, syntax))
2560*75f6d617Schristos BUF_PUSH (endline);
2561*75f6d617Schristos else
2562*75f6d617Schristos goto normal_char;
2563*75f6d617Schristos }
2564*75f6d617Schristos break;
2565*75f6d617Schristos
2566*75f6d617Schristos
2567*75f6d617Schristos case '+':
2568*75f6d617Schristos case '?':
2569*75f6d617Schristos if ((syntax & RE_BK_PLUS_QM)
2570*75f6d617Schristos || (syntax & RE_LIMITED_OPS))
2571*75f6d617Schristos goto normal_char;
2572*75f6d617Schristos handle_plus:
2573*75f6d617Schristos case '*':
2574*75f6d617Schristos /* If there is no previous pattern... */
2575*75f6d617Schristos if (!laststart)
2576*75f6d617Schristos {
2577*75f6d617Schristos if (syntax & RE_CONTEXT_INVALID_OPS)
2578*75f6d617Schristos FREE_STACK_RETURN (REG_BADRPT);
2579*75f6d617Schristos else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2580*75f6d617Schristos goto normal_char;
2581*75f6d617Schristos }
2582*75f6d617Schristos
2583*75f6d617Schristos {
2584*75f6d617Schristos /* Are we optimizing this jump? */
2585*75f6d617Schristos boolean keep_string_p = false;
2586*75f6d617Schristos
2587*75f6d617Schristos /* 1 means zero (many) matches is allowed. */
2588*75f6d617Schristos char zero_times_ok = 0, many_times_ok = 0;
2589*75f6d617Schristos
2590*75f6d617Schristos /* If there is a sequence of repetition chars, collapse it
2591*75f6d617Schristos down to just one (the right one). We can't combine
2592*75f6d617Schristos interval operators with these because of, e.g., `a{2}*',
2593*75f6d617Schristos which should only match an even number of `a's. */
2594*75f6d617Schristos
2595*75f6d617Schristos for (;;)
2596*75f6d617Schristos {
2597*75f6d617Schristos zero_times_ok |= c != '+';
2598*75f6d617Schristos many_times_ok |= c != '?';
2599*75f6d617Schristos
2600*75f6d617Schristos if (p == pend)
2601*75f6d617Schristos break;
2602*75f6d617Schristos
2603*75f6d617Schristos PATFETCH (c);
2604*75f6d617Schristos
2605*75f6d617Schristos if (c == '*'
2606*75f6d617Schristos || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2607*75f6d617Schristos ;
2608*75f6d617Schristos
2609*75f6d617Schristos else if (syntax & RE_BK_PLUS_QM && c == '\\')
2610*75f6d617Schristos {
2611*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2612*75f6d617Schristos
2613*75f6d617Schristos PATFETCH (c1);
2614*75f6d617Schristos if (!(c1 == '+' || c1 == '?'))
2615*75f6d617Schristos {
2616*75f6d617Schristos PATUNFETCH;
2617*75f6d617Schristos PATUNFETCH;
2618*75f6d617Schristos break;
2619*75f6d617Schristos }
2620*75f6d617Schristos
2621*75f6d617Schristos c = c1;
2622*75f6d617Schristos }
2623*75f6d617Schristos else
2624*75f6d617Schristos {
2625*75f6d617Schristos PATUNFETCH;
2626*75f6d617Schristos break;
2627*75f6d617Schristos }
2628*75f6d617Schristos
2629*75f6d617Schristos /* If we get here, we found another repeat character. */
2630*75f6d617Schristos }
2631*75f6d617Schristos
2632*75f6d617Schristos /* Star, etc. applied to an empty pattern is equivalent
2633*75f6d617Schristos to an empty pattern. */
2634*75f6d617Schristos if (!laststart)
2635*75f6d617Schristos break;
2636*75f6d617Schristos
2637*75f6d617Schristos /* Now we know whether or not zero matches is allowed
2638*75f6d617Schristos and also whether or not two or more matches is allowed. */
2639*75f6d617Schristos if (many_times_ok)
2640*75f6d617Schristos { /* More than one repetition is allowed, so put in at the
2641*75f6d617Schristos end a backward relative jump from `b' to before the next
2642*75f6d617Schristos jump we're going to put in below (which jumps from
2643*75f6d617Schristos laststart to after this jump).
2644*75f6d617Schristos
2645*75f6d617Schristos But if we are at the `*' in the exact sequence `.*\n',
2646*75f6d617Schristos insert an unconditional jump backwards to the .,
2647*75f6d617Schristos instead of the beginning of the loop. This way we only
2648*75f6d617Schristos push a failure point once, instead of every time
2649*75f6d617Schristos through the loop. */
2650*75f6d617Schristos assert (p - 1 > pattern);
2651*75f6d617Schristos
2652*75f6d617Schristos /* Allocate the space for the jump. */
2653*75f6d617Schristos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2654*75f6d617Schristos
2655*75f6d617Schristos /* We know we are not at the first character of the pattern,
2656*75f6d617Schristos because laststart was nonzero. And we've already
2657*75f6d617Schristos incremented `p', by the way, to be the character after
2658*75f6d617Schristos the `*'. Do we have to do something analogous here
2659*75f6d617Schristos for null bytes, because of RE_DOT_NOT_NULL? */
2660*75f6d617Schristos if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2661*75f6d617Schristos && zero_times_ok
2662*75f6d617Schristos && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2663*75f6d617Schristos && !(syntax & RE_DOT_NEWLINE))
2664*75f6d617Schristos { /* We have .*\n. */
2665*75f6d617Schristos STORE_JUMP (jump, b, laststart);
2666*75f6d617Schristos keep_string_p = true;
2667*75f6d617Schristos }
2668*75f6d617Schristos else
2669*75f6d617Schristos /* Anything else. */
2670*75f6d617Schristos STORE_JUMP (maybe_pop_jump, b, laststart -
2671*75f6d617Schristos (1 + OFFSET_ADDRESS_SIZE));
2672*75f6d617Schristos
2673*75f6d617Schristos /* We've added more stuff to the buffer. */
2674*75f6d617Schristos b += 1 + OFFSET_ADDRESS_SIZE;
2675*75f6d617Schristos }
2676*75f6d617Schristos
2677*75f6d617Schristos /* On failure, jump from laststart to b + 3, which will be the
2678*75f6d617Schristos end of the buffer after this jump is inserted. */
2679*75f6d617Schristos /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE' instead of
2680*75f6d617Schristos 'b + 3'. */
2681*75f6d617Schristos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2682*75f6d617Schristos INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2683*75f6d617Schristos : on_failure_jump,
2684*75f6d617Schristos laststart, b + 1 + OFFSET_ADDRESS_SIZE);
2685*75f6d617Schristos pending_exact = 0;
2686*75f6d617Schristos b += 1 + OFFSET_ADDRESS_SIZE;
2687*75f6d617Schristos
2688*75f6d617Schristos if (!zero_times_ok)
2689*75f6d617Schristos {
2690*75f6d617Schristos /* At least one repetition is required, so insert a
2691*75f6d617Schristos `dummy_failure_jump' before the initial
2692*75f6d617Schristos `on_failure_jump' instruction of the loop. This
2693*75f6d617Schristos effects a skip over that instruction the first time
2694*75f6d617Schristos we hit that loop. */
2695*75f6d617Schristos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
2696*75f6d617Schristos INSERT_JUMP (dummy_failure_jump, laststart, laststart +
2697*75f6d617Schristos 2 + 2 * OFFSET_ADDRESS_SIZE);
2698*75f6d617Schristos b += 1 + OFFSET_ADDRESS_SIZE;
2699*75f6d617Schristos }
2700*75f6d617Schristos }
2701*75f6d617Schristos break;
2702*75f6d617Schristos
2703*75f6d617Schristos
2704*75f6d617Schristos case '.':
2705*75f6d617Schristos laststart = b;
2706*75f6d617Schristos BUF_PUSH (anychar);
2707*75f6d617Schristos break;
2708*75f6d617Schristos
2709*75f6d617Schristos
2710*75f6d617Schristos case '[':
2711*75f6d617Schristos {
2712*75f6d617Schristos boolean had_char_class = false;
2713*75f6d617Schristos #ifdef WCHAR
2714*75f6d617Schristos CHAR_T range_start = 0xffffffff;
2715*75f6d617Schristos #else
2716*75f6d617Schristos unsigned int range_start = 0xffffffff;
2717*75f6d617Schristos #endif
2718*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2719*75f6d617Schristos
2720*75f6d617Schristos #ifdef WCHAR
2721*75f6d617Schristos /* We assume a charset(_not) structure as a wchar_t array.
2722*75f6d617Schristos charset[0] = (re_opcode_t) charset(_not)
2723*75f6d617Schristos charset[1] = l (= length of char_classes)
2724*75f6d617Schristos charset[2] = m (= length of collating_symbols)
2725*75f6d617Schristos charset[3] = n (= length of equivalence_classes)
2726*75f6d617Schristos charset[4] = o (= length of char_ranges)
2727*75f6d617Schristos charset[5] = p (= length of chars)
2728*75f6d617Schristos
2729*75f6d617Schristos charset[6] = char_class (wctype_t)
2730*75f6d617Schristos charset[6+CHAR_CLASS_SIZE] = char_class (wctype_t)
2731*75f6d617Schristos ...
2732*75f6d617Schristos charset[l+5] = char_class (wctype_t)
2733*75f6d617Schristos
2734*75f6d617Schristos charset[l+6] = collating_symbol (wchar_t)
2735*75f6d617Schristos ...
2736*75f6d617Schristos charset[l+m+5] = collating_symbol (wchar_t)
2737*75f6d617Schristos ifdef _LIBC we use the index if
2738*75f6d617Schristos _NL_COLLATE_SYMB_EXTRAMB instead of
2739*75f6d617Schristos wchar_t string.
2740*75f6d617Schristos
2741*75f6d617Schristos charset[l+m+6] = equivalence_classes (wchar_t)
2742*75f6d617Schristos ...
2743*75f6d617Schristos charset[l+m+n+5] = equivalence_classes (wchar_t)
2744*75f6d617Schristos ifdef _LIBC we use the index in
2745*75f6d617Schristos _NL_COLLATE_WEIGHT instead of
2746*75f6d617Schristos wchar_t string.
2747*75f6d617Schristos
2748*75f6d617Schristos charset[l+m+n+6] = range_start
2749*75f6d617Schristos charset[l+m+n+7] = range_end
2750*75f6d617Schristos ...
2751*75f6d617Schristos charset[l+m+n+2o+4] = range_start
2752*75f6d617Schristos charset[l+m+n+2o+5] = range_end
2753*75f6d617Schristos ifdef _LIBC we use the value looked up
2754*75f6d617Schristos in _NL_COLLATE_COLLSEQ instead of
2755*75f6d617Schristos wchar_t character.
2756*75f6d617Schristos
2757*75f6d617Schristos charset[l+m+n+2o+6] = char
2758*75f6d617Schristos ...
2759*75f6d617Schristos charset[l+m+n+2o+p+5] = char
2760*75f6d617Schristos
2761*75f6d617Schristos */
2762*75f6d617Schristos
2763*75f6d617Schristos /* We need at least 6 spaces: the opcode, the length of
2764*75f6d617Schristos char_classes, the length of collating_symbols, the length of
2765*75f6d617Schristos equivalence_classes, the length of char_ranges, the length of
2766*75f6d617Schristos chars. */
2767*75f6d617Schristos GET_BUFFER_SPACE (6);
2768*75f6d617Schristos
2769*75f6d617Schristos /* Save b as laststart. We also use laststart as the pointer
2770*75f6d617Schristos to the first element of the charset here.
2771*75f6d617Schristos In other words, laststart[i] indicates charset[i]. */
2772*75f6d617Schristos laststart = b;
2773*75f6d617Schristos
2774*75f6d617Schristos /* We test `*p == '^' twice, instead of using an if
2775*75f6d617Schristos statement, so we only need one BUF_PUSH. */
2776*75f6d617Schristos BUF_PUSH (*p == '^' ? charset_not : charset);
2777*75f6d617Schristos if (*p == '^')
2778*75f6d617Schristos p++;
2779*75f6d617Schristos
2780*75f6d617Schristos /* Push the length of char_classes, the length of
2781*75f6d617Schristos collating_symbols, the length of equivalence_classes, the
2782*75f6d617Schristos length of char_ranges and the length of chars. */
2783*75f6d617Schristos BUF_PUSH_3 (0, 0, 0);
2784*75f6d617Schristos BUF_PUSH_2 (0, 0);
2785*75f6d617Schristos
2786*75f6d617Schristos /* Remember the first position in the bracket expression. */
2787*75f6d617Schristos p1 = p;
2788*75f6d617Schristos
2789*75f6d617Schristos /* charset_not matches newline according to a syntax bit. */
2790*75f6d617Schristos if ((re_opcode_t) b[-6] == charset_not
2791*75f6d617Schristos && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2792*75f6d617Schristos {
2793*75f6d617Schristos BUF_PUSH('\n');
2794*75f6d617Schristos laststart[5]++; /* Update the length of characters */
2795*75f6d617Schristos }
2796*75f6d617Schristos
2797*75f6d617Schristos /* Read in characters and ranges, setting map bits. */
2798*75f6d617Schristos for (;;)
2799*75f6d617Schristos {
2800*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2801*75f6d617Schristos
2802*75f6d617Schristos PATFETCH (c);
2803*75f6d617Schristos
2804*75f6d617Schristos /* \ might escape characters inside [...] and [^...]. */
2805*75f6d617Schristos if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2806*75f6d617Schristos {
2807*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2808*75f6d617Schristos
2809*75f6d617Schristos PATFETCH (c1);
2810*75f6d617Schristos BUF_PUSH(c1);
2811*75f6d617Schristos laststart[5]++; /* Update the length of chars */
2812*75f6d617Schristos range_start = c1;
2813*75f6d617Schristos continue;
2814*75f6d617Schristos }
2815*75f6d617Schristos
2816*75f6d617Schristos /* Could be the end of the bracket expression. If it's
2817*75f6d617Schristos not (i.e., when the bracket expression is `[]' so
2818*75f6d617Schristos far), the ']' character bit gets set way below. */
2819*75f6d617Schristos if (c == ']' && p != p1 + 1)
2820*75f6d617Schristos break;
2821*75f6d617Schristos
2822*75f6d617Schristos /* Look ahead to see if it's a range when the last thing
2823*75f6d617Schristos was a character class. */
2824*75f6d617Schristos if (had_char_class && c == '-' && *p != ']')
2825*75f6d617Schristos FREE_STACK_RETURN (REG_ERANGE);
2826*75f6d617Schristos
2827*75f6d617Schristos /* Look ahead to see if it's a range when the last thing
2828*75f6d617Schristos was a character: if this is a hyphen not at the
2829*75f6d617Schristos beginning or the end of a list, then it's the range
2830*75f6d617Schristos operator. */
2831*75f6d617Schristos if (c == '-'
2832*75f6d617Schristos && !(p - 2 >= pattern && p[-2] == '[')
2833*75f6d617Schristos && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2834*75f6d617Schristos && *p != ']')
2835*75f6d617Schristos {
2836*75f6d617Schristos reg_errcode_t ret;
2837*75f6d617Schristos /* Allocate the space for range_start and range_end. */
2838*75f6d617Schristos GET_BUFFER_SPACE (2);
2839*75f6d617Schristos /* Update the pointer to indicate end of buffer. */
2840*75f6d617Schristos b += 2;
2841*75f6d617Schristos ret = wcs_compile_range (range_start, &p, pend, translate,
2842*75f6d617Schristos syntax, b, laststart);
2843*75f6d617Schristos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2844*75f6d617Schristos range_start = 0xffffffff;
2845*75f6d617Schristos }
2846*75f6d617Schristos else if (p[0] == '-' && p[1] != ']')
2847*75f6d617Schristos { /* This handles ranges made up of characters only. */
2848*75f6d617Schristos reg_errcode_t ret;
2849*75f6d617Schristos
2850*75f6d617Schristos /* Move past the `-'. */
2851*75f6d617Schristos PATFETCH (c1);
2852*75f6d617Schristos /* Allocate the space for range_start and range_end. */
2853*75f6d617Schristos GET_BUFFER_SPACE (2);
2854*75f6d617Schristos /* Update the pointer to indicate end of buffer. */
2855*75f6d617Schristos b += 2;
2856*75f6d617Schristos ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
2857*75f6d617Schristos laststart);
2858*75f6d617Schristos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2859*75f6d617Schristos range_start = 0xffffffff;
2860*75f6d617Schristos }
2861*75f6d617Schristos
2862*75f6d617Schristos /* See if we're at the beginning of a possible character
2863*75f6d617Schristos class. */
2864*75f6d617Schristos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2865*75f6d617Schristos { /* Leave room for the null. */
2866*75f6d617Schristos char str[CHAR_CLASS_MAX_LENGTH + 1];
2867*75f6d617Schristos
2868*75f6d617Schristos PATFETCH (c);
2869*75f6d617Schristos c1 = 0;
2870*75f6d617Schristos
2871*75f6d617Schristos /* If pattern is `[[:'. */
2872*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2873*75f6d617Schristos
2874*75f6d617Schristos for (;;)
2875*75f6d617Schristos {
2876*75f6d617Schristos PATFETCH (c);
2877*75f6d617Schristos if ((c == ':' && *p == ']') || p == pend)
2878*75f6d617Schristos break;
2879*75f6d617Schristos if (c1 < CHAR_CLASS_MAX_LENGTH)
2880*75f6d617Schristos str[c1++] = c;
2881*75f6d617Schristos else
2882*75f6d617Schristos /* This is in any case an invalid class name. */
2883*75f6d617Schristos str[0] = '\0';
2884*75f6d617Schristos }
2885*75f6d617Schristos str[c1] = '\0';
2886*75f6d617Schristos
2887*75f6d617Schristos /* If isn't a word bracketed by `[:' and `:]':
2888*75f6d617Schristos undo the ending character, the letters, and leave
2889*75f6d617Schristos the leading `:' and `[' (but store them as character). */
2890*75f6d617Schristos if (c == ':' && *p == ']')
2891*75f6d617Schristos {
2892*75f6d617Schristos wctype_t wt;
2893*75f6d617Schristos uintptr_t alignedp;
2894*75f6d617Schristos
2895*75f6d617Schristos /* Query the character class as wctype_t. */
2896*75f6d617Schristos wt = IS_CHAR_CLASS (str);
2897*75f6d617Schristos if (wt == 0)
2898*75f6d617Schristos FREE_STACK_RETURN (REG_ECTYPE);
2899*75f6d617Schristos
2900*75f6d617Schristos /* Throw away the ] at the end of the character
2901*75f6d617Schristos class. */
2902*75f6d617Schristos PATFETCH (c);
2903*75f6d617Schristos
2904*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2905*75f6d617Schristos
2906*75f6d617Schristos /* Allocate the space for character class. */
2907*75f6d617Schristos GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
2908*75f6d617Schristos /* Update the pointer to indicate end of buffer. */
2909*75f6d617Schristos b += CHAR_CLASS_SIZE;
2910*75f6d617Schristos /* Move data which follow character classes
2911*75f6d617Schristos not to violate the data. */
2912*75f6d617Schristos insert_space(CHAR_CLASS_SIZE,
2913*75f6d617Schristos laststart + 6 + laststart[1],
2914*75f6d617Schristos b - 1);
2915*75f6d617Schristos alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
2916*75f6d617Schristos + __alignof__(wctype_t) - 1)
2917*75f6d617Schristos & ~(uintptr_t)(__alignof__(wctype_t) - 1);
2918*75f6d617Schristos /* Store the character class. */
2919*75f6d617Schristos *((wctype_t*)alignedp) = wt;
2920*75f6d617Schristos /* Update length of char_classes */
2921*75f6d617Schristos laststart[1] += CHAR_CLASS_SIZE;
2922*75f6d617Schristos
2923*75f6d617Schristos had_char_class = true;
2924*75f6d617Schristos }
2925*75f6d617Schristos else
2926*75f6d617Schristos {
2927*75f6d617Schristos c1++;
2928*75f6d617Schristos while (c1--)
2929*75f6d617Schristos PATUNFETCH;
2930*75f6d617Schristos BUF_PUSH ('[');
2931*75f6d617Schristos BUF_PUSH (':');
2932*75f6d617Schristos laststart[5] += 2; /* Update the length of characters */
2933*75f6d617Schristos range_start = ':';
2934*75f6d617Schristos had_char_class = false;
2935*75f6d617Schristos }
2936*75f6d617Schristos }
2937*75f6d617Schristos else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
2938*75f6d617Schristos || *p == '.'))
2939*75f6d617Schristos {
2940*75f6d617Schristos CHAR_T str[128]; /* Should be large enough. */
2941*75f6d617Schristos CHAR_T delim = *p; /* '=' or '.' */
2942*75f6d617Schristos # ifdef _LIBC
2943*75f6d617Schristos uint32_t nrules =
2944*75f6d617Schristos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
2945*75f6d617Schristos # endif
2946*75f6d617Schristos PATFETCH (c);
2947*75f6d617Schristos c1 = 0;
2948*75f6d617Schristos
2949*75f6d617Schristos /* If pattern is `[[=' or '[[.'. */
2950*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2951*75f6d617Schristos
2952*75f6d617Schristos for (;;)
2953*75f6d617Schristos {
2954*75f6d617Schristos PATFETCH (c);
2955*75f6d617Schristos if ((c == delim && *p == ']') || p == pend)
2956*75f6d617Schristos break;
2957*75f6d617Schristos if (c1 < sizeof (str) - 1)
2958*75f6d617Schristos str[c1++] = c;
2959*75f6d617Schristos else
2960*75f6d617Schristos /* This is in any case an invalid class name. */
2961*75f6d617Schristos str[0] = '\0';
2962*75f6d617Schristos }
2963*75f6d617Schristos str[c1] = '\0';
2964*75f6d617Schristos
2965*75f6d617Schristos if (c == delim && *p == ']' && str[0] != '\0')
2966*75f6d617Schristos {
2967*75f6d617Schristos unsigned int i, offset;
2968*75f6d617Schristos /* If we have no collation data we use the default
2969*75f6d617Schristos collation in which each character is in a class
2970*75f6d617Schristos by itself. It also means that ASCII is the
2971*75f6d617Schristos character set and therefore we cannot have character
2972*75f6d617Schristos with more than one byte in the multibyte
2973*75f6d617Schristos representation. */
2974*75f6d617Schristos
2975*75f6d617Schristos /* If not defined _LIBC, we push the name and
2976*75f6d617Schristos `\0' for the sake of matching performance. */
2977*75f6d617Schristos int datasize = c1 + 1;
2978*75f6d617Schristos
2979*75f6d617Schristos # ifdef _LIBC
2980*75f6d617Schristos int32_t idx = 0;
2981*75f6d617Schristos if (nrules == 0)
2982*75f6d617Schristos # endif
2983*75f6d617Schristos {
2984*75f6d617Schristos if (c1 != 1)
2985*75f6d617Schristos FREE_STACK_RETURN (REG_ECOLLATE);
2986*75f6d617Schristos }
2987*75f6d617Schristos # ifdef _LIBC
2988*75f6d617Schristos else
2989*75f6d617Schristos {
2990*75f6d617Schristos const int32_t *table;
2991*75f6d617Schristos const int32_t *weights;
2992*75f6d617Schristos const int32_t *extra;
2993*75f6d617Schristos const int32_t *indirect;
2994*75f6d617Schristos wint_t *cp;
2995*75f6d617Schristos
2996*75f6d617Schristos /* This #include defines a local function! */
2997*75f6d617Schristos # include <locale/weightwc.h>
2998*75f6d617Schristos
2999*75f6d617Schristos if(delim == '=')
3000*75f6d617Schristos {
3001*75f6d617Schristos /* We push the index for equivalence class. */
3002*75f6d617Schristos cp = (wint_t*)str;
3003*75f6d617Schristos
3004*75f6d617Schristos table = (const int32_t *)
3005*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3006*75f6d617Schristos _NL_COLLATE_TABLEWC);
3007*75f6d617Schristos weights = (const int32_t *)
3008*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3009*75f6d617Schristos _NL_COLLATE_WEIGHTWC);
3010*75f6d617Schristos extra = (const int32_t *)
3011*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3012*75f6d617Schristos _NL_COLLATE_EXTRAWC);
3013*75f6d617Schristos indirect = (const int32_t *)
3014*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3015*75f6d617Schristos _NL_COLLATE_INDIRECTWC);
3016*75f6d617Schristos
3017*75f6d617Schristos idx = findidx ((const wint_t**)&cp);
3018*75f6d617Schristos if (idx == 0 || cp < (wint_t*) str + c1)
3019*75f6d617Schristos /* This is no valid character. */
3020*75f6d617Schristos FREE_STACK_RETURN (REG_ECOLLATE);
3021*75f6d617Schristos
3022*75f6d617Schristos str[0] = (wchar_t)idx;
3023*75f6d617Schristos }
3024*75f6d617Schristos else /* delim == '.' */
3025*75f6d617Schristos {
3026*75f6d617Schristos /* We push collation sequence value
3027*75f6d617Schristos for collating symbol. */
3028*75f6d617Schristos int32_t table_size;
3029*75f6d617Schristos const int32_t *symb_table;
3030*75f6d617Schristos const unsigned char *extra;
3031*75f6d617Schristos int32_t idx;
3032*75f6d617Schristos int32_t elem;
3033*75f6d617Schristos int32_t second;
3034*75f6d617Schristos int32_t hash;
3035*75f6d617Schristos char char_str[c1];
3036*75f6d617Schristos
3037*75f6d617Schristos /* We have to convert the name to a single-byte
3038*75f6d617Schristos string. This is possible since the names
3039*75f6d617Schristos consist of ASCII characters and the internal
3040*75f6d617Schristos representation is UCS4. */
3041*75f6d617Schristos for (i = 0; i < c1; ++i)
3042*75f6d617Schristos char_str[i] = str[i];
3043*75f6d617Schristos
3044*75f6d617Schristos table_size =
3045*75f6d617Schristos _NL_CURRENT_WORD (LC_COLLATE,
3046*75f6d617Schristos _NL_COLLATE_SYMB_HASH_SIZEMB);
3047*75f6d617Schristos symb_table = (const int32_t *)
3048*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3049*75f6d617Schristos _NL_COLLATE_SYMB_TABLEMB);
3050*75f6d617Schristos extra = (const unsigned char *)
3051*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3052*75f6d617Schristos _NL_COLLATE_SYMB_EXTRAMB);
3053*75f6d617Schristos
3054*75f6d617Schristos /* Locate the character in the hashing table. */
3055*75f6d617Schristos hash = elem_hash (char_str, c1);
3056*75f6d617Schristos
3057*75f6d617Schristos idx = 0;
3058*75f6d617Schristos elem = hash % table_size;
3059*75f6d617Schristos second = hash % (table_size - 2);
3060*75f6d617Schristos while (symb_table[2 * elem] != 0)
3061*75f6d617Schristos {
3062*75f6d617Schristos /* First compare the hashing value. */
3063*75f6d617Schristos if (symb_table[2 * elem] == hash
3064*75f6d617Schristos && c1 == extra[symb_table[2 * elem + 1]]
3065*75f6d617Schristos && memcmp (char_str,
3066*75f6d617Schristos &extra[symb_table[2 * elem + 1]
3067*75f6d617Schristos + 1], c1) == 0)
3068*75f6d617Schristos {
3069*75f6d617Schristos /* Yep, this is the entry. */
3070*75f6d617Schristos idx = symb_table[2 * elem + 1];
3071*75f6d617Schristos idx += 1 + extra[idx];
3072*75f6d617Schristos break;
3073*75f6d617Schristos }
3074*75f6d617Schristos
3075*75f6d617Schristos /* Next entry. */
3076*75f6d617Schristos elem += second;
3077*75f6d617Schristos }
3078*75f6d617Schristos
3079*75f6d617Schristos if (symb_table[2 * elem] != 0)
3080*75f6d617Schristos {
3081*75f6d617Schristos /* Compute the index of the byte sequence
3082*75f6d617Schristos in the table. */
3083*75f6d617Schristos idx += 1 + extra[idx];
3084*75f6d617Schristos /* Adjust for the alignment. */
3085*75f6d617Schristos idx = (idx + 3) & ~3;
3086*75f6d617Schristos
3087*75f6d617Schristos str[0] = (wchar_t) idx + 4;
3088*75f6d617Schristos }
3089*75f6d617Schristos else if (symb_table[2 * elem] == 0 && c1 == 1)
3090*75f6d617Schristos {
3091*75f6d617Schristos /* No valid character. Match it as a
3092*75f6d617Schristos single byte character. */
3093*75f6d617Schristos had_char_class = false;
3094*75f6d617Schristos BUF_PUSH(str[0]);
3095*75f6d617Schristos /* Update the length of characters */
3096*75f6d617Schristos laststart[5]++;
3097*75f6d617Schristos range_start = str[0];
3098*75f6d617Schristos
3099*75f6d617Schristos /* Throw away the ] at the end of the
3100*75f6d617Schristos collating symbol. */
3101*75f6d617Schristos PATFETCH (c);
3102*75f6d617Schristos /* Go on to the next item of the bracket expression. */
3103*75f6d617Schristos continue;
3104*75f6d617Schristos }
3105*75f6d617Schristos else
3106*75f6d617Schristos FREE_STACK_RETURN (REG_ECOLLATE);
3107*75f6d617Schristos }
3108*75f6d617Schristos datasize = 1;
3109*75f6d617Schristos }
3110*75f6d617Schristos # endif
3111*75f6d617Schristos /* Throw away the ] at the end of the equivalence
3112*75f6d617Schristos class (or collating symbol). */
3113*75f6d617Schristos PATFETCH (c);
3114*75f6d617Schristos
3115*75f6d617Schristos /* Allocate the space for the equivalence class
3116*75f6d617Schristos (or collating symbol) (and '\0' if needed). */
3117*75f6d617Schristos GET_BUFFER_SPACE(datasize);
3118*75f6d617Schristos /* Update the pointer to indicate end of buffer. */
3119*75f6d617Schristos b += datasize;
3120*75f6d617Schristos
3121*75f6d617Schristos if (delim == '=')
3122*75f6d617Schristos { /* equivalence class */
3123*75f6d617Schristos /* Calculate the offset of char_ranges,
3124*75f6d617Schristos which is next to equivalence_classes. */
3125*75f6d617Schristos offset = laststart[1] + laststart[2]
3126*75f6d617Schristos + laststart[3] +6;
3127*75f6d617Schristos /* Insert space. */
3128*75f6d617Schristos insert_space(datasize, laststart + offset, b - 1);
3129*75f6d617Schristos
3130*75f6d617Schristos /* Write the equivalence_class and \0. */
3131*75f6d617Schristos for (i = 0 ; i < datasize ; i++)
3132*75f6d617Schristos laststart[offset + i] = str[i];
3133*75f6d617Schristos
3134*75f6d617Schristos /* Update the length of equivalence_classes. */
3135*75f6d617Schristos laststart[3] += datasize;
3136*75f6d617Schristos had_char_class = true;
3137*75f6d617Schristos }
3138*75f6d617Schristos else /* delim == '.' */
3139*75f6d617Schristos { /* collating symbol */
3140*75f6d617Schristos /* Calculate the offset of the equivalence_classes,
3141*75f6d617Schristos which is next to collating_symbols. */
3142*75f6d617Schristos offset = laststart[1] + laststart[2] + 6;
3143*75f6d617Schristos /* Insert space and write the collating_symbol
3144*75f6d617Schristos and \0. */
3145*75f6d617Schristos insert_space(datasize, laststart + offset, b-1);
3146*75f6d617Schristos for (i = 0 ; i < datasize ; i++)
3147*75f6d617Schristos laststart[offset + i] = str[i];
3148*75f6d617Schristos
3149*75f6d617Schristos /* In re_match_2_internal if range_start < -1, we
3150*75f6d617Schristos assume -range_start is the offset of the
3151*75f6d617Schristos collating symbol which is specified as
3152*75f6d617Schristos the character of the range start. So we assign
3153*75f6d617Schristos -(laststart[1] + laststart[2] + 6) to
3154*75f6d617Schristos range_start. */
3155*75f6d617Schristos range_start = -(laststart[1] + laststart[2] + 6);
3156*75f6d617Schristos /* Update the length of collating_symbol. */
3157*75f6d617Schristos laststart[2] += datasize;
3158*75f6d617Schristos had_char_class = false;
3159*75f6d617Schristos }
3160*75f6d617Schristos }
3161*75f6d617Schristos else
3162*75f6d617Schristos {
3163*75f6d617Schristos c1++;
3164*75f6d617Schristos while (c1--)
3165*75f6d617Schristos PATUNFETCH;
3166*75f6d617Schristos BUF_PUSH ('[');
3167*75f6d617Schristos BUF_PUSH (delim);
3168*75f6d617Schristos laststart[5] += 2; /* Update the length of characters */
3169*75f6d617Schristos range_start = delim;
3170*75f6d617Schristos had_char_class = false;
3171*75f6d617Schristos }
3172*75f6d617Schristos }
3173*75f6d617Schristos else
3174*75f6d617Schristos {
3175*75f6d617Schristos had_char_class = false;
3176*75f6d617Schristos BUF_PUSH(c);
3177*75f6d617Schristos laststart[5]++; /* Update the length of characters */
3178*75f6d617Schristos range_start = c;
3179*75f6d617Schristos }
3180*75f6d617Schristos }
3181*75f6d617Schristos
3182*75f6d617Schristos #else /* BYTE */
3183*75f6d617Schristos /* Ensure that we have enough space to push a charset: the
3184*75f6d617Schristos opcode, the length count, and the bitset; 34 bytes in all. */
3185*75f6d617Schristos GET_BUFFER_SPACE (34);
3186*75f6d617Schristos
3187*75f6d617Schristos laststart = b;
3188*75f6d617Schristos
3189*75f6d617Schristos /* We test `*p == '^' twice, instead of using an if
3190*75f6d617Schristos statement, so we only need one BUF_PUSH. */
3191*75f6d617Schristos BUF_PUSH (*p == '^' ? charset_not : charset);
3192*75f6d617Schristos if (*p == '^')
3193*75f6d617Schristos p++;
3194*75f6d617Schristos
3195*75f6d617Schristos /* Remember the first position in the bracket expression. */
3196*75f6d617Schristos p1 = p;
3197*75f6d617Schristos
3198*75f6d617Schristos /* Push the number of bytes in the bitmap. */
3199*75f6d617Schristos BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
3200*75f6d617Schristos
3201*75f6d617Schristos /* Clear the whole map. */
3202*75f6d617Schristos bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
3203*75f6d617Schristos
3204*75f6d617Schristos /* charset_not matches newline according to a syntax bit. */
3205*75f6d617Schristos if ((re_opcode_t) b[-2] == charset_not
3206*75f6d617Schristos && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
3207*75f6d617Schristos SET_LIST_BIT ('\n');
3208*75f6d617Schristos
3209*75f6d617Schristos /* Read in characters and ranges, setting map bits. */
3210*75f6d617Schristos for (;;)
3211*75f6d617Schristos {
3212*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3213*75f6d617Schristos
3214*75f6d617Schristos PATFETCH (c);
3215*75f6d617Schristos
3216*75f6d617Schristos /* \ might escape characters inside [...] and [^...]. */
3217*75f6d617Schristos if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
3218*75f6d617Schristos {
3219*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3220*75f6d617Schristos
3221*75f6d617Schristos PATFETCH (c1);
3222*75f6d617Schristos SET_LIST_BIT (c1);
3223*75f6d617Schristos range_start = c1;
3224*75f6d617Schristos continue;
3225*75f6d617Schristos }
3226*75f6d617Schristos
3227*75f6d617Schristos /* Could be the end of the bracket expression. If it's
3228*75f6d617Schristos not (i.e., when the bracket expression is `[]' so
3229*75f6d617Schristos far), the ']' character bit gets set way below. */
3230*75f6d617Schristos if (c == ']' && p != p1 + 1)
3231*75f6d617Schristos break;
3232*75f6d617Schristos
3233*75f6d617Schristos /* Look ahead to see if it's a range when the last thing
3234*75f6d617Schristos was a character class. */
3235*75f6d617Schristos if (had_char_class && c == '-' && *p != ']')
3236*75f6d617Schristos FREE_STACK_RETURN (REG_ERANGE);
3237*75f6d617Schristos
3238*75f6d617Schristos /* Look ahead to see if it's a range when the last thing
3239*75f6d617Schristos was a character: if this is a hyphen not at the
3240*75f6d617Schristos beginning or the end of a list, then it's the range
3241*75f6d617Schristos operator. */
3242*75f6d617Schristos if (c == '-'
3243*75f6d617Schristos && !(p - 2 >= pattern && p[-2] == '[')
3244*75f6d617Schristos && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
3245*75f6d617Schristos && *p != ']')
3246*75f6d617Schristos {
3247*75f6d617Schristos reg_errcode_t ret
3248*75f6d617Schristos = byte_compile_range (range_start, &p, pend, translate,
3249*75f6d617Schristos syntax, b);
3250*75f6d617Schristos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3251*75f6d617Schristos range_start = 0xffffffff;
3252*75f6d617Schristos }
3253*75f6d617Schristos
3254*75f6d617Schristos else if (p[0] == '-' && p[1] != ']')
3255*75f6d617Schristos { /* This handles ranges made up of characters only. */
3256*75f6d617Schristos reg_errcode_t ret;
3257*75f6d617Schristos
3258*75f6d617Schristos /* Move past the `-'. */
3259*75f6d617Schristos PATFETCH (c1);
3260*75f6d617Schristos
3261*75f6d617Schristos ret = byte_compile_range (c, &p, pend, translate, syntax, b);
3262*75f6d617Schristos if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
3263*75f6d617Schristos range_start = 0xffffffff;
3264*75f6d617Schristos }
3265*75f6d617Schristos
3266*75f6d617Schristos /* See if we're at the beginning of a possible character
3267*75f6d617Schristos class. */
3268*75f6d617Schristos
3269*75f6d617Schristos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3270*75f6d617Schristos { /* Leave room for the null. */
3271*75f6d617Schristos char str[CHAR_CLASS_MAX_LENGTH + 1];
3272*75f6d617Schristos
3273*75f6d617Schristos PATFETCH (c);
3274*75f6d617Schristos c1 = 0;
3275*75f6d617Schristos
3276*75f6d617Schristos /* If pattern is `[[:'. */
3277*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3278*75f6d617Schristos
3279*75f6d617Schristos for (;;)
3280*75f6d617Schristos {
3281*75f6d617Schristos PATFETCH (c);
3282*75f6d617Schristos if ((c == ':' && *p == ']') || p == pend)
3283*75f6d617Schristos break;
3284*75f6d617Schristos if (c1 < CHAR_CLASS_MAX_LENGTH)
3285*75f6d617Schristos str[c1++] = c;
3286*75f6d617Schristos else
3287*75f6d617Schristos /* This is in any case an invalid class name. */
3288*75f6d617Schristos str[0] = '\0';
3289*75f6d617Schristos }
3290*75f6d617Schristos str[c1] = '\0';
3291*75f6d617Schristos
3292*75f6d617Schristos /* If it isn't a word bracketed by `[:' and `:]':
3293*75f6d617Schristos undo the ending character, the letters, and leave
3294*75f6d617Schristos the leading `:' and `[' (but set bits for them). */
3295*75f6d617Schristos if (c == ':' && *p == ']')
3296*75f6d617Schristos {
3297*75f6d617Schristos # if defined _LIBC || WIDE_CHAR_SUPPORT
3298*75f6d617Schristos boolean is_lower = STREQ (str, "lower");
3299*75f6d617Schristos boolean is_upper = STREQ (str, "upper");
3300*75f6d617Schristos wctype_t wt;
3301*75f6d617Schristos int ch;
3302*75f6d617Schristos
3303*75f6d617Schristos wt = IS_CHAR_CLASS (str);
3304*75f6d617Schristos if (wt == 0)
3305*75f6d617Schristos FREE_STACK_RETURN (REG_ECTYPE);
3306*75f6d617Schristos
3307*75f6d617Schristos /* Throw away the ] at the end of the character
3308*75f6d617Schristos class. */
3309*75f6d617Schristos PATFETCH (c);
3310*75f6d617Schristos
3311*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3312*75f6d617Schristos
3313*75f6d617Schristos for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
3314*75f6d617Schristos {
3315*75f6d617Schristos if (iswctype (btowc (ch), wt))
3316*75f6d617Schristos SET_LIST_BIT (ch);
3317*75f6d617Schristos
3318*75f6d617Schristos if (translate && (is_upper || is_lower)
3319*75f6d617Schristos && (ISUPPER (ch) || ISLOWER (ch)))
3320*75f6d617Schristos SET_LIST_BIT (ch);
3321*75f6d617Schristos }
3322*75f6d617Schristos
3323*75f6d617Schristos had_char_class = true;
3324*75f6d617Schristos # else
3325*75f6d617Schristos int ch;
3326*75f6d617Schristos boolean is_alnum = STREQ (str, "alnum");
3327*75f6d617Schristos boolean is_alpha = STREQ (str, "alpha");
3328*75f6d617Schristos boolean is_blank = STREQ (str, "blank");
3329*75f6d617Schristos boolean is_cntrl = STREQ (str, "cntrl");
3330*75f6d617Schristos boolean is_digit = STREQ (str, "digit");
3331*75f6d617Schristos boolean is_graph = STREQ (str, "graph");
3332*75f6d617Schristos boolean is_lower = STREQ (str, "lower");
3333*75f6d617Schristos boolean is_print = STREQ (str, "print");
3334*75f6d617Schristos boolean is_punct = STREQ (str, "punct");
3335*75f6d617Schristos boolean is_space = STREQ (str, "space");
3336*75f6d617Schristos boolean is_upper = STREQ (str, "upper");
3337*75f6d617Schristos boolean is_xdigit = STREQ (str, "xdigit");
3338*75f6d617Schristos
3339*75f6d617Schristos if (!IS_CHAR_CLASS (str))
3340*75f6d617Schristos FREE_STACK_RETURN (REG_ECTYPE);
3341*75f6d617Schristos
3342*75f6d617Schristos /* Throw away the ] at the end of the character
3343*75f6d617Schristos class. */
3344*75f6d617Schristos PATFETCH (c);
3345*75f6d617Schristos
3346*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3347*75f6d617Schristos
3348*75f6d617Schristos for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
3349*75f6d617Schristos {
3350*75f6d617Schristos /* This was split into 3 if's to
3351*75f6d617Schristos avoid an arbitrary limit in some compiler. */
3352*75f6d617Schristos if ( (is_alnum && ISALNUM (ch))
3353*75f6d617Schristos || (is_alpha && ISALPHA (ch))
3354*75f6d617Schristos || (is_blank && ISBLANK (ch))
3355*75f6d617Schristos || (is_cntrl && ISCNTRL (ch)))
3356*75f6d617Schristos SET_LIST_BIT (ch);
3357*75f6d617Schristos if ( (is_digit && ISDIGIT (ch))
3358*75f6d617Schristos || (is_graph && ISGRAPH (ch))
3359*75f6d617Schristos || (is_lower && ISLOWER (ch))
3360*75f6d617Schristos || (is_print && ISPRINT (ch)))
3361*75f6d617Schristos SET_LIST_BIT (ch);
3362*75f6d617Schristos if ( (is_punct && ISPUNCT (ch))
3363*75f6d617Schristos || (is_space && ISSPACE (ch))
3364*75f6d617Schristos || (is_upper && ISUPPER (ch))
3365*75f6d617Schristos || (is_xdigit && ISXDIGIT (ch)))
3366*75f6d617Schristos SET_LIST_BIT (ch);
3367*75f6d617Schristos if ( translate && (is_upper || is_lower)
3368*75f6d617Schristos && (ISUPPER (ch) || ISLOWER (ch)))
3369*75f6d617Schristos SET_LIST_BIT (ch);
3370*75f6d617Schristos }
3371*75f6d617Schristos had_char_class = true;
3372*75f6d617Schristos # endif /* libc || wctype.h */
3373*75f6d617Schristos }
3374*75f6d617Schristos else
3375*75f6d617Schristos {
3376*75f6d617Schristos c1++;
3377*75f6d617Schristos while (c1--)
3378*75f6d617Schristos PATUNFETCH;
3379*75f6d617Schristos SET_LIST_BIT ('[');
3380*75f6d617Schristos SET_LIST_BIT (':');
3381*75f6d617Schristos range_start = ':';
3382*75f6d617Schristos had_char_class = false;
3383*75f6d617Schristos }
3384*75f6d617Schristos }
3385*75f6d617Schristos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
3386*75f6d617Schristos {
3387*75f6d617Schristos unsigned char str[MB_LEN_MAX + 1];
3388*75f6d617Schristos # ifdef _LIBC
3389*75f6d617Schristos uint32_t nrules =
3390*75f6d617Schristos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3391*75f6d617Schristos # endif
3392*75f6d617Schristos
3393*75f6d617Schristos PATFETCH (c);
3394*75f6d617Schristos c1 = 0;
3395*75f6d617Schristos
3396*75f6d617Schristos /* If pattern is `[[='. */
3397*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3398*75f6d617Schristos
3399*75f6d617Schristos for (;;)
3400*75f6d617Schristos {
3401*75f6d617Schristos PATFETCH (c);
3402*75f6d617Schristos if ((c == '=' && *p == ']') || p == pend)
3403*75f6d617Schristos break;
3404*75f6d617Schristos if (c1 < MB_LEN_MAX)
3405*75f6d617Schristos str[c1++] = c;
3406*75f6d617Schristos else
3407*75f6d617Schristos /* This is in any case an invalid class name. */
3408*75f6d617Schristos str[0] = '\0';
3409*75f6d617Schristos }
3410*75f6d617Schristos str[c1] = '\0';
3411*75f6d617Schristos
3412*75f6d617Schristos if (c == '=' && *p == ']' && str[0] != '\0')
3413*75f6d617Schristos {
3414*75f6d617Schristos /* If we have no collation data we use the default
3415*75f6d617Schristos collation in which each character is in a class
3416*75f6d617Schristos by itself. It also means that ASCII is the
3417*75f6d617Schristos character set and therefore we cannot have character
3418*75f6d617Schristos with more than one byte in the multibyte
3419*75f6d617Schristos representation. */
3420*75f6d617Schristos # ifdef _LIBC
3421*75f6d617Schristos if (nrules == 0)
3422*75f6d617Schristos # endif
3423*75f6d617Schristos {
3424*75f6d617Schristos if (c1 != 1)
3425*75f6d617Schristos FREE_STACK_RETURN (REG_ECOLLATE);
3426*75f6d617Schristos
3427*75f6d617Schristos /* Throw away the ] at the end of the equivalence
3428*75f6d617Schristos class. */
3429*75f6d617Schristos PATFETCH (c);
3430*75f6d617Schristos
3431*75f6d617Schristos /* Set the bit for the character. */
3432*75f6d617Schristos SET_LIST_BIT (str[0]);
3433*75f6d617Schristos }
3434*75f6d617Schristos # ifdef _LIBC
3435*75f6d617Schristos else
3436*75f6d617Schristos {
3437*75f6d617Schristos /* Try to match the byte sequence in `str' against
3438*75f6d617Schristos those known to the collate implementation.
3439*75f6d617Schristos First find out whether the bytes in `str' are
3440*75f6d617Schristos actually from exactly one character. */
3441*75f6d617Schristos const int32_t *table;
3442*75f6d617Schristos const unsigned char *weights;
3443*75f6d617Schristos const unsigned char *extra;
3444*75f6d617Schristos const int32_t *indirect;
3445*75f6d617Schristos int32_t idx;
3446*75f6d617Schristos const unsigned char *cp = str;
3447*75f6d617Schristos int ch;
3448*75f6d617Schristos
3449*75f6d617Schristos /* This #include defines a local function! */
3450*75f6d617Schristos # include <locale/weight.h>
3451*75f6d617Schristos
3452*75f6d617Schristos table = (const int32_t *)
3453*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3454*75f6d617Schristos weights = (const unsigned char *)
3455*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3456*75f6d617Schristos extra = (const unsigned char *)
3457*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3458*75f6d617Schristos indirect = (const int32_t *)
3459*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3460*75f6d617Schristos
3461*75f6d617Schristos idx = findidx (&cp);
3462*75f6d617Schristos if (idx == 0 || cp < str + c1)
3463*75f6d617Schristos /* This is no valid character. */
3464*75f6d617Schristos FREE_STACK_RETURN (REG_ECOLLATE);
3465*75f6d617Schristos
3466*75f6d617Schristos /* Throw away the ] at the end of the equivalence
3467*75f6d617Schristos class. */
3468*75f6d617Schristos PATFETCH (c);
3469*75f6d617Schristos
3470*75f6d617Schristos /* Now we have to go through the whole table
3471*75f6d617Schristos and find all characters which have the same
3472*75f6d617Schristos first level weight.
3473*75f6d617Schristos
3474*75f6d617Schristos XXX Note that this is not entirely correct.
3475*75f6d617Schristos we would have to match multibyte sequences
3476*75f6d617Schristos but this is not possible with the current
3477*75f6d617Schristos implementation. */
3478*75f6d617Schristos for (ch = 1; ch < 256; ++ch)
3479*75f6d617Schristos /* XXX This test would have to be changed if we
3480*75f6d617Schristos would allow matching multibyte sequences. */
3481*75f6d617Schristos if (table[ch] > 0)
3482*75f6d617Schristos {
3483*75f6d617Schristos int32_t idx2 = table[ch];
3484*75f6d617Schristos size_t len = weights[idx2];
3485*75f6d617Schristos
3486*75f6d617Schristos /* Test whether the lengths match. */
3487*75f6d617Schristos if (weights[idx] == len)
3488*75f6d617Schristos {
3489*75f6d617Schristos /* They do. Now compare the bytes of
3490*75f6d617Schristos the weight. */
3491*75f6d617Schristos size_t cnt = 0;
3492*75f6d617Schristos
3493*75f6d617Schristos while (cnt < len
3494*75f6d617Schristos && (weights[idx + 1 + cnt]
3495*75f6d617Schristos == weights[idx2 + 1 + cnt]))
3496*75f6d617Schristos ++cnt;
3497*75f6d617Schristos
3498*75f6d617Schristos if (cnt == len)
3499*75f6d617Schristos /* They match. Mark the character as
3500*75f6d617Schristos acceptable. */
3501*75f6d617Schristos SET_LIST_BIT (ch);
3502*75f6d617Schristos }
3503*75f6d617Schristos }
3504*75f6d617Schristos }
3505*75f6d617Schristos # endif
3506*75f6d617Schristos had_char_class = true;
3507*75f6d617Schristos }
3508*75f6d617Schristos else
3509*75f6d617Schristos {
3510*75f6d617Schristos c1++;
3511*75f6d617Schristos while (c1--)
3512*75f6d617Schristos PATUNFETCH;
3513*75f6d617Schristos SET_LIST_BIT ('[');
3514*75f6d617Schristos SET_LIST_BIT ('=');
3515*75f6d617Schristos range_start = '=';
3516*75f6d617Schristos had_char_class = false;
3517*75f6d617Schristos }
3518*75f6d617Schristos }
3519*75f6d617Schristos else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
3520*75f6d617Schristos {
3521*75f6d617Schristos unsigned char str[128]; /* Should be large enough. */
3522*75f6d617Schristos # ifdef _LIBC
3523*75f6d617Schristos uint32_t nrules =
3524*75f6d617Schristos _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3525*75f6d617Schristos # endif
3526*75f6d617Schristos
3527*75f6d617Schristos PATFETCH (c);
3528*75f6d617Schristos c1 = 0;
3529*75f6d617Schristos
3530*75f6d617Schristos /* If pattern is `[[.'. */
3531*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3532*75f6d617Schristos
3533*75f6d617Schristos for (;;)
3534*75f6d617Schristos {
3535*75f6d617Schristos PATFETCH (c);
3536*75f6d617Schristos if ((c == '.' && *p == ']') || p == pend)
3537*75f6d617Schristos break;
3538*75f6d617Schristos if (c1 < sizeof (str))
3539*75f6d617Schristos str[c1++] = c;
3540*75f6d617Schristos else
3541*75f6d617Schristos /* This is in any case an invalid class name. */
3542*75f6d617Schristos str[0] = '\0';
3543*75f6d617Schristos }
3544*75f6d617Schristos str[c1] = '\0';
3545*75f6d617Schristos
3546*75f6d617Schristos if (c == '.' && *p == ']' && str[0] != '\0')
3547*75f6d617Schristos {
3548*75f6d617Schristos /* If we have no collation data we use the default
3549*75f6d617Schristos collation in which each character is the name
3550*75f6d617Schristos for its own class which contains only the one
3551*75f6d617Schristos character. It also means that ASCII is the
3552*75f6d617Schristos character set and therefore we cannot have character
3553*75f6d617Schristos with more than one byte in the multibyte
3554*75f6d617Schristos representation. */
3555*75f6d617Schristos # ifdef _LIBC
3556*75f6d617Schristos if (nrules == 0)
3557*75f6d617Schristos # endif
3558*75f6d617Schristos {
3559*75f6d617Schristos if (c1 != 1)
3560*75f6d617Schristos FREE_STACK_RETURN (REG_ECOLLATE);
3561*75f6d617Schristos
3562*75f6d617Schristos /* Throw away the ] at the end of the collating
3563*75f6d617Schristos symbol. */
3564*75f6d617Schristos PATFETCH (c);
3565*75f6d617Schristos
3566*75f6d617Schristos /* Set the bit for the character. */
3567*75f6d617Schristos SET_LIST_BIT (str[0]);
3568*75f6d617Schristos range_start = ((const unsigned char *) str)[0];
3569*75f6d617Schristos }
3570*75f6d617Schristos # ifdef _LIBC
3571*75f6d617Schristos else
3572*75f6d617Schristos {
3573*75f6d617Schristos /* Try to match the byte sequence in `str' against
3574*75f6d617Schristos those known to the collate implementation.
3575*75f6d617Schristos First find out whether the bytes in `str' are
3576*75f6d617Schristos actually from exactly one character. */
3577*75f6d617Schristos int32_t table_size;
3578*75f6d617Schristos const int32_t *symb_table;
3579*75f6d617Schristos const unsigned char *extra;
3580*75f6d617Schristos int32_t idx;
3581*75f6d617Schristos int32_t elem;
3582*75f6d617Schristos int32_t second;
3583*75f6d617Schristos int32_t hash;
3584*75f6d617Schristos
3585*75f6d617Schristos table_size =
3586*75f6d617Schristos _NL_CURRENT_WORD (LC_COLLATE,
3587*75f6d617Schristos _NL_COLLATE_SYMB_HASH_SIZEMB);
3588*75f6d617Schristos symb_table = (const int32_t *)
3589*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3590*75f6d617Schristos _NL_COLLATE_SYMB_TABLEMB);
3591*75f6d617Schristos extra = (const unsigned char *)
3592*75f6d617Schristos _NL_CURRENT (LC_COLLATE,
3593*75f6d617Schristos _NL_COLLATE_SYMB_EXTRAMB);
3594*75f6d617Schristos
3595*75f6d617Schristos /* Locate the character in the hashing table. */
3596*75f6d617Schristos hash = elem_hash (str, c1);
3597*75f6d617Schristos
3598*75f6d617Schristos idx = 0;
3599*75f6d617Schristos elem = hash % table_size;
3600*75f6d617Schristos second = hash % (table_size - 2);
3601*75f6d617Schristos while (symb_table[2 * elem] != 0)
3602*75f6d617Schristos {
3603*75f6d617Schristos /* First compare the hashing value. */
3604*75f6d617Schristos if (symb_table[2 * elem] == hash
3605*75f6d617Schristos && c1 == extra[symb_table[2 * elem + 1]]
3606*75f6d617Schristos && memcmp (str,
3607*75f6d617Schristos &extra[symb_table[2 * elem + 1]
3608*75f6d617Schristos + 1],
3609*75f6d617Schristos c1) == 0)
3610*75f6d617Schristos {
3611*75f6d617Schristos /* Yep, this is the entry. */
3612*75f6d617Schristos idx = symb_table[2 * elem + 1];
3613*75f6d617Schristos idx += 1 + extra[idx];
3614*75f6d617Schristos break;
3615*75f6d617Schristos }
3616*75f6d617Schristos
3617*75f6d617Schristos /* Next entry. */
3618*75f6d617Schristos elem += second;
3619*75f6d617Schristos }
3620*75f6d617Schristos
3621*75f6d617Schristos if (symb_table[2 * elem] == 0)
3622*75f6d617Schristos /* This is no valid character. */
3623*75f6d617Schristos FREE_STACK_RETURN (REG_ECOLLATE);
3624*75f6d617Schristos
3625*75f6d617Schristos /* Throw away the ] at the end of the collating
3626*75f6d617Schristos symbol. */
3627*75f6d617Schristos PATFETCH (c);
3628*75f6d617Schristos
3629*75f6d617Schristos /* Now add the multibyte character(s) we found
3630*75f6d617Schristos to the accept list.
3631*75f6d617Schristos
3632*75f6d617Schristos XXX Note that this is not entirely correct.
3633*75f6d617Schristos we would have to match multibyte sequences
3634*75f6d617Schristos but this is not possible with the current
3635*75f6d617Schristos implementation. Also, we have to match
3636*75f6d617Schristos collating symbols, which expand to more than
3637*75f6d617Schristos one byte, as a whole and not allow the
3638*75f6d617Schristos individual bytes. */
3639*75f6d617Schristos c1 = extra[idx++];
3640*75f6d617Schristos if (c1 == 1)
3641*75f6d617Schristos range_start = extra[idx];
3642*75f6d617Schristos while (c1-- > 0)
3643*75f6d617Schristos {
3644*75f6d617Schristos SET_LIST_BIT (extra[idx]);
3645*75f6d617Schristos ++idx;
3646*75f6d617Schristos }
3647*75f6d617Schristos }
3648*75f6d617Schristos # endif
3649*75f6d617Schristos had_char_class = false;
3650*75f6d617Schristos }
3651*75f6d617Schristos else
3652*75f6d617Schristos {
3653*75f6d617Schristos c1++;
3654*75f6d617Schristos while (c1--)
3655*75f6d617Schristos PATUNFETCH;
3656*75f6d617Schristos SET_LIST_BIT ('[');
3657*75f6d617Schristos SET_LIST_BIT ('.');
3658*75f6d617Schristos range_start = '.';
3659*75f6d617Schristos had_char_class = false;
3660*75f6d617Schristos }
3661*75f6d617Schristos }
3662*75f6d617Schristos else
3663*75f6d617Schristos {
3664*75f6d617Schristos had_char_class = false;
3665*75f6d617Schristos SET_LIST_BIT (c);
3666*75f6d617Schristos range_start = c;
3667*75f6d617Schristos }
3668*75f6d617Schristos }
3669*75f6d617Schristos
3670*75f6d617Schristos /* Discard any (non)matching list bytes that are all 0 at the
3671*75f6d617Schristos end of the map. Decrease the map-length byte too. */
3672*75f6d617Schristos while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3673*75f6d617Schristos b[-1]--;
3674*75f6d617Schristos b += b[-1];
3675*75f6d617Schristos #endif /* WCHAR */
3676*75f6d617Schristos }
3677*75f6d617Schristos break;
3678*75f6d617Schristos
3679*75f6d617Schristos
3680*75f6d617Schristos case '(':
3681*75f6d617Schristos if (syntax & RE_NO_BK_PARENS)
3682*75f6d617Schristos goto handle_open;
3683*75f6d617Schristos else
3684*75f6d617Schristos goto normal_char;
3685*75f6d617Schristos
3686*75f6d617Schristos
3687*75f6d617Schristos case ')':
3688*75f6d617Schristos if (syntax & RE_NO_BK_PARENS)
3689*75f6d617Schristos goto handle_close;
3690*75f6d617Schristos else
3691*75f6d617Schristos goto normal_char;
3692*75f6d617Schristos
3693*75f6d617Schristos
3694*75f6d617Schristos case '\n':
3695*75f6d617Schristos if (syntax & RE_NEWLINE_ALT)
3696*75f6d617Schristos goto handle_alt;
3697*75f6d617Schristos else
3698*75f6d617Schristos goto normal_char;
3699*75f6d617Schristos
3700*75f6d617Schristos
3701*75f6d617Schristos case '|':
3702*75f6d617Schristos if (syntax & RE_NO_BK_VBAR)
3703*75f6d617Schristos goto handle_alt;
3704*75f6d617Schristos else
3705*75f6d617Schristos goto normal_char;
3706*75f6d617Schristos
3707*75f6d617Schristos
3708*75f6d617Schristos case '{':
3709*75f6d617Schristos if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3710*75f6d617Schristos goto handle_interval;
3711*75f6d617Schristos else
3712*75f6d617Schristos goto normal_char;
3713*75f6d617Schristos
3714*75f6d617Schristos
3715*75f6d617Schristos case '\\':
3716*75f6d617Schristos if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3717*75f6d617Schristos
3718*75f6d617Schristos /* Do not translate the character after the \, so that we can
3719*75f6d617Schristos distinguish, e.g., \B from \b, even if we normally would
3720*75f6d617Schristos translate, e.g., B to b. */
3721*75f6d617Schristos PATFETCH_RAW (c);
3722*75f6d617Schristos
3723*75f6d617Schristos switch (c)
3724*75f6d617Schristos {
3725*75f6d617Schristos case '(':
3726*75f6d617Schristos if (syntax & RE_NO_BK_PARENS)
3727*75f6d617Schristos goto normal_backslash;
3728*75f6d617Schristos
3729*75f6d617Schristos handle_open:
3730*75f6d617Schristos bufp->re_nsub++;
3731*75f6d617Schristos regnum++;
3732*75f6d617Schristos
3733*75f6d617Schristos if (COMPILE_STACK_FULL)
3734*75f6d617Schristos {
3735*75f6d617Schristos RETALLOC (compile_stack.stack, compile_stack.size << 1,
3736*75f6d617Schristos compile_stack_elt_t);
3737*75f6d617Schristos if (compile_stack.stack == NULL) return REG_ESPACE;
3738*75f6d617Schristos
3739*75f6d617Schristos compile_stack.size <<= 1;
3740*75f6d617Schristos }
3741*75f6d617Schristos
3742*75f6d617Schristos /* These are the values to restore when we hit end of this
3743*75f6d617Schristos group. They are all relative offsets, so that if the
3744*75f6d617Schristos whole pattern moves because of realloc, they will still
3745*75f6d617Schristos be valid. */
3746*75f6d617Schristos COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
3747*75f6d617Schristos COMPILE_STACK_TOP.fixup_alt_jump
3748*75f6d617Schristos = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
3749*75f6d617Schristos COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
3750*75f6d617Schristos COMPILE_STACK_TOP.regnum = regnum;
3751*75f6d617Schristos
3752*75f6d617Schristos /* We will eventually replace the 0 with the number of
3753*75f6d617Schristos groups inner to this one. But do not push a
3754*75f6d617Schristos start_memory for groups beyond the last one we can
3755*75f6d617Schristos represent in the compiled pattern. */
3756*75f6d617Schristos if (regnum <= MAX_REGNUM)
3757*75f6d617Schristos {
3758*75f6d617Schristos COMPILE_STACK_TOP.inner_group_offset = b
3759*75f6d617Schristos - COMPILED_BUFFER_VAR + 2;
3760*75f6d617Schristos BUF_PUSH_3 (start_memory, regnum, 0);
3761*75f6d617Schristos }
3762*75f6d617Schristos
3763*75f6d617Schristos compile_stack.avail++;
3764*75f6d617Schristos
3765*75f6d617Schristos fixup_alt_jump = 0;
3766*75f6d617Schristos laststart = 0;
3767*75f6d617Schristos begalt = b;
3768*75f6d617Schristos /* If we've reached MAX_REGNUM groups, then this open
3769*75f6d617Schristos won't actually generate any code, so we'll have to
3770*75f6d617Schristos clear pending_exact explicitly. */
3771*75f6d617Schristos pending_exact = 0;
3772*75f6d617Schristos break;
3773*75f6d617Schristos
3774*75f6d617Schristos
3775*75f6d617Schristos case ')':
3776*75f6d617Schristos if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3777*75f6d617Schristos
3778*75f6d617Schristos if (COMPILE_STACK_EMPTY)
3779*75f6d617Schristos {
3780*75f6d617Schristos if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3781*75f6d617Schristos goto normal_backslash;
3782*75f6d617Schristos else
3783*75f6d617Schristos FREE_STACK_RETURN (REG_ERPAREN);
3784*75f6d617Schristos }
3785*75f6d617Schristos
3786*75f6d617Schristos handle_close:
3787*75f6d617Schristos if (fixup_alt_jump)
3788*75f6d617Schristos { /* Push a dummy failure point at the end of the
3789*75f6d617Schristos alternative for a possible future
3790*75f6d617Schristos `pop_failure_jump' to pop. See comments at
3791*75f6d617Schristos `push_dummy_failure' in `re_match_2'. */
3792*75f6d617Schristos BUF_PUSH (push_dummy_failure);
3793*75f6d617Schristos
3794*75f6d617Schristos /* We allocated space for this jump when we assigned
3795*75f6d617Schristos to `fixup_alt_jump', in the `handle_alt' case below. */
3796*75f6d617Schristos STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
3797*75f6d617Schristos }
3798*75f6d617Schristos
3799*75f6d617Schristos /* See similar code for backslashed right paren above. */
3800*75f6d617Schristos if (COMPILE_STACK_EMPTY)
3801*75f6d617Schristos {
3802*75f6d617Schristos if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3803*75f6d617Schristos goto normal_char;
3804*75f6d617Schristos else
3805*75f6d617Schristos FREE_STACK_RETURN (REG_ERPAREN);
3806*75f6d617Schristos }
3807*75f6d617Schristos
3808*75f6d617Schristos /* Since we just checked for an empty stack above, this
3809*75f6d617Schristos ``can't happen''. */
3810*75f6d617Schristos assert (compile_stack.avail != 0);
3811*75f6d617Schristos {
3812*75f6d617Schristos /* We don't just want to restore into `regnum', because
3813*75f6d617Schristos later groups should continue to be numbered higher,
3814*75f6d617Schristos as in `(ab)c(de)' -- the second group is #2. */
3815*75f6d617Schristos regnum_t this_group_regnum;
3816*75f6d617Schristos
3817*75f6d617Schristos compile_stack.avail--;
3818*75f6d617Schristos begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
3819*75f6d617Schristos fixup_alt_jump
3820*75f6d617Schristos = COMPILE_STACK_TOP.fixup_alt_jump
3821*75f6d617Schristos ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
3822*75f6d617Schristos : 0;
3823*75f6d617Schristos laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
3824*75f6d617Schristos this_group_regnum = COMPILE_STACK_TOP.regnum;
3825*75f6d617Schristos /* If we've reached MAX_REGNUM groups, then this close
3826*75f6d617Schristos won't actually generate any code, so we'll have to
3827*75f6d617Schristos clear pending_exact explicitly. */
3828*75f6d617Schristos pending_exact = 0;
3829*75f6d617Schristos
3830*75f6d617Schristos /* We're at the end of the group, so now we know how many
3831*75f6d617Schristos groups were inside this one. */
3832*75f6d617Schristos if (this_group_regnum <= MAX_REGNUM)
3833*75f6d617Schristos {
3834*75f6d617Schristos UCHAR_T *inner_group_loc
3835*75f6d617Schristos = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
3836*75f6d617Schristos
3837*75f6d617Schristos *inner_group_loc = regnum - this_group_regnum;
3838*75f6d617Schristos BUF_PUSH_3 (stop_memory, this_group_regnum,
3839*75f6d617Schristos regnum - this_group_regnum);
3840*75f6d617Schristos }
3841*75f6d617Schristos }
3842*75f6d617Schristos break;
3843*75f6d617Schristos
3844*75f6d617Schristos
3845*75f6d617Schristos case '|': /* `\|'. */
3846*75f6d617Schristos if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3847*75f6d617Schristos goto normal_backslash;
3848*75f6d617Schristos handle_alt:
3849*75f6d617Schristos if (syntax & RE_LIMITED_OPS)
3850*75f6d617Schristos goto normal_char;
3851*75f6d617Schristos
3852*75f6d617Schristos /* Insert before the previous alternative a jump which
3853*75f6d617Schristos jumps to this alternative if the former fails. */
3854*75f6d617Schristos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3855*75f6d617Schristos INSERT_JUMP (on_failure_jump, begalt,
3856*75f6d617Schristos b + 2 + 2 * OFFSET_ADDRESS_SIZE);
3857*75f6d617Schristos pending_exact = 0;
3858*75f6d617Schristos b += 1 + OFFSET_ADDRESS_SIZE;
3859*75f6d617Schristos
3860*75f6d617Schristos /* The alternative before this one has a jump after it
3861*75f6d617Schristos which gets executed if it gets matched. Adjust that
3862*75f6d617Schristos jump so it will jump to this alternative's analogous
3863*75f6d617Schristos jump (put in below, which in turn will jump to the next
3864*75f6d617Schristos (if any) alternative's such jump, etc.). The last such
3865*75f6d617Schristos jump jumps to the correct final destination. A picture:
3866*75f6d617Schristos _____ _____
3867*75f6d617Schristos | | | |
3868*75f6d617Schristos | v | v
3869*75f6d617Schristos a | b | c
3870*75f6d617Schristos
3871*75f6d617Schristos If we are at `b', then fixup_alt_jump right now points to a
3872*75f6d617Schristos three-byte space after `a'. We'll put in the jump, set
3873*75f6d617Schristos fixup_alt_jump to right after `b', and leave behind three
3874*75f6d617Schristos bytes which we'll fill in when we get to after `c'. */
3875*75f6d617Schristos
3876*75f6d617Schristos if (fixup_alt_jump)
3877*75f6d617Schristos STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3878*75f6d617Schristos
3879*75f6d617Schristos /* Mark and leave space for a jump after this alternative,
3880*75f6d617Schristos to be filled in later either by next alternative or
3881*75f6d617Schristos when know we're at the end of a series of alternatives. */
3882*75f6d617Schristos fixup_alt_jump = b;
3883*75f6d617Schristos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3884*75f6d617Schristos b += 1 + OFFSET_ADDRESS_SIZE;
3885*75f6d617Schristos
3886*75f6d617Schristos laststart = 0;
3887*75f6d617Schristos begalt = b;
3888*75f6d617Schristos break;
3889*75f6d617Schristos
3890*75f6d617Schristos
3891*75f6d617Schristos case '{':
3892*75f6d617Schristos /* If \{ is a literal. */
3893*75f6d617Schristos if (!(syntax & RE_INTERVALS)
3894*75f6d617Schristos /* If we're at `\{' and it's not the open-interval
3895*75f6d617Schristos operator. */
3896*75f6d617Schristos || (syntax & RE_NO_BK_BRACES))
3897*75f6d617Schristos goto normal_backslash;
3898*75f6d617Schristos
3899*75f6d617Schristos handle_interval:
3900*75f6d617Schristos {
3901*75f6d617Schristos /* If got here, then the syntax allows intervals. */
3902*75f6d617Schristos
3903*75f6d617Schristos /* At least (most) this many matches must be made. */
3904*75f6d617Schristos int lower_bound = -1, upper_bound = -1;
3905*75f6d617Schristos
3906*75f6d617Schristos /* Place in the uncompiled pattern (i.e., just after
3907*75f6d617Schristos the '{') to go back to if the interval is invalid. */
3908*75f6d617Schristos const CHAR_T *beg_interval = p;
3909*75f6d617Schristos
3910*75f6d617Schristos if (p == pend)
3911*75f6d617Schristos goto invalid_interval;
3912*75f6d617Schristos
3913*75f6d617Schristos GET_UNSIGNED_NUMBER (lower_bound);
3914*75f6d617Schristos
3915*75f6d617Schristos if (c == ',')
3916*75f6d617Schristos {
3917*75f6d617Schristos GET_UNSIGNED_NUMBER (upper_bound);
3918*75f6d617Schristos if (upper_bound < 0)
3919*75f6d617Schristos upper_bound = RE_DUP_MAX;
3920*75f6d617Schristos }
3921*75f6d617Schristos else
3922*75f6d617Schristos /* Interval such as `{1}' => match exactly once. */
3923*75f6d617Schristos upper_bound = lower_bound;
3924*75f6d617Schristos
3925*75f6d617Schristos if (! (0 <= lower_bound && lower_bound <= upper_bound))
3926*75f6d617Schristos goto invalid_interval;
3927*75f6d617Schristos
3928*75f6d617Schristos if (!(syntax & RE_NO_BK_BRACES))
3929*75f6d617Schristos {
3930*75f6d617Schristos if (c != '\\' || p == pend)
3931*75f6d617Schristos goto invalid_interval;
3932*75f6d617Schristos PATFETCH (c);
3933*75f6d617Schristos }
3934*75f6d617Schristos
3935*75f6d617Schristos if (c != '}')
3936*75f6d617Schristos goto invalid_interval;
3937*75f6d617Schristos
3938*75f6d617Schristos /* If it's invalid to have no preceding re. */
3939*75f6d617Schristos if (!laststart)
3940*75f6d617Schristos {
3941*75f6d617Schristos if (syntax & RE_CONTEXT_INVALID_OPS
3942*75f6d617Schristos && !(syntax & RE_INVALID_INTERVAL_ORD))
3943*75f6d617Schristos FREE_STACK_RETURN (REG_BADRPT);
3944*75f6d617Schristos else if (syntax & RE_CONTEXT_INDEP_OPS)
3945*75f6d617Schristos laststart = b;
3946*75f6d617Schristos else
3947*75f6d617Schristos goto unfetch_interval;
3948*75f6d617Schristos }
3949*75f6d617Schristos
3950*75f6d617Schristos /* We just parsed a valid interval. */
3951*75f6d617Schristos
3952*75f6d617Schristos if (RE_DUP_MAX < upper_bound)
3953*75f6d617Schristos FREE_STACK_RETURN (REG_BADBR);
3954*75f6d617Schristos
3955*75f6d617Schristos /* If the upper bound is zero, don't want to succeed at
3956*75f6d617Schristos all; jump from `laststart' to `b + 3', which will be
3957*75f6d617Schristos the end of the buffer after we insert the jump. */
3958*75f6d617Schristos /* ifdef WCHAR, 'b + 1 + OFFSET_ADDRESS_SIZE'
3959*75f6d617Schristos instead of 'b + 3'. */
3960*75f6d617Schristos if (upper_bound == 0)
3961*75f6d617Schristos {
3962*75f6d617Schristos GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
3963*75f6d617Schristos INSERT_JUMP (jump, laststart, b + 1
3964*75f6d617Schristos + OFFSET_ADDRESS_SIZE);
3965*75f6d617Schristos b += 1 + OFFSET_ADDRESS_SIZE;
3966*75f6d617Schristos }
3967*75f6d617Schristos
3968*75f6d617Schristos /* Otherwise, we have a nontrivial interval. When
3969*75f6d617Schristos we're all done, the pattern will look like:
3970*75f6d617Schristos set_number_at <jump count> <upper bound>
3971*75f6d617Schristos set_number_at <succeed_n count> <lower bound>
3972*75f6d617Schristos succeed_n <after jump addr> <succeed_n count>
3973*75f6d617Schristos <body of loop>
3974*75f6d617Schristos jump_n <succeed_n addr> <jump count>
3975*75f6d617Schristos (The upper bound and `jump_n' are omitted if
3976*75f6d617Schristos `upper_bound' is 1, though.) */
3977*75f6d617Schristos else
3978*75f6d617Schristos { /* If the upper bound is > 1, we need to insert
3979*75f6d617Schristos more at the end of the loop. */
3980*75f6d617Schristos unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
3981*75f6d617Schristos (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
3982*75f6d617Schristos
3983*75f6d617Schristos GET_BUFFER_SPACE (nbytes);
3984*75f6d617Schristos
3985*75f6d617Schristos /* Initialize lower bound of the `succeed_n', even
3986*75f6d617Schristos though it will be set during matching by its
3987*75f6d617Schristos attendant `set_number_at' (inserted next),
3988*75f6d617Schristos because `re_compile_fastmap' needs to know.
3989*75f6d617Schristos Jump to the `jump_n' we might insert below. */
3990*75f6d617Schristos INSERT_JUMP2 (succeed_n, laststart,
3991*75f6d617Schristos b + 1 + 2 * OFFSET_ADDRESS_SIZE
3992*75f6d617Schristos + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
3993*75f6d617Schristos , lower_bound);
3994*75f6d617Schristos b += 1 + 2 * OFFSET_ADDRESS_SIZE;
3995*75f6d617Schristos
3996*75f6d617Schristos /* Code to initialize the lower bound. Insert
3997*75f6d617Schristos before the `succeed_n'. The `5' is the last two
3998*75f6d617Schristos bytes of this `set_number_at', plus 3 bytes of
3999*75f6d617Schristos the following `succeed_n'. */
4000*75f6d617Schristos /* ifdef WCHAR, The '1+2*OFFSET_ADDRESS_SIZE'
4001*75f6d617Schristos is the 'set_number_at', plus '1+OFFSET_ADDRESS_SIZE'
4002*75f6d617Schristos of the following `succeed_n'. */
4003*75f6d617Schristos PREFIX(insert_op2) (set_number_at, laststart, 1
4004*75f6d617Schristos + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
4005*75f6d617Schristos b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4006*75f6d617Schristos
4007*75f6d617Schristos if (upper_bound > 1)
4008*75f6d617Schristos { /* More than one repetition is allowed, so
4009*75f6d617Schristos append a backward jump to the `succeed_n'
4010*75f6d617Schristos that starts this interval.
4011*75f6d617Schristos
4012*75f6d617Schristos When we've reached this during matching,
4013*75f6d617Schristos we'll have matched the interval once, so
4014*75f6d617Schristos jump back only `upper_bound - 1' times. */
4015*75f6d617Schristos STORE_JUMP2 (jump_n, b, laststart
4016*75f6d617Schristos + 2 * OFFSET_ADDRESS_SIZE + 1,
4017*75f6d617Schristos upper_bound - 1);
4018*75f6d617Schristos b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4019*75f6d617Schristos
4020*75f6d617Schristos /* The location we want to set is the second
4021*75f6d617Schristos parameter of the `jump_n'; that is `b-2' as
4022*75f6d617Schristos an absolute address. `laststart' will be
4023*75f6d617Schristos the `set_number_at' we're about to insert;
4024*75f6d617Schristos `laststart+3' the number to set, the source
4025*75f6d617Schristos for the relative address. But we are
4026*75f6d617Schristos inserting into the middle of the pattern --
4027*75f6d617Schristos so everything is getting moved up by 5.
4028*75f6d617Schristos Conclusion: (b - 2) - (laststart + 3) + 5,
4029*75f6d617Schristos i.e., b - laststart.
4030*75f6d617Schristos
4031*75f6d617Schristos We insert this at the beginning of the loop
4032*75f6d617Schristos so that if we fail during matching, we'll
4033*75f6d617Schristos reinitialize the bounds. */
4034*75f6d617Schristos PREFIX(insert_op2) (set_number_at, laststart,
4035*75f6d617Schristos b - laststart,
4036*75f6d617Schristos upper_bound - 1, b);
4037*75f6d617Schristos b += 1 + 2 * OFFSET_ADDRESS_SIZE;
4038*75f6d617Schristos }
4039*75f6d617Schristos }
4040*75f6d617Schristos pending_exact = 0;
4041*75f6d617Schristos break;
4042*75f6d617Schristos
4043*75f6d617Schristos invalid_interval:
4044*75f6d617Schristos if (!(syntax & RE_INVALID_INTERVAL_ORD))
4045*75f6d617Schristos FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
4046*75f6d617Schristos unfetch_interval:
4047*75f6d617Schristos /* Match the characters as literals. */
4048*75f6d617Schristos p = beg_interval;
4049*75f6d617Schristos c = '{';
4050*75f6d617Schristos if (syntax & RE_NO_BK_BRACES)
4051*75f6d617Schristos goto normal_char;
4052*75f6d617Schristos else
4053*75f6d617Schristos goto normal_backslash;
4054*75f6d617Schristos }
4055*75f6d617Schristos
4056*75f6d617Schristos #ifdef emacs
4057*75f6d617Schristos /* There is no way to specify the before_dot and after_dot
4058*75f6d617Schristos operators. rms says this is ok. --karl */
4059*75f6d617Schristos case '=':
4060*75f6d617Schristos BUF_PUSH (at_dot);
4061*75f6d617Schristos break;
4062*75f6d617Schristos
4063*75f6d617Schristos case 's':
4064*75f6d617Schristos laststart = b;
4065*75f6d617Schristos PATFETCH (c);
4066*75f6d617Schristos BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
4067*75f6d617Schristos break;
4068*75f6d617Schristos
4069*75f6d617Schristos case 'S':
4070*75f6d617Schristos laststart = b;
4071*75f6d617Schristos PATFETCH (c);
4072*75f6d617Schristos BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
4073*75f6d617Schristos break;
4074*75f6d617Schristos #endif /* emacs */
4075*75f6d617Schristos
4076*75f6d617Schristos
4077*75f6d617Schristos case 'w':
4078*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4079*75f6d617Schristos goto normal_char;
4080*75f6d617Schristos laststart = b;
4081*75f6d617Schristos BUF_PUSH (wordchar);
4082*75f6d617Schristos break;
4083*75f6d617Schristos
4084*75f6d617Schristos
4085*75f6d617Schristos case 'W':
4086*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4087*75f6d617Schristos goto normal_char;
4088*75f6d617Schristos laststart = b;
4089*75f6d617Schristos BUF_PUSH (notwordchar);
4090*75f6d617Schristos break;
4091*75f6d617Schristos
4092*75f6d617Schristos
4093*75f6d617Schristos case '<':
4094*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4095*75f6d617Schristos goto normal_char;
4096*75f6d617Schristos BUF_PUSH (wordbeg);
4097*75f6d617Schristos break;
4098*75f6d617Schristos
4099*75f6d617Schristos case '>':
4100*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4101*75f6d617Schristos goto normal_char;
4102*75f6d617Schristos BUF_PUSH (wordend);
4103*75f6d617Schristos break;
4104*75f6d617Schristos
4105*75f6d617Schristos case 'b':
4106*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4107*75f6d617Schristos goto normal_char;
4108*75f6d617Schristos BUF_PUSH (wordbound);
4109*75f6d617Schristos break;
4110*75f6d617Schristos
4111*75f6d617Schristos case 'B':
4112*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4113*75f6d617Schristos goto normal_char;
4114*75f6d617Schristos BUF_PUSH (notwordbound);
4115*75f6d617Schristos break;
4116*75f6d617Schristos
4117*75f6d617Schristos case '`':
4118*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4119*75f6d617Schristos goto normal_char;
4120*75f6d617Schristos BUF_PUSH (begbuf);
4121*75f6d617Schristos break;
4122*75f6d617Schristos
4123*75f6d617Schristos case '\'':
4124*75f6d617Schristos if (syntax & RE_NO_GNU_OPS)
4125*75f6d617Schristos goto normal_char;
4126*75f6d617Schristos BUF_PUSH (endbuf);
4127*75f6d617Schristos break;
4128*75f6d617Schristos
4129*75f6d617Schristos case '1': case '2': case '3': case '4': case '5':
4130*75f6d617Schristos case '6': case '7': case '8': case '9':
4131*75f6d617Schristos if (syntax & RE_NO_BK_REFS)
4132*75f6d617Schristos goto normal_char;
4133*75f6d617Schristos
4134*75f6d617Schristos c1 = c - '0';
4135*75f6d617Schristos
4136*75f6d617Schristos if (c1 > regnum)
4137*75f6d617Schristos FREE_STACK_RETURN (REG_ESUBREG);
4138*75f6d617Schristos
4139*75f6d617Schristos /* Can't back reference to a subexpression if inside of it. */
4140*75f6d617Schristos if (group_in_compile_stack (compile_stack, (regnum_t) c1))
4141*75f6d617Schristos goto normal_char;
4142*75f6d617Schristos
4143*75f6d617Schristos laststart = b;
4144*75f6d617Schristos BUF_PUSH_2 (duplicate, c1);
4145*75f6d617Schristos break;
4146*75f6d617Schristos
4147*75f6d617Schristos
4148*75f6d617Schristos case '+':
4149*75f6d617Schristos case '?':
4150*75f6d617Schristos if (syntax & RE_BK_PLUS_QM)
4151*75f6d617Schristos goto handle_plus;
4152*75f6d617Schristos else
4153*75f6d617Schristos goto normal_backslash;
4154*75f6d617Schristos
4155*75f6d617Schristos default:
4156*75f6d617Schristos normal_backslash:
4157*75f6d617Schristos /* You might think it would be useful for \ to mean
4158*75f6d617Schristos not to translate; but if we don't translate it
4159*75f6d617Schristos it will never match anything. */
4160*75f6d617Schristos c = TRANSLATE (c);
4161*75f6d617Schristos goto normal_char;
4162*75f6d617Schristos }
4163*75f6d617Schristos break;
4164*75f6d617Schristos
4165*75f6d617Schristos
4166*75f6d617Schristos default:
4167*75f6d617Schristos /* Expects the character in `c'. */
4168*75f6d617Schristos normal_char:
4169*75f6d617Schristos /* If no exactn currently being built. */
4170*75f6d617Schristos if (!pending_exact
4171*75f6d617Schristos #ifdef WCHAR
4172*75f6d617Schristos /* If last exactn handle binary(or character) and
4173*75f6d617Schristos new exactn handle character(or binary). */
4174*75f6d617Schristos || is_exactn_bin != is_binary[p - 1 - pattern]
4175*75f6d617Schristos #endif /* WCHAR */
4176*75f6d617Schristos
4177*75f6d617Schristos /* If last exactn not at current position. */
4178*75f6d617Schristos || pending_exact + *pending_exact + 1 != b
4179*75f6d617Schristos
4180*75f6d617Schristos /* We have only one byte following the exactn for the count. */
4181*75f6d617Schristos || *pending_exact == (1 << BYTEWIDTH) - 1
4182*75f6d617Schristos
4183*75f6d617Schristos /* If followed by a repetition operator. */
4184*75f6d617Schristos || *p == '*' || *p == '^'
4185*75f6d617Schristos || ((syntax & RE_BK_PLUS_QM)
4186*75f6d617Schristos ? *p == '\\' && (p[1] == '+' || p[1] == '?')
4187*75f6d617Schristos : (*p == '+' || *p == '?'))
4188*75f6d617Schristos || ((syntax & RE_INTERVALS)
4189*75f6d617Schristos && ((syntax & RE_NO_BK_BRACES)
4190*75f6d617Schristos ? *p == '{'
4191*75f6d617Schristos : (p[0] == '\\' && p[1] == '{'))))
4192*75f6d617Schristos {
4193*75f6d617Schristos /* Start building a new exactn. */
4194*75f6d617Schristos
4195*75f6d617Schristos laststart = b;
4196*75f6d617Schristos
4197*75f6d617Schristos #ifdef WCHAR
4198*75f6d617Schristos /* Is this exactn binary data or character? */
4199*75f6d617Schristos is_exactn_bin = is_binary[p - 1 - pattern];
4200*75f6d617Schristos if (is_exactn_bin)
4201*75f6d617Schristos BUF_PUSH_2 (exactn_bin, 0);
4202*75f6d617Schristos else
4203*75f6d617Schristos BUF_PUSH_2 (exactn, 0);
4204*75f6d617Schristos #else
4205*75f6d617Schristos BUF_PUSH_2 (exactn, 0);
4206*75f6d617Schristos #endif /* WCHAR */
4207*75f6d617Schristos pending_exact = b - 1;
4208*75f6d617Schristos }
4209*75f6d617Schristos
4210*75f6d617Schristos BUF_PUSH (c);
4211*75f6d617Schristos (*pending_exact)++;
4212*75f6d617Schristos break;
4213*75f6d617Schristos } /* switch (c) */
4214*75f6d617Schristos } /* while p != pend */
4215*75f6d617Schristos
4216*75f6d617Schristos
4217*75f6d617Schristos /* Through the pattern now. */
4218*75f6d617Schristos
4219*75f6d617Schristos if (fixup_alt_jump)
4220*75f6d617Schristos STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
4221*75f6d617Schristos
4222*75f6d617Schristos if (!COMPILE_STACK_EMPTY)
4223*75f6d617Schristos FREE_STACK_RETURN (REG_EPAREN);
4224*75f6d617Schristos
4225*75f6d617Schristos /* If we don't want backtracking, force success
4226*75f6d617Schristos the first time we reach the end of the compiled pattern. */
4227*75f6d617Schristos if (syntax & RE_NO_POSIX_BACKTRACKING)
4228*75f6d617Schristos BUF_PUSH (succeed);
4229*75f6d617Schristos
4230*75f6d617Schristos #ifdef WCHAR
4231*75f6d617Schristos free (pattern);
4232*75f6d617Schristos free (mbs_offset);
4233*75f6d617Schristos free (is_binary);
4234*75f6d617Schristos #endif
4235*75f6d617Schristos free (compile_stack.stack);
4236*75f6d617Schristos
4237*75f6d617Schristos /* We have succeeded; set the length of the buffer. */
4238*75f6d617Schristos #ifdef WCHAR
4239*75f6d617Schristos bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
4240*75f6d617Schristos #else
4241*75f6d617Schristos bufp->used = b - bufp->buffer;
4242*75f6d617Schristos #endif
4243*75f6d617Schristos
4244*75f6d617Schristos #ifdef DEBUG
4245*75f6d617Schristos if (debug)
4246*75f6d617Schristos {
4247*75f6d617Schristos DEBUG_PRINT1 ("\nCompiled pattern: \n");
4248*75f6d617Schristos PREFIX(print_compiled_pattern) (bufp);
4249*75f6d617Schristos }
4250*75f6d617Schristos #endif /* DEBUG */
4251*75f6d617Schristos
4252*75f6d617Schristos #ifndef MATCH_MAY_ALLOCATE
4253*75f6d617Schristos /* Initialize the failure stack to the largest possible stack. This
4254*75f6d617Schristos isn't necessary unless we're trying to avoid calling alloca in
4255*75f6d617Schristos the search and match routines. */
4256*75f6d617Schristos {
4257*75f6d617Schristos int num_regs = bufp->re_nsub + 1;
4258*75f6d617Schristos
4259*75f6d617Schristos /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
4260*75f6d617Schristos is strictly greater than re_max_failures, the largest possible stack
4261*75f6d617Schristos is 2 * re_max_failures failure points. */
4262*75f6d617Schristos if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
4263*75f6d617Schristos {
4264*75f6d617Schristos fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
4265*75f6d617Schristos
4266*75f6d617Schristos # ifdef emacs
4267*75f6d617Schristos if (! fail_stack.stack)
4268*75f6d617Schristos fail_stack.stack
4269*75f6d617Schristos = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
4270*75f6d617Schristos * sizeof (PREFIX(fail_stack_elt_t)));
4271*75f6d617Schristos else
4272*75f6d617Schristos fail_stack.stack
4273*75f6d617Schristos = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
4274*75f6d617Schristos (fail_stack.size
4275*75f6d617Schristos * sizeof (PREFIX(fail_stack_elt_t))));
4276*75f6d617Schristos # else /* not emacs */
4277*75f6d617Schristos if (! fail_stack.stack)
4278*75f6d617Schristos fail_stack.stack
4279*75f6d617Schristos = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
4280*75f6d617Schristos * sizeof (PREFIX(fail_stack_elt_t)));
4281*75f6d617Schristos else
4282*75f6d617Schristos fail_stack.stack
4283*75f6d617Schristos = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
4284*75f6d617Schristos (fail_stack.size
4285*75f6d617Schristos * sizeof (PREFIX(fail_stack_elt_t))));
4286*75f6d617Schristos # endif /* not emacs */
4287*75f6d617Schristos }
4288*75f6d617Schristos
4289*75f6d617Schristos PREFIX(regex_grow_registers) (num_regs);
4290*75f6d617Schristos }
4291*75f6d617Schristos #endif /* not MATCH_MAY_ALLOCATE */
4292*75f6d617Schristos
4293*75f6d617Schristos return REG_NOERROR;
4294*75f6d617Schristos } /* regex_compile */
4295*75f6d617Schristos
4296*75f6d617Schristos /* Subroutines for `regex_compile'. */
4297*75f6d617Schristos
4298*75f6d617Schristos /* Store OP at LOC followed by two-byte integer parameter ARG. */
4299*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t. */
4300*75f6d617Schristos
4301*75f6d617Schristos static void
4302*75f6d617Schristos PREFIX(store_op1) (op, loc, arg)
4303*75f6d617Schristos re_opcode_t op;
4304*75f6d617Schristos UCHAR_T *loc;
4305*75f6d617Schristos int arg;
4306*75f6d617Schristos {
4307*75f6d617Schristos *loc = (UCHAR_T) op;
4308*75f6d617Schristos STORE_NUMBER (loc + 1, arg);
4309*75f6d617Schristos }
4310*75f6d617Schristos
4311*75f6d617Schristos
4312*75f6d617Schristos /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
4313*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t. */
4314*75f6d617Schristos
4315*75f6d617Schristos static void
4316*75f6d617Schristos PREFIX(store_op2) (op, loc, arg1, arg2)
4317*75f6d617Schristos re_opcode_t op;
4318*75f6d617Schristos UCHAR_T *loc;
4319*75f6d617Schristos int arg1, arg2;
4320*75f6d617Schristos {
4321*75f6d617Schristos *loc = (UCHAR_T) op;
4322*75f6d617Schristos STORE_NUMBER (loc + 1, arg1);
4323*75f6d617Schristos STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
4324*75f6d617Schristos }
4325*75f6d617Schristos
4326*75f6d617Schristos
4327*75f6d617Schristos /* Copy the bytes from LOC to END to open up three bytes of space at LOC
4328*75f6d617Schristos for OP followed by two-byte integer parameter ARG. */
4329*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t. */
4330*75f6d617Schristos
4331*75f6d617Schristos static void
4332*75f6d617Schristos PREFIX(insert_op1) (op, loc, arg, end)
4333*75f6d617Schristos re_opcode_t op;
4334*75f6d617Schristos UCHAR_T *loc;
4335*75f6d617Schristos int arg;
4336*75f6d617Schristos UCHAR_T *end;
4337*75f6d617Schristos {
4338*75f6d617Schristos register UCHAR_T *pfrom = end;
4339*75f6d617Schristos register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
4340*75f6d617Schristos
4341*75f6d617Schristos while (pfrom != loc)
4342*75f6d617Schristos *--pto = *--pfrom;
4343*75f6d617Schristos
4344*75f6d617Schristos PREFIX(store_op1) (op, loc, arg);
4345*75f6d617Schristos }
4346*75f6d617Schristos
4347*75f6d617Schristos
4348*75f6d617Schristos /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
4349*75f6d617Schristos /* ifdef WCHAR, integer parameter is 1 wchar_t. */
4350*75f6d617Schristos
4351*75f6d617Schristos static void
4352*75f6d617Schristos PREFIX(insert_op2) (op, loc, arg1, arg2, end)
4353*75f6d617Schristos re_opcode_t op;
4354*75f6d617Schristos UCHAR_T *loc;
4355*75f6d617Schristos int arg1, arg2;
4356*75f6d617Schristos UCHAR_T *end;
4357*75f6d617Schristos {
4358*75f6d617Schristos register UCHAR_T *pfrom = end;
4359*75f6d617Schristos register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
4360*75f6d617Schristos
4361*75f6d617Schristos while (pfrom != loc)
4362*75f6d617Schristos *--pto = *--pfrom;
4363*75f6d617Schristos
4364*75f6d617Schristos PREFIX(store_op2) (op, loc, arg1, arg2);
4365*75f6d617Schristos }
4366*75f6d617Schristos
4367*75f6d617Schristos
4368*75f6d617Schristos /* P points to just after a ^ in PATTERN. Return true if that ^ comes
4369*75f6d617Schristos after an alternative or a begin-subexpression. We assume there is at
4370*75f6d617Schristos least one character before the ^. */
4371*75f6d617Schristos
4372*75f6d617Schristos static boolean
4373*75f6d617Schristos PREFIX(at_begline_loc_p) (pattern, p, syntax)
4374*75f6d617Schristos const CHAR_T *pattern, *p;
4375*75f6d617Schristos reg_syntax_t syntax;
4376*75f6d617Schristos {
4377*75f6d617Schristos const CHAR_T *prev = p - 2;
4378*75f6d617Schristos boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
4379*75f6d617Schristos
4380*75f6d617Schristos return
4381*75f6d617Schristos /* After a subexpression? */
4382*75f6d617Schristos (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
4383*75f6d617Schristos /* After an alternative? */
4384*75f6d617Schristos || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
4385*75f6d617Schristos }
4386*75f6d617Schristos
4387*75f6d617Schristos
4388*75f6d617Schristos /* The dual of at_begline_loc_p. This one is for $. We assume there is
4389*75f6d617Schristos at least one character after the $, i.e., `P < PEND'. */
4390*75f6d617Schristos
4391*75f6d617Schristos static boolean
4392*75f6d617Schristos PREFIX(at_endline_loc_p) (p, pend, syntax)
4393*75f6d617Schristos const CHAR_T *p, *pend;
4394*75f6d617Schristos reg_syntax_t syntax;
4395*75f6d617Schristos {
4396*75f6d617Schristos const CHAR_T *next = p;
4397*75f6d617Schristos boolean next_backslash = *next == '\\';
4398*75f6d617Schristos const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
4399*75f6d617Schristos
4400*75f6d617Schristos return
4401*75f6d617Schristos /* Before a subexpression? */
4402*75f6d617Schristos (syntax & RE_NO_BK_PARENS ? *next == ')'
4403*75f6d617Schristos : next_backslash && next_next && *next_next == ')')
4404*75f6d617Schristos /* Before an alternative? */
4405*75f6d617Schristos || (syntax & RE_NO_BK_VBAR ? *next == '|'
4406*75f6d617Schristos : next_backslash && next_next && *next_next == '|');
4407*75f6d617Schristos }
4408*75f6d617Schristos
4409*75f6d617Schristos #else /* not INSIDE_RECURSION */
4410*75f6d617Schristos
4411*75f6d617Schristos /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4412*75f6d617Schristos false if it's not. */
4413*75f6d617Schristos
4414*75f6d617Schristos static boolean
group_in_compile_stack(compile_stack,regnum)4415*75f6d617Schristos group_in_compile_stack (compile_stack, regnum)
4416*75f6d617Schristos compile_stack_type compile_stack;
4417*75f6d617Schristos regnum_t regnum;
4418*75f6d617Schristos {
4419*75f6d617Schristos int this_element;
4420*75f6d617Schristos
4421*75f6d617Schristos for (this_element = compile_stack.avail - 1;
4422*75f6d617Schristos this_element >= 0;
4423*75f6d617Schristos this_element--)
4424*75f6d617Schristos if (compile_stack.stack[this_element].regnum == regnum)
4425*75f6d617Schristos return true;
4426*75f6d617Schristos
4427*75f6d617Schristos return false;
4428*75f6d617Schristos }
4429*75f6d617Schristos #endif /* not INSIDE_RECURSION */
4430*75f6d617Schristos
4431*75f6d617Schristos #ifdef INSIDE_RECURSION
4432*75f6d617Schristos
4433*75f6d617Schristos #ifdef WCHAR
4434*75f6d617Schristos /* This insert space, which size is "num", into the pattern at "loc".
4435*75f6d617Schristos "end" must point the end of the allocated buffer. */
4436*75f6d617Schristos static void
insert_space(num,loc,end)4437*75f6d617Schristos insert_space (num, loc, end)
4438*75f6d617Schristos int num;
4439*75f6d617Schristos CHAR_T *loc;
4440*75f6d617Schristos CHAR_T *end;
4441*75f6d617Schristos {
4442*75f6d617Schristos register CHAR_T *pto = end;
4443*75f6d617Schristos register CHAR_T *pfrom = end - num;
4444*75f6d617Schristos
4445*75f6d617Schristos while (pfrom >= loc)
4446*75f6d617Schristos *pto-- = *pfrom--;
4447*75f6d617Schristos }
4448*75f6d617Schristos #endif /* WCHAR */
4449*75f6d617Schristos
4450*75f6d617Schristos #ifdef WCHAR
4451*75f6d617Schristos static reg_errcode_t
wcs_compile_range(range_start_char,p_ptr,pend,translate,syntax,b,char_set)4452*75f6d617Schristos wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
4453*75f6d617Schristos char_set)
4454*75f6d617Schristos CHAR_T range_start_char;
4455*75f6d617Schristos const CHAR_T **p_ptr, *pend;
4456*75f6d617Schristos CHAR_T *char_set, *b;
4457*75f6d617Schristos RE_TRANSLATE_TYPE translate;
4458*75f6d617Schristos reg_syntax_t syntax;
4459*75f6d617Schristos {
4460*75f6d617Schristos const CHAR_T *p = *p_ptr;
4461*75f6d617Schristos CHAR_T range_start, range_end;
4462*75f6d617Schristos reg_errcode_t ret;
4463*75f6d617Schristos # ifdef _LIBC
4464*75f6d617Schristos uint32_t nrules;
4465*75f6d617Schristos uint32_t start_val, end_val;
4466*75f6d617Schristos # endif
4467*75f6d617Schristos if (p == pend)
4468*75f6d617Schristos return REG_ERANGE;
4469*75f6d617Schristos
4470*75f6d617Schristos # ifdef _LIBC
4471*75f6d617Schristos nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
4472*75f6d617Schristos if (nrules != 0)
4473*75f6d617Schristos {
4474*75f6d617Schristos const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
4475*75f6d617Schristos _NL_COLLATE_COLLSEQWC);
4476*75f6d617Schristos const unsigned char *extra = (const unsigned char *)
4477*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
4478*75f6d617Schristos
4479*75f6d617Schristos if (range_start_char < -1)
4480*75f6d617Schristos {
4481*75f6d617Schristos /* range_start is a collating symbol. */
4482*75f6d617Schristos int32_t *wextra;
4483*75f6d617Schristos /* Retreive the index and get collation sequence value. */
4484*75f6d617Schristos wextra = (int32_t*)(extra + char_set[-range_start_char]);
4485*75f6d617Schristos start_val = wextra[1 + *wextra];
4486*75f6d617Schristos }
4487*75f6d617Schristos else
4488*75f6d617Schristos start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
4489*75f6d617Schristos
4490*75f6d617Schristos end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
4491*75f6d617Schristos
4492*75f6d617Schristos /* Report an error if the range is empty and the syntax prohibits
4493*75f6d617Schristos this. */
4494*75f6d617Schristos ret = ((syntax & RE_NO_EMPTY_RANGES)
4495*75f6d617Schristos && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
4496*75f6d617Schristos
4497*75f6d617Schristos /* Insert space to the end of the char_ranges. */
4498*75f6d617Schristos insert_space(2, b - char_set[5] - 2, b - 1);
4499*75f6d617Schristos *(b - char_set[5] - 2) = (wchar_t)start_val;
4500*75f6d617Schristos *(b - char_set[5] - 1) = (wchar_t)end_val;
4501*75f6d617Schristos char_set[4]++; /* ranges_index */
4502*75f6d617Schristos }
4503*75f6d617Schristos else
4504*75f6d617Schristos # endif
4505*75f6d617Schristos {
4506*75f6d617Schristos range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
4507*75f6d617Schristos range_start_char;
4508*75f6d617Schristos range_end = TRANSLATE (p[0]);
4509*75f6d617Schristos /* Report an error if the range is empty and the syntax prohibits
4510*75f6d617Schristos this. */
4511*75f6d617Schristos ret = ((syntax & RE_NO_EMPTY_RANGES)
4512*75f6d617Schristos && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
4513*75f6d617Schristos
4514*75f6d617Schristos /* Insert space to the end of the char_ranges. */
4515*75f6d617Schristos insert_space(2, b - char_set[5] - 2, b - 1);
4516*75f6d617Schristos *(b - char_set[5] - 2) = range_start;
4517*75f6d617Schristos *(b - char_set[5] - 1) = range_end;
4518*75f6d617Schristos char_set[4]++; /* ranges_index */
4519*75f6d617Schristos }
4520*75f6d617Schristos /* Have to increment the pointer into the pattern string, so the
4521*75f6d617Schristos caller isn't still at the ending character. */
4522*75f6d617Schristos (*p_ptr)++;
4523*75f6d617Schristos
4524*75f6d617Schristos return ret;
4525*75f6d617Schristos }
4526*75f6d617Schristos #else /* BYTE */
4527*75f6d617Schristos /* Read the ending character of a range (in a bracket expression) from the
4528*75f6d617Schristos uncompiled pattern *P_PTR (which ends at PEND). We assume the
4529*75f6d617Schristos starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
4530*75f6d617Schristos Then we set the translation of all bits between the starting and
4531*75f6d617Schristos ending characters (inclusive) in the compiled pattern B.
4532*75f6d617Schristos
4533*75f6d617Schristos Return an error code.
4534*75f6d617Schristos
4535*75f6d617Schristos We use these short variable names so we can use the same macros as
4536*75f6d617Schristos `regex_compile' itself. */
4537*75f6d617Schristos
4538*75f6d617Schristos static reg_errcode_t
byte_compile_range(range_start_char,p_ptr,pend,translate,syntax,b)4539*75f6d617Schristos byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
4540*75f6d617Schristos unsigned int range_start_char;
4541*75f6d617Schristos const char **p_ptr, *pend;
4542*75f6d617Schristos RE_TRANSLATE_TYPE translate;
4543*75f6d617Schristos reg_syntax_t syntax;
4544*75f6d617Schristos unsigned char *b;
4545*75f6d617Schristos {
4546*75f6d617Schristos unsigned this_char;
4547*75f6d617Schristos const char *p = *p_ptr;
4548*75f6d617Schristos reg_errcode_t ret;
4549*75f6d617Schristos # if _LIBC
4550*75f6d617Schristos const unsigned char *collseq;
4551*75f6d617Schristos unsigned int start_colseq;
4552*75f6d617Schristos unsigned int end_colseq;
4553*75f6d617Schristos # else
4554*75f6d617Schristos unsigned end_char;
4555*75f6d617Schristos # endif
4556*75f6d617Schristos
4557*75f6d617Schristos if (p == pend)
4558*75f6d617Schristos return REG_ERANGE;
4559*75f6d617Schristos
4560*75f6d617Schristos /* Have to increment the pointer into the pattern string, so the
4561*75f6d617Schristos caller isn't still at the ending character. */
4562*75f6d617Schristos (*p_ptr)++;
4563*75f6d617Schristos
4564*75f6d617Schristos /* Report an error if the range is empty and the syntax prohibits this. */
4565*75f6d617Schristos ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4566*75f6d617Schristos
4567*75f6d617Schristos # if _LIBC
4568*75f6d617Schristos collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
4569*75f6d617Schristos _NL_COLLATE_COLLSEQMB);
4570*75f6d617Schristos
4571*75f6d617Schristos start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
4572*75f6d617Schristos end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
4573*75f6d617Schristos for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
4574*75f6d617Schristos {
4575*75f6d617Schristos unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
4576*75f6d617Schristos
4577*75f6d617Schristos if (start_colseq <= this_colseq && this_colseq <= end_colseq)
4578*75f6d617Schristos {
4579*75f6d617Schristos SET_LIST_BIT (TRANSLATE (this_char));
4580*75f6d617Schristos ret = REG_NOERROR;
4581*75f6d617Schristos }
4582*75f6d617Schristos }
4583*75f6d617Schristos # else
4584*75f6d617Schristos /* Here we see why `this_char' has to be larger than an `unsigned
4585*75f6d617Schristos char' -- we would otherwise go into an infinite loop, since all
4586*75f6d617Schristos characters <= 0xff. */
4587*75f6d617Schristos range_start_char = TRANSLATE (range_start_char);
4588*75f6d617Schristos /* TRANSLATE casts its result to char (not unsigned char), and some
4589*75f6d617Schristos compilers then convert it to int implicitly, so the following for
4590*75f6d617Schristos loop could become an (almost) infinite loop:
4591*75f6d617Schristos e.g. if translate[p[0]] == 0xff, end_char could equal 0xffffffff.
4592*75f6d617Schristos To avoid this, we cast the result to unsigned int and truncate it. */
4593*75f6d617Schristos end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
4594*75f6d617Schristos
4595*75f6d617Schristos for (this_char = range_start_char; this_char <= end_char; ++this_char)
4596*75f6d617Schristos {
4597*75f6d617Schristos SET_LIST_BIT (TRANSLATE (this_char));
4598*75f6d617Schristos ret = REG_NOERROR;
4599*75f6d617Schristos }
4600*75f6d617Schristos # endif
4601*75f6d617Schristos
4602*75f6d617Schristos return ret;
4603*75f6d617Schristos }
4604*75f6d617Schristos #endif /* WCHAR */
4605*75f6d617Schristos
4606*75f6d617Schristos /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4607*75f6d617Schristos BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
4608*75f6d617Schristos characters can start a string that matches the pattern. This fastmap
4609*75f6d617Schristos is used by re_search to skip quickly over impossible starting points.
4610*75f6d617Schristos
4611*75f6d617Schristos The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4612*75f6d617Schristos area as BUFP->fastmap.
4613*75f6d617Schristos
4614*75f6d617Schristos We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4615*75f6d617Schristos the pattern buffer.
4616*75f6d617Schristos
4617*75f6d617Schristos Returns 0 if we succeed, -2 if an internal error. */
4618*75f6d617Schristos
#ifdef WCHAR
/* Local helper for re_compile_fastmap: reduce the wide character C to a
   single byte that can index the fastmap.  C is converted to its
   multibyte form starting from a zeroed conversion state, and the first
   byte of that sequence is returned.  If wcrtomb reports an error or
   produces no bytes, C itself is truncated to unsigned char instead.  */
static unsigned char truncate_wchar (CHAR_T c);

static unsigned char
truncate_wchar (c)
     CHAR_T c;
{
  /* wcrtomb writes through a `char *', so use a plain char buffer and
     convert to unsigned char only on the way out.  */
  char buf[MB_CUR_MAX];
  mbstate_t state;
  size_t len;

  memset (&state, '\0', sizeof (state));
  len = wcrtomb (buf, c, &state);

  /* (size_t) -1 is wcrtomb's error return; test for it explicitly,
     because as an unsigned value it would otherwise look like a very
     large length.  */
  if (len == (size_t) -1 || len == 0)
    return (unsigned char) c;

  return (unsigned char) buf[0];
}
#endif /* WCHAR */
4636*75f6d617Schristos
/* Compute BUFP->fastmap by walking the compiled pattern in BUFP->buffer.
   Each path is followed until it reaches an opcode that must match a
   character (its possible first characters are then marked in the
   fastmap) or until the end of the pattern.  Alternative continuations
   found at on_failure_jump-style opcodes are pushed on a local failure
   stack and explored afterwards.

   Sets BUFP->fastmap_accurate to 1 and computes BUFP->can_be_null.
   Returns 0 on success, or -2 if an entry could not be pushed on the
   failure stack.  */
4637*75f6d617Schristos static int
4638*75f6d617Schristos PREFIX(re_compile_fastmap) (bufp)
4639*75f6d617Schristos struct re_pattern_buffer *bufp;
4640*75f6d617Schristos {
4641*75f6d617Schristos int j, k;
4642*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE
4643*75f6d617Schristos PREFIX(fail_stack_type) fail_stack;
4644*75f6d617Schristos #endif
4645*75f6d617Schristos #ifndef REGEX_MALLOC
4646*75f6d617Schristos char *destination;
4647*75f6d617Schristos #endif
4648*75f6d617Schristos
4649*75f6d617Schristos register char *fastmap = bufp->fastmap;
4650*75f6d617Schristos
4651*75f6d617Schristos #ifdef WCHAR
4652*75f6d617Schristos /* We need to cast pattern to (wchar_t*), because we casted this compiled
4653*75f6d617Schristos pattern to (char*) in regex_compile. */
4654*75f6d617Schristos UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
4655*75f6d617Schristos register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
4656*75f6d617Schristos #else /* BYTE */
4657*75f6d617Schristos UCHAR_T *pattern = bufp->buffer;
4658*75f6d617Schristos register UCHAR_T *pend = pattern + bufp->used;
4659*75f6d617Schristos #endif /* WCHAR */
4660*75f6d617Schristos UCHAR_T *p = pattern;
4661*75f6d617Schristos
4662*75f6d617Schristos #ifdef REL_ALLOC
4663*75f6d617Schristos /* This holds the pointer to the failure stack, when
4664*75f6d617Schristos it is allocated relocatably. */
4665*75f6d617Schristos fail_stack_elt_t *failure_stack_ptr;
4666*75f6d617Schristos #endif
4667*75f6d617Schristos
4668*75f6d617Schristos /* Assume that each path through the pattern can be null until
4669*75f6d617Schristos proven otherwise. We set this false at the bottom of switch
4670*75f6d617Schristos statement, to which we get only if a particular path doesn't
4671*75f6d617Schristos match the empty string. */
4672*75f6d617Schristos boolean path_can_be_null = true;
4673*75f6d617Schristos
4674*75f6d617Schristos /* We aren't doing a `succeed_n' to begin with. */
4675*75f6d617Schristos boolean succeed_n_p = false;
4676*75f6d617Schristos
4677*75f6d617Schristos assert (fastmap != NULL && p != NULL);
4678*75f6d617Schristos
4679*75f6d617Schristos INIT_FAIL_STACK ();
4680*75f6d617Schristos bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
4681*75f6d617Schristos bufp->fastmap_accurate = 1; /* It will be when we're done. */
4682*75f6d617Schristos bufp->can_be_null = 0;
4683*75f6d617Schristos
4684*75f6d617Schristos while (1)
4685*75f6d617Schristos {
4686*75f6d617Schristos if (p == pend || *p == succeed)
4687*75f6d617Schristos {
4688*75f6d617Schristos /* We have reached the (effective) end of pattern. */
4689*75f6d617Schristos if (!FAIL_STACK_EMPTY ())
4690*75f6d617Schristos {
4691*75f6d617Schristos bufp->can_be_null |= path_can_be_null;
4692*75f6d617Schristos
4693*75f6d617Schristos /* Reset for next path. */
4694*75f6d617Schristos path_can_be_null = true;
4695*75f6d617Schristos
/* Resume at the most recently pushed alternative. */
4696*75f6d617Schristos p = fail_stack.stack[--fail_stack.avail].pointer;
4697*75f6d617Schristos
4698*75f6d617Schristos continue;
4699*75f6d617Schristos }
4700*75f6d617Schristos else
4701*75f6d617Schristos break;
4702*75f6d617Schristos }
4703*75f6d617Schristos
4704*75f6d617Schristos /* We should never be about to go beyond the end of the pattern. */
4705*75f6d617Schristos assert (p < pend);
4706*75f6d617Schristos
4707*75f6d617Schristos switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4708*75f6d617Schristos {
4709*75f6d617Schristos
4710*75f6d617Schristos /* I guess the idea here is to simply not bother with a fastmap
4711*75f6d617Schristos if a backreference is used, since it's too hard to figure out
4712*75f6d617Schristos the fastmap for the corresponding group. Setting
4713*75f6d617Schristos `can_be_null' stops `re_search_2' from using the fastmap, so
4714*75f6d617Schristos that is all we do. */
4715*75f6d617Schristos case duplicate:
4716*75f6d617Schristos bufp->can_be_null = 1;
4717*75f6d617Schristos goto done;
4718*75f6d617Schristos
4719*75f6d617Schristos
4720*75f6d617Schristos /* Following are the cases which match a character. These end
4721*75f6d617Schristos with `break'. */
4722*75f6d617Schristos
4723*75f6d617Schristos #ifdef WCHAR
4724*75f6d617Schristos case exactn:
/* p[1] is taken as the first literal character (p[0] is presumably the
   length operand -- confirm against regex_compile).  It is narrowed
   with truncate_wchar so that it can index the fastmap. */
4725*75f6d617Schristos fastmap[truncate_wchar(p[1])] = 1;
4726*75f6d617Schristos break;
4727*75f6d617Schristos #else /* BYTE */
4728*75f6d617Schristos case exactn:
4729*75f6d617Schristos fastmap[p[1]] = 1;
4730*75f6d617Schristos break;
4731*75f6d617Schristos #endif /* WCHAR */
4732*75f6d617Schristos #ifdef MBS_SUPPORT
4733*75f6d617Schristos case exactn_bin:
4734*75f6d617Schristos fastmap[p[1]] = 1;
4735*75f6d617Schristos break;
4736*75f6d617Schristos #endif
4737*75f6d617Schristos
4738*75f6d617Schristos #ifdef WCHAR
4739*75f6d617Schristos /* It is hard to distinguish fastmap from (multi byte) characters
4740*75f6d617Schristos which depends on current locale. */
4741*75f6d617Schristos case charset:
4742*75f6d617Schristos case charset_not:
4743*75f6d617Schristos case wordchar:
4744*75f6d617Schristos case notwordchar:
4745*75f6d617Schristos bufp->can_be_null = 1;
4746*75f6d617Schristos goto done;
4747*75f6d617Schristos #else /* BYTE */
4748*75f6d617Schristos case charset:
/* *p times BYTEWIDTH gives the number of bits in the set's bitmap,
   which starts at the next byte; every set bit J marks character J as
   a possible first character. */
4749*75f6d617Schristos for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4750*75f6d617Schristos if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
4751*75f6d617Schristos fastmap[j] = 1;
4752*75f6d617Schristos break;
4753*75f6d617Schristos
4754*75f6d617Schristos
4755*75f6d617Schristos case charset_not:
4756*75f6d617Schristos /* Chars beyond end of map must be allowed. */
4757*75f6d617Schristos for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
4758*75f6d617Schristos fastmap[j] = 1;
4759*75f6d617Schristos
/* Within the bitmap, mark every character whose bit is clear. */
4760*75f6d617Schristos for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
4761*75f6d617Schristos if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
4762*75f6d617Schristos fastmap[j] = 1;
4763*75f6d617Schristos break;
4764*75f6d617Schristos
4765*75f6d617Schristos
4766*75f6d617Schristos case wordchar:
4767*75f6d617Schristos for (j = 0; j < (1 << BYTEWIDTH); j++)
4768*75f6d617Schristos if (SYNTAX (j) == Sword)
4769*75f6d617Schristos fastmap[j] = 1;
4770*75f6d617Schristos break;
4771*75f6d617Schristos
4772*75f6d617Schristos
4773*75f6d617Schristos case notwordchar:
4774*75f6d617Schristos for (j = 0; j < (1 << BYTEWIDTH); j++)
4775*75f6d617Schristos if (SYNTAX (j) != Sword)
4776*75f6d617Schristos fastmap[j] = 1;
4777*75f6d617Schristos break;
4778*75f6d617Schristos #endif /* WCHAR */
4779*75f6d617Schristos
4780*75f6d617Schristos case anychar:
4781*75f6d617Schristos {
/* Remember the newline entry so it can be restored below when `.'
   does not match newline. */
4782*75f6d617Schristos int fastmap_newline = fastmap['\n'];
4783*75f6d617Schristos
4784*75f6d617Schristos /* `.' matches anything ... */
4785*75f6d617Schristos for (j = 0; j < (1 << BYTEWIDTH); j++)
4786*75f6d617Schristos fastmap[j] = 1;
4787*75f6d617Schristos
4788*75f6d617Schristos /* ... except perhaps newline. */
4789*75f6d617Schristos if (!(bufp->syntax & RE_DOT_NEWLINE))
4790*75f6d617Schristos fastmap['\n'] = fastmap_newline;
4791*75f6d617Schristos
4792*75f6d617Schristos /* Return if we have already set `can_be_null'; if we have,
4793*75f6d617Schristos then the fastmap is irrelevant. Something's wrong here. */
/* NOTE(review): this `else' binds to the RE_DOT_NEWLINE test above, so
   the early exit is only taken when RE_DOT_NEWLINE is set. */
4794*75f6d617Schristos else if (bufp->can_be_null)
4795*75f6d617Schristos goto done;
4796*75f6d617Schristos
4797*75f6d617Schristos /* Otherwise, have to check alternative paths. */
4798*75f6d617Schristos break;
4799*75f6d617Schristos }
4800*75f6d617Schristos
4801*75f6d617Schristos #ifdef emacs
4802*75f6d617Schristos case syntaxspec:
4803*75f6d617Schristos k = *p++;
4804*75f6d617Schristos for (j = 0; j < (1 << BYTEWIDTH); j++)
4805*75f6d617Schristos if (SYNTAX (j) == (enum syntaxcode) k)
4806*75f6d617Schristos fastmap[j] = 1;
4807*75f6d617Schristos break;
4808*75f6d617Schristos
4809*75f6d617Schristos
4810*75f6d617Schristos case notsyntaxspec:
4811*75f6d617Schristos k = *p++;
4812*75f6d617Schristos for (j = 0; j < (1 << BYTEWIDTH); j++)
4813*75f6d617Schristos if (SYNTAX (j) != (enum syntaxcode) k)
4814*75f6d617Schristos fastmap[j] = 1;
4815*75f6d617Schristos break;
4816*75f6d617Schristos
4817*75f6d617Schristos
4818*75f6d617Schristos /* All cases after this match the empty string. These end with
4819*75f6d617Schristos `continue'. */
4820*75f6d617Schristos
4821*75f6d617Schristos
4822*75f6d617Schristos case before_dot:
4823*75f6d617Schristos case at_dot:
4824*75f6d617Schristos case after_dot:
4825*75f6d617Schristos continue;
4826*75f6d617Schristos #endif /* emacs */
4827*75f6d617Schristos
4828*75f6d617Schristos
4829*75f6d617Schristos case no_op:
4830*75f6d617Schristos case begline:
4831*75f6d617Schristos case endline:
4832*75f6d617Schristos case begbuf:
4833*75f6d617Schristos case endbuf:
4834*75f6d617Schristos case wordbound:
4835*75f6d617Schristos case notwordbound:
4836*75f6d617Schristos case wordbeg:
4837*75f6d617Schristos case wordend:
4838*75f6d617Schristos case push_dummy_failure:
4839*75f6d617Schristos continue;
4840*75f6d617Schristos
4841*75f6d617Schristos
4842*75f6d617Schristos case jump_n:
4843*75f6d617Schristos case pop_failure_jump:
4844*75f6d617Schristos case maybe_pop_jump:
4845*75f6d617Schristos case jump:
4846*75f6d617Schristos case jump_past_alt:
4847*75f6d617Schristos case dummy_failure_jump:
/* Read the jump displacement into J and follow it. */
4848*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (j, p);
4849*75f6d617Schristos p += j;
4850*75f6d617Schristos if (j > 0)
4851*75f6d617Schristos continue;
4852*75f6d617Schristos
4853*75f6d617Schristos /* Jump backward implies we just went through the body of a
4854*75f6d617Schristos loop and matched nothing. Opcode jumped to should be
4855*75f6d617Schristos `on_failure_jump' or `succeed_n'. Just treat it like an
4856*75f6d617Schristos ordinary jump. For a * loop, it has pushed its failure
4857*75f6d617Schristos point already; if so, discard that as redundant. */
4858*75f6d617Schristos if ((re_opcode_t) *p != on_failure_jump
4859*75f6d617Schristos && (re_opcode_t) *p != succeed_n)
4860*75f6d617Schristos continue;
4861*75f6d617Schristos
4862*75f6d617Schristos p++;
4863*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (j, p);
4864*75f6d617Schristos p += j;
4865*75f6d617Schristos
4866*75f6d617Schristos /* If what's on the stack is where we are now, pop it. */
4867*75f6d617Schristos if (!FAIL_STACK_EMPTY ()
4868*75f6d617Schristos && fail_stack.stack[fail_stack.avail - 1].pointer == p)
4869*75f6d617Schristos fail_stack.avail--;
4870*75f6d617Schristos
4871*75f6d617Schristos continue;
4872*75f6d617Schristos
4873*75f6d617Schristos
4874*75f6d617Schristos case on_failure_jump:
4875*75f6d617Schristos case on_failure_keep_string_jump:
4876*75f6d617Schristos handle_on_failure_jump:
4877*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (j, p);
4878*75f6d617Schristos
4879*75f6d617Schristos /* For some patterns, e.g., `(a?)?', `p+j' here points to the
4880*75f6d617Schristos end of the pattern. We don't want to push such a point,
4881*75f6d617Schristos since when we restore it above, entering the switch will
4882*75f6d617Schristos increment `p' past the end of the pattern. We don't need
4883*75f6d617Schristos to push such a point since we obviously won't find any more
4884*75f6d617Schristos fastmap entries beyond `pend'. Such a pattern can match
4885*75f6d617Schristos the null string, though. */
4886*75f6d617Schristos if (p + j < pend)
4887*75f6d617Schristos {
4888*75f6d617Schristos if (!PUSH_PATTERN_OP (p + j, fail_stack))
4889*75f6d617Schristos {
4890*75f6d617Schristos RESET_FAIL_STACK ();
4891*75f6d617Schristos return -2;
4892*75f6d617Schristos }
4893*75f6d617Schristos }
4894*75f6d617Schristos else
4895*75f6d617Schristos bufp->can_be_null = 1;
4896*75f6d617Schristos
4897*75f6d617Schristos if (succeed_n_p)
4898*75f6d617Schristos {
4899*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
4900*75f6d617Schristos succeed_n_p = false;
4901*75f6d617Schristos }
4902*75f6d617Schristos
4903*75f6d617Schristos continue;
4904*75f6d617Schristos
4905*75f6d617Schristos
4906*75f6d617Schristos case succeed_n:
4907*75f6d617Schristos /* Get to the number of times to succeed. */
4908*75f6d617Schristos p += OFFSET_ADDRESS_SIZE;
4909*75f6d617Schristos
4910*75f6d617Schristos /* Increment p past the n for when k != 0. */
4911*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (k, p);
4912*75f6d617Schristos if (k == 0)
4913*75f6d617Schristos {
/* Step back over both operands (assuming EXTRACT_NUMBER_AND_INCR
   advanced P by OFFSET_ADDRESS_SIZE -- confirm) so that
   handle_on_failure_jump can read the jump offset; SUCCEED_N_P tells
   it to skip the count afterwards. */
4914*75f6d617Schristos p -= 2 * OFFSET_ADDRESS_SIZE;
4915*75f6d617Schristos succeed_n_p = true; /* Spaghetti code alert. */
4916*75f6d617Schristos goto handle_on_failure_jump;
4917*75f6d617Schristos }
4918*75f6d617Schristos continue;
4919*75f6d617Schristos
4920*75f6d617Schristos
4921*75f6d617Schristos case set_number_at:
4922*75f6d617Schristos p += 2 * OFFSET_ADDRESS_SIZE;
4923*75f6d617Schristos continue;
4924*75f6d617Schristos
4925*75f6d617Schristos
4926*75f6d617Schristos case start_memory:
4927*75f6d617Schristos case stop_memory:
4928*75f6d617Schristos p += 2;
4929*75f6d617Schristos continue;
4930*75f6d617Schristos
4931*75f6d617Schristos
4932*75f6d617Schristos default:
4933*75f6d617Schristos abort (); /* We have listed all the cases. */
4934*75f6d617Schristos } /* switch *p++ */
4935*75f6d617Schristos
4936*75f6d617Schristos /* Getting here means we have found the possible starting
4937*75f6d617Schristos characters for one path of the pattern -- and that the empty
4938*75f6d617Schristos string does not match. We need not follow this path further.
4939*75f6d617Schristos Instead, look at the next alternative (remembered on the
4940*75f6d617Schristos stack), or quit if no more. The test at the top of the loop
4941*75f6d617Schristos does these things. */
4942*75f6d617Schristos path_can_be_null = false;
4943*75f6d617Schristos p = pend;
4944*75f6d617Schristos } /* while p */
4945*75f6d617Schristos
4946*75f6d617Schristos /* Set `can_be_null' for the last path (also the first path, if the
4947*75f6d617Schristos pattern is empty). */
4948*75f6d617Schristos bufp->can_be_null |= path_can_be_null;
4949*75f6d617Schristos
4950*75f6d617Schristos done:
4951*75f6d617Schristos RESET_FAIL_STACK ();
4952*75f6d617Schristos return 0;
4953*75f6d617Schristos }
4954*75f6d617Schristos
4955*75f6d617Schristos #else /* not INSIDE_RECURSION */
4956*75f6d617Schristos
/* Public entry point for fastmap construction.  When MBS_SUPPORT is
   compiled in and MB_CUR_MAX != 1, the wide-character implementation
   is used; in every other case the byte implementation is.  The
   selected routine's return value is passed through unchanged.  */
int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif

  return byte_re_compile_fastmap (bufp);
} /* re_compile_fastmap */
#ifdef _LIBC
weak_alias (__re_compile_fastmap, re_compile_fastmap)
#endif
4971*75f6d617Schristos
4972*75f6d617Schristos
4973*75f6d617Schristos /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4974*75f6d617Schristos ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
4975*75f6d617Schristos this memory for recording register information. STARTS and ENDS
4976*75f6d617Schristos must be allocated using the malloc library routine, and must each
4977*75f6d617Schristos be at least NUM_REGS * sizeof (regoff_t) bytes long.
4978*75f6d617Schristos
4979*75f6d617Schristos If NUM_REGS == 0, then subsequent matches should allocate their own
4980*75f6d617Schristos register data.
4981*75f6d617Schristos
4982*75f6d617Schristos Unless this function is called, the first search or match using
4983*75f6d617Schristos PATTERN_BUFFER will allocate its own register data, without
4984*75f6d617Schristos freeing the old data. */
4985*75f6d617Schristos
4986*75f6d617Schristos void
4987*75f6d617Schristos re_set_registers (bufp, regs, num_regs, starts, ends)
4988*75f6d617Schristos struct re_pattern_buffer *bufp;
4989*75f6d617Schristos struct re_registers *regs;
4990*75f6d617Schristos unsigned num_regs;
4991*75f6d617Schristos regoff_t *starts, *ends;
4992*75f6d617Schristos {
4993*75f6d617Schristos if (num_regs)
4994*75f6d617Schristos {
4995*75f6d617Schristos bufp->regs_allocated = REGS_REALLOCATE;
4996*75f6d617Schristos regs->num_regs = num_regs;
4997*75f6d617Schristos regs->start = starts;
4998*75f6d617Schristos regs->end = ends;
4999*75f6d617Schristos }
5000*75f6d617Schristos else
5001*75f6d617Schristos {
5002*75f6d617Schristos bufp->regs_allocated = REGS_UNALLOCATED;
5003*75f6d617Schristos regs->num_regs = 0;
5004*75f6d617Schristos regs->start = regs->end = (regoff_t *) 0;
5005*75f6d617Schristos }
5006*75f6d617Schristos }
5007*75f6d617Schristos #ifdef _LIBC
5008*75f6d617Schristos weak_alias (__re_set_registers, re_set_registers)
5009*75f6d617Schristos #endif
5010*75f6d617Schristos
5011*75f6d617Schristos /* Searching routines. */
5012*75f6d617Schristos
/* Single-string convenience form of re_search_2: only STRING (of
   length SIZE) is searched, and the caller cannot say where matching
   must stop.  */

int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  /* There is no first string, so STRING is passed as the second one
     and SIZE doubles as the STOP index.  */
  const char *no_first_string = NULL;
  const int no_first_size = 0;

  return re_search_2 (bufp, no_first_string, no_first_size,
		      string, size, startpos, range, regs, size);
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
5029*75f6d617Schristos
5030*75f6d617Schristos
/* Search the virtual concatenation of STRING1 and STRING2, whose
   lengths are SIZE1 and SIZE2, for a match of the compiled pattern in
   BUFP->buffer.  A match is tried first at index STARTPOS, then at
   STARTPOS + 1, and so on.

   RANGE is how far to scan while trying to match: RANGE = 0 means try
   only at STARTPOS, and in general the last start tried is
   STARTPOS + RANGE.

   REGS receives the indices, within the virtual concatenation, of the
   text that matched the entire BUFP->buffer and its contained
   subexpressions.

   Matching one past the index STOP in the virtual concatenation is not
   considered.

   The result is the position in the strings at which the match was
   found, -1 if there was no match, or -2 on error (such as failure
   stack overflow).

   This function only dispatches: when MBS_SUPPORT is compiled in and
   MB_CUR_MAX != 1 the wide-character implementation does the work,
   otherwise the byte implementation does.  */

int
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int startpos;
     int range;
     struct re_registers *regs;
     int stop;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2,
			    startpos, range, regs, stop);
# endif

  return byte_re_search_2 (bufp, string1, size1, string2, size2,
			   startpos, range, regs, stop);
} /* re_search_2 */
#ifdef _LIBC
weak_alias (__re_search_2, re_search_2)
#endif
5074*75f6d617Schristos
5075*75f6d617Schristos #endif /* not INSIDE_RECURSION */
5076*75f6d617Schristos
5077*75f6d617Schristos #ifdef INSIDE_RECURSION
5078*75f6d617Schristos
/* FREE_VAR (VAR): release the storage VAR points to, if any, and reset
   VAR to NULL.  The expansion is wrapped in do { ... } while (0) so it
   is exactly one statement: used as the unbraced body of an `if' or
   `else', both the release and the reset stay under that condition,
   and a following `else' still parses.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)                                                  \
  do {                                                                  \
    if (var)                                                            \
      REGEX_FREE (var);                                                 \
    var = NULL;                                                         \
  } while (0)
#else
# define FREE_VAR(var)                                                  \
  do {                                                                  \
    if (var)                                                            \
      free (var);                                                       \
    var = NULL;                                                         \
  } while (0)
#endif

#ifdef WCHAR
/* Inputs longer than this many bytes get their conversion buffers from
   TALLOC and have them released with free; shorter ones use
   REGEX_TALLOC and FREE_VAR.  */
# define MAX_ALLOCA_SIZE 2000

/* Release the wide-character copies and offset tables of both input
   strings.  Expects size1, size2, wcs_string1, wcs_string2,
   mbs_offset1 and mbs_offset2 to be in scope at the point of use.  */
# define FREE_WCS_BUFFERS()                                             \
  do {                                                                  \
    if (size1 > MAX_ALLOCA_SIZE)                                        \
      {                                                                 \
        free (wcs_string1);                                             \
        free (mbs_offset1);                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        FREE_VAR (wcs_string1);                                         \
        FREE_VAR (mbs_offset1);                                         \
      }                                                                 \
    if (size2 > MAX_ALLOCA_SIZE)                                        \
      {                                                                 \
        free (wcs_string2);                                             \
        free (mbs_offset2);                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        FREE_VAR (wcs_string2);                                         \
        FREE_VAR (mbs_offset2);                                         \
      }                                                                 \
  } while (0)

#endif
5113*75f6d617Schristos
5114*75f6d617Schristos
5115*75f6d617Schristos static int
5116*75f6d617Schristos PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
5117*75f6d617Schristos regs, stop)
5118*75f6d617Schristos struct re_pattern_buffer *bufp;
5119*75f6d617Schristos const char *string1, *string2;
5120*75f6d617Schristos int size1, size2;
5121*75f6d617Schristos int startpos;
5122*75f6d617Schristos int range;
5123*75f6d617Schristos struct re_registers *regs;
5124*75f6d617Schristos int stop;
5125*75f6d617Schristos {
5126*75f6d617Schristos int val;
5127*75f6d617Schristos register char *fastmap = bufp->fastmap;
5128*75f6d617Schristos register RE_TRANSLATE_TYPE translate = bufp->translate;
5129*75f6d617Schristos int total_size = size1 + size2;
5130*75f6d617Schristos int endpos = startpos + range;
5131*75f6d617Schristos #ifdef WCHAR
5132*75f6d617Schristos /* We need wchar_t* buffers correspond to cstring1, cstring2. */
5133*75f6d617Schristos wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
5134*75f6d617Schristos /* We need the size of wchar_t buffers correspond to csize1, csize2. */
5135*75f6d617Schristos int wcs_size1 = 0, wcs_size2 = 0;
5136*75f6d617Schristos /* Offset buffers for optimization. See convert_mbs_to_wcs. */
5137*75f6d617Schristos int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
5138*75f6d617Schristos /* They hold whether each wchar_t is binary data or not. */
5139*75f6d617Schristos char *is_binary = NULL;
5140*75f6d617Schristos #endif /* WCHAR */
5141*75f6d617Schristos
5142*75f6d617Schristos /* Check for out-of-range STARTPOS. */
5143*75f6d617Schristos if (startpos < 0 || startpos > total_size)
5144*75f6d617Schristos return -1;
5145*75f6d617Schristos
5146*75f6d617Schristos /* Fix up RANGE if it might eventually take us outside
5147*75f6d617Schristos the virtual concatenation of STRING1 and STRING2.
5148*75f6d617Schristos Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */
5149*75f6d617Schristos if (endpos < 0)
5150*75f6d617Schristos range = 0 - startpos;
5151*75f6d617Schristos else if (endpos > total_size)
5152*75f6d617Schristos range = total_size - startpos;
5153*75f6d617Schristos
5154*75f6d617Schristos /* If the search isn't to be a backwards one, don't waste time in a
5155*75f6d617Schristos search for a pattern that must be anchored. */
5156*75f6d617Schristos if (bufp->used > 0 && range > 0
5157*75f6d617Schristos && ((re_opcode_t) bufp->buffer[0] == begbuf
5158*75f6d617Schristos /* `begline' is like `begbuf' if it cannot match at newlines. */
5159*75f6d617Schristos || ((re_opcode_t) bufp->buffer[0] == begline
5160*75f6d617Schristos && !bufp->newline_anchor)))
5161*75f6d617Schristos {
5162*75f6d617Schristos if (startpos > 0)
5163*75f6d617Schristos return -1;
5164*75f6d617Schristos else
5165*75f6d617Schristos range = 1;
5166*75f6d617Schristos }
5167*75f6d617Schristos
5168*75f6d617Schristos #ifdef emacs
5169*75f6d617Schristos /* In a forward search for something that starts with \=.
5170*75f6d617Schristos don't keep searching past point. */
5171*75f6d617Schristos if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
5172*75f6d617Schristos {
5173*75f6d617Schristos range = PT - startpos;
5174*75f6d617Schristos if (range <= 0)
5175*75f6d617Schristos return -1;
5176*75f6d617Schristos }
5177*75f6d617Schristos #endif /* emacs */
5178*75f6d617Schristos
5179*75f6d617Schristos /* Update the fastmap now if not correct already. */
5180*75f6d617Schristos if (fastmap && !bufp->fastmap_accurate)
5181*75f6d617Schristos if (re_compile_fastmap (bufp) == -2)
5182*75f6d617Schristos return -2;
5183*75f6d617Schristos
5184*75f6d617Schristos #ifdef WCHAR
5185*75f6d617Schristos /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
5186*75f6d617Schristos fill them with converted string. */
5187*75f6d617Schristos if (size1 != 0)
5188*75f6d617Schristos {
5189*75f6d617Schristos if (size1 > MAX_ALLOCA_SIZE)
5190*75f6d617Schristos {
5191*75f6d617Schristos wcs_string1 = TALLOC (size1 + 1, CHAR_T);
5192*75f6d617Schristos mbs_offset1 = TALLOC (size1 + 1, int);
5193*75f6d617Schristos is_binary = TALLOC (size1 + 1, char);
5194*75f6d617Schristos }
5195*75f6d617Schristos else
5196*75f6d617Schristos {
5197*75f6d617Schristos wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
5198*75f6d617Schristos mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
5199*75f6d617Schristos is_binary = REGEX_TALLOC (size1 + 1, char);
5200*75f6d617Schristos }
5201*75f6d617Schristos if (!wcs_string1 || !mbs_offset1 || !is_binary)
5202*75f6d617Schristos {
5203*75f6d617Schristos if (size1 > MAX_ALLOCA_SIZE)
5204*75f6d617Schristos {
5205*75f6d617Schristos free (wcs_string1);
5206*75f6d617Schristos free (mbs_offset1);
5207*75f6d617Schristos free (is_binary);
5208*75f6d617Schristos }
5209*75f6d617Schristos else
5210*75f6d617Schristos {
5211*75f6d617Schristos FREE_VAR (wcs_string1);
5212*75f6d617Schristos FREE_VAR (mbs_offset1);
5213*75f6d617Schristos FREE_VAR (is_binary);
5214*75f6d617Schristos }
5215*75f6d617Schristos return -2;
5216*75f6d617Schristos }
5217*75f6d617Schristos wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
5218*75f6d617Schristos mbs_offset1, is_binary);
5219*75f6d617Schristos wcs_string1[wcs_size1] = L'\0'; /* for a sentinel */
5220*75f6d617Schristos if (size1 > MAX_ALLOCA_SIZE)
5221*75f6d617Schristos free (is_binary);
5222*75f6d617Schristos else
5223*75f6d617Schristos FREE_VAR (is_binary);
5224*75f6d617Schristos }
5225*75f6d617Schristos if (size2 != 0)
5226*75f6d617Schristos {
5227*75f6d617Schristos if (size2 > MAX_ALLOCA_SIZE)
5228*75f6d617Schristos {
5229*75f6d617Schristos wcs_string2 = TALLOC (size2 + 1, CHAR_T);
5230*75f6d617Schristos mbs_offset2 = TALLOC (size2 + 1, int);
5231*75f6d617Schristos is_binary = TALLOC (size2 + 1, char);
5232*75f6d617Schristos }
5233*75f6d617Schristos else
5234*75f6d617Schristos {
5235*75f6d617Schristos wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
5236*75f6d617Schristos mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
5237*75f6d617Schristos is_binary = REGEX_TALLOC (size2 + 1, char);
5238*75f6d617Schristos }
5239*75f6d617Schristos if (!wcs_string2 || !mbs_offset2 || !is_binary)
5240*75f6d617Schristos {
5241*75f6d617Schristos FREE_WCS_BUFFERS ();
5242*75f6d617Schristos if (size2 > MAX_ALLOCA_SIZE)
5243*75f6d617Schristos free (is_binary);
5244*75f6d617Schristos else
5245*75f6d617Schristos FREE_VAR (is_binary);
5246*75f6d617Schristos return -2;
5247*75f6d617Schristos }
5248*75f6d617Schristos wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
5249*75f6d617Schristos mbs_offset2, is_binary);
5250*75f6d617Schristos wcs_string2[wcs_size2] = L'\0'; /* for a sentinel */
5251*75f6d617Schristos if (size2 > MAX_ALLOCA_SIZE)
5252*75f6d617Schristos free (is_binary);
5253*75f6d617Schristos else
5254*75f6d617Schristos FREE_VAR (is_binary);
5255*75f6d617Schristos }
5256*75f6d617Schristos #endif /* WCHAR */
5257*75f6d617Schristos
5258*75f6d617Schristos
5259*75f6d617Schristos /* Loop through the string, looking for a place to start matching. */
5260*75f6d617Schristos for (;;)
5261*75f6d617Schristos {
5262*75f6d617Schristos /* If a fastmap is supplied, skip quickly over characters that
5263*75f6d617Schristos cannot be the start of a match. If the pattern can match the
5264*75f6d617Schristos null string, however, we don't need to skip characters; we want
5265*75f6d617Schristos the first null string. */
5266*75f6d617Schristos if (fastmap && startpos < total_size && !bufp->can_be_null)
5267*75f6d617Schristos {
5268*75f6d617Schristos if (range > 0) /* Searching forwards. */
5269*75f6d617Schristos {
5270*75f6d617Schristos register const char *d;
5271*75f6d617Schristos register int lim = 0;
5272*75f6d617Schristos int irange = range;
5273*75f6d617Schristos
5274*75f6d617Schristos if (startpos < size1 && startpos + range >= size1)
5275*75f6d617Schristos lim = range - (size1 - startpos);
5276*75f6d617Schristos
5277*75f6d617Schristos d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
5278*75f6d617Schristos
5279*75f6d617Schristos /* Written out as an if-else to avoid testing `translate'
5280*75f6d617Schristos inside the loop. */
5281*75f6d617Schristos if (translate)
5282*75f6d617Schristos while (range > lim
5283*75f6d617Schristos && !fastmap[(unsigned char)
5284*75f6d617Schristos translate[(unsigned char) *d++]])
5285*75f6d617Schristos range--;
5286*75f6d617Schristos else
5287*75f6d617Schristos while (range > lim && !fastmap[(unsigned char) *d++])
5288*75f6d617Schristos range--;
5289*75f6d617Schristos
5290*75f6d617Schristos startpos += irange - range;
5291*75f6d617Schristos }
5292*75f6d617Schristos else /* Searching backwards. */
5293*75f6d617Schristos {
5294*75f6d617Schristos register CHAR_T c = (size1 == 0 || startpos >= size1
5295*75f6d617Schristos ? string2[startpos - size1]
5296*75f6d617Schristos : string1[startpos]);
5297*75f6d617Schristos
5298*75f6d617Schristos if (!fastmap[(unsigned char) TRANSLATE (c)])
5299*75f6d617Schristos goto advance;
5300*75f6d617Schristos }
5301*75f6d617Schristos }
5302*75f6d617Schristos
5303*75f6d617Schristos /* If can't match the null string, and that's all we have left, fail. */
5304*75f6d617Schristos if (range >= 0 && startpos == total_size && fastmap
5305*75f6d617Schristos && !bufp->can_be_null)
5306*75f6d617Schristos {
5307*75f6d617Schristos #ifdef WCHAR
5308*75f6d617Schristos FREE_WCS_BUFFERS ();
5309*75f6d617Schristos #endif
5310*75f6d617Schristos return -1;
5311*75f6d617Schristos }
5312*75f6d617Schristos
5313*75f6d617Schristos #ifdef WCHAR
5314*75f6d617Schristos val = wcs_re_match_2_internal (bufp, string1, size1, string2,
5315*75f6d617Schristos size2, startpos, regs, stop,
5316*75f6d617Schristos wcs_string1, wcs_size1,
5317*75f6d617Schristos wcs_string2, wcs_size2,
5318*75f6d617Schristos mbs_offset1, mbs_offset2);
5319*75f6d617Schristos #else /* BYTE */
5320*75f6d617Schristos val = byte_re_match_2_internal (bufp, string1, size1, string2,
5321*75f6d617Schristos size2, startpos, regs, stop);
5322*75f6d617Schristos #endif /* BYTE */
5323*75f6d617Schristos
5324*75f6d617Schristos #ifndef REGEX_MALLOC
5325*75f6d617Schristos # ifdef C_ALLOCA
5326*75f6d617Schristos alloca (0);
5327*75f6d617Schristos # endif
5328*75f6d617Schristos #endif
5329*75f6d617Schristos
5330*75f6d617Schristos if (val >= 0)
5331*75f6d617Schristos {
5332*75f6d617Schristos #ifdef WCHAR
5333*75f6d617Schristos FREE_WCS_BUFFERS ();
5334*75f6d617Schristos #endif
5335*75f6d617Schristos return startpos;
5336*75f6d617Schristos }
5337*75f6d617Schristos
5338*75f6d617Schristos if (val == -2)
5339*75f6d617Schristos {
5340*75f6d617Schristos #ifdef WCHAR
5341*75f6d617Schristos FREE_WCS_BUFFERS ();
5342*75f6d617Schristos #endif
5343*75f6d617Schristos return -2;
5344*75f6d617Schristos }
5345*75f6d617Schristos
5346*75f6d617Schristos advance:
5347*75f6d617Schristos if (!range)
5348*75f6d617Schristos break;
5349*75f6d617Schristos else if (range > 0)
5350*75f6d617Schristos {
5351*75f6d617Schristos range--;
5352*75f6d617Schristos startpos++;
5353*75f6d617Schristos }
5354*75f6d617Schristos else
5355*75f6d617Schristos {
5356*75f6d617Schristos range++;
5357*75f6d617Schristos startpos--;
5358*75f6d617Schristos }
5359*75f6d617Schristos }
5360*75f6d617Schristos #ifdef WCHAR
5361*75f6d617Schristos FREE_WCS_BUFFERS ();
5362*75f6d617Schristos #endif
5363*75f6d617Schristos return -1;
5364*75f6d617Schristos }
5365*75f6d617Schristos
#ifdef WCHAR
/* This converts PTR, a pointer into one of the search wchar_t strings
   `string1' and `string2', into a multibyte string offset.  The
   offset is looked up in mbs_offset1/mbs_offset2 (see
   convert_mbs_to_wcs); when the relevant table is NULL the looked-up
   part is taken to be 0.  For a pointer into `string2' the multibyte
   size of the first string (csize1) is added, so the result is an
   offset into the virtual concatenation of both strings.  */
# define POINTER_TO_OFFSET(ptr) \
  (FIRST_STRING_P (ptr) \
   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0)) \
   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0) \
                 + csize1)))
#else /* BYTE */
/* This converts PTR, a pointer into one of the search strings `string1'
   and `string2', into an offset.  For a pointer into `string2' the
   length of the first string (size1) is added, so the result is an
   offset into the virtual concatenation of both strings.  */
# define POINTER_TO_OFFSET(ptr) \
  (FIRST_STRING_P (ptr) \
   ? ((regoff_t) ((ptr) - string1)) \
   : ((regoff_t) ((ptr) - string2 + size1)))
#endif /* WCHAR */
5384*75f6d617Schristos
/* Macros for dealing with the split strings in re_match_2.  They all
   refer by name to locals of the matching function (d, dend,
   end_match_1, end_match_2, string1, string2, size1, size2, end2).  */

/* True while the current scan limit DEND is the end of the part of
   `string1' being considered, i.e. we have not switched to `string2'.  */
#define MATCHING_IN_FIRST_STRING (dend == end_match_1)

/* Call before fetching a character with *d.  This switches over to
   string2 if necessary, and jumps to the `fail' label when the data
   in string2 is exhausted as well.  */
#define PREFETCH() \
  while (d == dend) \
    { \
      /* End of string2 => fail.  */ \
      if (dend == end_match_2) \
        goto fail; \
      /* End of string1 => advance to string2.  */ \
      d = string2; \
      dend = end_match_2; \
    }

/* Test if at very beginning or at very end of the virtual concatenation
   of `string1' and `string2'.  If only one string, it's `string2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
5407*75f6d617Schristos
/* Test if D points to a character which is word-constituent.  We have
   two special cases to check for: if past the end of string1, look at
   the first character in string2; and if before the beginning of
   string2, look at the last character in string1.  Note that D is
   evaluated more than once.  */
#ifdef WCHAR
/* Use internationalized API instead of SYNTAX: a wide character is
   word-constituent when iswalnum accepts it or when it is L'_'.  */
# define WORDCHAR_P(d) \
  (iswalnum ((wint_t)((d) == end1 ? *string2 \
             : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0 \
   || ((d) == end1 ? *string2 \
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else /* BYTE */
/* A byte is word-constituent when its SYNTAX class is Sword.  */
# define WORDCHAR_P(d) \
  (SYNTAX ((d) == end1 ? *string2 \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
   == Sword)
#endif /* WCHAR */
5425*75f6d617Schristos
/* Disabled due to a compiler bug -- see comment at case wordbound */
#if 0
/* Test if the character before D and the one at D differ with respect
   to being word-constituent.  The beginning and the end of the
   strings always count as a boundary.  */
#define AT_WORD_BOUNDARY(d) \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
5434*75f6d617Schristos
/* Free everything we malloc.

   Four variants are selected by MATCH_MAY_ALLOCATE and WCHAR:
   - with MATCH_MAY_ALLOCATE the failure stack and all per-call
     register vectors are released;
   - with WCHAR the wide-character copies of the strings and their
     offset tables are released as well, but only when
     cant_free_wcs_buf is zero;
   - with neither, the macro expands to a no-op expression.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
#  define FREE_VARIABLES() \
  do { \
    REGEX_FREE_STACK (fail_stack.stack); \
    FREE_VAR (regstart); \
    FREE_VAR (regend); \
    FREE_VAR (old_regstart); \
    FREE_VAR (old_regend); \
    FREE_VAR (best_regstart); \
    FREE_VAR (best_regend); \
    FREE_VAR (reg_info); \
    FREE_VAR (reg_dummy); \
    FREE_VAR (reg_info_dummy); \
    if (!cant_free_wcs_buf) \
      { \
        FREE_VAR (string1); \
        FREE_VAR (string2); \
        FREE_VAR (mbs_offset1); \
        FREE_VAR (mbs_offset2); \
      } \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() \
  do { \
    REGEX_FREE_STACK (fail_stack.stack); \
    FREE_VAR (regstart); \
    FREE_VAR (regend); \
    FREE_VAR (old_regstart); \
    FREE_VAR (old_regend); \
    FREE_VAR (best_regstart); \
    FREE_VAR (best_regend); \
    FREE_VAR (reg_info); \
    FREE_VAR (reg_dummy); \
    FREE_VAR (reg_info_dummy); \
  } while (0)
# endif /* WCHAR */
#else
# ifdef WCHAR
#  define FREE_VARIABLES() \
  do { \
    if (!cant_free_wcs_buf) \
      { \
        FREE_VAR (string1); \
        FREE_VAR (string2); \
        FREE_VAR (mbs_offset1); \
        FREE_VAR (mbs_offset2); \
      } \
  } while (0)
# else /* BYTE */
#  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
# endif /* WCHAR */
#endif /* not MATCH_MAY_ALLOCATE */
5489*75f6d617Schristos
/* These values must meet several constraints.  They must not be valid
   register values; since we have a limit of 255 registers (because
   we use only one byte in the pattern for the register number), we can
   use numbers larger than 255.  They must differ by 1, because of
   NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   be larger than the value for the highest register, so we do not try
   to actually save any registers when none are active.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
5499*75f6d617Schristos
5500*75f6d617Schristos #else /* not INSIDE_RECURSION */
5501*75f6d617Schristos /* Matching routines. */
5502*75f6d617Schristos
#ifndef emacs   /* Emacs never uses this.  */
/* re_match is like re_match_2 except it takes only a single string.

   The compiled pattern in BUFP is matched against the SIZE bytes at
   STRING, starting at offset POS.  The string is handed to the
   internal matcher as the second of its two strings (the first one is
   empty) and SIZE is used as the stop position.  REGS is passed
   through unchanged.  The internal matcher's result is returned as
   is.  */

int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int result;
# ifdef MBS_SUPPORT
  /* In a multibyte locale use the wide-character matcher; the extra
     NULL/0 arguments leave the wide-character buffers unset.  */
  if (MB_CUR_MAX != 1)
    result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
                                      pos, regs, size,
                                      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
                                       pos, regs, size);
# ifndef REGEX_MALLOC
#  ifdef C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
#  endif
# endif
  return result;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
#endif /* not emacs */
5534*75f6d617Schristos
5535*75f6d617Schristos #endif /* not INSIDE_RECURSION */
5536*75f6d617Schristos
5537*75f6d617Schristos #ifdef INSIDE_RECURSION
5538*75f6d617Schristos static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
5539*75f6d617Schristos UCHAR_T *end,
5540*75f6d617Schristos PREFIX(register_info_type) *reg_info));
5541*75f6d617Schristos static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p,
5542*75f6d617Schristos UCHAR_T *end,
5543*75f6d617Schristos PREFIX(register_info_type) *reg_info));
5544*75f6d617Schristos static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
5545*75f6d617Schristos UCHAR_T *end,
5546*75f6d617Schristos PREFIX(register_info_type) *reg_info));
5547*75f6d617Schristos static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T *s1, const CHAR_T *s2,
5548*75f6d617Schristos int len, char *translate));
5549*75f6d617Schristos #else /* not INSIDE_RECURSION */
5550*75f6d617Schristos
/* re_match_2 matches the compiled pattern in BUFP against the
   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   and SIZE2, respectively).  We start matching at POS, and stop
   matching at STOP.

   If REGS is non-null and the `no_sub' field of BUFP is zero, we
   store offsets for the substring each group matched in REGS.  See the
   documentation for exactly how many groups we fill.

   We return -1 if no match, -2 if an internal error (such as the
   failure stack overflowing).  Otherwise, we return the length of the
   matched substring.  */

int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int result;
# ifdef MBS_SUPPORT
  /* In a multibyte locale use the wide-character matcher; the extra
     NULL/0 arguments leave the wide-character buffers unset.  */
  if (MB_CUR_MAX != 1)
    result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
                                      pos, regs, stop,
                                      NULL, 0, NULL, 0, NULL, NULL);
  else
# endif
    result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
                                       pos, regs, stop);

#ifndef REGEX_MALLOC
# ifdef C_ALLOCA
  /* Force an alloca cleanup now that the matcher has returned.  */
  alloca (0);
# endif
#endif
  return result;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
5594*75f6d617Schristos
5595*75f6d617Schristos #endif /* not INSIDE_RECURSION */
5596*75f6d617Schristos
5597*75f6d617Schristos #ifdef INSIDE_RECURSION
5598*75f6d617Schristos
5599*75f6d617Schristos #ifdef WCHAR
5600*75f6d617Schristos static int count_mbs_length PARAMS ((int *, int));
5601*75f6d617Schristos
/* This checks the substring (from 0 to LENGTH) of the multibyte string
   to which OFFSET_BUFFER corresponds, and counts how many wchar_t
   characters that substring occupies.  We use OFFSET_BUFFER for
   optimization.  See convert_mbs_to_wcs.

   Return value:
     -1      if LENGTH is negative, or if no index I satisfies
             OFFSET_BUFFER[I] == LENGTH;
      0      if OFFSET_BUFFER is NULL;
      I      otherwise, the index with OFFSET_BUFFER[I] == LENGTH.

   OFFSET_BUFFER[LENGTH] is read, so a non-NULL buffer must have at
   least LENGTH + 1 elements.  */

static int
count_mbs_length(offset_buffer, length)
     int *offset_buffer;
     int length;
{
  int upper, lower;

  /* Check whether the size is valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* If there are no multibyte characters, offset_buffer[i] == i.
     Optimize for this case.  */
  if (offset_buffer[length] == length)
    return length;

  /* Set up upper with length. (because for all i, offset_buffer[i] >= i)  */
  upper = length;
  lower = 0;

  /* Binary search strictly between LOWER and UPPER; both end points
     have already been ruled out by the checks above.  */
  for (;;)
    {
      int middle = (lower + upper) / 2;
      /* The interval cannot be narrowed any further: no exact hit.  */
      if (middle == lower || middle == upper)
        break;
      if (offset_buffer[middle] > length)
        upper = middle;
      else if (offset_buffer[middle] < length)
        lower = middle;
      else
        return middle;
    }

  return -1;
}
5645*75f6d617Schristos #endif /* WCHAR */
5646*75f6d617Schristos
5647*75f6d617Schristos /* This is a separate function so that we can force an alloca cleanup
5648*75f6d617Schristos afterwards. */
5649*75f6d617Schristos #ifdef WCHAR
5650*75f6d617Schristos static int
wcs_re_match_2_internal(bufp,cstring1,csize1,cstring2,csize2,pos,regs,stop,string1,size1,string2,size2,mbs_offset1,mbs_offset2)5651*75f6d617Schristos wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
5652*75f6d617Schristos regs, stop, string1, size1, string2, size2,
5653*75f6d617Schristos mbs_offset1, mbs_offset2)
5654*75f6d617Schristos struct re_pattern_buffer *bufp;
5655*75f6d617Schristos const char *cstring1, *cstring2;
5656*75f6d617Schristos int csize1, csize2;
5657*75f6d617Schristos int pos;
5658*75f6d617Schristos struct re_registers *regs;
5659*75f6d617Schristos int stop;
5660*75f6d617Schristos /* string1 == string2 == NULL means string1/2, size1/2 and
5661*75f6d617Schristos mbs_offset1/2 need seting up in this function. */
5662*75f6d617Schristos /* We need wchar_t* buffers correspond to cstring1, cstring2. */
5663*75f6d617Schristos wchar_t *string1, *string2;
5664*75f6d617Schristos /* We need the size of wchar_t buffers correspond to csize1, csize2. */
5665*75f6d617Schristos int size1, size2;
5666*75f6d617Schristos /* offset buffer for optimizatoin. See convert_mbs_to_wc. */
5667*75f6d617Schristos int *mbs_offset1, *mbs_offset2;
5668*75f6d617Schristos #else /* BYTE */
5669*75f6d617Schristos static int
5670*75f6d617Schristos byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
5671*75f6d617Schristos regs, stop)
5672*75f6d617Schristos struct re_pattern_buffer *bufp;
5673*75f6d617Schristos const char *string1, *string2;
5674*75f6d617Schristos int size1, size2;
5675*75f6d617Schristos int pos;
5676*75f6d617Schristos struct re_registers *regs;
5677*75f6d617Schristos int stop;
5678*75f6d617Schristos #endif /* BYTE */
5679*75f6d617Schristos {
5680*75f6d617Schristos /* General temporaries. */
5681*75f6d617Schristos int mcnt;
5682*75f6d617Schristos UCHAR_T *p1;
5683*75f6d617Schristos #ifdef WCHAR
5684*75f6d617Schristos /* They hold whether each wchar_t is binary data or not. */
5685*75f6d617Schristos char *is_binary = NULL;
5686*75f6d617Schristos /* If true, we can't free string1/2, mbs_offset1/2. */
5687*75f6d617Schristos int cant_free_wcs_buf = 1;
5688*75f6d617Schristos #endif /* WCHAR */
5689*75f6d617Schristos
5690*75f6d617Schristos /* Just past the end of the corresponding string. */
5691*75f6d617Schristos const CHAR_T *end1, *end2;
5692*75f6d617Schristos
5693*75f6d617Schristos /* Pointers into string1 and string2, just past the last characters in
5694*75f6d617Schristos each to consider matching. */
5695*75f6d617Schristos const CHAR_T *end_match_1, *end_match_2;
5696*75f6d617Schristos
5697*75f6d617Schristos /* Where we are in the data, and the end of the current string. */
5698*75f6d617Schristos const CHAR_T *d, *dend;
5699*75f6d617Schristos
5700*75f6d617Schristos /* Where we are in the pattern, and the end of the pattern. */
5701*75f6d617Schristos #ifdef WCHAR
5702*75f6d617Schristos UCHAR_T *pattern, *p;
5703*75f6d617Schristos register UCHAR_T *pend;
5704*75f6d617Schristos #else /* BYTE */
5705*75f6d617Schristos UCHAR_T *p = bufp->buffer;
5706*75f6d617Schristos register UCHAR_T *pend = p + bufp->used;
5707*75f6d617Schristos #endif /* WCHAR */
5708*75f6d617Schristos
5709*75f6d617Schristos /* Mark the opcode just after a start_memory, so we can test for an
5710*75f6d617Schristos empty subpattern when we get to the stop_memory. */
5711*75f6d617Schristos UCHAR_T *just_past_start_mem = 0;
5712*75f6d617Schristos
5713*75f6d617Schristos /* We use this to map every character in the string. */
5714*75f6d617Schristos RE_TRANSLATE_TYPE translate = bufp->translate;
5715*75f6d617Schristos
5716*75f6d617Schristos /* Failure point stack. Each place that can handle a failure further
5717*75f6d617Schristos down the line pushes a failure point on this stack. It consists of
5718*75f6d617Schristos restart, regend, and reg_info for all registers corresponding to
5719*75f6d617Schristos the subexpressions we're currently inside, plus the number of such
5720*75f6d617Schristos registers, and, finally, two char *'s. The first char * is where
5721*75f6d617Schristos to resume scanning the pattern; the second one is where to resume
5722*75f6d617Schristos scanning the strings. If the latter is zero, the failure point is
5723*75f6d617Schristos a ``dummy''; if a failure happens and the failure point is a dummy,
5724*75f6d617Schristos it gets discarded and the next next one is tried. */
5725*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
5726*75f6d617Schristos PREFIX(fail_stack_type) fail_stack;
5727*75f6d617Schristos #endif
5728*75f6d617Schristos #ifdef DEBUG
5729*75f6d617Schristos static unsigned failure_id;
5730*75f6d617Schristos unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5731*75f6d617Schristos #endif
5732*75f6d617Schristos
5733*75f6d617Schristos #ifdef REL_ALLOC
5734*75f6d617Schristos /* This holds the pointer to the failure stack, when
5735*75f6d617Schristos it is allocated relocatably. */
5736*75f6d617Schristos fail_stack_elt_t *failure_stack_ptr;
5737*75f6d617Schristos #endif
5738*75f6d617Schristos
5739*75f6d617Schristos /* We fill all the registers internally, independent of what we
5740*75f6d617Schristos return, for use in backreferences. The number here includes
5741*75f6d617Schristos an element for register zero. */
5742*75f6d617Schristos size_t num_regs = bufp->re_nsub + 1;
5743*75f6d617Schristos
5744*75f6d617Schristos /* The currently active registers. */
5745*75f6d617Schristos active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
5746*75f6d617Schristos active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
5747*75f6d617Schristos
5748*75f6d617Schristos /* Information on the contents of registers. These are pointers into
5749*75f6d617Schristos the input strings; they record just what was matched (on this
5750*75f6d617Schristos attempt) by a subexpression part of the pattern, that is, the
5751*75f6d617Schristos regnum-th regstart pointer points to where in the pattern we began
5752*75f6d617Schristos matching and the regnum-th regend points to right after where we
5753*75f6d617Schristos stopped matching the regnum-th subexpression. (The zeroth register
5754*75f6d617Schristos keeps track of what the whole pattern matches.) */
5755*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5756*75f6d617Schristos const CHAR_T **regstart, **regend;
5757*75f6d617Schristos #endif
5758*75f6d617Schristos
5759*75f6d617Schristos /* If a group that's operated upon by a repetition operator fails to
5760*75f6d617Schristos match anything, then the register for its start will need to be
5761*75f6d617Schristos restored because it will have been set to wherever in the string we
5762*75f6d617Schristos are when we last see its open-group operator. Similarly for a
5763*75f6d617Schristos register's end. */
5764*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5765*75f6d617Schristos const CHAR_T **old_regstart, **old_regend;
5766*75f6d617Schristos #endif
5767*75f6d617Schristos
5768*75f6d617Schristos /* The is_active field of reg_info helps us keep track of which (possibly
5769*75f6d617Schristos nested) subexpressions we are currently in. The matched_something
5770*75f6d617Schristos field of reg_info[reg_num] helps us tell whether or not we have
5771*75f6d617Schristos matched any of the pattern so far this time through the reg_num-th
5772*75f6d617Schristos subexpression. These two fields get reset each time through any
5773*75f6d617Schristos loop their register is in. */
5774*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
5775*75f6d617Schristos PREFIX(register_info_type) *reg_info;
5776*75f6d617Schristos #endif
5777*75f6d617Schristos
5778*75f6d617Schristos /* The following record the register info as found in the above
5779*75f6d617Schristos variables when we find a match better than any we've seen before.
5780*75f6d617Schristos This happens as we backtrack through the failure points, which in
5781*75f6d617Schristos turn happens only if we have not yet matched the entire string. */
5782*75f6d617Schristos unsigned best_regs_set = false;
5783*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5784*75f6d617Schristos const CHAR_T **best_regstart, **best_regend;
5785*75f6d617Schristos #endif
5786*75f6d617Schristos
5787*75f6d617Schristos /* Logically, this is `best_regend[0]'. But we don't want to have to
5788*75f6d617Schristos allocate space for that if we're not allocating space for anything
5789*75f6d617Schristos else (see below). Also, we never need info about register 0 for
5790*75f6d617Schristos any of the other register vectors, and it seems rather a kludge to
5791*75f6d617Schristos treat `best_regend' differently than the rest. So we keep track of
5792*75f6d617Schristos the end of the best match so far in a separate variable. We
5793*75f6d617Schristos initialize this to NULL so that when we backtrack the first time
5794*75f6d617Schristos and need to test it, it's not garbage. */
5795*75f6d617Schristos const CHAR_T *match_end = NULL;
5796*75f6d617Schristos
5797*75f6d617Schristos /* This helps SET_REGS_MATCHED avoid doing redundant work. */
5798*75f6d617Schristos int set_regs_matched_done = 0;
5799*75f6d617Schristos
5800*75f6d617Schristos /* Used when we pop values we don't care about. */
5801*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
5802*75f6d617Schristos const CHAR_T **reg_dummy;
5803*75f6d617Schristos PREFIX(register_info_type) *reg_info_dummy;
5804*75f6d617Schristos #endif
5805*75f6d617Schristos
5806*75f6d617Schristos #ifdef DEBUG
5807*75f6d617Schristos /* Counts the total number of registers pushed. */
5808*75f6d617Schristos unsigned num_regs_pushed = 0;
5809*75f6d617Schristos #endif
5810*75f6d617Schristos
5811*75f6d617Schristos /* Definitions for state transitions. More efficiently for gcc. */
5812*75f6d617Schristos #ifdef __GNUC__
5813*75f6d617Schristos # if defined HAVE_SUBTRACT_LOCAL_LABELS && defined SHARED
5814*75f6d617Schristos # define NEXT \
5815*75f6d617Schristos do \
5816*75f6d617Schristos { \
5817*75f6d617Schristos int offset; \
5818*75f6d617Schristos const void *__unbounded ptr; \
5819*75f6d617Schristos offset = (p == pend \
5820*75f6d617Schristos ? 0 : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]); \
5821*75f6d617Schristos ptr = &&end_of_pattern + offset; \
5822*75f6d617Schristos goto *ptr; \
5823*75f6d617Schristos } \
5824*75f6d617Schristos while (0)
5825*75f6d617Schristos # define REF(x) \
5826*75f6d617Schristos &&label_##x - &&end_of_pattern
5827*75f6d617Schristos # define JUMP_TABLE_TYPE const int
5828*75f6d617Schristos # else
5829*75f6d617Schristos # define NEXT \
5830*75f6d617Schristos do \
5831*75f6d617Schristos { \
5832*75f6d617Schristos const void *__unbounded ptr; \
5833*75f6d617Schristos ptr = (p == pend ? &&end_of_pattern \
5834*75f6d617Schristos : jmptable[SWITCH_ENUM_CAST ((re_opcode_t) *p++)]); \
5835*75f6d617Schristos goto *ptr; \
5836*75f6d617Schristos } \
5837*75f6d617Schristos while (0)
5838*75f6d617Schristos # define REF(x) \
5839*75f6d617Schristos &&label_##x
5840*75f6d617Schristos # define JUMP_TABLE_TYPE const void *const
5841*75f6d617Schristos # endif
5842*75f6d617Schristos # define CASE(x) label_##x
5843*75f6d617Schristos static JUMP_TABLE_TYPE jmptable[] =
5844*75f6d617Schristos {
5845*75f6d617Schristos REF (no_op),
5846*75f6d617Schristos REF (succeed),
5847*75f6d617Schristos REF (exactn),
5848*75f6d617Schristos # ifdef MBS_SUPPORT
5849*75f6d617Schristos REF (exactn_bin),
5850*75f6d617Schristos # endif
5851*75f6d617Schristos REF (anychar),
5852*75f6d617Schristos REF (charset),
5853*75f6d617Schristos REF (charset_not),
5854*75f6d617Schristos REF (start_memory),
5855*75f6d617Schristos REF (stop_memory),
5856*75f6d617Schristos REF (duplicate),
5857*75f6d617Schristos REF (begline),
5858*75f6d617Schristos REF (endline),
5859*75f6d617Schristos REF (begbuf),
5860*75f6d617Schristos REF (endbuf),
5861*75f6d617Schristos REF (jump),
5862*75f6d617Schristos REF (jump_past_alt),
5863*75f6d617Schristos REF (on_failure_jump),
5864*75f6d617Schristos REF (on_failure_keep_string_jump),
5865*75f6d617Schristos REF (pop_failure_jump),
5866*75f6d617Schristos REF (maybe_pop_jump),
5867*75f6d617Schristos REF (dummy_failure_jump),
5868*75f6d617Schristos REF (push_dummy_failure),
5869*75f6d617Schristos REF (succeed_n),
5870*75f6d617Schristos REF (jump_n),
5871*75f6d617Schristos REF (set_number_at),
5872*75f6d617Schristos REF (wordchar),
5873*75f6d617Schristos REF (notwordchar),
5874*75f6d617Schristos REF (wordbeg),
5875*75f6d617Schristos REF (wordend),
5876*75f6d617Schristos REF (wordbound),
5877*75f6d617Schristos REF (notwordbound)
5878*75f6d617Schristos # ifdef emacs
5879*75f6d617Schristos ,REF (before_dot),
5880*75f6d617Schristos REF (at_dot),
5881*75f6d617Schristos REF (after_dot),
5882*75f6d617Schristos REF (syntaxspec),
5883*75f6d617Schristos REF (notsyntaxspec)
5884*75f6d617Schristos # endif
5885*75f6d617Schristos };
5886*75f6d617Schristos #else
5887*75f6d617Schristos # define NEXT \
5888*75f6d617Schristos break
5889*75f6d617Schristos # define CASE(x) \
5890*75f6d617Schristos case x
5891*75f6d617Schristos #endif
5892*75f6d617Schristos
5893*75f6d617Schristos DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5894*75f6d617Schristos
5895*75f6d617Schristos INIT_FAIL_STACK ();
5896*75f6d617Schristos
5897*75f6d617Schristos #ifdef MATCH_MAY_ALLOCATE
5898*75f6d617Schristos /* Do not bother to initialize all the register variables if there are
5899*75f6d617Schristos no groups in the pattern, as it takes a fair amount of time. If
5900*75f6d617Schristos there are groups, we include space for register 0 (the whole
5901*75f6d617Schristos pattern), even though we never use it, since it simplifies the
5902*75f6d617Schristos array indexing. We should fix this. */
5903*75f6d617Schristos if (bufp->re_nsub)
5904*75f6d617Schristos {
5905*75f6d617Schristos regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5906*75f6d617Schristos regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5907*75f6d617Schristos old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5908*75f6d617Schristos old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5909*75f6d617Schristos best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
5910*75f6d617Schristos best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
5911*75f6d617Schristos reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5912*75f6d617Schristos reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
5913*75f6d617Schristos reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
5914*75f6d617Schristos
5915*75f6d617Schristos if (!(regstart && regend && old_regstart && old_regend && reg_info
5916*75f6d617Schristos && best_regstart && best_regend && reg_dummy && reg_info_dummy))
5917*75f6d617Schristos {
5918*75f6d617Schristos FREE_VARIABLES ();
5919*75f6d617Schristos return -2;
5920*75f6d617Schristos }
5921*75f6d617Schristos }
5922*75f6d617Schristos else
5923*75f6d617Schristos {
5924*75f6d617Schristos /* We must initialize all our variables to NULL, so that
5925*75f6d617Schristos `FREE_VARIABLES' doesn't try to free them. */
5926*75f6d617Schristos regstart = regend = old_regstart = old_regend = best_regstart
5927*75f6d617Schristos = best_regend = reg_dummy = NULL;
5928*75f6d617Schristos reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
5929*75f6d617Schristos }
5930*75f6d617Schristos #endif /* MATCH_MAY_ALLOCATE */
5931*75f6d617Schristos
5932*75f6d617Schristos /* The starting position is bogus. */
5933*75f6d617Schristos #ifdef WCHAR
5934*75f6d617Schristos if (pos < 0 || pos > csize1 + csize2)
5935*75f6d617Schristos #else /* BYTE */
5936*75f6d617Schristos if (pos < 0 || pos > size1 + size2)
5937*75f6d617Schristos #endif
5938*75f6d617Schristos {
5939*75f6d617Schristos FREE_VARIABLES ();
5940*75f6d617Schristos return -1;
5941*75f6d617Schristos }
5942*75f6d617Schristos
5943*75f6d617Schristos #ifdef WCHAR
5944*75f6d617Schristos /* Allocate wchar_t array for string1 and string2 and
5945*75f6d617Schristos fill them with converted string. */
5946*75f6d617Schristos if (string1 == NULL && string2 == NULL)
5947*75f6d617Schristos {
5948*75f6d617Schristos /* We need to set up the buffers here. */
5949*75f6d617Schristos
5950*75f6d617Schristos /* We must free wcs buffers in this function. */
5951*75f6d617Schristos cant_free_wcs_buf = 0;
5952*75f6d617Schristos
5953*75f6d617Schristos if (csize1 != 0)
5954*75f6d617Schristos {
5955*75f6d617Schristos string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
5956*75f6d617Schristos mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
5957*75f6d617Schristos is_binary = REGEX_TALLOC (csize1 + 1, char);
5958*75f6d617Schristos if (!string1 || !mbs_offset1 || !is_binary)
5959*75f6d617Schristos {
5960*75f6d617Schristos FREE_VAR (string1);
5961*75f6d617Schristos FREE_VAR (mbs_offset1);
5962*75f6d617Schristos FREE_VAR (is_binary);
5963*75f6d617Schristos return -2;
5964*75f6d617Schristos }
5965*75f6d617Schristos }
5966*75f6d617Schristos if (csize2 != 0)
5967*75f6d617Schristos {
5968*75f6d617Schristos string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
5969*75f6d617Schristos mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
5970*75f6d617Schristos is_binary = REGEX_TALLOC (csize2 + 1, char);
5971*75f6d617Schristos if (!string2 || !mbs_offset2 || !is_binary)
5972*75f6d617Schristos {
5973*75f6d617Schristos FREE_VAR (string1);
5974*75f6d617Schristos FREE_VAR (mbs_offset1);
5975*75f6d617Schristos FREE_VAR (string2);
5976*75f6d617Schristos FREE_VAR (mbs_offset2);
5977*75f6d617Schristos FREE_VAR (is_binary);
5978*75f6d617Schristos return -2;
5979*75f6d617Schristos }
5980*75f6d617Schristos size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
5981*75f6d617Schristos mbs_offset2, is_binary);
5982*75f6d617Schristos string2[size2] = L'\0'; /* for a sentinel */
5983*75f6d617Schristos FREE_VAR (is_binary);
5984*75f6d617Schristos }
5985*75f6d617Schristos }
5986*75f6d617Schristos
5987*75f6d617Schristos /* We need to cast pattern to (wchar_t*), because we cast this compiled
5988*75f6d617Schristos pattern to (char*) in regex_compile. */
5989*75f6d617Schristos p = pattern = (CHAR_T*)bufp->buffer;
5990*75f6d617Schristos pend = (CHAR_T*)(bufp->buffer + bufp->used);
5991*75f6d617Schristos
5992*75f6d617Schristos #endif /* WCHAR */
5993*75f6d617Schristos
5994*75f6d617Schristos /* Initialize subexpression text positions to -1 to mark ones that no
5995*75f6d617Schristos start_memory/stop_memory has been seen for. Also initialize the
5996*75f6d617Schristos register information struct. */
5997*75f6d617Schristos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
5998*75f6d617Schristos {
5999*75f6d617Schristos regstart[mcnt] = regend[mcnt]
6000*75f6d617Schristos = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
6001*75f6d617Schristos
6002*75f6d617Schristos REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
6003*75f6d617Schristos IS_ACTIVE (reg_info[mcnt]) = 0;
6004*75f6d617Schristos MATCHED_SOMETHING (reg_info[mcnt]) = 0;
6005*75f6d617Schristos EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
6006*75f6d617Schristos }
6007*75f6d617Schristos
6008*75f6d617Schristos /* We move `string1' into `string2' if the latter's empty -- but not if
6009*75f6d617Schristos `string1' is null. */
6010*75f6d617Schristos if (size2 == 0 && string1 != NULL)
6011*75f6d617Schristos {
6012*75f6d617Schristos string2 = string1;
6013*75f6d617Schristos size2 = size1;
6014*75f6d617Schristos string1 = 0;
6015*75f6d617Schristos size1 = 0;
6016*75f6d617Schristos #ifdef WCHAR
6017*75f6d617Schristos mbs_offset2 = mbs_offset1;
6018*75f6d617Schristos csize2 = csize1;
6019*75f6d617Schristos mbs_offset1 = NULL;
6020*75f6d617Schristos csize1 = 0;
6021*75f6d617Schristos #endif
6022*75f6d617Schristos }
6023*75f6d617Schristos end1 = string1 + size1;
6024*75f6d617Schristos end2 = string2 + size2;
6025*75f6d617Schristos
6026*75f6d617Schristos /* Compute where to stop matching, within the two strings. */
6027*75f6d617Schristos #ifdef WCHAR
6028*75f6d617Schristos if (stop <= csize1)
6029*75f6d617Schristos {
6030*75f6d617Schristos mcnt = count_mbs_length(mbs_offset1, stop);
6031*75f6d617Schristos end_match_1 = string1 + mcnt;
6032*75f6d617Schristos end_match_2 = string2;
6033*75f6d617Schristos }
6034*75f6d617Schristos else
6035*75f6d617Schristos {
6036*75f6d617Schristos if (stop > csize1 + csize2)
6037*75f6d617Schristos stop = csize1 + csize2;
6038*75f6d617Schristos end_match_1 = end1;
6039*75f6d617Schristos mcnt = count_mbs_length(mbs_offset2, stop-csize1);
6040*75f6d617Schristos end_match_2 = string2 + mcnt;
6041*75f6d617Schristos }
6042*75f6d617Schristos if (mcnt < 0)
6043*75f6d617Schristos { /* count_mbs_length returned an error. */
6044*75f6d617Schristos FREE_VARIABLES ();
6045*75f6d617Schristos return -1;
6046*75f6d617Schristos }
6047*75f6d617Schristos #else
6048*75f6d617Schristos if (stop <= size1)
6049*75f6d617Schristos {
6050*75f6d617Schristos end_match_1 = string1 + stop;
6051*75f6d617Schristos end_match_2 = string2;
6052*75f6d617Schristos }
6053*75f6d617Schristos else
6054*75f6d617Schristos {
6055*75f6d617Schristos end_match_1 = end1;
6056*75f6d617Schristos end_match_2 = string2 + stop - size1;
6057*75f6d617Schristos }
6058*75f6d617Schristos #endif /* WCHAR */
6059*75f6d617Schristos
6060*75f6d617Schristos /* `p' scans through the pattern as `d' scans through the data.
6061*75f6d617Schristos `dend' is the end of the input string that `d' points within. `d'
6062*75f6d617Schristos is advanced into the following input string whenever necessary, but
6063*75f6d617Schristos this happens before fetching; therefore, at the beginning of the
6064*75f6d617Schristos loop, `d' can be pointing at the end of a string, but it cannot
6065*75f6d617Schristos equal `string2'. */
6066*75f6d617Schristos #ifdef WCHAR
6067*75f6d617Schristos if (size1 > 0 && pos <= csize1)
6068*75f6d617Schristos {
6069*75f6d617Schristos mcnt = count_mbs_length(mbs_offset1, pos);
6070*75f6d617Schristos d = string1 + mcnt;
6071*75f6d617Schristos dend = end_match_1;
6072*75f6d617Schristos }
6073*75f6d617Schristos else
6074*75f6d617Schristos {
6075*75f6d617Schristos mcnt = count_mbs_length(mbs_offset2, pos-csize1);
6076*75f6d617Schristos d = string2 + mcnt;
6077*75f6d617Schristos dend = end_match_2;
6078*75f6d617Schristos }
6079*75f6d617Schristos
6080*75f6d617Schristos if (mcnt < 0)
6081*75f6d617Schristos { /* count_mbs_length returned an error. */
6082*75f6d617Schristos FREE_VARIABLES ();
6083*75f6d617Schristos return -1;
6084*75f6d617Schristos }
6085*75f6d617Schristos #else
6086*75f6d617Schristos if (size1 > 0 && pos <= size1)
6087*75f6d617Schristos {
6088*75f6d617Schristos d = string1 + pos;
6089*75f6d617Schristos dend = end_match_1;
6090*75f6d617Schristos }
6091*75f6d617Schristos else
6092*75f6d617Schristos {
6093*75f6d617Schristos d = string2 + pos - size1;
6094*75f6d617Schristos dend = end_match_2;
6095*75f6d617Schristos }
6096*75f6d617Schristos #endif /* WCHAR */
6097*75f6d617Schristos
6098*75f6d617Schristos DEBUG_PRINT1 ("The compiled pattern is:\n");
6099*75f6d617Schristos DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
6100*75f6d617Schristos DEBUG_PRINT1 ("The string to match is: `");
6101*75f6d617Schristos DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
6102*75f6d617Schristos DEBUG_PRINT1 ("'\n");
6103*75f6d617Schristos
6104*75f6d617Schristos /* This loops over pattern commands. It exits by returning from the
6105*75f6d617Schristos function if the match is complete, or it drops through if the match
6106*75f6d617Schristos fails at this starting point in the input data. */
6107*75f6d617Schristos for (;;)
6108*75f6d617Schristos {
6109*75f6d617Schristos #ifdef _LIBC
6110*75f6d617Schristos DEBUG_PRINT2 ("\n%p: ", p);
6111*75f6d617Schristos #else
6112*75f6d617Schristos DEBUG_PRINT2 ("\n0x%x: ", p);
6113*75f6d617Schristos #endif
6114*75f6d617Schristos
6115*75f6d617Schristos #ifdef __GNUC__
6116*75f6d617Schristos NEXT;
6117*75f6d617Schristos #else
6118*75f6d617Schristos if (p == pend)
6119*75f6d617Schristos #endif
6120*75f6d617Schristos {
6121*75f6d617Schristos #ifdef __GNUC__
6122*75f6d617Schristos end_of_pattern:
6123*75f6d617Schristos #endif
6124*75f6d617Schristos /* End of pattern means we might have succeeded. */
6125*75f6d617Schristos DEBUG_PRINT1 ("end of pattern ... ");
6126*75f6d617Schristos
6127*75f6d617Schristos /* If we haven't matched the entire string, and we want the
6128*75f6d617Schristos longest match, try backtracking. */
6129*75f6d617Schristos if (d != end_match_2)
6130*75f6d617Schristos {
6131*75f6d617Schristos /* 1 if this match ends in the same string (string1 or string2)
6132*75f6d617Schristos as the best previous match. */
6133*75f6d617Schristos boolean same_str_p = (FIRST_STRING_P (match_end)
6134*75f6d617Schristos == MATCHING_IN_FIRST_STRING);
6135*75f6d617Schristos /* 1 if this match is the best seen so far. */
6136*75f6d617Schristos boolean best_match_p;
6137*75f6d617Schristos
6138*75f6d617Schristos /* AIX compiler got confused when this was combined
6139*75f6d617Schristos with the previous declaration. */
6140*75f6d617Schristos if (same_str_p)
6141*75f6d617Schristos best_match_p = d > match_end;
6142*75f6d617Schristos else
6143*75f6d617Schristos best_match_p = !MATCHING_IN_FIRST_STRING;
6144*75f6d617Schristos
6145*75f6d617Schristos DEBUG_PRINT1 ("backtracking.\n");
6146*75f6d617Schristos
6147*75f6d617Schristos if (!FAIL_STACK_EMPTY ())
6148*75f6d617Schristos { /* More failure points to try. */
6149*75f6d617Schristos
6150*75f6d617Schristos /* If exceeds best match so far, save it. */
6151*75f6d617Schristos if (!best_regs_set || best_match_p)
6152*75f6d617Schristos {
6153*75f6d617Schristos best_regs_set = true;
6154*75f6d617Schristos match_end = d;
6155*75f6d617Schristos
6156*75f6d617Schristos DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
6157*75f6d617Schristos
6158*75f6d617Schristos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6159*75f6d617Schristos {
6160*75f6d617Schristos best_regstart[mcnt] = regstart[mcnt];
6161*75f6d617Schristos best_regend[mcnt] = regend[mcnt];
6162*75f6d617Schristos }
6163*75f6d617Schristos }
6164*75f6d617Schristos goto fail;
6165*75f6d617Schristos }
6166*75f6d617Schristos
6167*75f6d617Schristos /* If no failure points, don't restore garbage. And if
6168*75f6d617Schristos last match is real best match, don't restore second
6169*75f6d617Schristos best one. */
6170*75f6d617Schristos else if (best_regs_set && !best_match_p)
6171*75f6d617Schristos {
6172*75f6d617Schristos restore_best_regs:
6173*75f6d617Schristos /* Restore best match. It may happen that `dend ==
6174*75f6d617Schristos end_match_1' while the restored d is in string2.
6175*75f6d617Schristos For example, the pattern `x.*y.*z' against the
6176*75f6d617Schristos strings `x-' and `y-z-', if the two strings are
6177*75f6d617Schristos not consecutive in memory. */
6178*75f6d617Schristos DEBUG_PRINT1 ("Restoring best registers.\n");
6179*75f6d617Schristos
6180*75f6d617Schristos d = match_end;
6181*75f6d617Schristos dend = ((d >= string1 && d <= end1)
6182*75f6d617Schristos ? end_match_1 : end_match_2);
6183*75f6d617Schristos
6184*75f6d617Schristos for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
6185*75f6d617Schristos {
6186*75f6d617Schristos regstart[mcnt] = best_regstart[mcnt];
6187*75f6d617Schristos regend[mcnt] = best_regend[mcnt];
6188*75f6d617Schristos }
6189*75f6d617Schristos }
6190*75f6d617Schristos } /* d != end_match_2 */
6191*75f6d617Schristos
6192*75f6d617Schristos succeed_label:
6193*75f6d617Schristos DEBUG_PRINT1 ("Accepting match.\n");
6194*75f6d617Schristos /* If caller wants register contents data back, do it. */
6195*75f6d617Schristos if (regs && !bufp->no_sub)
6196*75f6d617Schristos {
6197*75f6d617Schristos /* Have the register data arrays been allocated? */
6198*75f6d617Schristos if (bufp->regs_allocated == REGS_UNALLOCATED)
6199*75f6d617Schristos { /* No. So allocate them with malloc. We need one
6200*75f6d617Schristos extra element beyond `num_regs' for the `-1' marker
6201*75f6d617Schristos GNU code uses. */
6202*75f6d617Schristos regs->num_regs = MAX (RE_NREGS, num_regs + 1);
6203*75f6d617Schristos regs->start = TALLOC (regs->num_regs, regoff_t);
6204*75f6d617Schristos regs->end = TALLOC (regs->num_regs, regoff_t);
6205*75f6d617Schristos if (regs->start == NULL || regs->end == NULL)
6206*75f6d617Schristos {
6207*75f6d617Schristos FREE_VARIABLES ();
6208*75f6d617Schristos return -2;
6209*75f6d617Schristos }
6210*75f6d617Schristos bufp->regs_allocated = REGS_REALLOCATE;
6211*75f6d617Schristos }
6212*75f6d617Schristos else if (bufp->regs_allocated == REGS_REALLOCATE)
6213*75f6d617Schristos { /* Yes. If we need more elements than were already
6214*75f6d617Schristos allocated, reallocate them. If we need fewer, just
6215*75f6d617Schristos leave it alone. */
6216*75f6d617Schristos if (regs->num_regs < num_regs + 1)
6217*75f6d617Schristos {
6218*75f6d617Schristos regs->num_regs = num_regs + 1;
6219*75f6d617Schristos RETALLOC (regs->start, regs->num_regs, regoff_t);
6220*75f6d617Schristos RETALLOC (regs->end, regs->num_regs, regoff_t);
6221*75f6d617Schristos if (regs->start == NULL || regs->end == NULL)
6222*75f6d617Schristos {
6223*75f6d617Schristos FREE_VARIABLES ();
6224*75f6d617Schristos return -2;
6225*75f6d617Schristos }
6226*75f6d617Schristos }
6227*75f6d617Schristos }
6228*75f6d617Schristos else
6229*75f6d617Schristos {
6230*75f6d617Schristos /* These braces fend off an "empty body in an else-statement"
6231*75f6d617Schristos warning under GCC when assert expands to nothing. */
6232*75f6d617Schristos assert (bufp->regs_allocated == REGS_FIXED);
6233*75f6d617Schristos }
6234*75f6d617Schristos
6235*75f6d617Schristos /* Convert the pointer data in `regstart' and `regend' to
6236*75f6d617Schristos indices. Register zero has to be set differently,
6237*75f6d617Schristos since we haven't kept track of any info for it. */
6238*75f6d617Schristos if (regs->num_regs > 0)
6239*75f6d617Schristos {
6240*75f6d617Schristos regs->start[0] = pos;
6241*75f6d617Schristos #ifdef WCHAR
6242*75f6d617Schristos if (MATCHING_IN_FIRST_STRING)
6243*75f6d617Schristos regs->end[0] = (mbs_offset1 != NULL ?
6244*75f6d617Schristos mbs_offset1[d-string1] : 0);
6245*75f6d617Schristos else
6246*75f6d617Schristos regs->end[0] = csize1 + (mbs_offset2 != NULL
6247*75f6d617Schristos ? mbs_offset2[d-string2] : 0);
6248*75f6d617Schristos #else
6249*75f6d617Schristos regs->end[0] = (MATCHING_IN_FIRST_STRING
6250*75f6d617Schristos ? ((regoff_t) (d - string1))
6251*75f6d617Schristos : ((regoff_t) (d - string2 + size1)));
6252*75f6d617Schristos #endif /* WCHAR */
6253*75f6d617Schristos }
6254*75f6d617Schristos
6255*75f6d617Schristos /* Go through the first `min (num_regs, regs->num_regs)'
6256*75f6d617Schristos registers, since that is all we initialized. */
6257*75f6d617Schristos for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
6258*75f6d617Schristos mcnt++)
6259*75f6d617Schristos {
6260*75f6d617Schristos if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
6261*75f6d617Schristos regs->start[mcnt] = regs->end[mcnt] = -1;
6262*75f6d617Schristos else
6263*75f6d617Schristos {
6264*75f6d617Schristos regs->start[mcnt]
6265*75f6d617Schristos = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
6266*75f6d617Schristos regs->end[mcnt]
6267*75f6d617Schristos = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
6268*75f6d617Schristos }
6269*75f6d617Schristos }
6270*75f6d617Schristos
6271*75f6d617Schristos /* If the regs structure we return has more elements than
6272*75f6d617Schristos were in the pattern, set the extra elements to -1. If
6273*75f6d617Schristos we (re)allocated the registers, this is the case,
6274*75f6d617Schristos because we always allocate enough to have at least one
6275*75f6d617Schristos -1 at the end. */
6276*75f6d617Schristos for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
6277*75f6d617Schristos regs->start[mcnt] = regs->end[mcnt] = -1;
6278*75f6d617Schristos } /* regs && !bufp->no_sub */
6279*75f6d617Schristos
6280*75f6d617Schristos DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
6281*75f6d617Schristos nfailure_points_pushed, nfailure_points_popped,
6282*75f6d617Schristos nfailure_points_pushed - nfailure_points_popped);
6283*75f6d617Schristos DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
6284*75f6d617Schristos
6285*75f6d617Schristos #ifdef WCHAR
6286*75f6d617Schristos if (MATCHING_IN_FIRST_STRING)
6287*75f6d617Schristos mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
6288*75f6d617Schristos else
6289*75f6d617Schristos mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
6290*75f6d617Schristos csize1;
6291*75f6d617Schristos mcnt -= pos;
6292*75f6d617Schristos #else
6293*75f6d617Schristos mcnt = d - pos - (MATCHING_IN_FIRST_STRING
6294*75f6d617Schristos ? string1 : string2 - size1);
6295*75f6d617Schristos #endif /* WCHAR */
6296*75f6d617Schristos
6297*75f6d617Schristos DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
6298*75f6d617Schristos
6299*75f6d617Schristos FREE_VARIABLES ();
6300*75f6d617Schristos return mcnt;
6301*75f6d617Schristos }
6302*75f6d617Schristos
6303*75f6d617Schristos #ifndef __GNUC__
6304*75f6d617Schristos /* Otherwise match next pattern command. */
6305*75f6d617Schristos switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
6306*75f6d617Schristos {
6307*75f6d617Schristos #endif
6308*75f6d617Schristos /* Ignore these. Used to ignore the n of succeed_n's which
6309*75f6d617Schristos currently have n == 0. */
6310*75f6d617Schristos CASE (no_op):
6311*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING no_op.\n");
6312*75f6d617Schristos NEXT;
6313*75f6d617Schristos
6314*75f6d617Schristos CASE (succeed):
6315*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING succeed.\n");
6316*75f6d617Schristos goto succeed_label;
6317*75f6d617Schristos
6318*75f6d617Schristos /* Match the next n pattern characters exactly. The following
6319*75f6d617Schristos byte in the pattern defines n, and the n bytes after that
6320*75f6d617Schristos are the characters to match. */
6321*75f6d617Schristos CASE (exactn):
6322*75f6d617Schristos #ifdef MBS_SUPPORT
6323*75f6d617Schristos CASE (exactn_bin):
6324*75f6d617Schristos #endif
6325*75f6d617Schristos mcnt = *p++;
6326*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
6327*75f6d617Schristos
6328*75f6d617Schristos /* This is written out as an if-else so we don't waste time
6329*75f6d617Schristos testing `translate' inside the loop. */
6330*75f6d617Schristos if (translate)
6331*75f6d617Schristos {
6332*75f6d617Schristos do
6333*75f6d617Schristos {
6334*75f6d617Schristos PREFETCH ();
6335*75f6d617Schristos #ifdef WCHAR
6336*75f6d617Schristos if (*d <= 0xff)
6337*75f6d617Schristos {
6338*75f6d617Schristos if ((UCHAR_T) translate[(unsigned char) *d++]
6339*75f6d617Schristos != (UCHAR_T) *p++)
6340*75f6d617Schristos goto fail;
6341*75f6d617Schristos }
6342*75f6d617Schristos else
6343*75f6d617Schristos {
6344*75f6d617Schristos if (*d++ != (CHAR_T) *p++)
6345*75f6d617Schristos goto fail;
6346*75f6d617Schristos }
6347*75f6d617Schristos #else
6348*75f6d617Schristos if ((UCHAR_T) translate[(unsigned char) *d++]
6349*75f6d617Schristos != (UCHAR_T) *p++)
6350*75f6d617Schristos goto fail;
6351*75f6d617Schristos #endif /* WCHAR */
6352*75f6d617Schristos }
6353*75f6d617Schristos while (--mcnt);
6354*75f6d617Schristos }
6355*75f6d617Schristos else
6356*75f6d617Schristos {
6357*75f6d617Schristos do
6358*75f6d617Schristos {
6359*75f6d617Schristos PREFETCH ();
6360*75f6d617Schristos if (*d++ != (CHAR_T) *p++) goto fail;
6361*75f6d617Schristos }
6362*75f6d617Schristos while (--mcnt);
6363*75f6d617Schristos }
6364*75f6d617Schristos SET_REGS_MATCHED ();
6365*75f6d617Schristos NEXT;
6366*75f6d617Schristos
6367*75f6d617Schristos
6368*75f6d617Schristos /* Match any character except possibly a newline or a null. */
6369*75f6d617Schristos CASE (anychar):
6370*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING anychar.\n");
6371*75f6d617Schristos
6372*75f6d617Schristos PREFETCH ();
6373*75f6d617Schristos
6374*75f6d617Schristos if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
6375*75f6d617Schristos || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
6376*75f6d617Schristos goto fail;
6377*75f6d617Schristos
6378*75f6d617Schristos SET_REGS_MATCHED ();
6379*75f6d617Schristos DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
6380*75f6d617Schristos d++;
6381*75f6d617Schristos NEXT;
6382*75f6d617Schristos
6383*75f6d617Schristos
6384*75f6d617Schristos CASE (charset):
6385*75f6d617Schristos CASE (charset_not):
6386*75f6d617Schristos {
6387*75f6d617Schristos register UCHAR_T c;
6388*75f6d617Schristos #ifdef WCHAR
6389*75f6d617Schristos unsigned int i, char_class_length, coll_symbol_length,
6390*75f6d617Schristos equiv_class_length, ranges_length, chars_length, length;
6391*75f6d617Schristos CHAR_T *workp, *workp2, *charset_top;
6392*75f6d617Schristos #define WORK_BUFFER_SIZE 128
6393*75f6d617Schristos CHAR_T str_buf[WORK_BUFFER_SIZE];
6394*75f6d617Schristos # ifdef _LIBC
6395*75f6d617Schristos uint32_t nrules;
6396*75f6d617Schristos # endif /* _LIBC */
6397*75f6d617Schristos #endif /* WCHAR */
6398*75f6d617Schristos boolean not = (re_opcode_t) *(p - 1) == charset_not;
6399*75f6d617Schristos
6400*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
6401*75f6d617Schristos PREFETCH ();
6402*75f6d617Schristos c = TRANSLATE (*d); /* The character to match. */
6403*75f6d617Schristos #ifdef WCHAR
6404*75f6d617Schristos # ifdef _LIBC
6405*75f6d617Schristos nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
6406*75f6d617Schristos # endif /* _LIBC */
6407*75f6d617Schristos charset_top = p - 1;
6408*75f6d617Schristos char_class_length = *p++;
6409*75f6d617Schristos coll_symbol_length = *p++;
6410*75f6d617Schristos equiv_class_length = *p++;
6411*75f6d617Schristos ranges_length = *p++;
6412*75f6d617Schristos chars_length = *p++;
6413*75f6d617Schristos /* p points to charset[6], so the next instruction
6414*75f6d617Schristos (charset[6+l+m+n+2*o+p']) is at p[l+m+n+2*o+p'],
6415*75f6d617Schristos where l=length of char_classes, m=length of collating_symbol,
6416*75f6d617Schristos n=length of equivalence_class, o=length of char_range,
6417*75f6d617Schristos p'=length of character. */
6418*75f6d617Schristos workp = p;
6419*75f6d617Schristos /* Update p to indicate the next instruction. */
6420*75f6d617Schristos p += char_class_length + coll_symbol_length+ equiv_class_length +
6421*75f6d617Schristos 2*ranges_length + chars_length;
6422*75f6d617Schristos
6423*75f6d617Schristos /* match with char_class? */
6424*75f6d617Schristos for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
6425*75f6d617Schristos {
6426*75f6d617Schristos wctype_t wctype;
6427*75f6d617Schristos uintptr_t alignedp = ((uintptr_t)workp
6428*75f6d617Schristos + __alignof__(wctype_t) - 1)
6429*75f6d617Schristos & ~(uintptr_t)(__alignof__(wctype_t) - 1);
6430*75f6d617Schristos wctype = *((wctype_t*)alignedp);
6431*75f6d617Schristos workp += CHAR_CLASS_SIZE;
6432*75f6d617Schristos if (iswctype((wint_t)c, wctype))
6433*75f6d617Schristos goto char_set_matched;
6434*75f6d617Schristos }
6435*75f6d617Schristos
6436*75f6d617Schristos /* match with collating_symbol? */
6437*75f6d617Schristos # ifdef _LIBC
6438*75f6d617Schristos if (nrules != 0)
6439*75f6d617Schristos {
6440*75f6d617Schristos const unsigned char *extra = (const unsigned char *)
6441*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
6442*75f6d617Schristos
6443*75f6d617Schristos for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
6444*75f6d617Schristos workp++)
6445*75f6d617Schristos {
6446*75f6d617Schristos int32_t *wextra;
6447*75f6d617Schristos wextra = (int32_t*)(extra + *workp++);
6448*75f6d617Schristos for (i = 0; i < *wextra; ++i)
6449*75f6d617Schristos if (TRANSLATE(d[i]) != wextra[1 + i])
6450*75f6d617Schristos break;
6451*75f6d617Schristos
6452*75f6d617Schristos if (i == *wextra)
6453*75f6d617Schristos {
6454*75f6d617Schristos /* Update d.  Because d will be incremented again at
6455*75f6d617Schristos char_set_matched:, we advance it by one less here. */
6456*75f6d617Schristos d += i - 1;
6457*75f6d617Schristos goto char_set_matched;
6458*75f6d617Schristos }
6459*75f6d617Schristos }
6460*75f6d617Schristos }
6461*75f6d617Schristos else /* (nrules == 0) */
6462*75f6d617Schristos # endif
6463*75f6d617Schristos /* If we can't look up collation data, we use wcscoll
6464*75f6d617Schristos instead. */
6465*75f6d617Schristos {
6466*75f6d617Schristos for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
6467*75f6d617Schristos {
6468*75f6d617Schristos const CHAR_T *backup_d = d, *backup_dend = dend;
6469*75f6d617Schristos length = wcslen (workp);
6470*75f6d617Schristos
6471*75f6d617Schristos /* If wcscoll(the collating symbol, whole string) > 0,
6472*75f6d617Schristos no substring of the string can ever match the
6473*75f6d617Schristos collating symbol. */
6474*75f6d617Schristos if (wcscoll (workp, d) > 0)
6475*75f6d617Schristos {
6476*75f6d617Schristos workp += length + 1;
6477*75f6d617Schristos continue;
6478*75f6d617Schristos }
6479*75f6d617Schristos
6480*75f6d617Schristos /* First, we compare the collating symbol with
6481*75f6d617Schristos the first character of the string.
6482*75f6d617Schristos If it doesn't match, we add the next character to
6483*75f6d617Schristos the compare buffer in turn. */
6484*75f6d617Schristos for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
6485*75f6d617Schristos {
6486*75f6d617Schristos int match;
6487*75f6d617Schristos if (d == dend)
6488*75f6d617Schristos {
6489*75f6d617Schristos if (dend == end_match_2)
6490*75f6d617Schristos break;
6491*75f6d617Schristos d = string2;
6492*75f6d617Schristos dend = end_match_2;
6493*75f6d617Schristos }
6494*75f6d617Schristos
6495*75f6d617Schristos /* add next character to the compare buffer. */
6496*75f6d617Schristos str_buf[i] = TRANSLATE(*d);
6497*75f6d617Schristos str_buf[i+1] = '\0';
6498*75f6d617Schristos
6499*75f6d617Schristos match = wcscoll (workp, str_buf);
6500*75f6d617Schristos if (match == 0)
6501*75f6d617Schristos goto char_set_matched;
6502*75f6d617Schristos
6503*75f6d617Schristos if (match < 0)
6504*75f6d617Schristos /* (str_buf > workp) implies (str_buf + X > workp),
6505*75f6d617Schristos because for all X (str_buf + X > str_buf).
6506*75f6d617Schristos So we don't need to continue this loop. */
6507*75f6d617Schristos break;
6508*75f6d617Schristos
6509*75f6d617Schristos /* Otherwise (str_buf < workp),
6510*75f6d617Schristos (str_buf+next_character) may equal (workp).
6511*75f6d617Schristos So we continue this loop. */
6512*75f6d617Schristos }
6513*75f6d617Schristos /* not matched */
6514*75f6d617Schristos d = backup_d;
6515*75f6d617Schristos dend = backup_dend;
6516*75f6d617Schristos workp += length + 1;
6517*75f6d617Schristos }
6518*75f6d617Schristos }
6519*75f6d617Schristos /* match with equivalence_class? */
6520*75f6d617Schristos # ifdef _LIBC
6521*75f6d617Schristos if (nrules != 0)
6522*75f6d617Schristos {
6523*75f6d617Schristos const CHAR_T *backup_d = d, *backup_dend = dend;
6524*75f6d617Schristos /* Try to match the equivalence class against
6525*75f6d617Schristos those known to the collate implementation. */
6526*75f6d617Schristos const int32_t *table;
6527*75f6d617Schristos const int32_t *weights;
6528*75f6d617Schristos const int32_t *extra;
6529*75f6d617Schristos const int32_t *indirect;
6530*75f6d617Schristos int32_t idx, idx2;
6531*75f6d617Schristos wint_t *cp;
6532*75f6d617Schristos size_t len;
6533*75f6d617Schristos
6534*75f6d617Schristos /* This #include defines a local function! */
6535*75f6d617Schristos # include <locale/weightwc.h>
6536*75f6d617Schristos
6537*75f6d617Schristos table = (const int32_t *)
6538*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
6539*75f6d617Schristos weights = (const wint_t *)
6540*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
6541*75f6d617Schristos extra = (const wint_t *)
6542*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
6543*75f6d617Schristos indirect = (const int32_t *)
6544*75f6d617Schristos _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
6545*75f6d617Schristos
6546*75f6d617Schristos /* Write 1 collating element to str_buf, and
6547*75f6d617Schristos get its index. */
6548*75f6d617Schristos idx2 = 0;
6549*75f6d617Schristos
6550*75f6d617Schristos for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
6551*75f6d617Schristos {
6552*75f6d617Schristos cp = (wint_t*)str_buf;
6553*75f6d617Schristos if (d == dend)
6554*75f6d617Schristos {
6555*75f6d617Schristos if (dend == end_match_2)
6556*75f6d617Schristos break;
6557*75f6d617Schristos d = string2;
6558*75f6d617Schristos dend = end_match_2;
6559*75f6d617Schristos }
6560*75f6d617Schristos str_buf[i] = TRANSLATE(*(d+i));
6561*75f6d617Schristos str_buf[i+1] = '\0'; /* sentinel */
6562*75f6d617Schristos idx2 = findidx ((const wint_t**)&cp);
6563*75f6d617Schristos }
6564*75f6d617Schristos
6565*75f6d617Schristos /* Update d.  Because d will be incremented again at
6566*75f6d617Schristos char_set_matched:, we subtract one here. */
6567*75f6d617Schristos d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
6568*75f6d617Schristos if (d >= dend)
6569*75f6d617Schristos {
6570*75f6d617Schristos if (dend == end_match_2)
6571*75f6d617Schristos d = dend;
6572*75f6d617Schristos else
6573*75f6d617Schristos {
6574*75f6d617Schristos d = string2;
6575*75f6d617Schristos dend = end_match_2;
6576*75f6d617Schristos }
6577*75f6d617Schristos }
6578*75f6d617Schristos
6579*75f6d617Schristos len = weights[idx2];
6580*75f6d617Schristos
6581*75f6d617Schristos for (workp2 = workp + equiv_class_length ; workp < workp2 ;
6582*75f6d617Schristos workp++)
6583*75f6d617Schristos {
6584*75f6d617Schristos idx = (int32_t)*workp;
6585*75f6d617Schristos /* We already checked idx != 0 in regex_compile. */
6586*75f6d617Schristos
6587*75f6d617Schristos if (idx2 != 0 && len == weights[idx])
6588*75f6d617Schristos {
6589*75f6d617Schristos int cnt = 0;
6590*75f6d617Schristos while (cnt < len && (weights[idx + 1 + cnt]
6591*75f6d617Schristos == weights[idx2 + 1 + cnt]))
6592*75f6d617Schristos ++cnt;
6593*75f6d617Schristos
6594*75f6d617Schristos if (cnt == len)
6595*75f6d617Schristos goto char_set_matched;
6596*75f6d617Schristos }
6597*75f6d617Schristos }
6598*75f6d617Schristos /* not matched */
6599*75f6d617Schristos d = backup_d;
6600*75f6d617Schristos dend = backup_dend;
6601*75f6d617Schristos }
6602*75f6d617Schristos else /* (nrules == 0) */
6603*75f6d617Schristos # endif
6604*75f6d617Schristos /* If we can't look up collation data, we use wcscoll
6605*75f6d617Schristos instead. */
6606*75f6d617Schristos {
6607*75f6d617Schristos for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
6608*75f6d617Schristos {
6609*75f6d617Schristos const CHAR_T *backup_d = d, *backup_dend = dend;
6610*75f6d617Schristos length = wcslen (workp);
6611*75f6d617Schristos
6612*75f6d617Schristos /* If wcscoll(the collating symbol, whole string) > 0,
6613*75f6d617Schristos no substring of the string can ever match the
6614*75f6d617Schristos collating symbol. */
6615*75f6d617Schristos if (wcscoll (workp, d) > 0)
6616*75f6d617Schristos {
6617*75f6d617Schristos workp += length + 1;
6618*75f6d617Schristos break;
6619*75f6d617Schristos }
6620*75f6d617Schristos
6621*75f6d617Schristos /* First, we compare the equivalence class with
6622*75f6d617Schristos the first character of the string.
6623*75f6d617Schristos If it doesn't match, we add the next character to
6624*75f6d617Schristos the compare buffer in turn. */
6625*75f6d617Schristos for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
6626*75f6d617Schristos {
6627*75f6d617Schristos int match;
6628*75f6d617Schristos if (d == dend)
6629*75f6d617Schristos {
6630*75f6d617Schristos if (dend == end_match_2)
6631*75f6d617Schristos break;
6632*75f6d617Schristos d = string2;
6633*75f6d617Schristos dend = end_match_2;
6634*75f6d617Schristos }
6635*75f6d617Schristos
6636*75f6d617Schristos /* add next character to the compare buffer. */
6637*75f6d617Schristos str_buf[i] = TRANSLATE(*d);
6638*75f6d617Schristos str_buf[i+1] = '\0';
6639*75f6d617Schristos
6640*75f6d617Schristos match = wcscoll (workp, str_buf);
6641*75f6d617Schristos
6642*75f6d617Schristos if (match == 0)
6643*75f6d617Schristos goto char_set_matched;
6644*75f6d617Schristos
6645*75f6d617Schristos if (match < 0)
6646*75f6d617Schristos /* (str_buf > workp) implies (str_buf + X > workp),
6647*75f6d617Schristos because for all X (str_buf + X > str_buf).
6648*75f6d617Schristos So we don't need to continue this loop. */
6649*75f6d617Schristos break;
6650*75f6d617Schristos
6651*75f6d617Schristos /* Otherwise (str_buf < workp),
6652*75f6d617Schristos (str_buf+next_character) may equal (workp).
6653*75f6d617Schristos So we continue this loop. */
6654*75f6d617Schristos }
6655*75f6d617Schristos /* not matched */
6656*75f6d617Schristos d = backup_d;
6657*75f6d617Schristos dend = backup_dend;
6658*75f6d617Schristos workp += length + 1;
6659*75f6d617Schristos }
6660*75f6d617Schristos }
6661*75f6d617Schristos
6662*75f6d617Schristos /* match with char_range? */
6663*75f6d617Schristos # ifdef _LIBC
6664*75f6d617Schristos if (nrules != 0)
6665*75f6d617Schristos {
6666*75f6d617Schristos uint32_t collseqval;
6667*75f6d617Schristos const char *collseq = (const char *)
6668*75f6d617Schristos _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
6669*75f6d617Schristos
6670*75f6d617Schristos collseqval = collseq_table_lookup (collseq, c);
6671*75f6d617Schristos
6672*75f6d617Schristos for (; workp < p - chars_length ;)
6673*75f6d617Schristos {
6674*75f6d617Schristos uint32_t start_val, end_val;
6675*75f6d617Schristos
6676*75f6d617Schristos /* We have already computed the collation sequence value
6677*75f6d617Schristos of the characters (or collating symbols). */
6678*75f6d617Schristos start_val = (uint32_t) *workp++; /* range_start */
6679*75f6d617Schristos end_val = (uint32_t) *workp++; /* range_end */
6680*75f6d617Schristos
6681*75f6d617Schristos if (start_val <= collseqval && collseqval <= end_val)
6682*75f6d617Schristos goto char_set_matched;
6683*75f6d617Schristos }
6684*75f6d617Schristos }
6685*75f6d617Schristos else
6686*75f6d617Schristos # endif
6687*75f6d617Schristos {
6688*75f6d617Schristos /* We set range_start_char at str_buf[0], range_end_char
6689*75f6d617Schristos at str_buf[4], and compared char at str_buf[2]. */
6690*75f6d617Schristos str_buf[1] = 0;
6691*75f6d617Schristos str_buf[2] = c;
6692*75f6d617Schristos str_buf[3] = 0;
6693*75f6d617Schristos str_buf[5] = 0;
6694*75f6d617Schristos for (; workp < p - chars_length ;)
6695*75f6d617Schristos {
6696*75f6d617Schristos wchar_t *range_start_char, *range_end_char;
6697*75f6d617Schristos
6698*75f6d617Schristos /* match if (range_start_char <= c <= range_end_char). */
6699*75f6d617Schristos
6700*75f6d617Schristos /* If range_start(or end) < 0, we assume -range_start(end)
6701*75f6d617Schristos is the offset of the collating symbol which is specified
6702*75f6d617Schristos as the character of the range start(end). */
6703*75f6d617Schristos
6704*75f6d617Schristos /* range_start */
6705*75f6d617Schristos if (*workp < 0)
6706*75f6d617Schristos range_start_char = charset_top - (*workp++);
6707*75f6d617Schristos else
6708*75f6d617Schristos {
6709*75f6d617Schristos str_buf[0] = *workp++;
6710*75f6d617Schristos range_start_char = str_buf;
6711*75f6d617Schristos }
6712*75f6d617Schristos
6713*75f6d617Schristos /* range_end */
6714*75f6d617Schristos if (*workp < 0)
6715*75f6d617Schristos range_end_char = charset_top - (*workp++);
6716*75f6d617Schristos else
6717*75f6d617Schristos {
6718*75f6d617Schristos str_buf[4] = *workp++;
6719*75f6d617Schristos range_end_char = str_buf + 4;
6720*75f6d617Schristos }
6721*75f6d617Schristos
6722*75f6d617Schristos if (wcscoll (range_start_char, str_buf+2) <= 0
6723*75f6d617Schristos && wcscoll (str_buf+2, range_end_char) <= 0)
6724*75f6d617Schristos goto char_set_matched;
6725*75f6d617Schristos }
6726*75f6d617Schristos }
6727*75f6d617Schristos
6728*75f6d617Schristos /* match with char? */
6729*75f6d617Schristos for (; workp < p ; workp++)
6730*75f6d617Schristos if (c == *workp)
6731*75f6d617Schristos goto char_set_matched;
6732*75f6d617Schristos
6733*75f6d617Schristos not = !not;
6734*75f6d617Schristos
6735*75f6d617Schristos char_set_matched:
6736*75f6d617Schristos if (not) goto fail;
6737*75f6d617Schristos #else
6738*75f6d617Schristos /* Cast to `unsigned' instead of `unsigned char' in case the
6739*75f6d617Schristos bit list is a full 32 bytes long. */
6740*75f6d617Schristos if (c < (unsigned) (*p * BYTEWIDTH)
6741*75f6d617Schristos && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
6742*75f6d617Schristos not = !not;
6743*75f6d617Schristos
6744*75f6d617Schristos p += 1 + *p;
6745*75f6d617Schristos
6746*75f6d617Schristos if (!not) goto fail;
6747*75f6d617Schristos #undef WORK_BUFFER_SIZE
6748*75f6d617Schristos #endif /* WCHAR */
6749*75f6d617Schristos SET_REGS_MATCHED ();
6750*75f6d617Schristos d++;
6751*75f6d617Schristos NEXT;
6752*75f6d617Schristos }
6753*75f6d617Schristos
6754*75f6d617Schristos
6755*75f6d617Schristos /* The beginning of a group is represented by start_memory.
6756*75f6d617Schristos The arguments are the register number in the next byte, and the
6757*75f6d617Schristos number of groups inner to this one in the next. The text
6758*75f6d617Schristos matched within the group is recorded (in the internal
6759*75f6d617Schristos registers data structure) under the register number. */
6760*75f6d617Schristos CASE (start_memory):
6761*75f6d617Schristos DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
6762*75f6d617Schristos (long int) *p, (long int) p[1]);
6763*75f6d617Schristos
6764*75f6d617Schristos /* Find out if this group can match the empty string. */
6765*75f6d617Schristos p1 = p; /* To send to group_match_null_string_p. */
6766*75f6d617Schristos
6767*75f6d617Schristos if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
6768*75f6d617Schristos REG_MATCH_NULL_STRING_P (reg_info[*p])
6769*75f6d617Schristos = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
6770*75f6d617Schristos
6771*75f6d617Schristos /* Save the position in the string where we were the last time
6772*75f6d617Schristos we were at this open-group operator in case the group is
6773*75f6d617Schristos operated upon by a repetition operator, e.g., with `(a*)*b'
6774*75f6d617Schristos against `ab'; then we want to ignore where we are now in
6775*75f6d617Schristos the string in case this attempt to match fails. */
6776*75f6d617Schristos old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6777*75f6d617Schristos ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
6778*75f6d617Schristos : regstart[*p];
6779*75f6d617Schristos DEBUG_PRINT2 (" old_regstart: %d\n",
6780*75f6d617Schristos POINTER_TO_OFFSET (old_regstart[*p]));
6781*75f6d617Schristos
6782*75f6d617Schristos regstart[*p] = d;
6783*75f6d617Schristos DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
6784*75f6d617Schristos
6785*75f6d617Schristos IS_ACTIVE (reg_info[*p]) = 1;
6786*75f6d617Schristos MATCHED_SOMETHING (reg_info[*p]) = 0;
6787*75f6d617Schristos
6788*75f6d617Schristos /* Clear this whenever we change the register activity status. */
6789*75f6d617Schristos set_regs_matched_done = 0;
6790*75f6d617Schristos
6791*75f6d617Schristos /* This is the new highest active register. */
6792*75f6d617Schristos highest_active_reg = *p;
6793*75f6d617Schristos
6794*75f6d617Schristos /* If nothing was active before, this is the new lowest active
6795*75f6d617Schristos register. */
6796*75f6d617Schristos if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
6797*75f6d617Schristos lowest_active_reg = *p;
6798*75f6d617Schristos
6799*75f6d617Schristos /* Move past the register number and inner group count. */
6800*75f6d617Schristos p += 2;
6801*75f6d617Schristos just_past_start_mem = p;
6802*75f6d617Schristos
6803*75f6d617Schristos NEXT;
6804*75f6d617Schristos
6805*75f6d617Schristos
6806*75f6d617Schristos /* The stop_memory opcode represents the end of a group. Its
6807*75f6d617Schristos arguments are the same as start_memory's: the register
6808*75f6d617Schristos number, and the number of inner groups. */
6809*75f6d617Schristos CASE (stop_memory):
6810*75f6d617Schristos DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
6811*75f6d617Schristos (long int) *p, (long int) p[1]);
6812*75f6d617Schristos
6813*75f6d617Schristos /* We need to save the string position the last time we were at
6814*75f6d617Schristos this close-group operator in case the group is operated
6815*75f6d617Schristos upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
6816*75f6d617Schristos against `aba'; then we want to ignore where we are now in
6817*75f6d617Schristos the string in case this attempt to match fails. */
6818*75f6d617Schristos old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
6819*75f6d617Schristos ? REG_UNSET (regend[*p]) ? d : regend[*p]
6820*75f6d617Schristos : regend[*p];
6821*75f6d617Schristos DEBUG_PRINT2 (" old_regend: %d\n",
6822*75f6d617Schristos POINTER_TO_OFFSET (old_regend[*p]));
6823*75f6d617Schristos
6824*75f6d617Schristos regend[*p] = d;
6825*75f6d617Schristos DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
6826*75f6d617Schristos
6827*75f6d617Schristos /* This register isn't active anymore. */
6828*75f6d617Schristos IS_ACTIVE (reg_info[*p]) = 0;
6829*75f6d617Schristos
6830*75f6d617Schristos /* Clear this whenever we change the register activity status. */
6831*75f6d617Schristos set_regs_matched_done = 0;
6832*75f6d617Schristos
6833*75f6d617Schristos /* If this was the only register active, nothing is active
6834*75f6d617Schristos anymore. */
6835*75f6d617Schristos if (lowest_active_reg == highest_active_reg)
6836*75f6d617Schristos {
6837*75f6d617Schristos lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6838*75f6d617Schristos highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6839*75f6d617Schristos }
6840*75f6d617Schristos else
6841*75f6d617Schristos { /* We must scan for the new highest active register, since
6842*75f6d617Schristos it isn't necessarily one less than now: consider
6843*75f6d617Schristos (a(b)c(d(e)f)g). When group 3 ends, after the f), the
6844*75f6d617Schristos new highest active register is 1. */
6845*75f6d617Schristos UCHAR_T r = *p - 1;
6846*75f6d617Schristos while (r > 0 && !IS_ACTIVE (reg_info[r]))
6847*75f6d617Schristos r--;
6848*75f6d617Schristos
6849*75f6d617Schristos /* If we end up at register zero, that means that we saved
6850*75f6d617Schristos the registers as the result of an `on_failure_jump', not
6851*75f6d617Schristos a `start_memory', and we jumped to past the innermost
6852*75f6d617Schristos `stop_memory'. For example, in ((.)*) we save
6853*75f6d617Schristos registers 1 and 2 as a result of the *, but when we pop
6854*75f6d617Schristos back to the second ), we are at the stop_memory 1.
6855*75f6d617Schristos Thus, nothing is active. */
6856*75f6d617Schristos if (r == 0)
6857*75f6d617Schristos {
6858*75f6d617Schristos lowest_active_reg = NO_LOWEST_ACTIVE_REG;
6859*75f6d617Schristos highest_active_reg = NO_HIGHEST_ACTIVE_REG;
6860*75f6d617Schristos }
6861*75f6d617Schristos else
6862*75f6d617Schristos highest_active_reg = r;
6863*75f6d617Schristos }
6864*75f6d617Schristos
6865*75f6d617Schristos /* If just failed to match something this time around with a
6866*75f6d617Schristos group that's operated on by a repetition operator, try to
6867*75f6d617Schristos force exit from the ``loop'', and restore the register
6868*75f6d617Schristos information for this group that we had before trying this
6869*75f6d617Schristos last match. */
6870*75f6d617Schristos if ((!MATCHED_SOMETHING (reg_info[*p])
6871*75f6d617Schristos || just_past_start_mem == p - 1)
6872*75f6d617Schristos && (p + 2) < pend)
6873*75f6d617Schristos {
6874*75f6d617Schristos boolean is_a_jump_n = false;
6875*75f6d617Schristos
6876*75f6d617Schristos p1 = p + 2;
6877*75f6d617Schristos mcnt = 0;
6878*75f6d617Schristos switch ((re_opcode_t) *p1++)
6879*75f6d617Schristos {
6880*75f6d617Schristos case jump_n:
6881*75f6d617Schristos is_a_jump_n = true;
6882*75f6d617Schristos case pop_failure_jump:
6883*75f6d617Schristos case maybe_pop_jump:
6884*75f6d617Schristos case jump:
6885*75f6d617Schristos case dummy_failure_jump:
6886*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6887*75f6d617Schristos if (is_a_jump_n)
6888*75f6d617Schristos p1 += OFFSET_ADDRESS_SIZE;
6889*75f6d617Schristos break;
6890*75f6d617Schristos
6891*75f6d617Schristos default:
6892*75f6d617Schristos /* do nothing */ ;
6893*75f6d617Schristos }
6894*75f6d617Schristos p1 += mcnt;
6895*75f6d617Schristos
6896*75f6d617Schristos /* If the next operation is a jump backwards in the pattern
6897*75f6d617Schristos to an on_failure_jump right before the start_memory
6898*75f6d617Schristos corresponding to this stop_memory, exit from the loop
6899*75f6d617Schristos by forcing a failure after pushing on the stack the
6900*75f6d617Schristos on_failure_jump's jump in the pattern, and d. */
6901*75f6d617Schristos if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
6902*75f6d617Schristos && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
6903*75f6d617Schristos && p1[2+OFFSET_ADDRESS_SIZE] == *p)
6904*75f6d617Schristos {
6905*75f6d617Schristos /* If this group ever matched anything, then restore
6906*75f6d617Schristos what its registers were before trying this last
6907*75f6d617Schristos failed match, e.g., with `(a*)*b' against `ab' for
6908*75f6d617Schristos regstart[1], and, e.g., with `((a*)*(b*)*)*'
6909*75f6d617Schristos against `aba' for regend[3].
6910*75f6d617Schristos
6911*75f6d617Schristos Also restore the registers for inner groups for,
6912*75f6d617Schristos e.g., `((a*)(b*))*' against `aba' (register 3 would
6913*75f6d617Schristos otherwise get trashed). */
6914*75f6d617Schristos
6915*75f6d617Schristos if (EVER_MATCHED_SOMETHING (reg_info[*p]))
6916*75f6d617Schristos {
6917*75f6d617Schristos unsigned r;
6918*75f6d617Schristos
6919*75f6d617Schristos EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
6920*75f6d617Schristos
6921*75f6d617Schristos /* Restore this and inner groups' (if any) registers. */
6922*75f6d617Schristos for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
6923*75f6d617Schristos r++)
6924*75f6d617Schristos {
6925*75f6d617Schristos regstart[r] = old_regstart[r];
6926*75f6d617Schristos
6927*75f6d617Schristos /* xx why this test? */
6928*75f6d617Schristos if (old_regend[r] >= regstart[r])
6929*75f6d617Schristos regend[r] = old_regend[r];
6930*75f6d617Schristos }
6931*75f6d617Schristos }
6932*75f6d617Schristos p1++;
6933*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
6934*75f6d617Schristos PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
6935*75f6d617Schristos
6936*75f6d617Schristos goto fail;
6937*75f6d617Schristos }
6938*75f6d617Schristos }
6939*75f6d617Schristos
6940*75f6d617Schristos /* Move past the register number and the inner group count. */
6941*75f6d617Schristos p += 2;
6942*75f6d617Schristos NEXT;
6943*75f6d617Schristos
6944*75f6d617Schristos
6945*75f6d617Schristos /* \<digit> has been turned into a `duplicate' command which is
6946*75f6d617Schristos followed by the numeric value of <digit> as the register number. */
6947*75f6d617Schristos CASE (duplicate):
6948*75f6d617Schristos {
6949*75f6d617Schristos register const CHAR_T *d2, *dend2;
6950*75f6d617Schristos int regno = *p++; /* Get which register to match against. */
6951*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
6952*75f6d617Schristos
6953*75f6d617Schristos /* Can't back reference a group which we've never matched. */
6954*75f6d617Schristos if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
6955*75f6d617Schristos goto fail;
6956*75f6d617Schristos
6957*75f6d617Schristos /* Where in input to try to start matching. */
6958*75f6d617Schristos d2 = regstart[regno];
6959*75f6d617Schristos
6960*75f6d617Schristos /* Where to stop matching; if both the place to start and
6961*75f6d617Schristos the place to stop matching are in the same string, then
6962*75f6d617Schristos set to the place to stop, otherwise, for now have to use
6963*75f6d617Schristos the end of the first string. */
6964*75f6d617Schristos
6965*75f6d617Schristos dend2 = ((FIRST_STRING_P (regstart[regno])
6966*75f6d617Schristos == FIRST_STRING_P (regend[regno]))
6967*75f6d617Schristos ? regend[regno] : end_match_1);
6968*75f6d617Schristos for (;;)
6969*75f6d617Schristos {
6970*75f6d617Schristos /* If necessary, advance to next segment in register
6971*75f6d617Schristos contents. */
6972*75f6d617Schristos while (d2 == dend2)
6973*75f6d617Schristos {
6974*75f6d617Schristos if (dend2 == end_match_2) break;
6975*75f6d617Schristos if (dend2 == regend[regno]) break;
6976*75f6d617Schristos
6977*75f6d617Schristos /* End of string1 => advance to string2. */
6978*75f6d617Schristos d2 = string2;
6979*75f6d617Schristos dend2 = regend[regno];
6980*75f6d617Schristos }
6981*75f6d617Schristos /* At end of register contents => success */
6982*75f6d617Schristos if (d2 == dend2) break;
6983*75f6d617Schristos
6984*75f6d617Schristos /* If necessary, advance to next segment in data. */
6985*75f6d617Schristos PREFETCH ();
6986*75f6d617Schristos
6987*75f6d617Schristos /* How many characters left in this segment to match. */
6988*75f6d617Schristos mcnt = dend - d;
6989*75f6d617Schristos
6990*75f6d617Schristos /* Want how many consecutive characters we can match in
6991*75f6d617Schristos one shot, so, if necessary, adjust the count. */
6992*75f6d617Schristos if (mcnt > dend2 - d2)
6993*75f6d617Schristos mcnt = dend2 - d2;
6994*75f6d617Schristos
6995*75f6d617Schristos /* Compare that many; failure if mismatch, else move
6996*75f6d617Schristos past them. */
6997*75f6d617Schristos if (translate
6998*75f6d617Schristos ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
6999*75f6d617Schristos : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
7000*75f6d617Schristos goto fail;
7001*75f6d617Schristos d += mcnt, d2 += mcnt;
7002*75f6d617Schristos
7003*75f6d617Schristos /* Do this because we've matched some characters. */
7004*75f6d617Schristos SET_REGS_MATCHED ();
7005*75f6d617Schristos }
7006*75f6d617Schristos }
7007*75f6d617Schristos NEXT;
7008*75f6d617Schristos
7009*75f6d617Schristos
7010*75f6d617Schristos /* begline matches the empty string at the beginning of the string
7011*75f6d617Schristos (unless `not_bol' is set in `bufp'), and, if
7012*75f6d617Schristos `newline_anchor' is set, after newlines. */
7013*75f6d617Schristos CASE (begline):
7014*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING begline.\n");
7015*75f6d617Schristos
7016*75f6d617Schristos if (AT_STRINGS_BEG (d))
7017*75f6d617Schristos {
7018*75f6d617Schristos if (!bufp->not_bol)
7019*75f6d617Schristos {
7020*75f6d617Schristos NEXT;
7021*75f6d617Schristos }
7022*75f6d617Schristos }
7023*75f6d617Schristos else if (d[-1] == '\n' && bufp->newline_anchor)
7024*75f6d617Schristos {
7025*75f6d617Schristos NEXT;
7026*75f6d617Schristos }
7027*75f6d617Schristos /* In all other cases, we fail. */
7028*75f6d617Schristos goto fail;
7029*75f6d617Schristos
7030*75f6d617Schristos
7031*75f6d617Schristos /* endline is the dual of begline. */
7032*75f6d617Schristos CASE (endline):
7033*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING endline.\n");
7034*75f6d617Schristos
7035*75f6d617Schristos if (AT_STRINGS_END (d))
7036*75f6d617Schristos {
7037*75f6d617Schristos if (!bufp->not_eol)
7038*75f6d617Schristos {
7039*75f6d617Schristos NEXT;
7040*75f6d617Schristos }
7041*75f6d617Schristos }
7042*75f6d617Schristos
7043*75f6d617Schristos /* We have to ``prefetch'' the next character. */
7044*75f6d617Schristos else if ((d == end1 ? *string2 : *d) == '\n'
7045*75f6d617Schristos && bufp->newline_anchor)
7046*75f6d617Schristos {
7047*75f6d617Schristos NEXT;
7048*75f6d617Schristos }
7049*75f6d617Schristos goto fail;
7050*75f6d617Schristos
7051*75f6d617Schristos
7052*75f6d617Schristos /* Match at the very beginning of the data. */
7053*75f6d617Schristos CASE (begbuf):
7054*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING begbuf.\n");
7055*75f6d617Schristos if (AT_STRINGS_BEG (d))
7056*75f6d617Schristos {
7057*75f6d617Schristos NEXT;
7058*75f6d617Schristos }
7059*75f6d617Schristos goto fail;
7060*75f6d617Schristos
7061*75f6d617Schristos
7062*75f6d617Schristos /* Match at the very end of the data. */
7063*75f6d617Schristos CASE (endbuf):
7064*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING endbuf.\n");
7065*75f6d617Schristos if (AT_STRINGS_END (d))
7066*75f6d617Schristos {
7067*75f6d617Schristos NEXT;
7068*75f6d617Schristos }
7069*75f6d617Schristos goto fail;
7070*75f6d617Schristos
7071*75f6d617Schristos
7072*75f6d617Schristos /* on_failure_keep_string_jump is used to optimize `.*\n'. It
7073*75f6d617Schristos pushes NULL as the value for the string on the stack. Then
7074*75f6d617Schristos `pop_failure_point' will keep the current value for the
7075*75f6d617Schristos string, instead of restoring it. To see why, consider
7076*75f6d617Schristos matching `foo\nbar' against `.*\n'. The .* matches the foo;
7077*75f6d617Schristos then the . fails against the \n. But the next thing we want
7078*75f6d617Schristos to do is match the \n against the \n; if we restored the
7079*75f6d617Schristos string value, we would be back at the foo.
7080*75f6d617Schristos
7081*75f6d617Schristos Because this is used only in specific cases, we don't need to
7082*75f6d617Schristos check all the things that `on_failure_jump' does, to make
7083*75f6d617Schristos sure the right things get saved on the stack. Hence we don't
7084*75f6d617Schristos share its code. The only reason to push anything on the
7085*75f6d617Schristos stack at all is that otherwise we would have to change
7086*75f6d617Schristos `anychar's code to do something besides goto fail in this
7087*75f6d617Schristos case; that seems worse than this. */
7088*75f6d617Schristos CASE (on_failure_keep_string_jump):
7089*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
7090*75f6d617Schristos
7091*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p);
7092*75f6d617Schristos #ifdef _LIBC
7093*75f6d617Schristos DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
7094*75f6d617Schristos #else
7095*75f6d617Schristos DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
7096*75f6d617Schristos #endif
7097*75f6d617Schristos
7098*75f6d617Schristos PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
7099*75f6d617Schristos NEXT;
7100*75f6d617Schristos
7101*75f6d617Schristos
7102*75f6d617Schristos /* Uses of on_failure_jump:
7103*75f6d617Schristos
7104*75f6d617Schristos Each alternative starts with an on_failure_jump that points
7105*75f6d617Schristos to the beginning of the next alternative. Each alternative
7106*75f6d617Schristos except the last ends with a jump that in effect jumps past
7107*75f6d617Schristos the rest of the alternatives. (They really jump to the
7108*75f6d617Schristos ending jump of the following alternative, because tensioning
7109*75f6d617Schristos these jumps is a hassle.)
7110*75f6d617Schristos
7111*75f6d617Schristos Repeats start with an on_failure_jump that points past both
7112*75f6d617Schristos the repetition text and either the following jump or
7113*75f6d617Schristos pop_failure_jump back to this on_failure_jump. */
7114*75f6d617Schristos CASE (on_failure_jump):
7115*75f6d617Schristos on_failure:
7116*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING on_failure_jump");
7117*75f6d617Schristos
7118*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p);
7119*75f6d617Schristos #ifdef _LIBC
7120*75f6d617Schristos DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
7121*75f6d617Schristos #else
7122*75f6d617Schristos DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
7123*75f6d617Schristos #endif
7124*75f6d617Schristos
7125*75f6d617Schristos /* If this on_failure_jump comes right before a group (i.e.,
7126*75f6d617Schristos the original * applied to a group), save the information
7127*75f6d617Schristos for that group and all inner ones, so that if we fail back
7128*75f6d617Schristos to this point, the group's information will be correct.
7129*75f6d617Schristos For example, in \(a*\)*\1, we need the preceding group,
7130*75f6d617Schristos and in \(zz\(a*\)b*\)\2, we need the inner group. */
7131*75f6d617Schristos
7132*75f6d617Schristos /* We can't use `p' to check ahead because we push
7133*75f6d617Schristos a failure point to `p + mcnt' after we do this. */
7134*75f6d617Schristos p1 = p;
7135*75f6d617Schristos
7136*75f6d617Schristos /* We need to skip no_op's before we look for the
7137*75f6d617Schristos start_memory in case this on_failure_jump is happening as
7138*75f6d617Schristos the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
7139*75f6d617Schristos against aba. */
7140*75f6d617Schristos while (p1 < pend && (re_opcode_t) *p1 == no_op)
7141*75f6d617Schristos p1++;
7142*75f6d617Schristos
7143*75f6d617Schristos if (p1 < pend && (re_opcode_t) *p1 == start_memory)
7144*75f6d617Schristos {
7145*75f6d617Schristos /* We have a new highest active register now. This will
7146*75f6d617Schristos get reset at the start_memory we are about to get to,
7147*75f6d617Schristos but we will have saved all the registers relevant to
7148*75f6d617Schristos this repetition op, as described above. */
7149*75f6d617Schristos highest_active_reg = *(p1 + 1) + *(p1 + 2);
7150*75f6d617Schristos if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
7151*75f6d617Schristos lowest_active_reg = *(p1 + 1);
7152*75f6d617Schristos }
7153*75f6d617Schristos
7154*75f6d617Schristos DEBUG_PRINT1 (":\n");
7155*75f6d617Schristos PUSH_FAILURE_POINT (p + mcnt, d, -2);
7156*75f6d617Schristos NEXT;
7157*75f6d617Schristos
7158*75f6d617Schristos
7159*75f6d617Schristos /* A smart repeat ends with `maybe_pop_jump'.
7160*75f6d617Schristos We change it to either `pop_failure_jump' or `jump'. */
7161*75f6d617Schristos CASE (maybe_pop_jump):
7162*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p);
7163*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
7164*75f6d617Schristos {
7165*75f6d617Schristos register UCHAR_T *p2 = p;
7166*75f6d617Schristos
7167*75f6d617Schristos /* Compare the beginning of the repeat with what in the
7168*75f6d617Schristos pattern follows its end. If we can establish that there
7169*75f6d617Schristos is nothing that they would both match, i.e., nothing that
7170*75f6d617Schristos would force us to backtrack (as there is in, e.g., `a*a'),
7171*75f6d617Schristos then we can change to pop_failure_jump, because we'll
7172*75f6d617Schristos never have to backtrack.
7173*75f6d617Schristos
7174*75f6d617Schristos This is not true in the case of alternatives: in
7175*75f6d617Schristos `(a|ab)*' we do need to backtrack to the `ab' alternative
7176*75f6d617Schristos (e.g., if the string was `ab'). But instead of trying to
7177*75f6d617Schristos detect that here, the alternative has put on a dummy
7178*75f6d617Schristos failure point which is what we will end up popping. */
7179*75f6d617Schristos
7180*75f6d617Schristos /* Skip over open/close-group commands.
7181*75f6d617Schristos If what follows this loop is a ...+ construct,
7182*75f6d617Schristos look at what begins its body, since we will have to
7183*75f6d617Schristos match at least one of that. */
7184*75f6d617Schristos while (1)
7185*75f6d617Schristos {
7186*75f6d617Schristos if (p2 + 2 < pend
7187*75f6d617Schristos && ((re_opcode_t) *p2 == stop_memory
7188*75f6d617Schristos || (re_opcode_t) *p2 == start_memory))
7189*75f6d617Schristos p2 += 3;
7190*75f6d617Schristos else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
7191*75f6d617Schristos && (re_opcode_t) *p2 == dummy_failure_jump)
7192*75f6d617Schristos p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
7193*75f6d617Schristos else
7194*75f6d617Schristos break;
7195*75f6d617Schristos }
7196*75f6d617Schristos
7197*75f6d617Schristos p1 = p + mcnt;
7198*75f6d617Schristos /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
7199*75f6d617Schristos to the `maybe_pop_jump' of this case. Examine what
7200*75f6d617Schristos follows. */
7201*75f6d617Schristos
7202*75f6d617Schristos /* If we're at the end of the pattern, we can change. */
7203*75f6d617Schristos if (p2 == pend)
7204*75f6d617Schristos {
7205*75f6d617Schristos /* Consider what happens when matching ":\(.*\)"
7206*75f6d617Schristos against ":/". I don't really understand this code
7207*75f6d617Schristos yet. */
7208*75f6d617Schristos p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7209*75f6d617Schristos pop_failure_jump;
7210*75f6d617Schristos DEBUG_PRINT1
7211*75f6d617Schristos (" End of pattern: change to `pop_failure_jump'.\n");
7212*75f6d617Schristos }
7213*75f6d617Schristos
7214*75f6d617Schristos else if ((re_opcode_t) *p2 == exactn
7215*75f6d617Schristos #ifdef MBS_SUPPORT
7216*75f6d617Schristos || (re_opcode_t) *p2 == exactn_bin
7217*75f6d617Schristos #endif
7218*75f6d617Schristos || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
7219*75f6d617Schristos {
7220*75f6d617Schristos register UCHAR_T c
7221*75f6d617Schristos = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
7222*75f6d617Schristos
7223*75f6d617Schristos if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
7224*75f6d617Schristos #ifdef MBS_SUPPORT
7225*75f6d617Schristos || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
7226*75f6d617Schristos #endif
7227*75f6d617Schristos ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
7228*75f6d617Schristos {
7229*75f6d617Schristos p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
7230*75f6d617Schristos pop_failure_jump;
7231*75f6d617Schristos #ifdef WCHAR
7232*75f6d617Schristos DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
7233*75f6d617Schristos (wint_t) c,
7234*75f6d617Schristos (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
7235*75f6d617Schristos #else
7236*75f6d617Schristos DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
7237*75f6d617Schristos (char) c,
7238*75f6d617Schristos (char) p1[3+OFFSET_ADDRESS_SIZE]);
7239*75f6d617Schristos #endif
7240*75f6d617Schristos }
7241*75f6d617Schristos
7242*75f6d617Schristos #ifndef WCHAR
7243*75f6d617Schristos else if ((re_opcode_t) p1[3] == charset
7244*75f6d617Schristos || (re_opcode_t) p1[3] == charset_not)
7245*75f6d617Schristos {
7246*75f6d617Schristos int not = (re_opcode_t) p1[3] == charset_not;
7247*75f6d617Schristos
7248*75f6d617Schristos if (c < (unsigned) (p1[4] * BYTEWIDTH)
7249*75f6d617Schristos && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
7250*75f6d617Schristos not = !not;
7251*75f6d617Schristos
7252*75f6d617Schristos /* `not' is equal to 1 if c would match, which means
7253*75f6d617Schristos that we can't change to pop_failure_jump. */
7254*75f6d617Schristos if (!not)
7255*75f6d617Schristos {
7256*75f6d617Schristos p[-3] = (unsigned char) pop_failure_jump;
7257*75f6d617Schristos DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7258*75f6d617Schristos }
7259*75f6d617Schristos }
7260*75f6d617Schristos #endif /* not WCHAR */
7261*75f6d617Schristos }
7262*75f6d617Schristos #ifndef WCHAR
7263*75f6d617Schristos else if ((re_opcode_t) *p2 == charset)
7264*75f6d617Schristos {
7265*75f6d617Schristos /* We win if the first character of the loop is not part
7266*75f6d617Schristos of the charset. */
7267*75f6d617Schristos if ((re_opcode_t) p1[3] == exactn
7268*75f6d617Schristos && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
7269*75f6d617Schristos && (p2[2 + p1[5] / BYTEWIDTH]
7270*75f6d617Schristos & (1 << (p1[5] % BYTEWIDTH)))))
7271*75f6d617Schristos {
7272*75f6d617Schristos p[-3] = (unsigned char) pop_failure_jump;
7273*75f6d617Schristos DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7274*75f6d617Schristos }
7275*75f6d617Schristos
7276*75f6d617Schristos else if ((re_opcode_t) p1[3] == charset_not)
7277*75f6d617Schristos {
7278*75f6d617Schristos int idx;
7279*75f6d617Schristos /* We win if the charset_not inside the loop
7280*75f6d617Schristos lists every character listed in the charset after. */
7281*75f6d617Schristos for (idx = 0; idx < (int) p2[1]; idx++)
7282*75f6d617Schristos if (! (p2[2 + idx] == 0
7283*75f6d617Schristos || (idx < (int) p1[4]
7284*75f6d617Schristos && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
7285*75f6d617Schristos break;
7286*75f6d617Schristos
7287*75f6d617Schristos if (idx == p2[1])
7288*75f6d617Schristos {
7289*75f6d617Schristos p[-3] = (unsigned char) pop_failure_jump;
7290*75f6d617Schristos DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7291*75f6d617Schristos }
7292*75f6d617Schristos }
7293*75f6d617Schristos else if ((re_opcode_t) p1[3] == charset)
7294*75f6d617Schristos {
7295*75f6d617Schristos int idx;
7296*75f6d617Schristos /* We win if the charset inside the loop
7297*75f6d617Schristos has no overlap with the one after the loop. */
7298*75f6d617Schristos for (idx = 0;
7299*75f6d617Schristos idx < (int) p2[1] && idx < (int) p1[4];
7300*75f6d617Schristos idx++)
7301*75f6d617Schristos if ((p2[2 + idx] & p1[5 + idx]) != 0)
7302*75f6d617Schristos break;
7303*75f6d617Schristos
7304*75f6d617Schristos if (idx == p2[1] || idx == p1[4])
7305*75f6d617Schristos {
7306*75f6d617Schristos p[-3] = (unsigned char) pop_failure_jump;
7307*75f6d617Schristos DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
7308*75f6d617Schristos }
7309*75f6d617Schristos }
7310*75f6d617Schristos }
7311*75f6d617Schristos #endif /* not WCHAR */
7312*75f6d617Schristos }
7313*75f6d617Schristos p -= OFFSET_ADDRESS_SIZE; /* Point at relative address again. */
7314*75f6d617Schristos if ((re_opcode_t) p[-1] != pop_failure_jump)
7315*75f6d617Schristos {
7316*75f6d617Schristos p[-1] = (UCHAR_T) jump;
7317*75f6d617Schristos DEBUG_PRINT1 (" Match => jump.\n");
7318*75f6d617Schristos goto unconditional_jump;
7319*75f6d617Schristos }
7320*75f6d617Schristos /* Note fall through. */
7321*75f6d617Schristos
7322*75f6d617Schristos
7323*75f6d617Schristos /* The end of a simple repeat has a pop_failure_jump back to
7324*75f6d617Schristos its matching on_failure_jump, where the latter will push a
7325*75f6d617Schristos failure point. The pop_failure_jump takes off failure
7326*75f6d617Schristos points put on by this pop_failure_jump's matching
7327*75f6d617Schristos on_failure_jump; we got through the pattern to here from the
7328*75f6d617Schristos matching on_failure_jump, so didn't fail. */
7329*75f6d617Schristos CASE (pop_failure_jump):
7330*75f6d617Schristos {
7331*75f6d617Schristos /* We need to pass separate storage for the lowest and
7332*75f6d617Schristos highest registers, even though we don't care about the
7333*75f6d617Schristos actual values. Otherwise, we will restore only one
7334*75f6d617Schristos register from the stack, since lowest will == highest in
7335*75f6d617Schristos `pop_failure_point'. */
7336*75f6d617Schristos active_reg_t dummy_low_reg, dummy_high_reg;
7337*75f6d617Schristos UCHAR_T *pdummy = NULL;
7338*75f6d617Schristos const CHAR_T *sdummy = NULL;
7339*75f6d617Schristos
7340*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
7341*75f6d617Schristos POP_FAILURE_POINT (sdummy, pdummy,
7342*75f6d617Schristos dummy_low_reg, dummy_high_reg,
7343*75f6d617Schristos reg_dummy, reg_dummy, reg_info_dummy);
7344*75f6d617Schristos }
7345*75f6d617Schristos /* Note fall through. */
7346*75f6d617Schristos
7347*75f6d617Schristos unconditional_jump:
7348*75f6d617Schristos #ifdef _LIBC
7349*75f6d617Schristos DEBUG_PRINT2 ("\n%p: ", p);
7350*75f6d617Schristos #else
7351*75f6d617Schristos DEBUG_PRINT2 ("\n0x%x: ", p);
7352*75f6d617Schristos #endif
7353*75f6d617Schristos /* Note fall through. */
7354*75f6d617Schristos
7355*75f6d617Schristos /* Unconditionally jump (without popping any failure points). */
7356*75f6d617Schristos CASE (jump):
7357*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
7358*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
7359*75f6d617Schristos p += mcnt; /* Do the jump. */
7360*75f6d617Schristos #ifdef _LIBC
7361*75f6d617Schristos DEBUG_PRINT2 ("(to %p).\n", p);
7362*75f6d617Schristos #else
7363*75f6d617Schristos DEBUG_PRINT2 ("(to 0x%x).\n", p);
7364*75f6d617Schristos #endif
7365*75f6d617Schristos NEXT;
7366*75f6d617Schristos
7367*75f6d617Schristos
7368*75f6d617Schristos /* We need this opcode so we can detect where alternatives end
7369*75f6d617Schristos in `group_match_null_string_p' et al. */
7370*75f6d617Schristos CASE (jump_past_alt):
7371*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
7372*75f6d617Schristos goto unconditional_jump;
7373*75f6d617Schristos
7374*75f6d617Schristos
7375*75f6d617Schristos /* Normally, the on_failure_jump pushes a failure point, which
7376*75f6d617Schristos then gets popped at pop_failure_jump. We will end up at
7377*75f6d617Schristos pop_failure_jump, also, and with a pattern of, say, `a+', we
7378*75f6d617Schristos are skipping over the on_failure_jump, so we have to push
7379*75f6d617Schristos something meaningless for pop_failure_jump to pop. */
7380*75f6d617Schristos CASE (dummy_failure_jump):
7381*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
7382*75f6d617Schristos /* It doesn't matter what we push for the string here. What
7383*75f6d617Schristos the code at `fail' tests is the value for the pattern. */
7384*75f6d617Schristos PUSH_FAILURE_POINT (NULL, NULL, -2);
7385*75f6d617Schristos goto unconditional_jump;
7386*75f6d617Schristos
7387*75f6d617Schristos
7388*75f6d617Schristos /* At the end of an alternative, we need to push a dummy failure
7389*75f6d617Schristos point in case we are followed by a `pop_failure_jump', because
7390*75f6d617Schristos we don't want the failure point for the alternative to be
7391*75f6d617Schristos popped. For example, matching `(a|ab)*' against `aab'
7392*75f6d617Schristos requires that we match the `ab' alternative. */
7393*75f6d617Schristos CASE (push_dummy_failure):
7394*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
7395*75f6d617Schristos /* See comments just above at `dummy_failure_jump' about the
7396*75f6d617Schristos two zeroes. */
7397*75f6d617Schristos PUSH_FAILURE_POINT (NULL, NULL, -2);
7398*75f6d617Schristos NEXT;
7399*75f6d617Schristos
7400*75f6d617Schristos /* Have to succeed matching what follows at least n times.
7401*75f6d617Schristos After that, handle like `on_failure_jump'. */
7402*75f6d617Schristos CASE (succeed_n):
7403*75f6d617Schristos EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7404*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
7405*75f6d617Schristos
7406*75f6d617Schristos assert (mcnt >= 0);
7407*75f6d617Schristos /* Originally, this is how many times we HAVE to succeed. */
7408*75f6d617Schristos if (mcnt > 0)
7409*75f6d617Schristos {
7410*75f6d617Schristos mcnt--;
7411*75f6d617Schristos p += OFFSET_ADDRESS_SIZE;
7412*75f6d617Schristos STORE_NUMBER_AND_INCR (p, mcnt);
7413*75f6d617Schristos #ifdef _LIBC
7414*75f6d617Schristos DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
7415*75f6d617Schristos , mcnt);
7416*75f6d617Schristos #else
7417*75f6d617Schristos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
7418*75f6d617Schristos , mcnt);
7419*75f6d617Schristos #endif
7420*75f6d617Schristos }
7421*75f6d617Schristos else if (mcnt == 0)
7422*75f6d617Schristos {
7423*75f6d617Schristos #ifdef _LIBC
7424*75f6d617Schristos DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
7425*75f6d617Schristos p + OFFSET_ADDRESS_SIZE);
7426*75f6d617Schristos #else
7427*75f6d617Schristos DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
7428*75f6d617Schristos p + OFFSET_ADDRESS_SIZE);
7429*75f6d617Schristos #endif /* _LIBC */
7430*75f6d617Schristos
7431*75f6d617Schristos #ifdef WCHAR
7432*75f6d617Schristos p[1] = (UCHAR_T) no_op;
7433*75f6d617Schristos #else
7434*75f6d617Schristos p[2] = (UCHAR_T) no_op;
7435*75f6d617Schristos p[3] = (UCHAR_T) no_op;
7436*75f6d617Schristos #endif /* WCHAR */
7437*75f6d617Schristos goto on_failure;
7438*75f6d617Schristos }
7439*75f6d617Schristos NEXT;
7440*75f6d617Schristos
7441*75f6d617Schristos CASE (jump_n):
7442*75f6d617Schristos EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
7443*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
7444*75f6d617Schristos
7445*75f6d617Schristos /* Originally, this is how many times we CAN jump. */
7446*75f6d617Schristos if (mcnt)
7447*75f6d617Schristos {
7448*75f6d617Schristos mcnt--;
7449*75f6d617Schristos STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
7450*75f6d617Schristos
7451*75f6d617Schristos #ifdef _LIBC
7452*75f6d617Schristos DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
7453*75f6d617Schristos mcnt);
7454*75f6d617Schristos #else
7455*75f6d617Schristos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
7456*75f6d617Schristos mcnt);
7457*75f6d617Schristos #endif /* _LIBC */
7458*75f6d617Schristos goto unconditional_jump;
7459*75f6d617Schristos }
7460*75f6d617Schristos /* If don't have to jump any more, skip over the rest of command. */
7461*75f6d617Schristos else
7462*75f6d617Schristos p += 2 * OFFSET_ADDRESS_SIZE;
7463*75f6d617Schristos NEXT;
7464*75f6d617Schristos
7465*75f6d617Schristos CASE (set_number_at):
7466*75f6d617Schristos {
7467*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
7468*75f6d617Schristos
7469*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p);
7470*75f6d617Schristos p1 = p + mcnt;
7471*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p);
7472*75f6d617Schristos #ifdef _LIBC
7473*75f6d617Schristos DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
7474*75f6d617Schristos #else
7475*75f6d617Schristos DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
7476*75f6d617Schristos #endif
7477*75f6d617Schristos STORE_NUMBER (p1, mcnt);
7478*75f6d617Schristos NEXT;
7479*75f6d617Schristos }
7480*75f6d617Schristos
7481*75f6d617Schristos #if 0
7482*75f6d617Schristos /* The DEC Alpha C compiler 3.x generates incorrect code for the
7483*75f6d617Schristos test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
7484*75f6d617Schristos AT_WORD_BOUNDARY, so this code is disabled. Expanding the
7485*75f6d617Schristos macro and introducing temporary variables works around the bug. */
7486*75f6d617Schristos
7487*75f6d617Schristos CASE (wordbound):
7488*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7489*75f6d617Schristos if (AT_WORD_BOUNDARY (d))
7490*75f6d617Schristos {
7491*75f6d617Schristos NEXT;
7492*75f6d617Schristos }
7493*75f6d617Schristos goto fail;
7494*75f6d617Schristos
7495*75f6d617Schristos CASE (notwordbound):
7496*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7497*75f6d617Schristos if (AT_WORD_BOUNDARY (d))
7498*75f6d617Schristos goto fail;
7499*75f6d617Schristos NEXT;
7500*75f6d617Schristos #else
7501*75f6d617Schristos CASE (wordbound):
7502*75f6d617Schristos {
7503*75f6d617Schristos boolean prevchar, thischar;
7504*75f6d617Schristos
7505*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING wordbound.\n");
7506*75f6d617Schristos if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7507*75f6d617Schristos {
7508*75f6d617Schristos NEXT;
7509*75f6d617Schristos }
7510*75f6d617Schristos
7511*75f6d617Schristos prevchar = WORDCHAR_P (d - 1);
7512*75f6d617Schristos thischar = WORDCHAR_P (d);
7513*75f6d617Schristos if (prevchar != thischar)
7514*75f6d617Schristos {
7515*75f6d617Schristos NEXT;
7516*75f6d617Schristos }
7517*75f6d617Schristos goto fail;
7518*75f6d617Schristos }
7519*75f6d617Schristos
7520*75f6d617Schristos CASE (notwordbound):
7521*75f6d617Schristos {
7522*75f6d617Schristos boolean prevchar, thischar;
7523*75f6d617Schristos
7524*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
7525*75f6d617Schristos if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
7526*75f6d617Schristos goto fail;
7527*75f6d617Schristos
7528*75f6d617Schristos prevchar = WORDCHAR_P (d - 1);
7529*75f6d617Schristos thischar = WORDCHAR_P (d);
7530*75f6d617Schristos if (prevchar != thischar)
7531*75f6d617Schristos goto fail;
7532*75f6d617Schristos NEXT;
7533*75f6d617Schristos }
7534*75f6d617Schristos #endif
7535*75f6d617Schristos
7536*75f6d617Schristos CASE (wordbeg):
7537*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
7538*75f6d617Schristos if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
7539*75f6d617Schristos && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
7540*75f6d617Schristos {
7541*75f6d617Schristos NEXT;
7542*75f6d617Schristos }
7543*75f6d617Schristos goto fail;
7544*75f6d617Schristos
7545*75f6d617Schristos CASE (wordend):
7546*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING wordend.\n");
7547*75f6d617Schristos if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
7548*75f6d617Schristos && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
7549*75f6d617Schristos {
7550*75f6d617Schristos NEXT;
7551*75f6d617Schristos }
7552*75f6d617Schristos goto fail;
7553*75f6d617Schristos
7554*75f6d617Schristos #ifdef emacs
7555*75f6d617Schristos CASE (before_dot):
7556*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING before_dot.\n");
7557*75f6d617Schristos if (PTR_CHAR_POS ((unsigned char *) d) >= point)
7558*75f6d617Schristos goto fail;
7559*75f6d617Schristos NEXT;
7560*75f6d617Schristos
7561*75f6d617Schristos CASE (at_dot):
7562*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING at_dot.\n");
7563*75f6d617Schristos if (PTR_CHAR_POS ((unsigned char *) d) != point)
7564*75f6d617Schristos goto fail;
7565*75f6d617Schristos NEXT;
7566*75f6d617Schristos
7567*75f6d617Schristos CASE (after_dot):
7568*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING after_dot.\n");
7569*75f6d617Schristos if (PTR_CHAR_POS ((unsigned char *) d) <= point)
7570*75f6d617Schristos goto fail;
7571*75f6d617Schristos NEXT;
7572*75f6d617Schristos
7573*75f6d617Schristos CASE (syntaxspec):
7574*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
7575*75f6d617Schristos mcnt = *p++;
7576*75f6d617Schristos goto matchsyntax;
7577*75f6d617Schristos
7578*75f6d617Schristos CASE (wordchar):
7579*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
7580*75f6d617Schristos mcnt = (int) Sword;
7581*75f6d617Schristos matchsyntax:
7582*75f6d617Schristos PREFETCH ();
7583*75f6d617Schristos /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
7584*75f6d617Schristos d++;
7585*75f6d617Schristos if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
7586*75f6d617Schristos goto fail;
7587*75f6d617Schristos SET_REGS_MATCHED ();
7588*75f6d617Schristos NEXT;
7589*75f6d617Schristos
7590*75f6d617Schristos CASE (notsyntaxspec):
7591*75f6d617Schristos DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
7592*75f6d617Schristos mcnt = *p++;
7593*75f6d617Schristos goto matchnotsyntax;
7594*75f6d617Schristos
7595*75f6d617Schristos CASE (notwordchar):
7596*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
7597*75f6d617Schristos mcnt = (int) Sword;
7598*75f6d617Schristos matchnotsyntax:
7599*75f6d617Schristos PREFETCH ();
7600*75f6d617Schristos /* Can't use *d++ here; SYNTAX may be an unsafe macro. */
7601*75f6d617Schristos d++;
7602*75f6d617Schristos if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
7603*75f6d617Schristos goto fail;
7604*75f6d617Schristos SET_REGS_MATCHED ();
7605*75f6d617Schristos NEXT;
7606*75f6d617Schristos
7607*75f6d617Schristos #else /* not emacs */
7608*75f6d617Schristos CASE (wordchar):
7609*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
7610*75f6d617Schristos PREFETCH ();
7611*75f6d617Schristos if (!WORDCHAR_P (d))
7612*75f6d617Schristos goto fail;
7613*75f6d617Schristos SET_REGS_MATCHED ();
7614*75f6d617Schristos d++;
7615*75f6d617Schristos NEXT;
7616*75f6d617Schristos
7617*75f6d617Schristos CASE (notwordchar):
7618*75f6d617Schristos DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
7619*75f6d617Schristos PREFETCH ();
7620*75f6d617Schristos if (WORDCHAR_P (d))
7621*75f6d617Schristos goto fail;
7622*75f6d617Schristos SET_REGS_MATCHED ();
7623*75f6d617Schristos d++;
7624*75f6d617Schristos NEXT;
7625*75f6d617Schristos #endif /* not emacs */
7626*75f6d617Schristos
7627*75f6d617Schristos #ifndef __GNUC__
7628*75f6d617Schristos default:
7629*75f6d617Schristos abort ();
7630*75f6d617Schristos }
7631*75f6d617Schristos continue; /* Successfully executed one pattern command; keep going. */
7632*75f6d617Schristos #endif
7633*75f6d617Schristos
7634*75f6d617Schristos
7635*75f6d617Schristos /* We goto here if a matching operation fails. */
7636*75f6d617Schristos fail:
7637*75f6d617Schristos if (!FAIL_STACK_EMPTY ())
7638*75f6d617Schristos { /* A restart point is known. Restore to that state. */
7639*75f6d617Schristos DEBUG_PRINT1 ("\nFAIL:\n");
7640*75f6d617Schristos POP_FAILURE_POINT (d, p,
7641*75f6d617Schristos lowest_active_reg, highest_active_reg,
7642*75f6d617Schristos regstart, regend, reg_info);
7643*75f6d617Schristos
7644*75f6d617Schristos /* If this failure point is a dummy, try the next one. */
7645*75f6d617Schristos if (!p)
7646*75f6d617Schristos goto fail;
7647*75f6d617Schristos
7648*75f6d617Schristos /* If we failed to the end of the pattern, don't examine *p. */
7649*75f6d617Schristos assert (p <= pend);
7650*75f6d617Schristos if (p < pend)
7651*75f6d617Schristos {
7652*75f6d617Schristos boolean is_a_jump_n = false;
7653*75f6d617Schristos
7654*75f6d617Schristos /* If failed to a backwards jump that's part of a repetition
7655*75f6d617Schristos loop, need to pop this failure point and use the next one. */
7656*75f6d617Schristos switch ((re_opcode_t) *p)
7657*75f6d617Schristos {
7658*75f6d617Schristos case jump_n:
7659*75f6d617Schristos is_a_jump_n = true;
7660*75f6d617Schristos case maybe_pop_jump:
7661*75f6d617Schristos case pop_failure_jump:
7662*75f6d617Schristos case jump:
7663*75f6d617Schristos p1 = p + 1;
7664*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7665*75f6d617Schristos p1 += mcnt;
7666*75f6d617Schristos
7667*75f6d617Schristos if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
7668*75f6d617Schristos || (!is_a_jump_n
7669*75f6d617Schristos && (re_opcode_t) *p1 == on_failure_jump))
7670*75f6d617Schristos goto fail;
7671*75f6d617Schristos break;
7672*75f6d617Schristos default:
7673*75f6d617Schristos /* do nothing */ ;
7674*75f6d617Schristos }
7675*75f6d617Schristos }
7676*75f6d617Schristos
7677*75f6d617Schristos if (d >= string1 && d <= end1)
7678*75f6d617Schristos dend = end_match_1;
7679*75f6d617Schristos }
7680*75f6d617Schristos else
7681*75f6d617Schristos break; /* Matching at this starting point really fails. */
7682*75f6d617Schristos } /* for (;;) */
7683*75f6d617Schristos
7684*75f6d617Schristos if (best_regs_set)
7685*75f6d617Schristos goto restore_best_regs;
7686*75f6d617Schristos
7687*75f6d617Schristos FREE_VARIABLES ();
7688*75f6d617Schristos
7689*75f6d617Schristos return -1; /* Failure to match. */
7690*75f6d617Schristos } /* re_match_2 */
7691*75f6d617Schristos
7692*75f6d617Schristos /* Subroutine definitions for re_match_2. */
7693*75f6d617Schristos
7694*75f6d617Schristos
7695*75f6d617Schristos /* We are passed P pointing to a register number after a start_memory.
7696*75f6d617Schristos
7697*75f6d617Schristos Return true if the pattern up to the corresponding stop_memory can
7698*75f6d617Schristos match the empty string, and false otherwise.
7699*75f6d617Schristos
7700*75f6d617Schristos If we find the matching stop_memory, sets P to point to one past its number.
7701*75f6d617Schristos Otherwise, sets P to an undefined byte less than or equal to END.
7702*75f6d617Schristos
7703*75f6d617Schristos We don't handle duplicates properly (yet). */
7704*75f6d617Schristos
7705*75f6d617Schristos static boolean
7706*75f6d617Schristos PREFIX(group_match_null_string_p) (p, end, reg_info)
7707*75f6d617Schristos UCHAR_T **p, *end;
PREFIX(register_info_type)7708*75f6d617Schristos PREFIX(register_info_type) *reg_info;
7709*75f6d617Schristos {
7710*75f6d617Schristos int mcnt;
7711*75f6d617Schristos /* Point to after the args to the start_memory. */
7712*75f6d617Schristos UCHAR_T *p1 = *p + 2;
7713*75f6d617Schristos
7714*75f6d617Schristos while (p1 < end)
7715*75f6d617Schristos {
7716*75f6d617Schristos /* Skip over opcodes that can match nothing, and return true or
7717*75f6d617Schristos false, as appropriate, when we get to one that can't, or to the
7718*75f6d617Schristos matching stop_memory. */
7719*75f6d617Schristos
7720*75f6d617Schristos switch ((re_opcode_t) *p1)
7721*75f6d617Schristos {
7722*75f6d617Schristos /* Could be either a loop or a series of alternatives. */
7723*75f6d617Schristos case on_failure_jump:
7724*75f6d617Schristos p1++;
7725*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7726*75f6d617Schristos
7727*75f6d617Schristos /* If the next operation is not a jump backwards in the
7728*75f6d617Schristos pattern. */
7729*75f6d617Schristos
7730*75f6d617Schristos if (mcnt >= 0)
7731*75f6d617Schristos {
7732*75f6d617Schristos /* Go through the on_failure_jumps of the alternatives,
7733*75f6d617Schristos seeing if any of the alternatives cannot match nothing.
7734*75f6d617Schristos The last alternative starts with only a jump,
7735*75f6d617Schristos whereas the rest start with on_failure_jump and end
7736*75f6d617Schristos with a jump, e.g., here is the pattern for `a|b|c':
7737*75f6d617Schristos
7738*75f6d617Schristos /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
7739*75f6d617Schristos /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
7740*75f6d617Schristos /exactn/1/c
7741*75f6d617Schristos
7742*75f6d617Schristos So, we have to first go through the first (n-1)
7743*75f6d617Schristos alternatives and then deal with the last one separately. */
7744*75f6d617Schristos
7745*75f6d617Schristos
7746*75f6d617Schristos /* Deal with the first (n-1) alternatives, which start
7747*75f6d617Schristos with an on_failure_jump (see above) that jumps to right
7748*75f6d617Schristos past a jump_past_alt. */
7749*75f6d617Schristos
7750*75f6d617Schristos while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
7751*75f6d617Schristos jump_past_alt)
7752*75f6d617Schristos {
7753*75f6d617Schristos /* `mcnt' holds how many bytes long the alternative
7754*75f6d617Schristos is, including the ending `jump_past_alt' and
7755*75f6d617Schristos its number. */
7756*75f6d617Schristos
7757*75f6d617Schristos if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
7758*75f6d617Schristos (1 + OFFSET_ADDRESS_SIZE),
7759*75f6d617Schristos reg_info))
7760*75f6d617Schristos return false;
7761*75f6d617Schristos
7762*75f6d617Schristos /* Move to right after this alternative, including the
7763*75f6d617Schristos jump_past_alt. */
7764*75f6d617Schristos p1 += mcnt;
7765*75f6d617Schristos
7766*75f6d617Schristos /* Break if it's the beginning of an n-th alternative
7767*75f6d617Schristos that doesn't begin with an on_failure_jump. */
7768*75f6d617Schristos if ((re_opcode_t) *p1 != on_failure_jump)
7769*75f6d617Schristos break;
7770*75f6d617Schristos
7771*75f6d617Schristos /* Still have to check that it's not an n-th
7772*75f6d617Schristos alternative that starts with an on_failure_jump. */
7773*75f6d617Schristos p1++;
7774*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7775*75f6d617Schristos if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
7776*75f6d617Schristos jump_past_alt)
7777*75f6d617Schristos {
7778*75f6d617Schristos /* Get to the beginning of the n-th alternative. */
7779*75f6d617Schristos p1 -= 1 + OFFSET_ADDRESS_SIZE;
7780*75f6d617Schristos break;
7781*75f6d617Schristos }
7782*75f6d617Schristos }
7783*75f6d617Schristos
7784*75f6d617Schristos /* Deal with the last alternative: go back and get number
7785*75f6d617Schristos of the `jump_past_alt' just before it. `mcnt' contains
7786*75f6d617Schristos the length of the alternative. */
7787*75f6d617Schristos EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
7788*75f6d617Schristos
7789*75f6d617Schristos if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
7790*75f6d617Schristos return false;
7791*75f6d617Schristos
7792*75f6d617Schristos p1 += mcnt; /* Get past the n-th alternative. */
7793*75f6d617Schristos } /* if mcnt > 0 */
7794*75f6d617Schristos break;
7795*75f6d617Schristos
7796*75f6d617Schristos
7797*75f6d617Schristos case stop_memory:
7798*75f6d617Schristos assert (p1[1] == **p);
7799*75f6d617Schristos *p = p1 + 2;
7800*75f6d617Schristos return true;
7801*75f6d617Schristos
7802*75f6d617Schristos
7803*75f6d617Schristos default:
7804*75f6d617Schristos if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7805*75f6d617Schristos return false;
7806*75f6d617Schristos }
7807*75f6d617Schristos } /* while p1 < end */
7808*75f6d617Schristos
7809*75f6d617Schristos return false;
7810*75f6d617Schristos } /* group_match_null_string_p */
7811*75f6d617Schristos
7812*75f6d617Schristos
7813*75f6d617Schristos /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
7814*75f6d617Schristos It expects P to be the first byte of a single alternative and END one
7815*75f6d617Schristos byte past the last. The alternative can contain groups. */
7816*75f6d617Schristos
7817*75f6d617Schristos static boolean
7818*75f6d617Schristos PREFIX(alt_match_null_string_p) (p, end, reg_info)
7819*75f6d617Schristos UCHAR_T *p, *end;
PREFIX(register_info_type)7820*75f6d617Schristos PREFIX(register_info_type) *reg_info;
7821*75f6d617Schristos {
7822*75f6d617Schristos int mcnt;
7823*75f6d617Schristos UCHAR_T *p1 = p;
7824*75f6d617Schristos
7825*75f6d617Schristos while (p1 < end)
7826*75f6d617Schristos {
7827*75f6d617Schristos /* Skip over opcodes that can match nothing, and break when we get
7828*75f6d617Schristos to one that can't. */
7829*75f6d617Schristos
7830*75f6d617Schristos switch ((re_opcode_t) *p1)
7831*75f6d617Schristos {
7832*75f6d617Schristos /* It's a loop. */
7833*75f6d617Schristos case on_failure_jump:
7834*75f6d617Schristos p1++;
7835*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7836*75f6d617Schristos p1 += mcnt;
7837*75f6d617Schristos break;
7838*75f6d617Schristos
7839*75f6d617Schristos default:
7840*75f6d617Schristos if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
7841*75f6d617Schristos return false;
7842*75f6d617Schristos }
7843*75f6d617Schristos } /* while p1 < end */
7844*75f6d617Schristos
7845*75f6d617Schristos return true;
7846*75f6d617Schristos } /* alt_match_null_string_p */
7847*75f6d617Schristos
7848*75f6d617Schristos
7849*75f6d617Schristos /* Deals with the ops common to group_match_null_string_p and
7850*75f6d617Schristos alt_match_null_string_p.
7851*75f6d617Schristos
7852*75f6d617Schristos Sets P to one after the op and its arguments, if any. */
7853*75f6d617Schristos
7854*75f6d617Schristos static boolean
7855*75f6d617Schristos PREFIX(common_op_match_null_string_p) (p, end, reg_info)
7856*75f6d617Schristos UCHAR_T **p, *end;
PREFIX(register_info_type)7857*75f6d617Schristos PREFIX(register_info_type) *reg_info;
7858*75f6d617Schristos {
7859*75f6d617Schristos int mcnt;
7860*75f6d617Schristos boolean ret;
7861*75f6d617Schristos int reg_no;
7862*75f6d617Schristos UCHAR_T *p1 = *p;
7863*75f6d617Schristos
7864*75f6d617Schristos switch ((re_opcode_t) *p1++)
7865*75f6d617Schristos {
7866*75f6d617Schristos case no_op:
7867*75f6d617Schristos case begline:
7868*75f6d617Schristos case endline:
7869*75f6d617Schristos case begbuf:
7870*75f6d617Schristos case endbuf:
7871*75f6d617Schristos case wordbeg:
7872*75f6d617Schristos case wordend:
7873*75f6d617Schristos case wordbound:
7874*75f6d617Schristos case notwordbound:
7875*75f6d617Schristos #ifdef emacs
7876*75f6d617Schristos case before_dot:
7877*75f6d617Schristos case at_dot:
7878*75f6d617Schristos case after_dot:
7879*75f6d617Schristos #endif
7880*75f6d617Schristos break;
7881*75f6d617Schristos
7882*75f6d617Schristos case start_memory:
7883*75f6d617Schristos reg_no = *p1;
7884*75f6d617Schristos assert (reg_no > 0 && reg_no <= MAX_REGNUM);
7885*75f6d617Schristos ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
7886*75f6d617Schristos
7887*75f6d617Schristos /* Have to set this here in case we're checking a group which
7888*75f6d617Schristos contains a group and a back reference to it. */
7889*75f6d617Schristos
7890*75f6d617Schristos if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
7891*75f6d617Schristos REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
7892*75f6d617Schristos
7893*75f6d617Schristos if (!ret)
7894*75f6d617Schristos return false;
7895*75f6d617Schristos break;
7896*75f6d617Schristos
7897*75f6d617Schristos /* If this is an optimized succeed_n for zero times, make the jump. */
7898*75f6d617Schristos case jump:
7899*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7900*75f6d617Schristos if (mcnt >= 0)
7901*75f6d617Schristos p1 += mcnt;
7902*75f6d617Schristos else
7903*75f6d617Schristos return false;
7904*75f6d617Schristos break;
7905*75f6d617Schristos
7906*75f6d617Schristos case succeed_n:
7907*75f6d617Schristos /* Get to the number of times to succeed. */
7908*75f6d617Schristos p1 += OFFSET_ADDRESS_SIZE;
7909*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7910*75f6d617Schristos
7911*75f6d617Schristos if (mcnt == 0)
7912*75f6d617Schristos {
7913*75f6d617Schristos p1 -= 2 * OFFSET_ADDRESS_SIZE;
7914*75f6d617Schristos EXTRACT_NUMBER_AND_INCR (mcnt, p1);
7915*75f6d617Schristos p1 += mcnt;
7916*75f6d617Schristos }
7917*75f6d617Schristos else
7918*75f6d617Schristos return false;
7919*75f6d617Schristos break;
7920*75f6d617Schristos
7921*75f6d617Schristos case duplicate:
7922*75f6d617Schristos if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
7923*75f6d617Schristos return false;
7924*75f6d617Schristos break;
7925*75f6d617Schristos
7926*75f6d617Schristos case set_number_at:
7927*75f6d617Schristos p1 += 2 * OFFSET_ADDRESS_SIZE;
7928*75f6d617Schristos
7929*75f6d617Schristos default:
7930*75f6d617Schristos /* All other opcodes mean we cannot match the empty string. */
7931*75f6d617Schristos return false;
7932*75f6d617Schristos }
7933*75f6d617Schristos
7934*75f6d617Schristos *p = p1;
7935*75f6d617Schristos return true;
7936*75f6d617Schristos } /* common_op_match_null_string_p */
7937*75f6d617Schristos
7938*75f6d617Schristos
7939*75f6d617Schristos /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
7940*75f6d617Schristos bytes; nonzero otherwise. */
7941*75f6d617Schristos
7942*75f6d617Schristos static int
7943*75f6d617Schristos PREFIX(bcmp_translate) (s1, s2, len, translate)
7944*75f6d617Schristos const CHAR_T *s1, *s2;
7945*75f6d617Schristos register int len;
7946*75f6d617Schristos RE_TRANSLATE_TYPE translate;
7947*75f6d617Schristos {
7948*75f6d617Schristos register const UCHAR_T *p1 = (const UCHAR_T *) s1;
7949*75f6d617Schristos register const UCHAR_T *p2 = (const UCHAR_T *) s2;
7950*75f6d617Schristos while (len)
7951*75f6d617Schristos {
7952*75f6d617Schristos #ifdef WCHAR
7953*75f6d617Schristos if (((*p1<=0xff)?translate[*p1++]:*p1++)
7954*75f6d617Schristos != ((*p2<=0xff)?translate[*p2++]:*p2++))
7955*75f6d617Schristos return 1;
7956*75f6d617Schristos #else /* BYTE */
7957*75f6d617Schristos if (translate[*p1++] != translate[*p2++]) return 1;
7958*75f6d617Schristos #endif /* WCHAR */
7959*75f6d617Schristos len--;
7960*75f6d617Schristos }
7961*75f6d617Schristos return 0;
7962*75f6d617Schristos }
7963*75f6d617Schristos
7964*75f6d617Schristos
7965*75f6d617Schristos #else /* not INSIDE_RECURSION */
7966*75f6d617Schristos
7967*75f6d617Schristos /* Entry points for GNU code. */
7968*75f6d617Schristos
7969*75f6d617Schristos /* re_compile_pattern is the GNU regular expression compiler: it
7970*75f6d617Schristos compiles PATTERN (of length SIZE) and puts the result in BUFP.
7971*75f6d617Schristos Returns 0 if the pattern was valid, otherwise an error string.
7972*75f6d617Schristos
7973*75f6d617Schristos Assumes the `allocated' (and perhaps `buffer') and `translate' fields
7974*75f6d617Schristos are set in BUFP on entry.
7975*75f6d617Schristos
7976*75f6d617Schristos We call regex_compile to do the actual compilation. */
7977*75f6d617Schristos
7978*75f6d617Schristos const char *
7979*75f6d617Schristos re_compile_pattern (pattern, length, bufp)
7980*75f6d617Schristos const char *pattern;
7981*75f6d617Schristos size_t length;
7982*75f6d617Schristos struct re_pattern_buffer *bufp;
7983*75f6d617Schristos {
7984*75f6d617Schristos reg_errcode_t ret;
7985*75f6d617Schristos
7986*75f6d617Schristos /* GNU code is written to assume at least RE_NREGS registers will be set
7987*75f6d617Schristos (and at least one extra will be -1). */
7988*75f6d617Schristos bufp->regs_allocated = REGS_UNALLOCATED;
7989*75f6d617Schristos
7990*75f6d617Schristos /* And GNU code determines whether or not to get register information
7991*75f6d617Schristos by passing null for the REGS argument to re_match, etc., not by
7992*75f6d617Schristos setting no_sub. */
7993*75f6d617Schristos bufp->no_sub = 0;
7994*75f6d617Schristos
7995*75f6d617Schristos /* Match anchors at newline. */
7996*75f6d617Schristos bufp->newline_anchor = 1;
7997*75f6d617Schristos
7998*75f6d617Schristos # ifdef MBS_SUPPORT
7999*75f6d617Schristos if (MB_CUR_MAX != 1)
8000*75f6d617Schristos ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
8001*75f6d617Schristos else
8002*75f6d617Schristos # endif
8003*75f6d617Schristos ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
8004*75f6d617Schristos
8005*75f6d617Schristos if (!ret)
8006*75f6d617Schristos return NULL;
8007*75f6d617Schristos return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
8008*75f6d617Schristos }
8009*75f6d617Schristos #ifdef _LIBC
8010*75f6d617Schristos weak_alias (__re_compile_pattern, re_compile_pattern)
8011*75f6d617Schristos #endif
8012*75f6d617Schristos
8013*75f6d617Schristos /* Entry points compatible with 4.2 BSD regex library. We don't define
8014*75f6d617Schristos them unless specifically requested. */
8015*75f6d617Schristos
8016*75f6d617Schristos #if defined _REGEX_RE_COMP || defined _LIBC
8017*75f6d617Schristos
8018*75f6d617Schristos /* BSD has one and only one pattern buffer. */
8019*75f6d617Schristos static struct re_pattern_buffer re_comp_buf;
8020*75f6d617Schristos
8021*75f6d617Schristos char *
8022*75f6d617Schristos #ifdef _LIBC
8023*75f6d617Schristos /* Make these definitions weak in libc, so POSIX programs can redefine
8024*75f6d617Schristos these names if they don't use our functions, and still use
8025*75f6d617Schristos regcomp/regexec below without link errors. */
8026*75f6d617Schristos weak_function
8027*75f6d617Schristos #endif
8028*75f6d617Schristos re_comp (s)
8029*75f6d617Schristos const char *s;
8030*75f6d617Schristos {
8031*75f6d617Schristos reg_errcode_t ret;
8032*75f6d617Schristos
8033*75f6d617Schristos if (!s)
8034*75f6d617Schristos {
8035*75f6d617Schristos if (!re_comp_buf.buffer)
8036*75f6d617Schristos return gettext ("No previous regular expression");
8037*75f6d617Schristos return 0;
8038*75f6d617Schristos }
8039*75f6d617Schristos
8040*75f6d617Schristos if (!re_comp_buf.buffer)
8041*75f6d617Schristos {
8042*75f6d617Schristos re_comp_buf.buffer = (unsigned char *) malloc (200);
8043*75f6d617Schristos if (re_comp_buf.buffer == NULL)
8044*75f6d617Schristos return (char *) gettext (re_error_msgid
8045*75f6d617Schristos + re_error_msgid_idx[(int) REG_ESPACE]);
8046*75f6d617Schristos re_comp_buf.allocated = 200;
8047*75f6d617Schristos
8048*75f6d617Schristos re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
8049*75f6d617Schristos if (re_comp_buf.fastmap == NULL)
8050*75f6d617Schristos return (char *) gettext (re_error_msgid
8051*75f6d617Schristos + re_error_msgid_idx[(int) REG_ESPACE]);
8052*75f6d617Schristos }
8053*75f6d617Schristos
8054*75f6d617Schristos /* Since `re_exec' always passes NULL for the `regs' argument, we
8055*75f6d617Schristos don't need to initialize the pattern buffer fields which affect it. */
8056*75f6d617Schristos
8057*75f6d617Schristos /* Match anchors at newlines. */
8058*75f6d617Schristos re_comp_buf.newline_anchor = 1;
8059*75f6d617Schristos
8060*75f6d617Schristos # ifdef MBS_SUPPORT
8061*75f6d617Schristos if (MB_CUR_MAX != 1)
8062*75f6d617Schristos ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8063*75f6d617Schristos else
8064*75f6d617Schristos # endif
8065*75f6d617Schristos ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
8066*75f6d617Schristos
8067*75f6d617Schristos if (!ret)
8068*75f6d617Schristos return NULL;
8069*75f6d617Schristos
8070*75f6d617Schristos /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
8071*75f6d617Schristos return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]);
8072*75f6d617Schristos }
8073*75f6d617Schristos
8074*75f6d617Schristos
8075*75f6d617Schristos int
8076*75f6d617Schristos #ifdef _LIBC
8077*75f6d617Schristos weak_function
8078*75f6d617Schristos #endif
8079*75f6d617Schristos re_exec (s)
8080*75f6d617Schristos const char *s;
8081*75f6d617Schristos {
8082*75f6d617Schristos const int len = strlen (s);
8083*75f6d617Schristos return
8084*75f6d617Schristos 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
8085*75f6d617Schristos }
8086*75f6d617Schristos
8087*75f6d617Schristos #endif /* _REGEX_RE_COMP */
8088*75f6d617Schristos
8089*75f6d617Schristos /* POSIX.2 functions. Don't define these for Emacs. */
8090*75f6d617Schristos
8091*75f6d617Schristos #ifndef emacs
8092*75f6d617Schristos
8093*75f6d617Schristos /* regcomp takes a regular expression as a string and compiles it.
8094*75f6d617Schristos
8095*75f6d617Schristos PREG is a regex_t *. We do not expect any fields to be initialized,
8096*75f6d617Schristos since POSIX says we shouldn't. Thus, we set
8097*75f6d617Schristos
8098*75f6d617Schristos `buffer' to the compiled pattern;
8099*75f6d617Schristos `used' to the length of the compiled pattern;
8100*75f6d617Schristos `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
8101*75f6d617Schristos REG_EXTENDED bit in CFLAGS is set; otherwise, to
8102*75f6d617Schristos RE_SYNTAX_POSIX_BASIC;
8103*75f6d617Schristos `newline_anchor' to REG_NEWLINE being set in CFLAGS;
8104*75f6d617Schristos `fastmap' to an allocated space for the fastmap;
8105*75f6d617Schristos `fastmap_accurate' to zero;
8106*75f6d617Schristos `re_nsub' to the number of subexpressions in PATTERN.
8107*75f6d617Schristos
8108*75f6d617Schristos PATTERN is the address of the pattern string.
8109*75f6d617Schristos
8110*75f6d617Schristos CFLAGS is a series of bits which affect compilation.
8111*75f6d617Schristos
8112*75f6d617Schristos If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
8113*75f6d617Schristos use POSIX basic syntax.
8114*75f6d617Schristos
8115*75f6d617Schristos If REG_NEWLINE is set, then . and [^...] don't match newline.
8116*75f6d617Schristos Also, regexec will try a match beginning after every newline.
8117*75f6d617Schristos
8118*75f6d617Schristos If REG_ICASE is set, then we considers upper- and lowercase
8119*75f6d617Schristos versions of letters to be equivalent when matching.
8120*75f6d617Schristos
8121*75f6d617Schristos If REG_NOSUB is set, then when PREG is passed to regexec, that
8122*75f6d617Schristos routine will report only success or failure, and nothing about the
8123*75f6d617Schristos registers.
8124*75f6d617Schristos
8125*75f6d617Schristos It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
8126*75f6d617Schristos the return codes and their meanings.) */
8127*75f6d617Schristos
8128*75f6d617Schristos int
8129*75f6d617Schristos regcomp (preg, pattern, cflags)
8130*75f6d617Schristos regex_t *preg;
8131*75f6d617Schristos const char *pattern;
8132*75f6d617Schristos int cflags;
8133*75f6d617Schristos {
8134*75f6d617Schristos reg_errcode_t ret;
8135*75f6d617Schristos reg_syntax_t syntax
8136*75f6d617Schristos = (cflags & REG_EXTENDED) ?
8137*75f6d617Schristos RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
8138*75f6d617Schristos
8139*75f6d617Schristos /* regex_compile will allocate the space for the compiled pattern. */
8140*75f6d617Schristos preg->buffer = 0;
8141*75f6d617Schristos preg->allocated = 0;
8142*75f6d617Schristos preg->used = 0;
8143*75f6d617Schristos
8144*75f6d617Schristos /* Try to allocate space for the fastmap. */
8145*75f6d617Schristos preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
8146*75f6d617Schristos
8147*75f6d617Schristos if (cflags & REG_ICASE)
8148*75f6d617Schristos {
8149*75f6d617Schristos unsigned i;
8150*75f6d617Schristos
8151*75f6d617Schristos preg->translate
8152*75f6d617Schristos = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
8153*75f6d617Schristos * sizeof (*(RE_TRANSLATE_TYPE)0));
8154*75f6d617Schristos if (preg->translate == NULL)
8155*75f6d617Schristos return (int) REG_ESPACE;
8156*75f6d617Schristos
8157*75f6d617Schristos /* Map uppercase characters to corresponding lowercase ones. */
8158*75f6d617Schristos for (i = 0; i < CHAR_SET_SIZE; i++)
8159*75f6d617Schristos preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
8160*75f6d617Schristos }
8161*75f6d617Schristos else
8162*75f6d617Schristos preg->translate = NULL;
8163*75f6d617Schristos
8164*75f6d617Schristos /* If REG_NEWLINE is set, newlines are treated differently. */
8165*75f6d617Schristos if (cflags & REG_NEWLINE)
8166*75f6d617Schristos { /* REG_NEWLINE implies neither . nor [^...] match newline. */
8167*75f6d617Schristos syntax &= ~RE_DOT_NEWLINE;
8168*75f6d617Schristos syntax |= RE_HAT_LISTS_NOT_NEWLINE;
8169*75f6d617Schristos /* It also changes the matching behavior. */
8170*75f6d617Schristos preg->newline_anchor = 1;
8171*75f6d617Schristos }
8172*75f6d617Schristos else
8173*75f6d617Schristos preg->newline_anchor = 0;
8174*75f6d617Schristos
8175*75f6d617Schristos preg->no_sub = !!(cflags & REG_NOSUB);
8176*75f6d617Schristos
8177*75f6d617Schristos /* POSIX says a null character in the pattern terminates it, so we
8178*75f6d617Schristos can use strlen here in compiling the pattern. */
8179*75f6d617Schristos # ifdef MBS_SUPPORT
8180*75f6d617Schristos if (MB_CUR_MAX != 1)
8181*75f6d617Schristos ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
8182*75f6d617Schristos else
8183*75f6d617Schristos # endif
8184*75f6d617Schristos ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
8185*75f6d617Schristos
8186*75f6d617Schristos /* POSIX doesn't distinguish between an unmatched open-group and an
8187*75f6d617Schristos unmatched close-group: both are REG_EPAREN. */
8188*75f6d617Schristos if (ret == REG_ERPAREN) ret = REG_EPAREN;
8189*75f6d617Schristos
8190*75f6d617Schristos if (ret == REG_NOERROR && preg->fastmap)
8191*75f6d617Schristos {
8192*75f6d617Schristos /* Compute the fastmap now, since regexec cannot modify the pattern
8193*75f6d617Schristos buffer. */
8194*75f6d617Schristos if (re_compile_fastmap (preg) == -2)
8195*75f6d617Schristos {
8196*75f6d617Schristos /* Some error occurred while computing the fastmap, just forget
8197*75f6d617Schristos about it. */
8198*75f6d617Schristos free (preg->fastmap);
8199*75f6d617Schristos preg->fastmap = NULL;
8200*75f6d617Schristos }
8201*75f6d617Schristos }
8202*75f6d617Schristos
8203*75f6d617Schristos return (int) ret;
8204*75f6d617Schristos }
8205*75f6d617Schristos #ifdef _LIBC
8206*75f6d617Schristos weak_alias (__regcomp, regcomp)
8207*75f6d617Schristos #endif
8208*75f6d617Schristos
8209*75f6d617Schristos
8210*75f6d617Schristos /* regexec searches for a given pattern, specified by PREG, in the
8211*75f6d617Schristos string STRING.
8212*75f6d617Schristos
8213*75f6d617Schristos If NMATCH is zero or REG_NOSUB was set in the cflags argument to
8214*75f6d617Schristos `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
8215*75f6d617Schristos least NMATCH elements, and we set them to the offsets of the
8216*75f6d617Schristos corresponding matched substrings.
8217*75f6d617Schristos
8218*75f6d617Schristos EFLAGS specifies `execution flags' which affect matching: if
8219*75f6d617Schristos REG_NOTBOL is set, then ^ does not match at the beginning of the
8220*75f6d617Schristos string; if REG_NOTEOL is set, then $ does not match at the end.
8221*75f6d617Schristos
8222*75f6d617Schristos We return 0 if we find a match and REG_NOMATCH if not. */
8223*75f6d617Schristos
8224*75f6d617Schristos int
8225*75f6d617Schristos regexec (preg, string, nmatch, pmatch, eflags)
8226*75f6d617Schristos const regex_t *preg;
8227*75f6d617Schristos const char *string;
8228*75f6d617Schristos size_t nmatch;
8229*75f6d617Schristos regmatch_t pmatch[];
8230*75f6d617Schristos int eflags;
8231*75f6d617Schristos {
8232*75f6d617Schristos int ret;
8233*75f6d617Schristos struct re_registers regs;
8234*75f6d617Schristos regex_t private_preg;
8235*75f6d617Schristos int len = strlen (string);
8236*75f6d617Schristos boolean want_reg_info = !preg->no_sub && nmatch > 0;
8237*75f6d617Schristos
8238*75f6d617Schristos private_preg = *preg;
8239*75f6d617Schristos
8240*75f6d617Schristos private_preg.not_bol = !!(eflags & REG_NOTBOL);
8241*75f6d617Schristos private_preg.not_eol = !!(eflags & REG_NOTEOL);
8242*75f6d617Schristos
8243*75f6d617Schristos /* The user has told us exactly how many registers to return
8244*75f6d617Schristos information about, via `nmatch'. We have to pass that on to the
8245*75f6d617Schristos matching routines. */
8246*75f6d617Schristos private_preg.regs_allocated = REGS_FIXED;
8247*75f6d617Schristos
8248*75f6d617Schristos if (want_reg_info)
8249*75f6d617Schristos {
8250*75f6d617Schristos regs.num_regs = nmatch;
8251*75f6d617Schristos regs.start = TALLOC (nmatch * 2, regoff_t);
8252*75f6d617Schristos if (regs.start == NULL)
8253*75f6d617Schristos return (int) REG_NOMATCH;
8254*75f6d617Schristos regs.end = regs.start + nmatch;
8255*75f6d617Schristos }
8256*75f6d617Schristos
8257*75f6d617Schristos /* Perform the searching operation. */
8258*75f6d617Schristos ret = re_search (&private_preg, string, len,
8259*75f6d617Schristos /* start: */ 0, /* range: */ len,
8260*75f6d617Schristos want_reg_info ? ®s : (struct re_registers *) 0);
8261*75f6d617Schristos
8262*75f6d617Schristos /* Copy the register information to the POSIX structure. */
8263*75f6d617Schristos if (want_reg_info)
8264*75f6d617Schristos {
8265*75f6d617Schristos if (ret >= 0)
8266*75f6d617Schristos {
8267*75f6d617Schristos unsigned r;
8268*75f6d617Schristos
8269*75f6d617Schristos for (r = 0; r < nmatch; r++)
8270*75f6d617Schristos {
8271*75f6d617Schristos pmatch[r].rm_so = regs.start[r];
8272*75f6d617Schristos pmatch[r].rm_eo = regs.end[r];
8273*75f6d617Schristos }
8274*75f6d617Schristos }
8275*75f6d617Schristos
8276*75f6d617Schristos /* If we needed the temporary register info, free the space now. */
8277*75f6d617Schristos free (regs.start);
8278*75f6d617Schristos }
8279*75f6d617Schristos
8280*75f6d617Schristos /* We want zero return to mean success, unlike `re_search'. */
8281*75f6d617Schristos return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
8282*75f6d617Schristos }
8283*75f6d617Schristos #ifdef _LIBC
8284*75f6d617Schristos weak_alias (__regexec, regexec)
8285*75f6d617Schristos #endif
8286*75f6d617Schristos
8287*75f6d617Schristos
8288*75f6d617Schristos /* Returns a message corresponding to an error code, ERRCODE, returned
8289*75f6d617Schristos from either regcomp or regexec. We don't use PREG here. */
8290*75f6d617Schristos
8291*75f6d617Schristos size_t
8292*75f6d617Schristos regerror (errcode, preg, errbuf, errbuf_size)
8293*75f6d617Schristos int errcode;
8294*75f6d617Schristos const regex_t *preg;
8295*75f6d617Schristos char *errbuf;
8296*75f6d617Schristos size_t errbuf_size;
8297*75f6d617Schristos {
8298*75f6d617Schristos const char *msg;
8299*75f6d617Schristos size_t msg_size;
8300*75f6d617Schristos
8301*75f6d617Schristos if (errcode < 0
8302*75f6d617Schristos || errcode >= (int) (sizeof (re_error_msgid_idx)
8303*75f6d617Schristos / sizeof (re_error_msgid_idx[0])))
8304*75f6d617Schristos /* Only error codes returned by the rest of the code should be passed
8305*75f6d617Schristos to this routine. If we are given anything else, or if other regex
8306*75f6d617Schristos code generates an invalid error code, then the program has a bug.
8307*75f6d617Schristos Dump core so we can fix it. */
8308*75f6d617Schristos abort ();
8309*75f6d617Schristos
8310*75f6d617Schristos msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]);
8311*75f6d617Schristos
8312*75f6d617Schristos msg_size = strlen (msg) + 1; /* Includes the null. */
8313*75f6d617Schristos
8314*75f6d617Schristos if (errbuf_size != 0)
8315*75f6d617Schristos {
8316*75f6d617Schristos if (msg_size > errbuf_size)
8317*75f6d617Schristos {
8318*75f6d617Schristos #if defined HAVE_MEMPCPY || defined _LIBC
8319*75f6d617Schristos *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
8320*75f6d617Schristos #else
8321*75f6d617Schristos memcpy (errbuf, msg, errbuf_size - 1);
8322*75f6d617Schristos errbuf[errbuf_size - 1] = 0;
8323*75f6d617Schristos #endif
8324*75f6d617Schristos }
8325*75f6d617Schristos else
8326*75f6d617Schristos memcpy (errbuf, msg, msg_size);
8327*75f6d617Schristos }
8328*75f6d617Schristos
8329*75f6d617Schristos return msg_size;
8330*75f6d617Schristos }
8331*75f6d617Schristos #ifdef _LIBC
8332*75f6d617Schristos weak_alias (__regerror, regerror)
8333*75f6d617Schristos #endif
8334*75f6d617Schristos
8335*75f6d617Schristos
8336*75f6d617Schristos /* Free dynamically allocated space used by PREG. */
8337*75f6d617Schristos
8338*75f6d617Schristos void
8339*75f6d617Schristos regfree (preg)
8340*75f6d617Schristos regex_t *preg;
8341*75f6d617Schristos {
8342*75f6d617Schristos if (preg->buffer != NULL)
8343*75f6d617Schristos free (preg->buffer);
8344*75f6d617Schristos preg->buffer = NULL;
8345*75f6d617Schristos
8346*75f6d617Schristos preg->allocated = 0;
8347*75f6d617Schristos preg->used = 0;
8348*75f6d617Schristos
8349*75f6d617Schristos if (preg->fastmap != NULL)
8350*75f6d617Schristos free (preg->fastmap);
8351*75f6d617Schristos preg->fastmap = NULL;
8352*75f6d617Schristos preg->fastmap_accurate = 0;
8353*75f6d617Schristos
8354*75f6d617Schristos if (preg->translate != NULL)
8355*75f6d617Schristos free (preg->translate);
8356*75f6d617Schristos preg->translate = NULL;
8357*75f6d617Schristos }
8358*75f6d617Schristos #ifdef _LIBC
8359*75f6d617Schristos weak_alias (__regfree, regfree)
8360*75f6d617Schristos #endif
8361*75f6d617Schristos
8362*75f6d617Schristos #endif /* not emacs */
8363*75f6d617Schristos
8364*75f6d617Schristos #endif /* not INSIDE_RECURSION */
8365*75f6d617Schristos
8366*75f6d617Schristos
8367*75f6d617Schristos #undef STORE_NUMBER
8368*75f6d617Schristos #undef STORE_NUMBER_AND_INCR
8369*75f6d617Schristos #undef EXTRACT_NUMBER
8370*75f6d617Schristos #undef EXTRACT_NUMBER_AND_INCR
8371*75f6d617Schristos
8372*75f6d617Schristos #undef DEBUG_PRINT_COMPILED_PATTERN
8373*75f6d617Schristos #undef DEBUG_PRINT_DOUBLE_STRING
8374*75f6d617Schristos
8375*75f6d617Schristos #undef INIT_FAIL_STACK
8376*75f6d617Schristos #undef RESET_FAIL_STACK
8377*75f6d617Schristos #undef DOUBLE_FAIL_STACK
8378*75f6d617Schristos #undef PUSH_PATTERN_OP
8379*75f6d617Schristos #undef PUSH_FAILURE_POINTER
8380*75f6d617Schristos #undef PUSH_FAILURE_INT
8381*75f6d617Schristos #undef PUSH_FAILURE_ELT
8382*75f6d617Schristos #undef POP_FAILURE_POINTER
8383*75f6d617Schristos #undef POP_FAILURE_INT
8384*75f6d617Schristos #undef POP_FAILURE_ELT
8385*75f6d617Schristos #undef DEBUG_PUSH
8386*75f6d617Schristos #undef DEBUG_POP
8387*75f6d617Schristos #undef PUSH_FAILURE_POINT
8388*75f6d617Schristos #undef POP_FAILURE_POINT
8389*75f6d617Schristos
8390*75f6d617Schristos #undef REG_UNSET_VALUE
8391*75f6d617Schristos #undef REG_UNSET
8392*75f6d617Schristos
8393*75f6d617Schristos #undef PATFETCH
8394*75f6d617Schristos #undef PATFETCH_RAW
8395*75f6d617Schristos #undef PATUNFETCH
8396*75f6d617Schristos #undef TRANSLATE
8397*75f6d617Schristos
8398*75f6d617Schristos #undef INIT_BUF_SIZE
8399*75f6d617Schristos #undef GET_BUFFER_SPACE
8400*75f6d617Schristos #undef BUF_PUSH
8401*75f6d617Schristos #undef BUF_PUSH_2
8402*75f6d617Schristos #undef BUF_PUSH_3
8403*75f6d617Schristos #undef STORE_JUMP
8404*75f6d617Schristos #undef STORE_JUMP2
8405*75f6d617Schristos #undef INSERT_JUMP
8406*75f6d617Schristos #undef INSERT_JUMP2
8407*75f6d617Schristos #undef EXTEND_BUFFER
8408*75f6d617Schristos #undef GET_UNSIGNED_NUMBER
8409*75f6d617Schristos #undef FREE_STACK_RETURN
8410*75f6d617Schristos
8411*75f6d617Schristos # undef POINTER_TO_OFFSET
8412*75f6d617Schristos # undef MATCHING_IN_FRST_STRING
8413*75f6d617Schristos # undef PREFETCH
8414*75f6d617Schristos # undef AT_STRINGS_BEG
8415*75f6d617Schristos # undef AT_STRINGS_END
8416*75f6d617Schristos # undef WORDCHAR_P
8417*75f6d617Schristos # undef FREE_VAR
8418*75f6d617Schristos # undef FREE_VARIABLES
8419*75f6d617Schristos # undef NO_HIGHEST_ACTIVE_REG
8420*75f6d617Schristos # undef NO_LOWEST_ACTIVE_REG
8421*75f6d617Schristos
8422*75f6d617Schristos # undef CHAR_T
8423*75f6d617Schristos # undef UCHAR_T
8424*75f6d617Schristos # undef COMPILED_BUFFER_VAR
8425*75f6d617Schristos # undef OFFSET_ADDRESS_SIZE
8426*75f6d617Schristos # undef CHAR_CLASS_SIZE
8427*75f6d617Schristos # undef PREFIX
8428*75f6d617Schristos # undef ARG_PREFIX
8429*75f6d617Schristos # undef PUT_CHAR
8430*75f6d617Schristos # undef BYTE
8431*75f6d617Schristos # undef WCHAR
8432*75f6d617Schristos
8433*75f6d617Schristos # define DEFINED_ONCE
8434