xref: /dflybsd-src/lib/libc/tre-regex/regex.3 (revision 6af9a77b394698e42f3a7ec6126497a3fc2fd470)
1*6af9a77bSJohn Marino.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
2*6af9a77bSJohn Marino.\" Copyright (c) 1992, 1993, 1994
3*6af9a77bSJohn Marino.\"	The Regents of the University of California.  All rights reserved.
4*6af9a77bSJohn Marino.\"
5*6af9a77bSJohn Marino.\" This code is derived from software contributed to Berkeley by
6*6af9a77bSJohn Marino.\" Henry Spencer.
7*6af9a77bSJohn Marino.\"
8*6af9a77bSJohn Marino.\" Redistribution and use in source and binary forms, with or without
9*6af9a77bSJohn Marino.\" modification, are permitted provided that the following conditions
10*6af9a77bSJohn Marino.\" are met:
11*6af9a77bSJohn Marino.\" 1. Redistributions of source code must retain the above copyright
12*6af9a77bSJohn Marino.\"    notice, this list of conditions and the following disclaimer.
13*6af9a77bSJohn Marino.\" 2. Redistributions in binary form must reproduce the above copyright
14*6af9a77bSJohn Marino.\"    notice, this list of conditions and the following disclaimer in the
15*6af9a77bSJohn Marino.\"    documentation and/or other materials provided with the distribution.
16*6af9a77bSJohn Marino.\" 3. Neither the name of the University nor the names of its contributors
17*6af9a77bSJohn Marino.\"    may be used to endorse or promote products derived from this software
18*6af9a77bSJohn Marino.\"    without specific prior written permission.
19*6af9a77bSJohn Marino.\"
20*6af9a77bSJohn Marino.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21*6af9a77bSJohn Marino.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22*6af9a77bSJohn Marino.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23*6af9a77bSJohn Marino.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24*6af9a77bSJohn Marino.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25*6af9a77bSJohn Marino.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26*6af9a77bSJohn Marino.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27*6af9a77bSJohn Marino.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28*6af9a77bSJohn Marino.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29*6af9a77bSJohn Marino.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30*6af9a77bSJohn Marino.\" SUCH DAMAGE.
31*6af9a77bSJohn Marino.\"
32*6af9a77bSJohn Marino.\"	@(#)regex.3	8.4 (Berkeley) 3/20/94
33*6af9a77bSJohn Marino.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $
34*6af9a77bSJohn Marino.\"
35*6af9a77bSJohn Marino.Dd August 6, 2015
36*6af9a77bSJohn Marino.Dt REGEX 3
37*6af9a77bSJohn Marino.Os
38*6af9a77bSJohn Marino.Sh NAME
39*6af9a77bSJohn Marino.Nm regcomp ,
40*6af9a77bSJohn Marino.Nm regcomp_l ,
41*6af9a77bSJohn Marino.Nm regerror ,
42*6af9a77bSJohn Marino.Nm regexec ,
43*6af9a77bSJohn Marino.Nm regfree ,
44*6af9a77bSJohn Marino.Nm regncomp ,
45*6af9a77bSJohn Marino.Nm regncomp_l ,
46*6af9a77bSJohn Marino.Nm regnexec ,
47*6af9a77bSJohn Marino.Nm regwncomp ,
48*6af9a77bSJohn Marino.Nm regwncomp_l ,
49*6af9a77bSJohn Marino.Nm regwnexec ,
50*6af9a77bSJohn Marino.Nm regwcomp ,
51*6af9a77bSJohn Marino.Nm regwcomp_l ,
52*6af9a77bSJohn Marino.Nm regwexec
53*6af9a77bSJohn Marino.Nd regular-expression library
54*6af9a77bSJohn Marino.Sh SYNOPSIS
55*6af9a77bSJohn Marino.Sy (Standards-compliant APIs)
56*6af9a77bSJohn Marino.Pp
57*6af9a77bSJohn Marino.In regex.h
58*6af9a77bSJohn Marino.Ft int
59*6af9a77bSJohn Marino.Fo regcomp
60*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
61*6af9a77bSJohn Marino.Fa "const char *restrict pattern"
62*6af9a77bSJohn Marino.Fa "int cflags"
63*6af9a77bSJohn Marino.Fc
64*6af9a77bSJohn Marino.Ft size_t
65*6af9a77bSJohn Marino.Fo regerror
66*6af9a77bSJohn Marino.Fa "int errcode"
67*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg"
68*6af9a77bSJohn Marino.Fa "char *restrict errbuf"
69*6af9a77bSJohn Marino.Fa "size_t errbuf_size"
70*6af9a77bSJohn Marino.Fc
71*6af9a77bSJohn Marino.Ft int
72*6af9a77bSJohn Marino.Fo regexec
73*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg"
74*6af9a77bSJohn Marino.Fa "const char *restrict string"
75*6af9a77bSJohn Marino.Fa "size_t nmatch"
76*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]"
77*6af9a77bSJohn Marino.Fa "int eflags"
78*6af9a77bSJohn Marino.Fc
79*6af9a77bSJohn Marino.Ft void
80*6af9a77bSJohn Marino.Fo regfree
81*6af9a77bSJohn Marino.Fa "regex_t *preg"
82*6af9a77bSJohn Marino.Fc
83*6af9a77bSJohn Marino.Pp
84*6af9a77bSJohn Marino.Sy (Non-portable extensions)
85*6af9a77bSJohn Marino.Ft int
86*6af9a77bSJohn Marino.Fo regncomp
87*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
88*6af9a77bSJohn Marino.Fa "const char *restrict pattern"
89*6af9a77bSJohn Marino.Fa "size_t len"
90*6af9a77bSJohn Marino.Fa "int cflags"
91*6af9a77bSJohn Marino.Fc
92*6af9a77bSJohn Marino.Ft int
93*6af9a77bSJohn Marino.Fo regnexec
94*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg"
95*6af9a77bSJohn Marino.Fa "const char *restrict string"
96*6af9a77bSJohn Marino.Fa "size_t len"
97*6af9a77bSJohn Marino.Fa "size_t nmatch"
98*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]"
99*6af9a77bSJohn Marino.Fa "int eflags"
100*6af9a77bSJohn Marino.Fc
101*6af9a77bSJohn Marino.Ft int
102*6af9a77bSJohn Marino.Fo regwcomp
103*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
104*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat"
105*6af9a77bSJohn Marino.Fa "int cflags"
106*6af9a77bSJohn Marino.Fc
107*6af9a77bSJohn Marino.Ft int
108*6af9a77bSJohn Marino.Fo regwexec
109*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg"
110*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widestr"
111*6af9a77bSJohn Marino.Fa "size_t nmatch"
112*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]"
113*6af9a77bSJohn Marino.Fa "int eflags"
114*6af9a77bSJohn Marino.Fc
115*6af9a77bSJohn Marino.Ft int
116*6af9a77bSJohn Marino.Fo regwncomp
117*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
118*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat"
119*6af9a77bSJohn Marino.Fa "size_t len"
120*6af9a77bSJohn Marino.Fa "int cflags"
121*6af9a77bSJohn Marino.Fc
122*6af9a77bSJohn Marino.Ft int
123*6af9a77bSJohn Marino.Fo regwnexec
124*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg"
125*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widestr"
126*6af9a77bSJohn Marino.Fa "size_t len"
127*6af9a77bSJohn Marino.Fa "size_t nmatch"
128*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]"
129*6af9a77bSJohn Marino.Fa "int eflags"
130*6af9a77bSJohn Marino.Fc
131*6af9a77bSJohn Marino.In regex.h
132*6af9a77bSJohn Marino.In xlocale.h
133*6af9a77bSJohn Marino.Ft int
134*6af9a77bSJohn Marino.Fo regcomp_l
135*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
136*6af9a77bSJohn Marino.Fa "const char *restrict pattern"
137*6af9a77bSJohn Marino.Fa "int cflags"
138*6af9a77bSJohn Marino.Fa "locale_t restrict loc"
139*6af9a77bSJohn Marino.Fc
140*6af9a77bSJohn Marino.Ft int
141*6af9a77bSJohn Marino.Fo regncomp_l
142*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
143*6af9a77bSJohn Marino.Fa "const char *restrict pattern"
144*6af9a77bSJohn Marino.Fa "size_t len"
145*6af9a77bSJohn Marino.Fa "int cflags"
146*6af9a77bSJohn Marino.Fa "locale_t restrict loc"
147*6af9a77bSJohn Marino.Fc
148*6af9a77bSJohn Marino.Ft int
149*6af9a77bSJohn Marino.Fo regwcomp_l
150*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
151*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat"
152*6af9a77bSJohn Marino.Fa "int cflags"
153*6af9a77bSJohn Marino.Fa "locale_t restrict loc"
154*6af9a77bSJohn Marino.Fc
155*6af9a77bSJohn Marino.Ft int
156*6af9a77bSJohn Marino.Fo regwncomp_l
157*6af9a77bSJohn Marino.Fa "regex_t *restrict preg"
158*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat"
159*6af9a77bSJohn Marino.Fa "size_t len"
160*6af9a77bSJohn Marino.Fa "int cflags"
161*6af9a77bSJohn Marino.Fa "locale_t restrict loc"
162*6af9a77bSJohn Marino.Fc
163*6af9a77bSJohn Marino.Sh DESCRIPTION
164*6af9a77bSJohn MarinoThese routines implement
165*6af9a77bSJohn Marino.St -p1003.2
166*6af9a77bSJohn Marinoregular expressions
167*6af9a77bSJohn Marino.Pq Do RE Dc Ns s ;
168*6af9a77bSJohn Marinosee
169*6af9a77bSJohn Marino.Xr re_format 7 .
170*6af9a77bSJohn MarinoThe
171*6af9a77bSJohn Marino.Fn regcomp
172*6af9a77bSJohn Marinofunction
173*6af9a77bSJohn Marinocompiles an RE, written as a string, into an internal form.
174*6af9a77bSJohn Marino.Fn regexec
175*6af9a77bSJohn Marinomatches that internal form against a string and reports results.
176*6af9a77bSJohn Marino.Fn regerror
177*6af9a77bSJohn Marinotransforms error codes from either into human-readable messages.
178*6af9a77bSJohn Marino.Fn regfree
179*6af9a77bSJohn Marinofrees any dynamically-allocated storage used by the internal form
180*6af9a77bSJohn Marinoof an RE.
181*6af9a77bSJohn Marino.Pp
182*6af9a77bSJohn MarinoThe header
183*6af9a77bSJohn Marino.In regex.h
184*6af9a77bSJohn Marinodeclares two structure types,
185*6af9a77bSJohn Marino.Ft regex_t
186*6af9a77bSJohn Marinoand
187*6af9a77bSJohn Marino.Ft regmatch_t ,
188*6af9a77bSJohn Marinothe former for compiled internal forms and the latter for match reporting.
189*6af9a77bSJohn MarinoIt also declares the four functions,
190*6af9a77bSJohn Marinoa type
191*6af9a77bSJohn Marino.Ft regoff_t ,
192*6af9a77bSJohn Marinoand a number of constants with names starting with
193*6af9a77bSJohn Marino.Dq Dv REG_ .
194*6af9a77bSJohn Marino.Pp
195*6af9a77bSJohn MarinoThe
196*6af9a77bSJohn Marino.Fn regcomp
197*6af9a77bSJohn Marinofunction
198*6af9a77bSJohn Marinocompiles the regular expression contained in the
199*6af9a77bSJohn Marino.Fa pattern
200*6af9a77bSJohn Marinostring,
201*6af9a77bSJohn Marinosubject to the flags in
202*6af9a77bSJohn Marino.Fa cflags ,
203*6af9a77bSJohn Marinoand places the results in the
204*6af9a77bSJohn Marino.Ft regex_t
205*6af9a77bSJohn Marinostructure pointed to by
206*6af9a77bSJohn Marino.Fa preg .
207*6af9a77bSJohn MarinoThe
208*6af9a77bSJohn Marino.Fa cflags
209*6af9a77bSJohn Marinoargument
210*6af9a77bSJohn Marinois the bitwise OR of zero or more of the following flags:
211*6af9a77bSJohn Marino.Bl -tag -width REG_EXTENDED
212*6af9a77bSJohn Marino.It Dv REG_EXTENDED
213*6af9a77bSJohn MarinoCompile modern
214*6af9a77bSJohn Marino.Pq Dq extended
215*6af9a77bSJohn MarinoREs,
216*6af9a77bSJohn Marinorather than the obsolete
217*6af9a77bSJohn Marino.Pq Dq basic
218*6af9a77bSJohn MarinoREs that
219*6af9a77bSJohn Marinoare the default.
220*6af9a77bSJohn Marino.It Dv REG_BASIC
221*6af9a77bSJohn MarinoThis is a synonym for 0,
222*6af9a77bSJohn Marinoprovided as a counterpart to
223*6af9a77bSJohn Marino.Dv REG_EXTENDED
224*6af9a77bSJohn Marinoto improve readability.
225*6af9a77bSJohn Marino.It Dv REG_NOSPEC
226*6af9a77bSJohn MarinoCompile with recognition of all special characters turned off.
227*6af9a77bSJohn MarinoAll characters are thus considered ordinary,
228*6af9a77bSJohn Marinoso the
229*6af9a77bSJohn Marino.Dq RE
230*6af9a77bSJohn Marinois a literal string.
231*6af9a77bSJohn MarinoThis is an extension,
232*6af9a77bSJohn Marinocompatible with but not specified by
233*6af9a77bSJohn Marino.St -p1003.2 ,
234*6af9a77bSJohn Marinoand should be used with
235*6af9a77bSJohn Marinocaution in software intended to be portable to other systems.
236*6af9a77bSJohn Marino.Dv REG_EXTENDED
237*6af9a77bSJohn Marinoand
238*6af9a77bSJohn Marino.Dv REG_NOSPEC
239*6af9a77bSJohn Marinomay not be used
240*6af9a77bSJohn Marinoin the same call to
241*6af9a77bSJohn Marino.Fn regcomp .
242*6af9a77bSJohn Marino.It Dv REG_LITERAL
243*6af9a77bSJohn MarinoAn alias of
244*6af9a77bSJohn Marino.Dv REG_NOSPEC .
245*6af9a77bSJohn Marino.It Dv REG_ICASE
246*6af9a77bSJohn MarinoCompile for matching that ignores upper/lower case distinctions.
247*6af9a77bSJohn MarinoSee
248*6af9a77bSJohn Marino.Xr re_format 7 .
249*6af9a77bSJohn Marino.It Dv REG_NOSUB
250*6af9a77bSJohn MarinoCompile for matching that need only report success or failure,
251*6af9a77bSJohn Marinonot what was matched.
252*6af9a77bSJohn Marino.It Dv REG_NEWLINE
253*6af9a77bSJohn MarinoCompile for newline-sensitive matching.
254*6af9a77bSJohn MarinoBy default, newline is a completely ordinary character with no special
255*6af9a77bSJohn Marinomeaning in either REs or strings.
256*6af9a77bSJohn MarinoWith this flag,
257*6af9a77bSJohn Marino.Ql [^
258*6af9a77bSJohn Marinobracket expressions and
259*6af9a77bSJohn Marino.Ql .\&
260*6af9a77bSJohn Marinonever match newline,
261*6af9a77bSJohn Marinoa
262*6af9a77bSJohn Marino.Ql ^\&
263*6af9a77bSJohn Marinoanchor matches the null string after any newline in the string
264*6af9a77bSJohn Marinoin addition to its normal function,
265*6af9a77bSJohn Marinoand the
266*6af9a77bSJohn Marino.Ql $\&
267*6af9a77bSJohn Marinoanchor matches the null string before any newline in the
268*6af9a77bSJohn Marinostring in addition to its normal function.
269*6af9a77bSJohn Marino.It Dv REG_PEND
270*6af9a77bSJohn Marino(Note that
271*6af9a77bSJohn Marino.Dv REG_PEND
272*6af9a77bSJohn Marinois not recognized by any of the wide character or
273*6af9a77bSJohn Marino.Dq Nm n
274*6af9a77bSJohn Marinovariants.
275*6af9a77bSJohn MarinoBesides, the
276*6af9a77bSJohn Marino.Dq Nm n
277*6af9a77bSJohn Marinovariants can be used instead of
278*6af9a77bSJohn Marino.Dv REG_PEND ;
279*6af9a77bSJohn Marinosee EXTENDED APIS below.)
280*6af9a77bSJohn MarinoThe regular expression ends,
281*6af9a77bSJohn Marinonot at the first NUL,
282*6af9a77bSJohn Marinobut just before the character pointed to by the
283*6af9a77bSJohn Marino.Va re_endp
284*6af9a77bSJohn Marinomember of the structure pointed to by
285*6af9a77bSJohn Marino.Fa preg .
286*6af9a77bSJohn MarinoThe
287*6af9a77bSJohn Marino.Va re_endp
288*6af9a77bSJohn Marinomember is of type
289*6af9a77bSJohn Marino.Ft "const char *" .
290*6af9a77bSJohn MarinoThis flag permits inclusion of NULs in the RE;
291*6af9a77bSJohn Marinothey are considered ordinary characters.
292*6af9a77bSJohn MarinoThis is an extension,
293*6af9a77bSJohn Marinocompatible with but not specified by
294*6af9a77bSJohn Marino.St -p1003.2 ,
295*6af9a77bSJohn Marinoand should be used with
296*6af9a77bSJohn Marinocaution in software intended to be portable to other systems.
297*6af9a77bSJohn Marino.It Dv REG_ENHANCED
298*6af9a77bSJohn MarinoRecognize enhanced regular expression features; see
299*6af9a77bSJohn Marino.Xr re_format 7
300*6af9a77bSJohn Marinofor details.
301*6af9a77bSJohn MarinoThis is an extension not specified by
302*6af9a77bSJohn Marino.St -p1003.2 ,
303*6af9a77bSJohn Marinoand should be used with
304*6af9a77bSJohn Marinocaution in software intended to be portable to other systems.
305*6af9a77bSJohn Marino.It Dv REG_MINIMAL
306*6af9a77bSJohn MarinoUse minimal (non-greedy) repetitions instead of the normal greedy ones; see
307*6af9a77bSJohn Marino.Xr re_format 7
308*6af9a77bSJohn Marinofor details.
309*6af9a77bSJohn Marino(This only applies when both
310*6af9a77bSJohn Marino.Dv REG_ENHANCED
311*6af9a77bSJohn Marinoand
312*6af9a77bSJohn Marino.Dv REG_EXTENDED
313*6af9a77bSJohn Marinoare also set.)
314*6af9a77bSJohn MarinoThis is an extension not specified by
315*6af9a77bSJohn Marino.St -p1003.2 ,
316*6af9a77bSJohn Marinoand should be used with
317*6af9a77bSJohn Marinocaution in software intended to be portable to other systems.
318*6af9a77bSJohn Marino.It Dv REG_UNGREEDY
319*6af9a77bSJohn MarinoAlias of
320*6af9a77bSJohn Marino.Dv REG_MINIMAL .
321*6af9a77bSJohn Marino.El
322*6af9a77bSJohn Marino.Pp
323*6af9a77bSJohn MarinoWhen successful,
324*6af9a77bSJohn Marino.Fn regcomp
325*6af9a77bSJohn Marinoreturns 0 and fills in the structure pointed to by
326*6af9a77bSJohn Marino.Fa preg .
327*6af9a77bSJohn MarinoOne member of that structure
328*6af9a77bSJohn Marino(other than
329*6af9a77bSJohn Marino.Va re_endp )
330*6af9a77bSJohn Marinois publicized:
331*6af9a77bSJohn Marino.Va re_nsub ,
332*6af9a77bSJohn Marinoof type
333*6af9a77bSJohn Marino.Ft size_t ,
334*6af9a77bSJohn Marinocontains the number of parenthesized subexpressions within the RE
335*6af9a77bSJohn Marino(except that the value of this member is undefined if the
336*6af9a77bSJohn Marino.Dv REG_NOSUB
337*6af9a77bSJohn Marinoflag was used).
338*6af9a77bSJohn MarinoIf
339*6af9a77bSJohn Marino.Fn regcomp
340*6af9a77bSJohn Marinofails, it returns a non-zero error code;
341*6af9a77bSJohn Marinosee
342*6af9a77bSJohn Marino.Sx DIAGNOSTICS .
343*6af9a77bSJohn Marino.Pp
344*6af9a77bSJohn MarinoThe
345*6af9a77bSJohn Marino.Fn regexec
346*6af9a77bSJohn Marinofunction
347*6af9a77bSJohn Marinomatches the compiled RE pointed to by
348*6af9a77bSJohn Marino.Fa preg
349*6af9a77bSJohn Marinoagainst the
350*6af9a77bSJohn Marino.Fa string ,
351*6af9a77bSJohn Marinosubject to the flags in
352*6af9a77bSJohn Marino.Fa eflags ,
353*6af9a77bSJohn Marinoand reports results using
354*6af9a77bSJohn Marino.Fa nmatch ,
355*6af9a77bSJohn Marino.Fa pmatch ,
356*6af9a77bSJohn Marinoand the returned value.
357*6af9a77bSJohn MarinoThe RE must have been compiled by a previous invocation of
358*6af9a77bSJohn Marino.Fn regcomp .
359*6af9a77bSJohn MarinoThe compiled form is not altered during execution of
360*6af9a77bSJohn Marino.Fn regexec ,
361*6af9a77bSJohn Marinoso a single compiled RE can be used simultaneously by multiple threads.
362*6af9a77bSJohn Marino.Pp
363*6af9a77bSJohn MarinoBy default,
364*6af9a77bSJohn Marinothe NUL-terminated string pointed to by
365*6af9a77bSJohn Marino.Fa string
366*6af9a77bSJohn Marinois considered to be the text of an entire line, minus any terminating
367*6af9a77bSJohn Marinonewline.
368*6af9a77bSJohn MarinoThe
369*6af9a77bSJohn Marino.Fa eflags
370*6af9a77bSJohn Marinoargument is the bitwise OR of zero or more of the following flags:
371*6af9a77bSJohn Marino.Bl -tag -width REG_STARTEND
372*6af9a77bSJohn Marino.It Dv REG_NOTBOL
373*6af9a77bSJohn MarinoThe first character of
374*6af9a77bSJohn Marinothe string
375*6af9a77bSJohn Marinois not the beginning of a line, so the
376*6af9a77bSJohn Marino.Ql ^\&
377*6af9a77bSJohn Marinoanchor should not match before it.
378*6af9a77bSJohn MarinoThis does not affect the behavior of newlines under
379*6af9a77bSJohn Marino.Dv REG_NEWLINE .
380*6af9a77bSJohn Marino.It Dv REG_NOTEOL
381*6af9a77bSJohn MarinoThe NUL terminating
382*6af9a77bSJohn Marinothe string
383*6af9a77bSJohn Marinodoes not end a line, so the
384*6af9a77bSJohn Marino.Ql $\&
385*6af9a77bSJohn Marinoanchor should not match before it.
386*6af9a77bSJohn MarinoThis does not affect the behavior of newlines under
387*6af9a77bSJohn Marino.Dv REG_NEWLINE .
388*6af9a77bSJohn Marino.It Dv REG_STARTEND
389*6af9a77bSJohn MarinoThe string is considered to start at
390*6af9a77bSJohn Marino.Fa string
391*6af9a77bSJohn Marino+
392*6af9a77bSJohn Marino.Fa pmatch Ns [0]. Ns Va rm_so
393*6af9a77bSJohn Marinoand to have a terminating NUL located at
394*6af9a77bSJohn Marino.Fa string
395*6af9a77bSJohn Marino+
396*6af9a77bSJohn Marino.Fa pmatch Ns [0]. Ns Va rm_eo
397*6af9a77bSJohn Marino(there need not actually be a NUL at that location),
398*6af9a77bSJohn Marinoregardless of the value of
399*6af9a77bSJohn Marino.Fa nmatch .
400*6af9a77bSJohn MarinoSee below for the definition of
401*6af9a77bSJohn Marino.Fa pmatch
402*6af9a77bSJohn Marinoand
403*6af9a77bSJohn Marino.Fa nmatch .
404*6af9a77bSJohn MarinoThis is an extension,
405*6af9a77bSJohn Marinocompatible with but not specified by
406*6af9a77bSJohn Marino.St -p1003.2 ,
407*6af9a77bSJohn Marinoand should be used with
408*6af9a77bSJohn Marinocaution in software intended to be portable to other systems.
409*6af9a77bSJohn MarinoNote that a non-zero
410*6af9a77bSJohn Marino.Va rm_so
411*6af9a77bSJohn Marinodoes not imply
412*6af9a77bSJohn Marino.Dv REG_NOTBOL ;
413*6af9a77bSJohn Marino.Dv REG_STARTEND
414*6af9a77bSJohn Marinoaffects only the location of the string,
415*6af9a77bSJohn Marinonot how it is matched.
416*6af9a77bSJohn Marino.El
417*6af9a77bSJohn Marino.Pp
418*6af9a77bSJohn MarinoSee
419*6af9a77bSJohn Marino.Xr re_format 7
420*6af9a77bSJohn Marinofor a discussion of what is matched in situations where an RE or a
421*6af9a77bSJohn Marinoportion thereof could match any of several substrings of
422*6af9a77bSJohn Marino.Fa string .
423*6af9a77bSJohn Marino.Pp
424*6af9a77bSJohn MarinoNormally,
425*6af9a77bSJohn Marino.Fn regexec
426*6af9a77bSJohn Marinoreturns 0 for success and the non-zero code
427*6af9a77bSJohn Marino.Dv REG_NOMATCH
428*6af9a77bSJohn Marinofor failure.
429*6af9a77bSJohn MarinoOther non-zero error codes may be returned in exceptional situations;
430*6af9a77bSJohn Marinosee
431*6af9a77bSJohn Marino.Sx DIAGNOSTICS .
432*6af9a77bSJohn Marino.Pp
433*6af9a77bSJohn MarinoIf
434*6af9a77bSJohn Marino.Dv REG_NOSUB
435*6af9a77bSJohn Marinowas specified in the compilation of the RE,
436*6af9a77bSJohn Marinoor if
437*6af9a77bSJohn Marino.Fa nmatch
438*6af9a77bSJohn Marinois 0,
439*6af9a77bSJohn Marino.Fn regexec
440*6af9a77bSJohn Marinoignores the
441*6af9a77bSJohn Marino.Fa pmatch
442*6af9a77bSJohn Marinoargument (but see below for the case where
443*6af9a77bSJohn Marino.Dv REG_STARTEND
444*6af9a77bSJohn Marinois specified).
445*6af9a77bSJohn MarinoOtherwise,
446*6af9a77bSJohn Marino.Fa pmatch
447*6af9a77bSJohn Marinopoints to an array of
448*6af9a77bSJohn Marino.Fa nmatch
449*6af9a77bSJohn Marinostructures of type
450*6af9a77bSJohn Marino.Ft regmatch_t .
451*6af9a77bSJohn MarinoSuch a structure has at least the members
452*6af9a77bSJohn Marino.Va rm_so
453*6af9a77bSJohn Marinoand
454*6af9a77bSJohn Marino.Va rm_eo ,
455*6af9a77bSJohn Marinoboth of type
456*6af9a77bSJohn Marino.Ft regoff_t
457*6af9a77bSJohn Marino(a signed arithmetic type at least as large as an
458*6af9a77bSJohn Marino.Ft off_t
459*6af9a77bSJohn Marinoand a
460*6af9a77bSJohn Marino.Ft ssize_t ) ,
461*6af9a77bSJohn Marinocontaining respectively the offset of the first character of a substring
462*6af9a77bSJohn Marinoand the offset of the first character after the end of the substring.
463*6af9a77bSJohn MarinoOffsets are measured from the beginning of the
464*6af9a77bSJohn Marino.Fa string
465*6af9a77bSJohn Marinoargument given to
466*6af9a77bSJohn Marino.Fn regexec .
467*6af9a77bSJohn MarinoAn empty substring is denoted by equal offsets,
468*6af9a77bSJohn Marinoboth indicating the character following the empty substring.
469*6af9a77bSJohn Marino.Pp
470*6af9a77bSJohn MarinoThe 0th member of the
471*6af9a77bSJohn Marino.Fa pmatch
472*6af9a77bSJohn Marinoarray is filled in to indicate what substring of
473*6af9a77bSJohn Marino.Fa string
474*6af9a77bSJohn Marinowas matched by the entire RE.
475*6af9a77bSJohn MarinoRemaining members report what substring was matched by parenthesized
476*6af9a77bSJohn Marinosubexpressions within the RE;
477*6af9a77bSJohn Marinomember
478*6af9a77bSJohn Marino.Va i
479*6af9a77bSJohn Marinoreports subexpression
480*6af9a77bSJohn Marino.Va i ,
481*6af9a77bSJohn Marinowith subexpressions counted (starting at 1) by the order of their opening
482*6af9a77bSJohn Marinoparentheses in the RE, left to right.
483*6af9a77bSJohn MarinoUnused entries in the array (corresponding either to subexpressions that
484*6af9a77bSJohn Marinodid not participate in the match at all, or to subexpressions that do not
485*6af9a77bSJohn Marinoexist in the RE (that is,
486*6af9a77bSJohn Marino.Va i
487*6af9a77bSJohn Marino>
488*6af9a77bSJohn Marino.Fa preg Ns -> Ns Va re_nsub ) )
489*6af9a77bSJohn Marinohave both
490*6af9a77bSJohn Marino.Va rm_so
491*6af9a77bSJohn Marinoand
492*6af9a77bSJohn Marino.Va rm_eo
493*6af9a77bSJohn Marinoset to -1.
494*6af9a77bSJohn MarinoIf a subexpression participated in the match several times,
495*6af9a77bSJohn Marinothe reported substring is the last one it matched.
496*6af9a77bSJohn Marino(Note, as an example in particular, that when the RE
497*6af9a77bSJohn Marino.Ql "(b*)+"
498*6af9a77bSJohn Marinomatches
499*6af9a77bSJohn Marino.Ql bbb ,
500*6af9a77bSJohn Marinothe parenthesized subexpression matches each of the three
501*6af9a77bSJohn Marino.So Li b Sc Ns s
502*6af9a77bSJohn Marinoand then
503*6af9a77bSJohn Marinoan infinite number of empty strings following the last
504*6af9a77bSJohn Marino.Ql b ,
505*6af9a77bSJohn Marinoso the reported substring is one of the empties.)
506*6af9a77bSJohn Marino.Pp
507*6af9a77bSJohn MarinoIf
508*6af9a77bSJohn Marino.Dv REG_STARTEND
509*6af9a77bSJohn Marinois specified,
510*6af9a77bSJohn Marino.Fa pmatch
511*6af9a77bSJohn Marinomust point to at least one
512*6af9a77bSJohn Marino.Ft regmatch_t
513*6af9a77bSJohn Marino(even if
514*6af9a77bSJohn Marino.Fa nmatch
515*6af9a77bSJohn Marinois 0 or
516*6af9a77bSJohn Marino.Dv REG_NOSUB
517*6af9a77bSJohn Marinowas specified),
518*6af9a77bSJohn Marinoto hold the input offsets for
519*6af9a77bSJohn Marino.Dv REG_STARTEND .
520*6af9a77bSJohn MarinoUse for output is still entirely controlled by
521*6af9a77bSJohn Marino.Fa nmatch ;
522*6af9a77bSJohn Marinoif
523*6af9a77bSJohn Marino.Fa nmatch
524*6af9a77bSJohn Marinois 0 or
525*6af9a77bSJohn Marino.Dv REG_NOSUB
526*6af9a77bSJohn Marinowas specified,
527*6af9a77bSJohn Marinothe value of
528*6af9a77bSJohn Marino.Fa pmatch Ns [0]
529*6af9a77bSJohn Marinowill not be changed by a successful
530*6af9a77bSJohn Marino.Fn regexec .
531*6af9a77bSJohn Marino.Pp
532*6af9a77bSJohn MarinoThe
533*6af9a77bSJohn Marino.Fn regerror
534*6af9a77bSJohn Marinofunction
535*6af9a77bSJohn Marinomaps a non-zero
536*6af9a77bSJohn Marino.Fa errcode
537*6af9a77bSJohn Marinofrom either
538*6af9a77bSJohn Marino.Fn regcomp
539*6af9a77bSJohn Marinoor
540*6af9a77bSJohn Marino.Fn regexec
541*6af9a77bSJohn Marinoto a human-readable, printable message.
542*6af9a77bSJohn MarinoIf
543*6af9a77bSJohn Marino.Fa preg
544*6af9a77bSJohn Marinois
545*6af9a77bSJohn Marino.No non\- Ns Dv NULL ,
546*6af9a77bSJohn Marinothe error code should have arisen from use of
547*6af9a77bSJohn Marinothe
548*6af9a77bSJohn Marino.Ft regex_t
549*6af9a77bSJohn Marinopointed to by
550*6af9a77bSJohn Marino.Fa preg ,
551*6af9a77bSJohn Marinoand if the error code came from
552*6af9a77bSJohn Marino.Fn regcomp ,
553*6af9a77bSJohn Marinoit should have been the result from the most recent
554*6af9a77bSJohn Marino.Fn regcomp
555*6af9a77bSJohn Marinousing that
556*6af9a77bSJohn Marino.Ft regex_t .
557*6af9a77bSJohn Marino(The
558*6af9a77bSJohn Marino.Fn regerror
559*6af9a77bSJohn Marinomay be able to supply a more detailed message using information
560*6af9a77bSJohn Marinofrom the
561*6af9a77bSJohn Marino.Ft regex_t . )
562*6af9a77bSJohn MarinoThe
563*6af9a77bSJohn Marino.Fn regerror
564*6af9a77bSJohn Marinofunction
565*6af9a77bSJohn Marinoplaces the NUL-terminated message into the buffer pointed to by
566*6af9a77bSJohn Marino.Fa errbuf ,
567*6af9a77bSJohn Marinolimiting the length (including the NUL) to at most
568*6af9a77bSJohn Marino.Fa errbuf_size
569*6af9a77bSJohn Marinobytes.
570*6af9a77bSJohn MarinoIf the whole message will not fit,
571*6af9a77bSJohn Marinoas much of it as will fit before the terminating NUL is supplied.
572*6af9a77bSJohn MarinoIn any case,
573*6af9a77bSJohn Marinothe returned value is the size of buffer needed to hold the whole
574*6af9a77bSJohn Marinomessage (including terminating NUL).
575*6af9a77bSJohn MarinoIf
576*6af9a77bSJohn Marino.Fa errbuf_size
577*6af9a77bSJohn Marinois 0,
578*6af9a77bSJohn Marino.Fa errbuf
579*6af9a77bSJohn Marinois ignored but the return value is still correct.
580*6af9a77bSJohn Marino.Pp
581*6af9a77bSJohn MarinoIf the
582*6af9a77bSJohn Marino.Fa errcode
583*6af9a77bSJohn Marinogiven to
584*6af9a77bSJohn Marino.Fn regerror
585*6af9a77bSJohn Marinois first ORed with
586*6af9a77bSJohn Marino.Dv REG_ITOA ,
587*6af9a77bSJohn Marinothe
588*6af9a77bSJohn Marino.Dq message
589*6af9a77bSJohn Marinothat results is the printable name of the error code,
590*6af9a77bSJohn Marinoe.g.\&
591*6af9a77bSJohn Marino.Dq Dv REG_NOMATCH ,
592*6af9a77bSJohn Marinorather than an explanation thereof.
593*6af9a77bSJohn MarinoIf
594*6af9a77bSJohn Marino.Fa errcode
595*6af9a77bSJohn Marinois
596*6af9a77bSJohn Marino.Dv REG_ATOI ,
597*6af9a77bSJohn Marinothen
598*6af9a77bSJohn Marino.Fa preg
599*6af9a77bSJohn Marinoshall be
600*6af9a77bSJohn Marino.No non\- Ns Dv NULL
601*6af9a77bSJohn Marinoand the
602*6af9a77bSJohn Marino.Va re_endp
603*6af9a77bSJohn Marinomember of the structure it points to
604*6af9a77bSJohn Marinomust point to the printable name of an error code;
605*6af9a77bSJohn Marinoin this case, the result in
606*6af9a77bSJohn Marino.Fa errbuf
607*6af9a77bSJohn Marinois the decimal digits of
608*6af9a77bSJohn Marinothe numeric value of the error code
609*6af9a77bSJohn Marino(0 if the name is not recognized).
610*6af9a77bSJohn Marino.Dv REG_ITOA
611*6af9a77bSJohn Marinoand
612*6af9a77bSJohn Marino.Dv REG_ATOI
613*6af9a77bSJohn Marinoare intended primarily as debugging facilities;
614*6af9a77bSJohn Marinothey are extensions,
615*6af9a77bSJohn Marinocompatible with but not specified by
616*6af9a77bSJohn Marino.St -p1003.2 ,
617*6af9a77bSJohn Marinoand should be used with
618*6af9a77bSJohn Marinocaution in software intended to be portable to other systems.
619*6af9a77bSJohn MarinoBe warned also that they are considered experimental and changes are possible.
620*6af9a77bSJohn Marino.Pp
621*6af9a77bSJohn MarinoThe
622*6af9a77bSJohn Marino.Fn regfree
623*6af9a77bSJohn Marinofunction
624*6af9a77bSJohn Marinofrees any dynamically-allocated storage associated with the compiled RE
625*6af9a77bSJohn Marinopointed to by
626*6af9a77bSJohn Marino.Fa preg .
627*6af9a77bSJohn MarinoThe remaining
628*6af9a77bSJohn Marino.Ft regex_t
629*6af9a77bSJohn Marinois no longer a valid compiled RE
630*6af9a77bSJohn Marinoand the effect of supplying it to
631*6af9a77bSJohn Marino.Fn regexec
632*6af9a77bSJohn Marinoor
633*6af9a77bSJohn Marino.Fn regerror
634*6af9a77bSJohn Marinois undefined.
635*6af9a77bSJohn Marino.Pp
636*6af9a77bSJohn MarinoNone of these functions references global variables except for tables
637*6af9a77bSJohn Marinoof constants;
638*6af9a77bSJohn Marinoall are safe for use from multiple threads if the arguments are safe.
639*6af9a77bSJohn Marino.Sh EXTENDED APIS
640*6af9a77bSJohn MarinoThese extended APIs are available in Mac OS X 10.8 and beyond, when the
641*6af9a77bSJohn Marinodeployment target is 10.8 or later.
642*6af9a77bSJohn MarinoIt should also be noted that any of the
643*6af9a77bSJohn Marino.Fn regcomp
644*6af9a77bSJohn Marinovariants may be used to initialize a
645*6af9a77bSJohn Marino.Ft regex_t
646*6af9a77bSJohn Marinostructure that can then be passed to any of the
647*6af9a77bSJohn Marino.Fn regexec
648*6af9a77bSJohn Marinovariants.
649*6af9a77bSJohn MarinoSo it is quite legal to compile a wide character RE and use it to match a
650*6af9a77bSJohn Marinomultibyte character string, or vice versa.
651*6af9a77bSJohn Marino.Pp
652*6af9a77bSJohn MarinoThe
653*6af9a77bSJohn Marino.Fn regncomp
654*6af9a77bSJohn Marinoroutine compiles regular expressions like
655*6af9a77bSJohn Marino.Fn regcomp ,
656*6af9a77bSJohn Marinobut the length of the regular expression string is specified, allowing a string
657*6af9a77bSJohn Marinothat is not NUL terminated and/or contains NUL characters.
658*6af9a77bSJohn MarinoThis is a modern replacement for using
659*6af9a77bSJohn Marino.Fn regcomp
660*6af9a77bSJohn Marinowith the
661*6af9a77bSJohn Marino.Dv REG_PEND
662*6af9a77bSJohn Marinooption.
663*6af9a77bSJohn Marino.Pp
664*6af9a77bSJohn MarinoSimilarly, the
665*6af9a77bSJohn Marino.Fn regnexec
666*6af9a77bSJohn Marinoroutine is like
667*6af9a77bSJohn Marino.Fn regexec ,
668*6af9a77bSJohn Marinobut the length of the string to match is specified, allowing a string
669*6af9a77bSJohn Marinothat is not NUL terminated and/or contains NUL characters.
670*6af9a77bSJohn Marino.Pp
671*6af9a77bSJohn MarinoThe
672*6af9a77bSJohn Marino.Fn regwcomp
673*6af9a77bSJohn Marinoand
674*6af9a77bSJohn Marino.Fn regwexec
675*6af9a77bSJohn Marinovariants take a wide-character
676*6af9a77bSJohn Marino.Vt ( wchar_t )
677*6af9a77bSJohn Marinostring for the regular expression and string to match.
678*6af9a77bSJohn MarinoAnd
679*6af9a77bSJohn Marino.Fn regwncomp
680*6af9a77bSJohn Marinoand
681*6af9a77bSJohn Marino.Fn regwnexec
682*6af9a77bSJohn Marinoare variants that allow specifying the wide character string length, and
683*6af9a77bSJohn Marinoso allow wide character strings that are not NUL terminated and/or
684*6af9a77bSJohn Marinocontain NUL characters.
685*6af9a77bSJohn Marino.Sh INTERACTION WITH THE LOCALE
686*6af9a77bSJohn MarinoWhen
687*6af9a77bSJohn Marino.Fn regcomp
688*6af9a77bSJohn Marinoor one of its variants is run, the regular expression is compiled into an
689*6af9a77bSJohn Marinointernal form, which may include specific information about the locale currently
690*6af9a77bSJohn Marinoin effect, such as equivalence classes or multi-character collation symbols.
691*6af9a77bSJohn MarinoSo a reference to the current locale is also stored with the internal form,
692*6af9a77bSJohn Marinoso that when
693*6af9a77bSJohn Marino.Fn regexec
694*6af9a77bSJohn Marinois run, it can use the same locale (even if the locale is changed in-between
695*6af9a77bSJohn Marinothe calls to
696*6af9a77bSJohn Marino.Fn regcomp
697*6af9a77bSJohn Marinoand
698*6af9a77bSJohn Marino.Fn regexec ) .
699*6af9a77bSJohn Marino.Pp
700*6af9a77bSJohn MarinoTo provide more direct control over which locale is used,
701*6af9a77bSJohn Marinoroutines with
702*6af9a77bSJohn Marino.Dq Nm _l
703*6af9a77bSJohn Marinoappended to their names are provided that work just like the variants
704*6af9a77bSJohn Marinowithout the
705*6af9a77bSJohn Marino.Dq Nm _l ,
706*6af9a77bSJohn Marinoexcept that a locale (via a
707*6af9a77bSJohn Marino.Vt locale_t
708*6af9a77bSJohn Marinovariable type) is specified directly.
709*6af9a77bSJohn MarinoNote that only variants of
710*6af9a77bSJohn Marino.Fn regcomp
711*6af9a77bSJohn Marinohave
712*6af9a77bSJohn Marino.Dq Nm _l
713*6af9a77bSJohn Marinovariants, since the
714*6af9a77bSJohn Marino.Fn regexec
715*6af9a77bSJohn Marinovariants just use the reference to the locale stored in the internal form.
716*6af9a77bSJohn Marino.Sh IMPLEMENTATION CHOICES
717*6af9a77bSJohn MarinoThe
718*6af9a77bSJohn Marino.Nm regex
719*6af9a77bSJohn Marinoimplementation in Mac OS X 10.8 and later is based on a heavily modified subset
720*6af9a77bSJohn Marinoof TRE (http://laurikari.net/tre/).
721*6af9a77bSJohn MarinoThis provides improved performance, better conformance and additional features.
722*6af9a77bSJohn MarinoHowever, both API and binary compatibility have been maintained with previous
723*6af9a77bSJohn Marinoreleases, so binaries
724*6af9a77bSJohn Marinobuilt on previous releases should work on 10.8 and later, and binaries built on
725*6af9a77bSJohn Marino10.8 and later should be able to run on previous releases (as long as none of
726*6af9a77bSJohn Marinothe new variants or new features are used).
727*6af9a77bSJohn Marino.Pp
728*6af9a77bSJohn MarinoThere are a number of decisions that
729*6af9a77bSJohn Marino.St -p1003.2
730*6af9a77bSJohn Marinoleaves up to the implementor,
731*6af9a77bSJohn Marinoeither by explicitly saying
732*6af9a77bSJohn Marino.Dq undefined
733*6af9a77bSJohn Marinoor by virtue of them being
734*6af9a77bSJohn Marinoforbidden by the RE grammar.
735*6af9a77bSJohn MarinoThis implementation treats them as follows.
736*6af9a77bSJohn Marino.Pp
737*6af9a77bSJohn MarinoSee
738*6af9a77bSJohn Marino.Xr re_format 7
739*6af9a77bSJohn Marinofor a discussion of the definition of case-independent matching.
740*6af9a77bSJohn Marino.Pp
741*6af9a77bSJohn MarinoThere is no particular limit on the length of REs,
742*6af9a77bSJohn Marinoexcept insofar as memory is limited.
743*6af9a77bSJohn MarinoMemory usage is approximately linear in RE size, and largely insensitive
744*6af9a77bSJohn Marinoto RE complexity, except for bounded repetitions.
745*6af9a77bSJohn MarinoSee
746*6af9a77bSJohn Marino.Sx BUGS
747*6af9a77bSJohn Marinofor one short RE using them
748*6af9a77bSJohn Marinothat will run almost any system out of memory.
749*6af9a77bSJohn Marino.Pp
750*6af9a77bSJohn MarinoA backslashed character other than one specifically given a magic meaning
751*6af9a77bSJohn Marinoby
752*6af9a77bSJohn Marino.St -p1003.2
753*6af9a77bSJohn Marino(such magic meanings occur only in obsolete
754*6af9a77bSJohn Marino.Bq Dq basic
755*6af9a77bSJohn MarinoREs)
756*6af9a77bSJohn Marinois taken as an ordinary character.
757*6af9a77bSJohn Marino.Pp
758*6af9a77bSJohn MarinoAny unmatched
759*6af9a77bSJohn Marino.Ql [\&
760*6af9a77bSJohn Marinois a
761*6af9a77bSJohn Marino.Dv REG_EBRACK
762*6af9a77bSJohn Marinoerror.
763*6af9a77bSJohn Marino.Pp
764*6af9a77bSJohn MarinoEquivalence classes cannot begin or end bracket-expression ranges.
765*6af9a77bSJohn MarinoThe endpoint of one range cannot begin another.
766*6af9a77bSJohn Marino.Pp
767*6af9a77bSJohn Marino.Dv RE_DUP_MAX ,
768*6af9a77bSJohn Marinothe limit on repetition counts in bounded repetitions, is 255.
769*6af9a77bSJohn Marino.Pp
770*6af9a77bSJohn MarinoA repetition operator
771*6af9a77bSJohn Marino.Ql ( ?\& ,
772*6af9a77bSJohn Marino.Ql *\& ,
773*6af9a77bSJohn Marino.Ql +\& ,
774*6af9a77bSJohn Marinoor bounds)
775*6af9a77bSJohn Marinocannot follow another
776*6af9a77bSJohn Marinorepetition operator, except for the use of
777*6af9a77bSJohn Marino.Ql ?\&
778*6af9a77bSJohn Marinofor minimal repetition (for enhanced extended REs; see
779*6af9a77bSJohn Marino.Xr re_format 7
780*6af9a77bSJohn Marinofor details).
781*6af9a77bSJohn MarinoA repetition operator cannot begin an expression or subexpression
782*6af9a77bSJohn Marinoor follow
783*6af9a77bSJohn Marino.Ql ^\&
784*6af9a77bSJohn Marinoor
785*6af9a77bSJohn Marino.Ql |\& .
786*6af9a77bSJohn Marino.Pp
787*6af9a77bSJohn Marino.Ql |\&
788*6af9a77bSJohn Marinocannot appear first or last in a (sub)expression or after another
789*6af9a77bSJohn Marino.Ql |\& ,
790*6af9a77bSJohn Marinoi.e., an operand of
791*6af9a77bSJohn Marino.Ql |\&
792*6af9a77bSJohn Marinocannot be an empty subexpression.
793*6af9a77bSJohn MarinoAn empty parenthesized subexpression,
794*6af9a77bSJohn Marino.Ql "()" ,
795*6af9a77bSJohn Marinois legal and matches an
796*6af9a77bSJohn Marinoempty (sub)string.
797*6af9a77bSJohn MarinoAn empty string is not a legal RE.
798*6af9a77bSJohn Marino.Pp
799*6af9a77bSJohn MarinoA
800*6af9a77bSJohn Marino.Ql {\&
801*6af9a77bSJohn Marinofollowed by a digit is considered the beginning of bounds for a
802*6af9a77bSJohn Marinobounded repetition, which must then follow the syntax for bounds.
803*6af9a77bSJohn MarinoA
804*6af9a77bSJohn Marino.Ql {\&
805*6af9a77bSJohn Marino.Em not
806*6af9a77bSJohn Marinofollowed by a digit is considered an ordinary character.
807*6af9a77bSJohn Marino.Pp
808*6af9a77bSJohn Marino.Ql ^\&
809*6af9a77bSJohn Marinoand
810*6af9a77bSJohn Marino.Ql $\&
811*6af9a77bSJohn Marinobeginning and ending subexpressions in obsolete
812*6af9a77bSJohn Marino.Pq Dq basic
813*6af9a77bSJohn MarinoREs are anchors, not ordinary characters.
814*6af9a77bSJohn Marino.Sh DIAGNOSTICS
815*6af9a77bSJohn MarinoNon-zero error codes from
816*6af9a77bSJohn Marino.Fn regcomp
817*6af9a77bSJohn Marinoand
818*6af9a77bSJohn Marino.Fn regexec
819*6af9a77bSJohn Marinoinclude the following:
820*6af9a77bSJohn Marino.Pp
821*6af9a77bSJohn Marino.Bl -tag -width REG_ECOLLATE -compact
822*6af9a77bSJohn Marino.It Dv REG_NOMATCH
823*6af9a77bSJohn MarinoThe
824*6af9a77bSJohn Marino.Fn regexec
825*6af9a77bSJohn Marinofunction
826*6af9a77bSJohn Marinofailed to match
827*6af9a77bSJohn Marino.It Dv REG_BADPAT
828*6af9a77bSJohn Marinoinvalid regular expression
829*6af9a77bSJohn Marino.It Dv REG_ECOLLATE
830*6af9a77bSJohn Marinoinvalid collating element
831*6af9a77bSJohn Marino.It Dv REG_ECTYPE
832*6af9a77bSJohn Marinoinvalid character class
833*6af9a77bSJohn Marino.It Dv REG_EESCAPE
834*6af9a77bSJohn Marino.Ql \e
835*6af9a77bSJohn Marinoapplied to unescapable character
836*6af9a77bSJohn Marino.It Dv REG_ESUBREG
837*6af9a77bSJohn Marinoinvalid backreference number
838*6af9a77bSJohn Marino.It Dv REG_EBRACK
839*6af9a77bSJohn Marinobrackets
840*6af9a77bSJohn Marino.Ql "[ ]"
841*6af9a77bSJohn Marinonot balanced
842*6af9a77bSJohn Marino.It Dv REG_EPAREN
843*6af9a77bSJohn Marinoparentheses
844*6af9a77bSJohn Marino.Ql "( )"
845*6af9a77bSJohn Marinonot balanced
846*6af9a77bSJohn Marino.It Dv REG_EBRACE
847*6af9a77bSJohn Marinobraces
848*6af9a77bSJohn Marino.Ql "{ }"
849*6af9a77bSJohn Marinonot balanced
850*6af9a77bSJohn Marino.It Dv REG_BADBR
851*6af9a77bSJohn Marinoinvalid repetition count(s) in
852*6af9a77bSJohn Marino.Ql "{ }"
853*6af9a77bSJohn Marino.It Dv REG_ERANGE
854*6af9a77bSJohn Marinoinvalid character range in
855*6af9a77bSJohn Marino.Ql "[ ]"
856*6af9a77bSJohn Marino.It Dv REG_ESPACE
857*6af9a77bSJohn Marinoran out of memory
858*6af9a77bSJohn Marino.It Dv REG_BADRPT
859*6af9a77bSJohn Marino.Ql ?\& ,
860*6af9a77bSJohn Marino.Ql *\& ,
861*6af9a77bSJohn Marinoor
862*6af9a77bSJohn Marino.Ql +\&
863*6af9a77bSJohn Marinooperand invalid
864*6af9a77bSJohn Marino.It Dv REG_EMPTY
865*6af9a77bSJohn Marinoempty (sub)expression
866*6af9a77bSJohn Marino.It Dv REG_ASSERT
867*6af9a77bSJohn Marinocannot happen - you found a bug
868*6af9a77bSJohn Marino.It Dv REG_INVARG
869*6af9a77bSJohn Marinoinvalid argument, e.g.\& negative-length string
870*6af9a77bSJohn Marino.It Dv REG_ILLSEQ
871*6af9a77bSJohn Marinoillegal byte sequence (bad multibyte character)
872*6af9a77bSJohn Marino.El
873*6af9a77bSJohn Marino.Sh SEE ALSO
874*6af9a77bSJohn Marino.Xr grep 1 ,
875*6af9a77bSJohn Marino.Xr re_format 7
876*6af9a77bSJohn Marino.Pp
877*6af9a77bSJohn Marino.St -p1003.2 ,
878*6af9a77bSJohn Marinosections 2.8 (Regular Expression Notation)
879*6af9a77bSJohn Marinoand
880*6af9a77bSJohn MarinoB.5 (C Binding for Regular Expression Matching).
881*6af9a77bSJohn Marino.Sh HISTORY
882*6af9a77bSJohn MarinoThe
883*6af9a77bSJohn Marino.Nm regex
884*6af9a77bSJohn Marinoimplementation is based on a heavily modified subset of TRE
885*6af9a77bSJohn Marino(http://laurikari.net/tre/), originally written by Ville Laurikari.
886*6af9a77bSJohn MarinoPrevious releases used an implementation originally written by
887*6af9a77bSJohn Marino.An Henry Spencer ,
888*6af9a77bSJohn Marinoand altered for inclusion in the
889*6af9a77bSJohn Marino.Bx 4.4
890*6af9a77bSJohn Marinodistribution.
891*6af9a77bSJohn Marino.Sh BUGS
892*6af9a77bSJohn MarinoThe beginning-of-line and end-of-line anchors (
893*6af9a77bSJohn Marino.Dq ^\&
894*6af9a77bSJohn Marinoand
895*6af9a77bSJohn Marino.Dq $\& )
896*6af9a77bSJohn Marinoare currently implemented so that repetitions cannot be applied to them.
897*6af9a77bSJohn MarinoThe standards are unclear about whether this is legal, but other
898*6af9a77bSJohn Marino.Nm regex
899*6af9a77bSJohn Marinopackages do support this case.
900*6af9a77bSJohn MarinoIt is best to avoid this non-portable (and not really very useful) case.
901*6af9a77bSJohn Marino.Pp
902*6af9a77bSJohn MarinoThe back-reference code is subtle and doubts linger about its correctness
903*6af9a77bSJohn Marinoin complex cases.
904*6af9a77bSJohn Marino.Pp
905*6af9a77bSJohn MarinoThe
906*6af9a77bSJohn Marino.Fn regexec
907*6af9a77bSJohn Marinovariants use one of two internal matching engines.
908*6af9a77bSJohn MarinoThe normal one is linear worst-case time in the length of the text being
909*6af9a77bSJohn Marinosearched, and quadratic worst-case time in the length of the used regular
910*6af9a77bSJohn Marinoexpression.
911*6af9a77bSJohn MarinoWhen back-references are used, a slower, backtracking engine is used.
912*6af9a77bSJohn MarinoWhile all backtracking matching engines suffer from extreme slowness for certain
913*6af9a77bSJohn Marinopathological cases, the normal engine doesn't suffer from these cases.
914*6af9a77bSJohn MarinoIt is advised to avoid back-references whenever possible.
915*6af9a77bSJohn Marino.Pp
916*6af9a77bSJohn MarinoThe
917*6af9a77bSJohn Marino.Fn regcomp
918*6af9a77bSJohn Marinovariants
919*6af9a77bSJohn Marinoimplement bounded repetitions by macro expansion,
920*6af9a77bSJohn Marinowhich is costly in time and space if counts are large
921*6af9a77bSJohn Marinoor bounded repetitions are nested.
922*6af9a77bSJohn MarinoAn RE like, say,
923*6af9a77bSJohn Marino.Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}"
924*6af9a77bSJohn Marinowill (eventually) run almost any existing machine out of swap space.
925*6af9a77bSJohn Marino.Pp
926*6af9a77bSJohn MarinoDue to a mistake in
927*6af9a77bSJohn Marino.St -p1003.2 ,
928*6af9a77bSJohn Marinothings like
929*6af9a77bSJohn Marino.Ql "a)b"
930*6af9a77bSJohn Marinoare legal REs because
931*6af9a77bSJohn Marino.Ql )\&
932*6af9a77bSJohn Marinois
933*6af9a77bSJohn Marinoa special character only in the presence of a previous unmatched
934*6af9a77bSJohn Marino.Ql (\& .
935*6af9a77bSJohn MarinoThis cannot be fixed until the spec is fixed.
936*6af9a77bSJohn Marino.Pp
937*6af9a77bSJohn MarinoThe standard's definition of back references is vague.
938*6af9a77bSJohn MarinoFor example, does
939*6af9a77bSJohn Marino.Ql "a\e(\e(b\e)*\e2\e)*d"
940*6af9a77bSJohn Marinomatch
941*6af9a77bSJohn Marino.Ql "abbbd" ?
942*6af9a77bSJohn MarinoUntil the standard is clarified,
943*6af9a77bSJohn Marinobehavior in such cases should not be relied on.
944