1*6af9a77bSJohn Marino.\" Copyright (c) 1992, 1993, 1994 Henry Spencer. 2*6af9a77bSJohn Marino.\" Copyright (c) 1992, 1993, 1994 3*6af9a77bSJohn Marino.\" The Regents of the University of California. All rights reserved. 4*6af9a77bSJohn Marino.\" 5*6af9a77bSJohn Marino.\" This code is derived from software contributed to Berkeley by 6*6af9a77bSJohn Marino.\" Henry Spencer. 7*6af9a77bSJohn Marino.\" 8*6af9a77bSJohn Marino.\" Redistribution and use in source and binary forms, with or without 9*6af9a77bSJohn Marino.\" modification, are permitted provided that the following conditions 10*6af9a77bSJohn Marino.\" are met: 11*6af9a77bSJohn Marino.\" 1. Redistributions of source code must retain the above copyright 12*6af9a77bSJohn Marino.\" notice, this list of conditions and the following disclaimer. 13*6af9a77bSJohn Marino.\" 2. Redistributions in binary form must reproduce the above copyright 14*6af9a77bSJohn Marino.\" notice, this list of conditions and the following disclaimer in the 15*6af9a77bSJohn Marino.\" documentation and/or other materials provided with the distribution. 16*6af9a77bSJohn Marino.\" 3. Neither the name of the University nor the names of its contributors 17*6af9a77bSJohn Marino.\" may be used to endorse or promote products derived from this software 18*6af9a77bSJohn Marino.\" without specific prior written permission. 19*6af9a77bSJohn Marino.\" 20*6af9a77bSJohn Marino.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21*6af9a77bSJohn Marino.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22*6af9a77bSJohn Marino.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23*6af9a77bSJohn Marino.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24*6af9a77bSJohn Marino.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25*6af9a77bSJohn Marino.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26*6af9a77bSJohn Marino.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27*6af9a77bSJohn Marino.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28*6af9a77bSJohn Marino.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29*6af9a77bSJohn Marino.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30*6af9a77bSJohn Marino.\" SUCH DAMAGE. 31*6af9a77bSJohn Marino.\" 32*6af9a77bSJohn Marino.\" @(#)regex.3 8.4 (Berkeley) 3/20/94 33*6af9a77bSJohn Marino.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $ 34*6af9a77bSJohn Marino.\" 35*6af9a77bSJohn Marino.Dd August 6, 2015 36*6af9a77bSJohn Marino.Dt REGEX 3 37*6af9a77bSJohn Marino.Os 38*6af9a77bSJohn Marino.Sh NAME 39*6af9a77bSJohn Marino.Nm regcomp , 40*6af9a77bSJohn Marino.Nm regcomp_l , 41*6af9a77bSJohn Marino.Nm regerror , 42*6af9a77bSJohn Marino.Nm regexec , 43*6af9a77bSJohn Marino.Nm regfree , 44*6af9a77bSJohn Marino.Nm regncomp , 45*6af9a77bSJohn Marino.Nm regncomp_l , 46*6af9a77bSJohn Marino.Nm regnexec , 47*6af9a77bSJohn Marino.Nm regwncomp , 48*6af9a77bSJohn Marino.Nm regwncomp_l , 49*6af9a77bSJohn Marino.Nm regwnexec , 50*6af9a77bSJohn Marino.Nm regwcomp , 51*6af9a77bSJohn Marino.Nm regwcomp_l , 52*6af9a77bSJohn Marino.Nm regwexec 53*6af9a77bSJohn Marino.Nd regular-expression library 54*6af9a77bSJohn Marino.Sh SYNOPSIS 55*6af9a77bSJohn Marino.Sy (Standards-compliant APIs) 56*6af9a77bSJohn Marino.Pp 57*6af9a77bSJohn Marino.In regex.h 58*6af9a77bSJohn Marino.Ft int 59*6af9a77bSJohn Marino.Fo regcomp 60*6af9a77bSJohn Marino.Fa "regex_t *restrict preg" 61*6af9a77bSJohn Marino.Fa "const char *restrict pattern" 62*6af9a77bSJohn Marino.Fa "int 
cflags" 63*6af9a77bSJohn Marino.Fc 64*6af9a77bSJohn Marino.Ft size_t 65*6af9a77bSJohn Marino.Fo regerror 66*6af9a77bSJohn Marino.Fa "int errcode" 67*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg" 68*6af9a77bSJohn Marino.Fa "char *restrict errbuf" 69*6af9a77bSJohn Marino.Fa "size_t errbuf_size" 70*6af9a77bSJohn Marino.Fc 71*6af9a77bSJohn Marino.Ft int 72*6af9a77bSJohn Marino.Fo regexec 73*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg" 74*6af9a77bSJohn Marino.Fa "const char *restrict string" 75*6af9a77bSJohn Marino.Fa "size_t nmatch" 76*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]" 77*6af9a77bSJohn Marino.Fa "int eflags" 78*6af9a77bSJohn Marino.Fc 79*6af9a77bSJohn Marino.Ft void 80*6af9a77bSJohn Marino.Fo regfree 81*6af9a77bSJohn Marino.Fa "regex_t *preg" 82*6af9a77bSJohn Marino.Fc 83*6af9a77bSJohn Marino.Pp 84*6af9a77bSJohn Marino.Sy (Non-portable extensions) 85*6af9a77bSJohn Marino.Ft int 86*6af9a77bSJohn Marino.Fo regncomp 87*6af9a77bSJohn Marino.Fa "regex_t *restrict preg" 88*6af9a77bSJohn Marino.Fa "const char *restrict pattern" 89*6af9a77bSJohn Marino.Fa "size_t len" 90*6af9a77bSJohn Marino.Fa "int cflags" 91*6af9a77bSJohn Marino.Fc 92*6af9a77bSJohn Marino.Ft int 93*6af9a77bSJohn Marino.Fo regnexec 94*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg" 95*6af9a77bSJohn Marino.Fa "const char *restrict string" 96*6af9a77bSJohn Marino.Fa "size_t len" 97*6af9a77bSJohn Marino.Fa "size_t nmatch" 98*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]" 99*6af9a77bSJohn Marino.Fa "int eflags" 100*6af9a77bSJohn Marino.Fc 101*6af9a77bSJohn Marino.Ft int 102*6af9a77bSJohn Marino.Fo regwcomp 103*6af9a77bSJohn Marino.Fa "regex_t *restrict preg" 104*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat" 105*6af9a77bSJohn Marino.Fa "int cflags" 106*6af9a77bSJohn Marino.Fc 107*6af9a77bSJohn Marino.Ft int 108*6af9a77bSJohn Marino.Fo regwexec 109*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg" 110*6af9a77bSJohn Marino.Fa "const wchar_t 
*restrict widestr" 111*6af9a77bSJohn Marino.Fa "size_t nmatch" 112*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]" 113*6af9a77bSJohn Marino.Fa "int eflags" 114*6af9a77bSJohn Marino.Fc 115*6af9a77bSJohn Marino.Ft int 116*6af9a77bSJohn Marino.Fo regwncomp 117*6af9a77bSJohn Marino.Fa "regex_t *restrict preg" 118*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat" 119*6af9a77bSJohn Marino.Fa "size_t len" 120*6af9a77bSJohn Marino.Fa "int cflags" 121*6af9a77bSJohn Marino.Fc 122*6af9a77bSJohn Marino.Ft int 123*6af9a77bSJohn Marino.Fo regwnexec 124*6af9a77bSJohn Marino.Fa "const regex_t *restrict preg" 125*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widestr" 126*6af9a77bSJohn Marino.Fa "size_t len" 127*6af9a77bSJohn Marino.Fa "size_t nmatch" 128*6af9a77bSJohn Marino.Fa "regmatch_t pmatch[restrict]" 129*6af9a77bSJohn Marino.Fa "int eflags" 130*6af9a77bSJohn Marino.Fc 131*6af9a77bSJohn Marino.In regex.h 132*6af9a77bSJohn Marino.In xlocale.h 133*6af9a77bSJohn Marino.Ft int 134*6af9a77bSJohn Marino.Fo regcomp_l 135*6af9a77bSJohn Marino.Fa "regex_t *restrict preg" 136*6af9a77bSJohn Marino.Fa "const char *restrict pattern" 137*6af9a77bSJohn Marino.Fa "int cflags" 138*6af9a77bSJohn Marino.Fa "locale_t restrict" 139*6af9a77bSJohn Marino.Fc 140*6af9a77bSJohn Marino.Ft int 141*6af9a77bSJohn Marino.Fo regncomp_l 142*6af9a77bSJohn Marino.Fa "regex_t *restrict preg" 143*6af9a77bSJohn Marino.Fa "const char *restrict pattern" 144*6af9a77bSJohn Marino.Fa "size_t len" 145*6af9a77bSJohn Marino.Fa "int cflags" 146*6af9a77bSJohn Marino.Fa "locale_t restrict" 147*6af9a77bSJohn Marino.Fc 148*6af9a77bSJohn Marino.Ft int 149*6af9a77bSJohn Marino.Fo regwcomp_l 150*6af9a77bSJohn Marino.Fa "regex_t *restrict preg" 151*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat" 152*6af9a77bSJohn Marino.Fa "int cflags" 153*6af9a77bSJohn Marino.Fa "locale_t restrict" 154*6af9a77bSJohn Marino.Fc 155*6af9a77bSJohn Marino.Ft int 156*6af9a77bSJohn Marino.Fo regwncomp_l 157*6af9a77bSJohn 
Marino.Fa "regex_t *restrict preg" 158*6af9a77bSJohn Marino.Fa "const wchar_t *restrict widepat" 159*6af9a77bSJohn Marino.Fa "size_t len" 160*6af9a77bSJohn Marino.Fa "int cflags" 161*6af9a77bSJohn Marino.Fa "locale_t restrict" 162*6af9a77bSJohn Marino.Fc 163*6af9a77bSJohn Marino.Sh DESCRIPTION 164*6af9a77bSJohn MarinoThese routines implement 165*6af9a77bSJohn Marino.St -p1003.2 166*6af9a77bSJohn Marinoregular expressions 167*6af9a77bSJohn Marino.Pq Do RE Dc Ns s ; 168*6af9a77bSJohn Marinosee 169*6af9a77bSJohn Marino.Xr re_format 7 . 170*6af9a77bSJohn MarinoThe 171*6af9a77bSJohn Marino.Fn regcomp 172*6af9a77bSJohn Marinofunction 173*6af9a77bSJohn Marinocompiles an RE, written as a string, into an internal form. 174*6af9a77bSJohn Marino.Fn regexec 175*6af9a77bSJohn Marinomatches that internal form against a string and reports results. 176*6af9a77bSJohn Marino.Fn regerror 177*6af9a77bSJohn Marinotransforms error codes from either into human-readable messages. 178*6af9a77bSJohn Marino.Fn regfree 179*6af9a77bSJohn Marinofrees any dynamically-allocated storage used by the internal form 180*6af9a77bSJohn Marinoof an RE. 181*6af9a77bSJohn Marino.Pp 182*6af9a77bSJohn MarinoThe header 183*6af9a77bSJohn Marino.In regex.h 184*6af9a77bSJohn Marinodeclares two structure types, 185*6af9a77bSJohn Marino.Ft regex_t 186*6af9a77bSJohn Marinoand 187*6af9a77bSJohn Marino.Ft regmatch_t , 188*6af9a77bSJohn Marinothe former for compiled internal forms and the latter for match reporting. 189*6af9a77bSJohn MarinoIt also declares the four functions, 190*6af9a77bSJohn Marinoa type 191*6af9a77bSJohn Marino.Ft regoff_t , 192*6af9a77bSJohn Marinoand a number of constants with names starting with 193*6af9a77bSJohn Marino.Dq Dv REG_ . 
194*6af9a77bSJohn Marino.Pp 195*6af9a77bSJohn MarinoThe 196*6af9a77bSJohn Marino.Fn regcomp 197*6af9a77bSJohn Marinofunction 198*6af9a77bSJohn Marinocompiles the regular expression contained in the 199*6af9a77bSJohn Marino.Fa pattern 200*6af9a77bSJohn Marinostring, 201*6af9a77bSJohn Marinosubject to the flags in 202*6af9a77bSJohn Marino.Fa cflags , 203*6af9a77bSJohn Marinoand places the results in the 204*6af9a77bSJohn Marino.Ft regex_t 205*6af9a77bSJohn Marinostructure pointed to by 206*6af9a77bSJohn Marino.Fa preg . 207*6af9a77bSJohn MarinoThe 208*6af9a77bSJohn Marino.Fa cflags 209*6af9a77bSJohn Marinoargument 210*6af9a77bSJohn Marinois the bitwise OR of zero or more of the following flags: 211*6af9a77bSJohn Marino.Bl -tag -width REG_EXTENDED 212*6af9a77bSJohn Marino.It Dv REG_EXTENDED 213*6af9a77bSJohn MarinoCompile modern 214*6af9a77bSJohn Marino.Pq Dq extended 215*6af9a77bSJohn MarinoREs, 216*6af9a77bSJohn Marinorather than the obsolete 217*6af9a77bSJohn Marino.Pq Dq basic 218*6af9a77bSJohn MarinoREs that 219*6af9a77bSJohn Marinoare the default. 220*6af9a77bSJohn Marino.It Dv REG_BASIC 221*6af9a77bSJohn MarinoThis is a synonym for 0, 222*6af9a77bSJohn Marinoprovided as a counterpart to 223*6af9a77bSJohn Marino.Dv REG_EXTENDED 224*6af9a77bSJohn Marinoto improve readability. 225*6af9a77bSJohn Marino.It Dv REG_NOSPEC 226*6af9a77bSJohn MarinoCompile with recognition of all special characters turned off. 227*6af9a77bSJohn MarinoAll characters are thus considered ordinary, 228*6af9a77bSJohn Marinoso the 229*6af9a77bSJohn Marino.Dq RE 230*6af9a77bSJohn Marinois a literal string. 231*6af9a77bSJohn MarinoThis is an extension, 232*6af9a77bSJohn Marinocompatible with but not specified by 233*6af9a77bSJohn Marino.St -p1003.2 , 234*6af9a77bSJohn Marinoand should be used with 235*6af9a77bSJohn Marinocaution in software intended to be portable to other systems. 
236*6af9a77bSJohn Marino.Dv REG_EXTENDED 237*6af9a77bSJohn Marinoand 238*6af9a77bSJohn Marino.Dv REG_NOSPEC 239*6af9a77bSJohn Marinomay not be used 240*6af9a77bSJohn Marinoin the same call to 241*6af9a77bSJohn Marino.Fn regcomp . 242*6af9a77bSJohn Marino.It Dv REG_LITERAL 243*6af9a77bSJohn MarinoAn alias of 244*6af9a77bSJohn Marino.Dv REG_NOSPEC . 245*6af9a77bSJohn Marino.It Dv REG_ICASE 246*6af9a77bSJohn MarinoCompile for matching that ignores upper/lower case distinctions. 247*6af9a77bSJohn MarinoSee 248*6af9a77bSJohn Marino.Xr re_format 7 . 249*6af9a77bSJohn Marino.It Dv REG_NOSUB 250*6af9a77bSJohn MarinoCompile for matching that need only report success or failure, 251*6af9a77bSJohn Marinonot what was matched. 252*6af9a77bSJohn Marino.It Dv REG_NEWLINE 253*6af9a77bSJohn MarinoCompile for newline-sensitive matching. 254*6af9a77bSJohn MarinoBy default, newline is a completely ordinary character with no special 255*6af9a77bSJohn Marinomeaning in either REs or strings. 256*6af9a77bSJohn MarinoWith this flag, 257*6af9a77bSJohn Marino.Ql [^ 258*6af9a77bSJohn Marinobracket expressions and 259*6af9a77bSJohn Marino.Ql .\& 260*6af9a77bSJohn Marinonever match newline, 261*6af9a77bSJohn Marinoa 262*6af9a77bSJohn Marino.Ql ^\& 263*6af9a77bSJohn Marinoanchor matches the null string after any newline in the string 264*6af9a77bSJohn Marinoin addition to its normal function, 265*6af9a77bSJohn Marinoand the 266*6af9a77bSJohn Marino.Ql $\& 267*6af9a77bSJohn Marinoanchor matches the null string before any newline in the 268*6af9a77bSJohn Marinostring in addition to its normal function. 269*6af9a77bSJohn Marino.It Dv REG_PEND 270*6af9a77bSJohn Marino(Note that 271*6af9a77bSJohn Marino.Dv REG_PEND 272*6af9a77bSJohn Marinois not recognized by any of the wide character or 273*6af9a77bSJohn Marino.Dq Nm n 274*6af9a77bSJohn Marinovariants. 
275*6af9a77bSJohn MarinoBesides, the 276*6af9a77bSJohn Marino.Dq Nm n 277*6af9a77bSJohn Marinovariants can be used instead of 278*6af9a77bSJohn Marino.Dv REG_PEND ; 279*6af9a77bSJohn Marinosee EXTENDED APIS below.) 280*6af9a77bSJohn MarinoThe regular expression ends, 281*6af9a77bSJohn Marinonot at the first NUL, 282*6af9a77bSJohn Marinobut just before the character pointed to by the 283*6af9a77bSJohn Marino.Va re_endp 284*6af9a77bSJohn Marinomember of the structure pointed to by 285*6af9a77bSJohn Marino.Fa preg . 286*6af9a77bSJohn MarinoThe 287*6af9a77bSJohn Marino.Va re_endp 288*6af9a77bSJohn Marinomember is of type 289*6af9a77bSJohn Marino.Ft "const char *" . 290*6af9a77bSJohn MarinoThis flag permits inclusion of NULs in the RE; 291*6af9a77bSJohn Marinothey are considered ordinary characters. 292*6af9a77bSJohn MarinoThis is an extension, 293*6af9a77bSJohn Marinocompatible with but not specified by 294*6af9a77bSJohn Marino.St -p1003.2 , 295*6af9a77bSJohn Marinoand should be used with 296*6af9a77bSJohn Marinocaution in software intended to be portable to other systems. 297*6af9a77bSJohn Marino.It Dv REG_ENHANCED 298*6af9a77bSJohn MarinoRecognize enhanced regular expression features; see 299*6af9a77bSJohn Marino.Xr re_format 7 300*6af9a77bSJohn Marinofor details. 301*6af9a77bSJohn MarinoThis is an extension not specified by 302*6af9a77bSJohn Marino.St -p1003.2 , 303*6af9a77bSJohn Marinoand should be used with 304*6af9a77bSJohn Marinocaution in software intended to be portable to other systems. 305*6af9a77bSJohn Marino.It Dv REG_MINIMAL 306*6af9a77bSJohn MarinoUse minimal (non-greedy) repetitions instead of the normal greedy ones; see 307*6af9a77bSJohn Marino.Xr re_format 7 308*6af9a77bSJohn Marinofor details. 309*6af9a77bSJohn Marino(This only applies when both 310*6af9a77bSJohn Marino.Dv REG_ENHANCED 311*6af9a77bSJohn Marinoand 312*6af9a77bSJohn Marino.Dv REG_EXTENDED 313*6af9a77bSJohn Marinoare also set.) 
314*6af9a77bSJohn MarinoThis is an extension not specified by 315*6af9a77bSJohn Marino.St -p1003.2 , 316*6af9a77bSJohn Marinoand should be used with 317*6af9a77bSJohn Marinocaution in software intended to be portable to other systems. 318*6af9a77bSJohn Marino.It Dv REG_UNGREEDY 319*6af9a77bSJohn MarinoAlias of 320*6af9a77bSJohn Marino.Dv REG_MINIMAL . 321*6af9a77bSJohn Marino.El 322*6af9a77bSJohn Marino.Pp 323*6af9a77bSJohn MarinoWhen successful, 324*6af9a77bSJohn Marino.Fn regcomp 325*6af9a77bSJohn Marinoreturns 0 and fills in the structure pointed to by 326*6af9a77bSJohn Marino.Fa preg . 327*6af9a77bSJohn MarinoOne member of that structure 328*6af9a77bSJohn Marino(other than 329*6af9a77bSJohn Marino.Va re_endp ) 330*6af9a77bSJohn Marinois publicized: 331*6af9a77bSJohn Marino.Va re_nsub , 332*6af9a77bSJohn Marinoof type 333*6af9a77bSJohn Marino.Ft size_t , 334*6af9a77bSJohn Marinocontains the number of parenthesized subexpressions within the RE 335*6af9a77bSJohn Marino(except that the value of this member is undefined if the 336*6af9a77bSJohn Marino.Dv REG_NOSUB 337*6af9a77bSJohn Marinoflag was used). 338*6af9a77bSJohn MarinoIf 339*6af9a77bSJohn Marino.Fn regcomp 340*6af9a77bSJohn Marinofails, it returns a non-zero error code; 341*6af9a77bSJohn Marinosee 342*6af9a77bSJohn Marino.Sx DIAGNOSTICS . 343*6af9a77bSJohn Marino.Pp 344*6af9a77bSJohn MarinoThe 345*6af9a77bSJohn Marino.Fn regexec 346*6af9a77bSJohn Marinofunction 347*6af9a77bSJohn Marinomatches the compiled RE pointed to by 348*6af9a77bSJohn Marino.Fa preg 349*6af9a77bSJohn Marinoagainst the 350*6af9a77bSJohn Marino.Fa string , 351*6af9a77bSJohn Marinosubject to the flags in 352*6af9a77bSJohn Marino.Fa eflags , 353*6af9a77bSJohn Marinoand reports results using 354*6af9a77bSJohn Marino.Fa nmatch , 355*6af9a77bSJohn Marino.Fa pmatch , 356*6af9a77bSJohn Marinoand the returned value. 357*6af9a77bSJohn MarinoThe RE must have been compiled by a previous invocation of 358*6af9a77bSJohn Marino.Fn regcomp . 
359*6af9a77bSJohn MarinoThe compiled form is not altered during execution of 360*6af9a77bSJohn Marino.Fn regexec , 361*6af9a77bSJohn Marinoso a single compiled RE can be used simultaneously by multiple threads. 362*6af9a77bSJohn Marino.Pp 363*6af9a77bSJohn MarinoBy default, 364*6af9a77bSJohn Marinothe NUL-terminated string pointed to by 365*6af9a77bSJohn Marino.Fa string 366*6af9a77bSJohn Marinois considered to be the text of an entire line, minus any terminating 367*6af9a77bSJohn Marinonewline. 368*6af9a77bSJohn MarinoThe 369*6af9a77bSJohn Marino.Fa eflags 370*6af9a77bSJohn Marinoargument is the bitwise OR of zero or more of the following flags: 371*6af9a77bSJohn Marino.Bl -tag -width REG_STARTEND 372*6af9a77bSJohn Marino.It Dv REG_NOTBOL 373*6af9a77bSJohn MarinoThe first character of 374*6af9a77bSJohn Marinothe string 375*6af9a77bSJohn Marinois not the beginning of a line, so the 376*6af9a77bSJohn Marino.Ql ^\& 377*6af9a77bSJohn Marinoanchor should not match before it. 378*6af9a77bSJohn MarinoThis does not affect the behavior of newlines under 379*6af9a77bSJohn Marino.Dv REG_NEWLINE . 380*6af9a77bSJohn Marino.It Dv REG_NOTEOL 381*6af9a77bSJohn MarinoThe NUL terminating 382*6af9a77bSJohn Marinothe string 383*6af9a77bSJohn Marinodoes not end a line, so the 384*6af9a77bSJohn Marino.Ql $\& 385*6af9a77bSJohn Marinoanchor should not match before it. 386*6af9a77bSJohn MarinoThis does not affect the behavior of newlines under 387*6af9a77bSJohn Marino.Dv REG_NEWLINE . 388*6af9a77bSJohn Marino.It Dv REG_STARTEND 389*6af9a77bSJohn MarinoThe string is considered to start at 390*6af9a77bSJohn Marino.Fa string 391*6af9a77bSJohn Marino+ 392*6af9a77bSJohn Marino.Fa pmatch Ns [0]. Ns Va rm_so 393*6af9a77bSJohn Marinoand to have a terminating NUL located at 394*6af9a77bSJohn Marino.Fa string 395*6af9a77bSJohn Marino+ 396*6af9a77bSJohn Marino.Fa pmatch Ns [0]. 
Ns Va rm_eo 397*6af9a77bSJohn Marino(there need not actually be a NUL at that location), 398*6af9a77bSJohn Marinoregardless of the value of 399*6af9a77bSJohn Marino.Fa nmatch . 400*6af9a77bSJohn MarinoSee below for the definition of 401*6af9a77bSJohn Marino.Fa pmatch 402*6af9a77bSJohn Marinoand 403*6af9a77bSJohn Marino.Fa nmatch . 404*6af9a77bSJohn MarinoThis is an extension, 405*6af9a77bSJohn Marinocompatible with but not specified by 406*6af9a77bSJohn Marino.St -p1003.2 , 407*6af9a77bSJohn Marinoand should be used with 408*6af9a77bSJohn Marinocaution in software intended to be portable to other systems. 409*6af9a77bSJohn MarinoNote that a non-zero 410*6af9a77bSJohn Marino.Va rm_so 411*6af9a77bSJohn Marinodoes not imply 412*6af9a77bSJohn Marino.Dv REG_NOTBOL ; 413*6af9a77bSJohn Marino.Dv REG_STARTEND 414*6af9a77bSJohn Marinoaffects only the location of the string, 415*6af9a77bSJohn Marinonot how it is matched. 416*6af9a77bSJohn Marino.El 417*6af9a77bSJohn Marino.Pp 418*6af9a77bSJohn MarinoSee 419*6af9a77bSJohn Marino.Xr re_format 7 420*6af9a77bSJohn Marinofor a discussion of what is matched in situations where an RE or a 421*6af9a77bSJohn Marinoportion thereof could match any of several substrings of 422*6af9a77bSJohn Marino.Fa string . 423*6af9a77bSJohn Marino.Pp 424*6af9a77bSJohn MarinoNormally, 425*6af9a77bSJohn Marino.Fn regexec 426*6af9a77bSJohn Marinoreturns 0 for success and the non-zero code 427*6af9a77bSJohn Marino.Dv REG_NOMATCH 428*6af9a77bSJohn Marinofor failure. 429*6af9a77bSJohn MarinoOther non-zero error codes may be returned in exceptional situations; 430*6af9a77bSJohn Marinosee 431*6af9a77bSJohn Marino.Sx DIAGNOSTICS . 
432*6af9a77bSJohn Marino.Pp 433*6af9a77bSJohn MarinoIf 434*6af9a77bSJohn Marino.Dv REG_NOSUB 435*6af9a77bSJohn Marinowas specified in the compilation of the RE, 436*6af9a77bSJohn Marinoor if 437*6af9a77bSJohn Marino.Fa nmatch 438*6af9a77bSJohn Marinois 0, 439*6af9a77bSJohn Marino.Fn regexec 440*6af9a77bSJohn Marinoignores the 441*6af9a77bSJohn Marino.Fa pmatch 442*6af9a77bSJohn Marinoargument (but see below for the case where 443*6af9a77bSJohn Marino.Dv REG_STARTEND 444*6af9a77bSJohn Marinois specified). 445*6af9a77bSJohn MarinoOtherwise, 446*6af9a77bSJohn Marino.Fa pmatch 447*6af9a77bSJohn Marinopoints to an array of 448*6af9a77bSJohn Marino.Fa nmatch 449*6af9a77bSJohn Marinostructures of type 450*6af9a77bSJohn Marino.Ft regmatch_t . 451*6af9a77bSJohn MarinoSuch a structure has at least the members 452*6af9a77bSJohn Marino.Va rm_so 453*6af9a77bSJohn Marinoand 454*6af9a77bSJohn Marino.Va rm_eo , 455*6af9a77bSJohn Marinoboth of type 456*6af9a77bSJohn Marino.Ft regoff_t 457*6af9a77bSJohn Marino(a signed arithmetic type at least as large as an 458*6af9a77bSJohn Marino.Ft off_t 459*6af9a77bSJohn Marinoand a 460*6af9a77bSJohn Marino.Ft ssize_t ) , 461*6af9a77bSJohn Marinocontaining respectively the offset of the first character of a substring 462*6af9a77bSJohn Marinoand the offset of the first character after the end of the substring. 463*6af9a77bSJohn MarinoOffsets are measured from the beginning of the 464*6af9a77bSJohn Marino.Fa string 465*6af9a77bSJohn Marinoargument given to 466*6af9a77bSJohn Marino.Fn regexec . 467*6af9a77bSJohn MarinoAn empty substring is denoted by equal offsets, 468*6af9a77bSJohn Marinoboth indicating the character following the empty substring. 469*6af9a77bSJohn Marino.Pp 470*6af9a77bSJohn MarinoThe 0th member of the 471*6af9a77bSJohn Marino.Fa pmatch 472*6af9a77bSJohn Marinoarray is filled in to indicate what substring of 473*6af9a77bSJohn Marino.Fa string 474*6af9a77bSJohn Marinowas matched by the entire RE. 
475*6af9a77bSJohn MarinoRemaining members report what substring was matched by parenthesized 476*6af9a77bSJohn Marinosubexpressions within the RE; 477*6af9a77bSJohn Marinomember 478*6af9a77bSJohn Marino.Va i 479*6af9a77bSJohn Marinoreports subexpression 480*6af9a77bSJohn Marino.Va i , 481*6af9a77bSJohn Marinowith subexpressions counted (starting at 1) by the order of their opening 482*6af9a77bSJohn Marinoparentheses in the RE, left to right. 483*6af9a77bSJohn MarinoUnused entries in the array (corresponding either to subexpressions that 484*6af9a77bSJohn Marinodid not participate in the match at all, or to subexpressions that do not 485*6af9a77bSJohn Marinoexist in the RE (that is, 486*6af9a77bSJohn Marino.Va i 487*6af9a77bSJohn Marino> 488*6af9a77bSJohn Marino.Fa preg Ns -> Ns Va re_nsub ) ) 489*6af9a77bSJohn Marinohave both 490*6af9a77bSJohn Marino.Va rm_so 491*6af9a77bSJohn Marinoand 492*6af9a77bSJohn Marino.Va rm_eo 493*6af9a77bSJohn Marinoset to -1. 494*6af9a77bSJohn MarinoIf a subexpression participated in the match several times, 495*6af9a77bSJohn Marinothe reported substring is the last one it matched. 496*6af9a77bSJohn Marino(Note, as an example in particular, that when the RE 497*6af9a77bSJohn Marino.Ql "(b*)+" 498*6af9a77bSJohn Marinomatches 499*6af9a77bSJohn Marino.Ql bbb , 500*6af9a77bSJohn Marinothe parenthesized subexpression matches each of the three 501*6af9a77bSJohn Marino.So Li b Sc Ns s 502*6af9a77bSJohn Marinoand then 503*6af9a77bSJohn Marinoan infinite number of empty strings following the last 504*6af9a77bSJohn Marino.Ql b , 505*6af9a77bSJohn Marinoso the reported substring is one of the empties.) 
506*6af9a77bSJohn Marino.Pp 507*6af9a77bSJohn MarinoIf 508*6af9a77bSJohn Marino.Dv REG_STARTEND 509*6af9a77bSJohn Marinois specified, 510*6af9a77bSJohn Marino.Fa pmatch 511*6af9a77bSJohn Marinomust point to at least one 512*6af9a77bSJohn Marino.Ft regmatch_t 513*6af9a77bSJohn Marino(even if 514*6af9a77bSJohn Marino.Fa nmatch 515*6af9a77bSJohn Marinois 0 or 516*6af9a77bSJohn Marino.Dv REG_NOSUB 517*6af9a77bSJohn Marinowas specified), 518*6af9a77bSJohn Marinoto hold the input offsets for 519*6af9a77bSJohn Marino.Dv REG_STARTEND . 520*6af9a77bSJohn MarinoUse for output is still entirely controlled by 521*6af9a77bSJohn Marino.Fa nmatch ; 522*6af9a77bSJohn Marinoif 523*6af9a77bSJohn Marino.Fa nmatch 524*6af9a77bSJohn Marinois 0 or 525*6af9a77bSJohn Marino.Dv REG_NOSUB 526*6af9a77bSJohn Marinowas specified, 527*6af9a77bSJohn Marinothe value of 528*6af9a77bSJohn Marino.Fa pmatch Ns [0] 529*6af9a77bSJohn Marinowill not be changed by a successful 530*6af9a77bSJohn Marino.Fn regexec . 531*6af9a77bSJohn Marino.Pp 532*6af9a77bSJohn MarinoThe 533*6af9a77bSJohn Marino.Fn regerror 534*6af9a77bSJohn Marinofunction 535*6af9a77bSJohn Marinomaps a non-zero 536*6af9a77bSJohn Marino.Fa errcode 537*6af9a77bSJohn Marinofrom either 538*6af9a77bSJohn Marino.Fn regcomp 539*6af9a77bSJohn Marinoor 540*6af9a77bSJohn Marino.Fn regexec 541*6af9a77bSJohn Marinoto a human-readable, printable message. 
542*6af9a77bSJohn MarinoIf 543*6af9a77bSJohn Marino.Fa preg 544*6af9a77bSJohn Marinois 545*6af9a77bSJohn Marino.No non\- Ns Dv NULL , 546*6af9a77bSJohn Marinothe error code should have arisen from use of 547*6af9a77bSJohn Marinothe 548*6af9a77bSJohn Marino.Ft regex_t 549*6af9a77bSJohn Marinopointed to by 550*6af9a77bSJohn Marino.Fa preg , 551*6af9a77bSJohn Marinoand if the error code came from 552*6af9a77bSJohn Marino.Fn regcomp , 553*6af9a77bSJohn Marinoit should have been the result from the most recent 554*6af9a77bSJohn Marino.Fn regcomp 555*6af9a77bSJohn Marinousing that 556*6af9a77bSJohn Marino.Ft regex_t . 557*6af9a77bSJohn MarinoThe 558*6af9a77bSJohn Marino.Fn ( regerror 559*6af9a77bSJohn Marinomay be able to supply a more detailed message using information 560*6af9a77bSJohn Marinofrom the 561*6af9a77bSJohn Marino.Ft regex_t . ) 562*6af9a77bSJohn MarinoThe 563*6af9a77bSJohn Marino.Fn regerror 564*6af9a77bSJohn Marinofunction 565*6af9a77bSJohn Marinoplaces the NUL-terminated message into the buffer pointed to by 566*6af9a77bSJohn Marino.Fa errbuf , 567*6af9a77bSJohn Marinolimiting the length (including the NUL) to at most 568*6af9a77bSJohn Marino.Fa errbuf_size 569*6af9a77bSJohn Marinobytes. 570*6af9a77bSJohn MarinoIf the whole message will not fit, 571*6af9a77bSJohn Marinoas much of it as will fit before the terminating NUL is supplied. 572*6af9a77bSJohn MarinoIn any case, 573*6af9a77bSJohn Marinothe returned value is the size of buffer needed to hold the whole 574*6af9a77bSJohn Marinomessage (including terminating NUL). 575*6af9a77bSJohn MarinoIf 576*6af9a77bSJohn Marino.Fa errbuf_size 577*6af9a77bSJohn Marinois 0, 578*6af9a77bSJohn Marino.Fa errbuf 579*6af9a77bSJohn Marinois ignored but the return value is still correct. 
580*6af9a77bSJohn Marino.Pp 581*6af9a77bSJohn MarinoIf the 582*6af9a77bSJohn Marino.Fa errcode 583*6af9a77bSJohn Marinogiven to 584*6af9a77bSJohn Marino.Fn regerror 585*6af9a77bSJohn Marinois first ORed with 586*6af9a77bSJohn Marino.Dv REG_ITOA , 587*6af9a77bSJohn Marinothe 588*6af9a77bSJohn Marino.Dq message 589*6af9a77bSJohn Marinothat results is the printable name of the error code, 590*6af9a77bSJohn Marinoe.g.\& 591*6af9a77bSJohn Marino.Dq Dv REG_NOMATCH , 592*6af9a77bSJohn Marinorather than an explanation thereof. 593*6af9a77bSJohn MarinoIf 594*6af9a77bSJohn Marino.Fa errcode 595*6af9a77bSJohn Marinois 596*6af9a77bSJohn Marino.Dv REG_ATOI , 597*6af9a77bSJohn Marinothen 598*6af9a77bSJohn Marino.Fa preg 599*6af9a77bSJohn Marinoshall be 600*6af9a77bSJohn Marino.No non\- Ns Dv NULL 601*6af9a77bSJohn Marinoand the 602*6af9a77bSJohn Marino.Va re_endp 603*6af9a77bSJohn Marinomember of the structure it points to 604*6af9a77bSJohn Marinomust point to the printable name of an error code; 605*6af9a77bSJohn Marinoin this case, the result in 606*6af9a77bSJohn Marino.Fa errbuf 607*6af9a77bSJohn Marinois the decimal digits of 608*6af9a77bSJohn Marinothe numeric value of the error code 609*6af9a77bSJohn Marino(0 if the name is not recognized). 610*6af9a77bSJohn Marino.Dv REG_ITOA 611*6af9a77bSJohn Marinoand 612*6af9a77bSJohn Marino.Dv REG_ATOI 613*6af9a77bSJohn Marinoare intended primarily as debugging facilities; 614*6af9a77bSJohn Marinothey are extensions, 615*6af9a77bSJohn Marinocompatible with but not specified by 616*6af9a77bSJohn Marino.St -p1003.2 , 617*6af9a77bSJohn Marinoand should be used with 618*6af9a77bSJohn Marinocaution in software intended to be portable to other systems. 619*6af9a77bSJohn MarinoBe warned also that they are considered experimental and changes are possible. 
620*6af9a77bSJohn Marino.Pp 621*6af9a77bSJohn MarinoThe 622*6af9a77bSJohn Marino.Fn regfree 623*6af9a77bSJohn Marinofunction 624*6af9a77bSJohn Marinofrees any dynamically-allocated storage associated with the compiled RE 625*6af9a77bSJohn Marinopointed to by 626*6af9a77bSJohn Marino.Fa preg . 627*6af9a77bSJohn MarinoThe remaining 628*6af9a77bSJohn Marino.Ft regex_t 629*6af9a77bSJohn Marinois no longer a valid compiled RE 630*6af9a77bSJohn Marinoand the effect of supplying it to 631*6af9a77bSJohn Marino.Fn regexec 632*6af9a77bSJohn Marinoor 633*6af9a77bSJohn Marino.Fn regerror 634*6af9a77bSJohn Marinois undefined. 635*6af9a77bSJohn Marino.Pp 636*6af9a77bSJohn MarinoNone of these functions references global variables except for tables 637*6af9a77bSJohn Marinoof constants; 638*6af9a77bSJohn Marinoall are safe for use from multiple threads if the arguments are safe. 639*6af9a77bSJohn Marino.Sh EXTENDED APIS 640*6af9a77bSJohn MarinoThese extended APIs are available in Mac OS X 10.8 and beyond, when the 641*6af9a77bSJohn Marinodeployment target is 10.8 or later. 642*6af9a77bSJohn MarinoIt should also be noted that any of the 643*6af9a77bSJohn Marino.Fn regcomp 644*6af9a77bSJohn Marinovariants may be used to initialize a 645*6af9a77bSJohn Marino.Ft regex_t 646*6af9a77bSJohn Marinostructure, that can then be passed to any of the 647*6af9a77bSJohn Marino.Fn regexec 648*6af9a77bSJohn Marinovariants. 649*6af9a77bSJohn MarinoSo it is quite legal to compile a wide character RE and use it to match a 650*6af9a77bSJohn Marinomultibyte character string, or vice versa. 651*6af9a77bSJohn Marino.Pp 652*6af9a77bSJohn MarinoThe 653*6af9a77bSJohn Marino.Fn regncomp 654*6af9a77bSJohn Marinoroutine compiles regular expressions like 655*6af9a77bSJohn Marino.Fn regcomp , 656*6af9a77bSJohn Marinobut the length of the regular expression string is specified, allowing a string 657*6af9a77bSJohn Marinothat is not NUL terminated and/or contains NUL characters. 
658*6af9a77bSJohn MarinoThis is a modern replacement for using 659*6af9a77bSJohn Marino.Fn regcomp 660*6af9a77bSJohn Marinowith the 661*6af9a77bSJohn Marino.Dv REG_PEND 662*6af9a77bSJohn Marinooption. 663*6af9a77bSJohn Marino.Pp 664*6af9a77bSJohn MarinoSimilarly, the 665*6af9a77bSJohn Marino.Fn regnexec 666*6af9a77bSJohn Marinoroutine is like 667*6af9a77bSJohn Marino.Fn regexec , 668*6af9a77bSJohn Marinobut the length of the string to match is specified, allowing a string 669*6af9a77bSJohn Marinothat is not NUL terminated and/or contains NUL characters. 670*6af9a77bSJohn Marino.Pp 671*6af9a77bSJohn MarinoThe 672*6af9a77bSJohn Marino.Fn regwcomp 673*6af9a77bSJohn Marinoand 674*6af9a77bSJohn Marino.Fn regwexec 675*6af9a77bSJohn Marinovariants take a wide-character 676*6af9a77bSJohn Marino.Vt ( wchar_t ) 677*6af9a77bSJohn Marinostring for the regular expression and string to match. 678*6af9a77bSJohn MarinoAnd 679*6af9a77bSJohn Marino.Fn regwncomp 680*6af9a77bSJohn Marinoand 681*6af9a77bSJohn Marino.Fn regwnexec 682*6af9a77bSJohn Marinoare variants that allow specifying the wide character string length, and 683*6af9a77bSJohn Marinoso allow wide character strings that are not NUL terminated and/or 684*6af9a77bSJohn Marinocontain NUL characters. 685*6af9a77bSJohn Marino.Sh INTERACTION WITH THE LOCALE 686*6af9a77bSJohn MarinoWhen 687*6af9a77bSJohn Marino.Fn regcomp 688*6af9a77bSJohn Marinoor one of its variants is run, the regular expression is compiled into an 689*6af9a77bSJohn Marinointernal form, which may include specific information about the locale currently 690*6af9a77bSJohn Marinoin effect, such as equivalence classes or multi-character collation symbols. 
691*6af9a77bSJohn MarinoSo a reference to the current locale is also stored with the internal form, 692*6af9a77bSJohn Marinoso that when 693*6af9a77bSJohn Marino.Fn regexec 694*6af9a77bSJohn Marinois run, it can use the same locale (even if the locale is changed in-between 695*6af9a77bSJohn Marinothe calls to 696*6af9a77bSJohn Marino.Fn regcomp 697*6af9a77bSJohn Marinoand 698*6af9a77bSJohn Marino.Fn regexec ) . 699*6af9a77bSJohn Marino.Pp 700*6af9a77bSJohn MarinoTo provide more direct control over which locale is used, 701*6af9a77bSJohn Marinoroutines with 702*6af9a77bSJohn Marino.Dq Nm _l 703*6af9a77bSJohn Marinoappended to their names are provided that work just like the variants 704*6af9a77bSJohn Marinowithout the 705*6af9a77bSJohn Marino.Dq Nm _l , 706*6af9a77bSJohn Marinoexcept that a locale (via a 707*6af9a77bSJohn Marino.Vt locale_t 708*6af9a77bSJohn Marinovariable type) is specified directly. 709*6af9a77bSJohn MarinoNote that only variants of 710*6af9a77bSJohn Marino.Fn regcomp 711*6af9a77bSJohn Marinohave 712*6af9a77bSJohn Marino.Dq Nm _l 713*6af9a77bSJohn Marinovariants, since the 714*6af9a77bSJohn Marino.Fn regexec 715*6af9a77bSJohn Marinovariants just use the reference to the locale stored in the internal form. 716*6af9a77bSJohn Marino.Sh IMPLEMENTATION CHOICES 717*6af9a77bSJohn MarinoThe 718*6af9a77bSJohn Marino.Nm regex 719*6af9a77bSJohn Marinoimplementation in Mac OS X 10.8 and later is based on a heavily modified subset 720*6af9a77bSJohn Marinoof TRE (http://laurikari.net/tre/). 721*6af9a77bSJohn MarinoThis provides improved performance, better conformance and additional features. 
722*6af9a77bSJohn MarinoHowever, both API and binary compatibility have been maintained with previous 723*6af9a77bSJohn Marinoreleases, so binaries 724*6af9a77bSJohn Marinobuilt on previous releases should work on 10.8 and later, and binaries built on 725*6af9a77bSJohn Marino10.8 and later should be able to run on previous releases (as long as none of 726*6af9a77bSJohn Marinothe new variants or new features are used). 727*6af9a77bSJohn Marino.Pp 728*6af9a77bSJohn MarinoThere are a number of decisions that 729*6af9a77bSJohn Marino.St -p1003.2 730*6af9a77bSJohn Marinoleaves up to the implementor, 731*6af9a77bSJohn Marinoeither by explicitly saying 732*6af9a77bSJohn Marino.Dq undefined 733*6af9a77bSJohn Marinoor by virtue of them being 734*6af9a77bSJohn Marinoforbidden by the RE grammar. 735*6af9a77bSJohn MarinoThis implementation treats them as follows. 736*6af9a77bSJohn Marino.Pp 737*6af9a77bSJohn MarinoSee 738*6af9a77bSJohn Marino.Xr re_format 7 739*6af9a77bSJohn Marinofor a discussion of the definition of case-independent matching. 740*6af9a77bSJohn Marino.Pp 741*6af9a77bSJohn MarinoThere is no particular limit on the length of REs, 742*6af9a77bSJohn Marinoexcept insofar as memory is limited. 743*6af9a77bSJohn MarinoMemory usage is approximately linear in RE size, and largely insensitive 744*6af9a77bSJohn Marinoto RE complexity, except for bounded repetitions. 745*6af9a77bSJohn MarinoSee 746*6af9a77bSJohn Marino.Sx BUGS 747*6af9a77bSJohn Marinofor one short RE using them 748*6af9a77bSJohn Marinothat will run almost any system out of memory. 749*6af9a77bSJohn Marino.Pp 750*6af9a77bSJohn MarinoA backslashed character other than one specifically given a magic meaning 751*6af9a77bSJohn Marinoby 752*6af9a77bSJohn Marino.St -p1003.2 753*6af9a77bSJohn Marino(such magic meanings occur only in obsolete 754*6af9a77bSJohn Marino.Bq Dq basic 755*6af9a77bSJohn MarinoREs) 756*6af9a77bSJohn Marinois taken as an ordinary character.
757*6af9a77bSJohn Marino.Pp 758*6af9a77bSJohn MarinoAny unmatched 759*6af9a77bSJohn Marino.Ql [\& 760*6af9a77bSJohn Marinois a 761*6af9a77bSJohn Marino.Dv REG_EBRACK 762*6af9a77bSJohn Marinoerror. 763*6af9a77bSJohn Marino.Pp 764*6af9a77bSJohn MarinoEquivalence classes cannot begin or end bracket-expression ranges. 765*6af9a77bSJohn MarinoThe endpoint of one range cannot begin another. 766*6af9a77bSJohn Marino.Pp 767*6af9a77bSJohn Marino.Dv RE_DUP_MAX , 768*6af9a77bSJohn Marinothe limit on repetition counts in bounded repetitions, is 255. 769*6af9a77bSJohn Marino.Pp 770*6af9a77bSJohn MarinoA repetition operator 771*6af9a77bSJohn Marino.Ql ( ?\& , 772*6af9a77bSJohn Marino.Ql *\& , 773*6af9a77bSJohn Marino.Ql +\& , 774*6af9a77bSJohn Marinoor bounds) 775*6af9a77bSJohn Marinocannot follow another 776*6af9a77bSJohn Marinorepetition operator, except for the use of 777*6af9a77bSJohn Marino.Ql ?\& 778*6af9a77bSJohn Marinofor minimal repetition (for enhanced extended REs; see 779*6af9a77bSJohn Marino.Xr re_format 7 780*6af9a77bSJohn Marinofor details). 781*6af9a77bSJohn MarinoA repetition operator cannot begin an expression or subexpression 782*6af9a77bSJohn Marinoor follow 783*6af9a77bSJohn Marino.Ql ^\& 784*6af9a77bSJohn Marinoor 785*6af9a77bSJohn Marino.Ql |\& . 786*6af9a77bSJohn Marino.Pp 787*6af9a77bSJohn Marino.Ql |\& 788*6af9a77bSJohn Marinocannot appear first or last in a (sub)expression or after another 789*6af9a77bSJohn Marino.Ql |\& , 790*6af9a77bSJohn Marinoi.e., an operand of 791*6af9a77bSJohn Marino.Ql |\& 792*6af9a77bSJohn Marinocannot be an empty subexpression. 793*6af9a77bSJohn MarinoAn empty parenthesized subexpression, 794*6af9a77bSJohn Marino.Ql "()" , 795*6af9a77bSJohn Marinois legal and matches an 796*6af9a77bSJohn Marinoempty (sub)string. 797*6af9a77bSJohn MarinoAn empty string is not a legal RE. 
798*6af9a77bSJohn Marino.Pp 799*6af9a77bSJohn MarinoA 800*6af9a77bSJohn Marino.Ql {\& 801*6af9a77bSJohn Marinofollowed by a digit is considered the beginning of bounds for a 802*6af9a77bSJohn Marinobounded repetition, which must then follow the syntax for bounds. 803*6af9a77bSJohn MarinoA 804*6af9a77bSJohn Marino.Ql {\& 805*6af9a77bSJohn Marino.Em not 806*6af9a77bSJohn Marinofollowed by a digit is considered an ordinary character. 807*6af9a77bSJohn Marino.Pp 808*6af9a77bSJohn Marino.Ql ^\& 809*6af9a77bSJohn Marinoand 810*6af9a77bSJohn Marino.Ql $\& 811*6af9a77bSJohn Marinobeginning and ending subexpressions in obsolete 812*6af9a77bSJohn Marino.Pq Dq basic 813*6af9a77bSJohn MarinoREs are anchors, not ordinary characters. 814*6af9a77bSJohn Marino.Sh DIAGNOSTICS 815*6af9a77bSJohn MarinoNon-zero error codes from 816*6af9a77bSJohn Marino.Fn regcomp 817*6af9a77bSJohn Marinoand 818*6af9a77bSJohn Marino.Fn regexec 819*6af9a77bSJohn Marinoinclude the following: 820*6af9a77bSJohn Marino.Pp 821*6af9a77bSJohn Marino.Bl -tag -width REG_ECOLLATE -compact 822*6af9a77bSJohn Marino.It Dv REG_NOMATCH 823*6af9a77bSJohn MarinoThe 824*6af9a77bSJohn Marino.Fn regexec 825*6af9a77bSJohn Marinofunction 826*6af9a77bSJohn Marinofailed to match 827*6af9a77bSJohn Marino.It Dv REG_BADPAT 828*6af9a77bSJohn Marinoinvalid regular expression 829*6af9a77bSJohn Marino.It Dv REG_ECOLLATE 830*6af9a77bSJohn Marinoinvalid collating element 831*6af9a77bSJohn Marino.It Dv REG_ECTYPE 832*6af9a77bSJohn Marinoinvalid character class 833*6af9a77bSJohn Marino.It Dv REG_EESCAPE 834*6af9a77bSJohn Marino.Ql \e 835*6af9a77bSJohn Marinoapplied to unescapable character 836*6af9a77bSJohn Marino.It Dv REG_ESUBREG 837*6af9a77bSJohn Marinoinvalid backreference number 838*6af9a77bSJohn Marino.It Dv REG_EBRACK 839*6af9a77bSJohn Marinobrackets 840*6af9a77bSJohn Marino.Ql "[ ]" 841*6af9a77bSJohn Marinonot balanced 842*6af9a77bSJohn Marino.It Dv REG_EPAREN 843*6af9a77bSJohn Marinoparentheses 844*6af9a77bSJohn Marino.Ql "( )" 
845*6af9a77bSJohn Marinonot balanced 846*6af9a77bSJohn Marino.It Dv REG_EBRACE 847*6af9a77bSJohn Marinobraces 848*6af9a77bSJohn Marino.Ql "{ }" 849*6af9a77bSJohn Marinonot balanced 850*6af9a77bSJohn Marino.It Dv REG_BADBR 851*6af9a77bSJohn Marinoinvalid repetition count(s) in 852*6af9a77bSJohn Marino.Ql "{ }" 853*6af9a77bSJohn Marino.It Dv REG_ERANGE 854*6af9a77bSJohn Marinoinvalid character range in 855*6af9a77bSJohn Marino.Ql "[ ]" 856*6af9a77bSJohn Marino.It Dv REG_ESPACE 857*6af9a77bSJohn Marinoran out of memory 858*6af9a77bSJohn Marino.It Dv REG_BADRPT 859*6af9a77bSJohn Marino.Ql ?\& , 860*6af9a77bSJohn Marino.Ql *\& , 861*6af9a77bSJohn Marinoor 862*6af9a77bSJohn Marino.Ql +\& 863*6af9a77bSJohn Marinooperand invalid 864*6af9a77bSJohn Marino.It Dv REG_EMPTY 865*6af9a77bSJohn Marinoempty (sub)expression 866*6af9a77bSJohn Marino.It Dv REG_ASSERT 867*6af9a77bSJohn Marinocannot happen - you found a bug 868*6af9a77bSJohn Marino.It Dv REG_INVARG 869*6af9a77bSJohn Marinoinvalid argument, e.g.\& negative-length string 870*6af9a77bSJohn Marino.It Dv REG_ILLSEQ 871*6af9a77bSJohn Marinoillegal byte sequence (bad multibyte character) 872*6af9a77bSJohn Marino.El 873*6af9a77bSJohn Marino.Sh SEE ALSO 874*6af9a77bSJohn Marino.Xr grep 1 , 875*6af9a77bSJohn Marino.Xr re_format 7 876*6af9a77bSJohn Marino.Pp 877*6af9a77bSJohn Marino.St -p1003.2 , 878*6af9a77bSJohn Marinosections 2.8 (Regular Expression Notation) 879*6af9a77bSJohn Marinoand 880*6af9a77bSJohn MarinoB.5 (C Binding for Regular Expression Matching). 881*6af9a77bSJohn Marino.Sh HISTORY 882*6af9a77bSJohn MarinoThe 883*6af9a77bSJohn Marino.Nm regex 884*6af9a77bSJohn Marinoimplementation is based on a heavily modified subset of TRE 885*6af9a77bSJohn Marino(http://laurikari.net/tre/), originally written by Ville Laurikari. 
886*6af9a77bSJohn MarinoPrevious releases used an implementation originally written by 887*6af9a77bSJohn Marino.An Henry Spencer , 888*6af9a77bSJohn Marinoand altered for inclusion in the 889*6af9a77bSJohn Marino.Bx 4.4 890*6af9a77bSJohn Marinodistribution. 891*6af9a77bSJohn Marino.Sh BUGS 892*6af9a77bSJohn MarinoThe beginning-of-line and end-of-line anchors ( 893*6af9a77bSJohn Marino.Dq ^\& 894*6af9a77bSJohn Marinoand 895*6af9a77bSJohn Marino.Dq $\& ) 896*6af9a77bSJohn Marinoare currently implemented so that repetitions cannot be applied to them. 897*6af9a77bSJohn MarinoThe standards are unclear about whether this is legal, but other 898*6af9a77bSJohn Marino.Nm regex 899*6af9a77bSJohn Marinopackages do support this case. 900*6af9a77bSJohn MarinoIt is best to avoid this non-portable (and not really very useful) case. 901*6af9a77bSJohn Marino.Pp 902*6af9a77bSJohn MarinoThe back-reference code is subtle and doubts linger about its correctness 903*6af9a77bSJohn Marinoin complex cases. 904*6af9a77bSJohn Marino.Pp 905*6af9a77bSJohn MarinoThe 906*6af9a77bSJohn Marino.Fn regexec 907*6af9a77bSJohn Marinovariants use one of two internal matching engines. 908*6af9a77bSJohn MarinoThe normal one is linear worst-case time in the length of the text being 909*6af9a77bSJohn Marinosearched, and quadratic worst-case time in the length of the used regular 910*6af9a77bSJohn Marinoexpression. 911*6af9a77bSJohn MarinoWhen back-references are used, a slower, backtracking engine is used. 912*6af9a77bSJohn MarinoWhile all backtracking matching engines suffer from extreme slowness for certain 913*6af9a77bSJohn Marinopathological cases, the normal engine doesn't suffer from these cases. 914*6af9a77bSJohn MarinoIt is advised to avoid back-references whenever possible.
915*6af9a77bSJohn Marino.Pp 916*6af9a77bSJohn MarinoThe 917*6af9a77bSJohn Marino.Fn regcomp 918*6af9a77bSJohn Marinovariants 919*6af9a77bSJohn Marinoimplement bounded repetitions by macro expansion, 920*6af9a77bSJohn Marinowhich is costly in time and space if counts are large 921*6af9a77bSJohn Marinoor bounded repetitions are nested. 922*6af9a77bSJohn MarinoAn RE like, say, 923*6af9a77bSJohn Marino.Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" 924*6af9a77bSJohn Marinowill (eventually) run almost any existing machine out of swap space. 925*6af9a77bSJohn Marino.Pp 926*6af9a77bSJohn MarinoDue to a mistake in 927*6af9a77bSJohn Marino.St -p1003.2 , 928*6af9a77bSJohn Marinothings like 929*6af9a77bSJohn Marino.Ql "a)b" 930*6af9a77bSJohn Marinoare legal REs because 931*6af9a77bSJohn Marino.Ql )\& 932*6af9a77bSJohn Marinois 933*6af9a77bSJohn Marinoa special character only in the presence of a previous unmatched 934*6af9a77bSJohn Marino.Ql (\& . 935*6af9a77bSJohn MarinoThis cannot be fixed until the spec is fixed. 936*6af9a77bSJohn Marino.Pp 937*6af9a77bSJohn MarinoThe standard's definition of back references is vague. 938*6af9a77bSJohn MarinoFor example, does 939*6af9a77bSJohn Marino.Ql "a\e(\e(b\e)*\e2\e)*d" 940*6af9a77bSJohn Marinomatch 941*6af9a77bSJohn Marino.Ql "abbbd" ? 942*6af9a77bSJohn MarinoUntil the standard is clarified, 943*6af9a77bSJohn Marinobehavior in such cases should not be relied on. 944