1.\" Copyright (c) 1992, 1993, 1994 Henry Spencer. 2.\" Copyright (c) 1992, 1993, 1994 3.\" The Regents of the University of California. All rights reserved. 4.\" 5.\" This code is derived from software contributed to Berkeley by 6.\" Henry Spencer. 7.\" 8.\" Redistribution and use in source and binary forms, with or without 9.\" modification, are permitted provided that the following conditions 10.\" are met: 11.\" 1. Redistributions of source code must retain the above copyright 12.\" notice, this list of conditions and the following disclaimer. 13.\" 2. Redistributions in binary form must reproduce the above copyright 14.\" notice, this list of conditions and the following disclaimer in the 15.\" documentation and/or other materials provided with the distribution. 16.\" 3. Neither the name of the University nor the names of its contributors 17.\" may be used to endorse or promote products derived from this software 18.\" without specific prior written permission. 19.\" 20.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30.\" SUCH DAMAGE. 
31.\" 32.\" @(#)regex.3 8.4 (Berkeley) 3/20/94 33.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $ 34.\" 35.Dd August 6, 2015 36.Dt REGEX 3 37.Os 38.Sh NAME 39.Nm regcomp , 40.Nm regcomp_l , 41.Nm regerror , 42.Nm regexec , 43.Nm regfree , 44.Nm regncomp , 45.Nm regncomp_l , 46.Nm regnexec , 47.Nm regwncomp , 48.Nm regwncomp_l , 49.Nm regwnexec , 50.Nm regwcomp , 51.Nm regwcomp_l , 52.Nm regwexec 53.Nd regular-expression library 54.Sh SYNOPSIS 55.Sy (Standards-compliant APIs) 56.Pp 57.In regex.h 58.Ft int 59.Fo regcomp 60.Fa "regex_t *restrict preg" 61.Fa "const char *restrict pattern" 62.Fa "int cflags" 63.Fc 64.Ft size_t 65.Fo regerror 66.Fa "int errcode" 67.Fa "const regex_t *restrict preg" 68.Fa "char *restrict errbuf" 69.Fa "size_t errbuf_size" 70.Fc 71.Ft int 72.Fo regexec 73.Fa "const regex_t *restrict preg" 74.Fa "const char *restrict string" 75.Fa "size_t nmatch" 76.Fa "regmatch_t pmatch[restrict]" 77.Fa "int eflags" 78.Fc 79.Ft void 80.Fo regfree 81.Fa "regex_t *preg" 82.Fc 83.Pp 84.Sy (Non-portable extensions) 85.Ft int 86.Fo regncomp 87.Fa "regex_t *restrict preg" 88.Fa "const char *restrict pattern" 89.Fa "size_t len" 90.Fa "int cflags" 91.Fc 92.Ft int 93.Fo regnexec 94.Fa "const regex_t *restrict preg" 95.Fa "const char *restrict string" 96.Fa "size_t len" 97.Fa "size_t nmatch" 98.Fa "regmatch_t pmatch[restrict]" 99.Fa "int eflags" 100.Fc 101.Ft int 102.Fo regwcomp 103.Fa "regex_t *restrict preg" 104.Fa "const wchar_t *restrict widepat" 105.Fa "int cflags" 106.Fc 107.Ft int 108.Fo regwexec 109.Fa "const regex_t *restrict preg" 110.Fa "const wchar_t *restrict widestr" 111.Fa "size_t nmatch" 112.Fa "regmatch_t pmatch[restrict]" 113.Fa "int eflags" 114.Fc 115.Ft int 116.Fo regwncomp 117.Fa "regex_t *restrict preg" 118.Fa "const wchar_t *restrict widepat" 119.Fa "size_t len" 120.Fa "int cflags" 121.Fc 122.Ft int 123.Fo regwnexec 124.Fa "const regex_t *restrict preg" 125.Fa "const wchar_t *restrict widestr" 126.Fa "size_t len" 
127.Fa "size_t nmatch" 128.Fa "regmatch_t pmatch[restrict]" 129.Fa "int eflags" 130.Fc 131.In regex.h 132.In xlocale.h 133.Ft int 134.Fo regcomp_l 135.Fa "regex_t *restrict preg" 136.Fa "const char *restrict pattern" 137.Fa "int cflags" 138.Fa "locale_t restrict" 139.Fc 140.Ft int 141.Fo regncomp_l 142.Fa "regex_t *restrict preg" 143.Fa "const char *restrict pattern" 144.Fa "size_t len" 145.Fa "int cflags" 146.Fa "locale_t restrict" 147.Fc 148.Ft int 149.Fo regwcomp_l 150.Fa "regex_t *restrict preg" 151.Fa "const wchar_t *restrict widepat" 152.Fa "int cflags" 153.Fa "locale_t restrict" 154.Fc 155.Ft int 156.Fo regwncomp_l 157.Fa "regex_t *restrict preg" 158.Fa "const wchar_t *restrict widepat" 159.Fa "size_t len" 160.Fa "int cflags" 161.Fa "locale_t restrict" 162.Fc 163.Sh DESCRIPTION 164These routines implement 165.St -p1003.2 166regular expressions 167.Pq Do RE Dc Ns s ; 168see 169.Xr re_format 7 . 170The 171.Fn regcomp 172function 173compiles an RE, written as a string, into an internal form. 174.Fn regexec 175matches that internal form against a string and reports results. 176.Fn regerror 177transforms error codes from either into human-readable messages. 178.Fn regfree 179frees any dynamically-allocated storage used by the internal form 180of an RE. 181.Pp 182The header 183.In regex.h 184declares two structure types, 185.Ft regex_t 186and 187.Ft regmatch_t , 188the former for compiled internal forms and the latter for match reporting. 189It also declares the four functions, 190a type 191.Ft regoff_t , 192and a number of constants with names starting with 193.Dq Dv REG_ . 194.Pp 195The 196.Fn regcomp 197function 198compiles the regular expression contained in the 199.Fa pattern 200string, 201subject to the flags in 202.Fa cflags , 203and places the results in the 204.Ft regex_t 205structure pointed to by 206.Fa preg . 
207The 208.Fa cflags 209argument 210is the bitwise OR of zero or more of the following flags: 211.Bl -tag -width REG_EXTENDED 212.It Dv REG_EXTENDED 213Compile modern 214.Pq Dq extended 215REs, 216rather than the obsolete 217.Pq Dq basic 218REs that 219are the default. 220.It Dv REG_BASIC 221This is a synonym for 0, 222provided as a counterpart to 223.Dv REG_EXTENDED 224to improve readability. 225.It Dv REG_NOSPEC 226Compile with recognition of all special characters turned off. 227All characters are thus considered ordinary, 228so the 229.Dq RE 230is a literal string. 231This is an extension, 232compatible with but not specified by 233.St -p1003.2 , 234and should be used with 235caution in software intended to be portable to other systems. 236.Dv REG_EXTENDED 237and 238.Dv REG_NOSPEC 239may not be used 240in the same call to 241.Fn regcomp . 242.It Dv REG_LITERAL 243An alias of 244.Dv REG_NOSPEC . 245.It Dv REG_ICASE 246Compile for matching that ignores upper/lower case distinctions. 247See 248.Xr re_format 7 . 249.It Dv REG_NOSUB 250Compile for matching that need only report success or failure, 251not what was matched. 252.It Dv REG_NEWLINE 253Compile for newline-sensitive matching. 254By default, newline is a completely ordinary character with no special 255meaning in either REs or strings. 256With this flag, 257.Ql [^ 258bracket expressions and 259.Ql .\& 260never match newline, 261a 262.Ql ^\& 263anchor matches the null string after any newline in the string 264in addition to its normal function, 265and the 266.Ql $\& 267anchor matches the null string before any newline in the 268string in addition to its normal function. 269.It Dv REG_PEND 270(Note that 271.Dv REG_PEND 272is not recognized by any of the wide character or 273.Dq Nm n 274variants. 275Besides, the 276.Dq Nm n 277variants can be used instead of 278.Dv REG_PEND ; 279see EXTENDED APIS below.) 
280The regular expression ends, 281not at the first NUL, 282but just before the character pointed to by the 283.Va re_endp 284member of the structure pointed to by 285.Fa preg . 286The 287.Va re_endp 288member is of type 289.Ft "const char *" . 290This flag permits inclusion of NULs in the RE; 291they are considered ordinary characters. 292This is an extension, 293compatible with but not specified by 294.St -p1003.2 , 295and should be used with 296caution in software intended to be portable to other systems. 297.It Dv REG_ENHANCED 298Recognize enhanced regular expression features; see 299.Xr re_format 7 300for details. 301This is an extension not specified by 302.St -p1003.2 , 303and should be used with 304caution in software intended to be portable to other systems. 305.It Dv REG_MINIMAL 306Use minimal (non-greedy) repetitions instead of the normal greedy ones; see 307.Xr re_format 7 308for details. 309(This only applies when both 310.Dv REG_ENHANCED 311and 312.Dv REG_EXTENDED 313are also set.) 314This is an extension not specified by 315.St -p1003.2 , 316and should be used with 317caution in software intended to be portable to other systems. 318.It Dv REG_UNGREEDY 319Alias of 320.Dv REG_MINIMAL . 321.El 322.Pp 323When successful, 324.Fn regcomp 325returns 0 and fills in the structure pointed to by 326.Fa preg . 327One member of that structure 328(other than 329.Va re_endp ) 330is publicized: 331.Va re_nsub , 332of type 333.Ft size_t , 334contains the number of parenthesized subexpressions within the RE 335(except that the value of this member is undefined if the 336.Dv REG_NOSUB 337flag was used). 338If 339.Fn regcomp 340fails, it returns a non-zero error code; 341see 342.Sx DIAGNOSTICS . 343.Pp 344The 345.Fn regexec 346function 347matches the compiled RE pointed to by 348.Fa preg 349against the 350.Fa string , 351subject to the flags in 352.Fa eflags , 353and reports results using 354.Fa nmatch , 355.Fa pmatch , 356and the returned value. 
357The RE must have been compiled by a previous invocation of 358.Fn regcomp . 359The compiled form is not altered during execution of 360.Fn regexec , 361so a single compiled RE can be used simultaneously by multiple threads. 362.Pp 363By default, 364the NUL-terminated string pointed to by 365.Fa string 366is considered to be the text of an entire line, minus any terminating 367newline. 368The 369.Fa eflags 370argument is the bitwise OR of zero or more of the following flags: 371.Bl -tag -width REG_STARTEND 372.It Dv REG_NOTBOL 373The first character of 374the string 375is not the beginning of a line, so the 376.Ql ^\& 377anchor should not match before it. 378This does not affect the behavior of newlines under 379.Dv REG_NEWLINE . 380.It Dv REG_NOTEOL 381The NUL terminating 382the string 383does not end a line, so the 384.Ql $\& 385anchor should not match before it. 386This does not affect the behavior of newlines under 387.Dv REG_NEWLINE . 388.It Dv REG_STARTEND 389The string is considered to start at 390.Fa string 391+ 392.Fa pmatch Ns [0]. Ns Va rm_so 393and to have a terminating NUL located at 394.Fa string 395+ 396.Fa pmatch Ns [0]. Ns Va rm_eo 397(there need not actually be a NUL at that location), 398regardless of the value of 399.Fa nmatch . 400See below for the definition of 401.Fa pmatch 402and 403.Fa nmatch . 404This is an extension, 405compatible with but not specified by 406.St -p1003.2 , 407and should be used with 408caution in software intended to be portable to other systems. 409Note that a non-zero 410.Va rm_so 411does not imply 412.Dv REG_NOTBOL ; 413.Dv REG_STARTEND 414affects only the location of the string, 415not how it is matched. 416.El 417.Pp 418See 419.Xr re_format 7 420for a discussion of what is matched in situations where an RE or a 421portion thereof could match any of several substrings of 422.Fa string . 423.Pp 424Normally, 425.Fn regexec 426returns 0 for success and the non-zero code 427.Dv REG_NOMATCH 428for failure. 
429Other non-zero error codes may be returned in exceptional situations; 430see 431.Sx DIAGNOSTICS . 432.Pp 433If 434.Dv REG_NOSUB 435was specified in the compilation of the RE, 436or if 437.Fa nmatch 438is 0, 439.Fn regexec 440ignores the 441.Fa pmatch 442argument (but see below for the case where 443.Dv REG_STARTEND 444is specified). 445Otherwise, 446.Fa pmatch 447points to an array of 448.Fa nmatch 449structures of type 450.Ft regmatch_t . 451Such a structure has at least the members 452.Va rm_so 453and 454.Va rm_eo , 455both of type 456.Ft regoff_t 457(a signed arithmetic type at least as large as an 458.Ft off_t 459and a 460.Ft ssize_t ) , 461containing respectively the offset of the first character of a substring 462and the offset of the first character after the end of the substring. 463Offsets are measured from the beginning of the 464.Fa string 465argument given to 466.Fn regexec . 467An empty substring is denoted by equal offsets, 468both indicating the character following the empty substring. 469.Pp 470The 0th member of the 471.Fa pmatch 472array is filled in to indicate what substring of 473.Fa string 474was matched by the entire RE. 475Remaining members report what substring was matched by parenthesized 476subexpressions within the RE; 477member 478.Va i 479reports subexpression 480.Va i , 481with subexpressions counted (starting at 1) by the order of their opening 482parentheses in the RE, left to right. 483Unused entries in the array (corresponding either to subexpressions that 484did not participate in the match at all, or to subexpressions that do not 485exist in the RE (that is, 486.Va i 487> 488.Fa preg Ns -> Ns Va re_nsub ) ) 489have both 490.Va rm_so 491and 492.Va rm_eo 493set to -1. 494If a subexpression participated in the match several times, 495the reported substring is the last one it matched. 
496(Note, as an example in particular, that when the RE 497.Ql "(b*)+" 498matches 499.Ql bbb , 500the parenthesized subexpression matches each of the three 501.So Li b Sc Ns s 502and then 503an infinite number of empty strings following the last 504.Ql b , 505so the reported substring is one of the empties.) 506.Pp 507If 508.Dv REG_STARTEND 509is specified, 510.Fa pmatch 511must point to at least one 512.Ft regmatch_t 513(even if 514.Fa nmatch 515is 0 or 516.Dv REG_NOSUB 517was specified), 518to hold the input offsets for 519.Dv REG_STARTEND . 520Use for output is still entirely controlled by 521.Fa nmatch ; 522if 523.Fa nmatch 524is 0 or 525.Dv REG_NOSUB 526was specified, 527the value of 528.Fa pmatch Ns [0] 529will not be changed by a successful 530.Fn regexec . 531.Pp 532The 533.Fn regerror 534function 535maps a non-zero 536.Fa errcode 537from either 538.Fn regcomp 539or 540.Fn regexec 541to a human-readable, printable message. 542If 543.Fa preg 544is 545.No non\- Ns Dv NULL , 546the error code should have arisen from use of 547the 548.Ft regex_t 549pointed to by 550.Fa preg , 551and if the error code came from 552.Fn regcomp , 553it should have been the result from the most recent 554.Fn regcomp 555using that 556.Ft regex_t . 557The 558.Fn regerror 559function may be able to supply a more detailed message using information 560from the 561.Ft regex_t . 562The 563.Fn regerror 564function 565places the NUL-terminated message into the buffer pointed to by 566.Fa errbuf , 567limiting the length (including the NUL) to at most 568.Fa errbuf_size 569bytes. 570If the whole message will not fit, 571as much of it as will fit before the terminating NUL is supplied. 572In any case, 573the returned value is the size of buffer needed to hold the whole 574message (including terminating NUL). 575If 576.Fa errbuf_size 577is 0, 578.Fa errbuf 579is ignored but the return value is still correct. 
580.Pp 581If the 582.Fa errcode 583given to 584.Fn regerror 585is first ORed with 586.Dv REG_ITOA , 587the 588.Dq message 589that results is the printable name of the error code, 590e.g.\& 591.Dq Dv REG_NOMATCH , 592rather than an explanation thereof. 593If 594.Fa errcode 595is 596.Dv REG_ATOI , 597then 598.Fa preg 599shall be 600.No non\- Ns Dv NULL 601and the 602.Va re_endp 603member of the structure it points to 604must point to the printable name of an error code; 605in this case, the result in 606.Fa errbuf 607is the decimal digits of 608the numeric value of the error code 609(0 if the name is not recognized). 610.Dv REG_ITOA 611and 612.Dv REG_ATOI 613are intended primarily as debugging facilities; 614they are extensions, 615compatible with but not specified by 616.St -p1003.2 , 617and should be used with 618caution in software intended to be portable to other systems. 619Be warned also that they are considered experimental and changes are possible. 620.Pp 621The 622.Fn regfree 623function 624frees any dynamically-allocated storage associated with the compiled RE 625pointed to by 626.Fa preg . 627The remaining 628.Ft regex_t 629is no longer a valid compiled RE 630and the effect of supplying it to 631.Fn regexec 632or 633.Fn regerror 634is undefined. 635.Pp 636None of these functions references global variables except for tables 637of constants; 638all are safe for use from multiple threads if the arguments are safe. 639.Sh EXTENDED APIS 640These extended APIs are available in Mac OS X 10.8 and beyond, when the 641deployment target is 10.8 or later. 642It should also be noted that any of the 643.Fn regcomp 644variants may be used to initialize a 645.Ft regex_t 646structure, that can then be passed to any of the 647.Fn regexec 648variants. 649So it is quite legal to compile a wide character RE and use it to match a 650multibyte character string, or vice versa. 
651.Pp 652The 653.Fn regncomp 654routine compiles regular expressions like 655.Fn regcomp , 656but the length of the regular expression string is specified, allowing a string 657that is not NUL terminated and/or contains NUL characters. 658This is a modern replacement for using 659.Fn regcomp 660with the 661.Dv REG_PEND 662option. 663.Pp 664Similarly, the 665.Fn regnexec 666routine is like 667.Fn regexec , 668but the length of the string to match is specified, allowing a string 669that is not NUL terminated and/or contains NUL characters. 670.Pp 671The 672.Fn regwcomp 673and 674.Fn regwexec 675variants take a wide-character 676.Vt ( wchar_t ) 677string for the regular expression and string to match. 678And 679.Fn regwncomp 680and 681.Fn regwnexec 682are variants that allow specifying the wide character string length, and 683so allow wide character strings that are not NUL terminated and/or 684contain NUL characters. 685.Sh INTERACTION WITH THE LOCALE 686When 687.Fn regcomp 688or one of its variants is run, the regular expression is compiled into an 689internal form, which may include specific information about the locale currently 690in effect, such as equivalence classes or multi-character collation symbols. 691So a reference to the current locale is also stored with the internal form, 692so that when 693.Fn regexec 694is run, it can use the same locale (even if the locale is changed in-between 695the calls to 696.Fn regcomp 697and 698.Fn regexec ) . 699.Pp 700To provide more direct control over which locale is used, 701routines with 702.Dq Nm _l 703appended to their names are provided that work just like the variants 704without the 705.Dq Nm _l , 706except that a locale (via a 707.Vt locale_t 708variable type) is specified directly. 709Note that only variants of 710.Fn regcomp 711have 712.Dq Nm _l 713variants, since the 714.Fn regexec 715variants just use the reference to the locale stored in the internal form. 
716.Sh IMPLEMENTATION CHOICES 717The 718.Nm regex 719implementation in Mac OS X 10.8 and later is based on a heavily modified subset 720of TRE (http://laurikari.net/tre/). 721This provides improved performance, better conformance and additional features. 722However, both API and binary compatibility have been maintained with previous 723releases, so binaries 724built on previous releases should work on 10.8 and later, and binaries built on 72510.8 and later should be able to run on previous releases (as long as none of 726the new variants or new features are used). 727.Pp 728There are a number of decisions that 729.St -p1003.2 730leaves up to the implementor, 731either by explicitly saying 732.Dq undefined 733or by virtue of them being 734forbidden by the RE grammar. 735This implementation treats them as follows. 736.Pp 737See 738.Xr re_format 7 739for a discussion of the definition of case-independent matching. 740.Pp 741There is no particular limit on the length of REs, 742except insofar as memory is limited. 743Memory usage is approximately linear in RE size, and largely insensitive 744to RE complexity, except for bounded repetitions. 745See 746.Sx BUGS 747for one short RE using them 748that will run almost any system out of memory. 749.Pp 750A backslashed character other than one specifically given a magic meaning 751by 752.St -p1003.2 753(such magic meanings occur only in obsolete 754.Bq Dq basic 755REs) 756is taken as an ordinary character. 757.Pp 758Any unmatched 759.Ql [\& 760is a 761.Dv REG_EBRACK 762error. 763.Pp 764Equivalence classes cannot begin or end bracket-expression ranges. 765The endpoint of one range cannot begin another. 766.Pp 767.Dv RE_DUP_MAX , 768the limit on repetition counts in bounded repetitions, is 255. 
769.Pp 770A repetition operator 771.Ql ( ?\& , 772.Ql *\& , 773.Ql +\& , 774or bounds) 775cannot follow another 776repetition operator, except for the use of 777.Ql ?\& 778for minimal repetition (for enhanced extended REs; see 779.Xr re_format 7 780for details). 781A repetition operator cannot begin an expression or subexpression 782or follow 783.Ql ^\& 784or 785.Ql |\& . 786.Pp 787.Ql |\& 788cannot appear first or last in a (sub)expression or after another 789.Ql |\& , 790i.e., an operand of 791.Ql |\& 792cannot be an empty subexpression. 793An empty parenthesized subexpression, 794.Ql "()" , 795is legal and matches an 796empty (sub)string. 797An empty string is not a legal RE. 798.Pp 799A 800.Ql {\& 801followed by a digit is considered the beginning of bounds for a 802bounded repetition, which must then follow the syntax for bounds. 803A 804.Ql {\& 805.Em not 806followed by a digit is considered an ordinary character. 807.Pp 808.Ql ^\& 809and 810.Ql $\& 811beginning and ending subexpressions in obsolete 812.Pq Dq basic 813REs are anchors, not ordinary characters. 
814.Sh DIAGNOSTICS 815Non-zero error codes from 816.Fn regcomp 817and 818.Fn regexec 819include the following: 820.Pp 821.Bl -tag -width REG_ECOLLATE -compact 822.It Dv REG_NOMATCH 823The 824.Fn regexec 825function 826failed to match 827.It Dv REG_BADPAT 828invalid regular expression 829.It Dv REG_ECOLLATE 830invalid collating element 831.It Dv REG_ECTYPE 832invalid character class 833.It Dv REG_EESCAPE 834.Ql \e 835applied to unescapable character 836.It Dv REG_ESUBREG 837invalid backreference number 838.It Dv REG_EBRACK 839brackets 840.Ql "[ ]" 841not balanced 842.It Dv REG_EPAREN 843parentheses 844.Ql "( )" 845not balanced 846.It Dv REG_EBRACE 847braces 848.Ql "{ }" 849not balanced 850.It Dv REG_BADBR 851invalid repetition count(s) in 852.Ql "{ }" 853.It Dv REG_ERANGE 854invalid character range in 855.Ql "[ ]" 856.It Dv REG_ESPACE 857ran out of memory 858.It Dv REG_BADRPT 859.Ql ?\& , 860.Ql *\& , 861or 862.Ql +\& 863operand invalid 864.It Dv REG_EMPTY 865empty (sub)expression 866.It Dv REG_ASSERT 867cannot happen - you found a bug 868.It Dv REG_INVARG 869invalid argument, e.g.\& negative-length string 870.It Dv REG_ILLSEQ 871illegal byte sequence (bad multibyte character) 872.El 873.Sh SEE ALSO 874.Xr grep 1 , 875.Xr re_format 7 876.Pp 877.St -p1003.2 , 878sections 2.8 (Regular Expression Notation) 879and 880B.5 (C Binding for Regular Expression Matching). 881.Sh HISTORY 882The 883.Nm regex 884implementation is based on a heavily modified subset of TRE 885(http://laurikari.net/tre/), originally written by Ville Laurikari. 886Previous releases used an implementation originally written by 887.An Henry Spencer , 888and altered for inclusion in the 889.Bx 4.4 890distribution. 891.Sh BUGS 892The beginning-of-line and end-of-line anchors ( 893.Dq ^\& 894and 895.Dq $\& ) 896are currently implemented so that repetitions can not be applied to them. 897The standards are unclear about whether this is legal, but other 898.Nm regex 899packages do support this case. 
900It is best to avoid this non-portable (and not really very useful) case. 901.Pp 902The back-reference code is subtle and doubts linger about its correctness 903in complex cases. 904.Pp 905The 906.Fn regexec 907variants use one of two internal matching engines. 908The normal one is linear worst-case time in the length of the text being 909searched, and quadratic worst-case time in the length of the used regular 910expression. 911When back-references are used, a slower, backtracking engine is used. 912While all backtracking matching engines suffer from extreme slowness for certain 913pathological cases, the normal engine doesn't suffer from these cases. 914It is advised to avoid back-references whenever possible. 915.Pp 916The 917.Fn regcomp 918variants 919implement bounded repetitions by macro expansion, 920which is costly in time and space if counts are large 921or bounded repetitions are nested. 922An RE like, say, 923.Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" 924will (eventually) run almost any existing machine out of swap space. 925.Pp 926Due to a mistake in 927.St -p1003.2 , 928things like 929.Ql "a)b" 930are legal REs because 931.Ql )\& 932is 933a special character only in the presence of a previous unmatched 934.Ql (\& . 935This cannot be fixed until the spec is fixed. 936.Pp 937The standard's definition of back references is vague. 938For example, does 939.Ql "a\e(\e(b\e)*\e2\e)*d" 940match 941.Ql "abbbd" ? 942Until the standard is clarified, 943behavior in such cases should not be relied on. 944