1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * 3*0Sstevel@tonic-gate * Portions Copyright %G% Sun Microsystems, Inc. All Rights Reserved 4*0Sstevel@tonic-gate * 5*0Sstevel@tonic-gate */ 6*0Sstevel@tonic-gate 7*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 8*0Sstevel@tonic-gate 9*0Sstevel@tonic-gate #include "portable.h" 10*0Sstevel@tonic-gate #include "stdio.h" 11*0Sstevel@tonic-gate #include "stdlib.h" 12*0Sstevel@tonic-gate 13*0Sstevel@tonic-gate #if defined( MACOS ) || defined( DOS ) || defined( _WIN32 ) || defined( NEED_BSDREGEX ) 14*0Sstevel@tonic-gate #include "regex.h" 15*0Sstevel@tonic-gate 16*0Sstevel@tonic-gate /* 17*0Sstevel@tonic-gate * regex - Regular expression pattern matching and replacement 18*0Sstevel@tonic-gate * 19*0Sstevel@tonic-gate * By: Ozan S. Yigit (oz) 20*0Sstevel@tonic-gate * Dept. of Computer Science 21*0Sstevel@tonic-gate * York University 22*0Sstevel@tonic-gate * 23*0Sstevel@tonic-gate * These routines are the PUBLIC DOMAIN equivalents of regex 24*0Sstevel@tonic-gate * routines as found in 4.nBSD UN*X, with minor extensions. 25*0Sstevel@tonic-gate * 26*0Sstevel@tonic-gate * These routines are derived from various implementations found 27*0Sstevel@tonic-gate * in software tools books, and Conroy's grep. They are NOT derived 28*0Sstevel@tonic-gate * from licensed/restricted software. 29*0Sstevel@tonic-gate * For more interesting/academic/complicated implementations, 30*0Sstevel@tonic-gate * see Henry Spencer's regexp routines, or GNU Emacs pattern 31*0Sstevel@tonic-gate * matching module. 32*0Sstevel@tonic-gate * 33*0Sstevel@tonic-gate * Modification history: 34*0Sstevel@tonic-gate * 35*0Sstevel@tonic-gate * $Log: regex.c,v $ 36*0Sstevel@tonic-gate * Revision 1.12 1996/04/25 16:20:59 mcs 37*0Sstevel@tonic-gate * make re_exec() match "" with ".*" and similar patterns 38*0Sstevel@tonic-gate * hopefully this change doesn't break anything else! 39*0Sstevel@tonic-gate * 40*0Sstevel@tonic-gate * Revision 1.11 1994/12/14 21:33:45 mcs 41*0Sstevel@tonic-gate * use new NEED_BSDREGEX 42*0Sstevel@tonic-gate * fix pmatch() prototype 43*0Sstevel@tonic-gate * 44*0Sstevel@tonic-gate * Revision 1.10 1994/12/12 18:16:39 mcs 45*0Sstevel@tonic-gate * use on NetBSD 46*0Sstevel@tonic-gate * 47*0Sstevel@tonic-gate * Revision 1.9 1994/11/15 19:16:35 mcs 48*0Sstevel@tonic-gate * add (CHAR) cast to make VisualC++ happy 49*0Sstevel@tonic-gate * 50*0Sstevel@tonic-gate * Revision 1.8 1994/11/08 21:14:32 mcs 51*0Sstevel@tonic-gate * WIN32 changes 52*0Sstevel@tonic-gate * 53*0Sstevel@tonic-gate * Revision 1.7 1994/07/23 19:51:24 mcs 54*0Sstevel@tonic-gate * use ANSI-style inline function parameters 55*0Sstevel@tonic-gate * 56*0Sstevel@tonic-gate * Revision 1.6 1993/10/18 01:52:32 tim 57*0Sstevel@tonic-gate * include for VMS 58*0Sstevel@tonic-gate * 59*0Sstevel@tonic-gate * Revision 1.5 1993/09/28 21:37:54 mcs 60*0Sstevel@tonic-gate * HP/UX needs the regex we include (not in its libc) 61*0Sstevel@tonic-gate * 62*0Sstevel@tonic-gate * Revision 1.4 1993/08/27 15:59:52 mcs 63*0Sstevel@tonic-gate * use CHAR for deftab 64*0Sstevel@tonic-gate * 65*0Sstevel@tonic-gate * Revision 1.3 1993/08/27 15:49:47 mcs 66*0Sstevel@tonic-gate * added missing 0 to octal constants 67*0Sstevel@tonic-gate * use unsigned char for CHAR under DOS 68*0Sstevel@tonic-gate * 69*0Sstevel@tonic-gate * Revision 1.2 1993/08/27 14:57:48 mcs 70*0Sstevel@tonic-gate * add proto. for pmatch 71*0Sstevel@tonic-gate * 72*0Sstevel@tonic-gate * Revision 1.1 1993/08/18 21:20:02 mcs 73*0Sstevel@tonic-gate * Initial revision 74*0Sstevel@tonic-gate * 75*0Sstevel@tonic-gate * Revision 1.4 1991/10/17 03:56:42 oz 76*0Sstevel@tonic-gate * miscellaneous changes, small cleanups etc. 77*0Sstevel@tonic-gate * 78*0Sstevel@tonic-gate * Revision 1.3 1989/04/01 14:18:09 oz 79*0Sstevel@tonic-gate * Change all references to a dfa: this is actually an nfa. 80*0Sstevel@tonic-gate * 81*0Sstevel@tonic-gate * Revision 1.2 88/08/28 15:36:04 oz 82*0Sstevel@tonic-gate * Use a complement bitmap to represent NCL. 83*0Sstevel@tonic-gate * This removes the need to have seperate 84*0Sstevel@tonic-gate * code in the pmatch case block - it is 85*0Sstevel@tonic-gate * just CCL code now. 86*0Sstevel@tonic-gate * 87*0Sstevel@tonic-gate * Use the actual CCL code in the CLO 88*0Sstevel@tonic-gate * section of pmatch. No need for a recursive 89*0Sstevel@tonic-gate * pmatch call. 90*0Sstevel@tonic-gate * 91*0Sstevel@tonic-gate * Use a bitmap table to set char bits in an 92*0Sstevel@tonic-gate * 8-bit chunk. 93*0Sstevel@tonic-gate * 94*0Sstevel@tonic-gate * Interfaces: 95*0Sstevel@tonic-gate * re_comp: compile a regular expression into a NFA. 96*0Sstevel@tonic-gate * 97*0Sstevel@tonic-gate * char *re_comp(s) 98*0Sstevel@tonic-gate * char *s; 99*0Sstevel@tonic-gate * 100*0Sstevel@tonic-gate * re_exec: execute the NFA to match a pattern. 101*0Sstevel@tonic-gate * 102*0Sstevel@tonic-gate * int re_exec(s) 103*0Sstevel@tonic-gate * char *s; 104*0Sstevel@tonic-gate * 105*0Sstevel@tonic-gate * re_modw change re_exec's understanding of what a "word" 106*0Sstevel@tonic-gate * looks like (for \< and \>) by adding into the 107*0Sstevel@tonic-gate * hidden word-syntax table. 108*0Sstevel@tonic-gate * 109*0Sstevel@tonic-gate * void re_modw(s) 110*0Sstevel@tonic-gate * char *s; 111*0Sstevel@tonic-gate * 112*0Sstevel@tonic-gate * re_subs: substitute the matched portions in a new string. 113*0Sstevel@tonic-gate * 114*0Sstevel@tonic-gate * int re_subs(src, dst) 115*0Sstevel@tonic-gate * char *src; 116*0Sstevel@tonic-gate * char *dst; 117*0Sstevel@tonic-gate * 118*0Sstevel@tonic-gate * re_fail: failure routine for re_exec. 119*0Sstevel@tonic-gate * 120*0Sstevel@tonic-gate * void re_fail(msg, op) 121*0Sstevel@tonic-gate * char *msg; 122*0Sstevel@tonic-gate * char op; 123*0Sstevel@tonic-gate * 124*0Sstevel@tonic-gate * Regular Expressions: 125*0Sstevel@tonic-gate * 126*0Sstevel@tonic-gate * [1] char matches itself, unless it is a special 127*0Sstevel@tonic-gate * character (metachar): . \ [ ] * + ^ $ 128*0Sstevel@tonic-gate * 129*0Sstevel@tonic-gate * [2] . matches any character. 130*0Sstevel@tonic-gate * 131*0Sstevel@tonic-gate * [3] \ matches the character following it, except 132*0Sstevel@tonic-gate * when followed by a left or right round bracket, 133*0Sstevel@tonic-gate * a digit 1 to 9 or a left or right angle bracket. 134*0Sstevel@tonic-gate * (see [7], [8] and [9]) 135*0Sstevel@tonic-gate * It is used as an escape character for all 136*0Sstevel@tonic-gate * other meta-characters, and itself. When used 137*0Sstevel@tonic-gate * in a set ([4]), it is treated as an ordinary 138*0Sstevel@tonic-gate * character. 139*0Sstevel@tonic-gate * 140*0Sstevel@tonic-gate * [4] [set] matches one of the characters in the set. 141*0Sstevel@tonic-gate * If the first character in the set is "^", 142*0Sstevel@tonic-gate * it matches a character NOT in the set, i.e. 143*0Sstevel@tonic-gate * complements the set. A shorthand S-E is 144*0Sstevel@tonic-gate * used to specify a set of characters S upto 145*0Sstevel@tonic-gate * E, inclusive. The special characters "]" and 146*0Sstevel@tonic-gate * "-" have no special meaning if they appear 147*0Sstevel@tonic-gate * as the first chars in the set. 148*0Sstevel@tonic-gate * examples: match: 149*0Sstevel@tonic-gate * 150*0Sstevel@tonic-gate * [a-z] any lowercase alpha 151*0Sstevel@tonic-gate * 152*0Sstevel@tonic-gate * [^]-] any char except ] and - 153*0Sstevel@tonic-gate * 154*0Sstevel@tonic-gate * [^A-Z] any char except uppercase 155*0Sstevel@tonic-gate * alpha 156*0Sstevel@tonic-gate * 157*0Sstevel@tonic-gate * [a-zA-Z] any alpha 158*0Sstevel@tonic-gate * 159*0Sstevel@tonic-gate * [5] * any regular expression form [1] to [4], followed by 160*0Sstevel@tonic-gate * closure char (*) matches zero or more matches of 161*0Sstevel@tonic-gate * that form. 162*0Sstevel@tonic-gate * 163*0Sstevel@tonic-gate * [6] + same as [5], except it matches one or more. 164*0Sstevel@tonic-gate * 165*0Sstevel@tonic-gate * [7] a regular expression in the form [1] to [10], enclosed 166*0Sstevel@tonic-gate * as \(form\) matches what form matches. The enclosure 167*0Sstevel@tonic-gate * creates a set of tags, used for [8] and for 168*0Sstevel@tonic-gate * pattern substution. The tagged forms are numbered 169*0Sstevel@tonic-gate * starting from 1. 170*0Sstevel@tonic-gate * 171*0Sstevel@tonic-gate * [8] a \ followed by a digit 1 to 9 matches whatever a 172*0Sstevel@tonic-gate * previously tagged regular expression ([7]) matched. 173*0Sstevel@tonic-gate * 174*0Sstevel@tonic-gate * [9] \< a regular expression starting with a \< construct 175*0Sstevel@tonic-gate * \> and/or ending with a \> construct, restricts the 176*0Sstevel@tonic-gate * pattern matching to the beginning of a word, and/or 177*0Sstevel@tonic-gate * the end of a word. A word is defined to be a character 178*0Sstevel@tonic-gate * string beginning and/or ending with the characters 179*0Sstevel@tonic-gate * A-Z a-z 0-9 and _. It must also be preceded and/or 180*0Sstevel@tonic-gate * followed by any character outside those mentioned. 181*0Sstevel@tonic-gate * 182*0Sstevel@tonic-gate * [10] a composite regular expression xy where x and y 183*0Sstevel@tonic-gate * are in the form [1] to [10] matches the longest 184*0Sstevel@tonic-gate * match of x followed by a match for y. 185*0Sstevel@tonic-gate * 186*0Sstevel@tonic-gate * [11] ^ a regular expression starting with a ^ character 187*0Sstevel@tonic-gate * $ and/or ending with a $ character, restricts the 188*0Sstevel@tonic-gate * pattern matching to the beginning of the line, 189*0Sstevel@tonic-gate * or the end of line. [anchors] Elsewhere in the 190*0Sstevel@tonic-gate * pattern, ^ and $ are treated as ordinary characters. 191*0Sstevel@tonic-gate * 192*0Sstevel@tonic-gate * 193*0Sstevel@tonic-gate * Acknowledgements: 194*0Sstevel@tonic-gate * 195*0Sstevel@tonic-gate * HCR's Hugh Redelmeier has been most helpful in various 196*0Sstevel@tonic-gate * stages of development. He convinced me to include BOW 197*0Sstevel@tonic-gate * and EOW constructs, originally invented by Rob Pike at 198*0Sstevel@tonic-gate * the University of Toronto. 199*0Sstevel@tonic-gate * 200*0Sstevel@tonic-gate * References: 201*0Sstevel@tonic-gate * Software tools Kernighan & Plauger 202*0Sstevel@tonic-gate * Software tools in Pascal Kernighan & Plauger 203*0Sstevel@tonic-gate * Grep [rsx-11 C dist] David Conroy 204*0Sstevel@tonic-gate * ed - text editor Un*x Programmer's Manual 205*0Sstevel@tonic-gate * Advanced editing on Un*x B. W. Kernighan 206*0Sstevel@tonic-gate * RegExp routines Henry Spencer 207*0Sstevel@tonic-gate * 208*0Sstevel@tonic-gate * Notes: 209*0Sstevel@tonic-gate * 210*0Sstevel@tonic-gate * This implementation uses a bit-set representation for character 211*0Sstevel@tonic-gate * classes for speed and compactness. Each character is represented 212*0Sstevel@tonic-gate * by one bit in a 128-bit block. Thus, CCL always takes a 213*0Sstevel@tonic-gate * constant 16 bytes in the internal nfa, and re_exec does a single 214*0Sstevel@tonic-gate * bit comparison to locate the character in the set. 215*0Sstevel@tonic-gate * 216*0Sstevel@tonic-gate * Examples: 217*0Sstevel@tonic-gate * 218*0Sstevel@tonic-gate * pattern: foo*.* 219*0Sstevel@tonic-gate * compile: CHR f CHR o CLO CHR o END CLO ANY END END 220*0Sstevel@tonic-gate * matches: fo foo fooo foobar fobar foxx ... 221*0Sstevel@tonic-gate * 222*0Sstevel@tonic-gate * pattern: fo[ob]a[rz] 223*0Sstevel@tonic-gate * compile: CHR f CHR o CCL bitset CHR a CCL bitset END 224*0Sstevel@tonic-gate * matches: fobar fooar fobaz fooaz 225*0Sstevel@tonic-gate * 226*0Sstevel@tonic-gate * pattern: foo\\+ 227*0Sstevel@tonic-gate * compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END 228*0Sstevel@tonic-gate * matches: foo\ foo\\ foo\\\ ... 229*0Sstevel@tonic-gate * 230*0Sstevel@tonic-gate * pattern: \(foo\)[1-3]\1 (same as foo[1-3]foo) 231*0Sstevel@tonic-gate * compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END 232*0Sstevel@tonic-gate * matches: foo1foo foo2foo foo3foo 233*0Sstevel@tonic-gate * 234*0Sstevel@tonic-gate * pattern: \(fo.*\)-\1 235*0Sstevel@tonic-gate * compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END 236*0Sstevel@tonic-gate * matches: foo-foo fo-fo fob-fob foobar-foobar ... 237*0Sstevel@tonic-gate */ 238*0Sstevel@tonic-gate 239*0Sstevel@tonic-gate #define MAXNFA 1024 240*0Sstevel@tonic-gate #define MAXTAG 10 241*0Sstevel@tonic-gate 242*0Sstevel@tonic-gate #define OKP 1 243*0Sstevel@tonic-gate #define NOP 0 244*0Sstevel@tonic-gate 245*0Sstevel@tonic-gate #define CHR 1 246*0Sstevel@tonic-gate #define ANY 2 247*0Sstevel@tonic-gate #define CCL 3 248*0Sstevel@tonic-gate #define BOL 4 249*0Sstevel@tonic-gate #define EOL 5 250*0Sstevel@tonic-gate #define BOT 6 251*0Sstevel@tonic-gate #define EOT 7 252*0Sstevel@tonic-gate #define BOW 8 253*0Sstevel@tonic-gate #define EOW 9 254*0Sstevel@tonic-gate #define REF 10 255*0Sstevel@tonic-gate #define CLO 11 256*0Sstevel@tonic-gate 257*0Sstevel@tonic-gate #define END 0 258*0Sstevel@tonic-gate 259*0Sstevel@tonic-gate /* 260*0Sstevel@tonic-gate * The following defines are not meant to be changeable. 261*0Sstevel@tonic-gate * They are for readability only. 262*0Sstevel@tonic-gate */ 263*0Sstevel@tonic-gate #define MAXCHR 128 264*0Sstevel@tonic-gate #define CHRBIT 8 265*0Sstevel@tonic-gate #define BITBLK MAXCHR/CHRBIT 266*0Sstevel@tonic-gate #define BLKIND 0170 267*0Sstevel@tonic-gate #define BITIND 07 268*0Sstevel@tonic-gate 269*0Sstevel@tonic-gate #define ASCIIB 0177 270*0Sstevel@tonic-gate 271*0Sstevel@tonic-gate #if defined( DOS ) || defined( _WIN32 ) || defined(SUN) 272*0Sstevel@tonic-gate typedef unsigned char CHAR; 273*0Sstevel@tonic-gate #else /* DOS */ 274*0Sstevel@tonic-gate typedef /*unsigned*/ char CHAR; 275*0Sstevel@tonic-gate #endif /* DOS */ 276*0Sstevel@tonic-gate 277*0Sstevel@tonic-gate static int tagstk[MAXTAG]; /* subpat tag stack..*/ 278*0Sstevel@tonic-gate static CHAR nfa[MAXNFA]; /* automaton.. */ 279*0Sstevel@tonic-gate static int sta = NOP; /* status of lastpat */ 280*0Sstevel@tonic-gate 281*0Sstevel@tonic-gate static CHAR bittab[BITBLK]; /* bit table for CCL */ 282*0Sstevel@tonic-gate /* pre-set bits... */ 283*0Sstevel@tonic-gate static CHAR bitarr[] = {1,2,4,8,16,32,64,128}; 284*0Sstevel@tonic-gate 285*0Sstevel@tonic-gate #ifdef DEBUG 286*0Sstevel@tonic-gate static void nfadump(CHAR *); 287*0Sstevel@tonic-gate void symbolic(char *); 288*0Sstevel@tonic-gate #endif 289*0Sstevel@tonic-gate 290*0Sstevel@tonic-gate static void 291*0Sstevel@tonic-gate chset(CHAR c) 292*0Sstevel@tonic-gate { 293*0Sstevel@tonic-gate bittab[((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND]; 294*0Sstevel@tonic-gate } 295*0Sstevel@tonic-gate 296*0Sstevel@tonic-gate #define badpat(x) (*nfa = END, x) 297*0Sstevel@tonic-gate #define store(x) *mp++ = x 298*0Sstevel@tonic-gate 299*0Sstevel@tonic-gate char * 300*0Sstevel@tonic-gate re_comp( char *pat ) 301*0Sstevel@tonic-gate { 302*0Sstevel@tonic-gate register char *p; /* pattern pointer */ 303*0Sstevel@tonic-gate register CHAR *mp=nfa; /* nfa pointer */ 304*0Sstevel@tonic-gate register CHAR *lp; /* saved pointer.. */ 305*0Sstevel@tonic-gate register CHAR *sp=nfa; /* another one.. */ 306*0Sstevel@tonic-gate 307*0Sstevel@tonic-gate register int tagi = 0; /* tag stack index */ 308*0Sstevel@tonic-gate register int tagc = 1; /* actual tag count */ 309*0Sstevel@tonic-gate 310*0Sstevel@tonic-gate register int n; 311*0Sstevel@tonic-gate register CHAR mask; /* xor mask -CCL/NCL */ 312*0Sstevel@tonic-gate int c1, c2; 313*0Sstevel@tonic-gate 314*0Sstevel@tonic-gate if (!pat || !*pat) 315*0Sstevel@tonic-gate if (sta) 316*0Sstevel@tonic-gate return 0; 317*0Sstevel@tonic-gate else 318*0Sstevel@tonic-gate return badpat("No previous regular expression"); 319*0Sstevel@tonic-gate sta = NOP; 320*0Sstevel@tonic-gate 321*0Sstevel@tonic-gate for (p = pat; *p; p++) { 322*0Sstevel@tonic-gate lp = mp; 323*0Sstevel@tonic-gate switch(*p) { 324*0Sstevel@tonic-gate 325*0Sstevel@tonic-gate case '.': /* match any char.. */ 326*0Sstevel@tonic-gate store(ANY); 327*0Sstevel@tonic-gate break; 328*0Sstevel@tonic-gate 329*0Sstevel@tonic-gate case '^': /* match beginning.. */ 330*0Sstevel@tonic-gate if (p == pat) 331*0Sstevel@tonic-gate store(BOL); 332*0Sstevel@tonic-gate else { 333*0Sstevel@tonic-gate store(CHR); 334*0Sstevel@tonic-gate store(*p); 335*0Sstevel@tonic-gate } 336*0Sstevel@tonic-gate break; 337*0Sstevel@tonic-gate 338*0Sstevel@tonic-gate case '$': /* match endofline.. */ 339*0Sstevel@tonic-gate if (!*(p+1)) 340*0Sstevel@tonic-gate store(EOL); 341*0Sstevel@tonic-gate else { 342*0Sstevel@tonic-gate store(CHR); 343*0Sstevel@tonic-gate store(*p); 344*0Sstevel@tonic-gate } 345*0Sstevel@tonic-gate break; 346*0Sstevel@tonic-gate 347*0Sstevel@tonic-gate case '[': /* match char class..*/ 348*0Sstevel@tonic-gate store(CCL); 349*0Sstevel@tonic-gate 350*0Sstevel@tonic-gate if (*++p == '^') { 351*0Sstevel@tonic-gate mask = 0377; 352*0Sstevel@tonic-gate p++; 353*0Sstevel@tonic-gate } 354*0Sstevel@tonic-gate else 355*0Sstevel@tonic-gate mask = 0; 356*0Sstevel@tonic-gate 357*0Sstevel@tonic-gate if (*p == '-') /* real dash */ 358*0Sstevel@tonic-gate chset(*p++); 359*0Sstevel@tonic-gate if (*p == ']') /* real brac */ 360*0Sstevel@tonic-gate chset(*p++); 361*0Sstevel@tonic-gate while (*p && *p != ']') { 362*0Sstevel@tonic-gate if (*p == '-' && *(p+1) && *(p+1) != ']') { 363*0Sstevel@tonic-gate p++; 364*0Sstevel@tonic-gate c1 = *(p-2) + 1; 365*0Sstevel@tonic-gate c2 = *p++; 366*0Sstevel@tonic-gate while (c1 <= c2) 367*0Sstevel@tonic-gate chset((CHAR)c1++); 368*0Sstevel@tonic-gate } 369*0Sstevel@tonic-gate #ifdef EXTEND 370*0Sstevel@tonic-gate else if (*p == '\\' && *(p+1)) { 371*0Sstevel@tonic-gate p++; 372*0Sstevel@tonic-gate chset(*p++); 373*0Sstevel@tonic-gate } 374*0Sstevel@tonic-gate #endif 375*0Sstevel@tonic-gate else 376*0Sstevel@tonic-gate chset(*p++); 377*0Sstevel@tonic-gate } 378*0Sstevel@tonic-gate if (!*p) 379*0Sstevel@tonic-gate return badpat("Missing ]"); 380*0Sstevel@tonic-gate 381*0Sstevel@tonic-gate for (n = 0; n < BITBLK; bittab[n++] = (char) 0) 382*0Sstevel@tonic-gate store(mask ^ bittab[n]); 383*0Sstevel@tonic-gate 384*0Sstevel@tonic-gate break; 385*0Sstevel@tonic-gate 386*0Sstevel@tonic-gate case '*': /* match 0 or more.. */ 387*0Sstevel@tonic-gate case '+': /* match 1 or more.. */ 388*0Sstevel@tonic-gate if (p == pat) 389*0Sstevel@tonic-gate return badpat("Empty closure"); 390*0Sstevel@tonic-gate lp = sp; /* previous opcode */ 391*0Sstevel@tonic-gate if (*lp == CLO) /* equivalence.. */ 392*0Sstevel@tonic-gate break; 393*0Sstevel@tonic-gate switch(*lp) { 394*0Sstevel@tonic-gate 395*0Sstevel@tonic-gate case BOL: 396*0Sstevel@tonic-gate case BOT: 397*0Sstevel@tonic-gate case EOT: 398*0Sstevel@tonic-gate case BOW: 399*0Sstevel@tonic-gate case EOW: 400*0Sstevel@tonic-gate case REF: 401*0Sstevel@tonic-gate return badpat("Illegal closure"); 402*0Sstevel@tonic-gate default: 403*0Sstevel@tonic-gate break; 404*0Sstevel@tonic-gate } 405*0Sstevel@tonic-gate 406*0Sstevel@tonic-gate if (*p == '+') 407*0Sstevel@tonic-gate for (sp = mp; lp < sp; lp++) 408*0Sstevel@tonic-gate store(*lp); 409*0Sstevel@tonic-gate 410*0Sstevel@tonic-gate store(END); 411*0Sstevel@tonic-gate store(END); 412*0Sstevel@tonic-gate sp = mp; 413*0Sstevel@tonic-gate while (--mp > lp) 414*0Sstevel@tonic-gate *mp = mp[-1]; 415*0Sstevel@tonic-gate store(CLO); 416*0Sstevel@tonic-gate mp = sp; 417*0Sstevel@tonic-gate break; 418*0Sstevel@tonic-gate 419*0Sstevel@tonic-gate case '\\': /* tags, backrefs .. */ 420*0Sstevel@tonic-gate switch(*++p) { 421*0Sstevel@tonic-gate 422*0Sstevel@tonic-gate case '(': 423*0Sstevel@tonic-gate if (tagc < MAXTAG) { 424*0Sstevel@tonic-gate tagstk[++tagi] = tagc; 425*0Sstevel@tonic-gate store(BOT); 426*0Sstevel@tonic-gate store(tagc++); 427*0Sstevel@tonic-gate } 428*0Sstevel@tonic-gate else 429*0Sstevel@tonic-gate return badpat("Too many \\(\\) pairs"); 430*0Sstevel@tonic-gate break; 431*0Sstevel@tonic-gate case ')': 432*0Sstevel@tonic-gate if (*sp == BOT) 433*0Sstevel@tonic-gate return badpat("Null pattern inside \\(\\)"); 434*0Sstevel@tonic-gate if (tagi > 0) { 435*0Sstevel@tonic-gate store(EOT); 436*0Sstevel@tonic-gate store(tagstk[tagi--]); 437*0Sstevel@tonic-gate } 438*0Sstevel@tonic-gate else 439*0Sstevel@tonic-gate return badpat("Unmatched \\)"); 440*0Sstevel@tonic-gate break; 441*0Sstevel@tonic-gate case '<': 442*0Sstevel@tonic-gate store(BOW); 443*0Sstevel@tonic-gate break; 444*0Sstevel@tonic-gate case '>': 445*0Sstevel@tonic-gate if (*sp == BOW) 446*0Sstevel@tonic-gate return badpat("Null pattern inside \\<\\>"); 447*0Sstevel@tonic-gate store(EOW); 448*0Sstevel@tonic-gate break; 449*0Sstevel@tonic-gate case '1': 450*0Sstevel@tonic-gate case '2': 451*0Sstevel@tonic-gate case '3': 452*0Sstevel@tonic-gate case '4': 453*0Sstevel@tonic-gate case '5': 454*0Sstevel@tonic-gate case '6': 455*0Sstevel@tonic-gate case '7': 456*0Sstevel@tonic-gate case '8': 457*0Sstevel@tonic-gate case '9': 458*0Sstevel@tonic-gate n = *p-'0'; 459*0Sstevel@tonic-gate if (tagi > 0 && tagstk[tagi] == n) 460*0Sstevel@tonic-gate return badpat("Cyclical reference"); 461*0Sstevel@tonic-gate if (tagc > n) { 462*0Sstevel@tonic-gate store(REF); 463*0Sstevel@tonic-gate store(n); 464*0Sstevel@tonic-gate } 465*0Sstevel@tonic-gate else 466*0Sstevel@tonic-gate return badpat("Undetermined reference"); 467*0Sstevel@tonic-gate break; 468*0Sstevel@tonic-gate #ifdef EXTEND 469*0Sstevel@tonic-gate case 'b': 470*0Sstevel@tonic-gate store(CHR); 471*0Sstevel@tonic-gate store('\b'); 472*0Sstevel@tonic-gate break; 473*0Sstevel@tonic-gate case 'n': 474*0Sstevel@tonic-gate store(CHR); 475*0Sstevel@tonic-gate store('\n'); 476*0Sstevel@tonic-gate break; 477*0Sstevel@tonic-gate case 'f': 478*0Sstevel@tonic-gate store(CHR); 479*0Sstevel@tonic-gate store('\f'); 480*0Sstevel@tonic-gate break; 481*0Sstevel@tonic-gate case 'r': 482*0Sstevel@tonic-gate store(CHR); 483*0Sstevel@tonic-gate store('\r'); 484*0Sstevel@tonic-gate break; 485*0Sstevel@tonic-gate case 't': 486*0Sstevel@tonic-gate store(CHR); 487*0Sstevel@tonic-gate store('\t'); 488*0Sstevel@tonic-gate break; 489*0Sstevel@tonic-gate #endif 490*0Sstevel@tonic-gate default: 491*0Sstevel@tonic-gate store(CHR); 492*0Sstevel@tonic-gate store(*p); 493*0Sstevel@tonic-gate } 494*0Sstevel@tonic-gate break; 495*0Sstevel@tonic-gate 496*0Sstevel@tonic-gate default : /* an ordinary char */ 497*0Sstevel@tonic-gate store(CHR); 498*0Sstevel@tonic-gate store(*p); 499*0Sstevel@tonic-gate break; 500*0Sstevel@tonic-gate } 501*0Sstevel@tonic-gate sp = lp; 502*0Sstevel@tonic-gate } 503*0Sstevel@tonic-gate if (tagi > 0) 504*0Sstevel@tonic-gate return badpat("Unmatched \\("); 505*0Sstevel@tonic-gate store(END); 506*0Sstevel@tonic-gate sta = OKP; 507*0Sstevel@tonic-gate return 0; 508*0Sstevel@tonic-gate } 509*0Sstevel@tonic-gate 510*0Sstevel@tonic-gate 511*0Sstevel@tonic-gate static char *bol; 512*0Sstevel@tonic-gate char *bopat[MAXTAG]; 513*0Sstevel@tonic-gate char *eopat[MAXTAG]; 514*0Sstevel@tonic-gate #ifdef NEEDPROTOS 515*0Sstevel@tonic-gate static char *pmatch( char *lp, CHAR *ap ); 516*0Sstevel@tonic-gate #else /* NEEDPROTOS */ 517*0Sstevel@tonic-gate static char *pmatch(); 518*0Sstevel@tonic-gate #endif /* NEEDPROTOS */ 519*0Sstevel@tonic-gate 520*0Sstevel@tonic-gate /* 521*0Sstevel@tonic-gate * re_exec: 522*0Sstevel@tonic-gate * execute nfa to find a match. 523*0Sstevel@tonic-gate * 524*0Sstevel@tonic-gate * special cases: (nfa[0]) 525*0Sstevel@tonic-gate * BOL 526*0Sstevel@tonic-gate * Match only once, starting from the 527*0Sstevel@tonic-gate * beginning. 528*0Sstevel@tonic-gate * CHR 529*0Sstevel@tonic-gate * First locate the character without 530*0Sstevel@tonic-gate * calling pmatch, and if found, call 531*0Sstevel@tonic-gate * pmatch for the remaining string. 532*0Sstevel@tonic-gate * END 533*0Sstevel@tonic-gate * re_comp failed, poor luser did not 534*0Sstevel@tonic-gate * check for it. Fail fast. 535*0Sstevel@tonic-gate * 536*0Sstevel@tonic-gate * If a match is found, bopat[0] and eopat[0] are set 537*0Sstevel@tonic-gate * to the beginning and the end of the matched fragment, 538*0Sstevel@tonic-gate * respectively. 539*0Sstevel@tonic-gate * 540*0Sstevel@tonic-gate */ 541*0Sstevel@tonic-gate 542*0Sstevel@tonic-gate int 543*0Sstevel@tonic-gate re_exec( char *lp ) 544*0Sstevel@tonic-gate { 545*0Sstevel@tonic-gate register char c; 546*0Sstevel@tonic-gate register char *ep = 0; 547*0Sstevel@tonic-gate register CHAR *ap = nfa; 548*0Sstevel@tonic-gate 549*0Sstevel@tonic-gate bol = lp; 550*0Sstevel@tonic-gate 551*0Sstevel@tonic-gate bopat[0] = 0; 552*0Sstevel@tonic-gate bopat[1] = 0; 553*0Sstevel@tonic-gate bopat[2] = 0; 554*0Sstevel@tonic-gate bopat[3] = 0; 555*0Sstevel@tonic-gate bopat[4] = 0; 556*0Sstevel@tonic-gate bopat[5] = 0; 557*0Sstevel@tonic-gate bopat[6] = 0; 558*0Sstevel@tonic-gate bopat[7] = 0; 559*0Sstevel@tonic-gate bopat[8] = 0; 560*0Sstevel@tonic-gate bopat[9] = 0; 561*0Sstevel@tonic-gate 562*0Sstevel@tonic-gate switch(*ap) { 563*0Sstevel@tonic-gate 564*0Sstevel@tonic-gate case BOL: /* anchored: match from BOL only */ 565*0Sstevel@tonic-gate ep = pmatch(lp,ap); 566*0Sstevel@tonic-gate break; 567*0Sstevel@tonic-gate case CHR: /* ordinary char: locate it fast */ 568*0Sstevel@tonic-gate c = *(ap+1); 569*0Sstevel@tonic-gate while (*lp && *lp != c) 570*0Sstevel@tonic-gate lp++; 571*0Sstevel@tonic-gate if (!*lp) /* if EOS, fail, else fall thru. */ 572*0Sstevel@tonic-gate return 0; 573*0Sstevel@tonic-gate default: /* regular matching all the way. */ 574*0Sstevel@tonic-gate do { 575*0Sstevel@tonic-gate if ((ep = pmatch(lp,ap))) 576*0Sstevel@tonic-gate break; 577*0Sstevel@tonic-gate lp++; 578*0Sstevel@tonic-gate } while (*lp); 579*0Sstevel@tonic-gate 580*0Sstevel@tonic-gate break; 581*0Sstevel@tonic-gate case END: /* munged automaton. fail always */ 582*0Sstevel@tonic-gate return 0; 583*0Sstevel@tonic-gate } 584*0Sstevel@tonic-gate if (!ep) 585*0Sstevel@tonic-gate return 0; 586*0Sstevel@tonic-gate 587*0Sstevel@tonic-gate bopat[0] = lp; 588*0Sstevel@tonic-gate eopat[0] = ep; 589*0Sstevel@tonic-gate return 1; 590*0Sstevel@tonic-gate } 591*0Sstevel@tonic-gate 592*0Sstevel@tonic-gate /* 593*0Sstevel@tonic-gate * pmatch: internal routine for the hard part 594*0Sstevel@tonic-gate * 595*0Sstevel@tonic-gate * This code is partly snarfed from an early grep written by 596*0Sstevel@tonic-gate * David Conroy. The backref and tag stuff, and various other 597*0Sstevel@tonic-gate * innovations are by oz. 598*0Sstevel@tonic-gate * 599*0Sstevel@tonic-gate * special case optimizations: (nfa[n], nfa[n+1]) 600*0Sstevel@tonic-gate * CLO ANY 601*0Sstevel@tonic-gate * We KNOW .* will match everything upto the 602*0Sstevel@tonic-gate * end of line. Thus, directly go to the end of 603*0Sstevel@tonic-gate * line, without recursive pmatch calls. As in 604*0Sstevel@tonic-gate * the other closure cases, the remaining pattern 605*0Sstevel@tonic-gate * must be matched by moving backwards on the 606*0Sstevel@tonic-gate * string recursively, to find a match for xy 607*0Sstevel@tonic-gate * (x is ".*" and y is the remaining pattern) 608*0Sstevel@tonic-gate * where the match satisfies the LONGEST match for 609*0Sstevel@tonic-gate * x followed by a match for y. 610*0Sstevel@tonic-gate * CLO CHR 611*0Sstevel@tonic-gate * We can again scan the string forward for the 612*0Sstevel@tonic-gate * single char and at the point of failure, we 613*0Sstevel@tonic-gate * execute the remaining nfa recursively, same as 614*0Sstevel@tonic-gate * above. 615*0Sstevel@tonic-gate * 616*0Sstevel@tonic-gate * At the end of a successful match, bopat[n] and eopat[n] 617*0Sstevel@tonic-gate * are set to the beginning and end of subpatterns matched 618*0Sstevel@tonic-gate * by tagged expressions (n = 1 to 9). 619*0Sstevel@tonic-gate * 620*0Sstevel@tonic-gate */ 621*0Sstevel@tonic-gate 622*0Sstevel@tonic-gate #ifndef re_fail 623*0Sstevel@tonic-gate extern void re_fail(); 624*0Sstevel@tonic-gate #endif /* re_fail */ 625*0Sstevel@tonic-gate 626*0Sstevel@tonic-gate /* 627*0Sstevel@tonic-gate * character classification table for word boundary operators BOW 628*0Sstevel@tonic-gate * and EOW. the reason for not using ctype macros is that we can 629*0Sstevel@tonic-gate * let the user add into our own table. see re_modw. This table 630*0Sstevel@tonic-gate * is not in the bitset form, since we may wish to extend it in the 631*0Sstevel@tonic-gate * future for other character classifications. 632*0Sstevel@tonic-gate * 633*0Sstevel@tonic-gate * TRUE for 0-9 A-Z a-z _ 634*0Sstevel@tonic-gate */ 635*0Sstevel@tonic-gate static char chrtyp[MAXCHR] = { 636*0Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 637*0Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 638*0Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 639*0Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 640*0Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 641*0Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 642*0Sstevel@tonic-gate 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 643*0Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 644*0Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 645*0Sstevel@tonic-gate 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 646*0Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 647*0Sstevel@tonic-gate 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 648*0Sstevel@tonic-gate 1, 1, 1, 0, 0, 0, 0, 0 649*0Sstevel@tonic-gate }; 650*0Sstevel@tonic-gate 651*0Sstevel@tonic-gate #define inascii(x) (0177&(x)) 652*0Sstevel@tonic-gate #define iswordc(x) chrtyp[inascii(x)] 653*0Sstevel@tonic-gate #define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND]) 654*0Sstevel@tonic-gate 655*0Sstevel@tonic-gate /* 656*0Sstevel@tonic-gate * skip values for CLO XXX to skip past the closure 657*0Sstevel@tonic-gate */ 658*0Sstevel@tonic-gate 659*0Sstevel@tonic-gate #define ANYSKIP 2 /* [CLO] ANY END ... */ 660*0Sstevel@tonic-gate #define CHRSKIP 3 /* [CLO] CHR chr END ... */ 661*0Sstevel@tonic-gate #define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */ 662*0Sstevel@tonic-gate 663*0Sstevel@tonic-gate static char * 664*0Sstevel@tonic-gate pmatch( char *lp, CHAR *ap) 665*0Sstevel@tonic-gate { 666*0Sstevel@tonic-gate register int op, c, n; 667*0Sstevel@tonic-gate register char *e; /* extra pointer for CLO */ 668*0Sstevel@tonic-gate register char *bp; /* beginning of subpat.. */ 669*0Sstevel@tonic-gate register char *ep; /* ending of subpat.. */ 670*0Sstevel@tonic-gate char *are; /* to save the line ptr. */ 671*0Sstevel@tonic-gate 672*0Sstevel@tonic-gate while ((op = *ap++) != END) 673*0Sstevel@tonic-gate switch(op) { 674*0Sstevel@tonic-gate 675*0Sstevel@tonic-gate case CHR: 676*0Sstevel@tonic-gate if (*lp++ != *ap++) 677*0Sstevel@tonic-gate return 0; 678*0Sstevel@tonic-gate break; 679*0Sstevel@tonic-gate case ANY: 680*0Sstevel@tonic-gate if (!*lp++) 681*0Sstevel@tonic-gate return 0; 682*0Sstevel@tonic-gate break; 683*0Sstevel@tonic-gate case CCL: 684*0Sstevel@tonic-gate c = *lp++; 685*0Sstevel@tonic-gate if (!isinset(ap,c)) 686*0Sstevel@tonic-gate return 0; 687*0Sstevel@tonic-gate ap += BITBLK; 688*0Sstevel@tonic-gate break; 689*0Sstevel@tonic-gate case BOL: 690*0Sstevel@tonic-gate if (lp != bol) 691*0Sstevel@tonic-gate return 0; 692*0Sstevel@tonic-gate break; 693*0Sstevel@tonic-gate case EOL: 694*0Sstevel@tonic-gate if (*lp) 695*0Sstevel@tonic-gate return 0; 696*0Sstevel@tonic-gate break; 697*0Sstevel@tonic-gate case BOT: 698*0Sstevel@tonic-gate bopat[*ap++] = lp; 699*0Sstevel@tonic-gate break; 700*0Sstevel@tonic-gate case EOT: 701*0Sstevel@tonic-gate eopat[*ap++] = lp; 702*0Sstevel@tonic-gate break; 703*0Sstevel@tonic-gate case BOW: 704*0Sstevel@tonic-gate if (lp!=bol && iswordc(lp[-1]) || !iswordc(*lp)) 705*0Sstevel@tonic-gate return 0; 706*0Sstevel@tonic-gate break; 707*0Sstevel@tonic-gate case EOW: 708*0Sstevel@tonic-gate if (lp==bol || !iswordc(lp[-1]) || iswordc(*lp)) 709*0Sstevel@tonic-gate return 0; 710*0Sstevel@tonic-gate break; 711*0Sstevel@tonic-gate case REF: 712*0Sstevel@tonic-gate n = *ap++; 713*0Sstevel@tonic-gate bp = bopat[n]; 714*0Sstevel@tonic-gate ep = eopat[n]; 715*0Sstevel@tonic-gate while (bp < ep) 716*0Sstevel@tonic-gate if (*bp++ != *lp++) 717*0Sstevel@tonic-gate return 0; 718*0Sstevel@tonic-gate break; 719*0Sstevel@tonic-gate case CLO: 720*0Sstevel@tonic-gate are = lp; 721*0Sstevel@tonic-gate switch(*ap) { 722*0Sstevel@tonic-gate 723*0Sstevel@tonic-gate case ANY: 724*0Sstevel@tonic-gate while (*lp) 725*0Sstevel@tonic-gate lp++; 726*0Sstevel@tonic-gate n = ANYSKIP; 727*0Sstevel@tonic-gate break; 728*0Sstevel@tonic-gate case CHR: 729*0Sstevel@tonic-gate c = *(ap+1); 730*0Sstevel@tonic-gate while (*lp && c == *lp) 731*0Sstevel@tonic-gate lp++; 732*0Sstevel@tonic-gate n = CHRSKIP; 733*0Sstevel@tonic-gate break; 734*0Sstevel@tonic-gate case CCL: 735*0Sstevel@tonic-gate while ((c = *lp) && isinset(ap+1,c)) 736*0Sstevel@tonic-gate lp++; 737*0Sstevel@tonic-gate n = CCLSKIP; 738*0Sstevel@tonic-gate break; 739*0Sstevel@tonic-gate default: 740*0Sstevel@tonic-gate re_fail("closure: bad nfa.", *ap); 741*0Sstevel@tonic-gate return 0; 742*0Sstevel@tonic-gate } 743*0Sstevel@tonic-gate 744*0Sstevel@tonic-gate ap += n; 745*0Sstevel@tonic-gate 746*0Sstevel@tonic-gate while (lp >= are) { 747*0Sstevel@tonic-gate if (e = pmatch(lp, ap)) 748*0Sstevel@tonic-gate return e; 749*0Sstevel@tonic-gate --lp; 750*0Sstevel@tonic-gate } 751*0Sstevel@tonic-gate return 0; 752*0Sstevel@tonic-gate default: 753*0Sstevel@tonic-gate re_fail("re_exec: bad nfa.", op); 754*0Sstevel@tonic-gate return 0; 755*0Sstevel@tonic-gate } 756*0Sstevel@tonic-gate return lp; 757*0Sstevel@tonic-gate } 758*0Sstevel@tonic-gate 759*0Sstevel@tonic-gate /* 760*0Sstevel@tonic-gate * re_modw: 761*0Sstevel@tonic-gate * add new characters into the word table to change re_exec's 762*0Sstevel@tonic-gate * understanding of what a word should look like. Note that we 763*0Sstevel@tonic-gate * only accept additions into the word definition. 764*0Sstevel@tonic-gate * 765*0Sstevel@tonic-gate * If the string parameter is 0 or null string, the table is 766*0Sstevel@tonic-gate * reset back to the default containing A-Z a-z 0-9 _. [We use 767*0Sstevel@tonic-gate * the compact bitset representation for the default table] 768*0Sstevel@tonic-gate */ 769*0Sstevel@tonic-gate 770*0Sstevel@tonic-gate static CHAR deftab[16] = { 771*0Sstevel@tonic-gate 0, 0, 0, 0, 0, 0, 0377, 003, 0376, 0377, 0377, 0207, 772*0Sstevel@tonic-gate 0376, 0377, 0377, 007 773*0Sstevel@tonic-gate }; 774*0Sstevel@tonic-gate 775*0Sstevel@tonic-gate void 776*0Sstevel@tonic-gate re_modw( char *s ) 777*0Sstevel@tonic-gate { 778*0Sstevel@tonic-gate register int i; 779*0Sstevel@tonic-gate 780*0Sstevel@tonic-gate if (!s || !*s) { 781*0Sstevel@tonic-gate for (i = 0; i < MAXCHR; i++) 782*0Sstevel@tonic-gate if (!isinset(deftab,i)) 783*0Sstevel@tonic-gate iswordc(i) = 0; 784*0Sstevel@tonic-gate } 785*0Sstevel@tonic-gate else 786*0Sstevel@tonic-gate while(*s) 787*0Sstevel@tonic-gate iswordc(*s++) = 1; 788*0Sstevel@tonic-gate } 789*0Sstevel@tonic-gate 790*0Sstevel@tonic-gate /* 791*0Sstevel@tonic-gate * re_subs: 792*0Sstevel@tonic-gate * substitute the matched portions of the src in dst. 793*0Sstevel@tonic-gate * 794*0Sstevel@tonic-gate * & substitute the entire matched pattern. 795*0Sstevel@tonic-gate * 796*0Sstevel@tonic-gate * \digit substitute a subpattern, with the given tag number. 797*0Sstevel@tonic-gate * Tags are numbered from 1 to 9. If the particular 798*0Sstevel@tonic-gate * tagged subpattern does not exist, null is substituted. 799*0Sstevel@tonic-gate */ 800*0Sstevel@tonic-gate int 801*0Sstevel@tonic-gate re_subs( char *src, char *dst) 802*0Sstevel@tonic-gate { 803*0Sstevel@tonic-gate register char c; 804*0Sstevel@tonic-gate register int pin; 805*0Sstevel@tonic-gate register char *bp; 806*0Sstevel@tonic-gate register char *ep; 807*0Sstevel@tonic-gate 808*0Sstevel@tonic-gate if (!*src || !bopat[0]) 809*0Sstevel@tonic-gate return 0; 810*0Sstevel@tonic-gate 811*0Sstevel@tonic-gate while (c = *src++) { 812*0Sstevel@tonic-gate switch(c) { 813*0Sstevel@tonic-gate 814*0Sstevel@tonic-gate case '&': 815*0Sstevel@tonic-gate pin = 0; 816*0Sstevel@tonic-gate break; 817*0Sstevel@tonic-gate 818*0Sstevel@tonic-gate case '\\': 819*0Sstevel@tonic-gate c = *src++; 820*0Sstevel@tonic-gate if (c >= '0' && c <= '9') { 821*0Sstevel@tonic-gate pin = c - '0'; 822*0Sstevel@tonic-gate break; 823*0Sstevel@tonic-gate } 824*0Sstevel@tonic-gate 825*0Sstevel@tonic-gate default: 826*0Sstevel@tonic-gate *dst++ = c; 827*0Sstevel@tonic-gate continue; 828*0Sstevel@tonic-gate } 829*0Sstevel@tonic-gate 830*0Sstevel@tonic-gate if ((bp = bopat[pin]) && (ep = eopat[pin])) { 831*0Sstevel@tonic-gate while (*bp && bp < ep) 832*0Sstevel@tonic-gate *dst++ = *bp++; 833*0Sstevel@tonic-gate if (bp < ep) 834*0Sstevel@tonic-gate return 0; 835*0Sstevel@tonic-gate } 836*0Sstevel@tonic-gate } 837*0Sstevel@tonic-gate *dst = (char) 0; 838*0Sstevel@tonic-gate return 1; 839*0Sstevel@tonic-gate } 840*0Sstevel@tonic-gate 841*0Sstevel@tonic-gate #ifdef DEBUG 842*0Sstevel@tonic-gate /* 843*0Sstevel@tonic-gate * symbolic - produce a symbolic dump of the nfa 844*0Sstevel@tonic-gate */ 845*0Sstevel@tonic-gate void 846*0Sstevel@tonic-gate symbolic( char *s ) 847*0Sstevel@tonic-gate { 848*0Sstevel@tonic-gate (void) printf("pattern: %s\n", s); 849*0Sstevel@tonic-gate (void) printf("nfacode:\n"); 850*0Sstevel@tonic-gate nfadump(nfa); 851*0Sstevel@tonic-gate } 852*0Sstevel@tonic-gate 853*0Sstevel@tonic-gate static void 854*0Sstevel@tonic-gate nfadump( CHAR *ap) 855*0Sstevel@tonic-gate { 856*0Sstevel@tonic-gate register int n; 857*0Sstevel@tonic-gate 858*0Sstevel@tonic-gate while (*ap != END) 859*0Sstevel@tonic-gate switch(*ap++) { 860*0Sstevel@tonic-gate case CLO: 861*0Sstevel@tonic-gate (void) printf("CLOSURE"); 862*0Sstevel@tonic-gate nfadump(ap); 863*0Sstevel@tonic-gate switch(*ap) { 864*0Sstevel@tonic-gate case CHR: 865*0Sstevel@tonic-gate n = CHRSKIP; 866*0Sstevel@tonic-gate break; 867*0Sstevel@tonic-gate case ANY: 868*0Sstevel@tonic-gate n = ANYSKIP; 869*0Sstevel@tonic-gate break; 870*0Sstevel@tonic-gate case CCL: 871*0Sstevel@tonic-gate n = CCLSKIP; 872*0Sstevel@tonic-gate break; 873*0Sstevel@tonic-gate } 874*0Sstevel@tonic-gate ap += n; 875*0Sstevel@tonic-gate break; 876*0Sstevel@tonic-gate case CHR: 877*0Sstevel@tonic-gate (void) printf("\tCHR %c\n",*ap++); 878*0Sstevel@tonic-gate break; 879*0Sstevel@tonic-gate case ANY: 880*0Sstevel@tonic-gate (void) printf("\tANY .\n"); 881*0Sstevel@tonic-gate break; 882*0Sstevel@tonic-gate case BOL: 883*0Sstevel@tonic-gate (void) printf("\tBOL -\n"); 884*0Sstevel@tonic-gate break; 885*0Sstevel@tonic-gate case EOL: 886*0Sstevel@tonic-gate (void) printf("\tEOL -\n"); 887*0Sstevel@tonic-gate break; 888*0Sstevel@tonic-gate case BOT: 889*0Sstevel@tonic-gate (void) printf("BOT: %d\n",*ap++); 890*0Sstevel@tonic-gate break; 891*0Sstevel@tonic-gate case EOT: 892*0Sstevel@tonic-gate (void) printf("EOT: %d\n",*ap++); 893*0Sstevel@tonic-gate break; 894*0Sstevel@tonic-gate case BOW: 895*0Sstevel@tonic-gate (void) printf("BOW\n"); 896*0Sstevel@tonic-gate break; 897*0Sstevel@tonic-gate case EOW: 898*0Sstevel@tonic-gate (void) printf("EOW\n"); 899*0Sstevel@tonic-gate break; 900*0Sstevel@tonic-gate case REF: 901*0Sstevel@tonic-gate (void) printf("REF: %d\n",*ap++); 902*0Sstevel@tonic-gate break; 903*0Sstevel@tonic-gate case CCL: 904*0Sstevel@tonic-gate (void) printf("\tCCL ["); 905*0Sstevel@tonic-gate for (n = 0; n < MAXCHR; n++) 906*0Sstevel@tonic-gate if (isinset(ap,(CHAR)n)) { 907*0Sstevel@tonic-gate if (n < ' ') 908*0Sstevel@tonic-gate (void) printf("^%c", n ^ 0x040); 909*0Sstevel@tonic-gate else 910*0Sstevel@tonic-gate (void) printf("%c", n); 911*0Sstevel@tonic-gate } 912*0Sstevel@tonic-gate (void) printf("]\n"); 913*0Sstevel@tonic-gate ap += BITBLK; 914*0Sstevel@tonic-gate break; 915*0Sstevel@tonic-gate default: 916*0Sstevel@tonic-gate (void) printf("bad nfa. opcode %o\n", ap[-1]); 917*0Sstevel@tonic-gate exit(1); 918*0Sstevel@tonic-gate break; 919*0Sstevel@tonic-gate } 920*0Sstevel@tonic-gate } 921*0Sstevel@tonic-gate #endif 922*0Sstevel@tonic-gate #endif /* MACOS or DOS or NEED_BSDREGEX */ 923