1*0Sstevel@tonic-gate /* regcomp.h 2*0Sstevel@tonic-gate * 3*0Sstevel@tonic-gate * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 4*0Sstevel@tonic-gate * 2000, 2001, 2002, 2003, by Larry Wall and others 5*0Sstevel@tonic-gate * 6*0Sstevel@tonic-gate * You may distribute under the terms of either the GNU General Public 7*0Sstevel@tonic-gate * License or the Artistic License, as specified in the README file. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate */ 10*0Sstevel@tonic-gate 11*0Sstevel@tonic-gate typedef OP OP_4tree; /* Will be redefined later. */ 12*0Sstevel@tonic-gate 13*0Sstevel@tonic-gate /* 14*0Sstevel@tonic-gate * The "internal use only" fields in regexp.h are present to pass info from 15*0Sstevel@tonic-gate * compile to execute that permits the execute phase to run lots faster on 16*0Sstevel@tonic-gate * simple cases. They are: 17*0Sstevel@tonic-gate * 18*0Sstevel@tonic-gate * regstart sv that must begin a match; Nullch if none obvious 19*0Sstevel@tonic-gate * reganch is the match anchored (at beginning-of-line only)? 20*0Sstevel@tonic-gate * regmust string (pointer into program) that match must include, or NULL 21*0Sstevel@tonic-gate * [regmust changed to SV* for bminstr()--law] 22*0Sstevel@tonic-gate * regmlen length of regmust string 23*0Sstevel@tonic-gate * [regmlen not used currently] 24*0Sstevel@tonic-gate * 25*0Sstevel@tonic-gate * Regstart and reganch permit very fast decisions on suitable starting points 26*0Sstevel@tonic-gate * for a match, cutting down the work a lot. Regmust permits fast rejection 27*0Sstevel@tonic-gate * of lines that cannot possibly match. The regmust tests are costly enough 28*0Sstevel@tonic-gate * that pregcomp() supplies a regmust only if the r.e. contains something 29*0Sstevel@tonic-gate * potentially expensive (at present, the only such thing detected is * or + 30*0Sstevel@tonic-gate * at the start of the r.e., which can involve a lot of backup). 
Regmlen is 31*0Sstevel@tonic-gate * supplied because the test in pregexec() needs it and pregcomp() is computing 32*0Sstevel@tonic-gate * it anyway. 33*0Sstevel@tonic-gate * [regmust is now supplied always. The tests that use regmust have a 34*0Sstevel@tonic-gate * heuristic that disables the test if it usually matches.] 35*0Sstevel@tonic-gate * 36*0Sstevel@tonic-gate * [In fact, we now use regmust in many cases to locate where the search 37*0Sstevel@tonic-gate * starts in the string, so if regback is >= 0, the regmust search is never 38*0Sstevel@tonic-gate * wasted effort. The regback variable says how many characters back from 39*0Sstevel@tonic-gate * where regmust matched is the earliest possible start of the match. 40*0Sstevel@tonic-gate * For instance, /[a-z].foo/ has a regmust of 'foo' and a regback of 2.] 41*0Sstevel@tonic-gate */ 42*0Sstevel@tonic-gate 43*0Sstevel@tonic-gate /* 44*0Sstevel@tonic-gate * Structure for regexp "program". This is essentially a linear encoding 45*0Sstevel@tonic-gate * of a nondeterministic finite-state machine (aka syntax charts or 46*0Sstevel@tonic-gate * "railroad normal form" in parsing technology). Each node is an opcode 47*0Sstevel@tonic-gate * plus a "next" pointer, possibly plus an operand. "Next" pointers of 48*0Sstevel@tonic-gate * all nodes except BRANCH implement concatenation; a "next" pointer with 49*0Sstevel@tonic-gate * a BRANCH on both ends of it is connecting two alternatives. (Here we 50*0Sstevel@tonic-gate * have one of the subtle syntax dependencies: an individual BRANCH (as 51*0Sstevel@tonic-gate * opposed to a collection of them) is never concatenated with anything 52*0Sstevel@tonic-gate * because of operator precedence.) The operand of some types of node is 53*0Sstevel@tonic-gate * a literal string; for others, it is a node leading into a sub-FSM. In 54*0Sstevel@tonic-gate * particular, the operand of a BRANCH node is the first node of the branch. 
55*0Sstevel@tonic-gate * (NB this is *not* a tree structure: the tail of the branch connects 56*0Sstevel@tonic-gate * to the thing following the set of BRANCHes.) The opcodes are: 57*0Sstevel@tonic-gate */ 58*0Sstevel@tonic-gate 59*0Sstevel@tonic-gate /* 60*0Sstevel@tonic-gate * A node is one char of opcode followed by two chars of "next" pointer. 61*0Sstevel@tonic-gate * "Next" pointers are stored as two 8-bit pieces, high order first. The 62*0Sstevel@tonic-gate * value is a positive offset from the opcode of the node containing it. 63*0Sstevel@tonic-gate * An operand, if any, simply follows the node. (Note that much of the 64*0Sstevel@tonic-gate * code generation knows about this implicit relationship.) 65*0Sstevel@tonic-gate * 66*0Sstevel@tonic-gate * Using two bytes for the "next" pointer is vast overkill for most things, 67*0Sstevel@tonic-gate * but allows patterns to get big without disasters. 68*0Sstevel@tonic-gate * 69*0Sstevel@tonic-gate * [The "next" pointer is always aligned on an even 70*0Sstevel@tonic-gate * boundary, and reads the offset directly as a short. Also, there is no 71*0Sstevel@tonic-gate * special test to reverse the sign of BACK pointers since the offset is 72*0Sstevel@tonic-gate * stored negative.] 
73*0Sstevel@tonic-gate */ 74*0Sstevel@tonic-gate 75*0Sstevel@tonic-gate struct regnode_string { 76*0Sstevel@tonic-gate U8 str_len; 77*0Sstevel@tonic-gate U8 type; 78*0Sstevel@tonic-gate U16 next_off; 79*0Sstevel@tonic-gate char string[1]; 80*0Sstevel@tonic-gate }; 81*0Sstevel@tonic-gate 82*0Sstevel@tonic-gate struct regnode_1 { 83*0Sstevel@tonic-gate U8 flags; 84*0Sstevel@tonic-gate U8 type; 85*0Sstevel@tonic-gate U16 next_off; 86*0Sstevel@tonic-gate U32 arg1; 87*0Sstevel@tonic-gate }; 88*0Sstevel@tonic-gate 89*0Sstevel@tonic-gate struct regnode_2 { 90*0Sstevel@tonic-gate U8 flags; 91*0Sstevel@tonic-gate U8 type; 92*0Sstevel@tonic-gate U16 next_off; 93*0Sstevel@tonic-gate U16 arg1; 94*0Sstevel@tonic-gate U16 arg2; 95*0Sstevel@tonic-gate }; 96*0Sstevel@tonic-gate 97*0Sstevel@tonic-gate #define ANYOF_BITMAP_SIZE 32 /* 256 b/(8 b/B) */ 98*0Sstevel@tonic-gate #define ANYOF_CLASSBITMAP_SIZE 4 /* up to 32 (8*4) named classes */ 99*0Sstevel@tonic-gate 100*0Sstevel@tonic-gate struct regnode_charclass { 101*0Sstevel@tonic-gate U8 flags; 102*0Sstevel@tonic-gate U8 type; 103*0Sstevel@tonic-gate U16 next_off; 104*0Sstevel@tonic-gate U32 arg1; 105*0Sstevel@tonic-gate char bitmap[ANYOF_BITMAP_SIZE]; /* only compile-time */ 106*0Sstevel@tonic-gate }; 107*0Sstevel@tonic-gate 108*0Sstevel@tonic-gate struct regnode_charclass_class { /* has [[:blah:]] classes */ 109*0Sstevel@tonic-gate U8 flags; /* should have ANYOF_CLASS here */ 110*0Sstevel@tonic-gate U8 type; 111*0Sstevel@tonic-gate U16 next_off; 112*0Sstevel@tonic-gate U32 arg1; 113*0Sstevel@tonic-gate char bitmap[ANYOF_BITMAP_SIZE]; /* both compile-time */ 114*0Sstevel@tonic-gate char classflags[ANYOF_CLASSBITMAP_SIZE]; /* and run-time */ 115*0Sstevel@tonic-gate }; 116*0Sstevel@tonic-gate 117*0Sstevel@tonic-gate /* XXX fix this description. 118*0Sstevel@tonic-gate Impose a limit of REG_INFTY on various pattern matching operations 119*0Sstevel@tonic-gate to limit stack growth and to avoid "infinite" recursions. 
120*0Sstevel@tonic-gate */ 121*0Sstevel@tonic-gate /* The default size for REG_INFTY is I16_MAX, which is the same as 122*0Sstevel@tonic-gate SHORT_MAX (see perl.h). Unfortunately I16 isn't necessarily 16 bits 123*0Sstevel@tonic-gate (see handy.h). On the Cray C90, sizeof(short)==4 and hence I16_MAX is 124*0Sstevel@tonic-gate ((1<<31)-1), while on the Cray T90, sizeof(short)==8 and I16_MAX is 125*0Sstevel@tonic-gate ((1<<63)-1). To limit stack growth to reasonable sizes, supply a 126*0Sstevel@tonic-gate smaller default. 127*0Sstevel@tonic-gate --Andy Dougherty 11 June 1998 128*0Sstevel@tonic-gate */ 129*0Sstevel@tonic-gate #if SHORTSIZE > 2 130*0Sstevel@tonic-gate # ifndef REG_INFTY 131*0Sstevel@tonic-gate # define REG_INFTY ((1<<15)-1) 132*0Sstevel@tonic-gate # endif 133*0Sstevel@tonic-gate #endif 134*0Sstevel@tonic-gate 135*0Sstevel@tonic-gate #ifndef REG_INFTY 136*0Sstevel@tonic-gate # define REG_INFTY I16_MAX 137*0Sstevel@tonic-gate #endif 138*0Sstevel@tonic-gate 139*0Sstevel@tonic-gate #define ARG_VALUE(arg) (arg) 140*0Sstevel@tonic-gate #define ARG__SET(arg,val) ((arg) = (val)) 141*0Sstevel@tonic-gate 142*0Sstevel@tonic-gate #undef ARG 143*0Sstevel@tonic-gate #undef ARG1 144*0Sstevel@tonic-gate #undef ARG2 145*0Sstevel@tonic-gate 146*0Sstevel@tonic-gate #define ARG(p) ARG_VALUE(ARG_LOC(p)) 147*0Sstevel@tonic-gate #define ARG1(p) ARG_VALUE(ARG1_LOC(p)) 148*0Sstevel@tonic-gate #define ARG2(p) ARG_VALUE(ARG2_LOC(p)) 149*0Sstevel@tonic-gate #define ARG_SET(p, val) ARG__SET(ARG_LOC(p), (val)) 150*0Sstevel@tonic-gate #define ARG1_SET(p, val) ARG__SET(ARG1_LOC(p), (val)) 151*0Sstevel@tonic-gate #define ARG2_SET(p, val) ARG__SET(ARG2_LOC(p), (val)) 152*0Sstevel@tonic-gate 153*0Sstevel@tonic-gate #undef NEXT_OFF 154*0Sstevel@tonic-gate #undef NODE_ALIGN 155*0Sstevel@tonic-gate 156*0Sstevel@tonic-gate #ifndef lint 157*0Sstevel@tonic-gate # define NEXT_OFF(p) ((p)->next_off) 158*0Sstevel@tonic-gate # define NODE_ALIGN(node) 159*0Sstevel@tonic-gate # define 
NODE_ALIGN_FILL(node) ((node)->flags = 0xde) /* deadbeef */ 160*0Sstevel@tonic-gate #else /* lint */ 161*0Sstevel@tonic-gate # define NEXT_OFF(p) 0 162*0Sstevel@tonic-gate # define NODE_ALIGN(node) 163*0Sstevel@tonic-gate # define NODE_ALIGN_FILL(node) 164*0Sstevel@tonic-gate #endif /* lint */ 165*0Sstevel@tonic-gate 166*0Sstevel@tonic-gate #define SIZE_ALIGN NODE_ALIGN 167*0Sstevel@tonic-gate 168*0Sstevel@tonic-gate #undef OP 169*0Sstevel@tonic-gate #undef OPERAND 170*0Sstevel@tonic-gate #undef MASK 171*0Sstevel@tonic-gate #undef STRING 172*0Sstevel@tonic-gate 173*0Sstevel@tonic-gate #define OP(p) ((p)->type) 174*0Sstevel@tonic-gate #define OPERAND(p) (((struct regnode_string *)p)->string) 175*0Sstevel@tonic-gate #define MASK(p) ((char*)OPERAND(p)) 176*0Sstevel@tonic-gate #define STR_LEN(p) (((struct regnode_string *)p)->str_len) 177*0Sstevel@tonic-gate #define STRING(p) (((struct regnode_string *)p)->string) 178*0Sstevel@tonic-gate #define STR_SZ(l) ((l + sizeof(regnode) - 1) / sizeof(regnode)) 179*0Sstevel@tonic-gate #define NODE_SZ_STR(p) (STR_SZ(STR_LEN(p))+1) 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gate #undef NODE_ALIGN 182*0Sstevel@tonic-gate #undef ARG_LOC 183*0Sstevel@tonic-gate #undef NEXTOPER 184*0Sstevel@tonic-gate #undef PREVOPER 185*0Sstevel@tonic-gate 186*0Sstevel@tonic-gate #define NODE_ALIGN(node) 187*0Sstevel@tonic-gate #define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) 188*0Sstevel@tonic-gate #define ARG1_LOC(p) (((struct regnode_2 *)p)->arg1) 189*0Sstevel@tonic-gate #define ARG2_LOC(p) (((struct regnode_2 *)p)->arg2) 190*0Sstevel@tonic-gate #define NODE_STEP_REGNODE 1 /* sizeof(regnode)/sizeof(regnode) */ 191*0Sstevel@tonic-gate #define EXTRA_STEP_2ARGS EXTRA_SIZE(struct regnode_2) 192*0Sstevel@tonic-gate 193*0Sstevel@tonic-gate #define NODE_STEP_B 4 194*0Sstevel@tonic-gate 195*0Sstevel@tonic-gate #define NEXTOPER(p) ((p) + NODE_STEP_REGNODE) 196*0Sstevel@tonic-gate #define PREVOPER(p) ((p) - NODE_STEP_REGNODE) 197*0Sstevel@tonic-gate 
/*
 * FILL_ADVANCE_NODE: store opcode `op' in the node at ptr, zero its
 * next_off, and advance ptr by one node.
 *
 * FILL_ADVANCE_NODE_ARG: store `arg' with ARG_SET first, then do
 * FILL_ADVANCE_NODE, then advance ptr by one more node (two in total).
 *
 * Both modify `ptr', so it must be a modifiable lvalue.
 */
#define FILL_ADVANCE_NODE(ptr, op) STMT_START { \
    (ptr)->type = (op); (ptr)->next_off = 0; (ptr)++; } STMT_END
#define FILL_ADVANCE_NODE_ARG(ptr, op, arg) STMT_START { \
    ARG_SET(ptr, arg); FILL_ADVANCE_NODE(ptr, op); (ptr) += 1; } STMT_END

#define REG_MAGIC 0234

/* True when RExC_emit points at PL_regdummy.
 * NOTE(review): presumably this marks the sizing pass -- confirm in regcomp.c. */
#define SIZE_ONLY (RExC_emit == &PL_regdummy)

/* Flags for node->flags of ANYOF */

#define ANYOF_CLASS     0x08    /* has [[:blah:]] classes */
#define ANYOF_INVERT    0x04
#define ANYOF_FOLD      0x02
#define ANYOF_LOCALE    0x01

/* Used for regstclass only */
#define ANYOF_EOS       0x10    /* Can match an empty string too */

/* There is a character or a range past 0xff */
#define ANYOF_UNICODE           0x20
#define ANYOF_UNICODE_ALL       0x40    /* Can match any char past 0xff */

/* size of node is large (includes class pointer) */
#define ANYOF_LARGE     0x80

/* Are there any runtime flags on in this node?
 * The mask is the low four flag bits (0x0f), written with their names. */
#define ANYOF_RUNTIME(s) \
    (ANYOF_FLAGS(s) & (ANYOF_CLASS | ANYOF_INVERT | ANYOF_FOLD | ANYOF_LOCALE))

#define ANYOF_FLAGS_ALL 0xff

/* Character classes for node->classflags of ANYOF */
/* Should be synchronized with a table in regprop() */
/* 2n should pair with 2n+1 */

#define ANYOF_ALNUM      0      /* \w, PL_utf8_alnum, utf8::IsWord, ALNUM */
#define ANYOF_NALNUM     1
#define ANYOF_SPACE      2      /* \s */
#define ANYOF_NSPACE     3
#define ANYOF_DIGIT      4
#define ANYOF_NDIGIT     5
#define ANYOF_ALNUMC     6      /* isalnum(3), utf8::IsAlnum, ALNUMC */
#define ANYOF_NALNUMC    7
#define ANYOF_ALPHA      8
#define ANYOF_NALPHA     9
#define ANYOF_ASCII     10
#define ANYOF_NASCII    11
#define ANYOF_CNTRL     12
#define ANYOF_NCNTRL    13
#define ANYOF_GRAPH     14
#define ANYOF_NGRAPH    15
#define ANYOF_LOWER     16
#define ANYOF_NLOWER    17
#define ANYOF_PRINT     18
#define ANYOF_NPRINT    19
#define ANYOF_PUNCT     20
#define ANYOF_NPUNCT    21
#define ANYOF_UPPER     22
#define ANYOF_NUPPER    23
#define ANYOF_XDIGIT    24
#define ANYOF_NXDIGIT   25
#define ANYOF_PSXSPC    26      /* POSIX space: \s plus the vertical tab */
#define ANYOF_NPSXSPC   27
#define ANYOF_BLANK     28      /* GNU extension: space and tab: non-vertical space */
262*0Sstevel@tonic-gate #define ANYOF_NBLANK 29 263*0Sstevel@tonic-gate 264*0Sstevel@tonic-gate #define ANYOF_MAX 32 265*0Sstevel@tonic-gate 266*0Sstevel@tonic-gate /* Backward source code compatibility. */ 267*0Sstevel@tonic-gate 268*0Sstevel@tonic-gate #define ANYOF_ALNUML ANYOF_ALNUM 269*0Sstevel@tonic-gate #define ANYOF_NALNUML ANYOF_NALNUM 270*0Sstevel@tonic-gate #define ANYOF_SPACEL ANYOF_SPACE 271*0Sstevel@tonic-gate #define ANYOF_NSPACEL ANYOF_NSPACE 272*0Sstevel@tonic-gate 273*0Sstevel@tonic-gate /* Utility macros for the bitmap and classes of ANYOF */ 274*0Sstevel@tonic-gate 275*0Sstevel@tonic-gate #define ANYOF_SIZE (sizeof(struct regnode_charclass)) 276*0Sstevel@tonic-gate #define ANYOF_CLASS_SIZE (sizeof(struct regnode_charclass_class)) 277*0Sstevel@tonic-gate 278*0Sstevel@tonic-gate #define ANYOF_FLAGS(p) ((p)->flags) 279*0Sstevel@tonic-gate 280*0Sstevel@tonic-gate #define ANYOF_BIT(c) (1 << ((c) & 7)) 281*0Sstevel@tonic-gate 282*0Sstevel@tonic-gate #define ANYOF_CLASS_BYTE(p, c) (((struct regnode_charclass_class*)(p))->classflags[((c) >> 3) & 3]) 283*0Sstevel@tonic-gate #define ANYOF_CLASS_SET(p, c) (ANYOF_CLASS_BYTE(p, c) |= ANYOF_BIT(c)) 284*0Sstevel@tonic-gate #define ANYOF_CLASS_CLEAR(p, c) (ANYOF_CLASS_BYTE(p, c) &= ~ANYOF_BIT(c)) 285*0Sstevel@tonic-gate #define ANYOF_CLASS_TEST(p, c) (ANYOF_CLASS_BYTE(p, c) & ANYOF_BIT(c)) 286*0Sstevel@tonic-gate 287*0Sstevel@tonic-gate #define ANYOF_CLASS_ZERO(ret) Zero(((struct regnode_charclass_class*)(ret))->classflags, ANYOF_CLASSBITMAP_SIZE, char) 288*0Sstevel@tonic-gate #define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char) 289*0Sstevel@tonic-gate 290*0Sstevel@tonic-gate #define ANYOF_BITMAP(p) (((struct regnode_charclass*)(p))->bitmap) 291*0Sstevel@tonic-gate #define ANYOF_BITMAP_BYTE(p, c) (ANYOF_BITMAP(p)[((c) >> 3) & 31]) 292*0Sstevel@tonic-gate #define ANYOF_BITMAP_SET(p, c) (ANYOF_BITMAP_BYTE(p, c) |= ANYOF_BIT(c)) 293*0Sstevel@tonic-gate #define 
ANYOF_BITMAP_CLEAR(p,c) (ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c)) 294*0Sstevel@tonic-gate #define ANYOF_BITMAP_TEST(p, c) (ANYOF_BITMAP_BYTE(p, c) & ANYOF_BIT(c)) 295*0Sstevel@tonic-gate 296*0Sstevel@tonic-gate #define ANYOF_BITMAP_SETALL(p) \ 297*0Sstevel@tonic-gate memset (ANYOF_BITMAP(p), 255, ANYOF_BITMAP_SIZE) 298*0Sstevel@tonic-gate #define ANYOF_BITMAP_CLEARALL(p) \ 299*0Sstevel@tonic-gate Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE) 300*0Sstevel@tonic-gate /* Check that all 256 bits are all set. Used in S_cl_is_anything() */ 301*0Sstevel@tonic-gate #define ANYOF_BITMAP_TESTALLSET(p) \ 302*0Sstevel@tonic-gate memEQ (ANYOF_BITMAP(p), "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377", ANYOF_BITMAP_SIZE) 303*0Sstevel@tonic-gate 304*0Sstevel@tonic-gate #define ANYOF_SKIP ((ANYOF_SIZE - 1)/sizeof(regnode)) 305*0Sstevel@tonic-gate #define ANYOF_CLASS_SKIP ((ANYOF_CLASS_SIZE - 1)/sizeof(regnode)) 306*0Sstevel@tonic-gate #define ANYOF_CLASS_ADD_SKIP (ANYOF_CLASS_SKIP - ANYOF_SKIP) 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate /* 309*0Sstevel@tonic-gate * Utility definitions. 
310*0Sstevel@tonic-gate */ 311*0Sstevel@tonic-gate #ifndef lint 312*0Sstevel@tonic-gate #ifndef CHARMASK 313*0Sstevel@tonic-gate #define UCHARAT(p) ((int)*(U8*)(p)) 314*0Sstevel@tonic-gate #else 315*0Sstevel@tonic-gate #define UCHARAT(p) ((int)*(p)&CHARMASK) 316*0Sstevel@tonic-gate #endif 317*0Sstevel@tonic-gate #else /* lint */ 318*0Sstevel@tonic-gate #define UCHARAT(p) PL_regdummy 319*0Sstevel@tonic-gate #endif /* lint */ 320*0Sstevel@tonic-gate 321*0Sstevel@tonic-gate #define EXTRA_SIZE(guy) ((sizeof(guy)-1)/sizeof(struct regnode)) 322*0Sstevel@tonic-gate 323*0Sstevel@tonic-gate #define REG_SEEN_ZERO_LEN 1 324*0Sstevel@tonic-gate #define REG_SEEN_LOOKBEHIND 2 325*0Sstevel@tonic-gate #define REG_SEEN_GPOS 4 326*0Sstevel@tonic-gate #define REG_SEEN_EVAL 8 327*0Sstevel@tonic-gate #define REG_SEEN_CANY 16 328*0Sstevel@tonic-gate #define REG_SEEN_SANY REG_SEEN_CANY /* src bckwrd cmpt */ 329*0Sstevel@tonic-gate 330*0Sstevel@tonic-gate START_EXTERN_C 331*0Sstevel@tonic-gate 332*0Sstevel@tonic-gate #include "regnodes.h" 333*0Sstevel@tonic-gate 334*0Sstevel@tonic-gate /* The following have no fixed length. U8 so we can do strchr() on it. */ 335*0Sstevel@tonic-gate #ifndef DOINIT 336*0Sstevel@tonic-gate EXTCONST U8 PL_varies[]; 337*0Sstevel@tonic-gate #else 338*0Sstevel@tonic-gate EXTCONST U8 PL_varies[] = { 339*0Sstevel@tonic-gate BRANCH, BACK, STAR, PLUS, CURLY, CURLYX, REF, REFF, REFFL, 340*0Sstevel@tonic-gate WHILEM, CURLYM, CURLYN, BRANCHJ, IFTHEN, SUSPEND, CLUMP, 0 341*0Sstevel@tonic-gate }; 342*0Sstevel@tonic-gate #endif 343*0Sstevel@tonic-gate 344*0Sstevel@tonic-gate /* The following always have a length of 1. U8 we can do strchr() on it. */ 345*0Sstevel@tonic-gate /* (Note that length 1 means "one character" under UTF8, not "one octet".) 
*/ 346*0Sstevel@tonic-gate #ifndef DOINIT 347*0Sstevel@tonic-gate EXTCONST U8 PL_simple[]; 348*0Sstevel@tonic-gate #else 349*0Sstevel@tonic-gate EXTCONST U8 PL_simple[] = { 350*0Sstevel@tonic-gate REG_ANY, SANY, CANY, 351*0Sstevel@tonic-gate ANYOF, 352*0Sstevel@tonic-gate ALNUM, ALNUML, 353*0Sstevel@tonic-gate NALNUM, NALNUML, 354*0Sstevel@tonic-gate SPACE, SPACEL, 355*0Sstevel@tonic-gate NSPACE, NSPACEL, 356*0Sstevel@tonic-gate DIGIT, NDIGIT, 357*0Sstevel@tonic-gate 0 358*0Sstevel@tonic-gate }; 359*0Sstevel@tonic-gate #endif 360*0Sstevel@tonic-gate 361*0Sstevel@tonic-gate END_EXTERN_C 362*0Sstevel@tonic-gate 363*0Sstevel@tonic-gate typedef struct re_scream_pos_data_s 364*0Sstevel@tonic-gate { 365*0Sstevel@tonic-gate char **scream_olds; /* match pos */ 366*0Sstevel@tonic-gate I32 *scream_pos; /* Internal iterator of scream. */ 367*0Sstevel@tonic-gate } re_scream_pos_data; 368*0Sstevel@tonic-gate 369*0Sstevel@tonic-gate /* .what is a character array with one character for each member of .data 370*0Sstevel@tonic-gate * The character describes the function of the corresponding .data item: 371*0Sstevel@tonic-gate * f - start-class data for regstclass optimization 372*0Sstevel@tonic-gate * n - Root of op tree for (?{EVAL}) item 373*0Sstevel@tonic-gate * o - Start op for (?{EVAL}) item 374*0Sstevel@tonic-gate * p - Pad for (?{EVAL} item 375*0Sstevel@tonic-gate * s - swash for unicode-style character class, and the multicharacter 376*0Sstevel@tonic-gate * strings resulting from casefolding the single-character entries 377*0Sstevel@tonic-gate * in the character class 378*0Sstevel@tonic-gate * 20010712 mjd@plover.com 379*0Sstevel@tonic-gate * (Remember to update re_dup() and pregfree() if you add any items.) 
380*0Sstevel@tonic-gate */ 381*0Sstevel@tonic-gate struct reg_data { 382*0Sstevel@tonic-gate U32 count; 383*0Sstevel@tonic-gate U8 *what; 384*0Sstevel@tonic-gate void* data[1]; 385*0Sstevel@tonic-gate }; 386*0Sstevel@tonic-gate 387*0Sstevel@tonic-gate struct reg_substr_datum { 388*0Sstevel@tonic-gate I32 min_offset; 389*0Sstevel@tonic-gate I32 max_offset; 390*0Sstevel@tonic-gate SV *substr; /* non-utf8 variant */ 391*0Sstevel@tonic-gate SV *utf8_substr; /* utf8 variant */ 392*0Sstevel@tonic-gate }; 393*0Sstevel@tonic-gate 394*0Sstevel@tonic-gate struct reg_substr_data { 395*0Sstevel@tonic-gate struct reg_substr_datum data[3]; /* Actual array */ 396*0Sstevel@tonic-gate }; 397*0Sstevel@tonic-gate 398*0Sstevel@tonic-gate #define anchored_substr substrs->data[0].substr 399*0Sstevel@tonic-gate #define anchored_utf8 substrs->data[0].utf8_substr 400*0Sstevel@tonic-gate #define anchored_offset substrs->data[0].min_offset 401*0Sstevel@tonic-gate #define float_substr substrs->data[1].substr 402*0Sstevel@tonic-gate #define float_utf8 substrs->data[1].utf8_substr 403*0Sstevel@tonic-gate #define float_min_offset substrs->data[1].min_offset 404*0Sstevel@tonic-gate #define float_max_offset substrs->data[1].max_offset 405*0Sstevel@tonic-gate #define check_substr substrs->data[2].substr 406*0Sstevel@tonic-gate #define check_utf8 substrs->data[2].utf8_substr 407*0Sstevel@tonic-gate #define check_offset_min substrs->data[2].min_offset 408*0Sstevel@tonic-gate #define check_offset_max substrs->data[2].max_offset 409