1 /* $OpenBSD: regcomp.c,v 1.22 2013/04/17 17:39:29 tedu Exp $ */ 2 /*- 3 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Henry Spencer. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 35 */ 36 37 #include <sys/types.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <ctype.h> 41 #include <limits.h> 42 #include <stdlib.h> 43 #include <regex.h> 44 45 #include "utils.h" 46 #include "regex2.h" 47 48 #include "cclass.h" 49 #include "cname.h" 50 51 /* 52 * parse structure, passed up and down to avoid global variables and 53 * other clumsinesses 54 */ 55 struct parse { 56 char *next; /* next character in RE */ 57 char *end; /* end of string (-> NUL normally) */ 58 int error; /* has an error been seen? */ 59 sop *strip; /* malloced strip */ 60 sopno ssize; /* malloced strip size (allocated) */ 61 sopno slen; /* malloced strip length (used) */ 62 int ncsalloc; /* number of csets allocated */ 63 struct re_guts *g; 64 # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 65 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 66 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 67 }; 68 69 static void p_ere(struct parse *, int); 70 static void p_ere_exp(struct parse *); 71 static void p_str(struct parse *); 72 static void p_bre(struct parse *, int, int); 73 static int p_simp_re(struct parse *, int); 74 static int p_count(struct parse *); 75 static void p_bracket(struct parse *); 76 static void p_b_term(struct parse *, cset *); 77 static void p_b_cclass(struct parse *, cset *); 78 static void p_b_eclass(struct parse *, cset *); 79 static char p_b_symbol(struct parse *); 80 static char p_b_coll_elem(struct parse *, int); 81 static char othercase(int); 82 static void bothcases(struct parse *, int); 83 static void ordinary(struct parse *, int); 84 static void nonnewline(struct parse *); 85 static void repeat(struct parse *, sopno, int, int); 86 static int seterr(struct parse *, int); 87 static cset *allocset(struct parse *); 88 static void freeset(struct parse *, cset *); 89 static int freezeset(struct parse *, cset *); 90 static int firstch(struct parse *, cset *); 91 static int nch(struct parse *, cset *); 92 static void mcadd(struct parse *, cset *, char *); 93 static void mcinvert(struct parse *, cset *); 94 static void mccase(struct parse *, cset *); 95 static int isinsets(struct re_guts *, int); 96 static int samesets(struct re_guts *, int, int); 97 static void categorize(struct parse *, struct re_guts *); 98 static sopno dupl(struct parse *, sopno, sopno); 99 static void doemit(struct parse *, sop, size_t); 100 static void doinsert(struct parse *, sop, size_t, sopno); 101 static void dofwd(struct parse *, sopno, sop); 102 static int enlarge(struct parse *, sopno); 103 static void stripsnug(struct parse *, struct re_guts *); 104 static void findmust(struct parse *, struct re_guts *); 105 static sopno pluscount(struct parse *, struct re_guts *); 106 107 static char nuls[10]; /* place to point scanner in event of error */ 108 109 /* 110 * macros for use with parse structure 111 * BEWARE: these know that the parse structure is named `p' !!! 112 */ 113 #define PEEK() (*p->next) 114 #define PEEK2() (*(p->next+1)) 115 #define MORE() (p->next < p->end) 116 #define MORE2() (p->next+1 < p->end) 117 #define SEE(c) (MORE() && PEEK() == (c)) 118 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 119 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 120 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 121 #define NEXT() (p->next++) 122 #define NEXT2() (p->next += 2) 123 #define NEXTn(n) (p->next += (n)) 124 #define GETNEXT() (*p->next++) 125 #define SETERROR(e) seterr(p, (e)) 126 #define REQUIRE(co, e) ((co) || SETERROR(e)) 127 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 128 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 129 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 130 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 131 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 132 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 133 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 134 #define HERE() (p->slen) 135 #define THERE() (p->slen - 1) 136 #define THERETHERE() (p->slen - 2) 137 #define DROP(n) (p->slen -= (n)) 138 139 #ifndef NDEBUG 140 static int never = 0; /* for use in asserts; shuts lint up */ 141 #else 142 #define never 0 /* some <assert.h>s have bugs too */ 143 #endif 144 145 /* 146 - regcomp - interface for parser and compilation 147 */ 148 int /* 0 success, otherwise REG_something */ 149 regcomp(regex_t *preg, const char *pattern, int cflags) 150 { 151 struct parse pa; 152 struct re_guts *g; 153 struct parse *p = &pa; 154 int i; 155 size_t len; 156 #ifdef REDEBUG 157 # define GOODFLAGS(f) (f) 158 #else 159 # define GOODFLAGS(f) ((f)&~REG_DUMP) 160 #endif 161 162 cflags = GOODFLAGS(cflags); 163 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 164 return(REG_INVARG); 165 166 if (cflags®_PEND) { 167 if (preg->re_endp < pattern) 168 return(REG_INVARG); 169 len = preg->re_endp - pattern; 170 } else 171 len = strlen((char *)pattern); 172 173 /* do the mallocs early so failure handling is easy */ 174 g = (struct re_guts *)malloc(sizeof(struct re_guts)); 175 if (g == NULL) 176 return(REG_ESPACE); 177 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 178 p->strip = (sop *)calloc(p->ssize, sizeof(sop)); 179 p->slen = 0; 180 if (p->strip == NULL) { 181 free((char *)g); 182 return(REG_ESPACE); 183 } 184 185 /* set things up */ 186 p->g = g; 187 p->next = (char *)pattern; /* convenience; we do not modify it */ 188 p->end = p->next + len; 189 p->error = 0; 190 p->ncsalloc = 0; 191 for (i = 0; i < NPAREN; i++) { 192 p->pbegin[i] = 0; 193 p->pend[i] = 0; 194 } 195 g->csetsize = NC; 196 g->sets = NULL; 197 g->setbits = NULL; 198 g->ncsets = 0; 199 g->cflags = cflags; 200 g->iflags = 0; 201 g->nbol = 0; 202 g->neol = 0; 203 g->must = NULL; 204 g->mlen = 0; 205 g->nsub = 0; 206 g->ncategories = 1; /* category 0 is "everything else" */ 207 g->categories = &g->catspace[-(CHAR_MIN)]; 208 memset(g->catspace, 0, sizeof(g->catspace)); 209 g->backrefs = 0; 210 211 /* do it */ 212 EMIT(OEND, 0); 213 g->firststate = THERE(); 214 if (cflags®_EXTENDED) 215 p_ere(p, OUT); 216 else if (cflags®_NOSPEC) 217 p_str(p); 218 else 219 p_bre(p, OUT, OUT); 220 EMIT(OEND, 0); 221 g->laststate = THERE(); 222 223 /* tidy up loose ends and fill things in */ 224 categorize(p, g); 225 stripsnug(p, g); 226 findmust(p, g); 227 g->nplus = pluscount(p, g); 228 g->magic = MAGIC2; 229 preg->re_nsub = g->nsub; 230 preg->re_g = g; 231 preg->re_magic = MAGIC1; 232 #ifndef REDEBUG 233 /* not debugging, so can't rely on the assert() in regexec() */ 234 if (g->iflags&BAD) 235 SETERROR(REG_ASSERT); 236 #endif 237 238 /* win or lose, we're done */ 239 if (p->error != 0) /* lose */ 240 regfree(preg); 241 return(p->error); 242 } 243 244 /* 245 - p_ere - ERE parser top level, concatenation and alternation 246 */ 247 static void 248 p_ere(struct parse *p, int stop) /* character this ERE should end at */ 249 { 250 char c; 251 sopno prevback; 252 sopno prevfwd; 253 sopno conc; 254 int first = 1; /* is this the first alternative? */ 255 256 for (;;) { 257 /* do a bunch of concatenated expressions */ 258 conc = HERE(); 259 while (MORE() && (c = PEEK()) != '|' && c != stop) 260 p_ere_exp(p); 261 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 262 263 if (!EAT('|')) 264 break; /* NOTE BREAK OUT */ 265 266 if (first) { 267 INSERT(OCH_, conc); /* offset is wrong */ 268 prevfwd = conc; 269 prevback = conc; 270 first = 0; 271 } 272 ASTERN(OOR1, prevback); 273 prevback = THERE(); 274 AHEAD(prevfwd); /* fix previous offset */ 275 prevfwd = HERE(); 276 EMIT(OOR2, 0); /* offset is very wrong */ 277 } 278 279 if (!first) { /* tail-end fixups */ 280 AHEAD(prevfwd); 281 ASTERN(O_CH, prevback); 282 } 283 284 assert(!MORE() || SEE(stop)); 285 } 286 287 /* 288 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 289 */ 290 static void 291 p_ere_exp(struct parse *p) 292 { 293 char c; 294 sopno pos; 295 int count; 296 int count2; 297 sopno subno; 298 int wascaret = 0; 299 300 assert(MORE()); /* caller should have ensured this */ 301 c = GETNEXT(); 302 303 pos = HERE(); 304 switch (c) { 305 case '(': 306 REQUIRE(MORE(), REG_EPAREN); 307 p->g->nsub++; 308 subno = p->g->nsub; 309 if (subno < NPAREN) 310 p->pbegin[subno] = HERE(); 311 EMIT(OLPAREN, subno); 312 if (!SEE(')')) 313 p_ere(p, ')'); 314 if (subno < NPAREN) { 315 p->pend[subno] = HERE(); 316 assert(p->pend[subno] != 0); 317 } 318 EMIT(ORPAREN, subno); 319 MUSTEAT(')', REG_EPAREN); 320 break; 321 case '^': 322 EMIT(OBOL, 0); 323 p->g->iflags |= USEBOL; 324 p->g->nbol++; 325 wascaret = 1; 326 break; 327 case '$': 328 EMIT(OEOL, 0); 329 p->g->iflags |= USEEOL; 330 p->g->neol++; 331 break; 332 case '|': 333 SETERROR(REG_EMPTY); 334 break; 335 case '*': 336 case '+': 337 case '?': 338 SETERROR(REG_BADRPT); 339 break; 340 case '.': 341 if (p->g->cflags®_NEWLINE) 342 nonnewline(p); 343 else 344 EMIT(OANY, 0); 345 break; 346 case '[': 347 p_bracket(p); 348 break; 349 case '\\': 350 REQUIRE(MORE(), REG_EESCAPE); 351 c = GETNEXT(); 352 ordinary(p, c); 353 break; 354 case '{': /* okay as ordinary except if digit follows */ 355 REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); 356 /* FALLTHROUGH */ 357 default: 358 ordinary(p, c); 359 break; 360 } 361 362 if (!MORE()) 363 return; 364 c = PEEK(); 365 /* we call { a repetition if followed by a digit */ 366 if (!( c == '*' || c == '+' || c == '?' || 367 (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) 368 return; /* no repetition, we're done */ 369 NEXT(); 370 371 REQUIRE(!wascaret, REG_BADRPT); 372 switch (c) { 373 case '*': /* implemented as +? */ 374 /* this case does not require the (y|) trick, noKLUDGE */ 375 INSERT(OPLUS_, pos); 376 ASTERN(O_PLUS, pos); 377 INSERT(OQUEST_, pos); 378 ASTERN(O_QUEST, pos); 379 break; 380 case '+': 381 INSERT(OPLUS_, pos); 382 ASTERN(O_PLUS, pos); 383 break; 384 case '?': 385 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 386 INSERT(OCH_, pos); /* offset slightly wrong */ 387 ASTERN(OOR1, pos); /* this one's right */ 388 AHEAD(pos); /* fix the OCH_ */ 389 EMIT(OOR2, 0); /* offset very wrong... */ 390 AHEAD(THERE()); /* ...so fix it */ 391 ASTERN(O_CH, THERETHERE()); 392 break; 393 case '{': 394 count = p_count(p); 395 if (EAT(',')) { 396 if (isdigit((uch)PEEK())) { 397 count2 = p_count(p); 398 REQUIRE(count <= count2, REG_BADBR); 399 } else /* single number with comma */ 400 count2 = INFINITY; 401 } else /* just a single number */ 402 count2 = count; 403 repeat(p, pos, count, count2); 404 if (!EAT('}')) { /* error heuristics */ 405 while (MORE() && PEEK() != '}') 406 NEXT(); 407 REQUIRE(MORE(), REG_EBRACE); 408 SETERROR(REG_BADBR); 409 } 410 break; 411 } 412 413 if (!MORE()) 414 return; 415 c = PEEK(); 416 if (!( c == '*' || c == '+' || c == '?' || 417 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) 418 return; 419 SETERROR(REG_BADRPT); 420 } 421 422 /* 423 - p_str - string (no metacharacters) "parser" 424 */ 425 static void 426 p_str(struct parse *p) 427 { 428 REQUIRE(MORE(), REG_EMPTY); 429 while (MORE()) 430 ordinary(p, GETNEXT()); 431 } 432 433 /* 434 - p_bre - BRE parser top level, anchoring and concatenation 435 * Giving end1 as OUT essentially eliminates the end1/end2 check. 436 * 437 * This implementation is a bit of a kludge, in that a trailing $ is first 438 * taken as an ordinary character and then revised to be an anchor. The 439 * only undesirable side effect is that '$' gets included as a character 440 * category in such cases. This is fairly harmless; not worth fixing. 441 * The amount of lookahead needed to avoid this kludge is excessive. 442 */ 443 static void 444 p_bre(struct parse *p, 445 int end1, /* first terminating character */ 446 int end2) /* second terminating character */ 447 { 448 sopno start = HERE(); 449 int first = 1; /* first subexpression? */ 450 int wasdollar = 0; 451 452 if (EAT('^')) { 453 EMIT(OBOL, 0); 454 p->g->iflags |= USEBOL; 455 p->g->nbol++; 456 } 457 while (MORE() && !SEETWO(end1, end2)) { 458 wasdollar = p_simp_re(p, first); 459 first = 0; 460 } 461 if (wasdollar) { /* oops, that was a trailing anchor */ 462 DROP(1); 463 EMIT(OEOL, 0); 464 p->g->iflags |= USEEOL; 465 p->g->neol++; 466 } 467 468 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 469 } 470 471 /* 472 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 473 */ 474 static int /* was the simple RE an unbackslashed $? */ 475 p_simp_re(struct parse *p, 476 int starordinary) /* is a leading * an ordinary character? */ 477 { 478 int c; 479 int count; 480 int count2; 481 sopno pos; 482 int i; 483 sopno subno; 484 # define BACKSL (1<<CHAR_BIT) 485 486 pos = HERE(); /* repetion op, if any, covers from here */ 487 488 assert(MORE()); /* caller should have ensured this */ 489 c = GETNEXT(); 490 if (c == '\\') { 491 REQUIRE(MORE(), REG_EESCAPE); 492 c = BACKSL | GETNEXT(); 493 } 494 switch (c) { 495 case '.': 496 if (p->g->cflags®_NEWLINE) 497 nonnewline(p); 498 else 499 EMIT(OANY, 0); 500 break; 501 case '[': 502 p_bracket(p); 503 break; 504 case BACKSL|'{': 505 SETERROR(REG_BADRPT); 506 break; 507 case BACKSL|'(': 508 p->g->nsub++; 509 subno = p->g->nsub; 510 if (subno < NPAREN) 511 p->pbegin[subno] = HERE(); 512 EMIT(OLPAREN, subno); 513 /* the MORE here is an error heuristic */ 514 if (MORE() && !SEETWO('\\', ')')) 515 p_bre(p, '\\', ')'); 516 if (subno < NPAREN) { 517 p->pend[subno] = HERE(); 518 assert(p->pend[subno] != 0); 519 } 520 EMIT(ORPAREN, subno); 521 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 522 break; 523 case BACKSL|')': /* should not get here -- must be user */ 524 case BACKSL|'}': 525 SETERROR(REG_EPAREN); 526 break; 527 case BACKSL|'1': 528 case BACKSL|'2': 529 case BACKSL|'3': 530 case BACKSL|'4': 531 case BACKSL|'5': 532 case BACKSL|'6': 533 case BACKSL|'7': 534 case BACKSL|'8': 535 case BACKSL|'9': 536 i = (c&~BACKSL) - '0'; 537 assert(i < NPAREN); 538 if (p->pend[i] != 0) { 539 assert(i <= p->g->nsub); 540 EMIT(OBACK_, i); 541 assert(p->pbegin[i] != 0); 542 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 543 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 544 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 545 EMIT(O_BACK, i); 546 } else 547 SETERROR(REG_ESUBREG); 548 p->g->backrefs = 1; 549 break; 550 case '*': 551 REQUIRE(starordinary, REG_BADRPT); 552 /* FALLTHROUGH */ 553 default: 554 ordinary(p, (char)c); 555 break; 556 } 557 558 if (EAT('*')) { /* implemented as +? */ 559 /* this case does not require the (y|) trick, noKLUDGE */ 560 INSERT(OPLUS_, pos); 561 ASTERN(O_PLUS, pos); 562 INSERT(OQUEST_, pos); 563 ASTERN(O_QUEST, pos); 564 } else if (EATTWO('\\', '{')) { 565 count = p_count(p); 566 if (EAT(',')) { 567 if (MORE() && isdigit((uch)PEEK())) { 568 count2 = p_count(p); 569 REQUIRE(count <= count2, REG_BADBR); 570 } else /* single number with comma */ 571 count2 = INFINITY; 572 } else /* just a single number */ 573 count2 = count; 574 repeat(p, pos, count, count2); 575 if (!EATTWO('\\', '}')) { /* error heuristics */ 576 while (MORE() && !SEETWO('\\', '}')) 577 NEXT(); 578 REQUIRE(MORE(), REG_EBRACE); 579 SETERROR(REG_BADBR); 580 } 581 } else if (c == '$') /* $ (but not \$) ends it */ 582 return(1); 583 584 return(0); 585 } 586 587 /* 588 - p_count - parse a repetition count 589 */ 590 static int /* the value */ 591 p_count(struct parse *p) 592 { 593 int count = 0; 594 int ndigits = 0; 595 596 while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { 597 count = count*10 + (GETNEXT() - '0'); 598 ndigits++; 599 } 600 601 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 602 return(count); 603 } 604 605 /* 606 - p_bracket - parse a bracketed character list 607 * 608 * Note a significant property of this code: if the allocset() did SETERROR, 609 * no set operations are done. 610 */ 611 static void 612 p_bracket(struct parse *p) 613 { 614 cset *cs; 615 int invert = 0; 616 617 /* Dept of Truly Sickening Special-Case Kludges */ 618 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 619 EMIT(OBOW, 0); 620 NEXTn(6); 621 return; 622 } 623 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 624 EMIT(OEOW, 0); 625 NEXTn(6); 626 return; 627 } 628 629 if ((cs = allocset(p)) == NULL) { 630 /* allocset did set error status in p */ 631 return; 632 } 633 634 if (EAT('^')) 635 invert++; /* make note to invert set at end */ 636 if (EAT(']')) 637 CHadd(cs, ']'); 638 else if (EAT('-')) 639 CHadd(cs, '-'); 640 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 641 p_b_term(p, cs); 642 if (EAT('-')) 643 CHadd(cs, '-'); 644 MUSTEAT(']', REG_EBRACK); 645 646 if (p->error != 0) { /* don't mess things up further */ 647 freeset(p, cs); 648 return; 649 } 650 651 if (p->g->cflags®_ICASE) { 652 int i; 653 int ci; 654 655 for (i = p->g->csetsize - 1; i >= 0; i--) 656 if (CHIN(cs, i) && isalpha(i)) { 657 ci = othercase(i); 658 if (ci != i) 659 CHadd(cs, ci); 660 } 661 if (cs->multis != NULL) 662 mccase(p, cs); 663 } 664 if (invert) { 665 int i; 666 667 for (i = p->g->csetsize - 1; i >= 0; i--) 668 if (CHIN(cs, i)) 669 CHsub(cs, i); 670 else 671 CHadd(cs, i); 672 if (p->g->cflags®_NEWLINE) 673 CHsub(cs, '\n'); 674 if (cs->multis != NULL) 675 mcinvert(p, cs); 676 } 677 678 assert(cs->multis == NULL); /* xxx */ 679 680 if (nch(p, cs) == 1) { /* optimize singleton sets */ 681 ordinary(p, firstch(p, cs)); 682 freeset(p, cs); 683 } else 684 EMIT(OANYOF, freezeset(p, cs)); 685 } 686 687 /* 688 - p_b_term - parse one term of a bracketed character list 689 */ 690 static void 691 p_b_term(struct parse *p, cset *cs) 692 { 693 char c; 694 char start, finish; 695 int i; 696 697 /* classify what we've got */ 698 switch ((MORE()) ? PEEK() : '\0') { 699 case '[': 700 c = (MORE2()) ? PEEK2() : '\0'; 701 break; 702 case '-': 703 SETERROR(REG_ERANGE); 704 return; /* NOTE RETURN */ 705 break; 706 default: 707 c = '\0'; 708 break; 709 } 710 711 switch (c) { 712 case ':': /* character class */ 713 NEXT2(); 714 REQUIRE(MORE(), REG_EBRACK); 715 c = PEEK(); 716 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 717 p_b_cclass(p, cs); 718 REQUIRE(MORE(), REG_EBRACK); 719 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 720 break; 721 case '=': /* equivalence class */ 722 NEXT2(); 723 REQUIRE(MORE(), REG_EBRACK); 724 c = PEEK(); 725 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 726 p_b_eclass(p, cs); 727 REQUIRE(MORE(), REG_EBRACK); 728 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 729 break; 730 default: /* symbol, ordinary character, or range */ 731 /* xxx revision needed for multichar stuff */ 732 start = p_b_symbol(p); 733 if (SEE('-') && MORE2() && PEEK2() != ']') { 734 /* range */ 735 NEXT(); 736 if (EAT('-')) 737 finish = '-'; 738 else 739 finish = p_b_symbol(p); 740 } else 741 finish = start; 742 /* xxx what about signed chars here... */ 743 REQUIRE(start <= finish, REG_ERANGE); 744 for (i = start; i <= finish; i++) 745 CHadd(cs, i); 746 break; 747 } 748 } 749 750 /* 751 - p_b_cclass - parse a character-class name and deal with it 752 */ 753 static void 754 p_b_cclass(struct parse *p, cset *cs) 755 { 756 char *sp = p->next; 757 struct cclass *cp; 758 size_t len; 759 char *u; 760 char c; 761 762 while (MORE() && isalpha(PEEK())) 763 NEXT(); 764 len = p->next - sp; 765 for (cp = cclasses; cp->name != NULL; cp++) 766 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 767 break; 768 if (cp->name == NULL) { 769 /* oops, didn't find it */ 770 SETERROR(REG_ECTYPE); 771 return; 772 } 773 774 u = cp->chars; 775 while ((c = *u++) != '\0') 776 CHadd(cs, c); 777 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 778 MCadd(p, cs, u); 779 } 780 781 /* 782 - p_b_eclass - parse an equivalence-class name and deal with it 783 * 784 * This implementation is incomplete. xxx 785 */ 786 static void 787 p_b_eclass(struct parse *p, cset *cs) 788 { 789 char c; 790 791 c = p_b_coll_elem(p, '='); 792 CHadd(cs, c); 793 } 794 795 /* 796 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 797 */ 798 static char /* value of symbol */ 799 p_b_symbol(struct parse *p) 800 { 801 char value; 802 803 REQUIRE(MORE(), REG_EBRACK); 804 if (!EATTWO('[', '.')) 805 return(GETNEXT()); 806 807 /* collating symbol */ 808 value = p_b_coll_elem(p, '.'); 809 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 810 return(value); 811 } 812 813 /* 814 - p_b_coll_elem - parse a collating-element name and look it up 815 */ 816 static char /* value of collating element */ 817 p_b_coll_elem(struct parse *p, 818 int endc) /* name ended by endc,']' */ 819 { 820 char *sp = p->next; 821 struct cname *cp; 822 int len; 823 824 while (MORE() && !SEETWO(endc, ']')) 825 NEXT(); 826 if (!MORE()) { 827 SETERROR(REG_EBRACK); 828 return(0); 829 } 830 len = p->next - sp; 831 for (cp = cnames; cp->name != NULL; cp++) 832 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 833 return(cp->code); /* known name */ 834 if (len == 1) 835 return(*sp); /* single character */ 836 SETERROR(REG_ECOLLATE); /* neither */ 837 return(0); 838 } 839 840 /* 841 - othercase - return the case counterpart of an alphabetic 842 */ 843 static char /* if no counterpart, return ch */ 844 othercase(int ch) 845 { 846 ch = (uch)ch; 847 assert(isalpha(ch)); 848 if (isupper(ch)) 849 return ((uch)tolower(ch)); 850 else if (islower(ch)) 851 return ((uch)toupper(ch)); 852 else /* peculiar, but could happen */ 853 return(ch); 854 } 855 856 /* 857 - bothcases - emit a dualcase version of a two-case character 858 * 859 * Boy, is this implementation ever a kludge... 860 */ 861 static void 862 bothcases(struct parse *p, int ch) 863 { 864 char *oldnext = p->next; 865 char *oldend = p->end; 866 char bracket[3]; 867 868 ch = (uch)ch; 869 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 870 p->next = bracket; 871 p->end = bracket+2; 872 bracket[0] = ch; 873 bracket[1] = ']'; 874 bracket[2] = '\0'; 875 p_bracket(p); 876 assert(p->next == bracket+2); 877 p->next = oldnext; 878 p->end = oldend; 879 } 880 881 /* 882 - ordinary - emit an ordinary character 883 */ 884 static void 885 ordinary(struct parse *p, int ch) 886 { 887 cat_t *cap = p->g->categories; 888 889 if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) 890 bothcases(p, ch); 891 else { 892 EMIT(OCHAR, (uch)ch); 893 if (cap[ch] == 0) 894 cap[ch] = p->g->ncategories++; 895 } 896 } 897 898 /* 899 - nonnewline - emit REG_NEWLINE version of OANY 900 * 901 * Boy, is this implementation ever a kludge... 902 */ 903 static void 904 nonnewline(struct parse *p) 905 { 906 char *oldnext = p->next; 907 char *oldend = p->end; 908 char bracket[4]; 909 910 p->next = bracket; 911 p->end = bracket+3; 912 bracket[0] = '^'; 913 bracket[1] = '\n'; 914 bracket[2] = ']'; 915 bracket[3] = '\0'; 916 p_bracket(p); 917 assert(p->next == bracket+3); 918 p->next = oldnext; 919 p->end = oldend; 920 } 921 922 /* 923 - repeat - generate code for a bounded repetition, recursively if needed 924 */ 925 static void 926 repeat(struct parse *p, 927 sopno start, /* operand from here to end of strip */ 928 int from, /* repeated from this number */ 929 int to) /* to this number of times (maybe INFINITY) */ 930 { 931 sopno finish = HERE(); 932 # define N 2 933 # define INF 3 934 # define REP(f, t) ((f)*8 + (t)) 935 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 936 sopno copy; 937 938 if (p->error != 0) /* head off possible runaway recursion */ 939 return; 940 941 assert(from <= to); 942 943 switch (REP(MAP(from), MAP(to))) { 944 case REP(0, 0): /* must be user doing this */ 945 DROP(finish-start); /* drop the operand */ 946 break; 947 case REP(0, 1): /* as x{1,1}? */ 948 case REP(0, N): /* as x{1,n}? */ 949 case REP(0, INF): /* as x{1,}? */ 950 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 951 INSERT(OCH_, start); /* offset is wrong... */ 952 repeat(p, start+1, 1, to); 953 ASTERN(OOR1, start); 954 AHEAD(start); /* ... fix it */ 955 EMIT(OOR2, 0); 956 AHEAD(THERE()); 957 ASTERN(O_CH, THERETHERE()); 958 break; 959 case REP(1, 1): /* trivial case */ 960 /* done */ 961 break; 962 case REP(1, N): /* as x?x{1,n-1} */ 963 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 964 INSERT(OCH_, start); 965 ASTERN(OOR1, start); 966 AHEAD(start); 967 EMIT(OOR2, 0); /* offset very wrong... */ 968 AHEAD(THERE()); /* ...so fix it */ 969 ASTERN(O_CH, THERETHERE()); 970 copy = dupl(p, start+1, finish+1); 971 assert(copy == finish+4); 972 repeat(p, copy, 1, to-1); 973 break; 974 case REP(1, INF): /* as x+ */ 975 INSERT(OPLUS_, start); 976 ASTERN(O_PLUS, start); 977 break; 978 case REP(N, N): /* as xx{m-1,n-1} */ 979 copy = dupl(p, start, finish); 980 repeat(p, copy, from-1, to-1); 981 break; 982 case REP(N, INF): /* as xx{n-1,INF} */ 983 copy = dupl(p, start, finish); 984 repeat(p, copy, from-1, to); 985 break; 986 default: /* "can't happen" */ 987 SETERROR(REG_ASSERT); /* just in case */ 988 break; 989 } 990 } 991 992 /* 993 - seterr - set an error condition 994 */ 995 static int /* useless but makes type checking happy */ 996 seterr(struct parse *p, int e) 997 { 998 if (p->error == 0) /* keep earliest error condition */ 999 p->error = e; 1000 p->next = nuls; /* try to bring things to a halt */ 1001 p->end = nuls; 1002 return(0); /* make the return value well-defined */ 1003 } 1004 1005 /* 1006 - allocset - allocate a set of characters for [] 1007 */ 1008 static cset * 1009 allocset(struct parse *p) 1010 { 1011 int no = p->g->ncsets++; 1012 size_t nc; 1013 size_t nbytes; 1014 cset *cs; 1015 size_t css = (size_t)p->g->csetsize; 1016 int i; 1017 1018 if (no >= p->ncsalloc) { /* need another column of space */ 1019 void *ptr; 1020 1021 p->ncsalloc += CHAR_BIT; 1022 nc = p->ncsalloc; 1023 assert(nc % CHAR_BIT == 0); 1024 nbytes = nc / CHAR_BIT * css; 1025 1026 ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset)); 1027 if (ptr == NULL) 1028 goto nomem; 1029 p->g->sets = ptr; 1030 1031 ptr = (uch *)realloc((char *)p->g->setbits, nbytes); 1032 if (ptr == NULL) 1033 goto nomem; 1034 p->g->setbits = ptr; 1035 1036 for (i = 0; i < no; i++) 1037 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1038 1039 (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); 1040 } 1041 /* XXX should not happen */ 1042 if (p->g->sets == NULL || p->g->setbits == NULL) 1043 goto nomem; 1044 1045 cs = &p->g->sets[no]; 1046 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1047 cs->mask = 1 << ((no) % CHAR_BIT); 1048 cs->hash = 0; 1049 cs->smultis = 0; 1050 cs->multis = NULL; 1051 1052 return(cs); 1053 nomem: 1054 free(p->g->sets); 1055 p->g->sets = NULL; 1056 free(p->g->setbits); 1057 p->g->setbits = NULL; 1058 1059 SETERROR(REG_ESPACE); 1060 /* caller's responsibility not to do set ops */ 1061 return(NULL); 1062 } 1063 1064 /* 1065 - freeset - free a now-unused set 1066 */ 1067 static void 1068 freeset(struct parse *p, cset *cs) 1069 { 1070 int i; 1071 cset *top = &p->g->sets[p->g->ncsets]; 1072 size_t css = (size_t)p->g->csetsize; 1073 1074 for (i = 0; i < css; i++) 1075 CHsub(cs, i); 1076 if (cs == top-1) /* recover only the easy case */ 1077 p->g->ncsets--; 1078 } 1079 1080 /* 1081 - freezeset - final processing on a set of characters 1082 * 1083 * The main task here is merging identical sets. This is usually a waste 1084 * of time (although the hash code minimizes the overhead), but can win 1085 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1086 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1087 * the same value! 1088 */ 1089 static int /* set number */ 1090 freezeset(struct parse *p, cset *cs) 1091 { 1092 uch h = cs->hash; 1093 int i; 1094 cset *top = &p->g->sets[p->g->ncsets]; 1095 cset *cs2; 1096 size_t css = (size_t)p->g->csetsize; 1097 1098 /* look for an earlier one which is the same */ 1099 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1100 if (cs2->hash == h && cs2 != cs) { 1101 /* maybe */ 1102 for (i = 0; i < css; i++) 1103 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1104 break; /* no */ 1105 if (i == css) 1106 break; /* yes */ 1107 } 1108 1109 if (cs2 < top) { /* found one */ 1110 freeset(p, cs); 1111 cs = cs2; 1112 } 1113 1114 return((int)(cs - p->g->sets)); 1115 } 1116 1117 /* 1118 - firstch - return first character in a set (which must have at least one) 1119 */ 1120 static int /* character; there is no "none" value */ 1121 firstch(struct parse *p, cset *cs) 1122 { 1123 int i; 1124 size_t css = (size_t)p->g->csetsize; 1125 1126 for (i = 0; i < css; i++) 1127 if (CHIN(cs, i)) 1128 return((char)i); 1129 assert(never); 1130 return(0); /* arbitrary */ 1131 } 1132 1133 /* 1134 - nch - number of characters in a set 1135 */ 1136 static int 1137 nch(struct parse *p, cset *cs) 1138 { 1139 int i; 1140 size_t css = (size_t)p->g->csetsize; 1141 int n = 0; 1142 1143 for (i = 0; i < css; i++) 1144 if (CHIN(cs, i)) 1145 n++; 1146 return(n); 1147 } 1148 1149 /* 1150 - mcadd - add a collating element to a cset 1151 */ 1152 static void 1153 mcadd( struct parse *p, cset *cs, char *cp) 1154 { 1155 size_t oldend = cs->smultis; 1156 void *np; 1157 1158 cs->smultis += strlen(cp) + 1; 1159 np = realloc(cs->multis, cs->smultis); 1160 if (np == NULL) { 1161 if (cs->multis) 1162 free(cs->multis); 1163 cs->multis = NULL; 1164 SETERROR(REG_ESPACE); 1165 return; 1166 } 1167 cs->multis = np; 1168 1169 strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); 1170 } 1171 1172 /* 1173 - mcinvert - invert the list of collating elements in a cset 1174 * 1175 * This would have to know the set of possibilities. Implementation 1176 * is deferred. 1177 */ 1178 /* ARGSUSED */ 1179 static void 1180 mcinvert(struct parse *p, cset *cs) 1181 { 1182 assert(cs->multis == NULL); /* xxx */ 1183 } 1184 1185 /* 1186 - mccase - add case counterparts of the list of collating elements in a cset 1187 * 1188 * This would have to know the set of possibilities. Implementation 1189 * is deferred. 1190 */ 1191 /* ARGSUSED */ 1192 static void 1193 mccase(struct parse *p, cset *cs) 1194 { 1195 assert(cs->multis == NULL); /* xxx */ 1196 } 1197 1198 /* 1199 - isinsets - is this character in any sets? 1200 */ 1201 static int /* predicate */ 1202 isinsets(struct re_guts *g, int c) 1203 { 1204 uch *col; 1205 int i; 1206 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1207 unsigned uc = (uch)c; 1208 1209 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1210 if (col[uc] != 0) 1211 return(1); 1212 return(0); 1213 } 1214 1215 /* 1216 - samesets - are these two characters in exactly the same sets? 1217 */ 1218 static int /* predicate */ 1219 samesets(struct re_guts *g, int c1, int c2) 1220 { 1221 uch *col; 1222 int i; 1223 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1224 unsigned uc1 = (uch)c1; 1225 unsigned uc2 = (uch)c2; 1226 1227 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1228 if (col[uc1] != col[uc2]) 1229 return(0); 1230 return(1); 1231 } 1232 1233 /* 1234 - categorize - sort out character categories 1235 */ 1236 static void 1237 categorize(struct parse *p, struct re_guts *g) 1238 { 1239 cat_t *cats = g->categories; 1240 int c; 1241 int c2; 1242 cat_t cat; 1243 1244 /* avoid making error situations worse */ 1245 if (p->error != 0) 1246 return; 1247 1248 for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1249 if (cats[c] == 0 && isinsets(g, c)) { 1250 cat = g->ncategories++; 1251 cats[c] = cat; 1252 for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1253 if (cats[c2] == 0 && samesets(g, c, c2)) 1254 cats[c2] = cat; 1255 } 1256 } 1257 1258 /* 1259 - dupl - emit a duplicate of a bunch of sops 1260 */ 1261 static sopno /* start of duplicate */ 1262 dupl(struct parse *p, 1263 sopno start, /* from here */ 1264 sopno finish) /* to this less one */ 1265 { 1266 sopno ret = HERE(); 1267 sopno len = finish - start; 1268 1269 assert(finish >= start); 1270 if (len == 0) 1271 return(ret); 1272 if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ 1273 return(ret); 1274 (void) memcpy((char *)(p->strip + p->slen), 1275 (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1276 p->slen += len; 1277 return(ret); 1278 } 1279 1280 /* 1281 - doemit - emit a strip operator 1282 * 1283 * It might seem better to implement this as a macro with a function as 1284 * hard-case backup, but it's just too big and messy unless there are 1285 * some changes to the data structures. Maybe later. 1286 */ 1287 static void 1288 doemit(struct parse *p, sop op, size_t opnd) 1289 { 1290 /* avoid making error situations worse */ 1291 if (p->error != 0) 1292 return; 1293 1294 /* deal with oversize operands ("can't happen", more or less) */ 1295 assert(opnd < 1<<OPSHIFT); 1296 1297 /* deal with undersized strip */ 1298 if (p->slen >= p->ssize) 1299 if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ 1300 return; 1301 1302 /* finally, it's all reduced to the easy case */ 1303 p->strip[p->slen++] = SOP(op, opnd); 1304 } 1305 1306 /* 1307 - doinsert - insert a sop into the strip 1308 */ 1309 static void 1310 doinsert(struct parse *p, sop op, size_t opnd, sopno pos) 1311 { 1312 sopno sn; 1313 sop s; 1314 int i; 1315 1316 /* avoid making error situations worse */ 1317 if (p->error != 0) 1318 return; 1319 1320 sn = HERE(); 1321 EMIT(op, opnd); /* do checks, ensure space */ 1322 assert(HERE() == sn+1); 1323 s = p->strip[sn]; 1324 1325 /* adjust paren pointers */ 1326 assert(pos > 0); 1327 for (i = 1; i < NPAREN; i++) { 1328 if (p->pbegin[i] >= pos) { 1329 p->pbegin[i]++; 1330 } 1331 if (p->pend[i] >= pos) { 1332 p->pend[i]++; 1333 } 1334 } 1335 1336 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1337 (HERE()-pos-1)*sizeof(sop)); 1338 p->strip[pos] = s; 1339 } 1340 1341 /* 1342 - dofwd - complete a forward reference 1343 */ 1344 static void 1345 dofwd(struct parse *p, sopno pos, sop value) 1346 { 1347 /* avoid making error situations worse */ 1348 if (p->error != 0) 1349 return; 1350 1351 assert(value < 1<<OPSHIFT); 1352 p->strip[pos] = OP(p->strip[pos]) | value; 1353 } 1354 1355 /* 1356 - enlarge - enlarge the strip 1357 */ 1358 static int 1359 enlarge(struct parse *p, sopno size) 1360 { 1361 sop *sp; 1362 1363 if (p->ssize >= size) 1364 return 1; 1365 1366 sp = (sop *)realloc(p->strip, size*sizeof(sop)); 1367 if (sp == NULL) { 1368 SETERROR(REG_ESPACE); 1369 return 0; 1370 } 1371 p->strip = sp; 1372 p->ssize = size; 1373 return 1; 1374 } 1375 1376 /* 1377 - stripsnug - compact the strip 1378 */ 1379 static void 1380 stripsnug(struct parse *p, struct re_guts *g) 1381 { 1382 g->nstates = p->slen; 1383 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 1384 if (g->strip == NULL) { 1385 SETERROR(REG_ESPACE); 1386 g->strip = p->strip; 1387 } 1388 } 1389 1390 /* 1391 - findmust - fill in must and mlen with longest mandatory literal string 1392 * 1393 * This algorithm could do fancy things like analyzing the operands of | 1394 * for common subsequences. Someday. This code is simple and finds most 1395 * of the interesting cases. 1396 * 1397 * Note that must and mlen got initialized during setup. 1398 */ 1399 static void 1400 findmust(struct parse *p, struct re_guts *g) 1401 { 1402 sop *scan; 1403 sop *start; /* start initialized in the default case, after that */ 1404 sop *newstart; /* newstart was initialized in the OCHAR case */ 1405 sopno newlen; 1406 sop s; 1407 char *cp; 1408 sopno i; 1409 1410 /* avoid making error situations worse */ 1411 if (p->error != 0) 1412 return; 1413 1414 /* find the longest OCHAR sequence in strip */ 1415 newlen = 0; 1416 scan = g->strip + 1; 1417 do { 1418 s = *scan++; 1419 switch (OP(s)) { 1420 case OCHAR: /* sequence member */ 1421 if (newlen == 0) /* new sequence */ 1422 newstart = scan - 1; 1423 newlen++; 1424 break; 1425 case OPLUS_: /* things that don't break one */ 1426 case OLPAREN: 1427 case ORPAREN: 1428 break; 1429 case OQUEST_: /* things that must be skipped */ 1430 case OCH_: 1431 scan--; 1432 do { 1433 scan += OPND(s); 1434 s = *scan; 1435 /* assert() interferes w debug printouts */ 1436 if (OP(s) != O_QUEST && OP(s) != O_CH && 1437 OP(s) != OOR2) { 1438 g->iflags |= BAD; 1439 return; 1440 } 1441 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1442 /* fallthrough */ 1443 default: /* things that break a sequence */ 1444 if (newlen > g->mlen) { /* ends one */ 1445 start = newstart; 1446 g->mlen = newlen; 1447 } 1448 newlen = 0; 1449 break; 1450 } 1451 } while (OP(s) != OEND); 1452 1453 if (g->mlen == 0) /* there isn't one */ 1454 return; 1455 1456 /* turn it into a character string */ 1457 g->must = malloc((size_t)g->mlen + 1); 1458 if (g->must == NULL) { /* argh; just forget it */ 1459 g->mlen = 0; 1460 return; 1461 } 1462 cp = g->must; 1463 scan = start; 1464 for (i = g->mlen; i > 0; i--) { 1465 while (OP(s = *scan++) != OCHAR) 1466 continue; 1467 assert(cp < g->must + g->mlen); 1468 *cp++ = (char)OPND(s); 1469 } 1470 assert(cp == g->must + g->mlen); 1471 *cp++ = '\0'; /* just on general principles */ 1472 } 1473 1474 /* 1475 - pluscount - count + nesting 1476 */ 1477 static sopno /* nesting depth */ 1478 pluscount(struct parse *p, struct re_guts *g) 1479 { 1480 sop *scan; 1481 sop s; 1482 sopno plusnest = 0; 1483 sopno maxnest = 0; 1484 1485 if (p->error != 0) 1486 return(0); /* there may not be an OEND */ 1487 1488 scan = g->strip + 1; 1489 do { 1490 s = *scan++; 1491 switch (OP(s)) { 1492 case OPLUS_: 1493 plusnest++; 1494 break; 1495 case O_PLUS: 1496 if (plusnest > maxnest) 1497 maxnest = plusnest; 1498 plusnest--; 1499 break; 1500 } 1501 } while (OP(s) != OEND); 1502 if (plusnest != 0) 1503 g->iflags |= BAD; 1504 return(maxnest); 1505 } 1506