1 #include <sys/types.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include <ctype.h> 5 #include <limits.h> 6 #include <stdlib.h> 7 #include <regex.h> 8 9 #include "utils.h" 10 #include "regex2.h" 11 12 #include "cclass.h" 13 #include "cname.h" 14 15 /* 16 * parse structure, passed up and down to avoid global variables and 17 * other clumsinesses 18 */ 19 struct parse { 20 char *next; /* next character in RE */ 21 char *end; /* end of string (-> NUL normally) */ 22 int error; /* has an error been seen? */ 23 sop *strip; /* malloced strip */ 24 sopno ssize; /* malloced strip size (allocated) */ 25 sopno slen; /* malloced strip length (used) */ 26 int ncsalloc; /* number of csets allocated */ 27 struct re_guts *g; 28 # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 29 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 30 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 31 }; 32 33 #include "regcomp.ih" 34 35 static char nuls[10]; /* place to point scanner in event of error */ 36 37 /* 38 * macros for use with parse structure 39 * BEWARE: these know that the parse structure is named `p' !!! 40 */ 41 #define PEEK() (*p->next) 42 #define PEEK2() (*(p->next+1)) 43 #define MORE() (p->next < p->end) 44 #define MORE2() (p->next+1 < p->end) 45 #define SEE(c) (MORE() && PEEK() == (c)) 46 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 47 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 48 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 49 #define NEXT() (p->next++) 50 #define NEXT2() (p->next += 2) 51 #define NEXTn(n) (p->next += (n)) 52 #define GETNEXT() (*p->next++) 53 #define SETERROR(e) seterr(p, (e)) 54 #define REQUIRE(co, e) ((co) || SETERROR(e)) 55 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 56 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 57 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 58 #define EMIT(sop, sopnd) doemit(p, sop, (size_t)(sopnd)) 59 #define INSERT(sop, pos) doinsert(p, sop, HERE()-(pos)+1, pos) 60 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 61 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 62 #define HERE() (p->slen) 63 #define THERE() (p->slen - 1) 64 #define DROP(n) (p->slen -= (n)) 65 66 #ifndef NDEBUG 67 static int never = 0; /* for use in asserts; shuts lint up */ 68 #endif 69 70 /* 71 - regcomp - interface for parser and compilation 72 = extern int regcomp(regex_t *, const char *, int); 73 = #define REG_BASIC 0000 74 = #define REG_EXTENDED 0001 75 = #define REG_ICASE 0002 76 = #define REG_NOSUB 0004 77 = #define REG_NEWLINE 0010 78 = #define REG_NOSPEC 0020 79 = #define REG_PEND 0040 80 = #define REG_DUMP 0200 81 */ 82 int /* 0 success, otherwise REG_something */ 83 regcomp(preg, pattern, cflags) 84 regex_t *preg; 85 const char *pattern; 86 int cflags; 87 { 88 struct parse pa; 89 register struct re_guts *g; 90 register struct parse *p = &pa; 91 register int i; 92 register size_t len; 93 94 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 95 return(REG_INVARG); 96 97 if (cflags®_PEND) { 98 if (preg->re_endp < pattern) 99 return(REG_INVARG); 100 len = preg->re_endp - pattern; 101 } else 102 len = strlen((char *)pattern); 103 104 /* do the mallocs early so failure handling is easy */ 105 g = (struct re_guts *)malloc(sizeof(struct re_guts) + 106 (NC-1)*sizeof(cat_t)); 107 if (g == NULL) 108 return(REG_ESPACE); 109 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 110 p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 111 p->slen = 0; 112 if (p->strip == NULL) { 113 free((char *)g); 114 return(REG_ESPACE); 115 } 116 117 /* set things up */ 118 p->g = g; 119 p->next = (char *)pattern; /* convenience; we do not modify it */ 120 p->end = p->next + len; 121 p->error = 0; 122 p->ncsalloc = 0; 123 for (i = 0; i < NPAREN; i++) { 124 p->pbegin[i] = 0; 125 p->pend[i] = 0; 126 } 127 g->csetsize = NC; 128 g->sets = NULL; 129 g->setbits = NULL; 130 g->ncsets = 0; 131 g->cflags = cflags; 132 g->iflags = 0; 133 g->nbol = 0; 134 g->neol = 0; 135 g->must = NULL; 136 g->mlen = 0; 137 g->nsub = 0; 138 g->ncategories = 1; /* category 0 is "everything else" */ 139 g->categories = &g->catspace[-(CHAR_MIN)]; 140 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 141 g->backrefs = 0; 142 143 /* do it */ 144 EMIT(OEND, 0); 145 g->firststate = THERE(); 146 if (cflags®_EXTENDED) 147 p_ere(p, OUT); 148 else if (cflags®_NOSPEC) 149 p_str(p); 150 else 151 p_bre(p, OUT, OUT); 152 EMIT(OEND, 0); 153 g->laststate = THERE(); 154 155 /* tidy up loose ends and fill things in */ 156 categorize(p, g); 157 stripsnug(p, g); 158 findmust(p, g); 159 g->nplus = pluscount(p, g); 160 g->magic = MAGIC2; 161 preg->re_nsub = g->nsub; 162 preg->re_g = g; 163 preg->re_magic = MAGIC1; 164 #ifndef REDEBUG 165 /* not debugging, so can't rely on the assert() in regexec() */ 166 if (g->iflags&BAD) 167 SETERROR(REG_ASSERT); 168 #endif 169 170 /* win or lose, we're done */ 171 if (p->error != 0) /* lose */ 172 regfree(preg); 173 return(p->error); 174 } 175 176 /* 177 - p_ere - ERE parser top level, concatenation and alternation 178 == static void p_ere(register struct parse *p, int stop); 179 */ 180 static void 181 p_ere(p, stop) 182 register struct parse *p; 183 int stop; /* character this ERE should end at */ 184 { 185 register char c; 186 register sopno prevback; 187 register sopno prevfwd; 188 register sopno conc; 189 register int first = 1; /* is this the first alternative? */ 190 191 for (;;) { 192 /* do a bunch of concatenated expressions */ 193 conc = HERE(); 194 while (MORE() && (c = PEEK()) != '|' && c != stop) 195 p_ere_exp(p); 196 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 197 198 if (!EAT('|')) 199 break; /* NOTE BREAK OUT */ 200 201 if (first) { 202 INSERT(OCH_, conc); /* offset is wrong */ 203 prevfwd = conc; 204 prevback = conc; 205 first = 0; 206 } 207 ASTERN(OOR1, prevback); 208 prevback = THERE(); 209 AHEAD(prevfwd); /* fix previous offset */ 210 prevfwd = HERE(); 211 EMIT(OOR2, 0); /* offset is very wrong */ 212 } 213 214 if (!first) { /* tail-end fixups */ 215 AHEAD(prevfwd); 216 ASTERN(O_CH, prevback); 217 } 218 219 assert(!MORE() || SEE(stop)); 220 } 221 222 /* 223 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 224 == static void p_ere_exp(register struct parse *p); 225 */ 226 static void 227 p_ere_exp(p) 228 register struct parse *p; 229 { 230 register char c; 231 register sopno pos; 232 register int count; 233 register int count2; 234 register sopno subno; 235 int wascaret = 0; 236 237 assert(MORE()); /* caller should have ensured this */ 238 c = GETNEXT(); 239 240 pos = HERE(); 241 switch (c) { 242 case '(': 243 REQUIRE(MORE(), REG_EPAREN); 244 p->g->nsub++; 245 subno = p->g->nsub; 246 if (subno < NPAREN) 247 p->pbegin[subno] = HERE(); 248 EMIT(OLPAREN, subno); 249 if (!SEE(')')) 250 p_ere(p, ')'); 251 if (subno < NPAREN) { 252 p->pend[subno] = HERE(); 253 assert(p->pend[subno] != 0); 254 } 255 EMIT(ORPAREN, subno); 256 MUSTEAT(')', REG_EPAREN); 257 break; 258 #ifndef POSIX_MISTAKE 259 case ')': /* happens only if no current unmatched ( */ 260 /* 261 * You may ask, why the ifndef? Because I didn't notice 262 * this until slightly too late for 1003.2, and none of the 263 * other 1003.2 regular-expression reviewers noticed it at 264 * all. So an unmatched ) is legal POSIX, at least until 265 * we can get it fixed. 266 */ 267 SETERROR(REG_EPAREN); 268 break; 269 #endif 270 case '^': 271 EMIT(OBOL, 0); 272 p->g->iflags |= USEBOL; 273 p->g->nbol++; 274 wascaret = 1; 275 break; 276 case '$': 277 EMIT(OEOL, 0); 278 p->g->iflags |= USEEOL; 279 p->g->neol++; 280 break; 281 case '|': 282 SETERROR(REG_EMPTY); 283 break; 284 case '*': 285 case '+': 286 case '?': 287 SETERROR(REG_BADRPT); 288 break; 289 case '.': 290 if (p->g->cflags®_NEWLINE) 291 nonnewline(p); 292 else 293 EMIT(OANY, 0); 294 break; 295 case '[': 296 p_bracket(p); 297 break; 298 case '\\': 299 REQUIRE(MORE(), REG_EESCAPE); 300 c = GETNEXT(); 301 ordinary(p, c); 302 break; 303 case '{': /* okay as ordinary except if digit follows */ 304 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 305 /* FALLTHROUGH */ 306 default: 307 ordinary(p, c); 308 break; 309 } 310 311 if (!MORE()) 312 return; 313 c = PEEK(); 314 /* we call { a repetition if followed by a digit */ 315 if (!( c == '*' || c == '+' || c == '?' || 316 (c == '{' && MORE2() && isdigit(PEEK2())) )) 317 return; /* no repetition, we're done */ 318 NEXT(); 319 320 REQUIRE(!wascaret, REG_BADRPT); 321 switch (c) { 322 case '*': /* implemented as +? */ 323 INSERT(OPLUS_, pos); 324 ASTERN(O_PLUS, pos); 325 INSERT(OQUEST_, pos); 326 ASTERN(O_QUEST, pos); 327 break; 328 case '+': 329 INSERT(OPLUS_, pos); 330 ASTERN(O_PLUS, pos); 331 break; 332 case '?': 333 INSERT(OQUEST_, pos); 334 ASTERN(O_QUEST, pos); 335 break; 336 case '{': 337 count = p_count(p); 338 if (EAT(',')) { 339 if (isdigit(PEEK())) { 340 count2 = p_count(p); 341 REQUIRE(count <= count2, REG_BADBR); 342 } else /* single number with comma */ 343 count2 = INFINITY; 344 } else /* just a single number */ 345 count2 = count; 346 repeat(p, pos, count, count2); 347 if (!EAT('}')) { /* error heuristics */ 348 while (MORE() && PEEK() != '}') 349 NEXT(); 350 REQUIRE(MORE(), REG_EBRACE); 351 SETERROR(REG_BADBR); 352 } 353 break; 354 } 355 356 if (!MORE()) 357 return; 358 c = PEEK(); 359 if (!( c == '*' || c == '+' || c == '?' || 360 (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 361 return; 362 SETERROR(REG_BADRPT); 363 } 364 365 /* 366 - p_str - string (no metacharacters) "parser" 367 == static void p_str(register struct parse *p); 368 */ 369 static void 370 p_str(p) 371 register struct parse *p; 372 { 373 REQUIRE(MORE(), REG_EMPTY); 374 while (MORE()) 375 ordinary(p, GETNEXT()); 376 } 377 378 /* 379 - p_bre - BRE parser top level, anchoring and concatenation 380 == static void p_bre(register struct parse *p, register int end1, \ 381 == register int end2); 382 * Giving end1 as OUT essentially eliminates the end1/end2 check. 383 * 384 * This implementation is a bit of a kludge, in that a trailing $ is first 385 * taken as an ordinary character and then revised to be an anchor. The 386 * only undesirable side effect is that '$' gets included as a character 387 * category in such cases. This is fairly harmless; not worth fixing. 388 * The amount of lookahead needed to avoid this kludge is excessive. 389 */ 390 static void 391 p_bre(p, end1, end2) 392 register struct parse *p; 393 register int end1; /* first terminating character */ 394 register int end2; /* second terminating character */ 395 { 396 register sopno start = HERE(); 397 register int first = 1; /* first subexpression? */ 398 register int wasdollar = 0; 399 400 if (EAT('^')) { 401 EMIT(OBOL, 0); 402 p->g->iflags |= USEBOL; 403 p->g->nbol++; 404 } 405 while (MORE() && !SEETWO(end1, end2)) { 406 wasdollar = p_simp_re(p, first); 407 first = 0; 408 } 409 if (wasdollar) { /* oops, that was a trailing anchor */ 410 DROP(1); 411 EMIT(OEOL, 0); 412 p->g->iflags |= USEEOL; 413 p->g->neol++; 414 } 415 416 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 417 } 418 419 /* 420 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 421 == static int p_simp_re(register struct parse *p, int starordinary); 422 */ 423 static int /* was the simple RE an unbackslashed $? */ 424 p_simp_re(p, starordinary) 425 register struct parse *p; 426 int starordinary; /* is a leading * an ordinary character? */ 427 { 428 register int c; 429 register int count; 430 register int count2; 431 register sopno pos; 432 register int i; 433 register sopno subno; 434 # define BACKSL (1<<CHAR_BIT) 435 436 pos = HERE(); /* repetion op, if any, covers from here */ 437 438 assert(MORE()); /* caller should have ensured this */ 439 c = GETNEXT(); 440 if (c == '\\') { 441 REQUIRE(MORE(), REG_EESCAPE); 442 c = BACKSL | (unsigned char)GETNEXT(); 443 } 444 switch (c) { 445 case '.': 446 if (p->g->cflags®_NEWLINE) 447 nonnewline(p); 448 else 449 EMIT(OANY, 0); 450 break; 451 case '[': 452 p_bracket(p); 453 break; 454 case BACKSL|'{': 455 SETERROR(REG_BADRPT); 456 break; 457 case BACKSL|'(': 458 p->g->nsub++; 459 subno = p->g->nsub; 460 if (subno < NPAREN) 461 p->pbegin[subno] = HERE(); 462 EMIT(OLPAREN, subno); 463 /* the MORE here is an error heuristic */ 464 if (MORE() && !SEETWO('\\', ')')) 465 p_bre(p, '\\', ')'); 466 if (subno < NPAREN) { 467 p->pend[subno] = HERE(); 468 assert(p->pend[subno] != 0); 469 } 470 EMIT(ORPAREN, subno); 471 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 472 break; 473 case BACKSL|')': /* should not get here -- must be user */ 474 case BACKSL|'}': 475 SETERROR(REG_EPAREN); 476 break; 477 case BACKSL|'1': 478 case BACKSL|'2': 479 case BACKSL|'3': 480 case BACKSL|'4': 481 case BACKSL|'5': 482 case BACKSL|'6': 483 case BACKSL|'7': 484 case BACKSL|'8': 485 case BACKSL|'9': 486 i = (c&~BACKSL) - '0'; 487 assert(i < NPAREN); 488 if (p->pend[i] != 0) { 489 assert(i <= p->g->nsub); 490 EMIT(OBACK_, i); 491 assert(p->pbegin[i] != 0); 492 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 493 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 494 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 495 EMIT(O_BACK, i); 496 } else 497 SETERROR(REG_ESUBREG); 498 p->g->backrefs = 1; 499 break; 500 case '*': 501 REQUIRE(starordinary, REG_BADRPT); 502 /* FALLTHROUGH */ 503 default: 504 ordinary(p, c &~ BACKSL); 505 break; 506 } 507 508 if (EAT('*')) { /* implemented as +? */ 509 INSERT(OPLUS_, pos); 510 ASTERN(O_PLUS, pos); 511 INSERT(OQUEST_, pos); 512 ASTERN(O_QUEST, pos); 513 } else if (EATTWO('\\', '{')) { 514 count = p_count(p); 515 if (EAT(',')) { 516 if (MORE() && isdigit(PEEK())) { 517 count2 = p_count(p); 518 REQUIRE(count <= count2, REG_BADBR); 519 } else /* single number with comma */ 520 count2 = INFINITY; 521 } else /* just a single number */ 522 count2 = count; 523 repeat(p, pos, count, count2); 524 if (!EATTWO('\\', '}')) { /* error heuristics */ 525 while (MORE() && !SEETWO('\\', '}')) 526 NEXT(); 527 REQUIRE(MORE(), REG_EBRACE); 528 SETERROR(REG_BADBR); 529 } 530 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 531 return(1); 532 533 return(0); 534 } 535 536 /* 537 - p_count - parse a repetition count 538 == static int p_count(register struct parse *p); 539 */ 540 static int /* the value */ 541 p_count(p) 542 register struct parse *p; 543 { 544 register int count = 0; 545 register int ndigits = 0; 546 547 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 548 count = count*10 + (GETNEXT() - '0'); 549 ndigits++; 550 } 551 552 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 553 return(count); 554 } 555 556 /* 557 - p_bracket - parse a bracketed character list 558 == static void p_bracket(register struct parse *p); 559 * 560 * Note a significant property of this code: if the allocset() did SETERROR, 561 * no set operations are done. 562 */ 563 static void 564 p_bracket(p) 565 register struct parse *p; 566 { 567 register char c; 568 register cset *cs = allocset(p); 569 register int invert = 0; 570 571 /* Dept of Truly Sickening Special-Case Kludges */ 572 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 573 EMIT(OBOW, 0); 574 NEXTn(6); 575 return; 576 } 577 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 578 EMIT(OEOW, 0); 579 NEXTn(6); 580 return; 581 } 582 583 if (EAT('^')) 584 invert++; /* make note to invert set at end */ 585 if (EAT(']')) 586 CHadd(cs, ']'); 587 else if (EAT('-')) 588 CHadd(cs, '-'); 589 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 590 p_b_term(p, cs); 591 if (EAT('-')) 592 CHadd(cs, '-'); 593 MUSTEAT(']', REG_EBRACK); 594 595 if (p->error != 0) /* don't mess things up further */ 596 return; 597 598 if (p->g->cflags®_ICASE) { 599 register int i; 600 register int ci; 601 602 for (i = p->g->csetsize - 1; i >= 0; i--) 603 if (CHIN(cs, i) && isalpha(i)) { 604 ci = othercase(i); 605 if (ci != i) 606 CHadd(cs, ci); 607 } 608 if (cs->multis != NULL) 609 mccase(p, cs); 610 } 611 if (invert) { 612 register int i; 613 614 for (i = p->g->csetsize - 1; i >= 0; i--) 615 if (CHIN(cs, i)) 616 CHsub(cs, i); 617 else 618 CHadd(cs, i); 619 if (p->g->cflags®_NEWLINE) 620 CHsub(cs, '\n'); 621 if (cs->multis != NULL) 622 mcinvert(p, cs); 623 } 624 625 assert(cs->multis == NULL); /* xxx */ 626 627 if (nch(p, cs) == 1) { /* optimize singleton sets */ 628 ordinary(p, firstch(p, cs)); 629 freeset(p, cs); 630 } else 631 EMIT(OANYOF, freezeset(p, cs)); 632 } 633 634 /* 635 - p_b_term - parse one term of a bracketed character list 636 == static void p_b_term(register struct parse *p, register cset *cs); 637 */ 638 static void 639 p_b_term(p, cs) 640 register struct parse *p; 641 register cset *cs; 642 { 643 register char c; 644 register char start, finish; 645 register int i; 646 647 /* classify what we've got */ 648 switch ((MORE()) ? PEEK() : '\0') { 649 case '[': 650 c = (MORE2()) ? PEEK2() : '\0'; 651 break; 652 case '-': 653 SETERROR(REG_ERANGE); 654 return; /* NOTE RETURN */ 655 break; 656 default: 657 c = '\0'; 658 break; 659 } 660 661 switch (c) { 662 case ':': /* character class */ 663 NEXT2(); 664 REQUIRE(MORE(), REG_EBRACK); 665 c = PEEK(); 666 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 667 p_b_cclass(p, cs); 668 REQUIRE(MORE(), REG_EBRACK); 669 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 670 break; 671 case '=': /* equivalence class */ 672 NEXT2(); 673 REQUIRE(MORE(), REG_EBRACK); 674 c = PEEK(); 675 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 676 p_b_eclass(p, cs); 677 REQUIRE(MORE(), REG_EBRACK); 678 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 679 break; 680 default: /* symbol, ordinary character, or range */ 681 /* xxx revision needed for multichar stuff */ 682 start = p_b_symbol(p); 683 if (SEE('-') && MORE2() && PEEK2() != ']') { 684 /* range */ 685 NEXT(); 686 if (EAT('-')) 687 finish = '-'; 688 else 689 finish = p_b_symbol(p); 690 } else 691 finish = start; 692 /* xxx what about signed chars here... */ 693 REQUIRE(start <= finish, REG_ERANGE); 694 for (i = start; i <= finish; i++) 695 CHadd(cs, i); 696 break; 697 } 698 } 699 700 /* 701 - p_b_cclass - parse a character-class name and deal with it 702 == static void p_b_cclass(register struct parse *p, register cset *cs); 703 */ 704 static void 705 p_b_cclass(p, cs) 706 register struct parse *p; 707 register cset *cs; 708 { 709 register char *sp = p->next; 710 register struct cclass *cp; 711 register size_t len; 712 register char *u; 713 register char c; 714 715 while (MORE() && isalpha(PEEK())) 716 NEXT(); 717 len = p->next - sp; 718 for (cp = cclasses; cp->name != NULL; cp++) 719 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 720 break; 721 if (cp->name == NULL) { 722 /* oops, didn't find it */ 723 SETERROR(REG_ECTYPE); 724 return; 725 } 726 727 u = cp->chars; 728 while ((c = *u++) != '\0') 729 CHadd(cs, c); 730 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 731 MCadd(p, cs, u); 732 } 733 734 /* 735 - p_b_eclass - parse an equivalence-class name and deal with it 736 == static void p_b_eclass(register struct parse *p, register cset *cs); 737 * 738 * This implementation is incomplete. xxx 739 */ 740 static void 741 p_b_eclass(p, cs) 742 register struct parse *p; 743 register cset *cs; 744 { 745 register char c; 746 747 c = p_b_coll_elem(p, '='); 748 CHadd(cs, c); 749 } 750 751 /* 752 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 753 == static char p_b_symbol(register struct parse *p); 754 */ 755 static char /* value of symbol */ 756 p_b_symbol(p) 757 register struct parse *p; 758 { 759 register char value; 760 761 REQUIRE(MORE(), REG_EBRACK); 762 if (!EATTWO('[', '.')) 763 return(GETNEXT()); 764 765 /* collating symbol */ 766 value = p_b_coll_elem(p, '.'); 767 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 768 return(value); 769 } 770 771 /* 772 - p_b_coll_elem - parse a collating-element name and look it up 773 == static char p_b_coll_elem(register struct parse *p, int endc); 774 */ 775 static char /* value of collating element */ 776 p_b_coll_elem(p, endc) 777 register struct parse *p; 778 int endc; /* name ended by endc,']' */ 779 { 780 register char *sp = p->next; 781 register struct cname *cp; 782 register int len; 783 register char c; 784 785 while (MORE() && !SEETWO(endc, ']')) 786 NEXT(); 787 if (!MORE()) { 788 SETERROR(REG_EBRACK); 789 return(0); 790 } 791 len = p->next - sp; 792 for (cp = cnames; cp->name != NULL; cp++) 793 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 794 return(cp->code); /* known name */ 795 if (len == 1) 796 return(*sp); /* single character */ 797 SETERROR(REG_ECOLLATE); /* neither */ 798 return(0); 799 } 800 801 /* 802 - othercase - return the case counterpart of an alphabetic 803 == static char othercase(int ch); 804 */ 805 static char /* if no counterpart, return ch */ 806 othercase(ch) 807 int ch; 808 { 809 assert(isalpha(ch)); 810 if (isupper(ch)) 811 return(tolower(ch)); 812 else if (islower(ch)) 813 return(toupper(ch)); 814 else /* peculiar, but could happen */ 815 return(ch); 816 } 817 818 /* 819 - bothcases - emit a dualcase version of a two-case character 820 == static void bothcases(register struct parse *p, int ch); 821 * 822 * Boy, is this implementation ever a kludge... 823 */ 824 static void 825 bothcases(p, ch) 826 register struct parse *p; 827 int ch; 828 { 829 register char *oldnext = p->next; 830 register char *oldend = p->end; 831 char bracket[3]; 832 833 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 834 p->next = bracket; 835 p->end = bracket+2; 836 bracket[0] = ch; 837 bracket[1] = ']'; 838 bracket[2] = '\0'; 839 p_bracket(p); 840 assert(p->next == bracket+2); 841 p->next = oldnext; 842 p->end = oldend; 843 } 844 845 /* 846 - ordinary - emit an ordinary character 847 == static void ordinary(register struct parse *p, register int ch); 848 */ 849 static void 850 ordinary(p, ch) 851 register struct parse *p; 852 register int ch; 853 { 854 register cat_t *cap = p->g->categories; 855 856 if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 857 bothcases(p, ch); 858 else { 859 EMIT(OCHAR, (unsigned char)ch); 860 if (cap[ch] == 0) 861 cap[ch] = p->g->ncategories++; 862 } 863 } 864 865 /* 866 - nonnewline - emit REG_NEWLINE version of OANY 867 == static void nonnewline(register struct parse *p); 868 * 869 * Boy, is this implementation ever a kludge... 870 */ 871 static void 872 nonnewline(p) 873 register struct parse *p; 874 { 875 register char *oldnext = p->next; 876 register char *oldend = p->end; 877 char bracket[4]; 878 879 p->next = bracket; 880 p->end = bracket+3; 881 bracket[0] = '^'; 882 bracket[1] = '\n'; 883 bracket[2] = ']'; 884 bracket[3] = '\0'; 885 p_bracket(p); 886 assert(p->next == bracket+3); 887 p->next = oldnext; 888 p->end = oldend; 889 } 890 891 /* 892 - repeat - generate code for a bounded repetition, recursively if needed 893 == static void repeat(register struct parse *p, sopno start, int from, int to); 894 */ 895 static void 896 repeat(p, start, from, to) 897 register struct parse *p; 898 sopno start; /* operand from here to end of strip */ 899 int from; /* repeated from this number */ 900 int to; /* to this number of times (maybe INFINITY) */ 901 { 902 register sopno finish = HERE(); 903 # define N 2 904 # define INF 3 905 # define REP(f, t) ((f)*8 + (t)) 906 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 907 register sopno copy; 908 909 if (p->error != 0) /* head off possible runaway recursion */ 910 return; 911 912 assert(from <= to); 913 914 switch (REP(MAP(from), MAP(to))) { 915 case REP(0, 0): /* must be user doing this */ 916 DROP(finish-start); /* drop the operand */ 917 break; 918 case REP(0, 1): /* as x{1,1}? */ 919 case REP(0, N): /* as x{1,n}? */ 920 case REP(0, INF): /* as x{1,}? */ 921 INSERT(OQUEST_, start); /* offset is wrong... */ 922 repeat(p, start+1, 1, to); 923 AHEAD(start); /* ... fix it */ 924 ASTERN(O_QUEST, start); 925 break; 926 case REP(1, 1): /* trivial case */ 927 /* done */ 928 break; 929 case REP(1, N): /* as x?x{1,n-1} */ 930 INSERT(OQUEST_, start); 931 ASTERN(O_QUEST, start); 932 copy = dupl(p, start+1, finish+1); 933 assert(copy == finish+2); 934 repeat(p, copy, 1, to-1); 935 break; 936 case REP(1, INF): /* as x+ */ 937 INSERT(OPLUS_, start); 938 ASTERN(O_PLUS, start); 939 break; 940 case REP(N, N): /* as xx{m-1,n-1} */ 941 copy = dupl(p, start, finish); 942 repeat(p, copy, from-1, to-1); 943 break; 944 case REP(N, INF): /* as xx{n-1,INF} */ 945 copy = dupl(p, start, finish); 946 repeat(p, copy, from-1, to); 947 break; 948 default: /* "can't happen" */ 949 SETERROR(REG_ASSERT); /* just in case */ 950 break; 951 } 952 } 953 954 /* 955 - seterr - set an error condition 956 == static int seterr(register struct parse *p, int e); 957 */ 958 static int /* useless but makes type checking happy */ 959 seterr(p, e) 960 register struct parse *p; 961 int e; 962 { 963 if (p->error == 0) /* keep earliest error condition */ 964 p->error = e; 965 p->next = nuls; /* try to bring things to a halt */ 966 p->end = nuls; 967 return(0); /* make the return value well-defined */ 968 } 969 970 /* 971 - allocset - allocate a set of characters for [] 972 == static cset *allocset(register struct parse *p); 973 */ 974 static cset * 975 allocset(p) 976 register struct parse *p; 977 { 978 register int no = p->g->ncsets++; 979 register size_t nc; 980 register size_t nbytes; 981 register cset *cs; 982 register size_t css = (size_t)p->g->csetsize; 983 register int i; 984 985 if (no >= p->ncsalloc) { /* need another column of space */ 986 p->ncsalloc += CHAR_BIT; 987 nc = p->ncsalloc; 988 assert(nc % CHAR_BIT == 0); 989 nbytes = nc / CHAR_BIT * css; 990 if (p->g->sets == NULL) 991 p->g->sets = (cset *)malloc(nc * sizeof(cset)); 992 else 993 p->g->sets = (cset *)realloc((char *)p->g->sets, 994 nc * sizeof(cset)); 995 if (p->g->setbits == NULL) 996 p->g->setbits = (uch *)malloc(nbytes); 997 else { 998 p->g->setbits = (uch *)realloc((char *)p->g->setbits, 999 nbytes); 1000 /* xxx this isn't right if setbits is now NULL */ 1001 for (i = 0; i < no; i++) 1002 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1003 } 1004 if (p->g->sets != NULL && p->g->setbits != NULL) 1005 (void) memset((char *)p->g->setbits + (nbytes - css), 1006 0, css); 1007 else { 1008 no = 0; 1009 SETERROR(REG_ESPACE); 1010 /* caller's responsibility not to do set ops */ 1011 } 1012 } 1013 1014 assert(p->g->sets != NULL); /* xxx */ 1015 cs = &p->g->sets[no]; 1016 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1017 cs->mask = 1 << ((no) % CHAR_BIT); 1018 cs->hash = 0; 1019 cs->smultis = 0; 1020 cs->multis = NULL; 1021 1022 return(cs); 1023 } 1024 1025 /* 1026 - freeset - free a now-unused set 1027 == static void freeset(register struct parse *p, register cset *cs); 1028 */ 1029 static void 1030 freeset(p, cs) 1031 register struct parse *p; 1032 register cset *cs; 1033 { 1034 register int i; 1035 register cset *top = &p->g->sets[p->g->ncsets]; 1036 register size_t css = (size_t)p->g->csetsize; 1037 1038 for (i = 0; i < css; i++) 1039 CHsub(cs, i); 1040 if (cs == top-1) /* recover only the easy case */ 1041 p->g->ncsets--; 1042 } 1043 1044 /* 1045 - freezeset - final processing on a set of characters 1046 == static int freezeset(register struct parse *p, register cset *cs); 1047 * 1048 * The main task here is merging identical sets. This is usually a waste 1049 * of time (although the hash code minimizes the overhead), but can win 1050 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1051 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1052 * the same value! 1053 */ 1054 static int /* set number */ 1055 freezeset(p, cs) 1056 register struct parse *p; 1057 register cset *cs; 1058 { 1059 register uch h = cs->hash; 1060 register int i; 1061 register cset *top = &p->g->sets[p->g->ncsets]; 1062 register cset *cs2; 1063 register size_t css = (size_t)p->g->csetsize; 1064 1065 /* look for an earlier one which is the same */ 1066 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1067 if (cs2->hash == h && cs2 != cs) { 1068 /* maybe */ 1069 for (i = 0; i < css; i++) 1070 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1071 break; /* no */ 1072 if (i == css) 1073 break; /* yes */ 1074 } 1075 1076 if (cs2 < top) { /* found one */ 1077 freeset(p, cs); 1078 cs = cs2; 1079 } 1080 1081 return((int)(cs - p->g->sets)); 1082 } 1083 1084 /* 1085 - firstch - return first character in a set (which must have at least one) 1086 == static int firstch(register struct parse *p, register cset *cs); 1087 */ 1088 static int /* character; there is no "none" value */ 1089 firstch(p, cs) 1090 register struct parse *p; 1091 register cset *cs; 1092 { 1093 register int i; 1094 register size_t css = (size_t)p->g->csetsize; 1095 1096 for (i = 0; i < css; i++) 1097 if (CHIN(cs, i)) 1098 return((char)i); 1099 assert(never); 1100 return(0); /* arbitrary */ 1101 } 1102 1103 /* 1104 - nch - number of characters in a set 1105 == static int nch(register struct parse *p, register cset *cs); 1106 */ 1107 static int 1108 nch(p, cs) 1109 register struct parse *p; 1110 register cset *cs; 1111 { 1112 register int i; 1113 register size_t css = (size_t)p->g->csetsize; 1114 register int n = 0; 1115 1116 for (i = 0; i < css; i++) 1117 if (CHIN(cs, i)) 1118 n++; 1119 return(n); 1120 } 1121 1122 /* 1123 - mcadd - add a collating element to a cset 1124 == static void mcadd(register struct parse *p, register cset *cs, \ 1125 == register char *cp); 1126 */ 1127 static void 1128 mcadd(p, cs, cp) 1129 register struct parse *p; 1130 register cset *cs; 1131 register char *cp; 1132 { 1133 register size_t oldend = cs->smultis; 1134 1135 cs->smultis += strlen(cp) + 1; 1136 if (cs->multis == NULL) 1137 cs->multis = malloc(cs->smultis); 1138 else 1139 cs->multis = realloc(cs->multis, cs->smultis); 1140 if (cs->multis == NULL) { 1141 SETERROR(REG_ESPACE); 1142 return; 1143 } 1144 1145 (void) strcpy(cs->multis + oldend - 1, cp); 1146 cs->multis[cs->smultis - 1] = '\0'; 1147 } 1148 1149 /* 1150 - mcsub - subtract a collating element from a cset 1151 == static void mcsub(register cset *cs, register char *cp); 1152 */ 1153 static void 1154 mcsub(cs, cp) 1155 register cset *cs; 1156 register char *cp; 1157 { 1158 register char *fp = mcfind(cs, cp); 1159 register size_t len = strlen(fp); 1160 1161 assert(fp != NULL); 1162 (void) memmove(fp, fp + len + 1, 1163 cs->smultis - (fp + len + 1 - cs->multis)); 1164 cs->smultis -= len; 1165 1166 if (cs->smultis == 0) { 1167 free(cs->multis); 1168 cs->multis = NULL; 1169 return; 1170 } 1171 1172 cs->multis = realloc(cs->multis, cs->smultis); 1173 assert(cs->multis != NULL); 1174 } 1175 1176 /* 1177 - mcin - is a collating element in a cset? 1178 == static int mcin(register cset *cs, register char *cp); 1179 */ 1180 static int 1181 mcin(cs, cp) 1182 register cset *cs; 1183 register char *cp; 1184 { 1185 return(mcfind(cs, cp) != NULL); 1186 } 1187 1188 /* 1189 - mcfind - find a collating element in a cset 1190 == static char *mcfind(register cset *cs, register char *cp); 1191 */ 1192 static char * 1193 mcfind(cs, cp) 1194 register cset *cs; 1195 register char *cp; 1196 { 1197 register char *p; 1198 1199 if (cs->multis == NULL) 1200 return(NULL); 1201 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1202 if (strcmp(cp, p) == 0) 1203 return(p); 1204 return(NULL); 1205 } 1206 1207 /* 1208 - mcinvert - invert the list of collating elements in a cset 1209 == static void mcinvert(register struct parse *p, register cset *cs); 1210 * 1211 * This would have to know the set of possibilities. Implementation 1212 * is deferred. 1213 */ 1214 static void 1215 mcinvert(p, cs) 1216 register struct parse *p; 1217 register cset *cs; 1218 { 1219 assert(cs->multis == NULL); /* xxx */ 1220 } 1221 1222 /* 1223 - mccase - add case counterparts of the list of collating elements in a cset 1224 == static void mccase(register struct parse *p, register cset *cs); 1225 * 1226 * This would have to know the set of possibilities. Implementation 1227 * is deferred. 1228 */ 1229 static void 1230 mccase(p, cs) 1231 register struct parse *p; 1232 register cset *cs; 1233 { 1234 assert(cs->multis == NULL); /* xxx */ 1235 } 1236 1237 /* 1238 - isinsets - is this character in any sets? 1239 == static int isinsets(register struct re_guts *g, int c); 1240 */ 1241 static int /* predicate */ 1242 isinsets(g, c) 1243 register struct re_guts *g; 1244 int c; 1245 { 1246 register uch *col; 1247 register int i; 1248 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1249 register unsigned uc = (unsigned char)c; 1250 1251 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1252 if (col[uc] != 0) 1253 return(1); 1254 return(0); 1255 } 1256 1257 /* 1258 - samesets - are these two characters in exactly the same sets? 1259 == static int samesets(register struct re_guts *g, int c1, int c2); 1260 */ 1261 static int /* predicate */ 1262 samesets(g, c1, c2) 1263 register struct re_guts *g; 1264 int c1; 1265 int c2; 1266 { 1267 register uch *col; 1268 register int i; 1269 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1270 register unsigned uc1 = (unsigned char)c1; 1271 register unsigned uc2 = (unsigned char)c2; 1272 1273 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1274 if (col[uc1] != col[uc2]) 1275 return(0); 1276 return(1); 1277 } 1278 1279 /* 1280 - categorize - sort out character categories 1281 == static void categorize(struct parse *p, register struct re_guts *g); 1282 */ 1283 static void 1284 categorize(p, g) 1285 struct parse *p; 1286 register struct re_guts *g; 1287 { 1288 register cat_t *cats = g->categories; 1289 register int c; 1290 register int c2; 1291 register cat_t cat; 1292 1293 /* avoid making error situations worse */ 1294 if (p->error != 0) 1295 return; 1296 1297 for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1298 if (cats[c] == 0 && isinsets(g, c)) { 1299 cat = g->ncategories++; 1300 cats[c] = cat; 1301 for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1302 if (cats[c2] == 0 && samesets(g, c, c2)) 1303 cats[c2] = cat; 1304 } 1305 } 1306 1307 /* 1308 - dupl - emit a duplicate of a bunch of sops 1309 == static sopno dupl(register struct parse *p, sopno start, sopno finish); 1310 */ 1311 static sopno /* start of duplicate */ 1312 dupl(p, start, finish) 1313 register struct parse *p; 1314 sopno start; /* from here */ 1315 sopno finish; /* to this less one */ 1316 { 1317 register sopno ret = HERE(); 1318 register sopno len = finish - start; 1319 1320 assert(finish >= start); 1321 if (len == 0) 1322 return(ret); 1323 enlarge(p, p->ssize + len); /* this many unexpected additions */ 1324 assert(p->ssize >= p->slen + len); 1325 (void) memcpy((char *)(p->strip + p->slen), 1326 (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1327 p->slen += len; 1328 return(ret); 1329 } 1330 1331 /* 1332 - doemit - emit a strip operator 1333 == static void doemit(register struct parse *p, sop op, size_t opnd); 1334 * 1335 * It might seem better to implement this as a macro with a function as 1336 * hard-case backup, but it's just too big and messy unless there are 1337 * some changes to the data structures. Maybe later. 1338 */ 1339 static void 1340 doemit(p, op, opnd) 1341 register struct parse *p; 1342 sop op; 1343 size_t opnd; 1344 { 1345 /* avoid making error situations worse */ 1346 if (p->error != 0) 1347 return; 1348 1349 /* deal with oversize operands ("can't happen", more or less) */ 1350 assert(opnd < 1<<OPSHIFT); 1351 1352 /* deal with undersized strip */ 1353 if (p->slen >= p->ssize) 1354 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 1355 assert(p->slen < p->ssize); 1356 1357 /* finally, it's all reduced to the easy case */ 1358 p->strip[p->slen++] = SOP(op, opnd); 1359 } 1360 1361 /* 1362 - doinsert - insert a sop into the strip 1363 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 1364 */ 1365 static void 1366 doinsert(p, op, opnd, pos) 1367 register struct parse *p; 1368 sop op; 1369 size_t opnd; 1370 sopno pos; 1371 { 1372 register sopno sn; 1373 register sop s; 1374 register int i; 1375 1376 /* avoid making error situations worse */ 1377 if (p->error != 0) 1378 return; 1379 1380 sn = HERE(); 1381 EMIT(op, opnd); /* do checks, ensure space */ 1382 assert(HERE() == sn+1); 1383 s = p->strip[sn]; 1384 1385 /* adjust paren pointers */ 1386 assert(pos > 0); 1387 for (i = 1; i < NPAREN; i++) { 1388 if (p->pbegin[i] >= pos) { 1389 p->pbegin[i]++; 1390 } 1391 if (p->pend[i] >= pos) { 1392 p->pend[i]++; 1393 } 1394 } 1395 1396 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1397 (HERE()-pos-1)*sizeof(sop)); 1398 p->strip[pos] = s; 1399 } 1400 1401 /* 1402 - dofwd - complete a forward reference 1403 == static void dofwd(register struct parse *p, sopno pos, sop value); 1404 */ 1405 static void 1406 dofwd(p, pos, value) 1407 register struct parse *p; 1408 register sopno pos; 1409 sop value; 1410 { 1411 /* avoid making error situations worse */ 1412 if (p->error != 0) 1413 return; 1414 1415 assert(value < 1<<OPSHIFT); 1416 p->strip[pos] = OP(p->strip[pos]) | value; 1417 } 1418 1419 /* 1420 - enlarge - enlarge the strip 1421 == static void enlarge(register struct parse *p, sopno size); 1422 */ 1423 static void 1424 enlarge(p, size) 1425 register struct parse *p; 1426 register sopno size; 1427 { 1428 register sop *sp; 1429 1430 if (p->ssize >= size) 1431 return; 1432 1433 sp = (sop *)realloc(p->strip, size*sizeof(sop)); 1434 if (sp == NULL) { 1435 SETERROR(REG_ESPACE); 1436 return; 1437 } 1438 p->strip = sp; 1439 p->ssize = size; 1440 } 1441 1442 /* 1443 - stripsnug - compact the strip 1444 == static void stripsnug(register struct parse *p, register struct re_guts *g); 1445 */ 1446 static void 1447 stripsnug(p, g) 1448 register struct parse *p; 1449 register struct re_guts *g; 1450 { 1451 g->nstates = p->slen; 1452 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 1453 if (g->strip == NULL) { 1454 SETERROR(REG_ESPACE); 1455 g->strip = p->strip; 1456 } 1457 } 1458 1459 /* 1460 - findmust - fill in must and mlen with longest mandatory literal string 1461 == static void findmust(register struct parse *p, register struct re_guts *g); 1462 * 1463 * This algorithm could do fancy things like analyzing the operands of | 1464 * for common subsequences. Someday. This code is simple and finds most 1465 * of the interesting cases. 1466 * 1467 * Note that must and mlen got initialized during setup. 1468 */ 1469 static void 1470 findmust(p, g) 1471 struct parse *p; 1472 register struct re_guts *g; 1473 { 1474 register sop *scan; 1475 sop *start; 1476 register sop *newstart; 1477 register sopno newlen; 1478 register sop s; 1479 register char *cp; 1480 register sopno i; 1481 1482 /* avoid making error situations worse */ 1483 if (p->error != 0) 1484 return; 1485 1486 /* find the longest OCHAR sequence in strip */ 1487 newlen = 0; 1488 scan = g->strip + 1; 1489 do { 1490 s = *scan++; 1491 switch (OP(s)) { 1492 case OCHAR: /* sequence member */ 1493 if (newlen == 0) /* new sequence */ 1494 newstart = scan - 1; 1495 newlen++; 1496 break; 1497 case OPLUS_: /* things that don't break one */ 1498 case OLPAREN: 1499 case ORPAREN: 1500 break; 1501 case OQUEST_: /* things that must be skipped */ 1502 case OCH_: 1503 scan--; 1504 do { 1505 scan += OPND(s); 1506 s = *scan; 1507 /* assert() interferes w debug printouts */ 1508 if (OP(s) != O_QUEST && OP(s) != O_CH && 1509 OP(s) != OOR2) { 1510 g->iflags |= BAD; 1511 return; 1512 } 1513 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1514 /* fallthrough */ 1515 default: /* things that break a sequence */ 1516 if (newlen > g->mlen) { /* ends one */ 1517 start = newstart; 1518 g->mlen = newlen; 1519 } 1520 newlen = 0; 1521 break; 1522 } 1523 } while (OP(s) != OEND); 1524 1525 if (g->mlen == 0) /* there isn't one */ 1526 return; 1527 1528 /* turn it into a character string */ 1529 g->must = malloc((size_t)g->mlen + 1); 1530 if (g->must == NULL) { /* argh; just forget it */ 1531 g->mlen = 0; 1532 return; 1533 } 1534 cp = g->must; 1535 scan = start; 1536 for (i = g->mlen; i > 0; i--) { 1537 while (OP(s = *scan++) != OCHAR) 1538 continue; 1539 assert(cp < g->must + g->mlen); 1540 *cp++ = (char)OPND(s); 1541 } 1542 assert(cp == g->must + g->mlen); 1543 *cp++ = '\0'; /* just on general principles */ 1544 } 1545 1546 /* 1547 - pluscount - count + nesting 1548 == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1549 */ 1550 static sopno /* nesting depth */ 1551 pluscount(p, g) 1552 struct parse *p; 1553 register struct re_guts *g; 1554 { 1555 register sop *scan; 1556 register sop s; 1557 register sopno plusnest = 0; 1558 register sopno maxnest = 0; 1559 1560 if (p->error != 0) 1561 return(0); /* there may not be an OEND */ 1562 1563 scan = g->strip + 1; 1564 do { 1565 s = *scan++; 1566 switch (OP(s)) { 1567 case OPLUS_: 1568 plusnest++; 1569 break; 1570 case O_PLUS: 1571 if (plusnest > maxnest) 1572 maxnest = plusnest; 1573 plusnest--; 1574 break; 1575 } 1576 } while (OP(s) != OEND); 1577 if (plusnest != 0) 1578 g->iflags |= BAD; 1579 return(maxnest); 1580 } 1581