1 #include <sys/types.h> 2 #include <stdio.h> 3 #include <string.h> 4 #include <ctype.h> 5 #include <limits.h> 6 #include <stdlib.h> 7 #include <regex.h> 8 9 #include "utils.h" 10 #include "regex2.h" 11 12 #include "cclass.h" 13 #include "cname.h" 14 15 /* 16 * parse structure, passed up and down to avoid global variables and 17 * other clumsinesses 18 */ 19 struct parse { 20 char *next; /* next character in RE */ 21 char *end; /* end of string (-> NUL normally) */ 22 int error; /* has an error been seen? */ 23 sop *strip; /* malloced strip */ 24 sopno ssize; /* malloced strip size (allocated) */ 25 sopno slen; /* malloced strip length (used) */ 26 int ncsalloc; /* number of csets allocated */ 27 struct re_guts *g; 28 # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 29 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 30 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 31 }; 32 33 #include "regcomp.ih" 34 35 static char nuls[10]; /* place to point scanner in event of error */ 36 37 /* 38 * macros for use with parse structure 39 * BEWARE: these know that the parse structure is named `p' !!! 40 */ 41 #define PEEK() (*p->next) 42 #define PEEK2() (*(p->next+1)) 43 #define MORE() (p->next < p->end) 44 #define MORE2() (p->next+1 < p->end) 45 #define SEE(c) (MORE() && PEEK() == (c)) 46 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 47 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 48 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 49 #define NEXT() (p->next++) 50 #define NEXT2() (p->next += 2) 51 #define NEXTn(n) (p->next += (n)) 52 #define GETNEXT() (*p->next++) 53 #define SETERROR(e) seterr(p, (e)) 54 #define REQUIRE(co, e) ((co) || SETERROR(e)) 55 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) 56 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) 57 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) 58 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 59 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 60 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 61 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 62 #define HERE() (p->slen) 63 #define THERE() (p->slen - 1) 64 #define THERETHERE() (p->slen - 2) 65 #define DROP(n) (p->slen -= (n)) 66 67 #ifndef NDEBUG 68 static int never = 0; /* for use in asserts; shuts lint up */ 69 #else 70 #define never 0 /* some <assert.h>s have bugs too */ 71 #endif 72 73 /* 74 - regcomp - interface for parser and compilation 75 = extern int regcomp(regex_t *, const char *, int); 76 = #define REG_BASIC 0000 77 = #define REG_EXTENDED 0001 78 = #define REG_ICASE 0002 79 = #define REG_NOSUB 0004 80 = #define REG_NEWLINE 0010 81 = #define REG_NOSPEC 0020 82 = #define REG_PEND 0040 83 = #define REG_DUMP 0200 84 */ 85 int /* 0 success, otherwise REG_something */ 86 regcomp(preg, pattern, cflags) 87 regex_t *preg; 88 const char *pattern; 89 int cflags; 90 { 91 struct parse pa; 92 register struct re_guts *g; 93 register struct parse *p = &pa; 94 register int i; 95 register size_t len; 96 #ifdef REDEBUG 97 # define GOODFLAGS(f) (f) 98 #else 99 # define GOODFLAGS(f) ((f)&~REG_DUMP) 100 #endif 101 102 cflags = GOODFLAGS(cflags); 103 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 104 return(REG_INVARG); 105 106 if (cflags®_PEND) { 107 if (preg->re_endp < pattern) 108 return(REG_INVARG); 109 len = preg->re_endp - pattern; 110 } else 111 len = strlen((char *)pattern); 112 113 /* do the mallocs early so failure handling is easy */ 114 g = (struct re_guts *)malloc(sizeof(struct re_guts) + 115 (NC-1)*sizeof(cat_t)); 116 if (g == NULL) 117 return(REG_ESPACE); 118 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 119 p->strip = (sop *)malloc(p->ssize * sizeof(sop)); 120 p->slen = 0; 121 if (p->strip == NULL) { 122 free((char *)g); 123 return(REG_ESPACE); 124 } 125 126 /* set things up */ 127 p->g = g; 128 p->next = (char *)pattern; /* convenience; we do not modify it */ 129 p->end = p->next + len; 130 p->error = 0; 131 p->ncsalloc = 0; 132 for (i = 0; i < NPAREN; i++) { 133 p->pbegin[i] = 0; 134 p->pend[i] = 0; 135 } 136 g->csetsize = NC; 137 g->sets = NULL; 138 g->setbits = NULL; 139 g->ncsets = 0; 140 g->cflags = cflags; 141 g->iflags = 0; 142 g->nbol = 0; 143 g->neol = 0; 144 g->must = NULL; 145 g->mlen = 0; 146 g->nsub = 0; 147 g->ncategories = 1; /* category 0 is "everything else" */ 148 g->categories = &g->catspace[-(CHAR_MIN)]; 149 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); 150 g->backrefs = 0; 151 152 /* do it */ 153 EMIT(OEND, 0); 154 g->firststate = THERE(); 155 if (cflags®_EXTENDED) 156 p_ere(p, OUT); 157 else if (cflags®_NOSPEC) 158 p_str(p); 159 else 160 p_bre(p, OUT, OUT); 161 EMIT(OEND, 0); 162 g->laststate = THERE(); 163 164 /* tidy up loose ends and fill things in */ 165 categorize(p, g); 166 stripsnug(p, g); 167 findmust(p, g); 168 g->nplus = pluscount(p, g); 169 g->magic = MAGIC2; 170 preg->re_nsub = g->nsub; 171 preg->re_g = g; 172 preg->re_magic = MAGIC1; 173 #ifndef REDEBUG 174 /* not debugging, so can't rely on the assert() in regexec() */ 175 if (g->iflags&BAD) 176 SETERROR(REG_ASSERT); 177 #endif 178 179 /* win or lose, we're done */ 180 if (p->error != 0) /* lose */ 181 regfree(preg); 182 return(p->error); 183 } 184 185 /* 186 - p_ere - ERE parser top level, concatenation and alternation 187 == static void p_ere(register struct parse *p, int stop); 188 */ 189 static void 190 p_ere(p, stop) 191 register struct parse *p; 192 int stop; /* character this ERE should end at */ 193 { 194 register char c; 195 register sopno prevback; 196 register sopno prevfwd; 197 register sopno conc; 198 register int first = 1; /* is this the first alternative? */ 199 200 for (;;) { 201 /* do a bunch of concatenated expressions */ 202 conc = HERE(); 203 while (MORE() && (c = PEEK()) != '|' && c != stop) 204 p_ere_exp(p); 205 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 206 207 if (!EAT('|')) 208 break; /* NOTE BREAK OUT */ 209 210 if (first) { 211 INSERT(OCH_, conc); /* offset is wrong */ 212 prevfwd = conc; 213 prevback = conc; 214 first = 0; 215 } 216 ASTERN(OOR1, prevback); 217 prevback = THERE(); 218 AHEAD(prevfwd); /* fix previous offset */ 219 prevfwd = HERE(); 220 EMIT(OOR2, 0); /* offset is very wrong */ 221 } 222 223 if (!first) { /* tail-end fixups */ 224 AHEAD(prevfwd); 225 ASTERN(O_CH, prevback); 226 } 227 228 assert(!MORE() || SEE(stop)); 229 } 230 231 /* 232 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 233 == static void p_ere_exp(register struct parse *p); 234 */ 235 static void 236 p_ere_exp(p) 237 register struct parse *p; 238 { 239 register char c; 240 register sopno pos; 241 register int count; 242 register int count2; 243 register sopno subno; 244 int wascaret = 0; 245 246 assert(MORE()); /* caller should have ensured this */ 247 c = GETNEXT(); 248 249 pos = HERE(); 250 switch (c) { 251 case '(': 252 REQUIRE(MORE(), REG_EPAREN); 253 p->g->nsub++; 254 subno = p->g->nsub; 255 if (subno < NPAREN) 256 p->pbegin[subno] = HERE(); 257 EMIT(OLPAREN, subno); 258 if (!SEE(')')) 259 p_ere(p, ')'); 260 if (subno < NPAREN) { 261 p->pend[subno] = HERE(); 262 assert(p->pend[subno] != 0); 263 } 264 EMIT(ORPAREN, subno); 265 MUSTEAT(')', REG_EPAREN); 266 break; 267 #ifndef POSIX_MISTAKE 268 case ')': /* happens only if no current unmatched ( */ 269 /* 270 * You may ask, why the ifndef? Because I didn't notice 271 * this until slightly too late for 1003.2, and none of the 272 * other 1003.2 regular-expression reviewers noticed it at 273 * all. So an unmatched ) is legal POSIX, at least until 274 * we can get it fixed. 275 */ 276 SETERROR(REG_EPAREN); 277 break; 278 #endif 279 case '^': 280 EMIT(OBOL, 0); 281 p->g->iflags |= USEBOL; 282 p->g->nbol++; 283 wascaret = 1; 284 break; 285 case '$': 286 EMIT(OEOL, 0); 287 p->g->iflags |= USEEOL; 288 p->g->neol++; 289 break; 290 case '|': 291 SETERROR(REG_EMPTY); 292 break; 293 case '*': 294 case '+': 295 case '?': 296 SETERROR(REG_BADRPT); 297 break; 298 case '.': 299 if (p->g->cflags®_NEWLINE) 300 nonnewline(p); 301 else 302 EMIT(OANY, 0); 303 break; 304 case '[': 305 p_bracket(p); 306 break; 307 case '\\': 308 REQUIRE(MORE(), REG_EESCAPE); 309 c = GETNEXT(); 310 ordinary(p, c); 311 break; 312 case '{': /* okay as ordinary except if digit follows */ 313 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); 314 /* FALLTHROUGH */ 315 default: 316 ordinary(p, c); 317 break; 318 } 319 320 if (!MORE()) 321 return; 322 c = PEEK(); 323 /* we call { a repetition if followed by a digit */ 324 if (!( c == '*' || c == '+' || c == '?' || 325 (c == '{' && MORE2() && isdigit(PEEK2())) )) 326 return; /* no repetition, we're done */ 327 NEXT(); 328 329 REQUIRE(!wascaret, REG_BADRPT); 330 switch (c) { 331 case '*': /* implemented as +? */ 332 /* this case does not require the (y|) trick, noKLUDGE */ 333 INSERT(OPLUS_, pos); 334 ASTERN(O_PLUS, pos); 335 INSERT(OQUEST_, pos); 336 ASTERN(O_QUEST, pos); 337 break; 338 case '+': 339 INSERT(OPLUS_, pos); 340 ASTERN(O_PLUS, pos); 341 break; 342 case '?': 343 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 344 INSERT(OCH_, pos); /* offset slightly wrong */ 345 ASTERN(OOR1, pos); /* this one's right */ 346 AHEAD(pos); /* fix the OCH_ */ 347 EMIT(OOR2, 0); /* offset very wrong... */ 348 AHEAD(THERE()); /* ...so fix it */ 349 ASTERN(O_CH, THERETHERE()); 350 break; 351 case '{': 352 count = p_count(p); 353 if (EAT(',')) { 354 if (isdigit(PEEK())) { 355 count2 = p_count(p); 356 REQUIRE(count <= count2, REG_BADBR); 357 } else /* single number with comma */ 358 count2 = INFINITY; 359 } else /* just a single number */ 360 count2 = count; 361 repeat(p, pos, count, count2); 362 if (!EAT('}')) { /* error heuristics */ 363 while (MORE() && PEEK() != '}') 364 NEXT(); 365 REQUIRE(MORE(), REG_EBRACE); 366 SETERROR(REG_BADBR); 367 } 368 break; 369 } 370 371 if (!MORE()) 372 return; 373 c = PEEK(); 374 if (!( c == '*' || c == '+' || c == '?' || 375 (c == '{' && MORE2() && isdigit(PEEK2())) ) ) 376 return; 377 SETERROR(REG_BADRPT); 378 } 379 380 /* 381 - p_str - string (no metacharacters) "parser" 382 == static void p_str(register struct parse *p); 383 */ 384 static void 385 p_str(p) 386 register struct parse *p; 387 { 388 REQUIRE(MORE(), REG_EMPTY); 389 while (MORE()) 390 ordinary(p, GETNEXT()); 391 } 392 393 /* 394 - p_bre - BRE parser top level, anchoring and concatenation 395 == static void p_bre(register struct parse *p, register int end1, \ 396 == register int end2); 397 * Giving end1 as OUT essentially eliminates the end1/end2 check. 398 * 399 * This implementation is a bit of a kludge, in that a trailing $ is first 400 * taken as an ordinary character and then revised to be an anchor. The 401 * only undesirable side effect is that '$' gets included as a character 402 * category in such cases. This is fairly harmless; not worth fixing. 403 * The amount of lookahead needed to avoid this kludge is excessive. 404 */ 405 static void 406 p_bre(p, end1, end2) 407 register struct parse *p; 408 register int end1; /* first terminating character */ 409 register int end2; /* second terminating character */ 410 { 411 register sopno start = HERE(); 412 register int first = 1; /* first subexpression? */ 413 register int wasdollar = 0; 414 415 if (EAT('^')) { 416 EMIT(OBOL, 0); 417 p->g->iflags |= USEBOL; 418 p->g->nbol++; 419 } 420 while (MORE() && !SEETWO(end1, end2)) { 421 wasdollar = p_simp_re(p, first); 422 first = 0; 423 } 424 if (wasdollar) { /* oops, that was a trailing anchor */ 425 DROP(1); 426 EMIT(OEOL, 0); 427 p->g->iflags |= USEEOL; 428 p->g->neol++; 429 } 430 431 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 432 } 433 434 /* 435 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 436 == static int p_simp_re(register struct parse *p, int starordinary); 437 */ 438 static int /* was the simple RE an unbackslashed $? */ 439 p_simp_re(p, starordinary) 440 register struct parse *p; 441 int starordinary; /* is a leading * an ordinary character? */ 442 { 443 register int c; 444 register int count; 445 register int count2; 446 register sopno pos; 447 register int i; 448 register sopno subno; 449 # define BACKSL (1<<CHAR_BIT) 450 451 pos = HERE(); /* repetion op, if any, covers from here */ 452 453 assert(MORE()); /* caller should have ensured this */ 454 c = GETNEXT(); 455 if (c == '\\') { 456 REQUIRE(MORE(), REG_EESCAPE); 457 c = BACKSL | (unsigned char)GETNEXT(); 458 } 459 switch (c) { 460 case '.': 461 if (p->g->cflags®_NEWLINE) 462 nonnewline(p); 463 else 464 EMIT(OANY, 0); 465 break; 466 case '[': 467 p_bracket(p); 468 break; 469 case BACKSL|'{': 470 SETERROR(REG_BADRPT); 471 break; 472 case BACKSL|'(': 473 p->g->nsub++; 474 subno = p->g->nsub; 475 if (subno < NPAREN) 476 p->pbegin[subno] = HERE(); 477 EMIT(OLPAREN, subno); 478 /* the MORE here is an error heuristic */ 479 if (MORE() && !SEETWO('\\', ')')) 480 p_bre(p, '\\', ')'); 481 if (subno < NPAREN) { 482 p->pend[subno] = HERE(); 483 assert(p->pend[subno] != 0); 484 } 485 EMIT(ORPAREN, subno); 486 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 487 break; 488 case BACKSL|')': /* should not get here -- must be user */ 489 case BACKSL|'}': 490 SETERROR(REG_EPAREN); 491 break; 492 case BACKSL|'1': 493 case BACKSL|'2': 494 case BACKSL|'3': 495 case BACKSL|'4': 496 case BACKSL|'5': 497 case BACKSL|'6': 498 case BACKSL|'7': 499 case BACKSL|'8': 500 case BACKSL|'9': 501 i = (c&~BACKSL) - '0'; 502 assert(i < NPAREN); 503 if (p->pend[i] != 0) { 504 assert(i <= p->g->nsub); 505 EMIT(OBACK_, i); 506 assert(p->pbegin[i] != 0); 507 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 508 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 509 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 510 EMIT(O_BACK, i); 511 } else 512 SETERROR(REG_ESUBREG); 513 p->g->backrefs = 1; 514 break; 515 case '*': 516 REQUIRE(starordinary, REG_BADRPT); 517 /* FALLTHROUGH */ 518 default: 519 ordinary(p, c &~ BACKSL); 520 break; 521 } 522 523 if (EAT('*')) { /* implemented as +? */ 524 /* this case does not require the (y|) trick, noKLUDGE */ 525 INSERT(OPLUS_, pos); 526 ASTERN(O_PLUS, pos); 527 INSERT(OQUEST_, pos); 528 ASTERN(O_QUEST, pos); 529 } else if (EATTWO('\\', '{')) { 530 count = p_count(p); 531 if (EAT(',')) { 532 if (MORE() && isdigit(PEEK())) { 533 count2 = p_count(p); 534 REQUIRE(count <= count2, REG_BADBR); 535 } else /* single number with comma */ 536 count2 = INFINITY; 537 } else /* just a single number */ 538 count2 = count; 539 repeat(p, pos, count, count2); 540 if (!EATTWO('\\', '}')) { /* error heuristics */ 541 while (MORE() && !SEETWO('\\', '}')) 542 NEXT(); 543 REQUIRE(MORE(), REG_EBRACE); 544 SETERROR(REG_BADBR); 545 } 546 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ 547 return(1); 548 549 return(0); 550 } 551 552 /* 553 - p_count - parse a repetition count 554 == static int p_count(register struct parse *p); 555 */ 556 static int /* the value */ 557 p_count(p) 558 register struct parse *p; 559 { 560 register int count = 0; 561 register int ndigits = 0; 562 563 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { 564 count = count*10 + (GETNEXT() - '0'); 565 ndigits++; 566 } 567 568 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 569 return(count); 570 } 571 572 /* 573 - p_bracket - parse a bracketed character list 574 == static void p_bracket(register struct parse *p); 575 * 576 * Note a significant property of this code: if the allocset() did SETERROR, 577 * no set operations are done. 578 */ 579 static void 580 p_bracket(p) 581 register struct parse *p; 582 { 583 register char c; 584 register cset *cs = allocset(p); 585 register int invert = 0; 586 587 /* Dept of Truly Sickening Special-Case Kludges */ 588 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 589 EMIT(OBOW, 0); 590 NEXTn(6); 591 return; 592 } 593 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 594 EMIT(OEOW, 0); 595 NEXTn(6); 596 return; 597 } 598 599 if (EAT('^')) 600 invert++; /* make note to invert set at end */ 601 if (EAT(']')) 602 CHadd(cs, ']'); 603 else if (EAT('-')) 604 CHadd(cs, '-'); 605 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 606 p_b_term(p, cs); 607 if (EAT('-')) 608 CHadd(cs, '-'); 609 MUSTEAT(']', REG_EBRACK); 610 611 if (p->error != 0) /* don't mess things up further */ 612 return; 613 614 if (p->g->cflags®_ICASE) { 615 register int i; 616 register int ci; 617 618 for (i = p->g->csetsize - 1; i >= 0; i--) 619 if (CHIN(cs, i) && isalpha(i)) { 620 ci = othercase(i); 621 if (ci != i) 622 CHadd(cs, ci); 623 } 624 if (cs->multis != NULL) 625 mccase(p, cs); 626 } 627 if (invert) { 628 register int i; 629 630 for (i = p->g->csetsize - 1; i >= 0; i--) 631 if (CHIN(cs, i)) 632 CHsub(cs, i); 633 else 634 CHadd(cs, i); 635 if (p->g->cflags®_NEWLINE) 636 CHsub(cs, '\n'); 637 if (cs->multis != NULL) 638 mcinvert(p, cs); 639 } 640 641 assert(cs->multis == NULL); /* xxx */ 642 643 if (nch(p, cs) == 1) { /* optimize singleton sets */ 644 ordinary(p, firstch(p, cs)); 645 freeset(p, cs); 646 } else 647 EMIT(OANYOF, freezeset(p, cs)); 648 } 649 650 /* 651 - p_b_term - parse one term of a bracketed character list 652 == static void p_b_term(register struct parse *p, register cset *cs); 653 */ 654 static void 655 p_b_term(p, cs) 656 register struct parse *p; 657 register cset *cs; 658 { 659 register char c; 660 register char start, finish; 661 register int i; 662 663 /* classify what we've got */ 664 switch ((MORE()) ? PEEK() : '\0') { 665 case '[': 666 c = (MORE2()) ? PEEK2() : '\0'; 667 break; 668 case '-': 669 SETERROR(REG_ERANGE); 670 return; /* NOTE RETURN */ 671 break; 672 default: 673 c = '\0'; 674 break; 675 } 676 677 switch (c) { 678 case ':': /* character class */ 679 NEXT2(); 680 REQUIRE(MORE(), REG_EBRACK); 681 c = PEEK(); 682 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 683 p_b_cclass(p, cs); 684 REQUIRE(MORE(), REG_EBRACK); 685 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 686 break; 687 case '=': /* equivalence class */ 688 NEXT2(); 689 REQUIRE(MORE(), REG_EBRACK); 690 c = PEEK(); 691 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 692 p_b_eclass(p, cs); 693 REQUIRE(MORE(), REG_EBRACK); 694 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 695 break; 696 default: /* symbol, ordinary character, or range */ 697 /* xxx revision needed for multichar stuff */ 698 start = p_b_symbol(p); 699 if (SEE('-') && MORE2() && PEEK2() != ']') { 700 /* range */ 701 NEXT(); 702 if (EAT('-')) 703 finish = '-'; 704 else 705 finish = p_b_symbol(p); 706 } else 707 finish = start; 708 /* xxx what about signed chars here... */ 709 REQUIRE(start <= finish, REG_ERANGE); 710 for (i = start; i <= finish; i++) 711 CHadd(cs, i); 712 break; 713 } 714 } 715 716 /* 717 - p_b_cclass - parse a character-class name and deal with it 718 == static void p_b_cclass(register struct parse *p, register cset *cs); 719 */ 720 static void 721 p_b_cclass(p, cs) 722 register struct parse *p; 723 register cset *cs; 724 { 725 register char *sp = p->next; 726 register struct cclass *cp; 727 register size_t len; 728 register char *u; 729 register char c; 730 731 while (MORE() && isalpha(PEEK())) 732 NEXT(); 733 len = p->next - sp; 734 for (cp = cclasses; cp->name != NULL; cp++) 735 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 736 break; 737 if (cp->name == NULL) { 738 /* oops, didn't find it */ 739 SETERROR(REG_ECTYPE); 740 return; 741 } 742 743 u = cp->chars; 744 while ((c = *u++) != '\0') 745 CHadd(cs, c); 746 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 747 MCadd(p, cs, u); 748 } 749 750 /* 751 - p_b_eclass - parse an equivalence-class name and deal with it 752 == static void p_b_eclass(register struct parse *p, register cset *cs); 753 * 754 * This implementation is incomplete. xxx 755 */ 756 static void 757 p_b_eclass(p, cs) 758 register struct parse *p; 759 register cset *cs; 760 { 761 register char c; 762 763 c = p_b_coll_elem(p, '='); 764 CHadd(cs, c); 765 } 766 767 /* 768 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 769 == static char p_b_symbol(register struct parse *p); 770 */ 771 static char /* value of symbol */ 772 p_b_symbol(p) 773 register struct parse *p; 774 { 775 register char value; 776 777 REQUIRE(MORE(), REG_EBRACK); 778 if (!EATTWO('[', '.')) 779 return(GETNEXT()); 780 781 /* collating symbol */ 782 value = p_b_coll_elem(p, '.'); 783 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 784 return(value); 785 } 786 787 /* 788 - p_b_coll_elem - parse a collating-element name and look it up 789 == static char p_b_coll_elem(register struct parse *p, int endc); 790 */ 791 static char /* value of collating element */ 792 p_b_coll_elem(p, endc) 793 register struct parse *p; 794 int endc; /* name ended by endc,']' */ 795 { 796 register char *sp = p->next; 797 register struct cname *cp; 798 register int len; 799 register char c; 800 801 while (MORE() && !SEETWO(endc, ']')) 802 NEXT(); 803 if (!MORE()) { 804 SETERROR(REG_EBRACK); 805 return(0); 806 } 807 len = p->next - sp; 808 for (cp = cnames; cp->name != NULL; cp++) 809 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 810 return(cp->code); /* known name */ 811 if (len == 1) 812 return(*sp); /* single character */ 813 SETERROR(REG_ECOLLATE); /* neither */ 814 return(0); 815 } 816 817 /* 818 - othercase - return the case counterpart of an alphabetic 819 == static char othercase(int ch); 820 */ 821 static char /* if no counterpart, return ch */ 822 othercase(ch) 823 int ch; 824 { 825 assert(isalpha(ch)); 826 if (isupper(ch)) 827 return(tolower(ch)); 828 else if (islower(ch)) 829 return(toupper(ch)); 830 else /* peculiar, but could happen */ 831 return(ch); 832 } 833 834 /* 835 - bothcases - emit a dualcase version of a two-case character 836 == static void bothcases(register struct parse *p, int ch); 837 * 838 * Boy, is this implementation ever a kludge... 839 */ 840 static void 841 bothcases(p, ch) 842 register struct parse *p; 843 int ch; 844 { 845 register char *oldnext = p->next; 846 register char *oldend = p->end; 847 char bracket[3]; 848 849 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 850 p->next = bracket; 851 p->end = bracket+2; 852 bracket[0] = ch; 853 bracket[1] = ']'; 854 bracket[2] = '\0'; 855 p_bracket(p); 856 assert(p->next == bracket+2); 857 p->next = oldnext; 858 p->end = oldend; 859 } 860 861 /* 862 - ordinary - emit an ordinary character 863 == static void ordinary(register struct parse *p, register int ch); 864 */ 865 static void 866 ordinary(p, ch) 867 register struct parse *p; 868 register int ch; 869 { 870 register cat_t *cap = p->g->categories; 871 872 if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) 873 bothcases(p, ch); 874 else { 875 EMIT(OCHAR, (unsigned char)ch); 876 if (cap[ch] == 0) 877 cap[ch] = p->g->ncategories++; 878 } 879 } 880 881 /* 882 - nonnewline - emit REG_NEWLINE version of OANY 883 == static void nonnewline(register struct parse *p); 884 * 885 * Boy, is this implementation ever a kludge... 886 */ 887 static void 888 nonnewline(p) 889 register struct parse *p; 890 { 891 register char *oldnext = p->next; 892 register char *oldend = p->end; 893 char bracket[4]; 894 895 p->next = bracket; 896 p->end = bracket+3; 897 bracket[0] = '^'; 898 bracket[1] = '\n'; 899 bracket[2] = ']'; 900 bracket[3] = '\0'; 901 p_bracket(p); 902 assert(p->next == bracket+3); 903 p->next = oldnext; 904 p->end = oldend; 905 } 906 907 /* 908 - repeat - generate code for a bounded repetition, recursively if needed 909 == static void repeat(register struct parse *p, sopno start, int from, int to); 910 */ 911 static void 912 repeat(p, start, from, to) 913 register struct parse *p; 914 sopno start; /* operand from here to end of strip */ 915 int from; /* repeated from this number */ 916 int to; /* to this number of times (maybe INFINITY) */ 917 { 918 register sopno finish = HERE(); 919 # define N 2 920 # define INF 3 921 # define REP(f, t) ((f)*8 + (t)) 922 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 923 register sopno copy; 924 925 if (p->error != 0) /* head off possible runaway recursion */ 926 return; 927 928 assert(from <= to); 929 930 switch (REP(MAP(from), MAP(to))) { 931 case REP(0, 0): /* must be user doing this */ 932 DROP(finish-start); /* drop the operand */ 933 break; 934 case REP(0, 1): /* as x{1,1}? */ 935 case REP(0, N): /* as x{1,n}? */ 936 case REP(0, INF): /* as x{1,}? */ 937 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 938 INSERT(OCH_, start); /* offset is wrong... */ 939 repeat(p, start+1, 1, to); 940 ASTERN(OOR1, start); 941 AHEAD(start); /* ... fix it */ 942 EMIT(OOR2, 0); 943 AHEAD(THERE()); 944 ASTERN(O_CH, THERETHERE()); 945 break; 946 case REP(1, 1): /* trivial case */ 947 /* done */ 948 break; 949 case REP(1, N): /* as x?x{1,n-1} */ 950 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 951 INSERT(OCH_, start); 952 ASTERN(OOR1, start); 953 AHEAD(start); 954 EMIT(OOR2, 0); /* offset very wrong... */ 955 AHEAD(THERE()); /* ...so fix it */ 956 ASTERN(O_CH, THERETHERE()); 957 copy = dupl(p, start+1, finish+1); 958 assert(copy == finish+4); 959 repeat(p, copy, 1, to-1); 960 break; 961 case REP(1, INF): /* as x+ */ 962 INSERT(OPLUS_, start); 963 ASTERN(O_PLUS, start); 964 break; 965 case REP(N, N): /* as xx{m-1,n-1} */ 966 copy = dupl(p, start, finish); 967 repeat(p, copy, from-1, to-1); 968 break; 969 case REP(N, INF): /* as xx{n-1,INF} */ 970 copy = dupl(p, start, finish); 971 repeat(p, copy, from-1, to); 972 break; 973 default: /* "can't happen" */ 974 SETERROR(REG_ASSERT); /* just in case */ 975 break; 976 } 977 } 978 979 /* 980 - seterr - set an error condition 981 == static int seterr(register struct parse *p, int e); 982 */ 983 static int /* useless but makes type checking happy */ 984 seterr(p, e) 985 register struct parse *p; 986 int e; 987 { 988 if (p->error == 0) /* keep earliest error condition */ 989 p->error = e; 990 p->next = nuls; /* try to bring things to a halt */ 991 p->end = nuls; 992 return(0); /* make the return value well-defined */ 993 } 994 995 /* 996 - allocset - allocate a set of characters for [] 997 == static cset *allocset(register struct parse *p); 998 */ 999 static cset * 1000 allocset(p) 1001 register struct parse *p; 1002 { 1003 register int no = p->g->ncsets++; 1004 register size_t nc; 1005 register size_t nbytes; 1006 register cset *cs; 1007 register size_t css = (size_t)p->g->csetsize; 1008 register int i; 1009 1010 if (no >= p->ncsalloc) { /* need another column of space */ 1011 p->ncsalloc += CHAR_BIT; 1012 nc = p->ncsalloc; 1013 assert(nc % CHAR_BIT == 0); 1014 nbytes = nc / CHAR_BIT * css; 1015 if (p->g->sets == NULL) 1016 p->g->sets = (cset *)malloc(nc * sizeof(cset)); 1017 else 1018 p->g->sets = (cset *)realloc((char *)p->g->sets, 1019 nc * sizeof(cset)); 1020 if (p->g->setbits == NULL) 1021 p->g->setbits = (uch *)malloc(nbytes); 1022 else { 1023 p->g->setbits = (uch *)realloc((char *)p->g->setbits, 1024 nbytes); 1025 /* xxx this isn't right if setbits is now NULL */ 1026 for (i = 0; i < no; i++) 1027 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1028 } 1029 if (p->g->sets != NULL && p->g->setbits != NULL) 1030 (void) memset((char *)p->g->setbits + (nbytes - css), 1031 0, css); 1032 else { 1033 no = 0; 1034 SETERROR(REG_ESPACE); 1035 /* caller's responsibility not to do set ops */ 1036 } 1037 } 1038 1039 assert(p->g->sets != NULL); /* xxx */ 1040 cs = &p->g->sets[no]; 1041 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1042 cs->mask = 1 << ((no) % CHAR_BIT); 1043 cs->hash = 0; 1044 cs->smultis = 0; 1045 cs->multis = NULL; 1046 1047 return(cs); 1048 } 1049 1050 /* 1051 - freeset - free a now-unused set 1052 == static void freeset(register struct parse *p, register cset *cs); 1053 */ 1054 static void 1055 freeset(p, cs) 1056 register struct parse *p; 1057 register cset *cs; 1058 { 1059 register int i; 1060 register cset *top = &p->g->sets[p->g->ncsets]; 1061 register size_t css = (size_t)p->g->csetsize; 1062 1063 for (i = 0; i < css; i++) 1064 CHsub(cs, i); 1065 if (cs == top-1) /* recover only the easy case */ 1066 p->g->ncsets--; 1067 } 1068 1069 /* 1070 - freezeset - final processing on a set of characters 1071 == static int freezeset(register struct parse *p, register cset *cs); 1072 * 1073 * The main task here is merging identical sets. This is usually a waste 1074 * of time (although the hash code minimizes the overhead), but can win 1075 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1076 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1077 * the same value! 1078 */ 1079 static int /* set number */ 1080 freezeset(p, cs) 1081 register struct parse *p; 1082 register cset *cs; 1083 { 1084 register uch h = cs->hash; 1085 register int i; 1086 register cset *top = &p->g->sets[p->g->ncsets]; 1087 register cset *cs2; 1088 register size_t css = (size_t)p->g->csetsize; 1089 1090 /* look for an earlier one which is the same */ 1091 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1092 if (cs2->hash == h && cs2 != cs) { 1093 /* maybe */ 1094 for (i = 0; i < css; i++) 1095 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1096 break; /* no */ 1097 if (i == css) 1098 break; /* yes */ 1099 } 1100 1101 if (cs2 < top) { /* found one */ 1102 freeset(p, cs); 1103 cs = cs2; 1104 } 1105 1106 return((int)(cs - p->g->sets)); 1107 } 1108 1109 /* 1110 - firstch - return first character in a set (which must have at least one) 1111 == static int firstch(register struct parse *p, register cset *cs); 1112 */ 1113 static int /* character; there is no "none" value */ 1114 firstch(p, cs) 1115 register struct parse *p; 1116 register cset *cs; 1117 { 1118 register int i; 1119 register size_t css = (size_t)p->g->csetsize; 1120 1121 for (i = 0; i < css; i++) 1122 if (CHIN(cs, i)) 1123 return((char)i); 1124 assert(never); 1125 return(0); /* arbitrary */ 1126 } 1127 1128 /* 1129 - nch - number of characters in a set 1130 == static int nch(register struct parse *p, register cset *cs); 1131 */ 1132 static int 1133 nch(p, cs) 1134 register struct parse *p; 1135 register cset *cs; 1136 { 1137 register int i; 1138 register size_t css = (size_t)p->g->csetsize; 1139 register int n = 0; 1140 1141 for (i = 0; i < css; i++) 1142 if (CHIN(cs, i)) 1143 n++; 1144 return(n); 1145 } 1146 1147 /* 1148 - mcadd - add a collating element to a cset 1149 == static void mcadd(register struct parse *p, register cset *cs, \ 1150 == register char *cp); 1151 */ 1152 static void 1153 mcadd(p, cs, cp) 1154 register struct parse *p; 1155 register cset *cs; 1156 register char *cp; 1157 { 1158 register size_t oldend = cs->smultis; 1159 1160 cs->smultis += strlen(cp) + 1; 1161 if (cs->multis == NULL) 1162 cs->multis = malloc(cs->smultis); 1163 else 1164 cs->multis = realloc(cs->multis, cs->smultis); 1165 if (cs->multis == NULL) { 1166 SETERROR(REG_ESPACE); 1167 return; 1168 } 1169 1170 (void) strcpy(cs->multis + oldend - 1, cp); 1171 cs->multis[cs->smultis - 1] = '\0'; 1172 } 1173 1174 /* 1175 - mcsub - subtract a collating element from a cset 1176 == static void mcsub(register cset *cs, register char *cp); 1177 */ 1178 static void 1179 mcsub(cs, cp) 1180 register cset *cs; 1181 register char *cp; 1182 { 1183 register char *fp = mcfind(cs, cp); 1184 register size_t len = strlen(fp); 1185 1186 assert(fp != NULL); 1187 (void) memmove(fp, fp + len + 1, 1188 cs->smultis - (fp + len + 1 - cs->multis)); 1189 cs->smultis -= len; 1190 1191 if (cs->smultis == 0) { 1192 free(cs->multis); 1193 cs->multis = NULL; 1194 return; 1195 } 1196 1197 cs->multis = realloc(cs->multis, cs->smultis); 1198 assert(cs->multis != NULL); 1199 } 1200 1201 /* 1202 - mcin - is a collating element in a cset? 1203 == static int mcin(register cset *cs, register char *cp); 1204 */ 1205 static int 1206 mcin(cs, cp) 1207 register cset *cs; 1208 register char *cp; 1209 { 1210 return(mcfind(cs, cp) != NULL); 1211 } 1212 1213 /* 1214 - mcfind - find a collating element in a cset 1215 == static char *mcfind(register cset *cs, register char *cp); 1216 */ 1217 static char * 1218 mcfind(cs, cp) 1219 register cset *cs; 1220 register char *cp; 1221 { 1222 register char *p; 1223 1224 if (cs->multis == NULL) 1225 return(NULL); 1226 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) 1227 if (strcmp(cp, p) == 0) 1228 return(p); 1229 return(NULL); 1230 } 1231 1232 /* 1233 - mcinvert - invert the list of collating elements in a cset 1234 == static void mcinvert(register struct parse *p, register cset *cs); 1235 * 1236 * This would have to know the set of possibilities. Implementation 1237 * is deferred. 1238 */ 1239 static void 1240 mcinvert(p, cs) 1241 register struct parse *p; 1242 register cset *cs; 1243 { 1244 assert(cs->multis == NULL); /* xxx */ 1245 } 1246 1247 /* 1248 - mccase - add case counterparts of the list of collating elements in a cset 1249 == static void mccase(register struct parse *p, register cset *cs); 1250 * 1251 * This would have to know the set of possibilities. Implementation 1252 * is deferred. 1253 */ 1254 static void 1255 mccase(p, cs) 1256 register struct parse *p; 1257 register cset *cs; 1258 { 1259 assert(cs->multis == NULL); /* xxx */ 1260 } 1261 1262 /* 1263 - isinsets - is this character in any sets? 1264 == static int isinsets(register struct re_guts *g, int c); 1265 */ 1266 static int /* predicate */ 1267 isinsets(g, c) 1268 register struct re_guts *g; 1269 int c; 1270 { 1271 register uch *col; 1272 register int i; 1273 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1274 register unsigned uc = (unsigned char)c; 1275 1276 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1277 if (col[uc] != 0) 1278 return(1); 1279 return(0); 1280 } 1281 1282 /* 1283 - samesets - are these two characters in exactly the same sets? 1284 == static int samesets(register struct re_guts *g, int c1, int c2); 1285 */ 1286 static int /* predicate */ 1287 samesets(g, c1, c2) 1288 register struct re_guts *g; 1289 int c1; 1290 int c2; 1291 { 1292 register uch *col; 1293 register int i; 1294 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1295 register unsigned uc1 = (unsigned char)c1; 1296 register unsigned uc2 = (unsigned char)c2; 1297 1298 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1299 if (col[uc1] != col[uc2]) 1300 return(0); 1301 return(1); 1302 } 1303 1304 /* 1305 - categorize - sort out character categories 1306 == static void categorize(struct parse *p, register struct re_guts *g); 1307 */ 1308 static void 1309 categorize(p, g) 1310 struct parse *p; 1311 register struct re_guts *g; 1312 { 1313 register cat_t *cats = g->categories; 1314 register int c; 1315 register int c2; 1316 register cat_t cat; 1317 1318 /* avoid making error situations worse */ 1319 if (p->error != 0) 1320 return; 1321 1322 for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1323 if (cats[c] == 0 && isinsets(g, c)) { 1324 cat = g->ncategories++; 1325 cats[c] = cat; 1326 for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1327 if (cats[c2] == 0 && samesets(g, c, c2)) 1328 cats[c2] = cat; 1329 } 1330 } 1331 1332 /* 1333 - dupl - emit a duplicate of a bunch of sops 1334 == static sopno dupl(register struct parse *p, sopno start, sopno finish); 1335 */ 1336 static sopno /* start of duplicate */ 1337 dupl(p, start, finish) 1338 register struct parse *p; 1339 sopno start; /* from here */ 1340 sopno finish; /* to this less one */ 1341 { 1342 register sopno ret = HERE(); 1343 register sopno len = finish - start; 1344 1345 assert(finish >= start); 1346 if (len == 0) 1347 return(ret); 1348 enlarge(p, p->ssize + len); /* this many unexpected additions */ 1349 assert(p->ssize >= p->slen + len); 1350 (void) memcpy((char *)(p->strip + p->slen), 1351 (char *)(p->strip + start), (size_t)len*sizeof(sop)); 1352 p->slen += len; 1353 return(ret); 1354 } 1355 1356 /* 1357 - doemit - emit a strip operator 1358 == static void doemit(register struct parse *p, sop op, size_t opnd); 1359 * 1360 * It might seem better to implement this as a macro with a function as 1361 * hard-case backup, but it's just too big and messy unless there are 1362 * some changes to the data structures. Maybe later. 1363 */ 1364 static void 1365 doemit(p, op, opnd) 1366 register struct parse *p; 1367 sop op; 1368 size_t opnd; 1369 { 1370 /* avoid making error situations worse */ 1371 if (p->error != 0) 1372 return; 1373 1374 /* deal with oversize operands ("can't happen", more or less) */ 1375 assert(opnd < 1<<OPSHIFT); 1376 1377 /* deal with undersized strip */ 1378 if (p->slen >= p->ssize) 1379 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ 1380 assert(p->slen < p->ssize); 1381 1382 /* finally, it's all reduced to the easy case */ 1383 p->strip[p->slen++] = SOP(op, opnd); 1384 } 1385 1386 /* 1387 - doinsert - insert a sop into the strip 1388 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); 1389 */ 1390 static void 1391 doinsert(p, op, opnd, pos) 1392 register struct parse *p; 1393 sop op; 1394 size_t opnd; 1395 sopno pos; 1396 { 1397 register sopno sn; 1398 register sop s; 1399 register int i; 1400 1401 /* avoid making error situations worse */ 1402 if (p->error != 0) 1403 return; 1404 1405 sn = HERE(); 1406 EMIT(op, opnd); /* do checks, ensure space */ 1407 assert(HERE() == sn+1); 1408 s = p->strip[sn]; 1409 1410 /* adjust paren pointers */ 1411 assert(pos > 0); 1412 for (i = 1; i < NPAREN; i++) { 1413 if (p->pbegin[i] >= pos) { 1414 p->pbegin[i]++; 1415 } 1416 if (p->pend[i] >= pos) { 1417 p->pend[i]++; 1418 } 1419 } 1420 1421 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1422 (HERE()-pos-1)*sizeof(sop)); 1423 p->strip[pos] = s; 1424 } 1425 1426 /* 1427 - dofwd - complete a forward reference 1428 == static void dofwd(register struct parse *p, sopno pos, sop value); 1429 */ 1430 static void 1431 dofwd(p, pos, value) 1432 register struct parse *p; 1433 register sopno pos; 1434 sop value; 1435 { 1436 /* avoid making error situations worse */ 1437 if (p->error != 0) 1438 return; 1439 1440 assert(value < 1<<OPSHIFT); 1441 p->strip[pos] = OP(p->strip[pos]) | value; 1442 } 1443 1444 /* 1445 - enlarge - enlarge the strip 1446 == static void enlarge(register struct parse *p, sopno size); 1447 */ 1448 static void 1449 enlarge(p, size) 1450 register struct parse *p; 1451 register sopno size; 1452 { 1453 register sop *sp; 1454 1455 if (p->ssize >= size) 1456 return; 1457 1458 sp = (sop *)realloc(p->strip, size*sizeof(sop)); 1459 if (sp == NULL) { 1460 SETERROR(REG_ESPACE); 1461 return; 1462 } 1463 p->strip = sp; 1464 p->ssize = size; 1465 } 1466 1467 /* 1468 - stripsnug - compact the strip 1469 == static void stripsnug(register struct parse *p, register struct re_guts *g); 1470 */ 1471 static void 1472 stripsnug(p, g) 1473 register struct parse *p; 1474 register struct re_guts *g; 1475 { 1476 g->nstates = p->slen; 1477 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); 1478 if (g->strip == NULL) { 1479 SETERROR(REG_ESPACE); 1480 g->strip = p->strip; 1481 } 1482 } 1483 1484 /* 1485 - findmust - fill in must and mlen with longest mandatory literal string 1486 == static void findmust(register struct parse *p, register struct re_guts *g); 1487 * 1488 * This algorithm could do fancy things like analyzing the operands of | 1489 * for common subsequences. Someday. This code is simple and finds most 1490 * of the interesting cases. 1491 * 1492 * Note that must and mlen got initialized during setup. 1493 */ 1494 static void 1495 findmust(p, g) 1496 struct parse *p; 1497 register struct re_guts *g; 1498 { 1499 register sop *scan; 1500 sop *start; 1501 register sop *newstart; 1502 register sopno newlen; 1503 register sop s; 1504 register char *cp; 1505 register sopno i; 1506 1507 /* avoid making error situations worse */ 1508 if (p->error != 0) 1509 return; 1510 1511 /* find the longest OCHAR sequence in strip */ 1512 newlen = 0; 1513 scan = g->strip + 1; 1514 do { 1515 s = *scan++; 1516 switch (OP(s)) { 1517 case OCHAR: /* sequence member */ 1518 if (newlen == 0) /* new sequence */ 1519 newstart = scan - 1; 1520 newlen++; 1521 break; 1522 case OPLUS_: /* things that don't break one */ 1523 case OLPAREN: 1524 case ORPAREN: 1525 break; 1526 case OQUEST_: /* things that must be skipped */ 1527 case OCH_: 1528 scan--; 1529 do { 1530 scan += OPND(s); 1531 s = *scan; 1532 /* assert() interferes w debug printouts */ 1533 if (OP(s) != O_QUEST && OP(s) != O_CH && 1534 OP(s) != OOR2) { 1535 g->iflags |= BAD; 1536 return; 1537 } 1538 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1539 /* fallthrough */ 1540 default: /* things that break a sequence */ 1541 if (newlen > g->mlen) { /* ends one */ 1542 start = newstart; 1543 g->mlen = newlen; 1544 } 1545 newlen = 0; 1546 break; 1547 } 1548 } while (OP(s) != OEND); 1549 1550 if (g->mlen == 0) /* there isn't one */ 1551 return; 1552 1553 /* turn it into a character string */ 1554 g->must = malloc((size_t)g->mlen + 1); 1555 if (g->must == NULL) { /* argh; just forget it */ 1556 g->mlen = 0; 1557 return; 1558 } 1559 cp = g->must; 1560 scan = start; 1561 for (i = g->mlen; i > 0; i--) { 1562 while (OP(s = *scan++) != OCHAR) 1563 continue; 1564 assert(cp < g->must + g->mlen); 1565 *cp++ = (char)OPND(s); 1566 } 1567 assert(cp == g->must + g->mlen); 1568 *cp++ = '\0'; /* just on general principles */ 1569 } 1570 1571 /* 1572 - pluscount - count + nesting 1573 == static sopno pluscount(register struct parse *p, register struct re_guts *g); 1574 */ 1575 static sopno /* nesting depth */ 1576 pluscount(p, g) 1577 struct parse *p; 1578 register struct re_guts *g; 1579 { 1580 register sop *scan; 1581 register sop s; 1582 register sopno plusnest = 0; 1583 register sopno maxnest = 0; 1584 1585 if (p->error != 0) 1586 return(0); /* there may not be an OEND */ 1587 1588 scan = g->strip + 1; 1589 do { 1590 s = *scan++; 1591 switch (OP(s)) { 1592 case OPLUS_: 1593 plusnest++; 1594 break; 1595 case O_PLUS: 1596 if (plusnest > maxnest) 1597 maxnest = plusnest; 1598 plusnest--; 1599 break; 1600 } 1601 } while (OP(s) != OEND); 1602 if (plusnest != 0) 1603 g->iflags |= BAD; 1604 return(maxnest); 1605 } 1606