1 /* $OpenBSD: regcomp.c,v 1.32 2017/10/30 06:48:20 otto Exp $ */ 2 /*- 3 * Copyright (c) 1992, 1993, 1994 Henry Spencer. 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Henry Spencer. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 35 */ 36 37 #include <sys/types.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <ctype.h> 41 #include <limits.h> 42 #include <stdlib.h> 43 #include <regex.h> 44 45 #include "utils.h" 46 #include "regex2.h" 47 48 #include "cclass.h" 49 #include "cname.h" 50 51 /* 52 * parse structure, passed up and down to avoid global variables and 53 * other clumsinesses 54 */ 55 struct parse { 56 char *next; /* next character in RE */ 57 char *end; /* end of string (-> NUL normally) */ 58 int error; /* has an error been seen? */ 59 sop *strip; /* malloced strip */ 60 sopno ssize; /* malloced strip size (allocated) */ 61 sopno slen; /* malloced strip length (used) */ 62 int ncsalloc; /* number of csets allocated */ 63 struct re_guts *g; 64 # define NPAREN 10 /* we need to remember () 1-9 for back refs */ 65 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ 66 sopno pend[NPAREN]; /* -> ) ([0] unused) */ 67 }; 68 69 static void p_ere(struct parse *, int); 70 static void p_ere_exp(struct parse *); 71 static void p_str(struct parse *); 72 static void p_bre(struct parse *, int, int); 73 static int p_simp_re(struct parse *, int); 74 static int p_count(struct parse *); 75 static void p_bracket(struct parse *); 76 static void p_b_term(struct parse *, cset *); 77 static void p_b_cclass(struct parse *, cset *); 78 static void p_b_eclass(struct parse *, cset *); 79 static char p_b_symbol(struct parse *); 80 static char p_b_coll_elem(struct parse *, int); 81 static char othercase(int); 82 static void bothcases(struct parse *, int); 83 static void ordinary(struct parse *, int); 84 static void backslash(struct parse *, int); 85 static void nonnewline(struct parse *); 86 static void repeat(struct parse *, sopno, int, int); 87 static int seterr(struct parse *, int); 88 static cset *allocset(struct parse *); 89 static void freeset(struct parse *, cset *); 90 static int freezeset(struct parse *, cset *); 91 static int firstch(struct parse *, cset *); 92 static int nch(struct parse *, cset *); 93 static void mcadd(struct parse *, cset *, char *); 94 static void mcinvert(struct parse *, cset *); 95 static void mccase(struct parse *, cset *); 96 static int isinsets(struct re_guts *, int); 97 static int samesets(struct re_guts *, int, int); 98 static void categorize(struct parse *, struct re_guts *); 99 static sopno dupl(struct parse *, sopno, sopno); 100 static void doemit(struct parse *, sop, size_t); 101 static void doinsert(struct parse *, sop, size_t, sopno); 102 static void dofwd(struct parse *, sopno, sop); 103 static int enlarge(struct parse *, sopno); 104 static void stripsnug(struct parse *, struct re_guts *); 105 static void findmust(struct parse *, struct re_guts *); 106 static sopno pluscount(struct parse *, struct re_guts *); 107 108 static char nuls[10]; /* place to point scanner in event of error */ 109 110 /* 111 * macros for use with parse structure 112 * BEWARE: these know that the parse structure is named `p' !!! 113 */ 114 #define PEEK() (*p->next) 115 #define PEEK2() (*(p->next+1)) 116 #define MORE() (p->next < p->end) 117 #define MORE2() (p->next+1 < p->end) 118 #define SEE(c) (MORE() && PEEK() == (c)) 119 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) 120 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) 121 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) 122 #define NEXT() (p->next++) 123 #define NEXT2() (p->next += 2) 124 #define NEXTn(n) (p->next += (n)) 125 #define GETNEXT() (*p->next++) 126 #define SETERROR(e) seterr(p, (e)) 127 #define REQUIRE(co, e) (void) ((co) || SETERROR(e)) 128 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) 129 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) 130 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) 131 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) 132 #define HERE() (p->slen) 133 #define THERE() (p->slen - 1) 134 #define THERETHERE() (p->slen - 2) 135 #define DROP(n) (p->slen -= (n)) 136 137 #ifndef NDEBUG 138 static int never = 0; /* for use in asserts; shuts lint up */ 139 #else 140 #define never 0 /* some <assert.h>s have bugs too */ 141 #endif 142 143 /* 144 - regcomp - interface for parser and compilation 145 */ 146 int /* 0 success, otherwise REG_something */ 147 regcomp(regex_t *preg, const char *pattern, int cflags) 148 { 149 struct parse pa; 150 struct re_guts *g; 151 struct parse *p = &pa; 152 int i; 153 size_t len; 154 #ifdef REDEBUG 155 # define GOODFLAGS(f) (f) 156 #else 157 # define GOODFLAGS(f) ((f)&~REG_DUMP) 158 #endif 159 160 cflags = GOODFLAGS(cflags); 161 if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) 162 return(REG_INVARG); 163 164 if (cflags®_PEND) { 165 if (preg->re_endp < pattern) 166 return(REG_INVARG); 167 len = preg->re_endp - pattern; 168 } else 169 len = strlen((char *)pattern); 170 171 /* do the mallocs early so failure handling is easy */ 172 g = malloc(sizeof(struct re_guts)); 173 if (g == NULL) 174 return(REG_ESPACE); 175 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ 176 p->strip = reallocarray(NULL, p->ssize, sizeof(sop)); 177 p->slen = 0; 178 if (p->strip == NULL) { 179 free(g); 180 return(REG_ESPACE); 181 } 182 183 /* set things up */ 184 p->g = g; 185 p->next = (char *)pattern; /* convenience; we do not modify it */ 186 p->end = p->next + len; 187 p->error = 0; 188 p->ncsalloc = 0; 189 for (i = 0; i < NPAREN; i++) { 190 p->pbegin[i] = 0; 191 p->pend[i] = 0; 192 } 193 g->csetsize = NC; 194 g->sets = NULL; 195 g->setbits = NULL; 196 g->ncsets = 0; 197 g->cflags = cflags; 198 g->iflags = 0; 199 g->nbol = 0; 200 g->neol = 0; 201 g->must = NULL; 202 g->mlen = 0; 203 g->nsub = 0; 204 g->ncategories = 1; /* category 0 is "everything else" */ 205 g->categories = &g->catspace[-(CHAR_MIN)]; 206 memset(g->catspace, 0, sizeof(g->catspace)); 207 g->backrefs = 0; 208 209 /* do it */ 210 EMIT(OEND, 0); 211 g->firststate = THERE(); 212 if (cflags®_EXTENDED) 213 p_ere(p, OUT); 214 else if (cflags®_NOSPEC) 215 p_str(p); 216 else 217 p_bre(p, OUT, OUT); 218 EMIT(OEND, 0); 219 g->laststate = THERE(); 220 221 /* tidy up loose ends and fill things in */ 222 categorize(p, g); 223 stripsnug(p, g); 224 findmust(p, g); 225 g->nplus = pluscount(p, g); 226 g->magic = MAGIC2; 227 preg->re_nsub = g->nsub; 228 preg->re_g = g; 229 preg->re_magic = MAGIC1; 230 #ifndef REDEBUG 231 /* not debugging, so can't rely on the assert() in regexec() */ 232 if (g->iflags&BAD) 233 SETERROR(REG_ASSERT); 234 #endif 235 236 /* win or lose, we're done */ 237 if (p->error != 0) /* lose */ 238 regfree(preg); 239 return(p->error); 240 } 241 242 /* 243 - p_ere - ERE parser top level, concatenation and alternation 244 */ 245 static void 246 p_ere(struct parse *p, int stop) /* character this ERE should end at */ 247 { 248 char c; 249 sopno prevback; 250 sopno prevfwd; 251 sopno conc; 252 int first = 1; /* is this the first alternative? */ 253 254 for (;;) { 255 /* do a bunch of concatenated expressions */ 256 conc = HERE(); 257 while (MORE() && (c = PEEK()) != '|' && c != stop) 258 p_ere_exp(p); 259 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ 260 261 if (!EAT('|')) 262 break; /* NOTE BREAK OUT */ 263 264 if (first) { 265 INSERT(OCH_, conc); /* offset is wrong */ 266 prevfwd = conc; 267 prevback = conc; 268 first = 0; 269 } 270 ASTERN(OOR1, prevback); 271 prevback = THERE(); 272 AHEAD(prevfwd); /* fix previous offset */ 273 prevfwd = HERE(); 274 EMIT(OOR2, 0); /* offset is very wrong */ 275 } 276 277 if (!first) { /* tail-end fixups */ 278 AHEAD(prevfwd); 279 ASTERN(O_CH, prevback); 280 } 281 282 assert(!MORE() || SEE(stop)); 283 } 284 285 /* 286 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op 287 */ 288 static void 289 p_ere_exp(struct parse *p) 290 { 291 char c; 292 sopno pos; 293 int count; 294 int count2; 295 sopno subno; 296 int wascaret = 0; 297 298 assert(MORE()); /* caller should have ensured this */ 299 c = GETNEXT(); 300 301 pos = HERE(); 302 switch (c) { 303 case '(': 304 REQUIRE(MORE(), REG_EPAREN); 305 p->g->nsub++; 306 subno = p->g->nsub; 307 if (subno < NPAREN) 308 p->pbegin[subno] = HERE(); 309 EMIT(OLPAREN, subno); 310 if (!SEE(')')) 311 p_ere(p, ')'); 312 if (subno < NPAREN) { 313 p->pend[subno] = HERE(); 314 assert(p->pend[subno] != 0); 315 } 316 EMIT(ORPAREN, subno); 317 REQUIRE(MORE() && GETNEXT() == ')', REG_EPAREN); 318 break; 319 case '^': 320 EMIT(OBOL, 0); 321 p->g->iflags |= USEBOL; 322 p->g->nbol++; 323 wascaret = 1; 324 break; 325 case '$': 326 EMIT(OEOL, 0); 327 p->g->iflags |= USEEOL; 328 p->g->neol++; 329 break; 330 case '|': 331 SETERROR(REG_EMPTY); 332 break; 333 case '*': 334 case '+': 335 case '?': 336 SETERROR(REG_BADRPT); 337 break; 338 case '.': 339 if (p->g->cflags®_NEWLINE) 340 nonnewline(p); 341 else 342 EMIT(OANY, 0); 343 break; 344 case '[': 345 p_bracket(p); 346 break; 347 case '\\': 348 REQUIRE(MORE(), REG_EESCAPE); 349 c = GETNEXT(); 350 backslash(p, c); 351 break; 352 case '{': /* okay as ordinary except if digit follows */ 353 REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); 354 /* FALLTHROUGH */ 355 default: 356 ordinary(p, c); 357 break; 358 } 359 360 if (!MORE()) 361 return; 362 c = PEEK(); 363 /* we call { a repetition if followed by a digit */ 364 if (!( c == '*' || c == '+' || c == '?' || 365 (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) 366 return; /* no repetition, we're done */ 367 NEXT(); 368 369 REQUIRE(!wascaret, REG_BADRPT); 370 switch (c) { 371 case '*': /* implemented as +? */ 372 /* this case does not require the (y|) trick, noKLUDGE */ 373 INSERT(OPLUS_, pos); 374 ASTERN(O_PLUS, pos); 375 INSERT(OQUEST_, pos); 376 ASTERN(O_QUEST, pos); 377 break; 378 case '+': 379 INSERT(OPLUS_, pos); 380 ASTERN(O_PLUS, pos); 381 break; 382 case '?': 383 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 384 INSERT(OCH_, pos); /* offset slightly wrong */ 385 ASTERN(OOR1, pos); /* this one's right */ 386 AHEAD(pos); /* fix the OCH_ */ 387 EMIT(OOR2, 0); /* offset very wrong... */ 388 AHEAD(THERE()); /* ...so fix it */ 389 ASTERN(O_CH, THERETHERE()); 390 break; 391 case '{': 392 count = p_count(p); 393 if (EAT(',')) { 394 if (isdigit((uch)PEEK())) { 395 count2 = p_count(p); 396 REQUIRE(count <= count2, REG_BADBR); 397 } else /* single number with comma */ 398 count2 = INFINITY; 399 } else /* just a single number */ 400 count2 = count; 401 repeat(p, pos, count, count2); 402 if (!EAT('}')) { /* error heuristics */ 403 while (MORE() && PEEK() != '}') 404 NEXT(); 405 REQUIRE(MORE(), REG_EBRACE); 406 SETERROR(REG_BADBR); 407 } 408 break; 409 } 410 411 if (!MORE()) 412 return; 413 c = PEEK(); 414 if (!( c == '*' || c == '+' || c == '?' || 415 (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) 416 return; 417 SETERROR(REG_BADRPT); 418 } 419 420 /* 421 - p_str - string (no metacharacters) "parser" 422 */ 423 static void 424 p_str(struct parse *p) 425 { 426 REQUIRE(MORE(), REG_EMPTY); 427 while (MORE()) 428 ordinary(p, GETNEXT()); 429 } 430 431 /* 432 - p_bre - BRE parser top level, anchoring and concatenation 433 * Giving end1 as OUT essentially eliminates the end1/end2 check. 434 * 435 * This implementation is a bit of a kludge, in that a trailing $ is first 436 * taken as an ordinary character and then revised to be an anchor. The 437 * only undesirable side effect is that '$' gets included as a character 438 * category in such cases. This is fairly harmless; not worth fixing. 439 * The amount of lookahead needed to avoid this kludge is excessive. 440 */ 441 static void 442 p_bre(struct parse *p, 443 int end1, /* first terminating character */ 444 int end2) /* second terminating character */ 445 { 446 sopno start = HERE(); 447 int first = 1; /* first subexpression? */ 448 int wasdollar = 0; 449 450 if (EAT('^')) { 451 EMIT(OBOL, 0); 452 p->g->iflags |= USEBOL; 453 p->g->nbol++; 454 } 455 while (MORE() && !SEETWO(end1, end2)) { 456 wasdollar = p_simp_re(p, first); 457 first = 0; 458 } 459 if (wasdollar) { /* oops, that was a trailing anchor */ 460 DROP(1); 461 EMIT(OEOL, 0); 462 p->g->iflags |= USEEOL; 463 p->g->neol++; 464 } 465 466 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ 467 } 468 469 /* 470 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition 471 */ 472 static int /* was the simple RE an unbackslashed $? */ 473 p_simp_re(struct parse *p, 474 int starordinary) /* is a leading * an ordinary character? */ 475 { 476 int c; 477 int count; 478 int count2; 479 sopno pos; 480 int i; 481 sopno subno; 482 # define BACKSL (1<<CHAR_BIT) 483 484 pos = HERE(); /* repetion op, if any, covers from here */ 485 486 assert(MORE()); /* caller should have ensured this */ 487 c = GETNEXT(); 488 if (c == '\\') { 489 REQUIRE(MORE(), REG_EESCAPE); 490 c = BACKSL | GETNEXT(); 491 } 492 switch (c) { 493 case '.': 494 if (p->g->cflags®_NEWLINE) 495 nonnewline(p); 496 else 497 EMIT(OANY, 0); 498 break; 499 case '[': 500 p_bracket(p); 501 break; 502 case BACKSL|'<': 503 EMIT(OBOW, 0); 504 break; 505 case BACKSL|'>': 506 EMIT(OEOW, 0); 507 break; 508 case BACKSL|'{': 509 SETERROR(REG_BADRPT); 510 break; 511 case BACKSL|'(': 512 p->g->nsub++; 513 subno = p->g->nsub; 514 if (subno < NPAREN) 515 p->pbegin[subno] = HERE(); 516 EMIT(OLPAREN, subno); 517 /* the MORE here is an error heuristic */ 518 if (MORE() && !SEETWO('\\', ')')) 519 p_bre(p, '\\', ')'); 520 if (subno < NPAREN) { 521 p->pend[subno] = HERE(); 522 assert(p->pend[subno] != 0); 523 } 524 EMIT(ORPAREN, subno); 525 REQUIRE(EATTWO('\\', ')'), REG_EPAREN); 526 break; 527 case BACKSL|')': /* should not get here -- must be user */ 528 case BACKSL|'}': 529 SETERROR(REG_EPAREN); 530 break; 531 case BACKSL|'1': 532 case BACKSL|'2': 533 case BACKSL|'3': 534 case BACKSL|'4': 535 case BACKSL|'5': 536 case BACKSL|'6': 537 case BACKSL|'7': 538 case BACKSL|'8': 539 case BACKSL|'9': 540 i = (c&~BACKSL) - '0'; 541 assert(i < NPAREN); 542 if (p->pend[i] != 0) { 543 assert(i <= p->g->nsub); 544 EMIT(OBACK_, i); 545 assert(p->pbegin[i] != 0); 546 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); 547 assert(OP(p->strip[p->pend[i]]) == ORPAREN); 548 (void) dupl(p, p->pbegin[i]+1, p->pend[i]); 549 EMIT(O_BACK, i); 550 } else 551 SETERROR(REG_ESUBREG); 552 p->g->backrefs = 1; 553 break; 554 case '*': 555 REQUIRE(starordinary, REG_BADRPT); 556 /* FALLTHROUGH */ 557 default: 558 ordinary(p, (char)c); 559 break; 560 } 561 562 if (EAT('*')) { /* implemented as +? */ 563 /* this case does not require the (y|) trick, noKLUDGE */ 564 INSERT(OPLUS_, pos); 565 ASTERN(O_PLUS, pos); 566 INSERT(OQUEST_, pos); 567 ASTERN(O_QUEST, pos); 568 } else if (EATTWO('\\', '{')) { 569 count = p_count(p); 570 if (EAT(',')) { 571 if (MORE() && isdigit((uch)PEEK())) { 572 count2 = p_count(p); 573 REQUIRE(count <= count2, REG_BADBR); 574 } else /* single number with comma */ 575 count2 = INFINITY; 576 } else /* just a single number */ 577 count2 = count; 578 repeat(p, pos, count, count2); 579 if (!EATTWO('\\', '}')) { /* error heuristics */ 580 while (MORE() && !SEETWO('\\', '}')) 581 NEXT(); 582 REQUIRE(MORE(), REG_EBRACE); 583 SETERROR(REG_BADBR); 584 } 585 } else if (c == '$') /* $ (but not \$) ends it */ 586 return(1); 587 588 return(0); 589 } 590 591 /* 592 - p_count - parse a repetition count 593 */ 594 static int /* the value */ 595 p_count(struct parse *p) 596 { 597 int count = 0; 598 int ndigits = 0; 599 600 while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) { 601 count = count*10 + (GETNEXT() - '0'); 602 ndigits++; 603 } 604 605 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); 606 return(count); 607 } 608 609 /* 610 - p_bracket - parse a bracketed character list 611 * 612 * Note a significant property of this code: if the allocset() did SETERROR, 613 * no set operations are done. 614 */ 615 static void 616 p_bracket(struct parse *p) 617 { 618 cset *cs; 619 int invert = 0; 620 621 /* Dept of Truly Sickening Special-Case Kludges */ 622 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { 623 EMIT(OBOW, 0); 624 NEXTn(6); 625 return; 626 } 627 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { 628 EMIT(OEOW, 0); 629 NEXTn(6); 630 return; 631 } 632 633 if ((cs = allocset(p)) == NULL) { 634 /* allocset did set error status in p */ 635 return; 636 } 637 638 if (EAT('^')) 639 invert++; /* make note to invert set at end */ 640 if (EAT(']')) 641 CHadd(cs, ']'); 642 else if (EAT('-')) 643 CHadd(cs, '-'); 644 while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) 645 p_b_term(p, cs); 646 if (EAT('-')) 647 CHadd(cs, '-'); 648 REQUIRE(MORE() && GETNEXT() == ']', REG_EBRACK); 649 650 if (p->error != 0) { /* don't mess things up further */ 651 freeset(p, cs); 652 return; 653 } 654 655 if (p->g->cflags®_ICASE) { 656 int i; 657 int ci; 658 659 for (i = p->g->csetsize - 1; i >= 0; i--) 660 if (CHIN(cs, i) && isalpha(i)) { 661 ci = othercase(i); 662 if (ci != i) 663 CHadd(cs, ci); 664 } 665 if (cs->multis != NULL) 666 mccase(p, cs); 667 } 668 if (invert) { 669 int i; 670 671 for (i = p->g->csetsize - 1; i >= 0; i--) 672 if (CHIN(cs, i)) 673 CHsub(cs, i); 674 else 675 CHadd(cs, i); 676 if (p->g->cflags®_NEWLINE) 677 CHsub(cs, '\n'); 678 if (cs->multis != NULL) 679 mcinvert(p, cs); 680 } 681 682 assert(cs->multis == NULL); /* xxx */ 683 684 if (nch(p, cs) == 1) { /* optimize singleton sets */ 685 ordinary(p, firstch(p, cs)); 686 freeset(p, cs); 687 } else 688 EMIT(OANYOF, freezeset(p, cs)); 689 } 690 691 /* 692 - p_b_term - parse one term of a bracketed character list 693 */ 694 static void 695 p_b_term(struct parse *p, cset *cs) 696 { 697 char c; 698 char start, finish; 699 int i; 700 701 /* classify what we've got */ 702 switch ((MORE()) ? PEEK() : '\0') { 703 case '[': 704 c = (MORE2()) ? PEEK2() : '\0'; 705 break; 706 case '-': 707 SETERROR(REG_ERANGE); 708 return; /* NOTE RETURN */ 709 break; 710 default: 711 c = '\0'; 712 break; 713 } 714 715 switch (c) { 716 case ':': /* character class */ 717 NEXT2(); 718 REQUIRE(MORE(), REG_EBRACK); 719 c = PEEK(); 720 REQUIRE(c != '-' && c != ']', REG_ECTYPE); 721 p_b_cclass(p, cs); 722 REQUIRE(MORE(), REG_EBRACK); 723 REQUIRE(EATTWO(':', ']'), REG_ECTYPE); 724 break; 725 case '=': /* equivalence class */ 726 NEXT2(); 727 REQUIRE(MORE(), REG_EBRACK); 728 c = PEEK(); 729 REQUIRE(c != '-' && c != ']', REG_ECOLLATE); 730 p_b_eclass(p, cs); 731 REQUIRE(MORE(), REG_EBRACK); 732 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); 733 break; 734 default: /* symbol, ordinary character, or range */ 735 /* xxx revision needed for multichar stuff */ 736 start = p_b_symbol(p); 737 if (SEE('-') && MORE2() && PEEK2() != ']') { 738 /* range */ 739 NEXT(); 740 if (EAT('-')) 741 finish = '-'; 742 else 743 finish = p_b_symbol(p); 744 } else 745 finish = start; 746 /* xxx what about signed chars here... */ 747 REQUIRE(start <= finish, REG_ERANGE); 748 for (i = start; i <= finish; i++) 749 CHadd(cs, i); 750 break; 751 } 752 } 753 754 /* 755 - p_b_cclass - parse a character-class name and deal with it 756 */ 757 static void 758 p_b_cclass(struct parse *p, cset *cs) 759 { 760 char *sp = p->next; 761 struct cclass *cp; 762 size_t len; 763 char *u; 764 char c; 765 766 while (MORE() && isalpha((uch)PEEK())) 767 NEXT(); 768 len = p->next - sp; 769 for (cp = cclasses; cp->name != NULL; cp++) 770 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') 771 break; 772 if (cp->name == NULL) { 773 /* oops, didn't find it */ 774 SETERROR(REG_ECTYPE); 775 return; 776 } 777 778 u = cp->chars; 779 while ((c = *u++) != '\0') 780 CHadd(cs, c); 781 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) 782 MCadd(p, cs, u); 783 } 784 785 /* 786 - p_b_eclass - parse an equivalence-class name and deal with it 787 * 788 * This implementation is incomplete. xxx 789 */ 790 static void 791 p_b_eclass(struct parse *p, cset *cs) 792 { 793 char c; 794 795 c = p_b_coll_elem(p, '='); 796 CHadd(cs, c); 797 } 798 799 /* 800 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol 801 */ 802 static char /* value of symbol */ 803 p_b_symbol(struct parse *p) 804 { 805 char value; 806 807 REQUIRE(MORE(), REG_EBRACK); 808 if (!EATTWO('[', '.')) 809 return(GETNEXT()); 810 811 /* collating symbol */ 812 value = p_b_coll_elem(p, '.'); 813 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); 814 return(value); 815 } 816 817 /* 818 - p_b_coll_elem - parse a collating-element name and look it up 819 */ 820 static char /* value of collating element */ 821 p_b_coll_elem(struct parse *p, 822 int endc) /* name ended by endc,']' */ 823 { 824 char *sp = p->next; 825 struct cname *cp; 826 size_t len; 827 828 while (MORE() && !SEETWO(endc, ']')) 829 NEXT(); 830 if (!MORE()) { 831 SETERROR(REG_EBRACK); 832 return(0); 833 } 834 len = p->next - sp; 835 for (cp = cnames; cp->name != NULL; cp++) 836 if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len) 837 return(cp->code); /* known name */ 838 if (len == 1) 839 return(*sp); /* single character */ 840 SETERROR(REG_ECOLLATE); /* neither */ 841 return(0); 842 } 843 844 /* 845 - othercase - return the case counterpart of an alphabetic 846 */ 847 static char /* if no counterpart, return ch */ 848 othercase(int ch) 849 { 850 ch = (uch)ch; 851 assert(isalpha(ch)); 852 if (isupper(ch)) 853 return ((uch)tolower(ch)); 854 else if (islower(ch)) 855 return ((uch)toupper(ch)); 856 else /* peculiar, but could happen */ 857 return(ch); 858 } 859 860 /* 861 - bothcases - emit a dualcase version of a two-case character 862 * 863 * Boy, is this implementation ever a kludge... 864 */ 865 static void 866 bothcases(struct parse *p, int ch) 867 { 868 char *oldnext = p->next; 869 char *oldend = p->end; 870 char bracket[3]; 871 872 ch = (uch)ch; 873 assert(othercase(ch) != ch); /* p_bracket() would recurse */ 874 p->next = bracket; 875 p->end = bracket+2; 876 bracket[0] = ch; 877 bracket[1] = ']'; 878 bracket[2] = '\0'; 879 p_bracket(p); 880 assert(p->next == bracket+2); 881 p->next = oldnext; 882 p->end = oldend; 883 } 884 885 /* 886 - ordinary - emit an ordinary character 887 */ 888 static void 889 ordinary(struct parse *p, int ch) 890 { 891 cat_t *cap = p->g->categories; 892 893 if ((p->g->cflags®_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) 894 bothcases(p, ch); 895 else { 896 EMIT(OCHAR, (uch)ch); 897 if (cap[ch] == 0) 898 cap[ch] = p->g->ncategories++; 899 } 900 } 901 902 /* 903 * do something magic with this character, but only if it's extra magic 904 */ 905 static void 906 backslash(struct parse *p, int ch) 907 { 908 switch (ch) { 909 case '<': 910 EMIT(OBOW, 0); 911 break; 912 case '>': 913 EMIT(OEOW, 0); 914 break; 915 default: 916 ordinary(p, ch); 917 break; 918 } 919 } 920 921 /* 922 - nonnewline - emit REG_NEWLINE version of OANY 923 * 924 * Boy, is this implementation ever a kludge... 925 */ 926 static void 927 nonnewline(struct parse *p) 928 { 929 char *oldnext = p->next; 930 char *oldend = p->end; 931 char bracket[4]; 932 933 p->next = bracket; 934 p->end = bracket+3; 935 bracket[0] = '^'; 936 bracket[1] = '\n'; 937 bracket[2] = ']'; 938 bracket[3] = '\0'; 939 p_bracket(p); 940 assert(p->next == bracket+3); 941 p->next = oldnext; 942 p->end = oldend; 943 } 944 945 /* 946 - repeat - generate code for a bounded repetition, recursively if needed 947 */ 948 static void 949 repeat(struct parse *p, 950 sopno start, /* operand from here to end of strip */ 951 int from, /* repeated from this number */ 952 int to) /* to this number of times (maybe INFINITY) */ 953 { 954 sopno finish = HERE(); 955 # define N 2 956 # define INF 3 957 # define REP(f, t) ((f)*8 + (t)) 958 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) 959 sopno copy; 960 961 if (p->error != 0) /* head off possible runaway recursion */ 962 return; 963 964 assert(from <= to); 965 966 switch (REP(MAP(from), MAP(to))) { 967 case REP(0, 0): /* must be user doing this */ 968 DROP(finish-start); /* drop the operand */ 969 break; 970 case REP(0, 1): /* as x{1,1}? */ 971 case REP(0, N): /* as x{1,n}? */ 972 case REP(0, INF): /* as x{1,}? */ 973 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 974 INSERT(OCH_, start); /* offset is wrong... */ 975 repeat(p, start+1, 1, to); 976 ASTERN(OOR1, start); 977 AHEAD(start); /* ... fix it */ 978 EMIT(OOR2, 0); 979 AHEAD(THERE()); 980 ASTERN(O_CH, THERETHERE()); 981 break; 982 case REP(1, 1): /* trivial case */ 983 /* done */ 984 break; 985 case REP(1, N): /* as x?x{1,n-1} */ 986 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ 987 INSERT(OCH_, start); 988 ASTERN(OOR1, start); 989 AHEAD(start); 990 EMIT(OOR2, 0); /* offset very wrong... */ 991 AHEAD(THERE()); /* ...so fix it */ 992 ASTERN(O_CH, THERETHERE()); 993 copy = dupl(p, start+1, finish+1); 994 assert(copy == finish+4); 995 repeat(p, copy, 1, to-1); 996 break; 997 case REP(1, INF): /* as x+ */ 998 INSERT(OPLUS_, start); 999 ASTERN(O_PLUS, start); 1000 break; 1001 case REP(N, N): /* as xx{m-1,n-1} */ 1002 copy = dupl(p, start, finish); 1003 repeat(p, copy, from-1, to-1); 1004 break; 1005 case REP(N, INF): /* as xx{n-1,INF} */ 1006 copy = dupl(p, start, finish); 1007 repeat(p, copy, from-1, to); 1008 break; 1009 default: /* "can't happen" */ 1010 SETERROR(REG_ASSERT); /* just in case */ 1011 break; 1012 } 1013 } 1014 1015 /* 1016 - seterr - set an error condition 1017 */ 1018 static int /* useless but makes type checking happy */ 1019 seterr(struct parse *p, int e) 1020 { 1021 if (p->error == 0) /* keep earliest error condition */ 1022 p->error = e; 1023 p->next = nuls; /* try to bring things to a halt */ 1024 p->end = nuls; 1025 return(0); /* make the return value well-defined */ 1026 } 1027 1028 /* 1029 - allocset - allocate a set of characters for [] 1030 */ 1031 static cset * 1032 allocset(struct parse *p) 1033 { 1034 int no = p->g->ncsets++; 1035 size_t nc; 1036 size_t nbytes; 1037 cset *cs; 1038 size_t css = (size_t)p->g->csetsize; 1039 int i; 1040 1041 if (no >= p->ncsalloc) { /* need another column of space */ 1042 void *ptr; 1043 1044 p->ncsalloc += CHAR_BIT; 1045 nc = p->ncsalloc; 1046 assert(nc % CHAR_BIT == 0); 1047 1048 ptr = reallocarray(p->g->sets, nc, sizeof(cset)); 1049 if (ptr == NULL) 1050 goto nomem; 1051 p->g->sets = ptr; 1052 1053 ptr = reallocarray(p->g->setbits, nc / CHAR_BIT, css); 1054 if (ptr == NULL) 1055 goto nomem; 1056 nbytes = (nc / CHAR_BIT) * css; 1057 p->g->setbits = ptr; 1058 1059 for (i = 0; i < no; i++) 1060 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); 1061 1062 (void) memset((char *)p->g->setbits + (nbytes - css), 0, css); 1063 } 1064 /* XXX should not happen */ 1065 if (p->g->sets == NULL || p->g->setbits == NULL) 1066 goto nomem; 1067 1068 cs = &p->g->sets[no]; 1069 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); 1070 cs->mask = 1 << ((no) % CHAR_BIT); 1071 cs->hash = 0; 1072 cs->smultis = 0; 1073 cs->multis = NULL; 1074 1075 return(cs); 1076 nomem: 1077 free(p->g->sets); 1078 p->g->sets = NULL; 1079 free(p->g->setbits); 1080 p->g->setbits = NULL; 1081 1082 SETERROR(REG_ESPACE); 1083 /* caller's responsibility not to do set ops */ 1084 return(NULL); 1085 } 1086 1087 /* 1088 - freeset - free a now-unused set 1089 */ 1090 static void 1091 freeset(struct parse *p, cset *cs) 1092 { 1093 int i; 1094 cset *top = &p->g->sets[p->g->ncsets]; 1095 size_t css = (size_t)p->g->csetsize; 1096 1097 for (i = 0; i < css; i++) 1098 CHsub(cs, i); 1099 if (cs == top-1) /* recover only the easy case */ 1100 p->g->ncsets--; 1101 } 1102 1103 /* 1104 - freezeset - final processing on a set of characters 1105 * 1106 * The main task here is merging identical sets. This is usually a waste 1107 * of time (although the hash code minimizes the overhead), but can win 1108 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash 1109 * is done using addition rather than xor -- all ASCII [aA] sets xor to 1110 * the same value! 1111 */ 1112 static int /* set number */ 1113 freezeset(struct parse *p, cset *cs) 1114 { 1115 uch h = cs->hash; 1116 int i; 1117 cset *top = &p->g->sets[p->g->ncsets]; 1118 cset *cs2; 1119 size_t css = (size_t)p->g->csetsize; 1120 1121 /* look for an earlier one which is the same */ 1122 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) 1123 if (cs2->hash == h && cs2 != cs) { 1124 /* maybe */ 1125 for (i = 0; i < css; i++) 1126 if (!!CHIN(cs2, i) != !!CHIN(cs, i)) 1127 break; /* no */ 1128 if (i == css) 1129 break; /* yes */ 1130 } 1131 1132 if (cs2 < top) { /* found one */ 1133 freeset(p, cs); 1134 cs = cs2; 1135 } 1136 1137 return((int)(cs - p->g->sets)); 1138 } 1139 1140 /* 1141 - firstch - return first character in a set (which must have at least one) 1142 */ 1143 static int /* character; there is no "none" value */ 1144 firstch(struct parse *p, cset *cs) 1145 { 1146 int i; 1147 size_t css = (size_t)p->g->csetsize; 1148 1149 for (i = 0; i < css; i++) 1150 if (CHIN(cs, i)) 1151 return((char)i); 1152 assert(never); 1153 return(0); /* arbitrary */ 1154 } 1155 1156 /* 1157 - nch - number of characters in a set 1158 */ 1159 static int 1160 nch(struct parse *p, cset *cs) 1161 { 1162 int i; 1163 size_t css = (size_t)p->g->csetsize; 1164 int n = 0; 1165 1166 for (i = 0; i < css; i++) 1167 if (CHIN(cs, i)) 1168 n++; 1169 return(n); 1170 } 1171 1172 /* 1173 - mcadd - add a collating element to a cset 1174 */ 1175 static void 1176 mcadd( struct parse *p, cset *cs, char *cp) 1177 { 1178 size_t oldend = cs->smultis; 1179 void *np; 1180 1181 cs->smultis += strlen(cp) + 1; 1182 np = realloc(cs->multis, cs->smultis); 1183 if (np == NULL) { 1184 free(cs->multis); 1185 cs->multis = NULL; 1186 SETERROR(REG_ESPACE); 1187 return; 1188 } 1189 cs->multis = np; 1190 1191 strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); 1192 } 1193 1194 /* 1195 - mcinvert - invert the list of collating elements in a cset 1196 * 1197 * This would have to know the set of possibilities. Implementation 1198 * is deferred. 1199 */ 1200 static void 1201 mcinvert(struct parse *p, cset *cs) 1202 { 1203 assert(cs->multis == NULL); /* xxx */ 1204 } 1205 1206 /* 1207 - mccase - add case counterparts of the list of collating elements in a cset 1208 * 1209 * This would have to know the set of possibilities. Implementation 1210 * is deferred. 1211 */ 1212 static void 1213 mccase(struct parse *p, cset *cs) 1214 { 1215 assert(cs->multis == NULL); /* xxx */ 1216 } 1217 1218 /* 1219 - isinsets - is this character in any sets? 1220 */ 1221 static int /* predicate */ 1222 isinsets(struct re_guts *g, int c) 1223 { 1224 uch *col; 1225 int i; 1226 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1227 unsigned uc = (uch)c; 1228 1229 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1230 if (col[uc] != 0) 1231 return(1); 1232 return(0); 1233 } 1234 1235 /* 1236 - samesets - are these two characters in exactly the same sets? 1237 */ 1238 static int /* predicate */ 1239 samesets(struct re_guts *g, int c1, int c2) 1240 { 1241 uch *col; 1242 int i; 1243 int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; 1244 unsigned uc1 = (uch)c1; 1245 unsigned uc2 = (uch)c2; 1246 1247 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) 1248 if (col[uc1] != col[uc2]) 1249 return(0); 1250 return(1); 1251 } 1252 1253 /* 1254 - categorize - sort out character categories 1255 */ 1256 static void 1257 categorize(struct parse *p, struct re_guts *g) 1258 { 1259 cat_t *cats = g->categories; 1260 int c; 1261 int c2; 1262 cat_t cat; 1263 1264 /* avoid making error situations worse */ 1265 if (p->error != 0) 1266 return; 1267 1268 for (c = CHAR_MIN; c <= CHAR_MAX; c++) 1269 if (cats[c] == 0 && isinsets(g, c)) { 1270 cat = g->ncategories++; 1271 cats[c] = cat; 1272 for (c2 = c+1; c2 <= CHAR_MAX; c2++) 1273 if (cats[c2] == 0 && samesets(g, c, c2)) 1274 cats[c2] = cat; 1275 } 1276 } 1277 1278 /* 1279 - dupl - emit a duplicate of a bunch of sops 1280 */ 1281 static sopno /* start of duplicate */ 1282 dupl(struct parse *p, 1283 sopno start, /* from here */ 1284 sopno finish) /* to this less one */ 1285 { 1286 sopno ret = HERE(); 1287 sopno len = finish - start; 1288 1289 assert(finish >= start); 1290 if (len == 0) 1291 return(ret); 1292 if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */ 1293 return(ret); 1294 (void) memcpy(p->strip + p->slen, p->strip + start, len * sizeof(sop)); 1295 p->slen += len; 1296 return(ret); 1297 } 1298 1299 /* 1300 - doemit - emit a strip operator 1301 * 1302 * It might seem better to implement this as a macro with a function as 1303 * hard-case backup, but it's just too big and messy unless there are 1304 * some changes to the data structures. Maybe later. 1305 */ 1306 static void 1307 doemit(struct parse *p, sop op, size_t opnd) 1308 { 1309 /* avoid making error situations worse */ 1310 if (p->error != 0) 1311 return; 1312 1313 /* deal with oversize operands ("can't happen", more or less) */ 1314 assert(opnd < 1<<OPSHIFT); 1315 1316 /* deal with undersized strip */ 1317 if (p->slen >= p->ssize) 1318 if (!enlarge(p, (p->ssize+1) / 2 * 3)) /* +50% */ 1319 return; 1320 1321 /* finally, it's all reduced to the easy case */ 1322 p->strip[p->slen++] = SOP(op, opnd); 1323 } 1324 1325 /* 1326 - doinsert - insert a sop into the strip 1327 */ 1328 static void 1329 doinsert(struct parse *p, sop op, size_t opnd, sopno pos) 1330 { 1331 sopno sn; 1332 sop s; 1333 int i; 1334 1335 /* avoid making error situations worse */ 1336 if (p->error != 0) 1337 return; 1338 1339 sn = HERE(); 1340 EMIT(op, opnd); /* do checks, ensure space */ 1341 assert(HERE() == sn+1); 1342 s = p->strip[sn]; 1343 1344 /* adjust paren pointers */ 1345 assert(pos > 0); 1346 for (i = 1; i < NPAREN; i++) { 1347 if (p->pbegin[i] >= pos) { 1348 p->pbegin[i]++; 1349 } 1350 if (p->pend[i] >= pos) { 1351 p->pend[i]++; 1352 } 1353 } 1354 1355 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], 1356 (HERE()-pos-1)*sizeof(sop)); 1357 p->strip[pos] = s; 1358 } 1359 1360 /* 1361 - dofwd - complete a forward reference 1362 */ 1363 static void 1364 dofwd(struct parse *p, sopno pos, sop value) 1365 { 1366 /* avoid making error situations worse */ 1367 if (p->error != 0) 1368 return; 1369 1370 assert(value < 1<<OPSHIFT); 1371 p->strip[pos] = OP(p->strip[pos]) | value; 1372 } 1373 1374 /* 1375 - enlarge - enlarge the strip 1376 */ 1377 static int 1378 enlarge(struct parse *p, sopno size) 1379 { 1380 sop *sp; 1381 1382 if (p->ssize >= size) 1383 return 1; 1384 1385 sp = reallocarray(p->strip, size, sizeof(sop)); 1386 if (sp == NULL) { 1387 SETERROR(REG_ESPACE); 1388 return 0; 1389 } 1390 p->strip = sp; 1391 p->ssize = size; 1392 return 1; 1393 } 1394 1395 /* 1396 - stripsnug - compact the strip 1397 */ 1398 static void 1399 stripsnug(struct parse *p, struct re_guts *g) 1400 { 1401 g->nstates = p->slen; 1402 g->strip = reallocarray(p->strip, p->slen, sizeof(sop)); 1403 if (g->strip == NULL) { 1404 SETERROR(REG_ESPACE); 1405 g->strip = p->strip; 1406 } 1407 } 1408 1409 /* 1410 - findmust - fill in must and mlen with longest mandatory literal string 1411 * 1412 * This algorithm could do fancy things like analyzing the operands of | 1413 * for common subsequences. Someday. This code is simple and finds most 1414 * of the interesting cases. 1415 * 1416 * Note that must and mlen got initialized during setup. 1417 */ 1418 static void 1419 findmust(struct parse *p, struct re_guts *g) 1420 { 1421 sop *scan; 1422 sop *start; /* start initialized in the default case, after that */ 1423 sop *newstart; /* newstart was initialized in the OCHAR case */ 1424 sopno newlen; 1425 sop s; 1426 char *cp; 1427 sopno i; 1428 1429 /* avoid making error situations worse */ 1430 if (p->error != 0) 1431 return; 1432 1433 /* find the longest OCHAR sequence in strip */ 1434 newlen = 0; 1435 scan = g->strip + 1; 1436 do { 1437 s = *scan++; 1438 switch (OP(s)) { 1439 case OCHAR: /* sequence member */ 1440 if (newlen == 0) /* new sequence */ 1441 newstart = scan - 1; 1442 newlen++; 1443 break; 1444 case OPLUS_: /* things that don't break one */ 1445 case OLPAREN: 1446 case ORPAREN: 1447 break; 1448 case OQUEST_: /* things that must be skipped */ 1449 case OCH_: 1450 scan--; 1451 do { 1452 scan += OPND(s); 1453 s = *scan; 1454 /* assert() interferes w debug printouts */ 1455 if (OP(s) != O_QUEST && OP(s) != O_CH && 1456 OP(s) != OOR2) { 1457 g->iflags |= BAD; 1458 return; 1459 } 1460 } while (OP(s) != O_QUEST && OP(s) != O_CH); 1461 /* fallthrough */ 1462 default: /* things that break a sequence */ 1463 if (newlen > g->mlen) { /* ends one */ 1464 start = newstart; 1465 g->mlen = newlen; 1466 } 1467 newlen = 0; 1468 break; 1469 } 1470 } while (OP(s) != OEND); 1471 1472 if (g->mlen == 0) /* there isn't one */ 1473 return; 1474 1475 /* turn it into a character string */ 1476 g->must = malloc((size_t)g->mlen + 1); 1477 if (g->must == NULL) { /* argh; just forget it */ 1478 g->mlen = 0; 1479 return; 1480 } 1481 cp = g->must; 1482 scan = start; 1483 for (i = g->mlen; i > 0; i--) { 1484 while (OP(s = *scan++) != OCHAR) 1485 continue; 1486 assert(cp < g->must + g->mlen); 1487 *cp++ = (char)OPND(s); 1488 } 1489 assert(cp == g->must + g->mlen); 1490 *cp++ = '\0'; /* just on general principles */ 1491 } 1492 1493 /* 1494 - pluscount - count + nesting 1495 */ 1496 static sopno /* nesting depth */ 1497 pluscount(struct parse *p, struct re_guts *g) 1498 { 1499 sop *scan; 1500 sop s; 1501 sopno plusnest = 0; 1502 sopno maxnest = 0; 1503 1504 if (p->error != 0) 1505 return(0); /* there may not be an OEND */ 1506 1507 scan = g->strip + 1; 1508 do { 1509 s = *scan++; 1510 switch (OP(s)) { 1511 case OPLUS_: 1512 plusnest++; 1513 break; 1514 case O_PLUS: 1515 if (plusnest > maxnest) 1516 maxnest = plusnest; 1517 plusnest--; 1518 break; 1519 } 1520 } while (OP(s) != OEND); 1521 if (plusnest != 0) 1522 g->iflags |= BAD; 1523 return(maxnest); 1524 } 1525