1 /* 2 * egrep -- print lines containing (or not containing) a regular expression 3 * 4 * status returns: 5 * 0 - ok, and some matches 6 * 1 - ok, but no matches 7 * 2 - some error 8 */ 9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10 %left OR 11 %left CHAR DOT CCL NCCL '(' 12 %left CAT 13 %left STAR PLUS QUEST 14 15 %{ 16 static char *sccsid = "@(#)old.egrep.y 4.5 (Berkeley) 10/03/87"; 17 #include <stdio.h> 18 #include <sys/types.h> 19 #include <sys/stat.h> 20 #include <ctype.h> 21 22 #define BLKSIZE 8192 23 #define MAXLIN 350 24 #define MAXPOS 4000 25 #define NCHARS 128 26 #define NSTATES 128 27 #define FINAL -1 28 char gotofn[NSTATES][NCHARS]; 29 char cmap[256]; 30 int state[NSTATES]; 31 char out[NSTATES]; 32 int line = 1; 33 int name[MAXLIN]; 34 int left[MAXLIN]; 35 int right[MAXLIN]; 36 int parent[MAXLIN]; 37 int foll[MAXLIN]; 38 int positions[MAXPOS]; 39 char chars[MAXLIN]; 40 int nxtpos; 41 int nxtchar = 0; 42 int tmpstat[MAXLIN]; 43 int initstat[MAXLIN]; 44 int xstate; 45 int count; 46 int icount; 47 char *input; 48 FILE *exprfile; 49 50 long lnum; 51 int bflag; 52 int cflag; 53 int fflag; 54 int iflag; 55 int lflag; 56 int nflag; 57 int hflag = 1; 58 int sflag; 59 int vflag; 60 int retcode = 0; 61 int nfile; 62 int blkno; 63 long tln; 64 int nsucc; 65 66 int f; 67 char *fname; 68 %} 69 70 %% 71 s: t 72 ={ unary(FINAL, $1); 73 line--; 74 } 75 ; 76 t: b r 77 ={ $$ = node(CAT, $1, $2); } 78 | OR b r OR 79 ={ $$ = node(CAT, $2, $3); } 80 | OR b r 81 ={ $$ = node(CAT, $2, $3); } 82 | b r OR 83 ={ $$ = node(CAT, $1, $2); } 84 ; 85 b: 86 ={ $$ = enter(DOT); 87 $$ = unary(STAR, $$); } 88 ; 89 r: CHAR 90 ={ $$ = enter($1); } 91 | DOT 92 ={ $$ = enter(DOT); } 93 | CCL 94 ={ $$ = cclenter(CCL); } 95 | NCCL 96 ={ $$ = cclenter(NCCL); } 97 ; 98 99 r: r OR r 100 ={ $$ = node(OR, $1, $3); } 101 | r r %prec CAT 102 ={ $$ = node(CAT, $1, $2); } 103 | r STAR 104 ={ $$ = unary(STAR, $1); } 105 | r PLUS 106 ={ $$ = unary(PLUS, $1); } 107 | r QUEST 108 ={ $$ = unary(QUEST, $1); } 109 | '(' r ')' 110 ={ $$ = $2; } 111 | error 112 ; 113 114 %% 115 yyerror(s) { 116 fprintf(stderr, "egrep: %s\n", s); 117 exit(2); 118 } 119 120 yylex() { 121 extern int yylval; 122 int cclcnt, x; 123 register char c, d; 124 switch(c = nextch()) { 125 case '$': 126 case '^': c = '\n'; 127 goto defchar; 128 case '|': return (OR); 129 case '*': return (STAR); 130 case '+': return (PLUS); 131 case '?': return (QUEST); 132 case '(': return (c); 133 case ')': return (c); 134 case '.': return (DOT); 135 case '\0': return (0); 136 case '\n': return (OR); 137 case '[': 138 x = CCL; 139 cclcnt = 0; 140 count = nxtchar++; 141 if ((c = nextch()) == '^') { 142 x = NCCL; 143 c = nextch(); 144 } 145 do { 146 if (c == '\0') synerror(); 147 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 148 if ((d = nextch()) != 0) { 149 c = chars[nxtchar-1]; 150 while (c < d) { 151 if (nxtchar >= MAXLIN) overflo(); 152 chars[nxtchar++] = ++c; 153 cclcnt++; 154 } 155 continue; 156 } 157 } 158 if (nxtchar >= MAXLIN) overflo(); 159 chars[nxtchar++] = c; 160 cclcnt++; 161 } while ((c = nextch()) != ']'); 162 chars[count] = cclcnt; 163 return (x); 164 case '\\': 165 if ((c = nextch()) == '\0') synerror(); 166 defchar: 167 default: yylval = c; return (CHAR); 168 } 169 } 170 nextch() { 171 register int c; 172 if (fflag) { 173 if ((c = getc(exprfile)) == EOF) { 174 fclose(exprfile); 175 return(0); 176 } 177 } 178 else c = *input++; 179 return(c); 180 } 181 182 synerror() { 183 fprintf(stderr, "egrep: syntax error\n"); 184 exit(2); 185 } 186 187 enter(x) int x; { 188 if(line >= MAXLIN) overflo(); 189 name[line] = x; 190 left[line] = 0; 191 right[line] = 0; 192 return(line++); 193 } 194 195 cclenter(x) int x; { 196 register linno; 197 linno = enter(x); 198 right[linno] = count; 199 return (linno); 200 } 201 202 node(x, l, r) { 203 if(line >= MAXLIN) overflo(); 204 name[line] = x; 205 left[line] = l; 206 right[line] = r; 207 parent[l] = line; 208 parent[r] = line; 209 return(line++); 210 } 211 212 unary(x, d) { 213 if(line >= MAXLIN) overflo(); 214 name[line] = x; 215 left[line] = d; 216 right[line] = 0; 217 parent[d] = line; 218 return(line++); 219 } 220 overflo() { 221 fprintf(stderr, "egrep: regular expression too long\n"); 222 exit(2); 223 } 224 225 cfoll(v) { 226 register i; 227 if (left[v] == 0) { 228 count = 0; 229 for (i=1; i<=line; i++) tmpstat[i] = 0; 230 follow(v); 231 add(foll, v); 232 } 233 else if (right[v] == 0) cfoll(left[v]); 234 else { 235 cfoll(left[v]); 236 cfoll(right[v]); 237 } 238 } 239 cgotofn() { 240 register c, i, k; 241 int n, s; 242 char symbol[NCHARS]; 243 int j, nc, pc, pos; 244 int curpos, num; 245 int number, newpos; 246 count = 0; 247 for (n=3; n<=line; n++) tmpstat[n] = 0; 248 if (cstate(line-1)==0) { 249 tmpstat[line] = 1; 250 count++; 251 out[0] = 1; 252 } 253 for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 254 count--; /*leave out position 1 */ 255 icount = count; 256 tmpstat[1] = 0; 257 add(state, 0); 258 n = 0; 259 for (s=0; s<=n; s++) { 260 if (out[s] == 1) continue; 261 for (i=0; i<NCHARS; i++) symbol[i] = 0; 262 num = positions[state[s]]; 263 count = icount; 264 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 265 pos = state[s] + 1; 266 for (i=0; i<num; i++) { 267 curpos = positions[pos]; 268 if ((c = name[curpos]) >= 0) { 269 if (c < NCHARS) symbol[c] = 1; 270 else if (c == DOT) { 271 for (k=0; k<NCHARS; k++) 272 if (k!='\n') symbol[k] = 1; 273 } 274 else if (c == CCL) { 275 nc = chars[right[curpos]]; 276 pc = right[curpos] + 1; 277 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 278 } 279 else if (c == NCCL) { 280 nc = chars[right[curpos]]; 281 for (j = 0; j < NCHARS; j++) { 282 pc = right[curpos] + 1; 283 for (k = 0; k < nc; k++) 284 if (j==chars[pc++]) goto cont; 285 if (j!='\n') symbol[j] = 1; 286 cont:; 287 } 288 } 289 else printf("something's funny\n"); 290 } 291 pos++; 292 } 293 for (c=0; c<NCHARS; c++) { 294 if (symbol[c] == 1) { /* nextstate(s,c) */ 295 count = icount; 296 for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 297 pos = state[s] + 1; 298 for (i=0; i<num; i++) { 299 curpos = positions[pos]; 300 if ((k = name[curpos]) >= 0) 301 if ( 302 (k == c) 303 | (k == DOT) 304 | (k == CCL && member(c, right[curpos], 1)) 305 | (k == NCCL && member(c, right[curpos], 0)) 306 ) { 307 number = positions[foll[curpos]]; 308 newpos = foll[curpos] + 1; 309 for (k=0; k<number; k++) { 310 if (tmpstat[positions[newpos]] != 1) { 311 tmpstat[positions[newpos]] = 1; 312 count++; 313 } 314 newpos++; 315 } 316 } 317 pos++; 318 } /* end nextstate */ 319 if (notin(n)) { 320 if (n >= NSTATES) overflo(); 321 add(state, ++n); 322 if (tmpstat[line] == 1) out[n] = 1; 323 gotofn[s][c] = n; 324 } 325 else { 326 gotofn[s][c] = xstate; 327 } 328 } 329 } 330 } 331 } 332 333 cstate(v) { 334 register b; 335 if (left[v] == 0) { 336 if (tmpstat[v] != 1) { 337 tmpstat[v] = 1; 338 count++; 339 } 340 return(1); 341 } 342 else if (right[v] == 0) { 343 if (cstate(left[v]) == 0) return (0); 344 else if (name[v] == PLUS) return (1); 345 else return (0); 346 } 347 else if (name[v] == CAT) { 348 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 349 else return (1); 350 } 351 else { /* name[v] == OR */ 352 b = cstate(right[v]); 353 if (cstate(left[v]) == 0 || b == 0) return (0); 354 else return (1); 355 } 356 } 357 358 359 member(symb, set, torf) { 360 register i, num, pos; 361 num = chars[set]; 362 pos = set + 1; 363 for (i=0; i<num; i++) 364 if (symb == chars[pos++]) return (torf); 365 return (!torf); 366 } 367 368 notin(n) { 369 register i, j, pos; 370 for (i=0; i<=n; i++) { 371 if (positions[state[i]] == count) { 372 pos = state[i] + 1; 373 for (j=0; j < count; j++) 374 if (tmpstat[positions[pos++]] != 1) goto nxt; 375 xstate = i; 376 return (0); 377 } 378 nxt: ; 379 } 380 return (1); 381 } 382 383 add(array, n) int *array; { 384 register i; 385 if (nxtpos + count > MAXPOS) overflo(); 386 array[n] = nxtpos; 387 positions[nxtpos++] = count; 388 for (i=3; i <= line; i++) { 389 if (tmpstat[i] == 1) { 390 positions[nxtpos++] = i; 391 } 392 } 393 } 394 395 follow(v) int v; { 396 int p; 397 if (v == line) return; 398 p = parent[v]; 399 switch(name[p]) { 400 case STAR: 401 case PLUS: cstate(v); 402 follow(p); 403 return; 404 405 case OR: 406 case QUEST: follow(p); 407 return; 408 409 case CAT: if (v == left[p]) { 410 if (cstate(right[p]) == 0) { 411 follow(p); 412 return; 413 } 414 } 415 else follow(p); 416 return; 417 case FINAL: if (tmpstat[line] != 1) { 418 tmpstat[line] = 1; 419 count++; 420 } 421 return; 422 } 423 } 424 425 426 main(argc, argv) 427 char **argv; 428 { 429 register int i; 430 431 while (--argc > 0 && (++argv)[0][0]=='-') 432 switch (argv[0][1]) { 433 434 case 's': 435 sflag++; 436 continue; 437 438 case 'h': 439 hflag = 0; 440 continue; 441 442 case 'b': 443 bflag++; 444 continue; 445 446 case 'c': 447 cflag++; 448 continue; 449 450 case 'e': 451 argc--; 452 argv++; 453 goto out; 454 455 case 'f': 456 fflag++; 457 continue; 458 459 case 'i': 460 iflag++; 461 for ( i = 'A'; i <= 'Z'; i++ ) 462 cmap[i] = (char) tolower ( i ); 463 continue; 464 465 case 'l': 466 lflag++; 467 continue; 468 469 case 'n': 470 nflag++; 471 continue; 472 473 case 'v': 474 vflag++; 475 continue; 476 477 default: 478 fprintf(stderr, "egrep: unknown flag\n"); 479 continue; 480 } 481 out: 482 if (argc<=0) 483 exit(2); 484 485 for (i = 0; i < 256; ++i) 486 cmap[i] = (char)i; 487 488 if (fflag) { 489 fname = *argv; 490 exprfile = fopen(fname, "r"); 491 if (exprfile == (FILE *)NULL) { 492 fprintf(stderr, "egrep: can't open %s\n", fname); 493 exit(2); 494 } 495 } 496 else input = *argv; 497 if ( iflag ) { 498 register char *s; 499 for ( s = input; *s != '\0'; s++ ) 500 if ( isupper ( (int)(*s) ) ) 501 *s = (char) tolower ( (int)(*s) ); 502 } 503 argc--; 504 argv++; 505 506 yyparse(); 507 508 cfoll(line-1); 509 cgotofn(); 510 nfile = argc; 511 if (argc<=0) { 512 if (lflag) exit(1); 513 execute(0); 514 } 515 else while (--argc >= 0) { 516 execute(*argv); 517 argv++; 518 } 519 exit(retcode != 0 ? retcode : nsucc == 0); 520 } 521 522 execute(file) 523 char *file; 524 { 525 register char *p; 526 register cstat; 527 register ccount; 528 register char *cmapr = cmap; 529 static char *buf; 530 static int blksize; 531 struct stat stb; 532 char *nlp; 533 int istat; 534 if (file) { 535 if ((f = open(file, 0)) < 0) { 536 fprintf(stderr, "egrep: can't open %s\n", file); 537 retcode = 2; 538 return; 539 } 540 } 541 else f = 0; 542 if (buf == NULL) { 543 if (fstat(f, &stb) > 0 && stb.st_blksize > 0) 544 blksize = stb.st_blksize; 545 else 546 blksize = BLKSIZE; 547 buf = (char *)malloc(2*blksize); 548 if (buf == NULL) { 549 fprintf(stderr, "egrep: no memory for %s\n", file); 550 retcode = 2; 551 return; 552 } 553 } 554 ccount = 0; 555 lnum = 1; 556 tln = 0; 557 blkno = 0; 558 p = buf; 559 nlp = p; 560 if ((ccount = read(f,p,blksize))<=0) goto done; 561 istat = cstat = gotofn[0]['\n']; 562 if (out[cstat]) goto found; 563 for (;;) { 564 cstat = gotofn[cstat][(unsigned char)cmapr[*(unsigned char *)p]]; 565 if (out[cstat]) { 566 found: for(;;) { 567 if (*p++ == '\n') { 568 if (vflag == 0) { 569 succeed: nsucc = 1; 570 if (cflag) tln++; 571 else if (sflag) 572 ; /* ugh */ 573 else if (lflag) { 574 printf("%s\n", file); 575 close(f); 576 return; 577 } 578 else { 579 if (nfile > 1 && hflag) printf("%s:", file); 580 if (bflag) printf("%d:", blkno); 581 if (nflag) printf("%ld:", lnum); 582 if (p <= nlp) { 583 while (nlp < &buf[2*blksize]) putchar(*nlp++); 584 nlp = buf; 585 } 586 while (nlp < p) putchar(*nlp++); 587 } 588 } 589 lnum++; 590 nlp = p; 591 if ((out[(cstat=istat)]) == 0) goto brk2; 592 } 593 cfound: 594 if (--ccount <= 0) { 595 if (p <= &buf[blksize]) { 596 if ((ccount = read(f, p, blksize)) <= 0) goto done; 597 } 598 else if (p == &buf[2*blksize]) { 599 p = buf; 600 if ((ccount = read(f, p, blksize)) <= 0) goto done; 601 } 602 else { 603 if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done; 604 } 605 blkno += ccount / 512; 606 } 607 } 608 } 609 if (*p++ == '\n') { 610 if (vflag) goto succeed; 611 else { 612 lnum++; 613 nlp = p; 614 if (out[(cstat=istat)]) goto cfound; 615 } 616 } 617 brk2: 618 if (--ccount <= 0) { 619 if (p <= &buf[blksize]) { 620 if ((ccount = read(f, p, blksize)) <= 0) break; 621 } 622 else if (p == &buf[2*blksize]) { 623 p = buf; 624 if ((ccount = read(f, p, blksize)) <= 0) break; 625 } 626 else { 627 if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break; 628 } 629 blkno += ccount / 512; 630 } 631 } 632 done: close(f); 633 if (cflag) { 634 if (nfile > 1) 635 printf("%s:", file); 636 printf("%ld\n", tln); 637 } 638 } 639