1 /* 2 * egrep -- print lines containing (or not containing) a regular expression 3 * 4 * status returns: 5 * 0 - ok, and some matches 6 * 1 - ok, but no matches 7 * 2 - some error 8 */ 9 %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10 %left OR 11 %left CHAR DOT CCL NCCL '(' 12 %left CAT 13 %left STAR PLUS QUEST 14 15 %{ 16 static char *sccsid = "@(#)old.egrep.y 4.4 (Berkeley) 05/29/85"; 17 #include <stdio.h> 18 #include <sys/types.h> 19 #include <sys/stat.h> 20 21 #define BLKSIZE 8192 22 #define MAXLIN 350 23 #define MAXPOS 4000 24 #define NCHARS 128 25 #define NSTATES 128 26 #define FINAL -1 27 char gotofn[NSTATES][NCHARS]; 28 int state[NSTATES]; 29 char out[NSTATES]; 30 int line = 1; 31 int name[MAXLIN]; 32 int left[MAXLIN]; 33 int right[MAXLIN]; 34 int parent[MAXLIN]; 35 int foll[MAXLIN]; 36 int positions[MAXPOS]; 37 char chars[MAXLIN]; 38 int nxtpos; 39 int nxtchar = 0; 40 int tmpstat[MAXLIN]; 41 int initstat[MAXLIN]; 42 int xstate; 43 int count; 44 int icount; 45 char *input; 46 FILE *exprfile; 47 48 long lnum; 49 int bflag; 50 int cflag; 51 int fflag; 52 int lflag; 53 int nflag; 54 int hflag = 1; 55 int sflag; 56 int vflag; 57 int retcode = 0; 58 int nfile; 59 int blkno; 60 long tln; 61 int nsucc; 62 63 int f; 64 char *fname; 65 %} 66 67 %% 68 s: t 69 ={ unary(FINAL, $1); 70 line--; 71 } 72 ; 73 t: b r 74 ={ $$ = node(CAT, $1, $2); } 75 | OR b r OR 76 ={ $$ = node(CAT, $2, $3); } 77 | OR b r 78 ={ $$ = node(CAT, $2, $3); } 79 | b r OR 80 ={ $$ = node(CAT, $1, $2); } 81 ; 82 b: 83 ={ $$ = enter(DOT); 84 $$ = unary(STAR, $$); } 85 ; 86 r: CHAR 87 ={ $$ = enter($1); } 88 | DOT 89 ={ $$ = enter(DOT); } 90 | CCL 91 ={ $$ = cclenter(CCL); } 92 | NCCL 93 ={ $$ = cclenter(NCCL); } 94 ; 95 96 r: r OR r 97 ={ $$ = node(OR, $1, $3); } 98 | r r %prec CAT 99 ={ $$ = node(CAT, $1, $2); } 100 | r STAR 101 ={ $$ = unary(STAR, $1); } 102 | r PLUS 103 ={ $$ = unary(PLUS, $1); } 104 | r QUEST 105 ={ $$ = unary(QUEST, $1); } 106 | '(' r ')' 107 ={ $$ = $2; } 108 | error 109 ; 110 111 %% 112 yyerror(s) { 113 fprintf(stderr, "egrep: %s\n", s); 114 exit(2); 115 } 116 117 yylex() { 118 extern int yylval; 119 int cclcnt, x; 120 register char c, d; 121 switch(c = nextch()) { 122 case '$': 123 case '^': c = '\n'; 124 goto defchar; 125 case '|': return (OR); 126 case '*': return (STAR); 127 case '+': return (PLUS); 128 case '?': return (QUEST); 129 case '(': return (c); 130 case ')': return (c); 131 case '.': return (DOT); 132 case '\0': return (0); 133 case '\n': return (OR); 134 case '[': 135 x = CCL; 136 cclcnt = 0; 137 count = nxtchar++; 138 if ((c = nextch()) == '^') { 139 x = NCCL; 140 c = nextch(); 141 } 142 do { 143 if (c == '\0') synerror(); 144 if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 145 if ((d = nextch()) != 0) { 146 c = chars[nxtchar-1]; 147 while (c < d) { 148 if (nxtchar >= MAXLIN) overflo(); 149 chars[nxtchar++] = ++c; 150 cclcnt++; 151 } 152 continue; 153 } 154 } 155 if (nxtchar >= MAXLIN) overflo(); 156 chars[nxtchar++] = c; 157 cclcnt++; 158 } while ((c = nextch()) != ']'); 159 chars[count] = cclcnt; 160 return (x); 161 case '\\': 162 if ((c = nextch()) == '\0') synerror(); 163 defchar: 164 default: yylval = c; return (CHAR); 165 } 166 } 167 nextch() { 168 register char c; 169 if (fflag) { 170 if ((c = getc(exprfile)) == EOF) { 171 fclose(exprfile); 172 return(0); 173 } 174 } 175 else c = *input++; 176 return(c); 177 } 178 179 synerror() { 180 fprintf(stderr, "egrep: syntax error\n"); 181 exit(2); 182 } 183 184 enter(x) int x; { 185 if(line >= MAXLIN) overflo(); 186 name[line] = x; 187 left[line] = 0; 188 right[line] = 0; 189 return(line++); 190 } 191 192 cclenter(x) int x; { 193 register linno; 194 linno = enter(x); 195 right[linno] = count; 196 return (linno); 197 } 198 199 node(x, l, r) { 200 if(line >= MAXLIN) overflo(); 201 name[line] = x; 202 left[line] = l; 203 right[line] = r; 204 parent[l] = line; 205 parent[r] = line; 206 return(line++); 207 } 208 209 unary(x, d) { 210 if(line >= MAXLIN) overflo(); 211 name[line] = x; 212 left[line] = d; 213 right[line] = 0; 214 parent[d] = line; 215 return(line++); 216 } 217 overflo() { 218 fprintf(stderr, "egrep: regular expression too long\n"); 219 exit(2); 220 } 221 222 cfoll(v) { 223 register i; 224 if (left[v] == 0) { 225 count = 0; 226 for (i=1; i<=line; i++) tmpstat[i] = 0; 227 follow(v); 228 add(foll, v); 229 } 230 else if (right[v] == 0) cfoll(left[v]); 231 else { 232 cfoll(left[v]); 233 cfoll(right[v]); 234 } 235 } 236 cgotofn() { 237 register c, i, k; 238 int n, s; 239 char symbol[NCHARS]; 240 int j, nc, pc, pos; 241 int curpos, num; 242 int number, newpos; 243 count = 0; 244 for (n=3; n<=line; n++) tmpstat[n] = 0; 245 if (cstate(line-1)==0) { 246 tmpstat[line] = 1; 247 count++; 248 out[0] = 1; 249 } 250 for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 251 count--; /*leave out position 1 */ 252 icount = count; 253 tmpstat[1] = 0; 254 add(state, 0); 255 n = 0; 256 for (s=0; s<=n; s++) { 257 if (out[s] == 1) continue; 258 for (i=0; i<NCHARS; i++) symbol[i] = 0; 259 num = positions[state[s]]; 260 count = icount; 261 for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 262 pos = state[s] + 1; 263 for (i=0; i<num; i++) { 264 curpos = positions[pos]; 265 if ((c = name[curpos]) >= 0) { 266 if (c < NCHARS) symbol[c] = 1; 267 else if (c == DOT) { 268 for (k=0; k<NCHARS; k++) 269 if (k!='\n') symbol[k] = 1; 270 } 271 else if (c == CCL) { 272 nc = chars[right[curpos]]; 273 pc = right[curpos] + 1; 274 for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 275 } 276 else if (c == NCCL) { 277 nc = chars[right[curpos]]; 278 for (j = 0; j < NCHARS; j++) { 279 pc = right[curpos] + 1; 280 for (k = 0; k < nc; k++) 281 if (j==chars[pc++]) goto cont; 282 if (j!='\n') symbol[j] = 1; 283 cont:; 284 } 285 } 286 else printf("something's funny\n"); 287 } 288 pos++; 289 } 290 for (c=0; c<NCHARS; c++) { 291 if (symbol[c] == 1) { /* nextstate(s,c) */ 292 count = icount; 293 for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 294 pos = state[s] + 1; 295 for (i=0; i<num; i++) { 296 curpos = positions[pos]; 297 if ((k = name[curpos]) >= 0) 298 if ( 299 (k == c) 300 | (k == DOT) 301 | (k == CCL && member(c, right[curpos], 1)) 302 | (k == NCCL && member(c, right[curpos], 0)) 303 ) { 304 number = positions[foll[curpos]]; 305 newpos = foll[curpos] + 1; 306 for (k=0; k<number; k++) { 307 if (tmpstat[positions[newpos]] != 1) { 308 tmpstat[positions[newpos]] = 1; 309 count++; 310 } 311 newpos++; 312 } 313 } 314 pos++; 315 } /* end nextstate */ 316 if (notin(n)) { 317 if (n >= NSTATES) overflo(); 318 add(state, ++n); 319 if (tmpstat[line] == 1) out[n] = 1; 320 gotofn[s][c] = n; 321 } 322 else { 323 gotofn[s][c] = xstate; 324 } 325 } 326 } 327 } 328 } 329 330 cstate(v) { 331 register b; 332 if (left[v] == 0) { 333 if (tmpstat[v] != 1) { 334 tmpstat[v] = 1; 335 count++; 336 } 337 return(1); 338 } 339 else if (right[v] == 0) { 340 if (cstate(left[v]) == 0) return (0); 341 else if (name[v] == PLUS) return (1); 342 else return (0); 343 } 344 else if (name[v] == CAT) { 345 if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 346 else return (1); 347 } 348 else { /* name[v] == OR */ 349 b = cstate(right[v]); 350 if (cstate(left[v]) == 0 || b == 0) return (0); 351 else return (1); 352 } 353 } 354 355 356 member(symb, set, torf) { 357 register i, num, pos; 358 num = chars[set]; 359 pos = set + 1; 360 for (i=0; i<num; i++) 361 if (symb == chars[pos++]) return (torf); 362 return (!torf); 363 } 364 365 notin(n) { 366 register i, j, pos; 367 for (i=0; i<=n; i++) { 368 if (positions[state[i]] == count) { 369 pos = state[i] + 1; 370 for (j=0; j < count; j++) 371 if (tmpstat[positions[pos++]] != 1) goto nxt; 372 xstate = i; 373 return (0); 374 } 375 nxt: ; 376 } 377 return (1); 378 } 379 380 add(array, n) int *array; { 381 register i; 382 if (nxtpos + count > MAXPOS) overflo(); 383 array[n] = nxtpos; 384 positions[nxtpos++] = count; 385 for (i=3; i <= line; i++) { 386 if (tmpstat[i] == 1) { 387 positions[nxtpos++] = i; 388 } 389 } 390 } 391 392 follow(v) int v; { 393 int p; 394 if (v == line) return; 395 p = parent[v]; 396 switch(name[p]) { 397 case STAR: 398 case PLUS: cstate(v); 399 follow(p); 400 return; 401 402 case OR: 403 case QUEST: follow(p); 404 return; 405 406 case CAT: if (v == left[p]) { 407 if (cstate(right[p]) == 0) { 408 follow(p); 409 return; 410 } 411 } 412 else follow(p); 413 return; 414 case FINAL: if (tmpstat[line] != 1) { 415 tmpstat[line] = 1; 416 count++; 417 } 418 return; 419 } 420 } 421 422 423 main(argc, argv) 424 char **argv; 425 { 426 while (--argc > 0 && (++argv)[0][0]=='-') 427 switch (argv[0][1]) { 428 429 case 's': 430 sflag++; 431 continue; 432 433 case 'h': 434 hflag = 0; 435 continue; 436 437 case 'b': 438 bflag++; 439 continue; 440 441 case 'c': 442 cflag++; 443 continue; 444 445 case 'e': 446 argc--; 447 argv++; 448 goto out; 449 450 case 'f': 451 fflag++; 452 continue; 453 454 case 'l': 455 lflag++; 456 continue; 457 458 case 'n': 459 nflag++; 460 continue; 461 462 case 'v': 463 vflag++; 464 continue; 465 466 default: 467 fprintf(stderr, "egrep: unknown flag\n"); 468 continue; 469 } 470 out: 471 if (argc<=0) 472 exit(2); 473 if (fflag) { 474 fname = *argv; 475 exprfile = fopen(fname, "r"); 476 if (exprfile == (FILE *)NULL) { 477 fprintf(stderr, "egrep: can't open %s\n", fname); 478 exit(2); 479 } 480 } 481 else input = *argv; 482 argc--; 483 argv++; 484 485 yyparse(); 486 487 cfoll(line-1); 488 cgotofn(); 489 nfile = argc; 490 if (argc<=0) { 491 if (lflag) exit(1); 492 execute(0); 493 } 494 else while (--argc >= 0) { 495 execute(*argv); 496 argv++; 497 } 498 exit(retcode != 0 ? retcode : nsucc == 0); 499 } 500 501 execute(file) 502 char *file; 503 { 504 register char *p; 505 register cstat; 506 register ccount; 507 static char *buf; 508 static int blksize; 509 struct stat stb; 510 char *nlp; 511 int istat; 512 if (file) { 513 if ((f = open(file, 0)) < 0) { 514 fprintf(stderr, "egrep: can't open %s\n", file); 515 retcode = 2; 516 return; 517 } 518 } 519 else f = 0; 520 if (buf == NULL) { 521 if (fstat(f, &stb) > 0 && stb.st_blksize > 0) 522 blksize = stb.st_blksize; 523 else 524 blksize = BLKSIZE; 525 buf = (char *)malloc(2*blksize); 526 if (buf == NULL) { 527 fprintf(stderr, "egrep: no memory for %s\n", file); 528 retcode = 2; 529 return; 530 } 531 } 532 ccount = 0; 533 lnum = 1; 534 tln = 0; 535 blkno = 0; 536 p = buf; 537 nlp = p; 538 if ((ccount = read(f,p,blksize))<=0) goto done; 539 istat = cstat = gotofn[0]['\n']; 540 if (out[cstat]) goto found; 541 for (;;) { 542 cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 543 if (out[cstat]) { 544 found: for(;;) { 545 if (*p++ == '\n') { 546 if (vflag == 0) { 547 succeed: nsucc = 1; 548 if (cflag) tln++; 549 else if (sflag) 550 ; /* ugh */ 551 else if (lflag) { 552 printf("%s\n", file); 553 close(f); 554 return; 555 } 556 else { 557 if (nfile > 1 && hflag) printf("%s:", file); 558 if (bflag) printf("%d:", blkno); 559 if (nflag) printf("%ld:", lnum); 560 if (p <= nlp) { 561 while (nlp < &buf[2*blksize]) putchar(*nlp++); 562 nlp = buf; 563 } 564 while (nlp < p) putchar(*nlp++); 565 } 566 } 567 lnum++; 568 nlp = p; 569 if ((out[(cstat=istat)]) == 0) goto brk2; 570 } 571 cfound: 572 if (--ccount <= 0) { 573 if (p <= &buf[blksize]) { 574 if ((ccount = read(f, p, blksize)) <= 0) goto done; 575 } 576 else if (p == &buf[2*blksize]) { 577 p = buf; 578 if ((ccount = read(f, p, blksize)) <= 0) goto done; 579 } 580 else { 581 if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done; 582 } 583 blkno += ccount / 512; 584 } 585 } 586 } 587 if (*p++ == '\n') { 588 if (vflag) goto succeed; 589 else { 590 lnum++; 591 nlp = p; 592 if (out[(cstat=istat)]) goto cfound; 593 } 594 } 595 brk2: 596 if (--ccount <= 0) { 597 if (p <= &buf[blksize]) { 598 if ((ccount = read(f, p, blksize)) <= 0) break; 599 } 600 else if (p == &buf[2*blksize]) { 601 p = buf; 602 if ((ccount = read(f, p, blksize)) <= 0) break; 603 } 604 else { 605 if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break; 606 } 607 blkno += ccount / 512; 608 } 609 } 610 done: close(f); 611 if (cflag) { 612 if (nfile > 1) 613 printf("%s:", file); 614 printf("%ld\n", tln); 615 } 616 } 617