1*1175Sbill /* 2*1175Sbill * egrep -- print lines containing (or not containing) a regular expression 3*1175Sbill * 4*1175Sbill * status returns: 5*1175Sbill * 0 - ok, and some matches 6*1175Sbill * 1 - ok, but no matches 7*1175Sbill * 2 - some error 8*1175Sbill */ 9*1175Sbill %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 10*1175Sbill %left OR 11*1175Sbill %left CHAR DOT CCL NCCL '(' 12*1175Sbill %left CAT 13*1175Sbill %left STAR PLUS QUEST 14*1175Sbill 15*1175Sbill %{ 16*1175Sbill static char *sccsid = "@(#)old.egrep.y 4.1 (Berkeley) 10/01/80"; 17*1175Sbill #include <stdio.h> 18*1175Sbill 19*1175Sbill #define MAXLIN 350 20*1175Sbill #define MAXPOS 4000 21*1175Sbill #define NCHARS 128 22*1175Sbill #define NSTATES 128 23*1175Sbill #define FINAL -1 24*1175Sbill char gotofn[NSTATES][NCHARS]; 25*1175Sbill int state[NSTATES]; 26*1175Sbill char out[NSTATES]; 27*1175Sbill int line = 1; 28*1175Sbill int name[MAXLIN]; 29*1175Sbill int left[MAXLIN]; 30*1175Sbill int right[MAXLIN]; 31*1175Sbill int parent[MAXLIN]; 32*1175Sbill int foll[MAXLIN]; 33*1175Sbill int positions[MAXPOS]; 34*1175Sbill char chars[MAXLIN]; 35*1175Sbill int nxtpos; 36*1175Sbill int nxtchar = 0; 37*1175Sbill int tmpstat[MAXLIN]; 38*1175Sbill int initstat[MAXLIN]; 39*1175Sbill int xstate; 40*1175Sbill int count; 41*1175Sbill int icount; 42*1175Sbill char *input; 43*1175Sbill 44*1175Sbill long lnum; 45*1175Sbill int bflag; 46*1175Sbill int cflag; 47*1175Sbill int fflag; 48*1175Sbill int lflag; 49*1175Sbill int nflag; 50*1175Sbill int hflag = 1; 51*1175Sbill int sflag; 52*1175Sbill int vflag; 53*1175Sbill int nfile; 54*1175Sbill int blkno; 55*1175Sbill long tln; 56*1175Sbill int nsucc; 57*1175Sbill 58*1175Sbill int f; 59*1175Sbill int fname; 60*1175Sbill %} 61*1175Sbill 62*1175Sbill %% 63*1175Sbill s: t 64*1175Sbill ={ unary(FINAL, $1); 65*1175Sbill line--; 66*1175Sbill } 67*1175Sbill ; 68*1175Sbill t: b r 69*1175Sbill ={ $$ = node(CAT, $1, $2); } 70*1175Sbill | OR b r OR 71*1175Sbill ={ $$ = node(CAT, $2, $3); } 72*1175Sbill | OR b r 73*1175Sbill ={ $$ = node(CAT, $2, $3); } 74*1175Sbill | b r OR 75*1175Sbill ={ $$ = node(CAT, $1, $2); } 76*1175Sbill ; 77*1175Sbill b: 78*1175Sbill ={ $$ = enter(DOT); 79*1175Sbill $$ = unary(STAR, $$); } 80*1175Sbill ; 81*1175Sbill r: CHAR 82*1175Sbill ={ $$ = enter($1); } 83*1175Sbill | DOT 84*1175Sbill ={ $$ = enter(DOT); } 85*1175Sbill | CCL 86*1175Sbill ={ $$ = cclenter(CCL); } 87*1175Sbill | NCCL 88*1175Sbill ={ $$ = cclenter(NCCL); } 89*1175Sbill ; 90*1175Sbill 91*1175Sbill r: r OR r 92*1175Sbill ={ $$ = node(OR, $1, $3); } 93*1175Sbill | r r %prec CAT 94*1175Sbill ={ $$ = node(CAT, $1, $2); } 95*1175Sbill | r STAR 96*1175Sbill ={ $$ = unary(STAR, $1); } 97*1175Sbill | r PLUS 98*1175Sbill ={ $$ = unary(PLUS, $1); } 99*1175Sbill | r QUEST 100*1175Sbill ={ $$ = unary(QUEST, $1); } 101*1175Sbill | '(' r ')' 102*1175Sbill ={ $$ = $2; } 103*1175Sbill | error 104*1175Sbill ; 105*1175Sbill 106*1175Sbill %% 107*1175Sbill yyerror(s) { 108*1175Sbill fprintf(stderr, "egrep: %s\n", s); 109*1175Sbill exit(2); 110*1175Sbill } 111*1175Sbill 112*1175Sbill yylex() { 113*1175Sbill extern int yylval; 114*1175Sbill int cclcnt, x; 115*1175Sbill register char c, d; 116*1175Sbill switch(c = nextch()) { 117*1175Sbill case '$': 118*1175Sbill case '^': c = '\n'; 119*1175Sbill goto defchar; 120*1175Sbill case '|': return (OR); 121*1175Sbill case '*': return (STAR); 122*1175Sbill case '+': return (PLUS); 123*1175Sbill case '?': return (QUEST); 124*1175Sbill case '(': return (c); 125*1175Sbill case ')': return (c); 126*1175Sbill case '.': return (DOT); 127*1175Sbill case '\0': return (0); 128*1175Sbill case '\n': return (OR); 129*1175Sbill case '[': 130*1175Sbill x = CCL; 131*1175Sbill cclcnt = 0; 132*1175Sbill count = nxtchar++; 133*1175Sbill if ((c = nextch()) == '^') { 134*1175Sbill x = NCCL; 135*1175Sbill c = nextch(); 136*1175Sbill } 137*1175Sbill do { 138*1175Sbill if (c == '\0') synerror(); 139*1175Sbill if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 140*1175Sbill if ((d = nextch()) != 0) { 141*1175Sbill c = chars[nxtchar-1]; 142*1175Sbill while (c < d) { 143*1175Sbill if (nxtchar >= MAXLIN) overflo(); 144*1175Sbill chars[nxtchar++] = ++c; 145*1175Sbill cclcnt++; 146*1175Sbill } 147*1175Sbill continue; 148*1175Sbill } 149*1175Sbill } 150*1175Sbill if (nxtchar >= MAXLIN) overflo(); 151*1175Sbill chars[nxtchar++] = c; 152*1175Sbill cclcnt++; 153*1175Sbill } while ((c = nextch()) != ']'); 154*1175Sbill chars[count] = cclcnt; 155*1175Sbill return (x); 156*1175Sbill case '\\': 157*1175Sbill if ((c = nextch()) == '\0') synerror(); 158*1175Sbill defchar: 159*1175Sbill default: yylval = c; return (CHAR); 160*1175Sbill } 161*1175Sbill } 162*1175Sbill nextch() { 163*1175Sbill register char c; 164*1175Sbill if (fflag) { 165*1175Sbill if ((c = getc(stdin)) == EOF) return(0); 166*1175Sbill } 167*1175Sbill else c = *input++; 168*1175Sbill return(c); 169*1175Sbill } 170*1175Sbill 171*1175Sbill synerror() { 172*1175Sbill fprintf(stderr, "egrep: syntax error\n"); 173*1175Sbill exit(2); 174*1175Sbill } 175*1175Sbill 176*1175Sbill enter(x) int x; { 177*1175Sbill if(line >= MAXLIN) overflo(); 178*1175Sbill name[line] = x; 179*1175Sbill left[line] = 0; 180*1175Sbill right[line] = 0; 181*1175Sbill return(line++); 182*1175Sbill } 183*1175Sbill 184*1175Sbill cclenter(x) int x; { 185*1175Sbill register linno; 186*1175Sbill linno = enter(x); 187*1175Sbill right[linno] = count; 188*1175Sbill return (linno); 189*1175Sbill } 190*1175Sbill 191*1175Sbill node(x, l, r) { 192*1175Sbill if(line >= MAXLIN) overflo(); 193*1175Sbill name[line] = x; 194*1175Sbill left[line] = l; 195*1175Sbill right[line] = r; 196*1175Sbill parent[l] = line; 197*1175Sbill parent[r] = line; 198*1175Sbill return(line++); 199*1175Sbill } 200*1175Sbill 201*1175Sbill unary(x, d) { 202*1175Sbill if(line >= MAXLIN) overflo(); 203*1175Sbill name[line] = x; 204*1175Sbill left[line] = d; 205*1175Sbill right[line] = 0; 206*1175Sbill parent[d] = line; 207*1175Sbill return(line++); 208*1175Sbill } 209*1175Sbill overflo() { 210*1175Sbill fprintf(stderr, "egrep: regular expression too long\n"); 211*1175Sbill exit(2); 212*1175Sbill } 213*1175Sbill 214*1175Sbill cfoll(v) { 215*1175Sbill register i; 216*1175Sbill if (left[v] == 0) { 217*1175Sbill count = 0; 218*1175Sbill for (i=1; i<=line; i++) tmpstat[i] = 0; 219*1175Sbill follow(v); 220*1175Sbill add(foll, v); 221*1175Sbill } 222*1175Sbill else if (right[v] == 0) cfoll(left[v]); 223*1175Sbill else { 224*1175Sbill cfoll(left[v]); 225*1175Sbill cfoll(right[v]); 226*1175Sbill } 227*1175Sbill } 228*1175Sbill cgotofn() { 229*1175Sbill register c, i, k; 230*1175Sbill int n, s; 231*1175Sbill char symbol[NCHARS]; 232*1175Sbill int j, nc, pc, pos; 233*1175Sbill int curpos, num; 234*1175Sbill int number, newpos; 235*1175Sbill count = 0; 236*1175Sbill for (n=3; n<=line; n++) tmpstat[n] = 0; 237*1175Sbill if (cstate(line-1)==0) { 238*1175Sbill tmpstat[line] = 1; 239*1175Sbill count++; 240*1175Sbill out[0] = 1; 241*1175Sbill } 242*1175Sbill for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 243*1175Sbill count--; /*leave out position 1 */ 244*1175Sbill icount = count; 245*1175Sbill tmpstat[1] = 0; 246*1175Sbill add(state, 0); 247*1175Sbill n = 0; 248*1175Sbill for (s=0; s<=n; s++) { 249*1175Sbill if (out[s] == 1) continue; 250*1175Sbill for (i=0; i<NCHARS; i++) symbol[i] = 0; 251*1175Sbill num = positions[state[s]]; 252*1175Sbill count = icount; 253*1175Sbill for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 254*1175Sbill pos = state[s] + 1; 255*1175Sbill for (i=0; i<num; i++) { 256*1175Sbill curpos = positions[pos]; 257*1175Sbill if ((c = name[curpos]) >= 0) { 258*1175Sbill if (c < NCHARS) symbol[c] = 1; 259*1175Sbill else if (c == DOT) { 260*1175Sbill for (k=0; k<NCHARS; k++) 261*1175Sbill if (k!='\n') symbol[k] = 1; 262*1175Sbill } 263*1175Sbill else if (c == CCL) { 264*1175Sbill nc = chars[right[curpos]]; 265*1175Sbill pc = right[curpos] + 1; 266*1175Sbill for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 267*1175Sbill } 268*1175Sbill else if (c == NCCL) { 269*1175Sbill nc = chars[right[curpos]]; 270*1175Sbill for (j = 0; j < NCHARS; j++) { 271*1175Sbill pc = right[curpos] + 1; 272*1175Sbill for (k = 0; k < nc; k++) 273*1175Sbill if (j==chars[pc++]) goto cont; 274*1175Sbill if (j!='\n') symbol[j] = 1; 275*1175Sbill cont:; 276*1175Sbill } 277*1175Sbill } 278*1175Sbill else printf("something's funny\n"); 279*1175Sbill } 280*1175Sbill pos++; 281*1175Sbill } 282*1175Sbill for (c=0; c<NCHARS; c++) { 283*1175Sbill if (symbol[c] == 1) { /* nextstate(s,c) */ 284*1175Sbill count = icount; 285*1175Sbill for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 286*1175Sbill pos = state[s] + 1; 287*1175Sbill for (i=0; i<num; i++) { 288*1175Sbill curpos = positions[pos]; 289*1175Sbill if ((k = name[curpos]) >= 0) 290*1175Sbill if ( 291*1175Sbill (k == c) 292*1175Sbill | (k == DOT) 293*1175Sbill | (k == CCL && member(c, right[curpos], 1)) 294*1175Sbill | (k == NCCL && member(c, right[curpos], 0)) 295*1175Sbill ) { 296*1175Sbill number = positions[foll[curpos]]; 297*1175Sbill newpos = foll[curpos] + 1; 298*1175Sbill for (k=0; k<number; k++) { 299*1175Sbill if (tmpstat[positions[newpos]] != 1) { 300*1175Sbill tmpstat[positions[newpos]] = 1; 301*1175Sbill count++; 302*1175Sbill } 303*1175Sbill newpos++; 304*1175Sbill } 305*1175Sbill } 306*1175Sbill pos++; 307*1175Sbill } /* end nextstate */ 308*1175Sbill if (notin(n)) { 309*1175Sbill if (n >= NSTATES) overflo(); 310*1175Sbill add(state, ++n); 311*1175Sbill if (tmpstat[line] == 1) out[n] = 1; 312*1175Sbill gotofn[s][c] = n; 313*1175Sbill } 314*1175Sbill else { 315*1175Sbill gotofn[s][c] = xstate; 316*1175Sbill } 317*1175Sbill } 318*1175Sbill } 319*1175Sbill } 320*1175Sbill } 321*1175Sbill 322*1175Sbill cstate(v) { 323*1175Sbill register b; 324*1175Sbill if (left[v] == 0) { 325*1175Sbill if (tmpstat[v] != 1) { 326*1175Sbill tmpstat[v] = 1; 327*1175Sbill count++; 328*1175Sbill } 329*1175Sbill return(1); 330*1175Sbill } 331*1175Sbill else if (right[v] == 0) { 332*1175Sbill if (cstate(left[v]) == 0) return (0); 333*1175Sbill else if (name[v] == PLUS) return (1); 334*1175Sbill else return (0); 335*1175Sbill } 336*1175Sbill else if (name[v] == CAT) { 337*1175Sbill if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 338*1175Sbill else return (1); 339*1175Sbill } 340*1175Sbill else { /* name[v] == OR */ 341*1175Sbill b = cstate(right[v]); 342*1175Sbill if (cstate(left[v]) == 0 || b == 0) return (0); 343*1175Sbill else return (1); 344*1175Sbill } 345*1175Sbill } 346*1175Sbill 347*1175Sbill 348*1175Sbill member(symb, set, torf) { 349*1175Sbill register i, num, pos; 350*1175Sbill num = chars[set]; 351*1175Sbill pos = set + 1; 352*1175Sbill for (i=0; i<num; i++) 353*1175Sbill if (symb == chars[pos++]) return (torf); 354*1175Sbill return (!torf); 355*1175Sbill } 356*1175Sbill 357*1175Sbill notin(n) { 358*1175Sbill register i, j, pos; 359*1175Sbill for (i=0; i<=n; i++) { 360*1175Sbill if (positions[state[i]] == count) { 361*1175Sbill pos = state[i] + 1; 362*1175Sbill for (j=0; j < count; j++) 363*1175Sbill if (tmpstat[positions[pos++]] != 1) goto nxt; 364*1175Sbill xstate = i; 365*1175Sbill return (0); 366*1175Sbill } 367*1175Sbill nxt: ; 368*1175Sbill } 369*1175Sbill return (1); 370*1175Sbill } 371*1175Sbill 372*1175Sbill add(array, n) int *array; { 373*1175Sbill register i; 374*1175Sbill if (nxtpos + count > MAXPOS) overflo(); 375*1175Sbill array[n] = nxtpos; 376*1175Sbill positions[nxtpos++] = count; 377*1175Sbill for (i=3; i <= line; i++) { 378*1175Sbill if (tmpstat[i] == 1) { 379*1175Sbill positions[nxtpos++] = i; 380*1175Sbill } 381*1175Sbill } 382*1175Sbill } 383*1175Sbill 384*1175Sbill follow(v) int v; { 385*1175Sbill int p; 386*1175Sbill if (v == line) return; 387*1175Sbill p = parent[v]; 388*1175Sbill switch(name[p]) { 389*1175Sbill case STAR: 390*1175Sbill case PLUS: cstate(v); 391*1175Sbill follow(p); 392*1175Sbill return; 393*1175Sbill 394*1175Sbill case OR: 395*1175Sbill case QUEST: follow(p); 396*1175Sbill return; 397*1175Sbill 398*1175Sbill case CAT: if (v == left[p]) { 399*1175Sbill if (cstate(right[p]) == 0) { 400*1175Sbill follow(p); 401*1175Sbill return; 402*1175Sbill } 403*1175Sbill } 404*1175Sbill else follow(p); 405*1175Sbill return; 406*1175Sbill case FINAL: if (tmpstat[line] != 1) { 407*1175Sbill tmpstat[line] = 1; 408*1175Sbill count++; 409*1175Sbill } 410*1175Sbill return; 411*1175Sbill } 412*1175Sbill } 413*1175Sbill 414*1175Sbill 415*1175Sbill main(argc, argv) 416*1175Sbill char **argv; 417*1175Sbill { 418*1175Sbill while (--argc > 0 && (++argv)[0][0]=='-') 419*1175Sbill switch (argv[0][1]) { 420*1175Sbill 421*1175Sbill case 's': 422*1175Sbill sflag++; 423*1175Sbill continue; 424*1175Sbill 425*1175Sbill case 'h': 426*1175Sbill hflag = 0; 427*1175Sbill continue; 428*1175Sbill 429*1175Sbill case 'b': 430*1175Sbill bflag++; 431*1175Sbill continue; 432*1175Sbill 433*1175Sbill case 'c': 434*1175Sbill cflag++; 435*1175Sbill continue; 436*1175Sbill 437*1175Sbill case 'e': 438*1175Sbill argc--; 439*1175Sbill argv++; 440*1175Sbill goto out; 441*1175Sbill 442*1175Sbill case 'f': 443*1175Sbill fflag++; 444*1175Sbill continue; 445*1175Sbill 446*1175Sbill case 'l': 447*1175Sbill lflag++; 448*1175Sbill continue; 449*1175Sbill 450*1175Sbill case 'n': 451*1175Sbill nflag++; 452*1175Sbill continue; 453*1175Sbill 454*1175Sbill case 'v': 455*1175Sbill vflag++; 456*1175Sbill continue; 457*1175Sbill 458*1175Sbill default: 459*1175Sbill fprintf(stderr, "egrep: unknown flag\n"); 460*1175Sbill continue; 461*1175Sbill } 462*1175Sbill out: 463*1175Sbill if (argc<=0) 464*1175Sbill exit(2); 465*1175Sbill if (fflag) { 466*1175Sbill if (freopen(fname = *argv, "r", stdin) == NULL) { 467*1175Sbill fprintf(stderr, "egrep: can't open %s\n", fname); 468*1175Sbill exit(2); 469*1175Sbill } 470*1175Sbill } 471*1175Sbill else input = *argv; 472*1175Sbill argc--; 473*1175Sbill argv++; 474*1175Sbill 475*1175Sbill yyparse(); 476*1175Sbill 477*1175Sbill cfoll(line-1); 478*1175Sbill cgotofn(); 479*1175Sbill nfile = argc; 480*1175Sbill if (argc<=0) { 481*1175Sbill if (lflag) exit(1); 482*1175Sbill execute(0); 483*1175Sbill } 484*1175Sbill else while (--argc >= 0) { 485*1175Sbill execute(*argv); 486*1175Sbill argv++; 487*1175Sbill } 488*1175Sbill exit(nsucc == 0); 489*1175Sbill } 490*1175Sbill 491*1175Sbill execute(file) 492*1175Sbill char *file; 493*1175Sbill { 494*1175Sbill register char *p; 495*1175Sbill register cstat; 496*1175Sbill register ccount; 497*1175Sbill char buf[1024]; 498*1175Sbill char *nlp; 499*1175Sbill int istat; 500*1175Sbill if (file) { 501*1175Sbill if ((f = open(file, 0)) < 0) { 502*1175Sbill fprintf(stderr, "egrep: can't open %s\n", file); 503*1175Sbill exit(2); 504*1175Sbill } 505*1175Sbill } 506*1175Sbill else f = 0; 507*1175Sbill ccount = 0; 508*1175Sbill lnum = 1; 509*1175Sbill tln = 0; 510*1175Sbill blkno = 0; 511*1175Sbill p = buf; 512*1175Sbill nlp = p; 513*1175Sbill if ((ccount = read(f,p,512))<=0) goto done; 514*1175Sbill istat = cstat = gotofn[0]['\n']; 515*1175Sbill if (out[cstat]) goto found; 516*1175Sbill for (;;) { 517*1175Sbill cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 518*1175Sbill if (out[cstat]) { 519*1175Sbill found: for(;;) { 520*1175Sbill if (*p++ == '\n') { 521*1175Sbill if (vflag == 0) { 522*1175Sbill succeed: nsucc = 1; 523*1175Sbill if (cflag) tln++; 524*1175Sbill else if (sflag) 525*1175Sbill ; /* ugh */ 526*1175Sbill else if (lflag) { 527*1175Sbill printf("%s\n", file); 528*1175Sbill close(f); 529*1175Sbill return; 530*1175Sbill } 531*1175Sbill else { 532*1175Sbill if (nfile > 1 && hflag) printf("%s:", file); 533*1175Sbill if (bflag) printf("%d:", blkno); 534*1175Sbill if (nflag) printf("%ld:", lnum); 535*1175Sbill if (p <= nlp) { 536*1175Sbill while (nlp < &buf[1024]) putchar(*nlp++); 537*1175Sbill nlp = buf; 538*1175Sbill } 539*1175Sbill while (nlp < p) putchar(*nlp++); 540*1175Sbill } 541*1175Sbill } 542*1175Sbill lnum++; 543*1175Sbill nlp = p; 544*1175Sbill if ((out[(cstat=istat)]) == 0) goto brk2; 545*1175Sbill } 546*1175Sbill cfound: 547*1175Sbill if (--ccount <= 0) { 548*1175Sbill if (p <= &buf[512]) { 549*1175Sbill if ((ccount = read(f, p, 512)) <= 0) goto done; 550*1175Sbill } 551*1175Sbill else if (p == &buf[1024]) { 552*1175Sbill p = buf; 553*1175Sbill if ((ccount = read(f, p, 512)) <= 0) goto done; 554*1175Sbill } 555*1175Sbill else { 556*1175Sbill if ((ccount = read(f, p, &buf[1024]-p)) <= 0) goto done; 557*1175Sbill } 558*1175Sbill blkno++; 559*1175Sbill } 560*1175Sbill } 561*1175Sbill } 562*1175Sbill if (*p++ == '\n') { 563*1175Sbill if (vflag) goto succeed; 564*1175Sbill else { 565*1175Sbill lnum++; 566*1175Sbill nlp = p; 567*1175Sbill if (out[(cstat=istat)]) goto cfound; 568*1175Sbill } 569*1175Sbill } 570*1175Sbill brk2: 571*1175Sbill if (--ccount <= 0) { 572*1175Sbill if (p <= &buf[512]) { 573*1175Sbill if ((ccount = read(f, p, 512)) <= 0) break; 574*1175Sbill } 575*1175Sbill else if (p == &buf[1024]) { 576*1175Sbill p = buf; 577*1175Sbill if ((ccount = read(f, p, 512)) <= 0) break; 578*1175Sbill } 579*1175Sbill else { 580*1175Sbill if ((ccount = read(f, p, &buf[1024] - p)) <= 0) break; 581*1175Sbill } 582*1175Sbill blkno++; 583*1175Sbill } 584*1175Sbill } 585*1175Sbill done: close(f); 586*1175Sbill if (cflag) { 587*1175Sbill if (nfile > 1) 588*1175Sbill printf("%s:", file); 589*1175Sbill printf("%ld\n", tln); 590*1175Sbill } 591*1175Sbill } 592