11175Sbill /* 21175Sbill * egrep -- print lines containing (or not containing) a regular expression 31175Sbill * 41175Sbill * status returns: 51175Sbill * 0 - ok, and some matches 61175Sbill * 1 - ok, but no matches 71175Sbill * 2 - some error 81175Sbill */ 91175Sbill %token CHAR DOT CCL NCCL OR CAT STAR PLUS QUEST 101175Sbill %left OR 111175Sbill %left CHAR DOT CCL NCCL '(' 121175Sbill %left CAT 131175Sbill %left STAR PLUS QUEST 141175Sbill 151175Sbill %{ 16*21255Smckusick static char *sccsid = "@(#)old.egrep.y 4.4 (Berkeley) 05/29/85"; 171175Sbill #include <stdio.h> 18*21255Smckusick #include <sys/types.h> 19*21255Smckusick #include <sys/stat.h> 201175Sbill 21*21255Smckusick #define BLKSIZE 8192 221175Sbill #define MAXLIN 350 231175Sbill #define MAXPOS 4000 241175Sbill #define NCHARS 128 251175Sbill #define NSTATES 128 261175Sbill #define FINAL -1 271175Sbill char gotofn[NSTATES][NCHARS]; 281175Sbill int state[NSTATES]; 291175Sbill char out[NSTATES]; 301175Sbill int line = 1; 311175Sbill int name[MAXLIN]; 321175Sbill int left[MAXLIN]; 331175Sbill int right[MAXLIN]; 341175Sbill int parent[MAXLIN]; 351175Sbill int foll[MAXLIN]; 361175Sbill int positions[MAXPOS]; 371175Sbill char chars[MAXLIN]; 381175Sbill int nxtpos; 391175Sbill int nxtchar = 0; 401175Sbill int tmpstat[MAXLIN]; 411175Sbill int initstat[MAXLIN]; 421175Sbill int xstate; 431175Sbill int count; 441175Sbill int icount; 451175Sbill char *input; 469117Srrh FILE *exprfile; 471175Sbill 481175Sbill long lnum; 491175Sbill int bflag; 501175Sbill int cflag; 511175Sbill int fflag; 521175Sbill int lflag; 531175Sbill int nflag; 541175Sbill int hflag = 1; 551175Sbill int sflag; 561175Sbill int vflag; 5711561Sedward int retcode = 0; 581175Sbill int nfile; 591175Sbill int blkno; 601175Sbill long tln; 611175Sbill int nsucc; 621175Sbill 631175Sbill int f; 649117Srrh char *fname; 651175Sbill %} 661175Sbill 671175Sbill %% 681175Sbill s: t 691175Sbill ={ unary(FINAL, $1); 701175Sbill line--; 711175Sbill } 721175Sbill ; 731175Sbill t: b r 741175Sbill ={ $$ = node(CAT, $1, $2); } 751175Sbill | OR b r OR 761175Sbill ={ $$ = node(CAT, $2, $3); } 771175Sbill | OR b r 781175Sbill ={ $$ = node(CAT, $2, $3); } 791175Sbill | b r OR 801175Sbill ={ $$ = node(CAT, $1, $2); } 811175Sbill ; 821175Sbill b: 831175Sbill ={ $$ = enter(DOT); 841175Sbill $$ = unary(STAR, $$); } 851175Sbill ; 861175Sbill r: CHAR 871175Sbill ={ $$ = enter($1); } 881175Sbill | DOT 891175Sbill ={ $$ = enter(DOT); } 901175Sbill | CCL 911175Sbill ={ $$ = cclenter(CCL); } 921175Sbill | NCCL 931175Sbill ={ $$ = cclenter(NCCL); } 941175Sbill ; 951175Sbill 961175Sbill r: r OR r 971175Sbill ={ $$ = node(OR, $1, $3); } 981175Sbill | r r %prec CAT 991175Sbill ={ $$ = node(CAT, $1, $2); } 1001175Sbill | r STAR 1011175Sbill ={ $$ = unary(STAR, $1); } 1021175Sbill | r PLUS 1031175Sbill ={ $$ = unary(PLUS, $1); } 1041175Sbill | r QUEST 1051175Sbill ={ $$ = unary(QUEST, $1); } 1061175Sbill | '(' r ')' 1071175Sbill ={ $$ = $2; } 1081175Sbill | error 1091175Sbill ; 1101175Sbill 1111175Sbill %% 1121175Sbill yyerror(s) { 1131175Sbill fprintf(stderr, "egrep: %s\n", s); 1141175Sbill exit(2); 1151175Sbill } 1161175Sbill 1171175Sbill yylex() { 1181175Sbill extern int yylval; 1191175Sbill int cclcnt, x; 1201175Sbill register char c, d; 1211175Sbill switch(c = nextch()) { 1221175Sbill case '$': 1231175Sbill case '^': c = '\n'; 1241175Sbill goto defchar; 1251175Sbill case '|': return (OR); 1261175Sbill case '*': return (STAR); 1271175Sbill case '+': return (PLUS); 1281175Sbill case '?': return (QUEST); 1291175Sbill case '(': return (c); 1301175Sbill case ')': return (c); 1311175Sbill case '.': return (DOT); 1321175Sbill case '\0': return (0); 1331175Sbill case '\n': return (OR); 1341175Sbill case '[': 1351175Sbill x = CCL; 1361175Sbill cclcnt = 0; 1371175Sbill count = nxtchar++; 1381175Sbill if ((c = nextch()) == '^') { 1391175Sbill x = NCCL; 1401175Sbill c = nextch(); 1411175Sbill } 1421175Sbill do { 1431175Sbill if (c == '\0') synerror(); 1441175Sbill if (c == '-' && cclcnt > 0 && chars[nxtchar-1] != 0) { 1451175Sbill if ((d = nextch()) != 0) { 1461175Sbill c = chars[nxtchar-1]; 1471175Sbill while (c < d) { 1481175Sbill if (nxtchar >= MAXLIN) overflo(); 1491175Sbill chars[nxtchar++] = ++c; 1501175Sbill cclcnt++; 1511175Sbill } 1521175Sbill continue; 1531175Sbill } 1541175Sbill } 1551175Sbill if (nxtchar >= MAXLIN) overflo(); 1561175Sbill chars[nxtchar++] = c; 1571175Sbill cclcnt++; 1581175Sbill } while ((c = nextch()) != ']'); 1591175Sbill chars[count] = cclcnt; 1601175Sbill return (x); 1611175Sbill case '\\': 1621175Sbill if ((c = nextch()) == '\0') synerror(); 1631175Sbill defchar: 1641175Sbill default: yylval = c; return (CHAR); 1651175Sbill } 1661175Sbill } 1671175Sbill nextch() { 1681175Sbill register char c; 1691175Sbill if (fflag) { 1709117Srrh if ((c = getc(exprfile)) == EOF) { 1719117Srrh fclose(exprfile); 1729117Srrh return(0); 1739117Srrh } 1741175Sbill } 1751175Sbill else c = *input++; 1761175Sbill return(c); 1771175Sbill } 1781175Sbill 1791175Sbill synerror() { 1801175Sbill fprintf(stderr, "egrep: syntax error\n"); 1811175Sbill exit(2); 1821175Sbill } 1831175Sbill 1841175Sbill enter(x) int x; { 1851175Sbill if(line >= MAXLIN) overflo(); 1861175Sbill name[line] = x; 1871175Sbill left[line] = 0; 1881175Sbill right[line] = 0; 1891175Sbill return(line++); 1901175Sbill } 1911175Sbill 1921175Sbill cclenter(x) int x; { 1931175Sbill register linno; 1941175Sbill linno = enter(x); 1951175Sbill right[linno] = count; 1961175Sbill return (linno); 1971175Sbill } 1981175Sbill 1991175Sbill node(x, l, r) { 2001175Sbill if(line >= MAXLIN) overflo(); 2011175Sbill name[line] = x; 2021175Sbill left[line] = l; 2031175Sbill right[line] = r; 2041175Sbill parent[l] = line; 2051175Sbill parent[r] = line; 2061175Sbill return(line++); 2071175Sbill } 2081175Sbill 2091175Sbill unary(x, d) { 2101175Sbill if(line >= MAXLIN) overflo(); 2111175Sbill name[line] = x; 2121175Sbill left[line] = d; 2131175Sbill right[line] = 0; 2141175Sbill parent[d] = line; 2151175Sbill return(line++); 2161175Sbill } 2171175Sbill overflo() { 2181175Sbill fprintf(stderr, "egrep: regular expression too long\n"); 2191175Sbill exit(2); 2201175Sbill } 2211175Sbill 2221175Sbill cfoll(v) { 2231175Sbill register i; 2241175Sbill if (left[v] == 0) { 2251175Sbill count = 0; 2261175Sbill for (i=1; i<=line; i++) tmpstat[i] = 0; 2271175Sbill follow(v); 2281175Sbill add(foll, v); 2291175Sbill } 2301175Sbill else if (right[v] == 0) cfoll(left[v]); 2311175Sbill else { 2321175Sbill cfoll(left[v]); 2331175Sbill cfoll(right[v]); 2341175Sbill } 2351175Sbill } 2361175Sbill cgotofn() { 2371175Sbill register c, i, k; 2381175Sbill int n, s; 2391175Sbill char symbol[NCHARS]; 2401175Sbill int j, nc, pc, pos; 2411175Sbill int curpos, num; 2421175Sbill int number, newpos; 2431175Sbill count = 0; 2441175Sbill for (n=3; n<=line; n++) tmpstat[n] = 0; 2451175Sbill if (cstate(line-1)==0) { 2461175Sbill tmpstat[line] = 1; 2471175Sbill count++; 2481175Sbill out[0] = 1; 2491175Sbill } 2501175Sbill for (n=3; n<=line; n++) initstat[n] = tmpstat[n]; 2511175Sbill count--; /*leave out position 1 */ 2521175Sbill icount = count; 2531175Sbill tmpstat[1] = 0; 2541175Sbill add(state, 0); 2551175Sbill n = 0; 2561175Sbill for (s=0; s<=n; s++) { 2571175Sbill if (out[s] == 1) continue; 2581175Sbill for (i=0; i<NCHARS; i++) symbol[i] = 0; 2591175Sbill num = positions[state[s]]; 2601175Sbill count = icount; 2611175Sbill for (i=3; i<=line; i++) tmpstat[i] = initstat[i]; 2621175Sbill pos = state[s] + 1; 2631175Sbill for (i=0; i<num; i++) { 2641175Sbill curpos = positions[pos]; 2651175Sbill if ((c = name[curpos]) >= 0) { 2661175Sbill if (c < NCHARS) symbol[c] = 1; 2671175Sbill else if (c == DOT) { 2681175Sbill for (k=0; k<NCHARS; k++) 2691175Sbill if (k!='\n') symbol[k] = 1; 2701175Sbill } 2711175Sbill else if (c == CCL) { 2721175Sbill nc = chars[right[curpos]]; 2731175Sbill pc = right[curpos] + 1; 2741175Sbill for (k=0; k<nc; k++) symbol[chars[pc++]] = 1; 2751175Sbill } 2761175Sbill else if (c == NCCL) { 2771175Sbill nc = chars[right[curpos]]; 2781175Sbill for (j = 0; j < NCHARS; j++) { 2791175Sbill pc = right[curpos] + 1; 2801175Sbill for (k = 0; k < nc; k++) 2811175Sbill if (j==chars[pc++]) goto cont; 2821175Sbill if (j!='\n') symbol[j] = 1; 2831175Sbill cont:; 2841175Sbill } 2851175Sbill } 2861175Sbill else printf("something's funny\n"); 2871175Sbill } 2881175Sbill pos++; 2891175Sbill } 2901175Sbill for (c=0; c<NCHARS; c++) { 2911175Sbill if (symbol[c] == 1) { /* nextstate(s,c) */ 2921175Sbill count = icount; 2931175Sbill for (i=3; i <= line; i++) tmpstat[i] = initstat[i]; 2941175Sbill pos = state[s] + 1; 2951175Sbill for (i=0; i<num; i++) { 2961175Sbill curpos = positions[pos]; 2971175Sbill if ((k = name[curpos]) >= 0) 2981175Sbill if ( 2991175Sbill (k == c) 3001175Sbill | (k == DOT) 3011175Sbill | (k == CCL && member(c, right[curpos], 1)) 3021175Sbill | (k == NCCL && member(c, right[curpos], 0)) 3031175Sbill ) { 3041175Sbill number = positions[foll[curpos]]; 3051175Sbill newpos = foll[curpos] + 1; 3061175Sbill for (k=0; k<number; k++) { 3071175Sbill if (tmpstat[positions[newpos]] != 1) { 3081175Sbill tmpstat[positions[newpos]] = 1; 3091175Sbill count++; 3101175Sbill } 3111175Sbill newpos++; 3121175Sbill } 3131175Sbill } 3141175Sbill pos++; 3151175Sbill } /* end nextstate */ 3161175Sbill if (notin(n)) { 3171175Sbill if (n >= NSTATES) overflo(); 3181175Sbill add(state, ++n); 3191175Sbill if (tmpstat[line] == 1) out[n] = 1; 3201175Sbill gotofn[s][c] = n; 3211175Sbill } 3221175Sbill else { 3231175Sbill gotofn[s][c] = xstate; 3241175Sbill } 3251175Sbill } 3261175Sbill } 3271175Sbill } 3281175Sbill } 3291175Sbill 3301175Sbill cstate(v) { 3311175Sbill register b; 3321175Sbill if (left[v] == 0) { 3331175Sbill if (tmpstat[v] != 1) { 3341175Sbill tmpstat[v] = 1; 3351175Sbill count++; 3361175Sbill } 3371175Sbill return(1); 3381175Sbill } 3391175Sbill else if (right[v] == 0) { 3401175Sbill if (cstate(left[v]) == 0) return (0); 3411175Sbill else if (name[v] == PLUS) return (1); 3421175Sbill else return (0); 3431175Sbill } 3441175Sbill else if (name[v] == CAT) { 3451175Sbill if (cstate(left[v]) == 0 && cstate(right[v]) == 0) return (0); 3461175Sbill else return (1); 3471175Sbill } 3481175Sbill else { /* name[v] == OR */ 3491175Sbill b = cstate(right[v]); 3501175Sbill if (cstate(left[v]) == 0 || b == 0) return (0); 3511175Sbill else return (1); 3521175Sbill } 3531175Sbill } 3541175Sbill 3551175Sbill 3561175Sbill member(symb, set, torf) { 3571175Sbill register i, num, pos; 3581175Sbill num = chars[set]; 3591175Sbill pos = set + 1; 3601175Sbill for (i=0; i<num; i++) 3611175Sbill if (symb == chars[pos++]) return (torf); 3621175Sbill return (!torf); 3631175Sbill } 3641175Sbill 3651175Sbill notin(n) { 3661175Sbill register i, j, pos; 3671175Sbill for (i=0; i<=n; i++) { 3681175Sbill if (positions[state[i]] == count) { 3691175Sbill pos = state[i] + 1; 3701175Sbill for (j=0; j < count; j++) 3711175Sbill if (tmpstat[positions[pos++]] != 1) goto nxt; 3721175Sbill xstate = i; 3731175Sbill return (0); 3741175Sbill } 3751175Sbill nxt: ; 3761175Sbill } 3771175Sbill return (1); 3781175Sbill } 3791175Sbill 3801175Sbill add(array, n) int *array; { 3811175Sbill register i; 3821175Sbill if (nxtpos + count > MAXPOS) overflo(); 3831175Sbill array[n] = nxtpos; 3841175Sbill positions[nxtpos++] = count; 3851175Sbill for (i=3; i <= line; i++) { 3861175Sbill if (tmpstat[i] == 1) { 3871175Sbill positions[nxtpos++] = i; 3881175Sbill } 3891175Sbill } 3901175Sbill } 3911175Sbill 3921175Sbill follow(v) int v; { 3931175Sbill int p; 3941175Sbill if (v == line) return; 3951175Sbill p = parent[v]; 3961175Sbill switch(name[p]) { 3971175Sbill case STAR: 3981175Sbill case PLUS: cstate(v); 3991175Sbill follow(p); 4001175Sbill return; 4011175Sbill 4021175Sbill case OR: 4031175Sbill case QUEST: follow(p); 4041175Sbill return; 4051175Sbill 4061175Sbill case CAT: if (v == left[p]) { 4071175Sbill if (cstate(right[p]) == 0) { 4081175Sbill follow(p); 4091175Sbill return; 4101175Sbill } 4111175Sbill } 4121175Sbill else follow(p); 4131175Sbill return; 4141175Sbill case FINAL: if (tmpstat[line] != 1) { 4151175Sbill tmpstat[line] = 1; 4161175Sbill count++; 4171175Sbill } 4181175Sbill return; 4191175Sbill } 4201175Sbill } 4211175Sbill 4221175Sbill 4231175Sbill main(argc, argv) 4241175Sbill char **argv; 4251175Sbill { 4261175Sbill while (--argc > 0 && (++argv)[0][0]=='-') 4271175Sbill switch (argv[0][1]) { 4281175Sbill 4291175Sbill case 's': 4301175Sbill sflag++; 4311175Sbill continue; 4321175Sbill 4331175Sbill case 'h': 4341175Sbill hflag = 0; 4351175Sbill continue; 4361175Sbill 4371175Sbill case 'b': 4381175Sbill bflag++; 4391175Sbill continue; 4401175Sbill 4411175Sbill case 'c': 4421175Sbill cflag++; 4431175Sbill continue; 4441175Sbill 4451175Sbill case 'e': 4461175Sbill argc--; 4471175Sbill argv++; 4481175Sbill goto out; 4491175Sbill 4501175Sbill case 'f': 4511175Sbill fflag++; 4521175Sbill continue; 4531175Sbill 4541175Sbill case 'l': 4551175Sbill lflag++; 4561175Sbill continue; 4571175Sbill 4581175Sbill case 'n': 4591175Sbill nflag++; 4601175Sbill continue; 4611175Sbill 4621175Sbill case 'v': 4631175Sbill vflag++; 4641175Sbill continue; 4651175Sbill 4661175Sbill default: 4671175Sbill fprintf(stderr, "egrep: unknown flag\n"); 4681175Sbill continue; 4691175Sbill } 4701175Sbill out: 4711175Sbill if (argc<=0) 4721175Sbill exit(2); 4731175Sbill if (fflag) { 4749117Srrh fname = *argv; 4759117Srrh exprfile = fopen(fname, "r"); 4769117Srrh if (exprfile == (FILE *)NULL) { 4771175Sbill fprintf(stderr, "egrep: can't open %s\n", fname); 4781175Sbill exit(2); 4791175Sbill } 4801175Sbill } 4811175Sbill else input = *argv; 4821175Sbill argc--; 4831175Sbill argv++; 4841175Sbill 4851175Sbill yyparse(); 4861175Sbill 4871175Sbill cfoll(line-1); 4881175Sbill cgotofn(); 4891175Sbill nfile = argc; 4901175Sbill if (argc<=0) { 4911175Sbill if (lflag) exit(1); 4921175Sbill execute(0); 4931175Sbill } 4941175Sbill else while (--argc >= 0) { 4951175Sbill execute(*argv); 4961175Sbill argv++; 4971175Sbill } 49811561Sedward exit(retcode != 0 ? retcode : nsucc == 0); 4991175Sbill } 5001175Sbill 5011175Sbill execute(file) 5021175Sbill char *file; 5031175Sbill { 5041175Sbill register char *p; 5051175Sbill register cstat; 5061175Sbill register ccount; 507*21255Smckusick static char *buf; 508*21255Smckusick static int blksize; 509*21255Smckusick struct stat stb; 5101175Sbill char *nlp; 5111175Sbill int istat; 5121175Sbill if (file) { 5131175Sbill if ((f = open(file, 0)) < 0) { 5141175Sbill fprintf(stderr, "egrep: can't open %s\n", file); 51511561Sedward retcode = 2; 51611561Sedward return; 5171175Sbill } 5181175Sbill } 5191175Sbill else f = 0; 520*21255Smckusick if (buf == NULL) { 521*21255Smckusick if (fstat(f, &stb) > 0 && stb.st_blksize > 0) 522*21255Smckusick blksize = stb.st_blksize; 523*21255Smckusick else 524*21255Smckusick blksize = BLKSIZE; 525*21255Smckusick buf = (char *)malloc(2*blksize); 526*21255Smckusick if (buf == NULL) { 527*21255Smckusick fprintf(stderr, "egrep: no memory for %s\n", file); 528*21255Smckusick retcode = 2; 529*21255Smckusick return; 530*21255Smckusick } 531*21255Smckusick } 5321175Sbill ccount = 0; 5331175Sbill lnum = 1; 5341175Sbill tln = 0; 5351175Sbill blkno = 0; 5361175Sbill p = buf; 5371175Sbill nlp = p; 538*21255Smckusick if ((ccount = read(f,p,blksize))<=0) goto done; 5391175Sbill istat = cstat = gotofn[0]['\n']; 5401175Sbill if (out[cstat]) goto found; 5411175Sbill for (;;) { 5421175Sbill cstat = gotofn[cstat][*p&0377]; /* all input chars made positive */ 5431175Sbill if (out[cstat]) { 5441175Sbill found: for(;;) { 5451175Sbill if (*p++ == '\n') { 5461175Sbill if (vflag == 0) { 5471175Sbill succeed: nsucc = 1; 5481175Sbill if (cflag) tln++; 5491175Sbill else if (sflag) 5501175Sbill ; /* ugh */ 5511175Sbill else if (lflag) { 5521175Sbill printf("%s\n", file); 5531175Sbill close(f); 5541175Sbill return; 5551175Sbill } 5561175Sbill else { 5571175Sbill if (nfile > 1 && hflag) printf("%s:", file); 5581175Sbill if (bflag) printf("%d:", blkno); 5591175Sbill if (nflag) printf("%ld:", lnum); 5601175Sbill if (p <= nlp) { 561*21255Smckusick while (nlp < &buf[2*blksize]) putchar(*nlp++); 5621175Sbill nlp = buf; 5631175Sbill } 5641175Sbill while (nlp < p) putchar(*nlp++); 5651175Sbill } 5661175Sbill } 5671175Sbill lnum++; 5681175Sbill nlp = p; 5691175Sbill if ((out[(cstat=istat)]) == 0) goto brk2; 5701175Sbill } 5711175Sbill cfound: 5721175Sbill if (--ccount <= 0) { 573*21255Smckusick if (p <= &buf[blksize]) { 574*21255Smckusick if ((ccount = read(f, p, blksize)) <= 0) goto done; 5751175Sbill } 576*21255Smckusick else if (p == &buf[2*blksize]) { 5771175Sbill p = buf; 578*21255Smckusick if ((ccount = read(f, p, blksize)) <= 0) goto done; 5791175Sbill } 5801175Sbill else { 581*21255Smckusick if ((ccount = read(f, p, &buf[2*blksize]-p)) <= 0) goto done; 5821175Sbill } 583*21255Smckusick blkno += ccount / 512; 5841175Sbill } 5851175Sbill } 5861175Sbill } 5871175Sbill if (*p++ == '\n') { 5881175Sbill if (vflag) goto succeed; 5891175Sbill else { 5901175Sbill lnum++; 5911175Sbill nlp = p; 5921175Sbill if (out[(cstat=istat)]) goto cfound; 5931175Sbill } 5941175Sbill } 5951175Sbill brk2: 5961175Sbill if (--ccount <= 0) { 597*21255Smckusick if (p <= &buf[blksize]) { 598*21255Smckusick if ((ccount = read(f, p, blksize)) <= 0) break; 5991175Sbill } 600*21255Smckusick else if (p == &buf[2*blksize]) { 6011175Sbill p = buf; 602*21255Smckusick if ((ccount = read(f, p, blksize)) <= 0) break; 6031175Sbill } 6041175Sbill else { 605*21255Smckusick if ((ccount = read(f, p, &buf[2*blksize] - p)) <= 0) break; 6061175Sbill } 607*21255Smckusick blkno += ccount / 512; 6081175Sbill } 6091175Sbill } 6101175Sbill done: close(f); 6111175Sbill if (cflag) { 6121175Sbill if (nfile > 1) 6131175Sbill printf("%s:", file); 6141175Sbill printf("%ld\n", tln); 6151175Sbill } 6161175Sbill } 617