1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 /* Copyright (c) 1987, 1988 Microsoft Corporation */
31 /* All Rights Reserved */
32
33 #pragma ident "%Z%%M% %I% %E% SMI"
34
35 /*
36 * fgrep -- print all lines containing any of a set of keywords
37 *
38 * status returns:
39 * 0 - ok, and some matches
40 * 1 - ok, but no matches
41 * 2 - some error
42 */
43
44 #include <stdio.h>
45 #include <ctype.h>
46 #include <sys/types.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <locale.h>
50 #include <libintl.h>
51 #include <euc.h>
52 #include <sys/stat.h>
53 #include <fcntl.h>
54
55 #include <getwidth.h>
56
57 eucwidth_t WW;
58 #define WIDTH1 WW._eucw1
59 #define WIDTH2 WW._eucw2
60 #define WIDTH3 WW._eucw3
61 #define MULTI_BYTE WW._multibyte
62 #define GETONE(lc, p) \
63 cw = ISASCII(lc = (unsigned char)*p++) ? 1 : \
64 (ISSET2(lc) ? WIDTH2 : \
65 (ISSET3(lc) ? WIDTH3 : WIDTH1)); \
66 if (--cw > --ccount) { \
67 cw -= ccount; \
68 while (ccount--) \
69 lc = (lc << 7) | ((*p++) & 0177); \
70 if (p >= &buf[fw_lBufsiz + BUFSIZ]) { \
71 if (nlp == buf) { \
72 /* Increase the buffer size */ \
73 fw_lBufsiz += BUFSIZ; \
74 if ((buf = realloc(buf, \
75 fw_lBufsiz + BUFSIZ)) == NULL) { \
76 exit(2); /* out of memory */ \
77 } \
78 nlp = buf; \
79 p = &buf[fw_lBufsiz]; \
80 } else { \
81 /* shift the buffer contents down */ \
82 (void) memmove(buf, nlp, \
83 &buf[fw_lBufsiz + BUFSIZ] - nlp);\
84 p -= nlp - buf; \
85 nlp = buf; \
86 } \
87 } \
88 if (p > &buf[fw_lBufsiz]) { \
89 if ((ccount = fread(p, sizeof (char), \
90 &buf[fw_lBufsiz + BUFSIZ] - p, fptr))\
91 <= 0) break; \
92 } else if ((ccount = fread(p, \
93 sizeof (char), BUFSIZ, fptr)) <= 0) \
94 break; \
95 blkno += (long long)ccount; \
96 } \
97 ccount -= cw; \
98 while (cw--) \
99 lc = (lc << 7) | ((*p++) & 0177)
100
101 /*
102 * The same() macro and letter() function were inserted to allow for
103 * the -i option work for the multi-byte environment.
104 */
105 wchar_t letter();
106 #define same(a, b) \
107 (a == b || iflag && (!MULTI_BYTE || ISASCII(a)) && (a ^ b) == ' ' && \
108 letter(a) == letter(b))
109
110
111 #define QSIZE 400
112 struct words {
113 wchar_t inp;
114 char out;
115 struct words *nst;
116 struct words *link;
117 struct words *fail;
118 } *w = NULL, *smax, *q;
119
120 FILE *fptr;
121 long long lnum;
122 int bflag, cflag, lflag, fflag, nflag, vflag, xflag, eflag, sflag;
123 int hflag, iflag;
124 int retcode = 0;
125 int nfile;
126 long long blkno;
127 int nsucc;
128 long long tln;
129 FILE *wordf;
130 char *argptr;
131 off_t input_size = 0;
132
133 void execute(char *);
134 void cgotofn(void);
135 void overflo(void);
136 void cfail(void);
137
138 static long fw_lBufsiz = 0;
139
140 int
main(int argc,char ** argv)141 main(int argc, char **argv)
142 {
143 int c;
144 int errflg = 0;
145 struct stat file_stat;
146
147 (void) setlocale(LC_ALL, "");
148 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
149 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
150 #endif
151 (void) textdomain(TEXT_DOMAIN);
152
153 while ((c = getopt(argc, argv, "hybcie:f:lnvxs")) != EOF)
154 switch (c) {
155
156 case 's':
157 sflag++;
158 continue;
159 case 'h':
160 hflag++;
161 continue;
162 case 'b':
163 bflag++;
164 continue;
165
166 case 'i':
167 case 'y':
168 iflag++;
169 continue;
170
171 case 'c':
172 cflag++;
173 continue;
174
175 case 'e':
176 eflag++;
177 argptr = optarg;
178 input_size = strlen(argptr);
179 continue;
180
181 case 'f':
182 fflag++;
183 wordf = fopen(optarg, "r");
184 if (wordf == NULL) {
185 (void) fprintf(stderr,
186 gettext("fgrep: can't open %s\n"),
187 optarg);
188 exit(2);
189 }
190
191 if (fstat(fileno(wordf), &file_stat) == 0) {
192 input_size = file_stat.st_size;
193 } else {
194 (void) fprintf(stderr,
195 gettext("fgrep: can't fstat %s\n"),
196 optarg);
197 exit(2);
198 }
199
200 continue;
201
202 case 'l':
203 lflag++;
204 continue;
205
206 case 'n':
207 nflag++;
208 continue;
209
210 case 'v':
211 vflag++;
212 continue;
213
214 case 'x':
215 xflag++;
216 continue;
217
218 case '?':
219 errflg++;
220 }
221
222 argc -= optind;
223 if (errflg || ((argc <= 0) && !fflag && !eflag)) {
224 (void) printf(gettext("usage: fgrep [ -bchilnsvx ] "
225 "[ -e exp ] [ -f file ] [ strings ] [ file ] ...\n"));
226 exit(2);
227 }
228 if (!eflag && !fflag) {
229 argptr = argv[optind];
230 input_size = strlen(argptr);
231 input_size++;
232 optind++;
233 argc--;
234 }
235
236 /*
237 * Normally we need one struct words for each letter in the pattern
238 * plus one terminating struct words with outp = 1, but when -x option
239 * is specified we require one more struct words for `\n` character so we
240 * calculate the input_size as below. We add extra 1 because
241 * (input_size/2) rounds off odd numbers
242 */
243
244 if (xflag) {
245 input_size = input_size + (input_size/2) + 1;
246 }
247
248 input_size++;
249
250 w = (struct words *)calloc(input_size, sizeof (struct words));
251 if (w == NULL) {
252 (void) fprintf(stderr,
253 gettext("fgrep: could not allocate "
254 "memory for wordlist\n"));
255 exit(2);
256 }
257
258 getwidth(&WW);
259 if ((WIDTH1 == 0) && (WIDTH2 == 0) &&
260 (WIDTH3 == 0)) {
261 /*
262 * If non EUC-based locale,
263 * assume WIDTH1 is 1.
264 */
265 WIDTH1 = 1;
266 }
267 WIDTH2++;
268 WIDTH3++;
269
270 cgotofn();
271 cfail();
272 nfile = argc;
273 argv = &argv[optind];
274 if (argc <= 0) {
275 execute((char *)NULL);
276 } else
277 while (--argc >= 0) {
278 execute(*argv);
279 argv++;
280 }
281
282 if (w != NULL) {
283 free(w);
284 }
285
286 return (retcode != 0 ? retcode : nsucc == 0);
287 }
288
289 void
execute(char * file)290 execute(char *file)
291 {
292 char *p;
293 struct words *c;
294 int ccount;
295 static char *buf = NULL;
296 int failed;
297 char *nlp;
298 wchar_t lc;
299 int cw;
300
301 if (buf == NULL) {
302 fw_lBufsiz = BUFSIZ;
303 if ((buf = malloc(fw_lBufsiz + BUFSIZ)) == NULL) {
304 exit(2); /* out of memory */
305 }
306 }
307
308 if (file) {
309 if ((fptr = fopen(file, "r")) == NULL) {
310 (void) fprintf(stderr,
311 gettext("fgrep: can't open %s\n"), file);
312 retcode = 2;
313 return;
314 }
315 } else {
316 file = "<stdin>";
317 fptr = stdin;
318 }
319 ccount = 0;
320 failed = 0;
321 lnum = 1;
322 tln = 0;
323 blkno = 0;
324 p = buf;
325 nlp = p;
326 c = w;
327 for (;;) {
328 if (c == 0)
329 break;
330 if (ccount <= 0) {
331 if (p >= &buf[fw_lBufsiz + BUFSIZ]) {
332 if (nlp == buf) {
333 /* increase the buffer size */
334 fw_lBufsiz += BUFSIZ;
335 if ((buf = realloc(buf,
336 fw_lBufsiz + BUFSIZ)) == NULL) {
337 exit(2); /* out of memory */
338 }
339 nlp = buf;
340 p = &buf[fw_lBufsiz];
341 } else {
342 /* shift the buffer down */
343 (void) memmove(buf, nlp,
344 &buf[fw_lBufsiz + BUFSIZ]
345 - nlp);
346 p -= nlp - buf;
347 nlp = buf;
348 }
349
350 }
351 if (p > &buf[fw_lBufsiz]) {
352 if ((ccount = fread(p, sizeof (char),
353 &buf[fw_lBufsiz + BUFSIZ] - p, fptr))
354 <= 0)
355 break;
356 } else if ((ccount = fread(p, sizeof (char),
357 BUFSIZ, fptr)) <= 0)
358 break;
359 blkno += (long long)ccount;
360 }
361 GETONE(lc, p);
362 nstate:
363 if (same(c->inp, lc)) {
364 c = c->nst;
365 } else if (c->link != 0) {
366 c = c->link;
367 goto nstate;
368 } else {
369 c = c->fail;
370 failed = 1;
371 if (c == 0) {
372 c = w;
373 istate:
374 if (same(c->inp, lc)) {
375 c = c->nst;
376 } else if (c->link != 0) {
377 c = c->link;
378 goto istate;
379 }
380 } else
381 goto nstate;
382 }
383
384 if (c == 0)
385 break;
386
387 if (c->out) {
388 while (lc != '\n') {
389 if (ccount <= 0) {
390 if (p == &buf[fw_lBufsiz + BUFSIZ]) {
391 if (nlp == buf) {
392 /* increase buffer size */
393 fw_lBufsiz += BUFSIZ;
394 if ((buf = realloc(buf, fw_lBufsiz + BUFSIZ)) == NULL) {
395 exit(2); /* out of memory */
396 }
397 nlp = buf;
398 p = &buf[fw_lBufsiz];
399 } else {
400 /* shift buffer down */
401 (void) memmove(buf, nlp, &buf[fw_lBufsiz + BUFSIZ] - nlp);
402 p -= nlp - buf;
403 nlp = buf;
404 }
405 }
406 if (p > &buf[fw_lBufsiz]) {
407 if ((ccount = fread(p, sizeof (char),
408 &buf[fw_lBufsiz + BUFSIZ] - p, fptr)) <= 0) break;
409 } else if ((ccount = fread(p, sizeof (char), BUFSIZ,
410 fptr)) <= 0) break;
411 blkno += (long long)ccount;
412 }
413 GETONE(lc, p);
414 }
415 if ((vflag && (failed == 0 || xflag == 0)) ||
416 (vflag == 0 && xflag && failed))
417 goto nomatch;
418 succeed:
419 nsucc = 1;
420 if (cflag)
421 tln++;
422 else if (lflag && !sflag) {
423 (void) printf("%s\n", file);
424 (void) fclose(fptr);
425 return;
426 } else if (!sflag) {
427 if (nfile > 1 && !hflag)
428 (void) printf("%s:", file);
429 if (bflag)
430 (void) printf("%lld:",
431 (blkno - (long long)(ccount-1))
432 / BUFSIZ);
433 if (nflag)
434 (void) printf("%lld:", lnum);
435 if (p <= nlp) {
436 while (nlp < &buf[fw_lBufsiz + BUFSIZ])
437 (void) putchar(*nlp++);
438 nlp = buf;
439 }
440 while (nlp < p)
441 (void) putchar(*nlp++);
442 }
443 nomatch:
444 lnum++;
445 nlp = p;
446 c = w;
447 failed = 0;
448 continue;
449 }
450 if (lc == '\n')
451 if (vflag)
452 goto succeed;
453 else {
454 lnum++;
455 nlp = p;
456 c = w;
457 failed = 0;
458 }
459 }
460 (void) fclose(fptr);
461 if (cflag) {
462 if ((nfile > 1) && !hflag)
463 (void) printf("%s:", file);
464 (void) printf("%lld\n", tln);
465 }
466 }
467
468
469 wchar_t
getargc(void)470 getargc(void)
471 {
472 /* appends a newline to shell quoted argument list so */
473 /* the list looks like it came from an ed style file */
474 wchar_t c;
475 int cw;
476 int b;
477 static int endflg;
478
479
480 if (wordf) {
481 if ((b = getc(wordf)) == EOF)
482 return (EOF);
483 cw = ISASCII(c = (wchar_t)b) ? 1 :
484 (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
485 while (--cw) {
486 if ((b = getc(wordf)) == EOF)
487 return (EOF);
488 c = (c << 7) | (b & 0177);
489 }
490 return (iflag ? letter(c) : c);
491 }
492
493 if (endflg)
494 return (EOF);
495
496 {
497 cw = ISASCII(c = (unsigned char)*argptr++) ? 1 :
498 (ISSET2(c) ? WIDTH2 : (ISSET3(c) ? WIDTH3 : WIDTH1));
499
500 while (--cw)
501 c = (c << 7) | ((*argptr++) & 0177);
502 if (c == '\0') {
503 endflg++;
504 return ('\n');
505 }
506 }
507 return (iflag ? letter(c) : c);
508
509
510 }
511
512 void
cgotofn(void)513 cgotofn(void)
514 {
515 int c;
516 struct words *s;
517
518 s = smax = w;
519 nword:
520 for (;;) {
521 c = getargc();
522 if (c == EOF)
523 return;
524 if (c == 0)
525 goto enter;
526 if (c == '\n') {
527 if (xflag) {
528 for (;;) {
529 if (s->inp == c) {
530 s = s->nst;
531 break;
532 }
533 if (s->inp == 0)
534 goto nenter;
535 if (s->link == 0) {
536 if (smax >= &w[input_size -1])
537 overflo();
538 s->link = ++smax;
539 s = smax;
540 goto nenter;
541 }
542 s = s->link;
543 }
544 }
545 s->out = 1;
546 s = w;
547 } else {
548 loop:
549 if (s->inp == c) {
550 s = s->nst;
551 continue;
552 }
553 if (s->inp == 0)
554 goto enter;
555 if (s->link == 0) {
556 if (smax >= &w[input_size -1])
557 overflo();
558 s->link = ++smax;
559 s = smax;
560 goto enter;
561 }
562 s = s->link;
563 goto loop;
564 }
565 }
566
567 enter:
568 do {
569 s->inp = c;
570 if (smax >= &w[input_size -1])
571 overflo();
572 s->nst = ++smax;
573 s = smax;
574 } while ((c = getargc()) != '\n' && c != EOF);
575 if (xflag) {
576 nenter:
577 s->inp = '\n';
578 if (smax >= &w[input_size -1])
579 overflo();
580 s->nst = ++smax;
581 }
582 smax->out = 1;
583 s = w;
584 if (c != EOF)
585 goto nword;
586 }
587
588 /*
589 * This function is an unexpected condition, since input_size should have been
590 * calculated correctly before hand.
591 */
592
593 void
overflo(void)594 overflo(void)
595 {
596 (void) fprintf(stderr, gettext("fgrep: wordlist too large\n"));
597 exit(2);
598 }
599
600 void
cfail(void)601 cfail(void)
602 {
603 int qsize = QSIZE;
604 struct words **queue = NULL;
605
606 /*
607 * front and rear are pointers used to traverse the global words
608 * structure "w" which contains the data of input pattern file
609 */
610 struct words **front, **rear;
611 struct words *state;
612 unsigned long frontoffset = 0, rearoffset = 0;
613 char c;
614 struct words *s;
615 s = w;
616 if ((queue = (struct words **)calloc(qsize, sizeof (struct words *)))
617 == NULL) {
618 perror("fgrep");
619 exit(2);
620 }
621 front = rear = queue;
622 init:
623 if ((s->inp) != 0) {
624 *rear++ = s->nst;
625 /*
626 * Reallocates the queue if the number of distinct starting
627 * character of patterns exceeds the qsize value
628 */
629 if (rear >= &queue[qsize - 1]) {
630 frontoffset = front - queue;
631 rearoffset = rear - queue;
632 qsize += QSIZE;
633 if ((queue = (struct words **)realloc(queue,
634 qsize * sizeof (struct words *))) == NULL) {
635 perror("fgrep");
636 exit(2);
637 }
638 front = queue + frontoffset;
639 rear = queue + rearoffset;
640 }
641 }
642 if ((s = s->link) != 0) {
643 goto init;
644 }
645
646 while (rear != front) {
647 s = *front++;
648 cloop:
649 if ((c = s->inp) != 0) {
650 *rear++ = (q = s->nst);
651 /*
652 * Reallocate the queue if the rear pointer reaches the end
653 * queue
654 */
655 if (rear >= &queue[qsize - 1]) {
656 frontoffset = front - queue;
657 rearoffset = rear - queue;
658 qsize += QSIZE;
659 if ((queue = (struct words **)realloc(queue,
660 qsize * sizeof (struct words *))) == NULL) {
661 perror("fgrep");
662 exit(2);
663 }
664 front = queue + frontoffset;
665 rear = queue + rearoffset;
666 }
667 state = s->fail;
668 floop:
669 if (state == 0)
670 state = w;
671 if (state->inp == c) {
672 qloop:
673 q->fail = state->nst;
674 if ((state->nst)->out == 1)
675 q->out = 1;
676 if ((q = q->link) != 0)
677 goto qloop;
678 } else if ((state = state->link) != 0)
679 goto floop;
680 }
681 if ((s = s->link) != 0)
682 goto cloop;
683 }
684 }
685
686 wchar_t
letter(wchar_t c)687 letter(wchar_t c)
688 {
689 if (c >= 'a' && c <= 'z')
690 return (c);
691 if (c >= 'A' && c <= 'Z')
692 return (c + 'a' - 'A');
693 return (c);
694 }
695