xref: /netbsd-src/external/bsd/tre/dist/src/agrep.c (revision cc8d93b4cd0846e7feba052a418dc6b11b9ec293)
1 /*
2   agrep.c - Approximate grep
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <locale.h>
15 #include <string.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <fcntl.h>
19 #include <errno.h>
20 #include <assert.h>
21 #include <limits.h>
22 #include <unistd.h>
23 #ifdef HAVE_GETOPT_H
24 #include <getopt.h>
25 #endif /* HAVE_GETOPT_H */
26 #include "regex.h"
27 
28 #ifdef HAVE_GETTEXT
29 #include <libintl.h>
30 #else
31 #define gettext(s) s
32 #define bindtextdomain(p, d)
33 #define textdomain(p)
34 #endif
35 
36 #define _(String) gettext(String)
37 
/* Two-way maximum and minimum.  Each argument is evaluated twice, so do
   not pass expressions that have side effects. */
#undef MAX
#undef MIN
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
42 
/* Option letters understood by getopt().  A ':' after a letter means the
   option takes an argument.  The digits implement `-#' (maximum number of
   errors) and `-:' is what main() uses to emulate a few long options when
   getopt_long() is not available. */
static const char short_options[] =
  "cd:e:hiklnqrsvwyBD:E:HI:MS:V0123456789-:";

/* Nonzero once --help has been seen on the command line. */
static int show_help;

/* Name printed in front of diagnostics; assigned in main(). */
char *program_name;
49 
#ifdef HAVE_GETOPT_LONG
/* Codes returned by getopt_long() for long options that have no short
   equivalent.  They start above CHAR_MAX so that they cannot collide
   with any option letter. */
enum {
  COLOR_OPTION = CHAR_MAX + 1,
  SHOW_POSITION_OPTION
};

/* Long option equivalences.  The argument requirement of every entry
   must agree with the matching letter in `short_options': in particular
   `--delimiter' takes a PATTERN exactly like `-d' does, otherwise
   getopt_long() rejects `--delimiter=PATTERN' and hands a NULL optarg
   to the `d' case for a bare `--delimiter'. */
static struct option const long_options[] =
{
  {"best-match", no_argument, NULL, 'B'},
  {"color", no_argument, NULL, COLOR_OPTION},
  {"colour", no_argument, NULL, COLOR_OPTION},
  {"count", no_argument, NULL, 'c'},
  {"delete-cost", required_argument, NULL, 'D'},
  {"delimiter", required_argument, NULL, 'd'},
  {"delimiter-after", no_argument, NULL, 'M'},
  {"files-with-matches", no_argument, NULL, 'l'},
  {"help", no_argument, &show_help, 1},
  {"ignore-case", no_argument, NULL, 'i'},
  {"insert-cost", required_argument, NULL, 'I'},
  {"invert-match", no_argument, NULL, 'v'},
  {"line-number", no_argument, NULL, 'n'},
  {"literal", no_argument, NULL, 'k'},
  {"max-errors", required_argument, NULL, 'E'},
  {"no-filename", no_argument, NULL, 'h'},
  {"nothing", no_argument, NULL, 'y'},
  {"quiet", no_argument, NULL, 'q'},
  {"record-number", no_argument, NULL, 'n'},
  {"recursive", no_argument, NULL, 'r'},
  {"regexp", required_argument, NULL, 'e'},
  {"show-cost", no_argument, NULL, 's'},
  {"show-position", no_argument, NULL, SHOW_POSITION_OPTION},
  {"silent", no_argument, NULL, 'q'},
  {"substitute-cost", required_argument, NULL, 'S'},
  {"version", no_argument, NULL, 'V'},
  {"with-filename", no_argument, NULL, 'H'},
  {"word-regexp", no_argument, NULL, 'w'},
  {0, 0, 0, 0}
};
#endif /* HAVE_GETOPT_LONG */
91 
92 __dead static void
tre_agrep_usage(int status)93 tre_agrep_usage(int status)
94 {
95   if (status != 0)
96     {
97       fprintf(stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"),
98 	      program_name);
99       fprintf(stderr, _("Try `%s --help' for more information.\n"),
100               program_name);
101     }
102   else
103     {
104       printf(_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name);
105       printf(_("\
106 Searches for approximate matches of PATTERN in each FILE or standard input.\n\
107 Example: `%s -2 optimize foo.txt' outputs all lines in file `foo.txt' that\n\
108 match \"optimize\" within two errors.  E.g. lines which contain \"optimise\",\n\
109 \"optmise\", and \"opitmize\" all match.\n"), program_name);
110       printf("\n");
111       printf(_("\
112 Regexp selection and interpretation:\n\
113   -e, --regexp=PATTERN	    use PATTERN as a regular expression\n\
114   -i, --ignore-case	    ignore case distinctions\n\
115   -k, --literal		    PATTERN is a literal string\n\
116   -w, --word-regexp	    force PATTERN to match only whole words\n\
117 \n\
118 Approximate matching settings:\n\
119   -D, --delete-cost=NUM	    set cost of missing characters\n\
120   -I, --insert-cost=NUM	    set cost of extra characters\n\
121   -S, --substitute-cost=NUM set cost of wrong characters\n\
122   -E, --max-errors=NUM	    select records that have at most NUM errors\n\
123   -#			    select records that have at most # errors (# is a\n\
124 			    digit between 0 and 9)\n\
125 \n\
126 Miscellaneous:\n\
127   -d, --delimiter=PATTERN   set the record delimiter regular expression\n\
128   -v, --invert-match	    select non-matching records\n\
129   -V, --version		    print version information and exit\n\
130   -r, --recursive           also search in any subdirectories\n\
131   -y, --nothing		    does nothing (for compatibility with the non-free\n\
132 			    agrep program)\n\
133       --help		    display this help and exit\n\
134 \n\
135 Output control:\n\
136   -B, --best-match	    only output records with least errors\n\
137   -c, --count		    only print a count of matching records per FILE\n\
138   -h, --no-filename	    suppress the prefixing filename on output\n\
139   -H, --with-filename	    print the filename for each match\n\
140   -l, --files-with-matches  only print FILE names containing matches\n\
141   -M, --delimiter-after     print record delimiter after record if -d is used\n\
142   -n, --record-number	    print record number with output\n\
143       --line-number         same as -n\n\
144   -q, --quiet, --silent	    suppress all normal output\n\
145   -s, --show-cost	    print match cost with output\n\
146       --colour, --color     use markers to distinguish the matching \
147 strings\n\
148       --show-position       prefix each output record with start and end\n\
149                             position of the first match within the record\n"));
150       printf("\n");
151       printf(_("\
152 With no FILE, or when FILE is -, reads standard input.  If less than two\n\
153 FILEs are given, -h is assumed.  Exit status is 0 if a match is found, 1 for\n\
154 no match, and 2 if there were errors.  If -E or -# is not specified, only\n\
155 exact matches are selected.\n"));
156       printf("\n");
157       printf(_("\
158 PATTERN is a POSIX extended regular expression (ERE) with the TRE extensions.\n\
159 See tre(7) for a complete description.\n"));
160       printf("\n");
161       printf(_("Report bugs to: "));
162       printf("%s.\n", PACKAGE_BUGREPORT);
163     }
164   exit(status);
165 }
166 
167 static regex_t preg;	  /* Compiled pattern to search for. */
168 static regex_t delim;	  /* Compiled record delimiter pattern. */
169 
170 #define INITIAL_BUF_SIZE 10240	/* Initial size of the buffer. */
171 static char *buf;	   /* Buffer for scanning text. */
172 static int buf_size;	   /* Current size of the buffer. */
173 static int data_len;	   /* Amount of data in the buffer. */
174 static char *record;	   /* Start of current record. */
175 static char *next_record;  /* Start of next record. */
176 static int record_len;	   /* Length of current record. */
177 static int delim_len;      /* Length of delimiter before record. */
178 static int next_delim_len; /* Length of delimiter after record. */
179 static int delim_after = 1;/* If true, print the delimiter after the record. */
180 static int at_eof;
181 static int have_matches;   /* If true, matches have been found. */
182 static int is_binary;      /* -1 unknown, 0 ascii, 1 binary */
183 
184 static int invert_match;   /* Show only non-matching records. */
185 static int print_filename; /* Output filename. */
186 static int print_recnum;   /* Output record number. */
187 static int print_cost;	   /* Output match cost. */
188 static int count_matches;  /* Count matching records. */
189 static int list_files;	   /* List matching files. */
190 static int color_option;   /* Highlight matches. */
191 static int print_position;  /* Show start and end offsets for matches. */
192 static int recursive;       /* Search in subdirectories too */
193 
194 static int best_match;	     /* Output only best matches. */
195 static int best_cost;	     /* Best match cost found so far. */
196 static int be_silent;	     /* Never output anything */
197 
198 static regaparams_t match_params;
199 
200 /* The color string used with the --color option.  If set, the
201    environment variable GREP_COLOR overrides this default value. */
202 static const char *highlight = "01;31";
203 
204 static int
isbinaryfile(void)205 isbinaryfile(void)
206 {
207 	return buf != NULL && memchr(buf, '\0', data_len) != NULL;
208 }
209 
210 /* Sets `record' to the next complete record from file `fd', and `record_len'
211    to the length of the record.	 Returns 1 when there are no more records,
212    0 otherwise. */
213 static inline int
tre_agrep_get_next_record(int fd,const char * filename)214 tre_agrep_get_next_record(int fd, const char *filename)
215 {
216   if (at_eof)
217     return 1;
218 
219   while (1)
220     {
221       int errcode;
222       regmatch_t pmatch[1];
223 
224       if (next_record == NULL)
225 	{
226 	  int r;
227 	  int read_size = buf_size - data_len;
228 
229 	  if (read_size <= 0)
230 	    {
231 	      /* The buffer is full and no record delimiter found yet,
232 		 we need to grow the buffer.  We double the size to
233 		 avoid rescanning the data too many times when the
234 		 records are very large. */
235 	      buf_size *= 2;
236 	      buf = realloc(buf, buf_size);
237 	      if (buf == NULL)
238 		{
239 		  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
240 		  exit(2);
241 		}
242 	      read_size = buf_size - data_len;
243 	    }
244 
245 	  r = read(fd, buf + data_len, read_size);
246 	  if (r < 0)
247 	    {
248 	      /* Read error. */
249 	      char *err;
250 	      if (errno == EINTR)
251 		continue;
252 	      err = strerror(errno);
253 	      fprintf(stderr, "%s: ", program_name);
254 	      fprintf(stderr, _("Error reading from %s: %s\n"), filename, err);
255 	      return 1;
256 	    }
257 
258 	  if (r == 0)
259 	    {
260 	      /* End of file.  Return the last record. */
261 	      record = buf;
262 	      record_len = data_len;
263 	      at_eof = 1;
264 	      /* The empty string after a trailing delimiter is not considered
265 		 to be a record. */
266 	      if (record_len == 0)
267 		return 1;
268 	      return 0;
269 	    }
270 	  data_len += r;
271 	  next_record = buf;
272 
273 	  if (is_binary < 0)
274 	    is_binary = isbinaryfile();
275 	}
276 
277       /* Find the next record delimiter. */
278       errcode = tre_regnexec(&delim, next_record, data_len - (next_record - buf),
279 			 1, pmatch, 0);
280 
281 
282       switch (errcode)
283 	{
284 	case REG_OK:
285 	  /* Record delimiter found, now we know how long the current
286 	     record is. */
287 	  record = next_record;
288 	  record_len = pmatch[0].rm_so;
289 	  delim_len = next_delim_len;
290 
291 	  next_delim_len = pmatch[0].rm_eo - pmatch[0].rm_so;
292 	  next_record = next_record + pmatch[0].rm_eo;
293 	  return 0;
294 	  break;
295 
296 	case REG_NOMATCH:
297 	  if (next_record == buf)
298 	    {
299 	      next_record = NULL;
300 	      continue;
301 	    }
302 
303 	  /* Move the data to start of the buffer and read more
304 	     data. */
305 	  memmove(buf, next_record, buf + data_len - next_record);
306 	  data_len = buf + data_len - next_record;
307 	  next_record = NULL;
308 	  continue;
309 	  break;
310 
311 	case REG_ESPACE:
312 	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
313 	  exit(2);
314 	  break;
315 
316 	default:
317 	  assert(0);
318 	  break;
319 	}
320     }
321 }
322 
323 #include <dirent.h>
324 
325 static int tre_agrep_handle_file(const char */*filename*/);
326 
327 static int
tre_agrep_handle_dirent(const char * ent)328 tre_agrep_handle_dirent(const char *ent)
329 {
330 	struct dirent	 storage;
331 	struct dirent	*dp;
332 	struct stat	 st;
333 	char		 path[8192];
334 	DIR		*dirp;
335 	int		 ret;
336 	int		 ok;
337 
338 	if (ent == NULL || strcmp(ent, "-") == 0) {
339 		return tre_agrep_handle_file(ent);
340 	}
341 	if (lstat(ent, &st) < 0) {
342 		return tre_agrep_handle_file(ent);
343 	}
344 	if ((st.st_mode & S_IFMT) == S_IFDIR && recursive) {
345 		if ((dirp = opendir(ent)) == NULL) {
346 			fprintf(stderr, "can't open directory '%s'\n", ent);
347 			return 0;
348 		}
349 		for (ret = 0 ; readdir_r(dirp, &storage, &dp) == 0 && dp != NULL ; ) {
350 			if (strcmp(dp->d_name, ".") == 0 ||
351 			    strcmp(dp->d_name, "..") == 0) {
352 				continue;
353 			}
354 			snprintf(path, sizeof(path), "%s/%s", ent, dp->d_name);
355 			if ((ok = tre_agrep_handle_dirent(path)) != 0) {
356 				ret = ok;
357 			}
358 		}
359 		closedir(dirp);
360 		return ret;
361 	}
362 	return tre_agrep_handle_file(ent);
363 }
364 
365 static int
tre_agrep_handle_file(const char * filename)366 tre_agrep_handle_file(const char *filename)
367 {
368   int fd;
369   int count = 0;
370   int recnum = 0;
371 
372   is_binary = -1;
373 
374   /* Allocate the initial buffer. */
375   if (buf == NULL)
376     {
377       buf = malloc(INITIAL_BUF_SIZE);
378       if (buf == NULL)
379 	{
380 	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
381 	  exit(2);
382 	}
383       buf_size = INITIAL_BUF_SIZE;
384     }
385 
386   /* Reset read buffer state. */
387   next_record = NULL;
388   data_len = 0;
389 
390   if (!filename || strcmp(filename, "-") == 0)
391     {
392       if (best_match)
393 	{
394 	  fprintf(stderr, "%s: %s\n", program_name,
395 		  _("Cannot use -B when reading from standard input."));
396 	  return 2;
397 	}
398       fd = 0;
399       filename = _("(standard input)");
400     }
401   else
402     {
403       fd = open(filename, O_RDONLY);
404     }
405 
406   if (fd < 0)
407     {
408       fprintf(stderr, "%s: %s: %s\n", program_name, filename, strerror(errno));
409       return 1;
410     }
411 
412 
413   /* Go through all records and output the matching ones, or the non-matching
414      ones if `invert_match' is true. */
415   at_eof = 0;
416   while (!tre_agrep_get_next_record(fd, filename))
417     {
418       int errcode;
419       regamatch_t match;
420       regmatch_t pmatch[1];
421       recnum++;
422       memset(&match, 0, sizeof(match));
423       if (best_match)
424 	match_params.max_cost = best_cost;
425       if (color_option || print_position)
426 	{
427 	  match.pmatch = pmatch;
428 	  match.nmatch = 1;
429 	}
430 
431       /* Stop searching for better matches if an exact match is found. */
432       if (best_match == 1 && best_cost == 0)
433 	break;
434 
435       /* See if the record matches. */
436       errcode = tre_reganexec(&preg, record, record_len, &match, match_params, 0);
437       if ((!invert_match && errcode == REG_OK)
438 	  || (invert_match && errcode == REG_NOMATCH))
439 	{
440 	  if (be_silent)
441 	    exit(0);
442 
443 	  count++;
444 	  have_matches = 1;
445 	  if (best_match)
446 	    {
447 	      if (best_match == 1)
448 		{
449 		  /* First best match pass. */
450 		  if (match.cost < best_cost)
451 		    best_cost = match.cost;
452 		  continue;
453 		}
454 	      /* Second best match pass. */
455 	      if (match.cost > best_cost)
456 		continue;
457 	    }
458 
459 	  if (list_files)
460 	    {
461 	      printf("%s\n", filename);
462 	      break;
463 	    }
464 	  else if (!count_matches && is_binary > 0)
465 	    {
466 	      if (print_filename)
467 		printf("%s:", filename);
468 	      printf("Binary file matches\n");
469 	      break;
470 	    }
471 	  else if (!count_matches)
472 	    {
473 	      if (print_filename)
474 		printf("%s:", filename);
475 	      if (print_recnum)
476 		printf("%d:", recnum);
477 	      if (print_cost)
478 		printf("%d:", match.cost);
479 	      if (print_position)
480 		printf("%d-%d:",
481 		       invert_match ? 0 : (int)pmatch[0].rm_so,
482 		       invert_match ? record_len : (int)pmatch[0].rm_eo);
483 
484 	      /* Adjust record boundaries so we print the delimiter
485 		 before or after the record. */
486 	      if (delim_after)
487 		{
488 		  record_len += next_delim_len;
489 		}
490 	      else
491 		{
492 		  record -= delim_len;
493 		  record_len += delim_len;
494 		  pmatch[0].rm_so += delim_len;
495 		  pmatch[0].rm_eo += delim_len;
496 		}
497 
498 	      if (color_option && !invert_match)
499 		{
500 		  printf("%.*s", (int)pmatch[0].rm_so, record);
501 		  printf("\33[%sm", highlight);
502 		  printf("%.*s", (int)(pmatch[0].rm_eo - pmatch[0].rm_so),
503 			 record + pmatch[0].rm_so);
504 		  fputs("\33[00m", stdout);
505 		  printf("%.*s", (int)(record_len - pmatch[0].rm_eo),
506 			 record + pmatch[0].rm_eo);
507 		}
508 	      else
509 		{
510 		  printf("%.*s", record_len, record);
511 		}
512 	    }
513 	}
514     }
515 
516   if (count_matches && !best_match && !be_silent)
517     {
518       if (print_filename)
519 	printf("%s:", filename);
520       printf("%d\n", count);
521     }
522 
523   if (fd)
524     close(fd);
525 
526   return 0;
527 }
528 
529 
530 
531 int
main(int argc,char ** argv)532 main(int argc, char **argv)
533 {
534   int c, errcode;
535   int comp_flags = REG_EXTENDED;
536   char *tmp_str;
537   char *regexp = NULL;
538   const char *delim_regexp = "\n";
539   int word_regexp = 0;
540   int literal_string = 0;
541   int max_cost_set = 0;
542 
543   setlocale (LC_ALL, "");
544   bindtextdomain (PACKAGE, LOCALEDIR);
545   textdomain (PACKAGE);
546 
547   /* Get the program name without the path (for error messages etc). */
548   program_name = argv[0];
549   if (program_name)
550     {
551       tmp_str = strrchr(program_name, '/');
552       if (tmp_str)
553 	program_name = tmp_str + 1;
554     }
555 
556   /* Defaults. */
557   print_filename = -1;
558   print_cost = 0;
559   be_silent = 0;
560   tre_regaparams_default(&match_params);
561   match_params.max_cost = 0;
562 
563   /* Parse command line options. */
564   while (1)
565     {
566 #ifdef HAVE_GETOPT_LONG
567       c = getopt_long(argc, argv, short_options, long_options, NULL);
568 #else /* !HAVE_GETOPT_LONG */
569       c = getopt(argc, argv, short_options);
570 #endif /* !HAVE_GETOPT_LONG */
571       if (c == -1)
572 	break;
573 
574       switch (c)
575 	{
576 	case 'c':
577 	  /* Count number of matching records. */
578 	  count_matches = 1;
579 	  break;
580 	case 'd':
581 	  /* Set record delimiter regexp. */
582 	  delim_regexp = optarg;
583 	  if (delim_after == 1)
584 	    delim_after = 0;
585 	  break;
586 	case 'e':
587 	  /* Regexp to use. */
588 	  regexp = optarg;
589 	  break;
590 	case 'h':
591 	  /* Don't prefix filename on output if there are multiple files. */
592 	  print_filename = 0;
593 	  break;
594 	case 'i':
595 	  /* Ignore case. */
596 	  comp_flags |= REG_ICASE;
597 	  break;
598 	case 'k':
599 	  /* The pattern is a literal string. */
600 	  literal_string = 1;
601 	  break;
602 	case 'l':
603 	  /* Only print files that contain matches. */
604 	  list_files = 1;
605 	  break;
606 	case 'n':
607 	  /* Print record number of matching record. */
608 	  print_recnum = 1;
609 	  break;
610 	case 'q':
611 	  be_silent = 1;
612 	  break;
613 	case 'r':
614 	  /* also search in sub-directories */
615 	  recursive = 1;
616 	  print_filename = 1;
617 	  break;
618 	case 's':
619 	  /* Print match cost of matching record. */
620 	  print_cost = 1;
621 	  break;
622 	case 'v':
623 	  /* Select non-matching records. */
624 	  invert_match = 1;
625 	  break;
626 	case 'w':
627 	  /* Match only whole words. */
628 	  word_regexp = 1;
629 	  break;
630 	case 'y':
631 	  /* Compatibility option, does nothing. */
632 	  break;
633 	case 'B':
634 	  /* Select only the records which have the best match. */
635 	  best_match = 1;
636 	  break;
637 	case 'D':
638 	  /* Set the cost of a deletion. */
639 	  match_params.cost_del = atoi(optarg);
640 	  break;
641 	case 'E':
642 	  /* Set the maximum number of errors allowed for a record to match. */
643 	  match_params.max_cost = atoi(optarg);
644 	  max_cost_set = 1;
645 	  break;
646 	case 'H':
647 	  /* Always print filename prefix on output. */
648 	  print_filename = 1;
649 	  break;
650 	case 'I':
651 	  /* Set the cost of an insertion. */
652 	  match_params.cost_ins = atoi(optarg);
653 	  break;
654 	case 'M':
655 	  /* Print delimiters after matches instead of before. */
656 	  delim_after = 2;
657 	  break;
658 	case 'S':
659 	  /* Set the cost of a substitution. */
660 	  match_params.cost_subst = atoi(optarg);
661 	  break;
662 	case 'V':
663 	  {
664 	    /* Print version string and exit. */
665 	    char *version;
666 	    tre_config(TRE_CONFIG_VERSION, &version);
667 	    printf("%s (TRE agrep) %s\n\n", program_name, version);
668 	    printf(_("\
669 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>.\n"));
670 	    printf("\n");
671 	    exit(0);
672 	    break;
673 	  }
674 	case '?':
675 	  /* Ambiguous match or extraneous parameter. */
676 	  break;
677 
678 	case '-':
679 	  /* Emulate some long options on systems which don't
680 	     have getopt_long. */
681 	  if (strcmp(optarg, "color") == 0
682 	      || strcmp(optarg, "colour") == 0)
683 	    color_option = 1;
684 	  else if (strcmp(optarg, "show-position") == 0)
685 	    print_position = 1;
686 	  else if (strcmp(optarg, "help") == 0)
687 	    show_help = 1;
688 	  else
689 	    {
690 	      fprintf(stderr, _("%s: invalid option --%s\n"),
691 		      program_name, optarg);
692 	      exit(2);
693 	    }
694 	  break;
695 
696 #ifdef HAVE_GETOPT_LONG
697 	case COLOR_OPTION:
698 	  color_option = 1;
699 	  break;
700 	case SHOW_POSITION_OPTION:
701 	  print_position = 1;
702 	  break;
703 #endif /* HAVE_GETOPT_LONG */
704 	case 0:
705 	  /* Long options without corresponding short options. */
706 	  break;
707 
708 	default:
709 	  if (c >= '0' && c <= '9')
710 	    match_params.max_cost = c - '0';
711 	  else
712 	    tre_agrep_usage(2);
713 	  max_cost_set = 1;
714 	  break;
715 	}
716     }
717 
718   if (show_help)
719     tre_agrep_usage(0);
720 
721   if (color_option)
722     {
723       char *user_highlight = getenv("GREP_COLOR");
724       if (user_highlight && *user_highlight != '\0')
725 	highlight = user_highlight;
726     }
727 
728   /* Get the pattern. */
729   if (regexp == NULL)
730     {
731       if (optind >= argc)
732 	tre_agrep_usage(2);
733       regexp = argv[optind++];
734     }
735 
736   /* If -k is specified, make the regexp literal.  This uses
737      the \Q and \E extensions.	If the string already contains
738      occurrences of \E, we need to handle them separately.  This is a
739      pain, but can't really be avoided if we want to create a regexp
740      which works together with -w (see below). */
741   if (literal_string)
742     {
743       char *next_pos = regexp;
744       char *new_re, *new_re_end;
745       int n = 0;
746       int len;
747 
748       next_pos = regexp;
749       while (next_pos)
750 	{
751 	  next_pos = strstr(next_pos, "\\E");
752 	  if (next_pos)
753 	    {
754 	      n++;
755 	      next_pos += 2;
756 	    }
757 	}
758 
759       len = strlen(regexp);
760       new_re = malloc(len + 5 + n * 7);
761       if (!new_re)
762 	{
763 	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
764 	  return 2;
765 	}
766 
767       next_pos = regexp;
768       new_re_end = new_re;
769       strcpy(new_re_end, "\\Q");
770       new_re_end += 2;
771       while (next_pos)
772 	{
773 	  char *start = next_pos;
774 	  next_pos = strstr(next_pos, "\\E");
775 	  if (next_pos)
776 	    {
777 	      strncpy(new_re_end, start, next_pos - start);
778 	      new_re_end += next_pos - start;
779 	      strcpy(new_re_end, "\\E\\\\E\\Q");
780 	      new_re_end += 7;
781 	      next_pos += 2;
782 	    }
783 	  else
784 	    {
785 	      strcpy(new_re_end, start);
786 	      new_re_end += strlen(start);
787 	    }
788 	}
789       strcpy(new_re_end, "\\E");
790       regexp = new_re;
791     }
792 
793   /* If -w is specified, prepend beginning-of-word and end-of-word
794      assertions to the regexp before compiling. */
795   if (word_regexp)
796     {
797       char *tmp = regexp;
798       int len = strlen(tmp);
799       regexp = malloc(len + 7);
800       if (regexp == NULL)
801 	{
802 	  fprintf(stderr, "%s: %s\n", program_name, _("Out of memory"));
803 	  return 2;
804 	}
805       strcpy(regexp, "\\<(");
806       strcpy(regexp + 3, tmp);
807       strcpy(regexp + len + 3, ")\\>");
808     }
809 
810   /* Compile the pattern. */
811   errcode = tre_regcomp(&preg, regexp, comp_flags);
812   if (errcode)
813     {
814       char errbuf[256];
815       tre_regerror(errcode, &preg, errbuf, sizeof(errbuf));
816       fprintf(stderr, "%s: %s: %s\n",
817 	      program_name, _("Error in search pattern"), errbuf);
818       return 2;
819     }
820 
821   /* Compile the record delimiter pattern. */
822   errcode = tre_regcomp(&delim, delim_regexp, REG_EXTENDED | REG_NEWLINE);
823   if (errcode)
824     {
825       char errbuf[256];
826       tre_regerror(errcode, &preg, errbuf, sizeof(errbuf));
827       fprintf(stderr, "%s: %s: %s\n",
828 	      program_name, _("Error in record delimiter pattern"), errbuf);
829       return 2;
830     }
831 
832   if (tre_regexec(&delim, "", 0, NULL, 0) == REG_OK)
833     {
834       fprintf(stderr, "%s: %s\n", program_name,
835 	      _("Record delimiter pattern must not match an empty string"));
836       return 2;
837     }
838 
839   /* The rest of the arguments are file(s) to match. */
840 
841   /* If -h or -H were not specified, print filenames if there are more
842      than one files specified. */
843   if (print_filename == -1)
844     {
845       if (argc - optind <= 1)
846 	print_filename = 0;
847       else
848 	print_filename = 1;
849     }
850 
851   if (optind >= argc)
852     {
853       /* There are no files specified, read from stdin. */
854       tre_agrep_handle_file(NULL);
855     }
856   else if (best_match)
857     {
858       int first_ind = optind;
859 
860       /* Best match mode.  Set up the limits first. */
861       if (!max_cost_set)
862 	match_params.max_cost = INT_MAX;
863       best_cost = INT_MAX;
864 
865       /* Scan all files once without outputting anything, searching
866 	 for the best matches. */
867       while (optind < argc)
868 	tre_agrep_handle_dirent(argv[optind++]);
869 
870       /* If there were no matches, bail out now. */
871       if (best_cost == INT_MAX)
872 	return 1;
873 
874       /* Otherwise, rescan the files with max_cost set to the cost
875 	 of the best match found previously, this time outputting
876 	 the matches. */
877       match_params.max_cost = best_cost;
878       best_match = 2;
879       optind = first_ind;
880       while (optind < argc)
881 	tre_agrep_handle_dirent(argv[optind++]);
882     }
883   else
884     {
885       /* Normal mode. */
886       while (optind < argc)
887 	tre_agrep_handle_dirent(argv[optind++]);
888     }
889 
890   return have_matches == 0;
891 }
892