xref: /openbsd-src/gnu/usr.bin/cvs/diff/diff.c (revision c71bc7e269286e43816004eb0fcd7a55f036cd69)
1 /* GNU DIFF entry routine.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 */
17 
18 /* GNU DIFF was written by Mike Haertel, David Hayes,
19    Richard Stallman, Len Tower, and Paul Eggert.  */
20 
#define GDIFF_MAIN
#include "diff.h"
#include <limits.h>
#include <signal.h>
#include "getopt.h"
#include "fnmatch.h"
26 
/* Default output line width, in columns.  diff_run uses it as the
   initial value of `width', which -W (--width) can override.  */
#ifndef DEFAULT_WIDTH
#define DEFAULT_WIDTH 130
#endif

/* Smallest gutter that diff_run leaves between the two half lines when
   it derives the column layout from the line width.  */
#ifndef GUTTER_WIDTH_MINIMUM
#define GUTTER_WIDTH_MINIMUM 3
#endif

/* diff.c has a real initialize_main function, so discard any macro of
   that name that an earlier header may have supplied.  */
#ifdef initialize_main
#undef initialize_main
#endif
39 
/* Forward declarations for the file-local functions defined below.
   PARAMS is not defined in this file; presumably it comes from diff.h
   and hides the parameter lists from pre-ANSI compilers -- confirm.  */
static char const *filetype PARAMS((struct stat const *));
static char *option_list PARAMS((char **, int));
static int add_exclude_file PARAMS((char const *));
static int ck_atoi PARAMS((char const *, int *));
static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
static int specify_format PARAMS((char **, char *));
static void add_exclude PARAMS((char const *));
static void add_regexp PARAMS((struct regexp_list **, char const *));
static void specify_style PARAMS((enum output_style));
static int try_help PARAMS((char const *));
static void check_output PARAMS((FILE *));
static void usage PARAMS((void));
static void initialize_main PARAMS((int *, char ***));
53 
/* Nonzero for -r: if comparing two directories,
   compare their common subdirectories recursively.
   Set by the option switch in diff_run.  */

static int recursive;

/* For debugging: don't do discard_confusing_lines.
   Set to 1 by -d (--minimal) in diff_run.  */

int no_discards;

#if HAVE_SETMODE
/* I/O mode: nonzero only if using binary input/output.
   Set by --binary; diff_run then opens a named output file with
   mode "wb" instead of "w".  */
static int binary_I_O;
#endif
67 
/* Return a string containing the command options with which diff was invoked.
   Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
   If there were no options, the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
   the length of that vector.

   The result is obtained from xmalloc; the caller owns it and is
   expected to free it.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  int i;
  size_t length = 0;
  char *result;
  char *tail;

  /* Each element contributes its own length plus one leading space.  */
  for (i = 0; i < count; i++)
    length += strlen (optionvec[i]) + 1;

  result = xmalloc (length + 1);

  /* Append at a running tail pointer.  Calling strcat on RESULT for
     every element would rescan everything already written, making the
     loop quadratic in the total length.  */
  tail = result;
  for (i = 0; i < count; i++)
    {
      *tail++ = ' ';
      strcpy (tail, optionvec[i]);
      tail += strlen (tail);
    }
  *tail = 0;

  return result;
}
99 
/* Convert STR, a nonempty string of decimal digits, to a nonnegative
   integer, storing the result in *OUT.  Return 0 on success.

   Return -1, leaving *OUT untouched, if STR is empty, contains any
   character other than a decimal digit (so signs and white space are
   rejected), or denotes a value that does not fit in an int.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;
  int value = 0;

  /* An empty string is not a number.  */
  if (*str == 0)
    return -1;

  for (p = str; *p; p++)
    {
      int digit;

      if (*p < '0' || *p > '9')
	return -1;
      digit = *p - '0';

      /* Refuse the digit before VALUE * 10 + DIGIT can overflow.
	 This also converts STR itself rather than the global optarg,
	 which the callers merely happened to pass.  */
      if (value > (INT_MAX - digit) / 10)
	return -1;
      value = value * 10 + digit;
    }

  *out = value;
  return 0;
}
115 
/* Table of excluded file name patterns: EXCLUDE holds EXCLUDE_COUNT
   patterns in a vector with room for EXCLUDE_ALLOC entries.  */

static char const **exclude;
static int exclude_alloc, exclude_count;

/* Return 1 if the file name F matches any recorded exclusion pattern
   (as judged by fnmatch with no flags), and 0 otherwise.  */
int
excluded_filename (f)
     char const *f;
{
  int n = 0;

  while (n < exclude_count)
    {
      if (fnmatch (exclude[n], f, 0) == 0)
	return 1;
      n++;
    }

  return 0;
}
131 
132 static void
133 add_exclude (pattern)
134      char const *pattern;
135 {
136   if (exclude_alloc <= exclude_count)
137     exclude = (char const **)
138 	      (exclude_alloc == 0
139 	       ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
140 	       : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
141 
142   exclude[exclude_count++] = pattern;
143 }
144 
145 static int
146 add_exclude_file (name)
147      char const *name;
148 {
149   struct file_data f;
150   char *p, *q, *lim;
151 
152   f.name = optarg;
153   f.desc = (strcmp (optarg, "-") == 0
154 	    ? STDIN_FILENO
155 	    : open (optarg, O_RDONLY, 0));
156   if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
157     return -1;
158 
159   sip (&f, 1);
160   slurp (&f);
161 
162   for (p = f.buffer, lim = p + f.buffered_chars;  p < lim;  p = q)
163     {
164       q = (char *) memchr (p, '\n', lim - p);
165       if (!q)
166 	q = lim;
167       *q++ = 0;
168       add_exclude (p);
169     }
170 
171   return close (f.desc);
172 }
173 
/* Long option table handed to getopt_long by `diff_run'.  Each entry
   is {name, argument requirement, flag pointer, value}.  The flag
   pointer is always 0 here, so getopt_long returns the value: either
   the equivalent short option letter or, for options with no short
   form, one of the numbers 129- that tell the big switch in `diff_run'
   how to process those options.  */

static struct option const longopts[] =
{
  {"ignore-blank-lines", no_argument, 0, 'B'},
  {"context", optional_argument, 0, 'C'},
  {"ifdef", required_argument, 0, 'D'},
  {"show-function-line", required_argument, 0, 'F'},
  {"speed-large-files", no_argument, 0, 'H'},
  {"ignore-matching-lines", required_argument, 0, 'I'},
  {"label", required_argument, 0, 'L'},
  {"file-label", required_argument, 0, 'L'},	/* An alias, no longer recommended */
  {"new-file", no_argument, 0, 'N'},
  {"entire-new-file", no_argument, 0, 'N'},	/* An alias, no longer recommended */
  {"unidirectional-new-file", no_argument, 0, 'P'},
  {"starting-file", required_argument, 0, 'S'},
  {"initial-tab", no_argument, 0, 'T'},
  {"width", required_argument, 0, 'W'},
  {"text", no_argument, 0, 'a'},
  {"ascii", no_argument, 0, 'a'},		/* An alias, no longer recommended */
  {"ignore-space-change", no_argument, 0, 'b'},
  {"minimal", no_argument, 0, 'd'},
  {"ed", no_argument, 0, 'e'},
  {"forward-ed", no_argument, 0, 'f'},
  {"ignore-case", no_argument, 0, 'i'},
  {"paginate", no_argument, 0, 'l'},
  {"print", no_argument, 0, 'l'},		/* An alias, no longer recommended */
  {"rcs", no_argument, 0, 'n'},
  {"show-c-function", no_argument, 0, 'p'},
  {"brief", no_argument, 0, 'q'},
  {"recursive", no_argument, 0, 'r'},
  {"report-identical-files", no_argument, 0, 's'},
  {"expand-tabs", no_argument, 0, 't'},
  {"version", no_argument, 0, 'v'},
  {"ignore-all-space", no_argument, 0, 'w'},
  {"exclude", required_argument, 0, 'x'},
  {"exclude-from", required_argument, 0, 'X'},
  {"side-by-side", no_argument, 0, 'y'},
  {"unified", optional_argument, 0, 'U'},
  {"left-column", no_argument, 0, 129},
  {"suppress-common-lines", no_argument, 0, 130},
  {"sdiff-merge-assist", no_argument, 0, 131},
  {"old-line-format", required_argument, 0, 132},
  {"new-line-format", required_argument, 0, 133},
  {"unchanged-line-format", required_argument, 0, 134},
  {"line-format", required_argument, 0, 135},
  {"old-group-format", required_argument, 0, 136},
  {"new-group-format", required_argument, 0, 137},
  {"unchanged-group-format", required_argument, 0, 138},
  {"changed-group-format", required_argument, 0, 139},
  {"horizon-lines", required_argument, 0, 140},
  {"help", no_argument, 0, 141},
  {"binary", no_argument, 0, 142},
  {0, 0, 0, 0}
};
230 
231 int
232 diff_run (argc, argv, out, callbacks_arg)
233      int argc;
234      char *argv[];
235      char *out;
236      const struct diff_callbacks *callbacks_arg;
237 {
238   int val;
239   int c;
240   int prev = -1;
241   int width = DEFAULT_WIDTH;
242   int show_c_function = 0;
243   int optind_old;
244   int opened_file = 0;
245 
246   callbacks = callbacks_arg;
247 
248   /* Do our initializations.  */
249   initialize_main (&argc, &argv);
250 
251   /* Decode the options.  */
252 
253   optind_old = optind;
254   optind = 0;
255   while ((c = getopt_long (argc, argv,
256 			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
257 			   longopts, 0)) != EOF)
258     {
259       switch (c)
260 	{
261 	  /* All digits combine in decimal to specify the context-size.  */
262 	case '1':
263 	case '2':
264 	case '3':
265 	case '4':
266 	case '5':
267 	case '6':
268 	case '7':
269 	case '8':
270 	case '9':
271 	case '0':
272 	  if (context == -1)
273 	    context = 0;
274 	  /* If a context length has already been specified,
275 	     more digits allowed only if they follow right after the others.
276 	     Reject two separate runs of digits, or digits after -C.  */
277 	  else if (prev < '0' || prev > '9')
278 	    fatal ("context length specified twice");
279 
280 	  context = context * 10 + c - '0';
281 	  break;
282 
283 	case 'a':
284 	  /* Treat all files as text files; never treat as binary.  */
285 	  always_text_flag = 1;
286 	  break;
287 
288 	case 'b':
289 	  /* Ignore changes in amount of white space.  */
290 	  ignore_space_change_flag = 1;
291 	  ignore_some_changes = 1;
292 	  ignore_some_line_changes = 1;
293 	  break;
294 
295 	case 'B':
296 	  /* Ignore changes affecting only blank lines.  */
297 	  ignore_blank_lines_flag = 1;
298 	  ignore_some_changes = 1;
299 	  break;
300 
301 	case 'C':		/* +context[=lines] */
302 	case 'U':		/* +unified[=lines] */
303 	  if (optarg)
304 	    {
305 	      if (context >= 0)
306 		fatal ("context length specified twice");
307 
308 	      if (ck_atoi (optarg, &context))
309 		fatal ("invalid context length argument");
310 	    }
311 
312 	  /* Falls through.  */
313 	case 'c':
314 	  /* Make context-style output.  */
315 	  specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
316 	  break;
317 
318 	case 'd':
319 	  /* Don't discard lines.  This makes things slower (sometimes much
320 	     slower) but will find a guaranteed minimal set of changes.  */
321 	  no_discards = 1;
322 	  break;
323 
324 	case 'D':
325 	  /* Make merged #ifdef output.  */
326 	  specify_style (OUTPUT_IFDEF);
327 	  {
328 	    int i, err = 0;
329 	    static char const C_ifdef_group_formats[] =
330 	      "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
331 	    char *b = xmalloc (sizeof (C_ifdef_group_formats)
332 			       + 7 * strlen(optarg) - 14 /* 7*"%s" */
333 			       - 8 /* 5*"%%" + 3*"%c" */);
334 	    sprintf (b, C_ifdef_group_formats,
335 		     optarg, optarg, 0,
336 		     optarg, optarg, 0, 0,
337 		     optarg, optarg, optarg);
338 	    for (i = 0; i < 4; i++)
339 	      {
340 		err |= specify_format (&group_format[i], b);
341 		b += strlen (b) + 1;
342 	      }
343 	    if (err)
344 	      diff_error ("conflicting #ifdef formats", 0, 0);
345 	  }
346 	  break;
347 
348 	case 'e':
349 	  /* Make output that is a valid `ed' script.  */
350 	  specify_style (OUTPUT_ED);
351 	  break;
352 
353 	case 'f':
354 	  /* Make output that looks vaguely like an `ed' script
355 	     but has changes in the order they appear in the file.  */
356 	  specify_style (OUTPUT_FORWARD_ED);
357 	  break;
358 
359 	case 'F':
360 	  /* Show, for each set of changes, the previous line that
361 	     matches the specified regexp.  Currently affects only
362 	     context-style output.  */
363 	  add_regexp (&function_regexp_list, optarg);
364 	  break;
365 
366 	case 'h':
367 	  /* Split the files into chunks of around 1500 lines
368 	     for faster processing.  Usually does not change the result.
369 
370 	     This currently has no effect.  */
371 	  break;
372 
373 	case 'H':
374 	  /* Turn on heuristics that speed processing of large files
375 	     with a small density of changes.  */
376 	  heuristic = 1;
377 	  break;
378 
379 	case 'i':
380 	  /* Ignore changes in case.  */
381 	  ignore_case_flag = 1;
382 	  ignore_some_changes = 1;
383 	  ignore_some_line_changes = 1;
384 	  break;
385 
386 	case 'I':
387 	  /* Ignore changes affecting only lines that match the
388 	     specified regexp.  */
389 	  add_regexp (&ignore_regexp_list, optarg);
390 	  ignore_some_changes = 1;
391 	  break;
392 
393 	case 'l':
394 	  /* Pass the output through `pr' to paginate it.  */
395 	  paginate_flag = 1;
396 #if !defined(SIGCHLD) && defined(SIGCLD)
397 #define SIGCHLD SIGCLD
398 #endif
399 #ifdef SIGCHLD
400 	  /* Pagination requires forking and waiting, and
401 	     System V fork+wait does not work if SIGCHLD is ignored.  */
402 	  signal (SIGCHLD, SIG_DFL);
403 #endif
404 	  break;
405 
406 	case 'L':
407 	  /* Specify file labels for `-c' output headers.  */
408 	  if (!file_label[0])
409 	    file_label[0] = optarg;
410 	  else if (!file_label[1])
411 	    file_label[1] = optarg;
412 	  else
413 	    fatal ("too many file label options");
414 	  break;
415 
416 	case 'n':
417 	  /* Output RCS-style diffs, like `-f' except that each command
418 	     specifies the number of lines affected.  */
419 	  specify_style (OUTPUT_RCS);
420 	  break;
421 
422 	case 'N':
423 	  /* When comparing directories, if a file appears only in one
424 	     directory, treat it as present but empty in the other.  */
425 	  entire_new_file_flag = 1;
426 	  break;
427 
428 	case 'p':
429 	  /* Make context-style output and show name of last C function.  */
430 	  show_c_function = 1;
431 	  add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
432 	  break;
433 
434 	case 'P':
435 	  /* When comparing directories, if a file appears only in
436 	     the second directory of the two,
437 	     treat it as present but empty in the other.  */
438 	  unidirectional_new_file_flag = 1;
439 	  break;
440 
441 	case 'q':
442 	  no_details_flag = 1;
443 	  break;
444 
445 	case 'r':
446 	  /* When comparing directories,
447 	     recursively compare any subdirectories found.  */
448 	  recursive = 1;
449 	  break;
450 
451 	case 's':
452 	  /* Print a message if the files are the same.  */
453 	  print_file_same_flag = 1;
454 	  break;
455 
456 	case 'S':
457 	  /* When comparing directories, start with the specified
458 	     file name.  This is used for resuming an aborted comparison.  */
459 	  dir_start_file = optarg;
460 	  break;
461 
462 	case 't':
463 	  /* Expand tabs to spaces in the output so that it preserves
464 	     the alignment of the input files.  */
465 	  tab_expand_flag = 1;
466 	  break;
467 
468 	case 'T':
469 	  /* Use a tab in the output, rather than a space, before the
470 	     text of an input line, so as to keep the proper alignment
471 	     in the input line without changing the characters in it.  */
472 	  tab_align_flag = 1;
473 	  break;
474 
475 	case 'u':
476 	  /* Output the context diff in unidiff format.  */
477 	  specify_style (OUTPUT_UNIFIED);
478 	  break;
479 
480 	case 'v':
481 	  if (callbacks && callbacks->write_stdout)
482 	    {
483 	      (*callbacks->write_stdout) ("diff - GNU diffutils version ");
484 	      (*callbacks->write_stdout) (diff_version_string);
485 	      (*callbacks->write_stdout) ("\n");
486 	    }
487 	  else
488 	    printf ("diff - GNU diffutils version %s\n", diff_version_string);
489 	  return 0;
490 
491 	case 'w':
492 	  /* Ignore horizontal white space when comparing lines.  */
493 	  ignore_all_space_flag = 1;
494 	  ignore_some_changes = 1;
495 	  ignore_some_line_changes = 1;
496 	  break;
497 
498 	case 'x':
499 	  add_exclude (optarg);
500 	  break;
501 
502 	case 'X':
503 	  if (add_exclude_file (optarg) != 0)
504 	    pfatal_with_name (optarg);
505 	  break;
506 
507 	case 'y':
508 	  /* Use side-by-side (sdiff-style) columnar output. */
509 	  specify_style (OUTPUT_SDIFF);
510 	  break;
511 
512 	case 'W':
513 	  /* Set the line width for OUTPUT_SDIFF.  */
514 	  if (ck_atoi (optarg, &width) || width <= 0)
515 	    fatal ("column width must be a positive integer");
516 	  break;
517 
518 	case 129:
519 	  sdiff_left_only = 1;
520 	  break;
521 
522 	case 130:
523 	  sdiff_skip_common_lines = 1;
524 	  break;
525 
526 	case 131:
527 	  /* sdiff-style columns output. */
528 	  specify_style (OUTPUT_SDIFF);
529 	  sdiff_help_sdiff = 1;
530 	  break;
531 
532 	case 132:
533 	case 133:
534 	case 134:
535 	  specify_style (OUTPUT_IFDEF);
536 	  if (specify_format (&line_format[c - 132], optarg) != 0)
537 	    diff_error ("conflicting line format", 0, 0);
538 	  break;
539 
540 	case 135:
541 	  specify_style (OUTPUT_IFDEF);
542 	  {
543 	    int i, err = 0;
544 	    for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
545 	      err |= specify_format (&line_format[i], optarg);
546 	    if (err)
547 	      diff_error ("conflicting line format", 0, 0);
548 	  }
549 	  break;
550 
551 	case 136:
552 	case 137:
553 	case 138:
554 	case 139:
555 	  specify_style (OUTPUT_IFDEF);
556 	  if (specify_format (&group_format[c - 136], optarg) != 0)
557 	    diff_error ("conflicting group format", 0, 0);
558 	  break;
559 
560 	case 140:
561 	  if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
562 	    fatal ("horizon must be a nonnegative integer");
563 	  break;
564 
565 	case 141:
566 	  usage ();
567 	  if (! callbacks || ! callbacks->write_stdout)
568 	    check_output (stdout);
569 	  return 0;
570 
571 	case 142:
572 	  /* Use binary I/O when reading and writing data.
573 	     On Posix hosts, this has no effect.  */
574 #if HAVE_SETMODE
575 	  binary_I_O = 1;
576 #  if 0
577 	  /* Because this code is leftover from pre-library days,
578 	     there is no way to set stdout back to the default mode
579 	     when we are done.  As it turns out, I think the only
580 	     parts of CVS that pass out == NULL, and thus cause diff
581 	     to write to stdout, are "cvs diff" and "cvs rdiff".  So
582 	     I'm not going to worry about this too much yet.  */
583 	  setmode (STDOUT_FILENO, O_BINARY);
584 #  else
585 	  if (out == NULL)
586 	    error (0, 0, "warning: did not set stdout to binary mode");
587 #  endif
588 #endif
589 	  break;
590 
591 	default:
592 	  return try_help (0);
593 	}
594       prev = c;
595     }
596 
597   if (argc - optind != 2)
598     return try_help (argc - optind < 2 ? "missing operand" : "extra operand");
599 
600   {
601     /*
602      *	We maximize first the half line width, and then the gutter width,
603      *	according to the following constraints:
604      *	1.  Two half lines plus a gutter must fit in a line.
605      *	2.  If the half line width is nonzero:
606      *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
607      *	    b.  If tabs are not expanded to spaces,
608      *		a half line plus a gutter is an integral number of tabs,
609      *		so that tabs in the right column line up.
610      */
611     int t = tab_expand_flag ? 1 : TAB_WIDTH;
612     int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t)  *  t;
613     sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
614     sdiff_column2_offset = sdiff_half_width ? off : width;
615   }
616 
617   if (show_c_function && output_style != OUTPUT_UNIFIED)
618     specify_style (OUTPUT_CONTEXT);
619 
620   if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
621     context = 0;
622   else if (context == -1)
623     /* Default amount of context for -c.  */
624     context = 3;
625 
626   if (output_style == OUTPUT_IFDEF)
627     {
628       /* Format arrays are char *, not char const *,
629 	 because integer formats are temporarily modified.
630 	 But it is safe to assign a constant like "%=" to a format array,
631 	 since "%=" does not format any integers.  */
632       int i;
633       for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
634 	if (!line_format[i])
635 	  line_format[i] = "%l\n";
636       if (!group_format[OLD])
637 	group_format[OLD]
638 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
639       if (!group_format[NEW])
640 	group_format[NEW]
641 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
642       if (!group_format[UNCHANGED])
643 	group_format[UNCHANGED] = "%=";
644       if (!group_format[CHANGED])
645 	group_format[CHANGED] = concat (group_format[OLD],
646 					group_format[NEW], "");
647     }
648 
649   no_diff_means_no_output =
650     (output_style == OUTPUT_IFDEF ?
651       (!*group_format[UNCHANGED]
652        || (strcmp (group_format[UNCHANGED], "%=") == 0
653 	   && !*line_format[UNCHANGED]))
654      : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
655 
656   switch_string = option_list (argv + 1, optind - 1);
657 
658   if (callbacks && callbacks->write_output)
659     {
660       if (out != NULL)
661 	{
662 	  diff_error ("write callback with output file", 0, 0);
663 	  return 2;
664 	}
665     }
666   else
667     {
668       if (out == NULL)
669 	outfile = stdout;
670       else
671 	{
672 #if HAVE_SETMODE
673 	  /* A diff which is full of ^Z and such isn't going to work
674 	     very well in text mode.  */
675 	  if (binary_I_O)
676 	    outfile = fopen (out, "wb");
677 	  else
678 #endif
679 	    outfile = fopen (out, "w");
680 	  if (outfile == NULL)
681 	    {
682 	      perror_with_name ("could not open output file");
683 	      return 2;
684 	    }
685 	  opened_file = 1;
686 	}
687     }
688 
689   /* Set the jump buffer, so that diff may abort execution without
690      terminating the process. */
691   if ((val = setjmp (diff_abort_buf)) != 0)
692     {
693       optind = optind_old;
694       if (opened_file)
695 	fclose (outfile);
696       return val;
697     }
698 
699   val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
700 
701   /* Print any messages that were saved up for last.  */
702   print_message_queue ();
703 
704   free (switch_string);
705 
706   optind = optind_old;
707 
708   if (! callbacks || ! callbacks->write_output)
709     check_output (outfile);
710 
711   if (opened_file)
712     if (fclose (outfile) != 0)
713 	perror_with_name ("close error on output file");
714 
715   return val;
716 }
717 
718 /* Add the compiled form of regexp PATTERN to REGLIST.  */
719 
720 static void
721 add_regexp (reglist, pattern)
722      struct regexp_list **reglist;
723      char const *pattern;
724 {
725   struct regexp_list *r;
726   char const *m;
727 
728   r = (struct regexp_list *) xmalloc (sizeof (*r));
729   bzero (r, sizeof (*r));
730   r->buf.fastmap = xmalloc (256);
731   m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
732   if (m != 0)
733     diff_error ("%s: %s", pattern, m);
734 
735   /* Add to the start of the list, since it's easier than the end.  */
736   r->next = *reglist;
737   *reglist = r;
738 }
739 
740 static int
741 try_help (reason)
742      char const *reason;
743 {
744   if (reason)
745     diff_error ("%s", reason, 0);
746   diff_error ("Try `%s --help' for more information.", diff_program_name, 0);
747   return 2;
748 }
749 
/* Verify that everything written to FILE got out: call fatal with
   "write error" if FILE already has its error indicator set or if
   flushing it fails.  The flush is attempted only when no earlier
   error is pending.  */
static void
check_output (file)
    FILE *file;
{
  int failed = ferror (file) != 0;

  if (! failed)
    failed = fflush (file) != 0;

  if (failed)
    fatal ("write error");
}
757 
758 static char const * const option_help[] = {
759 "-i  --ignore-case  Consider upper- and lower-case to be the same.",
760 "-w  --ignore-all-space  Ignore all white space.",
761 "-b  --ignore-space-change  Ignore changes in the amount of white space.",
762 "-B  --ignore-blank-lines  Ignore changes whose lines are all blank.",
763 "-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.",
764 #if HAVE_SETMODE
765 "--binary  Read and write data in binary mode.",
766 #endif
767 "-a  --text  Treat all files as text.\n",
768 "-c  -C NUM  --context[=NUM]  Output NUM (default 2) lines of copied context.",
769 "-u  -U NUM  --unified[=NUM]  Output NUM (default 2) lines of unified context.",
770 "  -NUM  Use NUM context lines.",
771 "  -L LABEL  --label LABEL  Use LABEL instead of file name.",
772 "  -p  --show-c-function  Show which C function each change is in.",
773 "  -F RE  --show-function-line=RE  Show the most recent line matching RE.",
774 "-q  --brief  Output only whether files differ.",
775 "-e  --ed  Output an ed script.",
776 "-n  --rcs  Output an RCS format diff.",
777 "-y  --side-by-side  Output in two columns.",
778 "  -w NUM  --width=NUM  Output at most NUM (default 130) characters per line.",
779 "  --left-column  Output only the left column of common lines.",
780 "  --suppress-common-lines  Do not output common lines.",
781 "-DNAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs.",
782 "--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT.",
783 "--line-format=LFMT  Similar, but format all input lines with LFMT.",
784 "--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT.",
785 "  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.",
786 "  GFMT may contain:",
787 "    %<  lines from FILE1",
788 "    %>  lines from FILE2",
789 "    %=  lines common to FILE1 and FILE2",
790 "    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER",
791 "      LETTERs are as follows for new group, lower case for old group:",
792 "        F  first line number",
793 "        L  last line number",
794 "        N  number of lines = L-F+1",
795 "        E  F-1",
796 "        M  L+1",
797 "  LFMT may contain:",
798 "    %L  contents of line",
799 "    %l  contents of line, excluding any trailing newline",
800 "    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number",
801 "  Either GFMT or LFMT may contain:",
802 "    %%  %",
803 "    %c'C'  the single character C",
804 "    %c'\\OOO'  the character with octal code OOO\n",
805 "-l  --paginate  Pass the output through `pr' to paginate it.",
806 "-t  --expand-tabs  Expand tabs to spaces in output.",
807 "-T  --initial-tab  Make tabs line up by prepending a tab.\n",
808 "-r  --recursive  Recursively compare any subdirectories found.",
809 "-N  --new-file  Treat absent files as empty.",
810 "-P  --unidirectional-new-file  Treat absent first files as empty.",
811 "-s  --report-identical-files  Report when two files are the same.",
812 "-x PAT  --exclude=PAT  Exclude files that match PAT.",
813 "-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE.",
814 "-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
815 "--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
816 "-d  --minimal  Try hard to find a smaller set of changes.",
817 "-H  --speed-large-files  Assume large files and many scattered small changes.\n",
818 "-v  --version  Output version info.",
819 "--help  Output this help.",
820 0
821 };
822 
823 static void
824 usage ()
825 {
826   char const * const *p;
827 
828   if (callbacks && callbacks->write_stdout)
829     {
830       (*callbacks->write_stdout) ("Usage: ");
831       (*callbacks->write_stdout) (diff_program_name);
832       (*callbacks->write_stdout) (" [OPTION]... FILE1 FILE2\n\n");
833       for (p = option_help;  *p;  p++)
834 	{
835 	  (*callbacks->write_stdout) ("  ");
836 	  (*callbacks->write_stdout) (*p);
837 	  (*callbacks->write_stdout) ("\n");
838 	}
839       (*callbacks->write_stdout)
840 	("\nIf FILE1 or FILE2 is `-', read standard input.\n");
841     }
842   else
843     {
844       printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name);
845       for (p = option_help;  *p;  p++)
846 	printf ("  %s\n", *p);
847       printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
848     }
849 }
850 
/* Install VALUE as the format stored in *VAR.  Return zero if *VAR was
   unset or already held an equal string; otherwise return the nonzero
   strcmp result of the old format against VALUE, signalling a
   conflict.  VALUE is stored in either case, without being copied.  */
static int
specify_format (var, value)
     char **var;
     char *value;
{
  char *previous = *var;

  *var = value;

  if (previous == 0)
    return 0;
  return strcmp (previous, value);
}
860 
861 static void
862 specify_style (style)
863      enum output_style style;
864 {
865   if (output_style != OUTPUT_NORMAL
866       && output_style != style)
867     diff_error ("conflicting specifications of output style", 0, 0);
868   output_style = style;
869 }
870 
/* Return a constant string naming the kind of file described by *ST,
   for use in diagnostics.  */
static char const *
filetype (st)
     struct stat const *st;
{
  /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
     To keep diagnostics grammatical, the returned string must start
     with a consonant.  */

  /* Posix.2 section 5.14.2 seems to suggest that we must read a regular
     file and guess whether it's C, Fortran, etc., but this is somewhat
     useless and doesn't reflect historical practice.  We're allowed to
     guess wrong, so we don't bother to read the file.  */
  if (S_ISREG (st->st_mode))
    return st->st_size == 0 ? "regular empty file" : "regular file";

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  /* Nothing above matched.  */
  return "weird file";
}
921 
922 /* Compare two files (or dirs) with specified names
923    DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
924    (if DIR0 is 0, then the name is just NAME0, etc.)
925    This is self-contained; it opens the files and closes them.
926 
927    Value is 0 if files are the same, 1 if different,
928    2 if there is a problem opening them.  */
929 
930 static int
931 compare_files (dir0, name0, dir1, name1, depth)
932      char const *dir0, *dir1;
933      char const *name0, *name1;
934      int depth;
935 {
936   struct file_data inf[2];
937   register int i;
938   int val;
939   int same_files;
940   int failed = 0;
941   char *free0 = 0, *free1 = 0;
942 
943   /* If this is directory comparison, perhaps we have a file
944      that exists only in one of the directories.
945      If so, just print a message to that effect.  */
946 
947   if (! ((name0 != 0 && name1 != 0)
948 	 || (unidirectional_new_file_flag && name1 != 0)
949 	 || entire_new_file_flag))
950     {
951       char const *name = name0 == 0 ? name1 : name0;
952       char const *dir = name0 == 0 ? dir1 : dir0;
953       message ("Only in %s: %s\n", dir, name);
954       /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
955       return 1;
956     }
957 
958   bzero (inf, sizeof (inf));
959 
960   /* Mark any nonexistent file with -1 in the desc field.  */
961   /* Mark unopened files (e.g. directories) with -2. */
962 
963   inf[0].desc = name0 == 0 ? -1 : -2;
964   inf[1].desc = name1 == 0 ? -1 : -2;
965 
966   /* Now record the full name of each file, including nonexistent ones.  */
967 
968   if (name0 == 0)
969     name0 = name1;
970   if (name1 == 0)
971     name1 = name0;
972 
973   inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
974   inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
975 
976   /* Stat the files.  Record whether they are directories.  */
977 
978   for (i = 0; i <= 1; i++)
979     {
980       if (inf[i].desc != -1)
981 	{
982 	  int stat_result;
983 
984 	  if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
985 	    {
986 	      inf[i].stat = inf[0].stat;
987 	      stat_result = 0;
988 	    }
989 	  else if (strcmp (inf[i].name, "-") == 0)
990 	    {
991 	      inf[i].desc = STDIN_FILENO;
992 	      stat_result = fstat (STDIN_FILENO, &inf[i].stat);
993 	      if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
994 		{
995 		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
996 		  if (pos == -1)
997 		    stat_result = -1;
998 		  else
999 		    {
1000 		      if (pos <= inf[i].stat.st_size)
1001 			inf[i].stat.st_size -= pos;
1002 		      else
1003 			inf[i].stat.st_size = 0;
1004 		      /* Posix.2 4.17.6.1.4 requires current time for stdin.  */
1005 		      time (&inf[i].stat.st_mtime);
1006 		    }
1007 		}
1008 	    }
1009 	  else
1010 	    stat_result = stat (inf[i].name, &inf[i].stat);
1011 
1012 	  if (stat_result != 0)
1013 	    {
1014 	      perror_with_name (inf[i].name);
1015 	      failed = 1;
1016 	    }
1017 	  else
1018 	    {
1019 	      inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
1020 	      if (inf[1 - i].desc == -1)
1021 		{
1022 		  inf[1 - i].dir_p = inf[i].dir_p;
1023 		  inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
1024 		}
1025 	    }
1026 	}
1027     }
1028 
1029   if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
1030     {
1031       /* If one is a directory, and it was specified in the command line,
1032 	 use the file in that dir with the other file's basename.  */
1033 
1034       int fnm_arg = inf[0].dir_p;
1035       int dir_arg = 1 - fnm_arg;
1036       char const *fnm = inf[fnm_arg].name;
1037       char const *dir = inf[dir_arg].name;
1038       char const *p = filename_lastdirchar (fnm);
1039       char const *filename = inf[dir_arg].name
1040 	= dir_file_pathname (dir, p ? p + 1 : fnm);
1041 
1042       if (strcmp (fnm, "-") == 0)
1043 	fatal ("can't compare - to a directory");
1044 
1045       if (stat (filename, &inf[dir_arg].stat) != 0)
1046 	{
1047 	  perror_with_name (filename);
1048 	  failed = 1;
1049 	}
1050       else
1051 	inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
1052     }
1053 
1054   if (failed)
1055     {
1056 
1057       /* If either file should exist but does not, return 2.  */
1058 
1059       val = 2;
1060 
1061     }
1062   else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
1063 			 && 0 < same_file (&inf[0].stat, &inf[1].stat))
1064 	   && no_diff_means_no_output)
1065     {
1066       /* The two named files are actually the same physical file.
1067 	 We know they are identical without actually reading them.  */
1068 
1069       val = 0;
1070     }
1071   else if (inf[0].dir_p & inf[1].dir_p)
1072     {
1073       if (output_style == OUTPUT_IFDEF)
1074 	fatal ("-D option not supported with directories");
1075 
1076       /* If both are directories, compare the files in them.  */
1077 
1078       if (depth > 0 && !recursive)
1079 	{
1080 	  /* But don't compare dir contents one level down
1081 	     unless -r was specified.  */
1082 	  message ("Common subdirectories: %s and %s\n",
1083 		   inf[0].name, inf[1].name);
1084 	  val = 0;
1085 	}
1086       else
1087 	{
1088 	  val = diff_dirs (inf, compare_files, depth);
1089 	}
1090 
1091     }
1092   else if ((inf[0].dir_p | inf[1].dir_p)
1093 	   || (depth > 0
1094 	       && (! S_ISREG (inf[0].stat.st_mode)
1095 		   || ! S_ISREG (inf[1].stat.st_mode))))
1096     {
1097       /* Perhaps we have a subdirectory that exists only in one directory.
1098 	 If so, just print a message to that effect.  */
1099 
1100       if (inf[0].desc == -1 || inf[1].desc == -1)
1101 	{
1102 	  if ((inf[0].dir_p | inf[1].dir_p)
1103 	      && recursive
1104 	      && (entire_new_file_flag
1105 		  || (unidirectional_new_file_flag && inf[0].desc == -1)))
1106 	    val = diff_dirs (inf, compare_files, depth);
1107 	  else
1108 	    {
1109 	      char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1110 	      /* See Posix.2 section 4.17.6.1.1 for this format.  */
1111 	      message ("Only in %s: %s\n", dir, name0);
1112 	      val = 1;
1113 	    }
1114 	}
1115       else
1116 	{
1117 	  /* We have two files that are not to be compared.  */
1118 
1119 	  /* See Posix.2 section 4.17.6.1.1 for this format.  */
1120 	  message5 ("File %s is a %s while file %s is a %s\n",
1121 		    inf[0].name, filetype (&inf[0].stat),
1122 		    inf[1].name, filetype (&inf[1].stat));
1123 
1124 	  /* This is a difference.  */
1125 	  val = 1;
1126 	}
1127     }
1128   else if ((no_details_flag & ~ignore_some_changes)
1129 	   && inf[0].stat.st_size != inf[1].stat.st_size
1130 	   && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1131 	   && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1132     {
1133       message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1134       val = 1;
1135     }
1136   else
1137     {
1138       /* Both exist and neither is a directory.  */
1139 
1140       /* Open the files and record their descriptors.  */
1141 
1142       if (inf[0].desc == -2)
1143 	if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1144 	  {
1145 	    perror_with_name (inf[0].name);
1146 	    failed = 1;
1147 	  }
1148       if (inf[1].desc == -2)
1149 	if (same_files)
1150 	  inf[1].desc = inf[0].desc;
1151 	else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1152 	  {
1153 	    perror_with_name (inf[1].name);
1154 	    failed = 1;
1155 	  }
1156 
1157 #if HAVE_SETMODE
1158       if (binary_I_O)
1159 	for (i = 0; i <= 1; i++)
1160 	  if (0 <= inf[i].desc)
1161 	    setmode (inf[i].desc, O_BINARY);
1162 #endif
1163 
1164       /* Compare the files, if no error was found.  */
1165 
1166       val = failed ? 2 : diff_2_files (inf, depth);
1167 
1168       /* Close the file descriptors.  */
1169 
1170       if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1171 	{
1172 	  perror_with_name (inf[0].name);
1173 	  val = 2;
1174 	}
1175       if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1176 	  && close (inf[1].desc) != 0)
1177 	{
1178 	  perror_with_name (inf[1].name);
1179 	  val = 2;
1180 	}
1181     }
1182 
1183   /* Now the comparison has been done, if no error prevented it,
1184      and VAL is the value this function will return.  */
1185 
1186   if (val == 0 && !inf[0].dir_p)
1187     {
1188       if (print_file_same_flag)
1189 	message ("Files %s and %s are identical\n",
1190 		 inf[0].name, inf[1].name);
1191     }
1192   else
1193     flush_output ();
1194 
1195   if (free0)
1196     free (free0);
1197   if (free1)
1198     free (free1);
1199 
1200   return val;
1201 }
1202 
1203 /* Initialize status variables and flag variables used in libdiff,
1204    to permit repeated calls to diff_run. */
1205 
1206 static void
1207 initialize_main (argcp, argvp)
1208     int *argcp;
1209     char ***argvp;
1210 {
1211   /* These variables really must be reset each time diff_run is called. */
1212   output_style = OUTPUT_NORMAL;
1213   context = -1;
1214   file_label[0] = NULL;
1215   file_label[1] = NULL;
1216   diff_program_name = (*argvp)[0];
1217   outfile = NULL;
1218 
1219   /* Reset these also, just for safety's sake. (If one invocation turns
1220      on ignore_case_flag, it must be turned off before diff_run is called
1221      again.  But it is possible to make many diffs before encountering
1222      such a problem. */
1223   recursive = 0;
1224   no_discards = 0;
1225 #if HAVE_SETMODE
1226   binary_I_O = 0;
1227 #endif
1228   no_diff_means_no_output = 0;
1229   always_text_flag = 0;
1230   horizon_lines = 0;
1231   ignore_space_change_flag = 0;
1232   ignore_all_space_flag = 0;
1233   ignore_blank_lines_flag = 0;
1234   ignore_some_line_changes = 0;
1235   ignore_some_changes = 0;
1236   ignore_case_flag = 0;
1237   function_regexp_list = NULL;
1238   ignore_regexp_list = NULL;
1239   no_details_flag = 0;
1240   print_file_same_flag = 0;
1241   tab_align_flag = 0;
1242   tab_expand_flag = 0;
1243   dir_start_file = NULL;
1244   entire_new_file_flag = 0;
1245   unidirectional_new_file_flag = 0;
1246   paginate_flag = 0;
1247   bzero (group_format, sizeof (group_format));
1248   bzero (line_format, sizeof (line_format));
1249   sdiff_help_sdiff = 0;
1250   sdiff_left_only = 0;
1251   sdiff_skip_common_lines = 0;
1252   sdiff_half_width = 0;
1253   sdiff_column2_offset = 0;
1254   switch_string = NULL;
1255   heuristic = 0;
1256   bzero (files, sizeof (files));
1257 }
1258