xref: /openbsd-src/gnu/usr.bin/cvs/diff/diff.c (revision b2346922a76a50a89e33beab4ebbc0950de8a8df)
1 /* GNU DIFF entry routine.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997, 1998 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING.  If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19 
20 /* GNU DIFF was written by Mike Haertel, David Hayes,
21    Richard Stallman, Len Tower, and Paul Eggert.  */
22 
23 #define GDIFF_MAIN
24 #include "diff.h"
25 #include <signal.h>
26 #include "getopt.h"
27 #include "fnmatch.h"
28 
29 #ifndef DEFAULT_WIDTH
30 #define DEFAULT_WIDTH 130
31 #endif
32 
33 #ifndef GUTTER_WIDTH_MINIMUM
34 #define GUTTER_WIDTH_MINIMUM 3
35 #endif
36 
37 /* diff.c has a real initialize_main function. */
38 #ifdef initialize_main
39 #undef initialize_main
40 #endif
41 
42 static char const *filetype PARAMS((struct stat const *));
43 static char *option_list PARAMS((char **, int));
44 static int add_exclude_file PARAMS((char const *));
45 static int ck_atoi PARAMS((char const *, int *));
46 static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
47 static int specify_format PARAMS((char **, char *));
48 static void add_exclude PARAMS((char const *));
49 static void add_regexp PARAMS((struct regexp_list **, char const *));
50 static void specify_style PARAMS((enum output_style));
51 static int try_help PARAMS((char const *));
52 static void check_output PARAMS((FILE *));
53 static void usage PARAMS((void));
54 static void initialize_main PARAMS((int *, char ***));
55 
56 /* Nonzero for -r: if comparing two directories,
57    compare their common subdirectories recursively.  */
58 
59 static int recursive;
60 
61 /* For debugging: don't do discard_confusing_lines.  */
62 
63 int no_discards;
64 
65 #if HAVE_SETMODE
66 /* I/O mode: nonzero only if using binary input/output.  */
67 static int binary_I_O;
68 #endif
69 
/* Build one string out of the option arguments diff was invoked with.
   Each of the COUNT elements of OPTIONVEC is copied into the result
   preceded by a single space, so the result starts with a space, has
   no trailing space, and is the empty string when COUNT is zero.

   The result comes from xmalloc; the caller owns it.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  size_t total = 1;		/* Room for the terminating null.  */
  char *buf;
  char *tail;
  int n;

  /* Every element costs its own length plus one leading space.  */
  for (n = 0; n < count; n++)
    total += 1 + strlen (optionvec[n]);

  buf = xmalloc (total);
  tail = buf;

  /* Append " ELEMENT" for each element, tracking the end of the text.  */
  for (n = 0; n < count; n++)
    {
      size_t len = strlen (optionvec[n]);

      *tail++ = ' ';
      memcpy (tail, optionvec[n], len);
      tail += len;
    }
  *tail = 0;

  return buf;
}
101 
/* Convert STR, which must consist solely of decimal digits, to a
   nonnegative integer and store the result in *OUT.
   Return 0 on success.  Return -1, leaving *OUT untouched, if STR is
   empty or contains any character other than '0'..'9' (so a sign or
   white space is rejected).
   Values too large for an int are not detected here.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;

  /* An empty string is not a number.  */
  if (*str == 0)
    return -1;

  for (p = str; *p; p++)
    if (*p < '0' || *p > '9')
      return -1;

  /* Convert the string that was validated above, not the global
     `optarg', so the result does not depend on what the caller
     happened to pass.  */
  *out = atoi (str);
  return 0;
}
117 
/* Keep track of excluded file name patterns.
   EXCLUDE is an array with room for EXCLUDE_ALLOC patterns, of which
   the first EXCLUDE_COUNT are in use.  */

static char const **exclude;
static int exclude_alloc, exclude_count;

/* Return 1 if file name F matches any recorded exclusion pattern
   (as judged by fnmatch with no flags), 0 otherwise.  */

int
excluded_filename (f)
     char const *f;
{
  char const **pat = exclude;
  int remaining = exclude_count;

  /* Walk the patterns in the order they were added; the first match
     settles the question.  */
  while (remaining-- > 0)
    {
      if (fnmatch (*pat, f, 0) == 0)
	return 1;
      pat++;
    }

  return 0;
}
133 
134 static void
135 add_exclude (pattern)
136      char const *pattern;
137 {
138   if (exclude_alloc <= exclude_count)
139     exclude = (char const **)
140 	      (exclude_alloc == 0
141 	       ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
142 	       : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
143 
144   exclude[exclude_count++] = pattern;
145 }
146 
147 static int
148 add_exclude_file (name)
149      char const *name;
150 {
151   struct file_data f;
152   char *p, *q, *lim;
153 
154   f.name = optarg;
155   f.desc = (strcmp (optarg, "-") == 0
156 	    ? STDIN_FILENO
157 	    : open (optarg, O_RDONLY, 0));
158   if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
159     return -1;
160 
161   sip (&f, 1);
162   slurp (&f);
163 
164   for (p = f.buffer, lim = p + f.buffered_chars;  p < lim;  p = q)
165     {
166       q = (char *) memchr (p, '\n', lim - p);
167       if (!q)
168 	q = lim;
169       *q++ = 0;
170       add_exclude (p);
171     }
172 
173   return close (f.desc);
174 }
175 
/* Long option table handed to getopt_long by `diff_run'.
   The second field says whether the option takes no argument, a
   required argument, or an optional one.  The fourth field is the
   value getopt_long returns: a character for options that share a
   case with a short option, or one of the numbers 129- that tell the
   big switch in `diff_run' how to process options with no short form.  */

static struct option const longopts[] =
{
  {"ignore-blank-lines", no_argument, 0, 'B'},
  {"context", optional_argument, 0, 'C'},
  {"ifdef", required_argument, 0, 'D'},
  {"show-function-line", required_argument, 0, 'F'},
  {"speed-large-files", no_argument, 0, 'H'},
  {"ignore-matching-lines", required_argument, 0, 'I'},
  {"label", required_argument, 0, 'L'},
  {"file-label", required_argument, 0, 'L'},	/* An alias, no longer recommended */
  {"new-file", no_argument, 0, 'N'},
  {"entire-new-file", no_argument, 0, 'N'},	/* An alias, no longer recommended */
  {"unidirectional-new-file", no_argument, 0, 'P'},
  {"starting-file", required_argument, 0, 'S'},
  {"initial-tab", no_argument, 0, 'T'},
  {"width", required_argument, 0, 'W'},
  {"text", no_argument, 0, 'a'},
  {"ascii", no_argument, 0, 'a'},		/* An alias, no longer recommended */
  {"ignore-space-change", no_argument, 0, 'b'},
  {"minimal", no_argument, 0, 'd'},
  {"ed", no_argument, 0, 'e'},
  {"forward-ed", no_argument, 0, 'f'},
  {"ignore-case", no_argument, 0, 'i'},
  {"paginate", no_argument, 0, 'l'},
  {"print", no_argument, 0, 'l'},		/* An alias, no longer recommended */
  {"rcs", no_argument, 0, 'n'},
  {"show-c-function", no_argument, 0, 'p'},
  {"brief", no_argument, 0, 'q'},
  {"recursive", no_argument, 0, 'r'},
  {"report-identical-files", no_argument, 0, 's'},
  {"expand-tabs", no_argument, 0, 't'},
  {"version", no_argument, 0, 'v'},
  {"ignore-all-space", no_argument, 0, 'w'},
  {"exclude", required_argument, 0, 'x'},
  {"exclude-from", required_argument, 0, 'X'},
  {"side-by-side", no_argument, 0, 'y'},
  {"unified", optional_argument, 0, 'U'},

  /* Options without a short equivalent.  */
  {"left-column", no_argument, 0, 129},
  {"suppress-common-lines", no_argument, 0, 130},
  {"sdiff-merge-assist", no_argument, 0, 131},
  {"old-line-format", required_argument, 0, 132},
  {"new-line-format", required_argument, 0, 133},
  {"unchanged-line-format", required_argument, 0, 134},
  {"line-format", required_argument, 0, 135},
  {"old-group-format", required_argument, 0, 136},
  {"new-group-format", required_argument, 0, 137},
  {"unchanged-group-format", required_argument, 0, 138},
  {"changed-group-format", required_argument, 0, 139},
  {"horizon-lines", required_argument, 0, 140},
  {"help", no_argument, 0, 141},
  {"binary", no_argument, 0, 142},

  /* Terminator.  */
  {0, 0, 0, 0}
};
232 
233 int
234 diff_run (argc, argv, out, callbacks_arg)
235      int argc;
236      char *argv[];
237      char *out;
238      const struct diff_callbacks *callbacks_arg;
239 {
240   int val;
241   int c;
242   int prev = -1;
243   int width = DEFAULT_WIDTH;
244   int show_c_function = 0;
245   int optind_old;
246   int opened_file = 0;
247 
248   callbacks = callbacks_arg;
249 
250   /* Do our initializations.  */
251   initialize_main (&argc, &argv);
252 
253   /* Decode the options.  */
254 
255   optind_old = optind;
256   optind = 0;
257   while ((c = getopt_long (argc, argv,
258 			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
259 			   longopts, 0)) != EOF)
260     {
261       switch (c)
262 	{
263 	  /* All digits combine in decimal to specify the context-size.  */
264 	case '1':
265 	case '2':
266 	case '3':
267 	case '4':
268 	case '5':
269 	case '6':
270 	case '7':
271 	case '8':
272 	case '9':
273 	case '0':
274 	  if (context == -1)
275 	    context = 0;
276 	  /* If a context length has already been specified,
277 	     more digits allowed only if they follow right after the others.
278 	     Reject two separate runs of digits, or digits after -C.  */
279 	  else if (prev < '0' || prev > '9')
280 	    fatal ("context length specified twice");
281 
282 	  context = context * 10 + c - '0';
283 	  break;
284 
285 	case 'a':
286 	  /* Treat all files as text files; never treat as binary.  */
287 	  always_text_flag = 1;
288 	  break;
289 
290 	case 'b':
291 	  /* Ignore changes in amount of white space.  */
292 	  ignore_space_change_flag = 1;
293 	  ignore_some_changes = 1;
294 	  ignore_some_line_changes = 1;
295 	  break;
296 
297 	case 'B':
298 	  /* Ignore changes affecting only blank lines.  */
299 	  ignore_blank_lines_flag = 1;
300 	  ignore_some_changes = 1;
301 	  break;
302 
303 	case 'C':		/* +context[=lines] */
304 	case 'U':		/* +unified[=lines] */
305 	  if (optarg)
306 	    {
307 	      if (context >= 0)
308 		fatal ("context length specified twice");
309 
310 	      if (ck_atoi (optarg, &context))
311 		fatal ("invalid context length argument");
312 	    }
313 
314 	  /* Falls through.  */
315 	case 'c':
316 	  /* Make context-style output.  */
317 	  specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
318 	  break;
319 
320 	case 'd':
321 	  /* Don't discard lines.  This makes things slower (sometimes much
322 	     slower) but will find a guaranteed minimal set of changes.  */
323 	  no_discards = 1;
324 	  break;
325 
326 	case 'D':
327 	  /* Make merged #ifdef output.  */
328 	  specify_style (OUTPUT_IFDEF);
329 	  {
330 	    int i, err = 0;
331 	    static char const C_ifdef_group_formats[] =
332 	      "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
333 	    char *b = xmalloc (sizeof (C_ifdef_group_formats)
334 			       + 7 * strlen(optarg) - 14 /* 7*"%s" */
335 			       - 8 /* 5*"%%" + 3*"%c" */);
336 	    sprintf (b, C_ifdef_group_formats,
337 		     optarg, optarg, 0,
338 		     optarg, optarg, 0, 0,
339 		     optarg, optarg, optarg);
340 	    for (i = 0; i < 4; i++)
341 	      {
342 		err |= specify_format (&group_format[i], b);
343 		b += strlen (b) + 1;
344 	      }
345 	    if (err)
346 	      diff_error ("conflicting #ifdef formats", 0, 0);
347 	  }
348 	  break;
349 
350 	case 'e':
351 	  /* Make output that is a valid `ed' script.  */
352 	  specify_style (OUTPUT_ED);
353 	  break;
354 
355 	case 'f':
356 	  /* Make output that looks vaguely like an `ed' script
357 	     but has changes in the order they appear in the file.  */
358 	  specify_style (OUTPUT_FORWARD_ED);
359 	  break;
360 
361 	case 'F':
362 	  /* Show, for each set of changes, the previous line that
363 	     matches the specified regexp.  Currently affects only
364 	     context-style output.  */
365 	  add_regexp (&function_regexp_list, optarg);
366 	  break;
367 
368 	case 'h':
369 	  /* Split the files into chunks of around 1500 lines
370 	     for faster processing.  Usually does not change the result.
371 
372 	     This currently has no effect.  */
373 	  break;
374 
375 	case 'H':
376 	  /* Turn on heuristics that speed processing of large files
377 	     with a small density of changes.  */
378 	  heuristic = 1;
379 	  break;
380 
381 	case 'i':
382 	  /* Ignore changes in case.  */
383 	  ignore_case_flag = 1;
384 	  ignore_some_changes = 1;
385 	  ignore_some_line_changes = 1;
386 	  break;
387 
388 	case 'I':
389 	  /* Ignore changes affecting only lines that match the
390 	     specified regexp.  */
391 	  add_regexp (&ignore_regexp_list, optarg);
392 	  ignore_some_changes = 1;
393 	  break;
394 
395 	case 'l':
396 	  /* Pass the output through `pr' to paginate it.  */
397 	  paginate_flag = 1;
398 #if !defined(SIGCHLD) && defined(SIGCLD)
399 #define SIGCHLD SIGCLD
400 #endif
401 #ifdef SIGCHLD
402 	  /* Pagination requires forking and waiting, and
403 	     System V fork+wait does not work if SIGCHLD is ignored.  */
404 	  signal (SIGCHLD, SIG_DFL);
405 #endif
406 	  break;
407 
408 	case 'L':
409 	  /* Specify file labels for `-c' output headers.  */
410 	  if (!file_label[0])
411 	    file_label[0] = optarg;
412 	  else if (!file_label[1])
413 	    file_label[1] = optarg;
414 	  else
415 	    fatal ("too many file label options");
416 	  break;
417 
418 	case 'n':
419 	  /* Output RCS-style diffs, like `-f' except that each command
420 	     specifies the number of lines affected.  */
421 	  specify_style (OUTPUT_RCS);
422 	  break;
423 
424 	case 'N':
425 	  /* When comparing directories, if a file appears only in one
426 	     directory, treat it as present but empty in the other.  */
427 	  entire_new_file_flag = 1;
428 	  break;
429 
430 	case 'p':
431 	  /* Make context-style output and show name of last C function.  */
432 	  show_c_function = 1;
433 	  add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
434 	  break;
435 
436 	case 'P':
437 	  /* When comparing directories, if a file appears only in
438 	     the second directory of the two,
439 	     treat it as present but empty in the other.  */
440 	  unidirectional_new_file_flag = 1;
441 	  break;
442 
443 	case 'q':
444 	  no_details_flag = 1;
445 	  break;
446 
447 	case 'r':
448 	  /* When comparing directories,
449 	     recursively compare any subdirectories found.  */
450 	  recursive = 1;
451 	  break;
452 
453 	case 's':
454 	  /* Print a message if the files are the same.  */
455 	  print_file_same_flag = 1;
456 	  break;
457 
458 	case 'S':
459 	  /* When comparing directories, start with the specified
460 	     file name.  This is used for resuming an aborted comparison.  */
461 	  dir_start_file = optarg;
462 	  break;
463 
464 	case 't':
465 	  /* Expand tabs to spaces in the output so that it preserves
466 	     the alignment of the input files.  */
467 	  tab_expand_flag = 1;
468 	  break;
469 
470 	case 'T':
471 	  /* Use a tab in the output, rather than a space, before the
472 	     text of an input line, so as to keep the proper alignment
473 	     in the input line without changing the characters in it.  */
474 	  tab_align_flag = 1;
475 	  break;
476 
477 	case 'u':
478 	  /* Output the context diff in unidiff format.  */
479 	  specify_style (OUTPUT_UNIFIED);
480 	  break;
481 
482 	case 'v':
483 	  if (callbacks && callbacks->write_stdout)
484 	    {
485 	      (*callbacks->write_stdout) ("diff - GNU diffutils version ");
486 	      (*callbacks->write_stdout) (diff_version_string);
487 	      (*callbacks->write_stdout) ("\n");
488 	    }
489 	  else
490 	    printf ("diff - GNU diffutils version %s\n", diff_version_string);
491 	  return 0;
492 
493 	case 'w':
494 	  /* Ignore horizontal white space when comparing lines.  */
495 	  ignore_all_space_flag = 1;
496 	  ignore_some_changes = 1;
497 	  ignore_some_line_changes = 1;
498 	  break;
499 
500 	case 'x':
501 	  add_exclude (optarg);
502 	  break;
503 
504 	case 'X':
505 	  if (add_exclude_file (optarg) != 0)
506 	    pfatal_with_name (optarg);
507 	  break;
508 
509 	case 'y':
510 	  /* Use side-by-side (sdiff-style) columnar output. */
511 	  specify_style (OUTPUT_SDIFF);
512 	  break;
513 
514 	case 'W':
515 	  /* Set the line width for OUTPUT_SDIFF.  */
516 	  if (ck_atoi (optarg, &width) || width <= 0)
517 	    fatal ("column width must be a positive integer");
518 	  break;
519 
520 	case 129:
521 	  sdiff_left_only = 1;
522 	  break;
523 
524 	case 130:
525 	  sdiff_skip_common_lines = 1;
526 	  break;
527 
528 	case 131:
529 	  /* sdiff-style columns output. */
530 	  specify_style (OUTPUT_SDIFF);
531 	  sdiff_help_sdiff = 1;
532 	  break;
533 
534 	case 132:
535 	case 133:
536 	case 134:
537 	  specify_style (OUTPUT_IFDEF);
538 	  if (specify_format (&line_format[c - 132], optarg) != 0)
539 	    diff_error ("conflicting line format", 0, 0);
540 	  break;
541 
542 	case 135:
543 	  specify_style (OUTPUT_IFDEF);
544 	  {
545 	    int i, err = 0;
546 	    for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
547 	      err |= specify_format (&line_format[i], optarg);
548 	    if (err)
549 	      diff_error ("conflicting line format", 0, 0);
550 	  }
551 	  break;
552 
553 	case 136:
554 	case 137:
555 	case 138:
556 	case 139:
557 	  specify_style (OUTPUT_IFDEF);
558 	  if (specify_format (&group_format[c - 136], optarg) != 0)
559 	    diff_error ("conflicting group format", 0, 0);
560 	  break;
561 
562 	case 140:
563 	  if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
564 	    fatal ("horizon must be a nonnegative integer");
565 	  break;
566 
567 	case 141:
568 	  usage ();
569 	  if (! callbacks || ! callbacks->write_stdout)
570 	    check_output (stdout);
571 	  return 0;
572 
573 	case 142:
574 	  /* Use binary I/O when reading and writing data.
575 	     On Posix hosts, this has no effect.  */
576 #if HAVE_SETMODE
577 	  binary_I_O = 1;
578 #  if 0
579 	  /* Because this code is leftover from pre-library days,
580 	     there is no way to set stdout back to the default mode
581 	     when we are done.  As it turns out, I think the only
582 	     parts of CVS that pass out == NULL, and thus cause diff
583 	     to write to stdout, are "cvs diff" and "cvs rdiff".  So
584 	     I'm not going to worry about this too much yet.  */
585 	  setmode (STDOUT_FILENO, O_BINARY);
586 #  else
587 	  if (out == NULL)
588 	    error (0, 0, "warning: did not set stdout to binary mode");
589 #  endif
590 #endif
591 	  break;
592 
593 	default:
594 	  return try_help (0);
595 	}
596       prev = c;
597     }
598 
599   if (argc - optind != 2)
600     return try_help (argc - optind < 2 ? "missing operand" : "extra operand");
601 
602   {
603     /*
604      *	We maximize first the half line width, and then the gutter width,
605      *	according to the following constraints:
606      *	1.  Two half lines plus a gutter must fit in a line.
607      *	2.  If the half line width is nonzero:
608      *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
609      *	    b.  If tabs are not expanded to spaces,
610      *		a half line plus a gutter is an integral number of tabs,
611      *		so that tabs in the right column line up.
612      */
613     int t = tab_expand_flag ? 1 : TAB_WIDTH;
614     int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t)  *  t;
615     sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
616     sdiff_column2_offset = sdiff_half_width ? off : width;
617   }
618 
619   if (show_c_function && output_style != OUTPUT_UNIFIED)
620     specify_style (OUTPUT_CONTEXT);
621 
622   if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
623     context = 0;
624   else if (context == -1)
625     /* Default amount of context for -c.  */
626     context = 3;
627 
628   if (output_style == OUTPUT_IFDEF)
629     {
630       /* Format arrays are char *, not char const *,
631 	 because integer formats are temporarily modified.
632 	 But it is safe to assign a constant like "%=" to a format array,
633 	 since "%=" does not format any integers.  */
634       int i;
635       for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
636 	if (!line_format[i])
637 	  line_format[i] = "%l\n";
638       if (!group_format[OLD])
639 	group_format[OLD]
640 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
641       if (!group_format[NEW])
642 	group_format[NEW]
643 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
644       if (!group_format[UNCHANGED])
645 	group_format[UNCHANGED] = "%=";
646       if (!group_format[CHANGED])
647 	group_format[CHANGED] = concat (group_format[OLD],
648 					group_format[NEW], "");
649     }
650 
651   no_diff_means_no_output =
652     (output_style == OUTPUT_IFDEF ?
653       (!*group_format[UNCHANGED]
654        || (strcmp (group_format[UNCHANGED], "%=") == 0
655 	   && !*line_format[UNCHANGED]))
656      : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
657 
658   switch_string = option_list (argv + 1, optind - 1);
659 
660   if (callbacks && callbacks->write_output)
661     {
662       if (out != NULL)
663 	{
664 	  diff_error ("write callback with output file", 0, 0);
665 	  return 2;
666 	}
667     }
668   else
669     {
670       if (out == NULL)
671 	outfile = stdout;
672       else
673 	{
674 #if HAVE_SETMODE
675 	  /* A diff which is full of ^Z and such isn't going to work
676 	     very well in text mode.  */
677 	  if (binary_I_O)
678 	    outfile = fopen (out, "wb");
679 	  else
680 #endif
681 	    outfile = fopen (out, "w");
682 	  if (outfile == NULL)
683 	    {
684 	      perror_with_name ("could not open output file");
685 	      return 2;
686 	    }
687 	  opened_file = 1;
688 	}
689     }
690 
691   /* Set the jump buffer, so that diff may abort execution without
692      terminating the process. */
693   if ((val = setjmp (diff_abort_buf)) != 0)
694     {
695       optind = optind_old;
696       if (opened_file)
697 	fclose (outfile);
698       return val;
699     }
700 
701   val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
702 
703   /* Print any messages that were saved up for last.  */
704   print_message_queue ();
705 
706   free (switch_string);
707 
708   optind = optind_old;
709 
710   if (! callbacks || ! callbacks->write_output)
711     check_output (outfile);
712 
713   if (opened_file)
714     if (fclose (outfile) != 0)
715 	perror_with_name ("close error on output file");
716 
717   return val;
718 }
719 
720 /* Add the compiled form of regexp PATTERN to REGLIST.  */
721 
722 static void
723 add_regexp (reglist, pattern)
724      struct regexp_list **reglist;
725      char const *pattern;
726 {
727   struct regexp_list *r;
728   char const *m;
729 
730   r = (struct regexp_list *) xmalloc (sizeof (*r));
731   bzero (r, sizeof (*r));
732   r->buf.fastmap = xmalloc (256);
733   m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
734   if (m != 0)
735     diff_error ("%s: %s", pattern, m);
736 
737   /* Add to the start of the list, since it's easier than the end.  */
738   r->next = *reglist;
739   *reglist = r;
740 }
741 
742 static int
743 try_help (reason)
744      char const *reason;
745 {
746   if (reason)
747     diff_error ("%s", reason, 0);
748   diff_error ("Try `%s --help' for more information.", diff_program_name, 0);
749   return 2;
750 }
751 
/* Make sure everything written to FILE got out: if the stream has its
   error indicator set, or flushing it fails, report "write error"
   through fatal.  */

static void
check_output (file)
    FILE *file;
{
  int failed = ferror (file) != 0;

  /* Flush only a stream that shows no error yet.  */
  if (! failed)
    failed = fflush (file) != 0;

  if (failed)
    fatal ("write error");
}
759 
/* Text of the option summary printed by `usage', one string per output
   line, terminated by a null pointer.  A string ending in "\n" leaves
   a blank line after it.  The defaults quoted here must agree with
   `diff_run': 3 lines of context, and -W (not -w) for the width.  */

static char const * const option_help[] = {
"-i  --ignore-case  Consider upper- and lower-case to be the same.",
"-w  --ignore-all-space  Ignore all white space.",
"-b  --ignore-space-change  Ignore changes in the amount of white space.",
"-B  --ignore-blank-lines  Ignore changes whose lines are all blank.",
"-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.",
#if HAVE_SETMODE
"--binary  Read and write data in binary mode.",
#endif
"-a  --text  Treat all files as text.\n",
"-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.",
"-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.",
"  -NUM  Use NUM context lines.",
"  -L LABEL  --label LABEL  Use LABEL instead of file name.",
"  -p  --show-c-function  Show which C function each change is in.",
"  -F RE  --show-function-line=RE  Show the most recent line matching RE.",
"-q  --brief  Output only whether files differ.",
"-e  --ed  Output an ed script.",
"-n  --rcs  Output an RCS format diff.",
"-y  --side-by-side  Output in two columns.",
"  -W NUM  --width=NUM  Output at most NUM (default 130) characters per line.",
"  --left-column  Output only the left column of common lines.",
"  --suppress-common-lines  Do not output common lines.",
"-DNAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs.",
"--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT.",
"--line-format=LFMT  Similar, but format all input lines with LFMT.",
"--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT.",
"  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.",
"  GFMT may contain:",
"    %<  lines from FILE1",
"    %>  lines from FILE2",
"    %=  lines common to FILE1 and FILE2",
"    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER",
"      LETTERs are as follows for new group, lower case for old group:",
"        F  first line number",
"        L  last line number",
"        N  number of lines = L-F+1",
"        E  F-1",
"        M  L+1",
"  LFMT may contain:",
"    %L  contents of line",
"    %l  contents of line, excluding any trailing newline",
"    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number",
"  Either GFMT or LFMT may contain:",
"    %%  %",
"    %c'C'  the single character C",
"    %c'\\OOO'  the character with octal code OOO\n",
"-l  --paginate  Pass the output through `pr' to paginate it.",
"-t  --expand-tabs  Expand tabs to spaces in output.",
"-T  --initial-tab  Make tabs line up by prepending a tab.\n",
"-r  --recursive  Recursively compare any subdirectories found.",
"-N  --new-file  Treat absent files as empty.",
"-P  --unidirectional-new-file  Treat absent first files as empty.",
"-s  --report-identical-files  Report when two files are the same.",
"-x PAT  --exclude=PAT  Exclude files that match PAT.",
"-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE.",
"-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
"--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
"-d  --minimal  Try hard to find a smaller set of changes.",
"-H  --speed-large-files  Assume large files and many scattered small changes.\n",
"-v  --version  Output version info.",
"--help  Output this help.",
0
};
824 
825 static void
826 usage ()
827 {
828   char const * const *p;
829 
830   if (callbacks && callbacks->write_stdout)
831     {
832       (*callbacks->write_stdout) ("Usage: ");
833       (*callbacks->write_stdout) (diff_program_name);
834       (*callbacks->write_stdout) (" [OPTION]... FILE1 FILE2\n\n");
835       for (p = option_help;  *p;  p++)
836 	{
837 	  (*callbacks->write_stdout) ("  ");
838 	  (*callbacks->write_stdout) (*p);
839 	  (*callbacks->write_stdout) ("\n");
840 	}
841       (*callbacks->write_stdout)
842 	("\nIf FILE1 or FILE2 is `-', read standard input.\n");
843     }
844   else
845     {
846       printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name);
847       for (p = option_help;  *p;  p++)
848 	printf ("  %s\n", *p);
849       printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
850     }
851 }
852 
/* Record VALUE as the format stored in *VAR.
   Return zero if *VAR was unset or already held a string equal to
   VALUE; otherwise return the nonzero result of comparing the old
   string with VALUE.  *VAR is set to VALUE in either case.  */

static int
specify_format (var, value)
     char **var;
     char *value;
{
  char *previous = *var;
  int conflict = 0;

  if (previous)
    conflict = strcmp (previous, value);

  *var = value;
  return conflict;
}
862 
863 static void
864 specify_style (style)
865      enum output_style style;
866 {
867   if (output_style != OUTPUT_NORMAL
868       && output_style != style)
869     diff_error ("conflicting specifications of output style", 0, 0);
870   output_style = style;
871 }
872 
/* Return a constant string describing the type of the file whose
   status is *ST, for use in diagnostics.  Types the host does not
   define are skipped; anything unrecognized is a "weird file".  */

static char const *
filetype (st)
     struct stat const *st;
{
  /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
     To keep diagnostics grammatical, the returned string must start
     with a consonant.  */

  /* Posix.2 section 5.14.2 seems to suggest that we must read a regular
     file and guess whether it's C, Fortran, etc., but this is somewhat
     useless and doesn't reflect historical practice.  We're allowed to
     guess wrong, so we don't bother to read the file.  */
  if (S_ISREG (st->st_mode))
    return st->st_size == 0 ? "regular empty file" : "regular file";

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  return "weird file";
}
923 
924 /* Compare two files (or dirs) with specified names
925    DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
926    (if DIR0 is 0, then the name is just NAME0, etc.)
927    This is self-contained; it opens the files and closes them.
928 
929    Value is 0 if files are the same, 1 if different,
930    2 if there is a problem opening them.  */
931 
932 static int
933 compare_files (dir0, name0, dir1, name1, depth)
934      char const *dir0, *dir1;
935      char const *name0, *name1;
936      int depth;
937 {
938   struct file_data inf[2];
939   register int i;
940   int val;
941   int same_files;
942   int failed = 0;
943   char *free0 = 0, *free1 = 0;
944 
945   /* If this is directory comparison, perhaps we have a file
946      that exists only in one of the directories.
947      If so, just print a message to that effect.  */
948 
949   if (! ((name0 != 0 && name1 != 0)
950 	 || (unidirectional_new_file_flag && name1 != 0)
951 	 || entire_new_file_flag))
952     {
953       char const *name = name0 == 0 ? name1 : name0;
954       char const *dir = name0 == 0 ? dir1 : dir0;
955       message ("Only in %s: %s\n", dir, name);
956       /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
957       return 1;
958     }
959 
960   bzero (inf, sizeof (inf));
961 
962   /* Mark any nonexistent file with -1 in the desc field.  */
963   /* Mark unopened files (e.g. directories) with -2. */
964 
965   inf[0].desc = name0 == 0 ? -1 : -2;
966   inf[1].desc = name1 == 0 ? -1 : -2;
967 
968   /* Now record the full name of each file, including nonexistent ones.  */
969 
970   if (name0 == 0)
971     name0 = name1;
972   if (name1 == 0)
973     name1 = name0;
974 
975   inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
976   inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
977 
978   /* Stat the files.  Record whether they are directories.  */
979 
980   for (i = 0; i <= 1; i++)
981     {
982       if (inf[i].desc != -1)
983 	{
984 	  int stat_result;
985 
986 	  if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
987 	    {
988 	      inf[i].stat = inf[0].stat;
989 	      stat_result = 0;
990 	    }
991 	  else if (strcmp (inf[i].name, "-") == 0)
992 	    {
993 	      inf[i].desc = STDIN_FILENO;
994 	      stat_result = fstat (STDIN_FILENO, &inf[i].stat);
995 	      if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
996 		{
997 		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
998 		  if (pos == -1)
999 		    stat_result = -1;
1000 		  else
1001 		    {
1002 		      if (pos <= inf[i].stat.st_size)
1003 			inf[i].stat.st_size -= pos;
1004 		      else
1005 			inf[i].stat.st_size = 0;
1006 		      /* Posix.2 4.17.6.1.4 requires current time for stdin.  */
1007 		      time (&inf[i].stat.st_mtime);
1008 		    }
1009 		}
1010 	    }
1011 	  else
1012 	    stat_result = stat (inf[i].name, &inf[i].stat);
1013 
1014 	  if (stat_result != 0)
1015 	    {
1016 	      perror_with_name (inf[i].name);
1017 	      failed = 1;
1018 	    }
1019 	  else
1020 	    {
1021 	      inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
1022 	      if (inf[1 - i].desc == -1)
1023 		{
1024 		  inf[1 - i].dir_p = inf[i].dir_p;
1025 		  inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
1026 		}
1027 	    }
1028 	}
1029     }
1030 
1031   if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
1032     {
1033       /* If one is a directory, and it was specified in the command line,
1034 	 use the file in that dir with the other file's basename.  */
1035 
1036       int fnm_arg = inf[0].dir_p;
1037       int dir_arg = 1 - fnm_arg;
1038       char const *fnm = inf[fnm_arg].name;
1039       char const *dir = inf[dir_arg].name;
1040       char const *p = filename_lastdirchar (fnm);
1041       char const *filename = inf[dir_arg].name
1042 	= dir_file_pathname (dir, p ? p + 1 : fnm);
1043 
1044       if (strcmp (fnm, "-") == 0)
1045 	fatal ("can't compare - to a directory");
1046 
1047       if (stat (filename, &inf[dir_arg].stat) != 0)
1048 	{
1049 	  perror_with_name (filename);
1050 	  failed = 1;
1051 	}
1052       else
1053 	inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
1054     }
1055 
1056   if (failed)
1057     {
1058 
1059       /* If either file should exist but does not, return 2.  */
1060 
1061       val = 2;
1062 
1063     }
1064   else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
1065 			 && 0 < same_file (&inf[0].stat, &inf[1].stat))
1066 	   && no_diff_means_no_output)
1067     {
1068       /* The two named files are actually the same physical file.
1069 	 We know they are identical without actually reading them.  */
1070 
1071       val = 0;
1072     }
1073   else if (inf[0].dir_p & inf[1].dir_p)
1074     {
1075       if (output_style == OUTPUT_IFDEF)
1076 	fatal ("-D option not supported with directories");
1077 
1078       /* If both are directories, compare the files in them.  */
1079 
1080       if (depth > 0 && !recursive)
1081 	{
1082 	  /* But don't compare dir contents one level down
1083 	     unless -r was specified.  */
1084 	  message ("Common subdirectories: %s and %s\n",
1085 		   inf[0].name, inf[1].name);
1086 	  val = 0;
1087 	}
1088       else
1089 	{
1090 	  val = diff_dirs (inf, compare_files, depth);
1091 	}
1092 
1093     }
1094   else if ((inf[0].dir_p | inf[1].dir_p)
1095 	   || (depth > 0
1096 	       && (! S_ISREG (inf[0].stat.st_mode)
1097 		   || ! S_ISREG (inf[1].stat.st_mode))))
1098     {
1099       /* Perhaps we have a subdirectory that exists only in one directory.
1100 	 If so, just print a message to that effect.  */
1101 
1102       if (inf[0].desc == -1 || inf[1].desc == -1)
1103 	{
1104 	  if ((inf[0].dir_p | inf[1].dir_p)
1105 	      && recursive
1106 	      && (entire_new_file_flag
1107 		  || (unidirectional_new_file_flag && inf[0].desc == -1)))
1108 	    val = diff_dirs (inf, compare_files, depth);
1109 	  else
1110 	    {
1111 	      char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1112 	      /* See Posix.2 section 4.17.6.1.1 for this format.  */
1113 	      message ("Only in %s: %s\n", dir, name0);
1114 	      val = 1;
1115 	    }
1116 	}
1117       else
1118 	{
1119 	  /* We have two files that are not to be compared.  */
1120 
1121 	  /* See Posix.2 section 4.17.6.1.1 for this format.  */
1122 	  message5 ("File %s is a %s while file %s is a %s\n",
1123 		    inf[0].name, filetype (&inf[0].stat),
1124 		    inf[1].name, filetype (&inf[1].stat));
1125 
1126 	  /* This is a difference.  */
1127 	  val = 1;
1128 	}
1129     }
1130   else if ((no_details_flag & ~ignore_some_changes)
1131 	   && inf[0].stat.st_size != inf[1].stat.st_size
1132 	   && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1133 	   && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1134     {
1135       message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1136       val = 1;
1137     }
1138   else
1139     {
1140       /* Both exist and neither is a directory.  */
1141 
1142       /* Open the files and record their descriptors.  */
1143 
1144       if (inf[0].desc == -2)
1145 	if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1146 	  {
1147 	    perror_with_name (inf[0].name);
1148 	    failed = 1;
1149 	  }
1150       if (inf[1].desc == -2)
1151 	if (same_files)
1152 	  inf[1].desc = inf[0].desc;
1153 	else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1154 	  {
1155 	    perror_with_name (inf[1].name);
1156 	    failed = 1;
1157 	  }
1158 
1159 #if HAVE_SETMODE
1160       if (binary_I_O)
1161 	for (i = 0; i <= 1; i++)
1162 	  if (0 <= inf[i].desc)
1163 	    setmode (inf[i].desc, O_BINARY);
1164 #endif
1165 
1166       /* Compare the files, if no error was found.  */
1167 
1168       val = failed ? 2 : diff_2_files (inf, depth);
1169 
1170       /* Close the file descriptors.  */
1171 
1172       if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1173 	{
1174 	  perror_with_name (inf[0].name);
1175 	  val = 2;
1176 	}
1177       if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1178 	  && close (inf[1].desc) != 0)
1179 	{
1180 	  perror_with_name (inf[1].name);
1181 	  val = 2;
1182 	}
1183     }
1184 
1185   /* Now the comparison has been done, if no error prevented it,
1186      and VAL is the value this function will return.  */
1187 
1188   if (val == 0 && !inf[0].dir_p)
1189     {
1190       if (print_file_same_flag)
1191 	message ("Files %s and %s are identical\n",
1192 		 inf[0].name, inf[1].name);
1193     }
1194   else
1195     flush_output ();
1196 
1197   if (free0)
1198     free (free0);
1199   if (free1)
1200     free (free1);
1201 
1202   return val;
1203 }
1204 
1205 /* Initialize status variables and flag variables used in libdiff,
1206    to permit repeated calls to diff_run. */
1207 
1208 static void
1209 initialize_main (argcp, argvp)
1210     int *argcp;
1211     char ***argvp;
1212 {
1213   /* These variables really must be reset each time diff_run is called. */
1214   output_style = OUTPUT_NORMAL;
1215   context = -1;
1216   file_label[0] = NULL;
1217   file_label[1] = NULL;
1218   diff_program_name = (*argvp)[0];
1219   outfile = NULL;
1220 
1221   /* Reset these also, just for safety's sake. (If one invocation turns
1222      on ignore_case_flag, it must be turned off before diff_run is called
1223      again.  But it is possible to make many diffs before encountering
1224      such a problem. */
1225   recursive = 0;
1226   no_discards = 0;
1227 #if HAVE_SETMODE
1228   binary_I_O = 0;
1229 #endif
1230   no_diff_means_no_output = 0;
1231   always_text_flag = 0;
1232   horizon_lines = 0;
1233   ignore_space_change_flag = 0;
1234   ignore_all_space_flag = 0;
1235   ignore_blank_lines_flag = 0;
1236   ignore_some_line_changes = 0;
1237   ignore_some_changes = 0;
1238   ignore_case_flag = 0;
1239   function_regexp_list = NULL;
1240   ignore_regexp_list = NULL;
1241   no_details_flag = 0;
1242   print_file_same_flag = 0;
1243   tab_align_flag = 0;
1244   tab_expand_flag = 0;
1245   dir_start_file = NULL;
1246   entire_new_file_flag = 0;
1247   unidirectional_new_file_flag = 0;
1248   paginate_flag = 0;
1249   bzero (group_format, sizeof (group_format));
1250   bzero (line_format, sizeof (line_format));
1251   sdiff_help_sdiff = 0;
1252   sdiff_left_only = 0;
1253   sdiff_skip_common_lines = 0;
1254   sdiff_half_width = 0;
1255   sdiff_column2_offset = 0;
1256   switch_string = NULL;
1257   heuristic = 0;
1258   bzero (files, sizeof (files));
1259 }
1260