xref: /openbsd-src/gnu/usr.bin/cvs/diff/diff.c (revision 2286d8ed900f26153a3cd5227a124b1c0adce72f)
1 /* GNU DIFF entry routine.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING.  If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19 
20 /* GNU DIFF was written by Mike Haertel, David Hayes,
21    Richard Stallman, Len Tower, and Paul Eggert.  */
22 
23 #define GDIFF_MAIN
#include "diff.h"
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include "getopt.h"
#include "fnmatch.h"
28 
29 #ifndef DEFAULT_WIDTH
30 #define DEFAULT_WIDTH 130
31 #endif
32 
33 #ifndef GUTTER_WIDTH_MINIMUM
34 #define GUTTER_WIDTH_MINIMUM 3
35 #endif
36 
37 /* diff.c has a real initialize_main function. */
38 #ifdef initialize_main
39 #undef initialize_main
40 #endif
41 
42 static char const *filetype PARAMS((struct stat const *));
43 static char *option_list PARAMS((char **, int));
44 static int add_exclude_file PARAMS((char const *));
45 static int ck_atoi PARAMS((char const *, int *));
46 static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
47 static int specify_format PARAMS((char **, char *));
48 static void add_exclude PARAMS((char const *));
49 static void add_regexp PARAMS((struct regexp_list **, char const *));
50 static void specify_style PARAMS((enum output_style));
51 static int try_help PARAMS((char const *));
52 static void check_output PARAMS((FILE *));
53 static void usage PARAMS((void));
54 static void initialize_main PARAMS((int *, char ***));
55 
/* Nonzero for -r (--recursive): if comparing two directories,
   compare their common subdirectories recursively.  Set by the
   option switch in diff_run and consulted by compare_files.  */

static int recursive;

/* Nonzero for -d (--minimal).  The option switch in diff_run sets it
   to request a guaranteed minimal set of changes; according to the
   original note it disables discard_confusing_lines, which is defined
   outside this file.  */

int no_discards;

#if HAVE_SETMODE
/* I/O mode: nonzero only if using binary input/output.  Set by the
   --binary option; diff_run consults it when opening the output file
   so that the file is opened with mode "wb" instead of "w".  */
static int binary_I_O;
#endif
69 
/* Build a single string that records the options diff was invoked with.

   OPTIONVEC holds COUNT separate ARGV-elements.  Each element is copied
   into the result preceded by one space, so the result starts with a
   space, has none at the end, and is the empty string when COUNT is 0.

   The result is obtained from xmalloc; the caller owns it.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  size_t total = 0;
  char *buf;
  char *cursor;
  int k;

  /* One byte for the separating space plus the element itself.  */
  for (k = 0; k < count; k++)
    total += strlen (optionvec[k]) + 1;

  /* The extra byte holds the terminating null.  */
  buf = xmalloc (total + 1);
  cursor = buf;
  *cursor = 0;

  for (k = 0; k < count; k++)
    {
      *cursor++ = ' ';
      strcpy (cursor, optionvec[k]);
      cursor += strlen (cursor);
    }

  return buf;
}
101 
/* Convert the decimal digit string STR to a nonnegative integer and
   store the result in *OUT.

   Return 0 on success.  Return -1, leaving *OUT untouched, if STR
   contains any character other than '0'..'9' or if the value does not
   fit in an int.  An empty STR is accepted and yields 0, as before.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;
  int value = 0;

  /* Convert STR itself; the previous code validated STR but then
     converted the global optarg, which only worked because every
     caller happened to pass optarg.  */
  for (p = str; *p; p++)
    {
      int digit;

      if (*p < '0' || *p > '9')
	return -1;
      digit = *p - '0';

      /* Reject values that would overflow an int; signed overflow
	 (and atoi on an out-of-range string) is undefined.  */
      if (value > (INT_MAX - digit) / 10)
	return -1;
      value = value * 10 + digit;
    }

  *out = value;
  return 0;
}
117 
/* The file name patterns to exclude, as registered by add_exclude.
   EXCLUDE is a growable array with room for EXCLUDE_ALLOC entries,
   of which the first EXCLUDE_COUNT are in use.  */

static char const **exclude;
static int exclude_alloc, exclude_count;

/* Return 1 if the file name F matches any registered exclusion
   pattern (as decided by fnmatch with no flags), otherwise 0.  */

int
excluded_filename (f)
     char const *f;
{
  char const **pat = exclude;
  int remaining = exclude_count;

  /* Patterns are tried in the order in which they were added.  */
  while (remaining-- > 0)
    if (fnmatch (*pat++, f, 0) == 0)
      return 1;

  return 0;
}
133 
134 static void
135 add_exclude (pattern)
136      char const *pattern;
137 {
138   if (exclude_alloc <= exclude_count)
139     exclude = (char const **)
140 	      (exclude_alloc == 0
141 	       ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
142 	       : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
143 
144   exclude[exclude_count++] = pattern;
145 }
146 
147 static int
148 add_exclude_file (name)
149      char const *name;
150 {
151   struct file_data f;
152   char *p, *q, *lim;
153 
154   f.name = optarg;
155   f.desc = (strcmp (optarg, "-") == 0
156 	    ? STDIN_FILENO
157 	    : open (optarg, O_RDONLY, 0));
158   if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
159     return -1;
160 
161   sip (&f, 1);
162   slurp (&f);
163 
164   for (p = f.buffer, lim = p + f.buffered_chars;  p < lim;  p = q)
165     {
166       q = (char *) memchr (p, '\n', lim - p);
167       if (!q)
168 	q = lim;
169       *q++ = 0;
170       add_exclude (p);
171     }
172 
173   return close (f.desc);
174 }
175 
/* Long option table handed to getopt_long by `diff_run'.

   Each entry gives the option name, whether it takes an argument, a
   null flag pointer (so getopt_long returns the value), and the value
   returned.  Options that have a one-letter equivalent return that
   letter; the remaining ones return the numbers 129 and up, which tell
   the big switch in `diff_run' how to process those options.  */

static struct option const longopts[] =
{
  {"ignore-blank-lines", no_argument, 0, 'B'},
  {"context", optional_argument, 0, 'C'},
  {"ifdef", required_argument, 0, 'D'},
  {"show-function-line", required_argument, 0, 'F'},
  {"speed-large-files", no_argument, 0, 'H'},
  {"ignore-matching-lines", required_argument, 0, 'I'},
  {"label", required_argument, 0, 'L'},
  {"file-label", required_argument, 0, 'L'},	/* An alias, no longer recommended */
  {"new-file", no_argument, 0, 'N'},
  {"entire-new-file", no_argument, 0, 'N'},	/* An alias, no longer recommended */
  {"unidirectional-new-file", no_argument, 0, 'P'},
  {"starting-file", required_argument, 0, 'S'},
  {"initial-tab", no_argument, 0, 'T'},
  {"width", required_argument, 0, 'W'},
  {"text", no_argument, 0, 'a'},
  {"ascii", no_argument, 0, 'a'},		/* An alias, no longer recommended */
  {"ignore-space-change", no_argument, 0, 'b'},
  {"minimal", no_argument, 0, 'd'},
  {"ed", no_argument, 0, 'e'},
  {"forward-ed", no_argument, 0, 'f'},
  {"ignore-case", no_argument, 0, 'i'},
  {"paginate", no_argument, 0, 'l'},
  {"print", no_argument, 0, 'l'},		/* An alias, no longer recommended */
  {"rcs", no_argument, 0, 'n'},
  {"show-c-function", no_argument, 0, 'p'},
  {"brief", no_argument, 0, 'q'},
  {"recursive", no_argument, 0, 'r'},
  {"report-identical-files", no_argument, 0, 's'},
  {"expand-tabs", no_argument, 0, 't'},
  {"version", no_argument, 0, 'v'},
  {"ignore-all-space", no_argument, 0, 'w'},
  {"exclude", required_argument, 0, 'x'},
  {"exclude-from", required_argument, 0, 'X'},
  {"side-by-side", no_argument, 0, 'y'},
  {"unified", optional_argument, 0, 'U'},

  /* Options without a one-letter form.  */
  {"left-column", no_argument, 0, 129},
  {"suppress-common-lines", no_argument, 0, 130},
  {"sdiff-merge-assist", no_argument, 0, 131},
  {"old-line-format", required_argument, 0, 132},
  {"new-line-format", required_argument, 0, 133},
  {"unchanged-line-format", required_argument, 0, 134},
  {"line-format", required_argument, 0, 135},
  {"old-group-format", required_argument, 0, 136},
  {"new-group-format", required_argument, 0, 137},
  {"unchanged-group-format", required_argument, 0, 138},
  {"changed-group-format", required_argument, 0, 139},
  {"horizon-lines", required_argument, 0, 140},
  {"help", no_argument, 0, 141},
  {"binary", no_argument, 0, 142},

  /* Terminator required by getopt_long.  */
  {0, 0, 0, 0}
};
232 
233 int
234 diff_run (argc, argv, out)
235      int argc;
236      char *argv[];
237      char *out;
238 {
239   int val;
240   int c;
241   int prev = -1;
242   int width = DEFAULT_WIDTH;
243   int show_c_function = 0;
244   int optind_old;
245 
246   /* Do our initializations.  */
247   initialize_main (&argc, &argv);
248 
249   /* Decode the options.  */
250 
251   optind_old = optind;
252   optind = 0;
253   while ((c = getopt_long (argc, argv,
254 			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
255 			   longopts, 0)) != EOF)
256     {
257       switch (c)
258 	{
259 	  /* All digits combine in decimal to specify the context-size.  */
260 	case '1':
261 	case '2':
262 	case '3':
263 	case '4':
264 	case '5':
265 	case '6':
266 	case '7':
267 	case '8':
268 	case '9':
269 	case '0':
270 	  if (context == -1)
271 	    context = 0;
272 	  /* If a context length has already been specified,
273 	     more digits allowed only if they follow right after the others.
274 	     Reject two separate runs of digits, or digits after -C.  */
275 	  else if (prev < '0' || prev > '9')
276 	    fatal ("context length specified twice");
277 
278 	  context = context * 10 + c - '0';
279 	  break;
280 
281 	case 'a':
282 	  /* Treat all files as text files; never treat as binary.  */
283 	  always_text_flag = 1;
284 	  break;
285 
286 	case 'b':
287 	  /* Ignore changes in amount of white space.  */
288 	  ignore_space_change_flag = 1;
289 	  ignore_some_changes = 1;
290 	  ignore_some_line_changes = 1;
291 	  break;
292 
293 	case 'B':
294 	  /* Ignore changes affecting only blank lines.  */
295 	  ignore_blank_lines_flag = 1;
296 	  ignore_some_changes = 1;
297 	  break;
298 
299 	case 'C':		/* +context[=lines] */
300 	case 'U':		/* +unified[=lines] */
301 	  if (optarg)
302 	    {
303 	      if (context >= 0)
304 		fatal ("context length specified twice");
305 
306 	      if (ck_atoi (optarg, &context))
307 		fatal ("invalid context length argument");
308 	    }
309 
310 	  /* Falls through.  */
311 	case 'c':
312 	  /* Make context-style output.  */
313 	  specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
314 	  break;
315 
316 	case 'd':
317 	  /* Don't discard lines.  This makes things slower (sometimes much
318 	     slower) but will find a guaranteed minimal set of changes.  */
319 	  no_discards = 1;
320 	  break;
321 
322 	case 'D':
323 	  /* Make merged #ifdef output.  */
324 	  specify_style (OUTPUT_IFDEF);
325 	  {
326 	    int i, err = 0;
327 	    static char const C_ifdef_group_formats[] =
328 	      "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
329 	    char *b = xmalloc (sizeof (C_ifdef_group_formats)
330 			       + 7 * strlen(optarg) - 14 /* 7*"%s" */
331 			       - 8 /* 5*"%%" + 3*"%c" */);
332 	    sprintf (b, C_ifdef_group_formats,
333 		     optarg, optarg, 0,
334 		     optarg, optarg, 0, 0,
335 		     optarg, optarg, optarg);
336 	    for (i = 0; i < 4; i++)
337 	      {
338 		err |= specify_format (&group_format[i], b);
339 		b += strlen (b) + 1;
340 	      }
341 	    if (err)
342 	      diff_error ("conflicting #ifdef formats", 0, 0);
343 	  }
344 	  break;
345 
346 	case 'e':
347 	  /* Make output that is a valid `ed' script.  */
348 	  specify_style (OUTPUT_ED);
349 	  break;
350 
351 	case 'f':
352 	  /* Make output that looks vaguely like an `ed' script
353 	     but has changes in the order they appear in the file.  */
354 	  specify_style (OUTPUT_FORWARD_ED);
355 	  break;
356 
357 	case 'F':
358 	  /* Show, for each set of changes, the previous line that
359 	     matches the specified regexp.  Currently affects only
360 	     context-style output.  */
361 	  add_regexp (&function_regexp_list, optarg);
362 	  break;
363 
364 	case 'h':
365 	  /* Split the files into chunks of around 1500 lines
366 	     for faster processing.  Usually does not change the result.
367 
368 	     This currently has no effect.  */
369 	  break;
370 
371 	case 'H':
372 	  /* Turn on heuristics that speed processing of large files
373 	     with a small density of changes.  */
374 	  heuristic = 1;
375 	  break;
376 
377 	case 'i':
378 	  /* Ignore changes in case.  */
379 	  ignore_case_flag = 1;
380 	  ignore_some_changes = 1;
381 	  ignore_some_line_changes = 1;
382 	  break;
383 
384 	case 'I':
385 	  /* Ignore changes affecting only lines that match the
386 	     specified regexp.  */
387 	  add_regexp (&ignore_regexp_list, optarg);
388 	  ignore_some_changes = 1;
389 	  break;
390 
391 	case 'l':
392 	  /* Pass the output through `pr' to paginate it.  */
393 	  paginate_flag = 1;
394 #if !defined(SIGCHLD) && defined(SIGCLD)
395 #define SIGCHLD SIGCLD
396 #endif
397 #ifdef SIGCHLD
398 	  /* Pagination requires forking and waiting, and
399 	     System V fork+wait does not work if SIGCHLD is ignored.  */
400 	  signal (SIGCHLD, SIG_DFL);
401 #endif
402 	  break;
403 
404 	case 'L':
405 	  /* Specify file labels for `-c' output headers.  */
406 	  if (!file_label[0])
407 	    file_label[0] = optarg;
408 	  else if (!file_label[1])
409 	    file_label[1] = optarg;
410 	  else
411 	    fatal ("too many file label options");
412 	  break;
413 
414 	case 'n':
415 	  /* Output RCS-style diffs, like `-f' except that each command
416 	     specifies the number of lines affected.  */
417 	  specify_style (OUTPUT_RCS);
418 	  break;
419 
420 	case 'N':
421 	  /* When comparing directories, if a file appears only in one
422 	     directory, treat it as present but empty in the other.  */
423 	  entire_new_file_flag = 1;
424 	  break;
425 
426 	case 'p':
427 	  /* Make context-style output and show name of last C function.  */
428 	  show_c_function = 1;
429 	  add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
430 	  break;
431 
432 	case 'P':
433 	  /* When comparing directories, if a file appears only in
434 	     the second directory of the two,
435 	     treat it as present but empty in the other.  */
436 	  unidirectional_new_file_flag = 1;
437 	  break;
438 
439 	case 'q':
440 	  no_details_flag = 1;
441 	  break;
442 
443 	case 'r':
444 	  /* When comparing directories,
445 	     recursively compare any subdirectories found.  */
446 	  recursive = 1;
447 	  break;
448 
449 	case 's':
450 	  /* Print a message if the files are the same.  */
451 	  print_file_same_flag = 1;
452 	  break;
453 
454 	case 'S':
455 	  /* When comparing directories, start with the specified
456 	     file name.  This is used for resuming an aborted comparison.  */
457 	  dir_start_file = optarg;
458 	  break;
459 
460 	case 't':
461 	  /* Expand tabs to spaces in the output so that it preserves
462 	     the alignment of the input files.  */
463 	  tab_expand_flag = 1;
464 	  break;
465 
466 	case 'T':
467 	  /* Use a tab in the output, rather than a space, before the
468 	     text of an input line, so as to keep the proper alignment
469 	     in the input line without changing the characters in it.  */
470 	  tab_align_flag = 1;
471 	  break;
472 
473 	case 'u':
474 	  /* Output the context diff in unidiff format.  */
475 	  specify_style (OUTPUT_UNIFIED);
476 	  break;
477 
478 	case 'v':
479 	  printf ("diff - GNU diffutils version %s\n", diff_version_string);
480 	  return 0;
481 
482 	case 'w':
483 	  /* Ignore horizontal white space when comparing lines.  */
484 	  ignore_all_space_flag = 1;
485 	  ignore_some_changes = 1;
486 	  ignore_some_line_changes = 1;
487 	  break;
488 
489 	case 'x':
490 	  add_exclude (optarg);
491 	  break;
492 
493 	case 'X':
494 	  if (add_exclude_file (optarg) != 0)
495 	    pfatal_with_name (optarg);
496 	  break;
497 
498 	case 'y':
499 	  /* Use side-by-side (sdiff-style) columnar output. */
500 	  specify_style (OUTPUT_SDIFF);
501 	  break;
502 
503 	case 'W':
504 	  /* Set the line width for OUTPUT_SDIFF.  */
505 	  if (ck_atoi (optarg, &width) || width <= 0)
506 	    fatal ("column width must be a positive integer");
507 	  break;
508 
509 	case 129:
510 	  sdiff_left_only = 1;
511 	  break;
512 
513 	case 130:
514 	  sdiff_skip_common_lines = 1;
515 	  break;
516 
517 	case 131:
518 	  /* sdiff-style columns output. */
519 	  specify_style (OUTPUT_SDIFF);
520 	  sdiff_help_sdiff = 1;
521 	  break;
522 
523 	case 132:
524 	case 133:
525 	case 134:
526 	  specify_style (OUTPUT_IFDEF);
527 	  if (specify_format (&line_format[c - 132], optarg) != 0)
528 	    diff_error ("conflicting line format", 0, 0);
529 	  break;
530 
531 	case 135:
532 	  specify_style (OUTPUT_IFDEF);
533 	  {
534 	    int i, err = 0;
535 	    for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
536 	      err |= specify_format (&line_format[i], optarg);
537 	    if (err)
538 	      diff_error ("conflicting line format", 0, 0);
539 	  }
540 	  break;
541 
542 	case 136:
543 	case 137:
544 	case 138:
545 	case 139:
546 	  specify_style (OUTPUT_IFDEF);
547 	  if (specify_format (&group_format[c - 136], optarg) != 0)
548 	    diff_error ("conflicting group format", 0, 0);
549 	  break;
550 
551 	case 140:
552 	  if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
553 	    fatal ("horizon must be a nonnegative integer");
554 	  break;
555 
556 	case 141:
557 	  usage ();
558 	  check_output (stdout);
559 	  return 0;
560 
561 	case 142:
562 	  /* Use binary I/O when reading and writing data.
563 	     On Posix hosts, this has no effect.  */
564 #if HAVE_SETMODE
565 	  binary_I_O = 1;
566 #  if 0
567 	  /* Because this code is leftover from pre-library days,
568 	     there is no way to set stdout back to the default mode
569 	     when we are done.  As it turns out, I think the only
570 	     parts of CVS that pass out == NULL, and thus cause diff
571 	     to write to stdout, are "cvs diff" and "cvs rdiff".  So
572 	     I'm not going to worry about this too much yet.  */
573 	  setmode (STDOUT_FILENO, O_BINARY);
574 #  else
575 	  if (out == NULL)
576 	    error (0, 0, "warning: did not set stdout to binary mode");
577 #  endif
578 #endif
579 	  break;
580 
581 	default:
582 	  return try_help (0);
583 	}
584       prev = c;
585     }
586 
587   if (argc - optind != 2)
588     return try_help (argc - optind < 2 ? "missing operand" : "extra operand");
589 
590   {
591     /*
592      *	We maximize first the half line width, and then the gutter width,
593      *	according to the following constraints:
594      *	1.  Two half lines plus a gutter must fit in a line.
595      *	2.  If the half line width is nonzero:
596      *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
597      *	    b.  If tabs are not expanded to spaces,
598      *		a half line plus a gutter is an integral number of tabs,
599      *		so that tabs in the right column line up.
600      */
601     int t = tab_expand_flag ? 1 : TAB_WIDTH;
602     int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t)  *  t;
603     sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
604     sdiff_column2_offset = sdiff_half_width ? off : width;
605   }
606 
607   if (show_c_function && output_style != OUTPUT_UNIFIED)
608     specify_style (OUTPUT_CONTEXT);
609 
610   if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
611     context = 0;
612   else if (context == -1)
613     /* Default amount of context for -c.  */
614     context = 3;
615 
616   if (output_style == OUTPUT_IFDEF)
617     {
618       /* Format arrays are char *, not char const *,
619 	 because integer formats are temporarily modified.
620 	 But it is safe to assign a constant like "%=" to a format array,
621 	 since "%=" does not format any integers.  */
622       int i;
623       for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
624 	if (!line_format[i])
625 	  line_format[i] = "%l\n";
626       if (!group_format[OLD])
627 	group_format[OLD]
628 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
629       if (!group_format[NEW])
630 	group_format[NEW]
631 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
632       if (!group_format[UNCHANGED])
633 	group_format[UNCHANGED] = "%=";
634       if (!group_format[CHANGED])
635 	group_format[CHANGED] = concat (group_format[OLD],
636 					group_format[NEW], "");
637     }
638 
639   no_diff_means_no_output =
640     (output_style == OUTPUT_IFDEF ?
641       (!*group_format[UNCHANGED]
642        || (strcmp (group_format[UNCHANGED], "%=") == 0
643 	   && !*line_format[UNCHANGED]))
644      : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
645 
646   switch_string = option_list (argv + 1, optind - 1);
647 
648   if (out == NULL)
649     outfile = stdout;
650   else
651     {
652 #if HAVE_SETMODE
653       /* A diff which is full of ^Z and such isn't going to work
654          very well in text mode.  */
655       if (binary_I_O)
656 	outfile = fopen (out, "wb");
657       else
658 #endif
659       outfile = fopen (out, "w");
660       if (outfile == NULL)
661         {
662 	  perror_with_name ("could not open output file");
663 	  return 2;
664         }
665     }
666 
667   /* Set the jump buffer, so that diff may abort execution without
668      terminating the process. */
669   if ((val = setjmp (diff_abort_buf)) != 0)
670     {
671       optind = optind_old;
672       if (outfile != stdout)
673 	fclose (outfile);
674       return val;
675     }
676 
677   val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
678 
679   /* Print any messages that were saved up for last.  */
680   print_message_queue ();
681 
682   free (switch_string);
683 
684   optind = optind_old;
685   check_output (outfile);
686   if (outfile != stdout)
687     if (fclose (outfile) != 0)
688 	perror ("close error on output file");
689   return val;
690 }
691 
692 /* Add the compiled form of regexp PATTERN to REGLIST.  */
693 
694 static void
695 add_regexp (reglist, pattern)
696      struct regexp_list **reglist;
697      char const *pattern;
698 {
699   struct regexp_list *r;
700   char const *m;
701 
702   r = (struct regexp_list *) xmalloc (sizeof (*r));
703   bzero (r, sizeof (*r));
704   r->buf.fastmap = xmalloc (256);
705   m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
706   if (m != 0)
707     diff_error ("%s: %s", pattern, m);
708 
709   /* Add to the start of the list, since it's easier than the end.  */
710   r->next = *reglist;
711   *reglist = r;
712 }
713 
714 static int
715 try_help (reason)
716      char const *reason;
717 {
718   if (reason)
719     diff_error ("%s", reason, 0);
720   diff_error ("Try `%s --help' for more information.", diff_program_name, 0);
721   return 2;
722 }
723 
/* Make sure everything written to FILE so far got out: call fatal
   with "write error" if the stream's error indicator is already set or
   if flushing it fails.  The flush is attempted only when the error
   indicator is clear.  */

static void
check_output (file)
    FILE *file;
{
  int failed = ferror (file);

  if (!failed)
    failed = fflush (file) != 0;

  if (failed)
    fatal ("write error");
}
731 
732 static char const * const option_help[] = {
733 "-i  --ignore-case  Consider upper- and lower-case to be the same.",
734 "-w  --ignore-all-space  Ignore all white space.",
735 "-b  --ignore-space-change  Ignore changes in the amount of white space.",
736 "-B  --ignore-blank-lines  Ignore changes whose lines are all blank.",
737 "-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.",
738 #if HAVE_SETMODE
739 "--binary  Read and write data in binary mode.",
740 #endif
741 "-a  --text  Treat all files as text.\n",
742 "-c  -C NUM  --context[=NUM]  Output NUM (default 2) lines of copied context.",
743 "-u  -U NUM  --unified[=NUM]  Output NUM (default 2) lines of unified context.",
744 "  -NUM  Use NUM context lines.",
745 "  -L LABEL  --label LABEL  Use LABEL instead of file name.",
746 "  -p  --show-c-function  Show which C function each change is in.",
747 "  -F RE  --show-function-line=RE  Show the most recent line matching RE.",
748 "-q  --brief  Output only whether files differ.",
749 "-e  --ed  Output an ed script.",
750 "-n  --rcs  Output an RCS format diff.",
751 "-y  --side-by-side  Output in two columns.",
752 "  -w NUM  --width=NUM  Output at most NUM (default 130) characters per line.",
753 "  --left-column  Output only the left column of common lines.",
754 "  --suppress-common-lines  Do not output common lines.",
755 "-DNAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs.",
756 "--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT.",
757 "--line-format=LFMT  Similar, but format all input lines with LFMT.",
758 "--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT.",
759 "  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.",
760 "  GFMT may contain:",
761 "    %<  lines from FILE1",
762 "    %>  lines from FILE2",
763 "    %=  lines common to FILE1 and FILE2",
764 "    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER",
765 "      LETTERs are as follows for new group, lower case for old group:",
766 "        F  first line number",
767 "        L  last line number",
768 "        N  number of lines = L-F+1",
769 "        E  F-1",
770 "        M  L+1",
771 "  LFMT may contain:",
772 "    %L  contents of line",
773 "    %l  contents of line, excluding any trailing newline",
774 "    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number",
775 "  Either GFMT or LFMT may contain:",
776 "    %%  %",
777 "    %c'C'  the single character C",
778 "    %c'\\OOO'  the character with octal code OOO\n",
779 "-l  --paginate  Pass the output through `pr' to paginate it.",
780 "-t  --expand-tabs  Expand tabs to spaces in output.",
781 "-T  --initial-tab  Make tabs line up by prepending a tab.\n",
782 "-r  --recursive  Recursively compare any subdirectories found.",
783 "-N  --new-file  Treat absent files as empty.",
784 "-P  --unidirectional-new-file  Treat absent first files as empty.",
785 "-s  --report-identical-files  Report when two files are the same.",
786 "-x PAT  --exclude=PAT  Exclude files that match PAT.",
787 "-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE.",
788 "-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
789 "--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
790 "-d  --minimal  Try hard to find a smaller set of changes.",
791 "-H  --speed-large-files  Assume large files and many scattered small changes.\n",
792 "-v  --version  Output version info.",
793 "--help  Output this help.",
794 0
795 };
796 
797 static void
798 usage ()
799 {
800   char const * const *p;
801 
802   printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name);
803   for (p = option_help;  *p;  p++)
804     printf ("  %s\n", *p);
805   printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
806 }
807 
/* Record VALUE as the format stored in *VAR.

   The value is nonzero if *VAR already held a format whose text
   differs from VALUE (the strcmp result), and 0 if *VAR was unset or
   held identical text.  *VAR is set to VALUE in every case; the
   pointer is stored, not a copy.  */

static int
specify_format (var, value)
     char **var;
     char *value;
{
  int conflict = 0;

  if (*var != 0)
    conflict = strcmp (*var, value);

  *var = value;
  return conflict;
}
817 
818 static void
819 specify_style (style)
820      enum output_style style;
821 {
822   if (output_style != OUTPUT_NORMAL
823       && output_style != style)
824     diff_error ("conflicting specifications of output style", 0, 0);
825   output_style = style;
826 }
827 
/* Return a constant string that describes the kind of file whose
   status is *ST, for use in diagnostics.

   See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
   To keep diagnostics grammatical, the returned string must start
   with a consonant.  */

static char const *
filetype (st)
     struct stat const *st;
{
  /* Posix.2 section 5.14.2 seems to suggest that we must read a
     regular file and guess whether it's C, Fortran, etc., but this is
     somewhat useless and doesn't reflect historical practice.  We're
     allowed to guess wrong, so we don't bother to read the file and
     only distinguish empty from nonempty.  */
  if (S_ISREG (st->st_mode))
    return st->st_size == 0 ? "regular empty file" : "regular file";

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  /* Nothing above matched.  */
  return "weird file";
}
878 
879 /* Compare two files (or dirs) with specified names
880    DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
881    (if DIR0 is 0, then the name is just NAME0, etc.)
882    This is self-contained; it opens the files and closes them.
883 
884    Value is 0 if files are the same, 1 if different,
885    2 if there is a problem opening them.  */
886 
887 static int
888 compare_files (dir0, name0, dir1, name1, depth)
889      char const *dir0, *dir1;
890      char const *name0, *name1;
891      int depth;
892 {
893   struct file_data inf[2];
894   register int i;
895   int val;
896   int same_files;
897   int failed = 0;
898   char *free0 = 0, *free1 = 0;
899 
900   /* If this is directory comparison, perhaps we have a file
901      that exists only in one of the directories.
902      If so, just print a message to that effect.  */
903 
904   if (! ((name0 != 0 && name1 != 0)
905 	 || (unidirectional_new_file_flag && name1 != 0)
906 	 || entire_new_file_flag))
907     {
908       char const *name = name0 == 0 ? name1 : name0;
909       char const *dir = name0 == 0 ? dir1 : dir0;
910       message ("Only in %s: %s\n", dir, name);
911       /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
912       return 1;
913     }
914 
915   bzero (inf, sizeof (inf));
916 
917   /* Mark any nonexistent file with -1 in the desc field.  */
918   /* Mark unopened files (e.g. directories) with -2. */
919 
920   inf[0].desc = name0 == 0 ? -1 : -2;
921   inf[1].desc = name1 == 0 ? -1 : -2;
922 
923   /* Now record the full name of each file, including nonexistent ones.  */
924 
925   if (name0 == 0)
926     name0 = name1;
927   if (name1 == 0)
928     name1 = name0;
929 
930   inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
931   inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
932 
933   /* Stat the files.  Record whether they are directories.  */
934 
935   for (i = 0; i <= 1; i++)
936     {
937       if (inf[i].desc != -1)
938 	{
939 	  int stat_result;
940 
941 	  if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
942 	    {
943 	      inf[i].stat = inf[0].stat;
944 	      stat_result = 0;
945 	    }
946 	  else if (strcmp (inf[i].name, "-") == 0)
947 	    {
948 	      inf[i].desc = STDIN_FILENO;
949 	      stat_result = fstat (STDIN_FILENO, &inf[i].stat);
950 	      if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
951 		{
952 		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
953 		  if (pos == -1)
954 		    stat_result = -1;
955 		  else
956 		    {
957 		      if (pos <= inf[i].stat.st_size)
958 			inf[i].stat.st_size -= pos;
959 		      else
960 			inf[i].stat.st_size = 0;
961 		      /* Posix.2 4.17.6.1.4 requires current time for stdin.  */
962 		      time (&inf[i].stat.st_mtime);
963 		    }
964 		}
965 	    }
966 	  else
967 	    stat_result = stat (inf[i].name, &inf[i].stat);
968 
969 	  if (stat_result != 0)
970 	    {
971 	      perror_with_name (inf[i].name);
972 	      failed = 1;
973 	    }
974 	  else
975 	    {
976 	      inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
977 	      if (inf[1 - i].desc == -1)
978 		{
979 		  inf[1 - i].dir_p = inf[i].dir_p;
980 		  inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
981 		}
982 	    }
983 	}
984     }
985 
986   if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
987     {
988       /* If one is a directory, and it was specified in the command line,
989 	 use the file in that dir with the other file's basename.  */
990 
991       int fnm_arg = inf[0].dir_p;
992       int dir_arg = 1 - fnm_arg;
993       char const *fnm = inf[fnm_arg].name;
994       char const *dir = inf[dir_arg].name;
995       char const *p = filename_lastdirchar (fnm);
996       char const *filename = inf[dir_arg].name
997 	= dir_file_pathname (dir, p ? p + 1 : fnm);
998 
999       if (strcmp (fnm, "-") == 0)
1000 	fatal ("can't compare - to a directory");
1001 
1002       if (stat (filename, &inf[dir_arg].stat) != 0)
1003 	{
1004 	  perror_with_name (filename);
1005 	  failed = 1;
1006 	}
1007       else
1008 	inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
1009     }
1010 
1011   if (failed)
1012     {
1013 
1014       /* If either file should exist but does not, return 2.  */
1015 
1016       val = 2;
1017 
1018     }
1019   else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
1020 			 && 0 < same_file (&inf[0].stat, &inf[1].stat))
1021 	   && no_diff_means_no_output)
1022     {
1023       /* The two named files are actually the same physical file.
1024 	 We know they are identical without actually reading them.  */
1025 
1026       val = 0;
1027     }
1028   else if (inf[0].dir_p & inf[1].dir_p)
1029     {
1030       if (output_style == OUTPUT_IFDEF)
1031 	fatal ("-D option not supported with directories");
1032 
1033       /* If both are directories, compare the files in them.  */
1034 
1035       if (depth > 0 && !recursive)
1036 	{
1037 	  /* But don't compare dir contents one level down
1038 	     unless -r was specified.  */
1039 	  message ("Common subdirectories: %s and %s\n",
1040 		   inf[0].name, inf[1].name);
1041 	  val = 0;
1042 	}
1043       else
1044 	{
1045 	  val = diff_dirs (inf, compare_files, depth);
1046 	}
1047 
1048     }
1049   else if ((inf[0].dir_p | inf[1].dir_p)
1050 	   || (depth > 0
1051 	       && (! S_ISREG (inf[0].stat.st_mode)
1052 		   || ! S_ISREG (inf[1].stat.st_mode))))
1053     {
1054       /* Perhaps we have a subdirectory that exists only in one directory.
1055 	 If so, just print a message to that effect.  */
1056 
1057       if (inf[0].desc == -1 || inf[1].desc == -1)
1058 	{
1059 	  if ((inf[0].dir_p | inf[1].dir_p)
1060 	      && recursive
1061 	      && (entire_new_file_flag
1062 		  || (unidirectional_new_file_flag && inf[0].desc == -1)))
1063 	    val = diff_dirs (inf, compare_files, depth);
1064 	  else
1065 	    {
1066 	      char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1067 	      /* See Posix.2 section 4.17.6.1.1 for this format.  */
1068 	      message ("Only in %s: %s\n", dir, name0);
1069 	      val = 1;
1070 	    }
1071 	}
1072       else
1073 	{
1074 	  /* We have two files that are not to be compared.  */
1075 
1076 	  /* See Posix.2 section 4.17.6.1.1 for this format.  */
1077 	  message5 ("File %s is a %s while file %s is a %s\n",
1078 		    inf[0].name, filetype (&inf[0].stat),
1079 		    inf[1].name, filetype (&inf[1].stat));
1080 
1081 	  /* This is a difference.  */
1082 	  val = 1;
1083 	}
1084     }
1085   else if ((no_details_flag & ~ignore_some_changes)
1086 	   && inf[0].stat.st_size != inf[1].stat.st_size
1087 	   && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1088 	   && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1089     {
1090       message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1091       val = 1;
1092     }
1093   else
1094     {
1095       /* Both exist and neither is a directory.  */
1096 
1097       /* Open the files and record their descriptors.  */
1098 
1099       if (inf[0].desc == -2)
1100 	if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1101 	  {
1102 	    perror_with_name (inf[0].name);
1103 	    failed = 1;
1104 	  }
1105       if (inf[1].desc == -2)
1106 	if (same_files)
1107 	  inf[1].desc = inf[0].desc;
1108 	else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1109 	  {
1110 	    perror_with_name (inf[1].name);
1111 	    failed = 1;
1112 	  }
1113 
1114 #if HAVE_SETMODE
1115       if (binary_I_O)
1116 	for (i = 0; i <= 1; i++)
1117 	  if (0 <= inf[i].desc)
1118 	    setmode (inf[i].desc, O_BINARY);
1119 #endif
1120 
1121       /* Compare the files, if no error was found.  */
1122 
1123       val = failed ? 2 : diff_2_files (inf, depth);
1124 
1125       /* Close the file descriptors.  */
1126 
1127       if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1128 	{
1129 	  perror_with_name (inf[0].name);
1130 	  val = 2;
1131 	}
1132       if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1133 	  && close (inf[1].desc) != 0)
1134 	{
1135 	  perror_with_name (inf[1].name);
1136 	  val = 2;
1137 	}
1138     }
1139 
1140   /* Now the comparison has been done, if no error prevented it,
1141      and VAL is the value this function will return.  */
1142 
1143   if (val == 0 && !inf[0].dir_p)
1144     {
1145       if (print_file_same_flag)
1146 	message ("Files %s and %s are identical\n",
1147 		 inf[0].name, inf[1].name);
1148     }
1149   else
1150     fflush (outfile);
1151 
1152   if (free0)
1153     free (free0);
1154   if (free1)
1155     free (free1);
1156 
1157   return val;
1158 }
1159 
1160 /* Initialize status variables and flag variables used in libdiff,
1161    to permit repeated calls to diff_run. */
1162 
1163 static void
1164 initialize_main (argcp, argvp)
1165     int *argcp;
1166     char ***argvp;
1167 {
1168   /* These variables really must be reset each time diff_run is called. */
1169   output_style = OUTPUT_NORMAL;
1170   context = -1;
1171   file_label[0] = NULL;
1172   file_label[1] = NULL;
1173   diff_program_name = (*argvp)[0];
1174   outfile = NULL;
1175 
1176   /* Reset these also, just for safety's sake. (If one invocation turns
1177      on ignore_case_flag, it must be turned off before diff_run is called
1178      again.  But it is possible to make many diffs before encountering
1179      such a problem. */
1180   recursive = 0;
1181   no_discards = 0;
1182 #if HAVE_SETMODE
1183   binary_I_O = 0;
1184 #endif
1185   no_diff_means_no_output = 0;
1186   always_text_flag = 0;
1187   horizon_lines = 0;
1188   ignore_space_change_flag = 0;
1189   ignore_all_space_flag = 0;
1190   ignore_blank_lines_flag = 0;
1191   ignore_some_line_changes = 0;
1192   ignore_some_changes = 0;
1193   ignore_case_flag = 0;
1194   function_regexp_list = NULL;
1195   ignore_regexp_list = NULL;
1196   no_details_flag = 0;
1197   print_file_same_flag = 0;
1198   tab_align_flag = 0;
1199   tab_expand_flag = 0;
1200   dir_start_file = NULL;
1201   entire_new_file_flag = 0;
1202   unidirectional_new_file_flag = 0;
1203   paginate_flag = 0;
1204   bzero (group_format, sizeof (group_format));
1205   bzero (line_format, sizeof (line_format));
1206   sdiff_help_sdiff = 0;
1207   sdiff_left_only = 0;
1208   sdiff_skip_common_lines = 0;
1209   sdiff_half_width = 0;
1210   sdiff_column2_offset = 0;
1211   switch_string = NULL;
1212   heuristic = 0;
1213   bzero (files, sizeof (files));
1214 }
1215