xref: /plan9/sys/src/ape/cmd/diff/diff.c (revision 0b459c2cb92b7c9d88818e9a2f72e678e5bc4553)
1 /* GNU DIFF main routine.
2    Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
3 
4 This file is part of GNU DIFF.
5 
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING.  If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19 
20 /* GNU DIFF was written by Mike Haertel, David Hayes,
21    Richard Stallman, Len Tower, and Paul Eggert.  */
22 
23 /* $FreeBSD: src/contrib/diff/diff.c,v 1.3 1999/11/26 02:51:44 obrien Exp $ */
24 
25 #define GDIFF_MAIN
26 #include "diff.h"
27 #include <signal.h>
28 #include "getopt.h"
29 #ifdef __FreeBSD__
30 #include <locale.h>
31 #include <fnmatch.h>
32 #else
33 #include "fnmatch.h"
34 #endif
35 #include "prepend_args.h"
36 
37 #ifndef DEFAULT_WIDTH
38 #define DEFAULT_WIDTH 130
39 #endif
40 
41 #ifndef GUTTER_WIDTH_MINIMUM
42 #define GUTTER_WIDTH_MINIMUM 3
43 #endif
44 
45 static char const *filetype PARAMS((struct stat const *));
46 static char *option_list PARAMS((char **, int));
47 static int add_exclude_file PARAMS((char const *));
48 static int ck_atoi PARAMS((char const *, int *));
49 static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
50 static int specify_format PARAMS((char **, char *));
51 static void add_exclude PARAMS((char const *));
52 static void add_regexp PARAMS((struct regexp_list **, char const *));
53 static void specify_style PARAMS((enum output_style));
54 static void try_help PARAMS((char const *));
55 static void check_stdout PARAMS((void));
56 static void usage PARAMS((void));
57 
58 /* Nonzero for -r: if comparing two directories,
59    compare their common subdirectories recursively.  */
60 
61 static int recursive;
62 
63 /* For debugging: don't do discard_confusing_lines.  */
64 
65 int no_discards;
66 
67 #if HAVE_SETMODE
68 /* I/O mode: nonzero only if using binary input/output.  */
69 static int binary_I_O;
70 #endif
71 
/* Return a newly allocated string containing the command options with
   which diff was invoked.

   OPTIONVEC is a vector of separate ARGV-elements and COUNT is the
   number of elements in it.  Each element is copied into the result
   preceded by a single space, so the result starts with a space, has
   no trailing space, and is the empty string when COUNT is zero.

   The result is obtained from xmalloc and is owned by the caller.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  int i;
  size_t length = 0;
  char *result;
  char *tail;

  /* One byte for the leading space of each element.  */
  for (i = 0; i < count; i++)
    length += strlen (optionvec[i]) + 1;

  /* One more byte for the terminating null.  */
  result = xmalloc (length + 1);
  tail = result;

  /* Append through a moving write pointer instead of strcat, so that
     the part of the result already written is not rescanned for every
     element.  */
  for (i = 0; i < count; i++)
    {
      size_t len = strlen (optionvec[i]);

      *tail++ = ' ';
      memcpy (tail, optionvec[i], len);
      tail += len;
    }
  *tail = 0;

  return result;
}
103 
/* Convert the decimal digit string STR to an int, storing the result
   in *OUT.

   Return 0 on success.  Return -1, leaving *OUT unchanged, if STR
   contains any character other than '0' through '9'; signs, white
   space and trailing junk are therefore all rejected.  An empty STR
   contains no offending character, so it is accepted and stored as 0.

   No range check is made: the conversion is done by atoi.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;

  for (p = str; *p; p++)
    if (*p < '0' || *p > '9')
      return -1;

  /* Convert the string that was just validated.  This used to convert
     the global `optarg', which gave the right answer only because every
     caller happened to pass optarg as STR.  */
  *out = atoi (str);
  return 0;
}
119 
/* The list of excluded file name patterns.

   EXCLUDE points to a vector with room for EXCLUDE_ALLOC patterns, of
   which the first EXCLUDE_COUNT are in use.  */

static char const **exclude;
static int exclude_alloc, exclude_count;

/* Return 1 if the file name F matches (in the sense of fnmatch with
   no flags) at least one of the excluded patterns, 0 otherwise.  */

int
excluded_filename (f)
     char const *f;
{
  int idx = 0;

  while (idx < exclude_count)
    {
      if (fnmatch (exclude[idx], f, 0) == 0)
        return 1;
      idx++;
    }

  return 0;
}
135 
136 static void
add_exclude(pattern)137 add_exclude (pattern)
138      char const *pattern;
139 {
140   if (exclude_alloc <= exclude_count)
141     exclude = (char const **)
142 	      (exclude_alloc == 0
143 	       ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
144 	       : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
145 
146   exclude[exclude_count++] = pattern;
147 }
148 
149 static int
add_exclude_file(name)150 add_exclude_file (name)
151      char const *name;
152 {
153   struct file_data f;
154   char *p, *q, *lim;
155 
156   f.name = optarg;
157   f.desc = (strcmp (name, "-") == 0
158 	    ? STDIN_FILENO
159 	    : open (name, O_RDONLY, 0));
160   if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
161     return -1;
162 
163   sip (&f, 1);
164   slurp (&f);
165 
166   for (p = f.buffer, lim = p + f.buffered_chars;  p < lim;  p = q)
167     {
168       q = (char *) memchr (p, '\n', lim - p);
169       if (!q)
170 	q = lim;
171       *q++ = 0;
172       add_exclude (p);
173     }
174 
175   return close (f.desc);
176 }
177 
/* The table of long options handed to getopt_long.

   Each entry is {name, argument kind, flag, value}.  The flag member is
   always 0, so getopt_long returns the value, which the big switch in
   `main' dispatches on.  A long option that has a one-letter equivalent
   uses that letter as its value; the long-only options use the numbers
   129 through 142.  */

static struct option const longopts[] =
{
  /* Options that share the code of a one-letter option.  */
  {"ignore-blank-lines", no_argument, 0, 'B'},
  {"context", optional_argument, 0, 'C'},
  {"ifdef", required_argument, 0, 'D'},
  {"show-function-line", required_argument, 0, 'F'},
  {"speed-large-files", no_argument, 0, 'H'},
  {"ignore-matching-lines", required_argument, 0, 'I'},
  {"label", required_argument, 0, 'L'},
  {"file-label", required_argument, 0, 'L'},    /* An alias, no longer recommended */
  {"new-file", no_argument, 0, 'N'},
  {"entire-new-file", no_argument, 0, 'N'},     /* An alias, no longer recommended */
  {"unidirectional-new-file", no_argument, 0, 'P'},
  {"starting-file", required_argument, 0, 'S'},
  {"initial-tab", no_argument, 0, 'T'},
  {"width", required_argument, 0, 'W'},
  {"text", no_argument, 0, 'a'},
  {"ascii", no_argument, 0, 'a'},               /* An alias, no longer recommended */
  {"ignore-space-change", no_argument, 0, 'b'},
  {"minimal", no_argument, 0, 'd'},
  {"ed", no_argument, 0, 'e'},
  {"forward-ed", no_argument, 0, 'f'},
  {"ignore-case", no_argument, 0, 'i'},
  {"paginate", no_argument, 0, 'l'},
  {"print", no_argument, 0, 'l'},               /* An alias, no longer recommended */
  {"rcs", no_argument, 0, 'n'},
  {"show-c-function", no_argument, 0, 'p'},
  {"brief", no_argument, 0, 'q'},
  {"recursive", no_argument, 0, 'r'},
  {"report-identical-files", no_argument, 0, 's'},
  {"expand-tabs", no_argument, 0, 't'},
  {"version", no_argument, 0, 'v'},
  {"ignore-all-space", no_argument, 0, 'w'},
  {"exclude", required_argument, 0, 'x'},
  {"exclude-from", required_argument, 0, 'X'},
  {"side-by-side", no_argument, 0, 'y'},
  {"unified", optional_argument, 0, 'U'},

  /* Long-only options.  */
  {"left-column", no_argument, 0, 129},
  {"suppress-common-lines", no_argument, 0, 130},
  {"sdiff-merge-assist", no_argument, 0, 131},
  {"old-line-format", required_argument, 0, 132},
  {"new-line-format", required_argument, 0, 133},
  {"unchanged-line-format", required_argument, 0, 134},
  {"line-format", required_argument, 0, 135},
  {"old-group-format", required_argument, 0, 136},
  {"new-group-format", required_argument, 0, 137},
  {"unchanged-group-format", required_argument, 0, 138},
  {"changed-group-format", required_argument, 0, 139},
  {"horizon-lines", required_argument, 0, 140},
  {"help", no_argument, 0, 141},
  {"binary", no_argument, 0, 142},

  /* End-of-table marker.  */
  {0, 0, 0, 0}
};
234 
235 int
main(argc,argv)236 main (argc, argv)
237      int argc;
238      char *argv[];
239 {
240   int val;
241   int c;
242   int prev = -1;
243   int width = DEFAULT_WIDTH;
244   int show_c_function = 0;
245 
246 #ifdef __FreeBSD__
247   setlocale(LC_ALL, "");
248 #endif
249   /* Do our initializations.  */
250   initialize_main (&argc, &argv);
251   program_name = argv[0];
252   output_style = OUTPUT_NORMAL;
253   context = -1;
254 
255   prepend_default_options (getenv ("DIFF_OPTIONS"), &argc, &argv);
256 
257   /* Decode the options.  */
258 
259   while ((c = getopt_long (argc, argv,
260 			   "0123456789abBcC:dD:efF:hHiI:lL:nNopPqrsS:tTuU:vwW:x:X:y",
261 			   longopts, 0)) != EOF)
262     {
263       switch (c)
264 	{
265 	  /* All digits combine in decimal to specify the context-size.  */
266 	case '1':
267 	case '2':
268 	case '3':
269 	case '4':
270 	case '5':
271 	case '6':
272 	case '7':
273 	case '8':
274 	case '9':
275 	case '0':
276 	  if (context == -1)
277 	    context = 0;
278 	  /* If a context length has already been specified,
279 	     more digits allowed only if they follow right after the others.
280 	     Reject two separate runs of digits, or digits after -C.  */
281 	  else if (prev < '0' || prev > '9')
282 	    fatal ("context length specified twice");
283 
284 	  context = context * 10 + c - '0';
285 	  break;
286 
287 	case 'a':
288 	  /* Treat all files as text files; never treat as binary.  */
289 	  always_text_flag = 1;
290 	  break;
291 
292 	case 'b':
293 	  /* Ignore changes in amount of white space.  */
294 	  ignore_space_change_flag = 1;
295 	  ignore_some_changes = 1;
296 	  ignore_some_line_changes = 1;
297 	  break;
298 
299 	case 'B':
300 	  /* Ignore changes affecting only blank lines.  */
301 	  ignore_blank_lines_flag = 1;
302 	  ignore_some_changes = 1;
303 	  break;
304 
305 	case 'C':		/* +context[=lines] */
306 	case 'U':		/* +unified[=lines] */
307 	  if (optarg)
308 	    {
309 	      if (context >= 0)
310 		fatal ("context length specified twice");
311 
312 	      if (ck_atoi (optarg, &context))
313 		fatal ("invalid context length argument");
314 	    }
315 
316 	  /* Falls through.  */
317 	case 'c':
318 	  /* Make context-style output.  */
319 	  specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
320 	  break;
321 
322 	case 'd':
323 	  /* Don't discard lines.  This makes things slower (sometimes much
324 	     slower) but will find a guaranteed minimal set of changes.  */
325 	  no_discards = 1;
326 	  break;
327 
328 	case 'D':
329 	  /* Make merged #ifdef output.  */
330 	  specify_style (OUTPUT_IFDEF);
331 	  {
332 	    int i, err = 0;
333 	    static char const C_ifdef_group_formats[] =
334 	      "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
335 	    char *b = xmalloc (sizeof (C_ifdef_group_formats)
336 			       + 7 * strlen(optarg) - 14 /* 7*"%s" */
337 			       - 8 /* 5*"%%" + 3*"%c" */);
338 	    sprintf (b, C_ifdef_group_formats,
339 		     optarg, optarg, 0,
340 		     optarg, optarg, 0, 0,
341 		     optarg, optarg, optarg);
342 	    for (i = 0; i < 4; i++)
343 	      {
344 		err |= specify_format (&group_format[i], b);
345 		b += strlen (b) + 1;
346 	      }
347 	    if (err)
348 	      error ("conflicting #ifdef formats", 0, 0);
349 	  }
350 	  break;
351 
352 	case 'e':
353 	  /* Make output that is a valid `ed' script.  */
354 	  specify_style (OUTPUT_ED);
355 	  break;
356 
357 	case 'f':
358 	  /* Make output that looks vaguely like an `ed' script
359 	     but has changes in the order they appear in the file.  */
360 	  specify_style (OUTPUT_FORWARD_ED);
361 	  break;
362 
363 	case 'F':
364 	  /* Show, for each set of changes, the previous line that
365 	     matches the specified regexp.  Currently affects only
366 	     context-style output.  */
367 	  add_regexp (&function_regexp_list, optarg);
368 	  break;
369 
370 	case 'h':
371 	  /* Split the files into chunks of around 1500 lines
372 	     for faster processing.  Usually does not change the result.
373 
374 	     This currently has no effect.  */
375 	  break;
376 
377 	case 'H':
378 	  /* Turn on heuristics that speed processing of large files
379 	     with a small density of changes.  */
380 	  heuristic = 1;
381 	  break;
382 
383 	case 'i':
384 	  /* Ignore changes in case.  */
385 	  ignore_case_flag = 1;
386 	  ignore_some_changes = 1;
387 	  ignore_some_line_changes = 1;
388 	  break;
389 
390 	case 'I':
391 	  /* Ignore changes affecting only lines that match the
392 	     specified regexp.  */
393 	  add_regexp (&ignore_regexp_list, optarg);
394 	  ignore_some_changes = 1;
395 	  break;
396 
397 	case 'l':
398 	  /* Pass the output through `pr' to paginate it.  */
399 	  paginate_flag = 1;
400 #if !defined(SIGCHLD) && defined(SIGCLD)
401 #define SIGCHLD SIGCLD
402 #endif
403 #ifdef SIGCHLD
404 	  /* Pagination requires forking and waiting, and
405 	     System V fork+wait does not work if SIGCHLD is ignored.  */
406 	  signal (SIGCHLD, SIG_DFL);
407 #endif
408 	  break;
409 
410 	case 'L':
411 	  /* Specify file labels for `-c' output headers.  */
412 	  if (!file_label[0])
413 	    file_label[0] = optarg;
414 	  else if (!file_label[1])
415 	    file_label[1] = optarg;
416 	  else
417 	    fatal ("too many file label options");
418 	  break;
419 
420 	case 'n':
421 	  /* Output RCS-style diffs, like `-f' except that each command
422 	     specifies the number of lines affected.  */
423 	  specify_style (OUTPUT_RCS);
424 	  break;
425 
426 	case 'N':
427 	  /* When comparing directories, if a file appears only in one
428 	     directory, treat it as present but empty in the other.  */
429 	  entire_new_file_flag = 1;
430 	  break;
431 
432 	case 'o':
433 	  /* Output in the old tradition style.  */
434 	  specify_style (OUTPUT_NORMAL);
435 	  break;
436 
437 	case 'p':
438 	  /* Make context-style output and show name of last C function.  */
439 	  show_c_function = 1;
440 	  add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
441 	  break;
442 
443 	case 'P':
444 	  /* When comparing directories, if a file appears only in
445 	     the second directory of the two,
446 	     treat it as present but empty in the other.  */
447 	  unidirectional_new_file_flag = 1;
448 	  break;
449 
450 	case 'q':
451 	  no_details_flag = 1;
452 	  break;
453 
454 	case 'r':
455 	  /* When comparing directories,
456 	     recursively compare any subdirectories found.  */
457 	  recursive = 1;
458 	  break;
459 
460 	case 's':
461 	  /* Print a message if the files are the same.  */
462 	  print_file_same_flag = 1;
463 	  break;
464 
465 	case 'S':
466 	  /* When comparing directories, start with the specified
467 	     file name.  This is used for resuming an aborted comparison.  */
468 	  dir_start_file = optarg;
469 	  break;
470 
471 	case 't':
472 	  /* Expand tabs to spaces in the output so that it preserves
473 	     the alignment of the input files.  */
474 	  tab_expand_flag = 1;
475 	  break;
476 
477 	case 'T':
478 	  /* Use a tab in the output, rather than a space, before the
479 	     text of an input line, so as to keep the proper alignment
480 	     in the input line without changing the characters in it.  */
481 	  tab_align_flag = 1;
482 	  break;
483 
484 	case 'u':
485 	  /* Output the context diff in unidiff format.  */
486 	  specify_style (OUTPUT_UNIFIED);
487 	  break;
488 
489 	case 'v':
490 	  printf ("diff - GNU diffutils version %s\n", version_string);
491 	  exit (0);
492 
493 	case 'w':
494 	  /* Ignore horizontal white space when comparing lines.  */
495 	  ignore_all_space_flag = 1;
496 	  ignore_some_changes = 1;
497 	  ignore_some_line_changes = 1;
498 	  break;
499 
500 	case 'x':
501 	  add_exclude (optarg);
502 	  break;
503 
504 	case 'X':
505 	  if (add_exclude_file (optarg) != 0)
506 	    pfatal_with_name (optarg);
507 	  break;
508 
509 	case 'y':
510 	  /* Use side-by-side (sdiff-style) columnar output. */
511 	  specify_style (OUTPUT_SDIFF);
512 	  break;
513 
514 	case 'W':
515 	  /* Set the line width for OUTPUT_SDIFF.  */
516 	  if (ck_atoi (optarg, &width) || width <= 0)
517 	    fatal ("column width must be a positive integer");
518 	  break;
519 
520 	case 129:
521 	  sdiff_left_only = 1;
522 	  break;
523 
524 	case 130:
525 	  sdiff_skip_common_lines = 1;
526 	  break;
527 
528 	case 131:
529 	  /* sdiff-style columns output. */
530 	  specify_style (OUTPUT_SDIFF);
531 	  sdiff_help_sdiff = 1;
532 	  break;
533 
534 	case 132:
535 	case 133:
536 	case 134:
537 	  specify_style (OUTPUT_IFDEF);
538 	  if (specify_format (&line_format[c - 132], optarg) != 0)
539 	    error ("conflicting line format", 0, 0);
540 	  break;
541 
542 	case 135:
543 	  specify_style (OUTPUT_IFDEF);
544 	  {
545 	    int i, err = 0;
546 	    for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
547 	      err |= specify_format (&line_format[i], optarg);
548 	    if (err)
549 	      error ("conflicting line format", 0, 0);
550 	  }
551 	  break;
552 
553 	case 136:
554 	case 137:
555 	case 138:
556 	case 139:
557 	  specify_style (OUTPUT_IFDEF);
558 	  if (specify_format (&group_format[c - 136], optarg) != 0)
559 	    error ("conflicting group format", 0, 0);
560 	  break;
561 
562 	case 140:
563 	  if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
564 	    fatal ("horizon must be a nonnegative integer");
565 	  break;
566 
567 	case 141:
568 	  usage ();
569 	  check_stdout ();
570 	  exit (0);
571 
572 	case 142:
573 	  /* Use binary I/O when reading and writing data.
574 	     On Posix hosts, this has no effect.  */
575 #if HAVE_SETMODE
576 	  binary_I_O = 1;
577 	  setmode (STDOUT_FILENO, O_BINARY);
578 #endif
579 	  break;
580 
581 	default:
582 	  try_help (0);
583 	}
584       prev = c;
585     }
586 
587   if (argc - optind != 2)
588     try_help (argc - optind < 2 ? "missing operand" : "extra operand");
589 
590 
591   {
592     /*
593      *	We maximize first the half line width, and then the gutter width,
594      *	according to the following constraints:
595      *	1.  Two half lines plus a gutter must fit in a line.
596      *	2.  If the half line width is nonzero:
597      *	    a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
598      *	    b.  If tabs are not expanded to spaces,
599      *		a half line plus a gutter is an integral number of tabs,
600      *		so that tabs in the right column line up.
601      */
602     int t = tab_expand_flag ? 1 : TAB_WIDTH;
603     int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t)  *  t;
604     sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
605     sdiff_column2_offset = sdiff_half_width ? off : width;
606   }
607 
608   if (show_c_function && output_style != OUTPUT_UNIFIED)
609     specify_style (OUTPUT_CONTEXT);
610 
611   if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
612     context = 0;
613   else if (context == -1)
614     /* Default amount of context for -c.  */
615     context = 3;
616 
617   if (output_style == OUTPUT_IFDEF)
618     {
619       /* Format arrays are char *, not char const *,
620 	 because integer formats are temporarily modified.
621 	 But it is safe to assign a constant like "%=" to a format array,
622 	 since "%=" does not format any integers.  */
623       int i;
624       for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
625 	if (!line_format[i])
626 	  line_format[i] = "%l\n";
627       if (!group_format[OLD])
628 	group_format[OLD]
629 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
630       if (!group_format[NEW])
631 	group_format[NEW]
632 	  = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
633       if (!group_format[UNCHANGED])
634 	group_format[UNCHANGED] = "%=";
635       if (!group_format[CHANGED])
636 	group_format[CHANGED] = concat (group_format[OLD],
637 					group_format[NEW], "");
638     }
639 
640   no_diff_means_no_output =
641     (output_style == OUTPUT_IFDEF ?
642       (!*group_format[UNCHANGED]
643        || (strcmp (group_format[UNCHANGED], "%=") == 0
644 	   && !*line_format[UNCHANGED]))
645      : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
646 
647   switch_string = option_list (argv + 1, optind - 1);
648 
649   val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
650 
651   /* Print any messages that were saved up for last.  */
652   print_message_queue ();
653 
654   check_stdout ();
655   exit (val);
656   return val;
657 }
658 
659 /* Add the compiled form of regexp PATTERN to REGLIST.  */
660 
661 static void
add_regexp(reglist,pattern)662 add_regexp (reglist, pattern)
663      struct regexp_list **reglist;
664      char const *pattern;
665 {
666   struct regexp_list *r;
667   char const *m;
668 
669   r = (struct regexp_list *) xmalloc (sizeof (*r));
670   bzero (r, sizeof (*r));
671   r->buf.fastmap = xmalloc (256);
672   m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
673   if (m != 0)
674     error ("%s: %s", pattern, m);
675 
676   /* Add to the start of the list, since it's easier than the end.  */
677   r->next = *reglist;
678   *reglist = r;
679 }
680 
681 static void
try_help(reason)682 try_help (reason)
683      char const *reason;
684 {
685   if (reason)
686     error ("%s", reason, 0);
687   error ("Try `%s --help' for more information.", program_name, 0);
688   exit (2);
689 }
690 
/* Close standard output, and call `fatal' with "write error" if an
   error had already been recorded on the stream or if closing it
   fails.  The stream is not closed when an error was already
   recorded.  */

static void
check_stdout ()
{
  int write_failed = ferror (stdout) != 0;

  if (!write_failed)
    write_failed = fclose (stdout) != 0;

  if (write_failed)
    fatal ("write error");
}
697 
/* The option summary printed by `usage', one output line per string,
   terminated by a null pointer.  A string that ends in a newline
   produces a blank line after it, which separates groups of related
   options.

   Keep this in step with the option decoding in `main': the default
   amount of context is 3 lines, and the side-by-side width is set
   with -W (-w means --ignore-all-space).  */

static char const * const option_help[] = {
"-i  --ignore-case  Consider upper- and lower-case to be the same.",
"-w  --ignore-all-space  Ignore all white space.",
"-b  --ignore-space-change  Ignore changes in the amount of white space.",
"-B  --ignore-blank-lines  Ignore changes whose lines are all blank.",
"-I RE  --ignore-matching-lines=RE  Ignore changes whose lines all match RE.",
#if HAVE_SETMODE
"--binary  Read and write data in binary mode.",
#endif
"-a  --text  Treat all files as text.\n",
"-c  -C NUM  --context[=NUM]  Output NUM (default 3) lines of copied context.",
"-u  -U NUM  --unified[=NUM]  Output NUM (default 3) lines of unified context.",
"  -NUM  Use NUM context lines.",
"  -L LABEL  --label LABEL  Use LABEL instead of file name.",
"  -p  --show-c-function  Show which C function each change is in.",
"  -F RE  --show-function-line=RE  Show the most recent line matching RE.",
"-q  --brief  Output only whether files differ.",
"-e  --ed  Output an ed script.",
"-n  --rcs  Output an RCS format diff.",
"-y  --side-by-side  Output in two columns.",
"  -W NUM  --width=NUM  Output at most NUM (default 130) characters per line.",
"  --left-column  Output only the left column of common lines.",
"  --suppress-common-lines  Do not output common lines.",
"-DNAME  --ifdef=NAME  Output merged file to show `#ifdef NAME' diffs.",
"--GTYPE-group-format=GFMT  Similar, but format GTYPE input groups with GFMT.",
"--line-format=LFMT  Similar, but format all input lines with LFMT.",
"--LTYPE-line-format=LFMT  Similar, but format LTYPE input lines with LFMT.",
"  LTYPE is `old', `new', or `unchanged'.  GTYPE is LTYPE or `changed'.",
"  GFMT may contain:",
"    %<  lines from FILE1",
"    %>  lines from FILE2",
"    %=  lines common to FILE1 and FILE2",
"    %[-][WIDTH][.[PREC]]{doxX}LETTER  printf-style spec for LETTER",
"      LETTERs are as follows for new group, lower case for old group:",
"        F  first line number",
"        L  last line number",
"        N  number of lines = L-F+1",
"        E  F-1",
"        M  L+1",
"  LFMT may contain:",
"    %L  contents of line",
"    %l  contents of line, excluding any trailing newline",
"    %[-][WIDTH][.[PREC]]{doxX}n  printf-style spec for input line number",
"  Either GFMT or LFMT may contain:",
"    %%  %",
"    %c'C'  the single character C",
"    %c'\\OOO'  the character with octal code OOO\n",
"-l  --paginate  Pass the output through `pr' to paginate it.",
"-t  --expand-tabs  Expand tabs to spaces in output.",
"-T  --initial-tab  Make tabs line up by prepending a tab.\n",
"-r  --recursive  Recursively compare any subdirectories found.",
"-N  --new-file  Treat absent files as empty.",
"-P  --unidirectional-new-file  Treat absent first files as empty.",
"-s  --report-identical-files  Report when two files are the same.",
"-x PAT  --exclude=PAT  Exclude files that match PAT.",
"-X FILE  --exclude-from=FILE  Exclude files that match any pattern in FILE.",
"-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
"--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
"-d  --minimal  Try hard to find a smaller set of changes.",
"-H  --speed-large-files  Assume large files and many scattered small changes.\n",
"-v  --version  Output version info.",
"--help  Output this help.",
0
};
762 
763 static void
usage()764 usage ()
765 {
766   char const * const *p;
767 
768   printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", program_name);
769   for (p = option_help;  *p;  p++)
770     printf ("  %s\n", *p);
771   printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
772 }
773 
/* Set the format *VAR to VALUE.

   Return 0 if *VAR was not set before or was already set to a string
   equal to VALUE.  Otherwise return the (nonzero) result of comparing
   the old string with VALUE, which tells the caller that two different
   formats were given.  *VAR is set to VALUE in every case.  */

static int
specify_format (var, value)
     char **var;
     char *value;
{
  int err = 0;

  if (*var != 0)
    err = strcmp (*var, value);

  *var = value;
  return err;
}
783 
784 static void
specify_style(style)785 specify_style (style)
786      enum output_style style;
787 {
788   if (output_style != OUTPUT_NORMAL
789       && output_style != style)
790     error ("conflicting specifications of output style", 0, 0);
791   output_style = style;
792 }
793 
/* Return a constant string naming the type of the file described by
   *ST, for use in diagnostics.  A file that matches none of the
   tested types is a "weird file".  */

static char const *
filetype (st)
     struct stat const *st;
{
  /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
     To keep diagnostics grammatical, the returned string must start
     with a consonant.  */

  if (S_ISREG (st->st_mode))
    {
      /* Posix.2 section 5.14.2 seems to suggest that we must read the file
         and guess whether it's C, Fortran, etc., but this is somewhat useless
         and doesn't reflect historical practice.  We're allowed to guess
         wrong, so we don't bother to read the file.  */
      return st->st_size == 0 ? "regular empty file" : "regular file";
    }

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  return "weird file";
}
844 
845 /* Compare two files (or dirs) with specified names
846    DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
847    (if DIR0 is 0, then the name is just NAME0, etc.)
848    This is self-contained; it opens the files and closes them.
849 
850    Value is 0 if files are the same, 1 if different,
851    2 if there is a problem opening them.  */
852 
853 static int
compare_files(dir0,name0,dir1,name1,depth)854 compare_files (dir0, name0, dir1, name1, depth)
855      char const *dir0, *dir1;
856      char const *name0, *name1;
857      int depth;
858 {
859   struct file_data inf[2];
860   register int i;
861   int val;
862   int same_files;
863   int failed = 0;
864   char *free0 = 0, *free1 = 0;
865 
866   /* If this is directory comparison, perhaps we have a file
867      that exists only in one of the directories.
868      If so, just print a message to that effect.  */
869 
870   if (! ((name0 != 0 && name1 != 0)
871 	 || (unidirectional_new_file_flag && name1 != 0)
872 	 || entire_new_file_flag))
873     {
874       char const *name = name0 == 0 ? name1 : name0;
875       char const *dir = name0 == 0 ? dir1 : dir0;
876       message ("Only in %s: %s\n", dir, name);
877       /* Return 1 so that diff_dirs will return 1 ("some files differ").  */
878       return 1;
879     }
880 
881   bzero (inf, sizeof (inf));
882 
883   /* Mark any nonexistent file with -1 in the desc field.  */
884   /* Mark unopened files (e.g. directories) with -2. */
885 
886   inf[0].desc = name0 == 0 ? -1 : -2;
887   inf[1].desc = name1 == 0 ? -1 : -2;
888 
889   /* Now record the full name of each file, including nonexistent ones.  */
890 
891   if (name0 == 0)
892     name0 = name1;
893   if (name1 == 0)
894     name1 = name0;
895 
896   inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
897   inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
898 
899   /* Stat the files.  Record whether they are directories.  */
900 
901   for (i = 0; i <= 1; i++)
902     {
903       if (inf[i].desc != -1)
904 	{
905 	  int stat_result;
906 
907 	  if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
908 	    {
909 	      inf[i].stat = inf[0].stat;
910 	      stat_result = 0;
911 	    }
912 	  else if (strcmp (inf[i].name, "-") == 0)
913 	    {
914 	      inf[i].desc = STDIN_FILENO;
915 	      stat_result = fstat (STDIN_FILENO, &inf[i].stat);
916 	      if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
917 		{
918 		  off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
919 		  if (pos == -1)
920 		    stat_result = -1;
921 		  else
922 		    {
923 		      if (pos <= inf[i].stat.st_size)
924 			inf[i].stat.st_size -= pos;
925 		      else
926 			inf[i].stat.st_size = 0;
927 		      /* Posix.2 4.17.6.1.4 requires current time for stdin.  */
928 		      time (&inf[i].stat.st_mtime);
929 		    }
930 		}
931 	    }
932 	  else
933 	    stat_result = stat (inf[i].name, &inf[i].stat);
934 
935 	  if (stat_result != 0)
936 	    {
937 	      perror_with_name (inf[i].name);
938 	      failed = 1;
939 	    }
940 	  else
941 	    {
942 	      inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
943 	      if (inf[1 - i].desc == -1)
944 		{
945 		  inf[1 - i].dir_p = inf[i].dir_p;
946 		  inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
947 		}
948 	    }
949 	}
950     }
951 
952   if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
953     {
954       /* If one is a directory, and it was specified in the command line,
955 	 use the file in that dir with the other file's basename.  */
956 
957       int fnm_arg = inf[0].dir_p;
958       int dir_arg = 1 - fnm_arg;
959       char const *fnm = inf[fnm_arg].name;
960       char const *dir = inf[dir_arg].name;
961       char const *p = filename_lastdirchar (fnm);
962       char const *filename = inf[dir_arg].name
963 	= dir_file_pathname (dir, p ? p + 1 : fnm);
964 
965       if (strcmp (fnm, "-") == 0)
966 	fatal ("can't compare - to a directory");
967 
968       if (stat (filename, &inf[dir_arg].stat) != 0)
969 	{
970 	  perror_with_name (filename);
971 	  failed = 1;
972 	}
973       else
974 	inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
975     }
976 
977   if (failed)
978     {
979 
980       /* If either file should exist but does not, return 2.  */
981 
982       val = 2;
983 
984     }
985   else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
986 			 && 0 < same_file (&inf[0].stat, &inf[1].stat))
987 	   && no_diff_means_no_output)
988     {
989       /* The two named files are actually the same physical file.
990 	 We know they are identical without actually reading them.  */
991 
992       val = 0;
993     }
994   else if (inf[0].dir_p & inf[1].dir_p)
995     {
996       if (output_style == OUTPUT_IFDEF)
997 	fatal ("-D option not supported with directories");
998 
999       /* If both are directories, compare the files in them.  */
1000 
1001       if (depth > 0 && !recursive)
1002 	{
1003 	  /* But don't compare dir contents one level down
1004 	     unless -r was specified.  */
1005 	  message ("Common subdirectories: %s and %s\n",
1006 		   inf[0].name, inf[1].name);
1007 	  val = 0;
1008 	}
1009       else
1010 	{
1011 	  val = diff_dirs (inf, compare_files, depth);
1012 	}
1013 
1014     }
1015   else if ((inf[0].dir_p | inf[1].dir_p)
1016 	   || (depth > 0
1017 	       && (! S_ISREG (inf[0].stat.st_mode)
1018 		   || ! S_ISREG (inf[1].stat.st_mode))))
1019     {
1020       /* Perhaps we have a subdirectory that exists only in one directory.
1021 	 If so, just print a message to that effect.  */
1022 
1023       if (inf[0].desc == -1 || inf[1].desc == -1)
1024 	{
1025 	  if ((inf[0].dir_p | inf[1].dir_p)
1026 	      && recursive
1027 	      && (entire_new_file_flag
1028 		  || (unidirectional_new_file_flag && inf[0].desc == -1)))
1029 	    val = diff_dirs (inf, compare_files, depth);
1030 	  else
1031 	    {
1032 	      char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1033 	      /* See Posix.2 section 4.17.6.1.1 for this format.  */
1034 	      message ("Only in %s: %s\n", dir, name0);
1035 	      val = 1;
1036 	    }
1037 	}
1038       else
1039 	{
1040 	  /* We have two files that are not to be compared.  */
1041 
1042 	  /* See Posix.2 section 4.17.6.1.1 for this format.  */
1043 	  message5 ("File %s is a %s while file %s is a %s\n",
1044 		    inf[0].name, filetype (&inf[0].stat),
1045 		    inf[1].name, filetype (&inf[1].stat));
1046 
1047 	  /* This is a difference.  */
1048 	  val = 1;
1049 	}
1050     }
1051   else if ((no_details_flag & ~ignore_some_changes)
1052 	   && inf[0].stat.st_size != inf[1].stat.st_size
1053 	   && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1054 	   && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1055     {
1056       message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1057       val = 1;
1058     }
1059   else
1060     {
1061       /* Both exist and neither is a directory.  */
1062 
1063       /* Open the files and record their descriptors.  */
1064 
1065       if (inf[0].desc == -2)
1066 	if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1067 	  {
1068 	    perror_with_name (inf[0].name);
1069 	    failed = 1;
1070 	  }
1071       if (inf[1].desc == -2)
1072 	if (same_files)
1073 	  inf[1].desc = inf[0].desc;
1074 	else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1075 	  {
1076 	    perror_with_name (inf[1].name);
1077 	    failed = 1;
1078 	  }
1079 
1080 #if HAVE_SETMODE
1081       if (binary_I_O)
1082 	for (i = 0; i <= 1; i++)
1083 	  if (0 <= inf[i].desc)
1084 	    setmode (inf[i].desc, O_BINARY);
1085 #endif
1086 
1087       /* Compare the files, if no error was found.  */
1088 
1089       val = failed ? 2 : diff_2_files (inf, depth);
1090 
1091       /* Close the file descriptors.  */
1092 
1093       if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1094 	{
1095 	  perror_with_name (inf[0].name);
1096 	  val = 2;
1097 	}
1098       if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1099 	  && close (inf[1].desc) != 0)
1100 	{
1101 	  perror_with_name (inf[1].name);
1102 	  val = 2;
1103 	}
1104     }
1105 
1106   /* Now the comparison has been done, if no error prevented it,
1107      and VAL is the value this function will return.  */
1108 
1109   if (val == 0 && !inf[0].dir_p)
1110     {
1111       if (print_file_same_flag)
1112 	message ("Files %s and %s are identical\n",
1113 		 inf[0].name, inf[1].name);
1114     }
1115   else
1116     fflush (stdout);
1117 
1118   if (free0)
1119     free (free0);
1120   if (free1)
1121     free (free1);
1122 
1123   return val;
1124 }
1125