1 /* GNU DIFF main routine.
2 Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
3
4 This file is part of GNU DIFF.
5
6 GNU DIFF is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU DIFF is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU DIFF; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /* GNU DIFF was written by Mike Haertel, David Hayes,
21 Richard Stallman, Len Tower, and Paul Eggert. */
22
23 /* $FreeBSD: src/contrib/diff/diff.c,v 1.3 1999/11/26 02:51:44 obrien Exp $ */
24
25 #define GDIFF_MAIN
26 #include "diff.h"
27 #include <signal.h>
28 #include "getopt.h"
29 #ifdef __FreeBSD__
30 #include <locale.h>
31 #include <fnmatch.h>
32 #else
33 #include "fnmatch.h"
34 #endif
35 #include "prepend_args.h"
36
37 #ifndef DEFAULT_WIDTH
38 #define DEFAULT_WIDTH 130
39 #endif
40
41 #ifndef GUTTER_WIDTH_MINIMUM
42 #define GUTTER_WIDTH_MINIMUM 3
43 #endif
44
45 static char const *filetype PARAMS((struct stat const *));
46 static char *option_list PARAMS((char **, int));
47 static int add_exclude_file PARAMS((char const *));
48 static int ck_atoi PARAMS((char const *, int *));
49 static int compare_files PARAMS((char const *, char const *, char const *, char const *, int));
50 static int specify_format PARAMS((char **, char *));
51 static void add_exclude PARAMS((char const *));
52 static void add_regexp PARAMS((struct regexp_list **, char const *));
53 static void specify_style PARAMS((enum output_style));
54 static void try_help PARAMS((char const *));
55 static void check_stdout PARAMS((void));
56 static void usage PARAMS((void));
57
58 /* Nonzero for -r: if comparing two directories,
59 compare their common subdirectories recursively. */
60
61 static int recursive;
62
63 /* For debugging: don't do discard_confusing_lines. */
64
65 int no_discards;
66
67 #if HAVE_SETMODE
68 /* I/O mode: nonzero only if using binary input/output. */
69 static int binary_I_O;
70 #endif
71
/* Return a newly allocated string containing the command options with
   which diff was invoked.

   OPTIONVEC is a vector of separate ARGV-elements and COUNT is the number
   of elements in it.  Each element is copied in order, preceded by one
   space, so the result has a space at the beginning but none at the end.
   If COUNT is zero the result is an empty string.  The storage comes from
   xmalloc.  */

static char *
option_list (optionvec, count)
     char **optionvec;  /* Was `vector', but that collides on Alliant.  */
     int count;
{
  int i;
  size_t length = 0;
  char *result;
  char *out;

  /* Every element needs room for itself plus its leading space.  */
  for (i = 0; i < count; i++)
    length += strlen (optionvec[i]) + 1;

  /* One extra byte for the terminating null.  */
  result = xmalloc (length + 1);

  /* Copy through a moving write pointer instead of calling strcat, which
     would rescan the growing result from the start on every element.  */
  out = result;
  for (i = 0; i < count; i++)
    {
      char const *in = optionvec[i];

      *out++ = ' ';
      while (*in)
        *out++ = *in++;
    }
  *out = 0;

  return result;
}
103
/* Convert STR, a string of decimal digits, to an integer and store the
   result in *OUT.

   Return 0 on success.  If STR contains any character other than the
   digits '0' through '9' (a sign or white space included), return -1 and
   leave *OUT unchanged.  An empty STR passes the digit check and is
   converted by atoi, which yields 0.  */
static int
ck_atoi (str, out)
     char const *str;
     int *out;
{
  char const *p;

  for (p = str; *p; p++)
    if (*p < '0' || *p > '9')
      return -1;

  /* Convert the string that was just validated.  This used to read the
     global `optarg', which only worked because every caller happened to
     pass optarg as STR.  */
  *out = atoi (str);
  return 0;
}
119
/* Excluded file name patterns.  EXCLUDE points to a table with room for
   EXCLUDE_ALLOC pattern pointers, of which the first EXCLUDE_COUNT are
   in use.  */

static char const **exclude;
static int exclude_alloc, exclude_count;

/* Return 1 if the file name F matches any recorded exclusion pattern,
   0 otherwise.  Patterns are tried in the order they were added, using
   fnmatch with no flags.  */

int
excluded_filename (f)
     char const *f;
{
  char const **pat = exclude;
  int remaining = exclude_count;

  while (0 < remaining--)
    {
      if (fnmatch (*pat++, f, 0) == 0)
        return 1;
    }

  return 0;
}
135
136 static void
add_exclude(pattern)137 add_exclude (pattern)
138 char const *pattern;
139 {
140 if (exclude_alloc <= exclude_count)
141 exclude = (char const **)
142 (exclude_alloc == 0
143 ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude))
144 : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude)));
145
146 exclude[exclude_count++] = pattern;
147 }
148
149 static int
add_exclude_file(name)150 add_exclude_file (name)
151 char const *name;
152 {
153 struct file_data f;
154 char *p, *q, *lim;
155
156 f.name = optarg;
157 f.desc = (strcmp (name, "-") == 0
158 ? STDIN_FILENO
159 : open (name, O_RDONLY, 0));
160 if (f.desc < 0 || fstat (f.desc, &f.stat) != 0)
161 return -1;
162
163 sip (&f, 1);
164 slurp (&f);
165
166 for (p = f.buffer, lim = p + f.buffered_chars; p < lim; p = q)
167 {
168 q = (char *) memchr (p, '\n', lim - p);
169 if (!q)
170 q = lim;
171 *q++ = 0;
172 add_exclude (p);
173 }
174
175 return close (f.desc);
176 }
177
/* Long option table for getopt_long.  Each entry gives the option name,
   whether it takes an argument, a flag pointer (always 0 here, so
   getopt_long returns the value), and the value returned to the big
   switch in `main'.  Options with a short equivalent return that letter;
   the numbers 129- are for options that exist only in long form.  */

static struct option const longopts[] =
{
  /* Options that share a code with a single-letter option.  */
  {"ignore-blank-lines", no_argument, 0, 'B'},
  {"context", optional_argument, 0, 'C'},
  {"ifdef", required_argument, 0, 'D'},
  {"show-function-line", required_argument, 0, 'F'},
  {"speed-large-files", no_argument, 0, 'H'},
  {"ignore-matching-lines", required_argument, 0, 'I'},
  {"label", required_argument, 0, 'L'},
  {"file-label", required_argument, 0, 'L'},  /* An alias, no longer recommended */
  {"new-file", no_argument, 0, 'N'},
  {"entire-new-file", no_argument, 0, 'N'},  /* An alias, no longer recommended */
  {"unidirectional-new-file", no_argument, 0, 'P'},
  {"starting-file", required_argument, 0, 'S'},
  {"initial-tab", no_argument, 0, 'T'},
  {"width", required_argument, 0, 'W'},
  {"text", no_argument, 0, 'a'},
  {"ascii", no_argument, 0, 'a'},  /* An alias, no longer recommended */
  {"ignore-space-change", no_argument, 0, 'b'},
  {"minimal", no_argument, 0, 'd'},
  {"ed", no_argument, 0, 'e'},
  {"forward-ed", no_argument, 0, 'f'},
  {"ignore-case", no_argument, 0, 'i'},
  {"paginate", no_argument, 0, 'l'},
  {"print", no_argument, 0, 'l'},  /* An alias, no longer recommended */
  {"rcs", no_argument, 0, 'n'},
  {"show-c-function", no_argument, 0, 'p'},
  {"brief", no_argument, 0, 'q'},
  {"recursive", no_argument, 0, 'r'},
  {"report-identical-files", no_argument, 0, 's'},
  {"expand-tabs", no_argument, 0, 't'},
  {"version", no_argument, 0, 'v'},
  {"ignore-all-space", no_argument, 0, 'w'},
  {"exclude", required_argument, 0, 'x'},
  {"exclude-from", required_argument, 0, 'X'},
  {"side-by-side", no_argument, 0, 'y'},
  {"unified", optional_argument, 0, 'U'},

  /* Options that have only a long form.  */
  {"left-column", no_argument, 0, 129},
  {"suppress-common-lines", no_argument, 0, 130},
  {"sdiff-merge-assist", no_argument, 0, 131},
  {"old-line-format", required_argument, 0, 132},
  {"new-line-format", required_argument, 0, 133},
  {"unchanged-line-format", required_argument, 0, 134},
  {"line-format", required_argument, 0, 135},
  {"old-group-format", required_argument, 0, 136},
  {"new-group-format", required_argument, 0, 137},
  {"unchanged-group-format", required_argument, 0, 138},
  {"changed-group-format", required_argument, 0, 139},
  {"horizon-lines", required_argument, 0, 140},
  {"help", no_argument, 0, 141},
  {"binary", no_argument, 0, 142},

  /* End-of-table marker.  */
  {0, 0, 0, 0}
};
234
235 int
main(argc,argv)236 main (argc, argv)
237 int argc;
238 char *argv[];
239 {
240 int val;
241 int c;
242 int prev = -1;
243 int width = DEFAULT_WIDTH;
244 int show_c_function = 0;
245
246 #ifdef __FreeBSD__
247 setlocale(LC_ALL, "");
248 #endif
249 /* Do our initializations. */
250 initialize_main (&argc, &argv);
251 program_name = argv[0];
252 output_style = OUTPUT_NORMAL;
253 context = -1;
254
255 prepend_default_options (getenv ("DIFF_OPTIONS"), &argc, &argv);
256
257 /* Decode the options. */
258
259 while ((c = getopt_long (argc, argv,
260 "0123456789abBcC:dD:efF:hHiI:lL:nNopPqrsS:tTuU:vwW:x:X:y",
261 longopts, 0)) != EOF)
262 {
263 switch (c)
264 {
265 /* All digits combine in decimal to specify the context-size. */
266 case '1':
267 case '2':
268 case '3':
269 case '4':
270 case '5':
271 case '6':
272 case '7':
273 case '8':
274 case '9':
275 case '0':
276 if (context == -1)
277 context = 0;
278 /* If a context length has already been specified,
279 more digits allowed only if they follow right after the others.
280 Reject two separate runs of digits, or digits after -C. */
281 else if (prev < '0' || prev > '9')
282 fatal ("context length specified twice");
283
284 context = context * 10 + c - '0';
285 break;
286
287 case 'a':
288 /* Treat all files as text files; never treat as binary. */
289 always_text_flag = 1;
290 break;
291
292 case 'b':
293 /* Ignore changes in amount of white space. */
294 ignore_space_change_flag = 1;
295 ignore_some_changes = 1;
296 ignore_some_line_changes = 1;
297 break;
298
299 case 'B':
300 /* Ignore changes affecting only blank lines. */
301 ignore_blank_lines_flag = 1;
302 ignore_some_changes = 1;
303 break;
304
305 case 'C': /* +context[=lines] */
306 case 'U': /* +unified[=lines] */
307 if (optarg)
308 {
309 if (context >= 0)
310 fatal ("context length specified twice");
311
312 if (ck_atoi (optarg, &context))
313 fatal ("invalid context length argument");
314 }
315
316 /* Falls through. */
317 case 'c':
318 /* Make context-style output. */
319 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
320 break;
321
322 case 'd':
323 /* Don't discard lines. This makes things slower (sometimes much
324 slower) but will find a guaranteed minimal set of changes. */
325 no_discards = 1;
326 break;
327
328 case 'D':
329 /* Make merged #ifdef output. */
330 specify_style (OUTPUT_IFDEF);
331 {
332 int i, err = 0;
333 static char const C_ifdef_group_formats[] =
334 "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
335 char *b = xmalloc (sizeof (C_ifdef_group_formats)
336 + 7 * strlen(optarg) - 14 /* 7*"%s" */
337 - 8 /* 5*"%%" + 3*"%c" */);
338 sprintf (b, C_ifdef_group_formats,
339 optarg, optarg, 0,
340 optarg, optarg, 0, 0,
341 optarg, optarg, optarg);
342 for (i = 0; i < 4; i++)
343 {
344 err |= specify_format (&group_format[i], b);
345 b += strlen (b) + 1;
346 }
347 if (err)
348 error ("conflicting #ifdef formats", 0, 0);
349 }
350 break;
351
352 case 'e':
353 /* Make output that is a valid `ed' script. */
354 specify_style (OUTPUT_ED);
355 break;
356
357 case 'f':
358 /* Make output that looks vaguely like an `ed' script
359 but has changes in the order they appear in the file. */
360 specify_style (OUTPUT_FORWARD_ED);
361 break;
362
363 case 'F':
364 /* Show, for each set of changes, the previous line that
365 matches the specified regexp. Currently affects only
366 context-style output. */
367 add_regexp (&function_regexp_list, optarg);
368 break;
369
370 case 'h':
371 /* Split the files into chunks of around 1500 lines
372 for faster processing. Usually does not change the result.
373
374 This currently has no effect. */
375 break;
376
377 case 'H':
378 /* Turn on heuristics that speed processing of large files
379 with a small density of changes. */
380 heuristic = 1;
381 break;
382
383 case 'i':
384 /* Ignore changes in case. */
385 ignore_case_flag = 1;
386 ignore_some_changes = 1;
387 ignore_some_line_changes = 1;
388 break;
389
390 case 'I':
391 /* Ignore changes affecting only lines that match the
392 specified regexp. */
393 add_regexp (&ignore_regexp_list, optarg);
394 ignore_some_changes = 1;
395 break;
396
397 case 'l':
398 /* Pass the output through `pr' to paginate it. */
399 paginate_flag = 1;
400 #if !defined(SIGCHLD) && defined(SIGCLD)
401 #define SIGCHLD SIGCLD
402 #endif
403 #ifdef SIGCHLD
404 /* Pagination requires forking and waiting, and
405 System V fork+wait does not work if SIGCHLD is ignored. */
406 signal (SIGCHLD, SIG_DFL);
407 #endif
408 break;
409
410 case 'L':
411 /* Specify file labels for `-c' output headers. */
412 if (!file_label[0])
413 file_label[0] = optarg;
414 else if (!file_label[1])
415 file_label[1] = optarg;
416 else
417 fatal ("too many file label options");
418 break;
419
420 case 'n':
421 /* Output RCS-style diffs, like `-f' except that each command
422 specifies the number of lines affected. */
423 specify_style (OUTPUT_RCS);
424 break;
425
426 case 'N':
427 /* When comparing directories, if a file appears only in one
428 directory, treat it as present but empty in the other. */
429 entire_new_file_flag = 1;
430 break;
431
432 case 'o':
433 /* Output in the old tradition style. */
434 specify_style (OUTPUT_NORMAL);
435 break;
436
437 case 'p':
438 /* Make context-style output and show name of last C function. */
439 show_c_function = 1;
440 add_regexp (&function_regexp_list, "^[_a-zA-Z$]");
441 break;
442
443 case 'P':
444 /* When comparing directories, if a file appears only in
445 the second directory of the two,
446 treat it as present but empty in the other. */
447 unidirectional_new_file_flag = 1;
448 break;
449
450 case 'q':
451 no_details_flag = 1;
452 break;
453
454 case 'r':
455 /* When comparing directories,
456 recursively compare any subdirectories found. */
457 recursive = 1;
458 break;
459
460 case 's':
461 /* Print a message if the files are the same. */
462 print_file_same_flag = 1;
463 break;
464
465 case 'S':
466 /* When comparing directories, start with the specified
467 file name. This is used for resuming an aborted comparison. */
468 dir_start_file = optarg;
469 break;
470
471 case 't':
472 /* Expand tabs to spaces in the output so that it preserves
473 the alignment of the input files. */
474 tab_expand_flag = 1;
475 break;
476
477 case 'T':
478 /* Use a tab in the output, rather than a space, before the
479 text of an input line, so as to keep the proper alignment
480 in the input line without changing the characters in it. */
481 tab_align_flag = 1;
482 break;
483
484 case 'u':
485 /* Output the context diff in unidiff format. */
486 specify_style (OUTPUT_UNIFIED);
487 break;
488
489 case 'v':
490 printf ("diff - GNU diffutils version %s\n", version_string);
491 exit (0);
492
493 case 'w':
494 /* Ignore horizontal white space when comparing lines. */
495 ignore_all_space_flag = 1;
496 ignore_some_changes = 1;
497 ignore_some_line_changes = 1;
498 break;
499
500 case 'x':
501 add_exclude (optarg);
502 break;
503
504 case 'X':
505 if (add_exclude_file (optarg) != 0)
506 pfatal_with_name (optarg);
507 break;
508
509 case 'y':
510 /* Use side-by-side (sdiff-style) columnar output. */
511 specify_style (OUTPUT_SDIFF);
512 break;
513
514 case 'W':
515 /* Set the line width for OUTPUT_SDIFF. */
516 if (ck_atoi (optarg, &width) || width <= 0)
517 fatal ("column width must be a positive integer");
518 break;
519
520 case 129:
521 sdiff_left_only = 1;
522 break;
523
524 case 130:
525 sdiff_skip_common_lines = 1;
526 break;
527
528 case 131:
529 /* sdiff-style columns output. */
530 specify_style (OUTPUT_SDIFF);
531 sdiff_help_sdiff = 1;
532 break;
533
534 case 132:
535 case 133:
536 case 134:
537 specify_style (OUTPUT_IFDEF);
538 if (specify_format (&line_format[c - 132], optarg) != 0)
539 error ("conflicting line format", 0, 0);
540 break;
541
542 case 135:
543 specify_style (OUTPUT_IFDEF);
544 {
545 int i, err = 0;
546 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
547 err |= specify_format (&line_format[i], optarg);
548 if (err)
549 error ("conflicting line format", 0, 0);
550 }
551 break;
552
553 case 136:
554 case 137:
555 case 138:
556 case 139:
557 specify_style (OUTPUT_IFDEF);
558 if (specify_format (&group_format[c - 136], optarg) != 0)
559 error ("conflicting group format", 0, 0);
560 break;
561
562 case 140:
563 if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0)
564 fatal ("horizon must be a nonnegative integer");
565 break;
566
567 case 141:
568 usage ();
569 check_stdout ();
570 exit (0);
571
572 case 142:
573 /* Use binary I/O when reading and writing data.
574 On Posix hosts, this has no effect. */
575 #if HAVE_SETMODE
576 binary_I_O = 1;
577 setmode (STDOUT_FILENO, O_BINARY);
578 #endif
579 break;
580
581 default:
582 try_help (0);
583 }
584 prev = c;
585 }
586
587 if (argc - optind != 2)
588 try_help (argc - optind < 2 ? "missing operand" : "extra operand");
589
590
591 {
592 /*
593 * We maximize first the half line width, and then the gutter width,
594 * according to the following constraints:
595 * 1. Two half lines plus a gutter must fit in a line.
596 * 2. If the half line width is nonzero:
597 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
598 * b. If tabs are not expanded to spaces,
599 * a half line plus a gutter is an integral number of tabs,
600 * so that tabs in the right column line up.
601 */
602 int t = tab_expand_flag ? 1 : TAB_WIDTH;
603 int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t) * t;
604 sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)),
605 sdiff_column2_offset = sdiff_half_width ? off : width;
606 }
607
608 if (show_c_function && output_style != OUTPUT_UNIFIED)
609 specify_style (OUTPUT_CONTEXT);
610
611 if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED)
612 context = 0;
613 else if (context == -1)
614 /* Default amount of context for -c. */
615 context = 3;
616
617 if (output_style == OUTPUT_IFDEF)
618 {
619 /* Format arrays are char *, not char const *,
620 because integer formats are temporarily modified.
621 But it is safe to assign a constant like "%=" to a format array,
622 since "%=" does not format any integers. */
623 int i;
624 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++)
625 if (!line_format[i])
626 line_format[i] = "%l\n";
627 if (!group_format[OLD])
628 group_format[OLD]
629 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<";
630 if (!group_format[NEW])
631 group_format[NEW]
632 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>";
633 if (!group_format[UNCHANGED])
634 group_format[UNCHANGED] = "%=";
635 if (!group_format[CHANGED])
636 group_format[CHANGED] = concat (group_format[OLD],
637 group_format[NEW], "");
638 }
639
640 no_diff_means_no_output =
641 (output_style == OUTPUT_IFDEF ?
642 (!*group_format[UNCHANGED]
643 || (strcmp (group_format[UNCHANGED], "%=") == 0
644 && !*line_format[UNCHANGED]))
645 : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1);
646
647 switch_string = option_list (argv + 1, optind - 1);
648
649 val = compare_files (0, argv[optind], 0, argv[optind + 1], 0);
650
651 /* Print any messages that were saved up for last. */
652 print_message_queue ();
653
654 check_stdout ();
655 exit (val);
656 return val;
657 }
658
659 /* Add the compiled form of regexp PATTERN to REGLIST. */
660
661 static void
add_regexp(reglist,pattern)662 add_regexp (reglist, pattern)
663 struct regexp_list **reglist;
664 char const *pattern;
665 {
666 struct regexp_list *r;
667 char const *m;
668
669 r = (struct regexp_list *) xmalloc (sizeof (*r));
670 bzero (r, sizeof (*r));
671 r->buf.fastmap = xmalloc (256);
672 m = re_compile_pattern (pattern, strlen (pattern), &r->buf);
673 if (m != 0)
674 error ("%s: %s", pattern, m);
675
676 /* Add to the start of the list, since it's easier than the end. */
677 r->next = *reglist;
678 *reglist = r;
679 }
680
681 static void
try_help(reason)682 try_help (reason)
683 char const *reason;
684 {
685 if (reason)
686 error ("%s", reason, 0);
687 error ("Try `%s --help' for more information.", program_name, 0);
688 exit (2);
689 }
690
/* Close standard output and call fatal with "write error" if the stream
   already had an error recorded or if closing it fails.  When an error is
   already recorded the stream is not closed.  */

static void
check_stdout ()
{
  int failed = ferror (stdout) != 0;

  if (!failed)
    failed = fclose (stdout) != 0;

  if (failed)
    fatal ("write error");
}
697
/* Text of the option summary printed by `usage', one string per output
   line, ending with a null pointer.  A string that ends in a newline
   produces a blank line after it, which separates groups of options.

   Keep this in step with the option decoding in `main': the default
   context length there is 3, and the width option is the capital -W
   (lower-case -w is --ignore-all-space).  */

static char const * const option_help[] = {
"-i --ignore-case Consider upper- and lower-case to be the same.",
"-w --ignore-all-space Ignore all white space.",
"-b --ignore-space-change Ignore changes in the amount of white space.",
"-B --ignore-blank-lines Ignore changes whose lines are all blank.",
"-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE.",
#if HAVE_SETMODE
"--binary Read and write data in binary mode.",
#endif
"-a --text Treat all files as text.\n",
"-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.",
"-u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.",
" -NUM Use NUM context lines.",
" -L LABEL --label LABEL Use LABEL instead of file name.",
" -p --show-c-function Show which C function each change is in.",
" -F RE --show-function-line=RE Show the most recent line matching RE.",
"-q --brief Output only whether files differ.",
"-e --ed Output an ed script.",
"-n --rcs Output an RCS format diff.",
"-y --side-by-side Output in two columns.",
" -W NUM --width=NUM Output at most NUM (default 130) characters per line.",
" --left-column Output only the left column of common lines.",
" --suppress-common-lines Do not output common lines.",
"-DNAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs.",
"--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT.",
"--line-format=LFMT Similar, but format all input lines with LFMT.",
"--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT.",
" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'.",
" GFMT may contain:",
" %< lines from FILE1",
" %> lines from FILE2",
" %= lines common to FILE1 and FILE2",
" %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER",
" LETTERs are as follows for new group, lower case for old group:",
" F first line number",
" L last line number",
" N number of lines = L-F+1",
" E F-1",
" M L+1",
" LFMT may contain:",
" %L contents of line",
" %l contents of line, excluding any trailing newline",
" %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number",
" Either GFMT or LFMT may contain:",
" %% %",
" %c'C' the single character C",
" %c'\\OOO' the character with octal code OOO\n",
"-l --paginate Pass the output through `pr' to paginate it.",
"-t --expand-tabs Expand tabs to spaces in output.",
"-T --initial-tab Make tabs line up by prepending a tab.\n",
"-r --recursive Recursively compare any subdirectories found.",
"-N --new-file Treat absent files as empty.",
"-P --unidirectional-new-file Treat absent first files as empty.",
"-s --report-identical-files Report when two files are the same.",
"-x PAT --exclude=PAT Exclude files that match PAT.",
"-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE.",
"-S FILE --starting-file=FILE Start with FILE when comparing directories.\n",
"--horizon-lines=NUM Keep NUM lines of the common prefix and suffix.",
"-d --minimal Try hard to find a smaller set of changes.",
"-H --speed-large-files Assume large files and many scattered small changes.\n",
"-v --version Output version info.",
"--help Output this help.",
0
};
762
763 static void
usage()764 usage ()
765 {
766 char const * const *p;
767
768 printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", program_name);
769 for (p = option_help; *p; p++)
770 printf (" %s\n", *p);
771 printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n");
772 }
773
/* Set the format variable *VAR to VALUE.  Return zero if *VAR was unset
   or already held a string equal to VALUE; otherwise return the nonzero
   result of comparing the old string with VALUE, which callers treat as
   a conflict.  *VAR is replaced in either case.  */

static int
specify_format (var, value)
     char **var;
     char *value;
{
  int err = 0;

  if (*var != 0)
    err = strcmp (*var, value);

  *var = value;
  return err;
}
783
784 static void
specify_style(style)785 specify_style (style)
786 enum output_style style;
787 {
788 if (output_style != OUTPUT_NORMAL
789 && output_style != style)
790 error ("conflicting specifications of output style", 0, 0);
791 output_style = style;
792 }
793
/* Return a constant string that names the type of the file described by
   ST, for use in diagnostics.  File types whose test macro is not defined
   on the host are skipped, and anything unrecognized is reported as
   "weird file".  */

static char const *
filetype (st)
     struct stat const *st;
{
  /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats.
     To keep diagnostics grammatical, the returned string must start
     with a consonant.  */

  /* Posix.2 section 5.14.2 seems to suggest that we must read a regular
     file and guess whether it's C, Fortran, etc., but this is somewhat
     useless and doesn't reflect historical practice.  We're allowed to
     guess wrong, so we don't bother to read the file; only its size is
     looked at.  */
  if (S_ISREG (st->st_mode))
    return st->st_size == 0 ? "regular empty file" : "regular file";

  if (S_ISDIR (st->st_mode))
    return "directory";

  /* other Posix.1 file types */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return "block special file";
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return "character special file";
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return "fifo";
#endif

  /* other Posix.1b file types */
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return "message queue";
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return "semaphore";
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return "shared memory object";
#endif

  /* other popular file types */
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return "socket";
#endif

  return "weird file";
}
844
845 /* Compare two files (or dirs) with specified names
846 DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion.
847 (if DIR0 is 0, then the name is just NAME0, etc.)
848 This is self-contained; it opens the files and closes them.
849
850 Value is 0 if files are the same, 1 if different,
851 2 if there is a problem opening them. */
852
853 static int
compare_files(dir0,name0,dir1,name1,depth)854 compare_files (dir0, name0, dir1, name1, depth)
855 char const *dir0, *dir1;
856 char const *name0, *name1;
857 int depth;
858 {
859 struct file_data inf[2];
860 register int i;
861 int val;
862 int same_files;
863 int failed = 0;
864 char *free0 = 0, *free1 = 0;
865
866 /* If this is directory comparison, perhaps we have a file
867 that exists only in one of the directories.
868 If so, just print a message to that effect. */
869
870 if (! ((name0 != 0 && name1 != 0)
871 || (unidirectional_new_file_flag && name1 != 0)
872 || entire_new_file_flag))
873 {
874 char const *name = name0 == 0 ? name1 : name0;
875 char const *dir = name0 == 0 ? dir1 : dir0;
876 message ("Only in %s: %s\n", dir, name);
877 /* Return 1 so that diff_dirs will return 1 ("some files differ"). */
878 return 1;
879 }
880
881 bzero (inf, sizeof (inf));
882
883 /* Mark any nonexistent file with -1 in the desc field. */
884 /* Mark unopened files (e.g. directories) with -2. */
885
886 inf[0].desc = name0 == 0 ? -1 : -2;
887 inf[1].desc = name1 == 0 ? -1 : -2;
888
889 /* Now record the full name of each file, including nonexistent ones. */
890
891 if (name0 == 0)
892 name0 = name1;
893 if (name1 == 0)
894 name1 = name0;
895
896 inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0));
897 inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1));
898
899 /* Stat the files. Record whether they are directories. */
900
901 for (i = 0; i <= 1; i++)
902 {
903 if (inf[i].desc != -1)
904 {
905 int stat_result;
906
907 if (i && filename_cmp (inf[i].name, inf[0].name) == 0)
908 {
909 inf[i].stat = inf[0].stat;
910 stat_result = 0;
911 }
912 else if (strcmp (inf[i].name, "-") == 0)
913 {
914 inf[i].desc = STDIN_FILENO;
915 stat_result = fstat (STDIN_FILENO, &inf[i].stat);
916 if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode))
917 {
918 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
919 if (pos == -1)
920 stat_result = -1;
921 else
922 {
923 if (pos <= inf[i].stat.st_size)
924 inf[i].stat.st_size -= pos;
925 else
926 inf[i].stat.st_size = 0;
927 /* Posix.2 4.17.6.1.4 requires current time for stdin. */
928 time (&inf[i].stat.st_mtime);
929 }
930 }
931 }
932 else
933 stat_result = stat (inf[i].name, &inf[i].stat);
934
935 if (stat_result != 0)
936 {
937 perror_with_name (inf[i].name);
938 failed = 1;
939 }
940 else
941 {
942 inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0;
943 if (inf[1 - i].desc == -1)
944 {
945 inf[1 - i].dir_p = inf[i].dir_p;
946 inf[1 - i].stat.st_mode = inf[i].stat.st_mode;
947 }
948 }
949 }
950 }
951
952 if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p)
953 {
954 /* If one is a directory, and it was specified in the command line,
955 use the file in that dir with the other file's basename. */
956
957 int fnm_arg = inf[0].dir_p;
958 int dir_arg = 1 - fnm_arg;
959 char const *fnm = inf[fnm_arg].name;
960 char const *dir = inf[dir_arg].name;
961 char const *p = filename_lastdirchar (fnm);
962 char const *filename = inf[dir_arg].name
963 = dir_file_pathname (dir, p ? p + 1 : fnm);
964
965 if (strcmp (fnm, "-") == 0)
966 fatal ("can't compare - to a directory");
967
968 if (stat (filename, &inf[dir_arg].stat) != 0)
969 {
970 perror_with_name (filename);
971 failed = 1;
972 }
973 else
974 inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode);
975 }
976
977 if (failed)
978 {
979
980 /* If either file should exist but does not, return 2. */
981
982 val = 2;
983
984 }
985 else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1
986 && 0 < same_file (&inf[0].stat, &inf[1].stat))
987 && no_diff_means_no_output)
988 {
989 /* The two named files are actually the same physical file.
990 We know they are identical without actually reading them. */
991
992 val = 0;
993 }
994 else if (inf[0].dir_p & inf[1].dir_p)
995 {
996 if (output_style == OUTPUT_IFDEF)
997 fatal ("-D option not supported with directories");
998
999 /* If both are directories, compare the files in them. */
1000
1001 if (depth > 0 && !recursive)
1002 {
1003 /* But don't compare dir contents one level down
1004 unless -r was specified. */
1005 message ("Common subdirectories: %s and %s\n",
1006 inf[0].name, inf[1].name);
1007 val = 0;
1008 }
1009 else
1010 {
1011 val = diff_dirs (inf, compare_files, depth);
1012 }
1013
1014 }
1015 else if ((inf[0].dir_p | inf[1].dir_p)
1016 || (depth > 0
1017 && (! S_ISREG (inf[0].stat.st_mode)
1018 || ! S_ISREG (inf[1].stat.st_mode))))
1019 {
1020 /* Perhaps we have a subdirectory that exists only in one directory.
1021 If so, just print a message to that effect. */
1022
1023 if (inf[0].desc == -1 || inf[1].desc == -1)
1024 {
1025 if ((inf[0].dir_p | inf[1].dir_p)
1026 && recursive
1027 && (entire_new_file_flag
1028 || (unidirectional_new_file_flag && inf[0].desc == -1)))
1029 val = diff_dirs (inf, compare_files, depth);
1030 else
1031 {
1032 char const *dir = (inf[0].desc == -1) ? dir1 : dir0;
1033 /* See Posix.2 section 4.17.6.1.1 for this format. */
1034 message ("Only in %s: %s\n", dir, name0);
1035 val = 1;
1036 }
1037 }
1038 else
1039 {
1040 /* We have two files that are not to be compared. */
1041
1042 /* See Posix.2 section 4.17.6.1.1 for this format. */
1043 message5 ("File %s is a %s while file %s is a %s\n",
1044 inf[0].name, filetype (&inf[0].stat),
1045 inf[1].name, filetype (&inf[1].stat));
1046
1047 /* This is a difference. */
1048 val = 1;
1049 }
1050 }
1051 else if ((no_details_flag & ~ignore_some_changes)
1052 && inf[0].stat.st_size != inf[1].stat.st_size
1053 && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode))
1054 && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode)))
1055 {
1056 message ("Files %s and %s differ\n", inf[0].name, inf[1].name);
1057 val = 1;
1058 }
1059 else
1060 {
1061 /* Both exist and neither is a directory. */
1062
1063 /* Open the files and record their descriptors. */
1064
1065 if (inf[0].desc == -2)
1066 if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0)
1067 {
1068 perror_with_name (inf[0].name);
1069 failed = 1;
1070 }
1071 if (inf[1].desc == -2)
1072 if (same_files)
1073 inf[1].desc = inf[0].desc;
1074 else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0)
1075 {
1076 perror_with_name (inf[1].name);
1077 failed = 1;
1078 }
1079
1080 #if HAVE_SETMODE
1081 if (binary_I_O)
1082 for (i = 0; i <= 1; i++)
1083 if (0 <= inf[i].desc)
1084 setmode (inf[i].desc, O_BINARY);
1085 #endif
1086
1087 /* Compare the files, if no error was found. */
1088
1089 val = failed ? 2 : diff_2_files (inf, depth);
1090
1091 /* Close the file descriptors. */
1092
1093 if (inf[0].desc >= 0 && close (inf[0].desc) != 0)
1094 {
1095 perror_with_name (inf[0].name);
1096 val = 2;
1097 }
1098 if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc
1099 && close (inf[1].desc) != 0)
1100 {
1101 perror_with_name (inf[1].name);
1102 val = 2;
1103 }
1104 }
1105
1106 /* Now the comparison has been done, if no error prevented it,
1107 and VAL is the value this function will return. */
1108
1109 if (val == 0 && !inf[0].dir_p)
1110 {
1111 if (print_file_same_flag)
1112 message ("Files %s and %s are identical\n",
1113 inf[0].name, inf[1].name);
1114 }
1115 else
1116 fflush (stdout);
1117
1118 if (free0)
1119 free (free0);
1120 if (free1)
1121 free (free1);
1122
1123 return val;
1124 }
1125