1 /* $NetBSD: diff.c,v 1.2 2016/01/13 03:39:28 christos Exp $ */
2
3 /* diff - compare files line by line
4
5 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002
6 Free Software Foundation, Inc.
7
8 This file is part of GNU DIFF.
9
10 GNU DIFF is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GNU DIFF is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18 See the GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GNU DIFF; see the file COPYING.
22 If not, write to the Free Software Foundation,
23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24
25 #define GDIFF_MAIN
26 #include "diff.h"
27 #include <c-stack.h>
28 #include <dirname.h>
29 #include <error.h>
30 #include <exclude.h>
31 #include <exitfail.h>
32 #include <fnmatch.h>
33 #include <freesoft.h>
34 #include <getopt.h>
35 #include <hard-locale.h>
36 #include <prepargs.h>
37 #include <quotesys.h>
38 #include <regex.h>
39 #include <setmode.h>
40 #include <xalloc.h>
41 #include <posixver.h>
42
43 static char const authorship_msgid[] =
44 N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\
45 Richard Stallman, and Len Tower.");
46
/* Copyright line; main prints it verbatim (untranslated) as part of
   the -v (--version) output.  */
static const char copyright_string[] =
  "Copyright (C) 2002 "
  "Free Software Foundation, Inc.";
49
/* Smallest gutter, in columns, left between the two halves of
   side-by-side output.  A definition supplied earlier (for example by
   the build) takes precedence.  */
#if !defined GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif
53
/* An accumulating list of regular expressions given on the command
   line.  The text of the individual patterns is kept joined by "\|"
   so that the whole list can be compiled as one disjunction.  */
struct regexp_list
{
  /* Text of the patterns seen so far, joined by "\|"; null until the
     first pattern has been added.  */
  char *regexps;

  /* Number of chars of `regexps' in use.  */
  size_t len;

  /* Number of bytes allocated for `regexps'; 0 when nothing has been
     allocated.  */
  size_t size;

  /* True once `regexps' holds more than one pattern.  */
  bool multiple_regexps;

  /* Compiled pattern buffer that this list feeds.  */
  struct re_pattern_buffer *buf;
};
62
/* Forward declarations of the helpers defined later in this file.  */
static int compare_files (struct comparison const *parent,
                          char const *name0, char const *name1);
static void add_regexp (struct regexp_list *reglist, char const *pattern);
static void summarize_regexp_list (struct regexp_list *reglist);
static void specify_style (enum output_style style);
static void specify_value (char const **var, char const *value,
                           char const *option);
static void try_help (char const *reason_msgid, char const *operand)
  __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);
71
72 /* If comparing directories, compare their common subdirectories
73 recursively. */
74 static bool recursive;
75
76 /* In context diffs, show previous lines that match these regexps. */
77 static struct regexp_list function_regexp_list;
78
79 /* Ignore changes affecting only lines that match these regexps. */
80 static struct regexp_list ignore_regexp_list;
81
82 #if HAVE_SETMODE_DOS
83 /* Use binary I/O when reading and writing data (--binary).
84 On POSIX hosts, this has no effect. */
85 static bool binary;
86 #endif
87
88 /* When comparing directories, if a file appears only in one
89 directory, treat it as present but empty in the other (-N).
90 Then `patch' would create the file with appropriate contents. */
91 static bool new_file;
92
93 /* When comparing directories, if a file appears only in the second
94 directory of the two, treat it as present but empty in the other
95 (--unidirectional-new-file).
96 Then `patch' would create the file with appropriate contents. */
97 static bool unidirectional_new_file;
98
99 /* Report files compared that are the same (-s).
100 Normally nothing is output when that happens. */
101 static bool report_identical_files;
102
103
/* Build a single string holding the options diff was invoked with.

   OPTIONVEC is a vector of COUNT separate ARGV-elements.  Each element
   is quoted with quote_system_arg and preceded by one space, so the
   result starts with a space, has no trailing space, and is the empty
   string when COUNT is zero.

   The result is obtained from xmalloc.  */

static char *
option_list (char **optionvec, int count)
{
  size_t needed = 1;	/* the terminating NUL */
  char *list;
  char *cursor;
  int k;

  /* First pass: with a null destination quote_system_arg only reports
     the length it would write, so add that plus one separator each.  */
  for (k = 0; k < count; k++)
    needed += 1 + quote_system_arg ((char *) 0, optionvec[k]);

  list = xmalloc (needed);
  cursor = list;

  /* Second pass: emit " <quoted element>" for every element.  */
  for (k = 0; k < count; k++)
    {
      *cursor = ' ';
      cursor++;
      cursor += quote_system_arg (cursor, optionvec[k]);
    }

  *cursor = 0;
  return list;
}
134
135
136 /* Return an option value suitable for add_exclude. */
137
138 static int
exclude_options(void)139 exclude_options (void)
140 {
141 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
142 }
143
/* Short option letters handed to getopt_long.  A letter followed by
   `:' takes an argument.  */
static char const shortopts[] =
  "0123456789"					/* obsolete -NUM context */
  "abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";
146
/* Codes returned by getopt_long for long options that have no
   single-letter equivalent.  They start just above CHAR_MAX so that
   they cannot collide with any option character.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TO_FILE_OPTION,

  /* Keep these three consecutive and in this order: main subtracts
     UNCHANGED_LINE_FORMAT_OPTION to obtain an array index.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* Keep these four consecutive and in this order: main subtracts
     UNCHANGED_GROUP_FORMAT_OPTION to obtain an array index.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};
176
/* Spelling of each --GTYPE-group-format option, for diagnostics.
   Indexed by the option code minus UNCHANGED_GROUP_FORMAT_OPTION; each
   row is sized to hold the longest spelling.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };
184
/* Spelling of each --LTYPE-line-format option, for diagnostics.
   Indexed by the option code minus UNCHANGED_LINE_FORMAT_OPTION; each
   row is sized to hold the longest spelling.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };
191
192 static struct option const longopts[] =
193 {
194 {"binary", 0, 0, BINARY_OPTION},
195 {"brief", 0, 0, 'q'},
196 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
197 {"context", 2, 0, 'C'},
198 {"ed", 0, 0, 'e'},
199 {"exclude", 1, 0, 'x'},
200 {"exclude-from", 1, 0, 'X'},
201 {"expand-tabs", 0, 0, 't'},
202 {"forward-ed", 0, 0, 'f'},
203 {"from-file", 1, 0, FROM_FILE_OPTION},
204 {"help", 0, 0, HELP_OPTION},
205 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
206 {"ifdef", 1, 0, 'D'},
207 {"ignore-all-space", 0, 0, 'w'},
208 {"ignore-blank-lines", 0, 0, 'B'},
209 {"ignore-case", 0, 0, 'i'},
210 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
211 {"ignore-matching-lines", 1, 0, 'I'},
212 {"ignore-space-change", 0, 0, 'b'},
213 {"ignore-tab-expansion", 0, 0, 'E'},
214 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
215 {"initial-tab", 0, 0, 'T'},
216 {"label", 1, 0, 'L'},
217 {"left-column", 0, 0, LEFT_COLUMN_OPTION},
218 {"line-format", 1, 0, LINE_FORMAT_OPTION},
219 {"minimal", 0, 0, 'd'},
220 {"new-file", 0, 0, 'N'},
221 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
222 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
223 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
224 {"normal", 0, 0, NORMAL_OPTION},
225 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
226 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
227 {"paginate", 0, 0, 'l'},
228 {"rcs", 0, 0, 'n'},
229 {"recursive", 0, 0, 'r'},
230 {"report-identical-files", 0, 0, 's'},
231 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
232 {"show-c-function", 0, 0, 'p'},
233 {"show-function-line", 1, 0, 'F'},
234 {"side-by-side", 0, 0, 'y'},
235 {"speed-large-files", 0, 0, 'H'},
236 {"starting-file", 1, 0, 'S'},
237 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
238 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
239 {"text", 0, 0, 'a'},
240 {"to-file", 1, 0, TO_FILE_OPTION},
241 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
242 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
243 {"unidirectional-new-file", 0, 0, 'P'},
244 {"unified", 2, 0, 'U'},
245 {"version", 0, 0, 'v'},
246 {"width", 1, 0, 'W'},
247 {0, 0, 0, 0}
248 };
249
250 int
main(int argc,char ** argv)251 main (int argc, char **argv)
252 {
253 int exit_status = EXIT_SUCCESS;
254 int c;
255 int i;
256 int prev = -1;
257 lin ocontext = -1;
258 bool explicit_context = 0;
259 int width = 0;
260 bool show_c_function = 0;
261 char const *from_file = 0;
262 char const *to_file = 0;
263 uintmax_t numval;
264 char *numend;
265
266 /* Do our initializations. */
267 exit_failure = 2;
268 initialize_main (&argc, &argv);
269 program_name = argv[0];
270 setlocale (LC_ALL, "");
271 bindtextdomain (PACKAGE, LOCALEDIR);
272 textdomain (PACKAGE);
273 c_stack_action (c_stack_die);
274 function_regexp_list.buf = &function_regexp;
275 ignore_regexp_list.buf = &ignore_regexp;
276 re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
277 excluded = new_exclude ();
278
279 /* Decode the options. */
280
281 while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
282 {
283 switch (c)
284 {
285 case 0:
286 break;
287
288 case '0':
289 case '1':
290 case '2':
291 case '3':
292 case '4':
293 case '5':
294 case '6':
295 case '7':
296 case '8':
297 case '9':
298 if (! ISDIGIT (prev))
299 ocontext = c - '0';
300 else if (LIN_MAX / 10 < ocontext
301 || ((ocontext = 10 * ocontext + c - '0') < 0))
302 ocontext = LIN_MAX;
303 break;
304
305 case 'a':
306 text = 1;
307 break;
308
309 case 'b':
310 if (ignore_white_space < IGNORE_SPACE_CHANGE)
311 ignore_white_space = IGNORE_SPACE_CHANGE;
312 break;
313
314 case 'B':
315 ignore_blank_lines = 1;
316 break;
317
318 case 'C': /* +context[=lines] */
319 case 'U': /* +unified[=lines] */
320 {
321 if (optarg)
322 {
323 numval = strtoumax (optarg, &numend, 10);
324 if (*numend)
325 try_help ("invalid context length `%s'", optarg);
326 if (LIN_MAX < numval)
327 numval = LIN_MAX;
328 }
329 else
330 numval = 3;
331
332 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
333 if (context < numval)
334 context = numval;
335 explicit_context = 1;
336 }
337 break;
338
339 case 'c':
340 specify_style (OUTPUT_CONTEXT);
341 if (context < 3)
342 context = 3;
343 break;
344
345 case 'd':
346 minimal = 1;
347 break;
348
349 case 'D':
350 specify_style (OUTPUT_IFDEF);
351 {
352 static char const C_ifdef_group_formats[] =
353 "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
354 char *b = xmalloc (sizeof C_ifdef_group_formats
355 + 7 * strlen (optarg) - 14 /* 7*"%s" */
356 - 8 /* 5*"%%" + 3*"%c" */);
357 sprintf (b, C_ifdef_group_formats,
358 0,
359 optarg, optarg, 0,
360 optarg, optarg, 0,
361 optarg, optarg, optarg);
362 for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
363 {
364 specify_value (&group_format[i], b, "-D");
365 b += strlen (b) + 1;
366 }
367 }
368 break;
369
370 case 'e':
371 specify_style (OUTPUT_ED);
372 break;
373
374 case 'E':
375 if (ignore_white_space < IGNORE_TAB_EXPANSION)
376 ignore_white_space = IGNORE_TAB_EXPANSION;
377 break;
378
379 case 'f':
380 specify_style (OUTPUT_FORWARD_ED);
381 break;
382
383 case 'F':
384 add_regexp (&function_regexp_list, optarg);
385 break;
386
387 case 'h':
388 /* Split the files into chunks for faster processing.
389 Usually does not change the result.
390
391 This currently has no effect. */
392 break;
393
394 case 'H':
395 speed_large_files = 1;
396 break;
397
398 case 'i':
399 ignore_case = 1;
400 break;
401
402 case 'I':
403 add_regexp (&ignore_regexp_list, optarg);
404 break;
405
406 case 'l':
407 if (!pr_program[0])
408 try_help ("pagination not supported on this host", 0);
409 paginate = 1;
410 #ifdef SIGCHLD
411 /* Pagination requires forking and waiting, and
412 System V fork+wait does not work if SIGCHLD is ignored. */
413 signal (SIGCHLD, SIG_DFL);
414 #endif
415 break;
416
417 case 'L':
418 if (!file_label[0])
419 file_label[0] = optarg;
420 else if (!file_label[1])
421 file_label[1] = optarg;
422 else
423 fatal ("too many file label options");
424 break;
425
426 case 'n':
427 specify_style (OUTPUT_RCS);
428 break;
429
430 case 'N':
431 new_file = 1;
432 break;
433
434 case 'p':
435 show_c_function = 1;
436 add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
437 break;
438
439 case 'P':
440 unidirectional_new_file = 1;
441 break;
442
443 case 'q':
444 brief = 1;
445 break;
446
447 case 'r':
448 recursive = 1;
449 break;
450
451 case 's':
452 report_identical_files = 1;
453 break;
454
455 case 'S':
456 specify_value (&starting_file, optarg, "-S");
457 break;
458
459 case 't':
460 expand_tabs = 1;
461 break;
462
463 case 'T':
464 initial_tab = 1;
465 break;
466
467 case 'u':
468 specify_style (OUTPUT_UNIFIED);
469 if (context < 3)
470 context = 3;
471 break;
472
473 case 'v':
474 printf ("diff %s\n%s\n\n%s\n\n%s\n",
475 version_string, copyright_string,
476 _(free_software_msgid), _(authorship_msgid));
477 check_stdout ();
478 return EXIT_SUCCESS;
479
480 case 'w':
481 ignore_white_space = IGNORE_ALL_SPACE;
482 break;
483
484 case 'x':
485 add_exclude (excluded, optarg, exclude_options ());
486 break;
487
488 case 'X':
489 if (add_exclude_file (add_exclude, excluded, optarg,
490 exclude_options (), '\n'))
491 pfatal_with_name (optarg);
492 break;
493
494 case 'y':
495 specify_style (OUTPUT_SDIFF);
496 break;
497
498 case 'W':
499 numval = strtoumax (optarg, &numend, 10);
500 if (! (0 < numval && numval <= INT_MAX) || *numend)
501 try_help ("invalid width `%s'", optarg);
502 if (width != numval)
503 {
504 if (width)
505 fatal ("conflicting width options");
506 width = numval;
507 }
508 break;
509
510 case BINARY_OPTION:
511 #if HAVE_SETMODE_DOS
512 binary = 1;
513 set_binary_mode (STDOUT_FILENO, 1);
514 #endif
515 break;
516
517 case FROM_FILE_OPTION:
518 specify_value (&from_file, optarg, "--from-file");
519 break;
520
521 case HELP_OPTION:
522 usage ();
523 check_stdout ();
524 return EXIT_SUCCESS;
525
526 case HORIZON_LINES_OPTION:
527 numval = strtoumax (optarg, &numend, 10);
528 if (*numend)
529 try_help ("invalid horizon length `%s'", optarg);
530 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
531 break;
532
533 case IGNORE_FILE_NAME_CASE_OPTION:
534 ignore_file_name_case = 1;
535 break;
536
537 case INHIBIT_HUNK_MERGE_OPTION:
538 /* This option is obsolete, but accept it for backward
539 compatibility. */
540 break;
541
542 case LEFT_COLUMN_OPTION:
543 left_column = 1;
544 break;
545
546 case LINE_FORMAT_OPTION:
547 specify_style (OUTPUT_IFDEF);
548 for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
549 specify_value (&line_format[i], optarg, "--line-format");
550 break;
551
552 case NO_IGNORE_FILE_NAME_CASE_OPTION:
553 ignore_file_name_case = 0;
554 break;
555
556 case NORMAL_OPTION:
557 specify_style (OUTPUT_NORMAL);
558 break;
559
560 case SDIFF_MERGE_ASSIST_OPTION:
561 specify_style (OUTPUT_SDIFF);
562 sdiff_merge_assist = 1;
563 break;
564
565 case STRIP_TRAILING_CR_OPTION:
566 strip_trailing_cr = 1;
567 break;
568
569 case SUPPRESS_COMMON_LINES_OPTION:
570 suppress_common_lines = 1;
571 break;
572
573 case TO_FILE_OPTION:
574 specify_value (&to_file, optarg, "--to-file");
575 break;
576
577 case UNCHANGED_LINE_FORMAT_OPTION:
578 case OLD_LINE_FORMAT_OPTION:
579 case NEW_LINE_FORMAT_OPTION:
580 specify_style (OUTPUT_IFDEF);
581 c -= UNCHANGED_LINE_FORMAT_OPTION;
582 specify_value (&line_format[c], optarg, line_format_option[c]);
583 break;
584
585 case UNCHANGED_GROUP_FORMAT_OPTION:
586 case OLD_GROUP_FORMAT_OPTION:
587 case NEW_GROUP_FORMAT_OPTION:
588 case CHANGED_GROUP_FORMAT_OPTION:
589 specify_style (OUTPUT_IFDEF);
590 c -= UNCHANGED_GROUP_FORMAT_OPTION;
591 specify_value (&group_format[c], optarg, group_format_option[c]);
592 break;
593
594 default:
595 try_help (0, 0);
596 }
597 prev = c;
598 }
599
600 if (output_style == OUTPUT_UNSPECIFIED)
601 {
602 if (show_c_function)
603 {
604 specify_style (OUTPUT_CONTEXT);
605 if (ocontext < 0)
606 context = 3;
607 }
608 else
609 specify_style (OUTPUT_NORMAL);
610 }
611
612 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
613 time_format = "%Y-%m-%d %H:%M:%S.%N %z";
614 else
615 {
616 /* See POSIX 1003.1-2001 for this format. */
617 time_format = "%a %b %e %T %Y";
618 }
619
620 if (0 <= ocontext)
621 {
622 bool modern_usage = 200112 <= posix2_version ();
623
624 if ((output_style == OUTPUT_CONTEXT
625 || output_style == OUTPUT_UNIFIED)
626 && (context < ocontext
627 || (ocontext < context && ! explicit_context)))
628 {
629 if (modern_usage)
630 {
631 error (0, 0,
632 _("`-%ld' option is obsolete; use `-%c %ld'"),
633 (long) ocontext,
634 output_style == OUTPUT_CONTEXT ? 'C' : 'U',
635 (long) ocontext);
636 try_help (0, 0);
637 }
638 context = ocontext;
639 }
640 else
641 {
642 if (modern_usage)
643 {
644 error (0, 0, _("`-%ld' option is obsolete; omit it"),
645 (long) ocontext);
646 try_help (0, 0);
647 }
648 }
649 }
650
651 {
652 /*
653 * We maximize first the half line width, and then the gutter width,
654 * according to the following constraints:
655 * 1. Two half lines plus a gutter must fit in a line.
656 * 2. If the half line width is nonzero:
657 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM.
658 * b. If tabs are not expanded to spaces,
659 * a half line plus a gutter is an integral number of tabs,
660 * so that tabs in the right column line up.
661 */
662 unsigned int t = expand_tabs ? 1 : TAB_WIDTH;
663 int w = width ? width : 130;
664 int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
665 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
666 sdiff_column2_offset = sdiff_half_width ? off : w;
667 }
668
669 /* Make the horizon at least as large as the context, so that
670 shift_boundaries has more freedom to shift the first and last hunks. */
671 if (horizon_lines < context)
672 horizon_lines = context;
673
674 summarize_regexp_list (&function_regexp_list);
675 summarize_regexp_list (&ignore_regexp_list);
676
677 if (output_style == OUTPUT_IFDEF)
678 {
679 for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
680 if (!line_format[i])
681 line_format[i] = "%l\n";
682 if (!group_format[OLD])
683 group_format[OLD]
684 = group_format[CHANGED] ? group_format[CHANGED] : "%<";
685 if (!group_format[NEW])
686 group_format[NEW]
687 = group_format[CHANGED] ? group_format[CHANGED] : "%>";
688 if (!group_format[UNCHANGED])
689 group_format[UNCHANGED] = "%=";
690 if (!group_format[CHANGED])
691 group_format[CHANGED] = concat (group_format[OLD],
692 group_format[NEW], "");
693 }
694
695 no_diff_means_no_output =
696 (output_style == OUTPUT_IFDEF ?
697 (!*group_format[UNCHANGED]
698 || (strcmp (group_format[UNCHANGED], "%=") == 0
699 && !*line_format[UNCHANGED]))
700 : (output_style != OUTPUT_SDIFF) | suppress_common_lines);
701
702 files_can_be_treated_as_binary =
703 (brief
704 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
705 | (ignore_regexp_list.regexps || ignore_white_space)));
706
707 switch_string = option_list (argv + 1, optind - 1);
708
709 if (from_file)
710 {
711 if (to_file)
712 fatal ("--from-file and --to-file both specified");
713 else
714 for (; optind < argc; optind++)
715 {
716 int status = compare_files ((struct comparison *) 0,
717 from_file, argv[optind]);
718 if (exit_status < status)
719 exit_status = status;
720 }
721 }
722 else
723 {
724 if (to_file)
725 for (; optind < argc; optind++)
726 {
727 int status = compare_files ((struct comparison *) 0,
728 argv[optind], to_file);
729 if (exit_status < status)
730 exit_status = status;
731 }
732 else
733 {
734 if (argc - optind != 2)
735 {
736 if (argc - optind < 2)
737 try_help ("missing operand after `%s'", argv[argc - 1]);
738 else
739 try_help ("extra operand `%s'", argv[optind + 2]);
740 }
741
742 exit_status = compare_files ((struct comparison *) 0,
743 argv[optind], argv[optind + 1]);
744 }
745 }
746
747 /* Print any messages that were saved up for last. */
748 print_message_queue ();
749
750 check_stdout ();
751 exit (exit_status);
752 return exit_status;
753 }
754
755 /* Append to REGLIST the regexp PATTERN. */
756
757 static void
add_regexp(struct regexp_list * reglist,char const * pattern)758 add_regexp (struct regexp_list *reglist, char const *pattern)
759 {
760 size_t patlen = strlen (pattern);
761 char const *m = re_compile_pattern (pattern, patlen, reglist->buf);
762
763 if (m != 0)
764 error (0, 0, "%s: %s", pattern, m);
765 else
766 {
767 char *regexps = reglist->regexps;
768 size_t len = reglist->len;
769 bool multiple_regexps = reglist->multiple_regexps = regexps != 0;
770 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen;
771 size_t size = reglist->size;
772
773 if (size <= newlen)
774 {
775 if (!size)
776 size = 1;
777
778 do size *= 2;
779 while (size <= newlen);
780
781 reglist->size = size;
782 reglist->regexps = regexps = xrealloc (regexps, size);
783 }
784 if (multiple_regexps)
785 {
786 regexps[len++] = '\\';
787 regexps[len++] = '|';
788 }
789 memcpy (regexps + len, pattern, patlen + 1);
790 }
791 }
792
793 /* Ensure that REGLIST represents the disjunction of its regexps.
794 This is done here, rather than earlier, to avoid O(N^2) behavior. */
795
796 static void
summarize_regexp_list(struct regexp_list * reglist)797 summarize_regexp_list (struct regexp_list *reglist)
798 {
799 if (reglist->regexps)
800 {
801 /* At least one regexp was specified. Allocate a fastmap for it. */
802 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT);
803 if (reglist->multiple_regexps)
804 {
805 /* Compile the disjunction of the regexps.
806 (If just one regexp was specified, it is already compiled.) */
807 char const *m = re_compile_pattern (reglist->regexps, reglist->len,
808 reglist->buf);
809 if (m != 0)
810 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m);
811 }
812 }
813 }
814
815 static void
try_help(char const * reason_msgid,char const * operand)816 try_help (char const *reason_msgid, char const *operand)
817 {
818 if (reason_msgid)
819 error (0, 0, _(reason_msgid), operand);
820 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."),
821 program_name);
822 abort ();
823 }
824
/* Close standard output, diagnosing any write error.  A pending error
   on the stream is reported through fatal; otherwise a failure to
   close it is reported through pfatal_with_name.  */
static void
check_stdout (void)
{
  if (ferror (stdout))
    {
      fatal ("write failed");
      return;
    }

  if (fclose (stdout) != 0)
    pfatal_with_name (_("standard output"));
}
833
834 static char const * const option_help_msgid[] = {
835 N_("Compare files line by line."),
836 "",
837 N_("-i --ignore-case Ignore case differences in file contents."),
838 N_("--ignore-file-name-case Ignore case when comparing file names."),
839 N_("--no-ignore-file-name-case Consider case when comparing file names."),
840 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."),
841 N_("-b --ignore-space-change Ignore changes in the amount of white space."),
842 N_("-w --ignore-all-space Ignore all white space."),
843 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."),
844 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."),
845 N_("--strip-trailing-cr Strip trailing carriage return on input."),
846 #if HAVE_SETMODE_DOS
847 N_("--binary Read and write data in binary mode."),
848 #endif
849 N_("-a --text Treat all files as text."),
850 "",
851 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\
852 -u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\
853 --label LABEL Use LABEL instead of file name.\n\
854 -p --show-c-function Show which C function each change is in.\n\
855 -F RE --show-function-line=RE Show the most recent line matching RE."),
856 N_("-q --brief Output only whether files differ."),
857 N_("-e --ed Output an ed script."),
858 N_("--normal Output a normal diff."),
859 N_("-n --rcs Output an RCS format diff."),
860 N_("-y --side-by-side Output in two columns.\n\
861 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\
862 --left-column Output only the left column of common lines.\n\
863 --suppress-common-lines Do not output common lines."),
864 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."),
865 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."),
866 N_("--line-format=LFMT Similar, but format all input lines with LFMT."),
867 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."),
868 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."),
869 N_(" GFMT may contain:\n\
870 %< lines from FILE1\n\
871 %> lines from FILE2\n\
872 %= lines common to FILE1 and FILE2\n\
873 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\
874 LETTERs are as follows for new group, lower case for old group:\n\
875 F first line number\n\
876 L last line number\n\
877 N number of lines = L-F+1\n\
878 E F-1\n\
879 M L+1"),
880 N_(" LFMT may contain:\n\
881 %L contents of line\n\
882 %l contents of line, excluding any trailing newline\n\
883 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"),
884 N_(" Either GFMT or LFMT may contain:\n\
885 %% %\n\
886 %c'C' the single character C\n\
887 %c'\\OOO' the character with octal code OOO"),
888 "",
889 N_("-l --paginate Pass the output through `pr' to paginate it."),
890 N_("-t --expand-tabs Expand tabs to spaces in output."),
891 N_("-T --initial-tab Make tabs line up by prepending a tab."),
892 "",
893 N_("-r --recursive Recursively compare any subdirectories found."),
894 N_("-N --new-file Treat absent files as empty."),
895 N_("--unidirectional-new-file Treat absent first files as empty."),
896 N_("-s --report-identical-files Report when two files are the same."),
897 N_("-x PAT --exclude=PAT Exclude files that match PAT."),
898 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."),
899 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."),
900 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."),
901 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."),
902 "",
903 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."),
904 N_("-d --minimal Try hard to find a smaller set of changes."),
905 N_("--speed-large-files Assume large files and many scattered small changes."),
906 "",
907 N_("-v --version Output version info."),
908 N_("--help Output this help."),
909 "",
910 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."),
911 N_("If --from-file or --to-file is given, there are no restrictions on FILES."),
912 N_("If a FILE is `-', read standard input."),
913 "",
914 N_("Report bugs to <bug-gnu-utils@gnu.org>."),
915 0
916 };
917
918 static void
usage(void)919 usage (void)
920 {
921 char const * const *p;
922
923 printf (_("Usage: %s [OPTION]... FILES\n"), program_name);
924
925 for (p = option_help_msgid; *p; p++)
926 {
927 if (!**p)
928 putchar ('\n');
929 else
930 {
931 char const *msg = _(*p);
932 char const *nl;
933 while ((nl = strchr (msg, '\n')))
934 {
935 int msglen = nl + 1 - msg;
936 printf (" %.*s", msglen, msg);
937 msg = nl + 1;
938 }
939
940 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg);
941 }
942 }
943 }
944
/* Store VALUE in *VAR.  If *VAR already holds a string that differs
   from VALUE, the options conflict: diagnose that, naming OPTION, and
   give up through try_help.  Repeating the same value is accepted.  */
static void
specify_value (char const **var, char const *value, char const *option)
{
  char const *previous = *var;

  if (previous != 0 && strcmp (previous, value) != 0)
    {
      error (0, 0, _("conflicting %s option value `%s'"), option, value);
      try_help (0, 0);
    }

  *var = value;
}
957
958 /* Set the output style to STYLE, diagnosing conflicts. */
959 static void
specify_style(enum output_style style)960 specify_style (enum output_style style)
961 {
962 if (output_style != style)
963 {
964 if (output_style != OUTPUT_UNSPECIFIED)
965 try_help ("conflicting output style options", 0);
966 output_style = style;
967 }
968 }
969
/* Return a translated description of the kind of file that *ST
   describes, for use in diagnostics.

   See POSIX 1003.1-2001 for these formats.  To keep diagnostics
   grammatical in English, every returned string must start with a
   consonant.  */
static char const *
filetype (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    {
      if (st->st_size == 0)
        return _("regular empty file");
      return _("regular file");
    }

  if (S_ISDIR (st->st_mode))
    return _("directory");

  /* The remaining kinds are tested only where the host defines the
     corresponding macro.  */
#ifdef S_ISBLK
  if (S_ISBLK (st->st_mode))
    return _("block special file");
#endif
#ifdef S_ISCHR
  if (S_ISCHR (st->st_mode))
    return _("character special file");
#endif
#ifdef S_ISFIFO
  if (S_ISFIFO (st->st_mode))
    return _("fifo");
#endif
  /* S_ISLNK is impossible with `fstat' and `stat'.  */
#ifdef S_ISSOCK
  if (S_ISSOCK (st->st_mode))
    return _("socket");
#endif
#ifdef S_TYPEISMQ
  if (S_TYPEISMQ (st))
    return _("message queue");
#endif
#ifdef S_TYPEISSEM
  if (S_TYPEISSEM (st))
    return _("semaphore");
#endif
#ifdef S_TYPEISSHM
  if (S_TYPEISSHM (st))
    return _("shared memory object");
#endif
#ifdef S_TYPEISTMO
  if (S_TYPEISTMO (st))
    return _("typed memory object");
#endif

  /* None of the known kinds matched.  */
  return _("weird file");
}
1011
/* Overwrite the last-modified time recorded in *ST with the current
   time.

   Where the stat structure carries a nanosecond field (ST_MTIM_NSEC),
   clock_gettime is tried first and gettimeofday second, each only if
   the build has it.  If neither is available or both fail, time is
   used and only st_mtime is set.  */

static void
set_mtime_to_now (struct stat *st)
{
#ifdef ST_MTIM_NSEC

# if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME
  if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0)
    return;
# endif

# if HAVE_GETTIMEOFDAY
  {
    struct timeval now;

    if (gettimeofday (&now, NULL) == 0)
      {
        st->st_mtime = now.tv_sec;
        /* Microseconds to nanoseconds.  */
        st->st_mtim.ST_MTIM_NSEC = now.tv_usec * 1000;
        return;
      }
  }
# endif

#endif /* ST_MTIM_NSEC */

  /* Fallback: the current time via time.  */
  time (&st->st_mtime);
}
1040
1041 /* Compare two files (or dirs) with parent comparison PARENT
1042 and names NAME0 and NAME1.
1043 (If PARENT is 0, then the first name is just NAME0, etc.)
1044 This is self-contained; it opens the files and closes them.
1045
1046 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if
1047 different, EXIT_TROUBLE if there is a problem opening them. */
1048
1049 static int
compare_files(struct comparison const * parent,char const * name0,char const * name1)1050 compare_files (struct comparison const *parent,
1051 char const *name0,
1052 char const *name1)
1053 {
1054 struct comparison cmp;
1055 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
1056 register int f;
1057 int status = EXIT_SUCCESS;
1058 bool same_files;
1059 char *free0, *free1;
1060
1061 /* If this is directory comparison, perhaps we have a file
1062 that exists only in one of the directories.
1063 If so, just print a message to that effect. */
1064
1065 if (! ((name0 && name1)
1066 || (unidirectional_new_file && name1)
1067 || new_file))
1068 {
1069 char const *name = name0 == 0 ? name1 : name0;
1070 char const *dir = parent->file[name0 == 0].name;
1071
1072 /* See POSIX 1003.1-2001 for this format. */
1073 message ("Only in %s: %s\n", dir, name);
1074
1075 /* Return EXIT_FAILURE so that diff_dirs will return
1076 EXIT_FAILURE ("some files differ"). */
1077 return EXIT_FAILURE;
1078 }
1079
1080 memset (cmp.file, 0, sizeof cmp.file);
1081 cmp.parent = parent;
1082
1083 /* cmp.file[f].desc markers */
1084 #define NONEXISTENT (-1) /* nonexistent file */
1085 #define UNOPENED (-2) /* unopened file (e.g. directory) */
1086 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */
1087
1088 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */
1089
1090 cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED;
1091 cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED;
1092
1093 /* Now record the full name of each file, including nonexistent ones. */
1094
1095 if (name0 == 0)
1096 name0 = name1;
1097 if (name1 == 0)
1098 name1 = name0;
1099
1100 if (!parent)
1101 {
1102 free0 = 0;
1103 free1 = 0;
1104 cmp.file[0].name = name0;
1105 cmp.file[1].name = name1;
1106 }
1107 else
1108 {
1109 cmp.file[0].name = free0
1110 = dir_file_pathname (parent->file[0].name, name0);
1111 cmp.file[1].name = free1
1112 = dir_file_pathname (parent->file[1].name, name1);
1113 }
1114
1115 /* Stat the files. */
1116
1117 for (f = 0; f < 2; f++)
1118 {
1119 if (cmp.file[f].desc != NONEXISTENT)
1120 {
1121 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0)
1122 {
1123 cmp.file[f].desc = cmp.file[0].desc;
1124 cmp.file[f].stat = cmp.file[0].stat;
1125 }
1126 else if (strcmp (cmp.file[f].name, "-") == 0)
1127 {
1128 cmp.file[f].desc = STDIN_FILENO;
1129 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0)
1130 cmp.file[f].desc = ERRNO_ENCODE (errno);
1131 else
1132 {
1133 if (S_ISREG (cmp.file[f].stat.st_mode))
1134 {
1135 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
1136 if (pos < 0)
1137 cmp.file[f].desc = ERRNO_ENCODE (errno);
1138 else
1139 cmp.file[f].stat.st_size =
1140 MAX (0, cmp.file[f].stat.st_size - pos);
1141 }
1142
1143 /* POSIX 1003.1-2001 requires current time for
1144 stdin. */
1145 set_mtime_to_now (&cmp.file[f].stat);
1146 }
1147 }
1148 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0)
1149 cmp.file[f].desc = ERRNO_ENCODE (errno);
1150 }
1151 }
1152
1153 /* Mark files as nonexistent at the top level as needed for -N and
1154 --unidirectional-new-file. */
1155 if (! parent)
1156 {
1157 if ((new_file | unidirectional_new_file)
1158 && cmp.file[0].desc == ERRNO_ENCODE (ENOENT)
1159 && cmp.file[1].desc == UNOPENED)
1160 cmp.file[0].desc = NONEXISTENT;
1161
1162 if (new_file
1163 && cmp.file[0].desc == UNOPENED
1164 && cmp.file[1].desc == ERRNO_ENCODE (ENOENT))
1165 cmp.file[1].desc = NONEXISTENT;
1166 }
1167
1168 for (f = 0; f < 2; f++)
1169 if (cmp.file[f].desc == NONEXISTENT)
1170 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
1171
1172 for (f = 0; f < 2; f++)
1173 {
1174 int e = ERRNO_DECODE (cmp.file[f].desc);
1175 if (0 <= e)
1176 {
1177 errno = e;
1178 perror_with_name (cmp.file[f].name);
1179 status = EXIT_TROUBLE;
1180 }
1181 }
1182
1183 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
1184 {
1185 /* If one is a directory, and it was specified in the command line,
1186 use the file in that dir with the other file's basename. */
1187
1188 int fnm_arg = DIR_P (0);
1189 int dir_arg = 1 - fnm_arg;
1190 char const *fnm = cmp.file[fnm_arg].name;
1191 char const *dir = cmp.file[dir_arg].name;
1192 char const *filename = cmp.file[dir_arg].name = free0
1193 = dir_file_pathname (dir, base_name (fnm));
1194
1195 if (strcmp (fnm, "-") == 0)
1196 fatal ("cannot compare `-' to a directory");
1197
1198 if (stat (filename, &cmp.file[dir_arg].stat) != 0)
1199 {
1200 perror_with_name (filename);
1201 status = EXIT_TROUBLE;
1202 }
1203 }
1204
1205 if (status != EXIT_SUCCESS)
1206 {
1207 /* One of the files should exist but does not. */
1208 }
1209 else if ((same_files
1210 = (cmp.file[0].desc != NONEXISTENT
1211 && cmp.file[1].desc != NONEXISTENT
1212 && (same_special_file (&cmp.file[0].stat, &cmp.file[1].stat)
1213 || (0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
1214 && same_file_attributes (&cmp.file[0].stat,
1215 &cmp.file[1].stat)))))
1216 && no_diff_means_no_output)
1217 {
1218 /* The two named files are actually the same physical file.
1219 We know they are identical without actually reading them. */
1220 }
1221 else if (DIR_P (0) & DIR_P (1))
1222 {
1223 if (output_style == OUTPUT_IFDEF)
1224 fatal ("-D option not supported with directories");
1225
1226 /* If both are directories, compare the files in them. */
1227
1228 if (parent && !recursive)
1229 {
1230 /* But don't compare dir contents one level down
1231 unless -r was specified.
1232 See POSIX 1003.1-2001 for this format. */
1233 message ("Common subdirectories: %s and %s\n",
1234 cmp.file[0].name, cmp.file[1].name);
1235 }
1236 else
1237 status = diff_dirs (&cmp, compare_files);
1238 }
1239 else if ((DIR_P (0) | DIR_P (1))
1240 || (parent
1241 && (! S_ISREG (cmp.file[0].stat.st_mode)
1242 || ! S_ISREG (cmp.file[1].stat.st_mode))))
1243 {
1244 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT)
1245 {
1246 /* We have a subdirectory that exists only in one directory. */
1247
1248 if ((DIR_P (0) | DIR_P (1))
1249 && recursive
1250 && (new_file
1251 || (unidirectional_new_file
1252 && cmp.file[0].desc == NONEXISTENT)))
1253 status = diff_dirs (&cmp, compare_files);
1254 else
1255 {
1256 char const *dir
1257 = parent->file[cmp.file[0].desc == NONEXISTENT].name;
1258
1259 /* See POSIX 1003.1-2001 for this format. */
1260 message ("Only in %s: %s\n", dir, name0);
1261
1262 status = EXIT_FAILURE;
1263 }
1264 }
1265 else
1266 {
1267 /* We have two files that are not to be compared. */
1268
1269 /* See POSIX 1003.1-2001 for this format. */
1270 message5 ("File %s is a %s while file %s is a %s\n",
1271 file_label[0] ? file_label[0] : cmp.file[0].name,
1272 filetype (&cmp.file[0].stat),
1273 file_label[1] ? file_label[1] : cmp.file[1].name,
1274 filetype (&cmp.file[1].stat));
1275
1276 /* This is a difference. */
1277 status = EXIT_FAILURE;
1278 }
1279 }
1280 else if (files_can_be_treated_as_binary
1281 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
1282 && (cmp.file[0].desc == NONEXISTENT
1283 || S_ISREG (cmp.file[0].stat.st_mode))
1284 && (cmp.file[1].desc == NONEXISTENT
1285 || S_ISREG (cmp.file[1].stat.st_mode)))
1286 {
1287 message ("Files %s and %s differ\n",
1288 file_label[0] ? file_label[0] : cmp.file[0].name,
1289 file_label[1] ? file_label[1] : cmp.file[1].name);
1290 status = EXIT_FAILURE;
1291 }
1292 else
1293 {
1294 /* Both exist and neither is a directory. */
1295
1296 /* Open the files and record their descriptors. */
1297
1298 if (cmp.file[0].desc == UNOPENED)
1299 if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0)
1300 {
1301 perror_with_name (cmp.file[0].name);
1302 status = EXIT_TROUBLE;
1303 }
1304 if (cmp.file[1].desc == UNOPENED)
1305 {
1306 if (same_files)
1307 cmp.file[1].desc = cmp.file[0].desc;
1308 else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0))
1309 < 0)
1310 {
1311 perror_with_name (cmp.file[1].name);
1312 status = EXIT_TROUBLE;
1313 }
1314 }
1315
1316 #if HAVE_SETMODE_DOS
1317 if (binary)
1318 for (f = 0; f < 2; f++)
1319 if (0 <= cmp.file[f].desc)
1320 set_binary_mode (cmp.file[f].desc, 1);
1321 #endif
1322
1323 /* Compare the files, if no error was found. */
1324
1325 if (status == EXIT_SUCCESS)
1326 status = diff_2_files (&cmp);
1327
1328 /* Close the file descriptors. */
1329
1330 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0)
1331 {
1332 perror_with_name (cmp.file[0].name);
1333 status = EXIT_TROUBLE;
1334 }
1335 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
1336 && close (cmp.file[1].desc) != 0)
1337 {
1338 perror_with_name (cmp.file[1].name);
1339 status = EXIT_TROUBLE;
1340 }
1341 }
1342
1343 /* Now the comparison has been done, if no error prevented it,
1344 and STATUS is the value this function will return. */
1345
1346 if (status == EXIT_SUCCESS)
1347 {
1348 if (report_identical_files && !DIR_P (0))
1349 message ("Files %s and %s are identical\n",
1350 file_label[0] ? file_label[0] : cmp.file[0].name,
1351 file_label[1] ? file_label[1] : cmp.file[1].name);
1352 }
1353 else
1354 {
1355 /* Flush stdout so that the user sees differences immediately.
1356 This can hurt performance, unfortunately. */
1357 if (fflush (stdout) != 0)
1358 pfatal_with_name (_("standard output"));
1359 }
1360
1361 if (free0)
1362 free (free0);
1363 if (free1)
1364 free (free1);
1365
1366 return status;
1367 }
1368