1 /* diff - compare files line by line 2 3 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002, 4 2004 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 GNU DIFF is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2, or (at your option) 11 any later version. 12 13 GNU DIFF is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 16 See the GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with GNU DIFF; see the file COPYING. 20 If not, write to the Free Software Foundation, 21 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 22 23 #define GDIFF_MAIN 24 #include "diff.h" 25 #include "paths.h" 26 #include <c-stack.h> 27 #include <dirname.h> 28 #include <error.h> 29 #include <exclude.h> 30 #include <exit.h> 31 #include <exitfail.h> 32 #include <file-type.h> 33 #include <fnmatch.h> 34 #include <getopt.h> 35 #include <hard-locale.h> 36 #include <posixver.h> 37 #include <prepargs.h> 38 #include <quotesys.h> 39 #include <setmode.h> 40 #include <version-etc.h> 41 #include <xalloc.h> 42 43 #ifndef GUTTER_WIDTH_MINIMUM 44 # define GUTTER_WIDTH_MINIMUM 3 45 #endif 46 47 struct regexp_list 48 { 49 char *regexps; /* chars representing disjunction of the regexps */ 50 size_t len; /* chars used in `regexps' */ 51 size_t size; /* size malloc'ed for `regexps'; 0 if not malloc'ed */ 52 bool multiple_regexps;/* Does `regexps' represent a disjunction? 
*/ 53 struct re_pattern_buffer *buf; 54 }; 55 56 static int compare_files (struct comparison const *, char const *, char const *); 57 static void add_regexp (struct regexp_list *, char const *); 58 static void summarize_regexp_list (struct regexp_list *); 59 static void specify_style (enum output_style); 60 static void specify_value (char const **, char const *, char const *); 61 static void try_help (char const *, char const *) __attribute__((noreturn)); 62 static void check_stdout (void); 63 static void usage (void); 64 65 /* If comparing directories, compare their common subdirectories 66 recursively. */ 67 static bool recursive; 68 69 /* In context diffs, show previous lines that match these regexps. */ 70 static struct regexp_list function_regexp_list; 71 72 /* Ignore changes affecting only lines that match these regexps. */ 73 static struct regexp_list ignore_regexp_list; 74 75 #if HAVE_SETMODE_DOS 76 /* Use binary I/O when reading and writing data (--binary). 77 On POSIX hosts, this has no effect. */ 78 static bool binary; 79 #else 80 enum { binary = true }; 81 #endif 82 83 /* When comparing directories, if a file appears only in one 84 directory, treat it as present but empty in the other (-N). 85 Then `patch' would create the file with appropriate contents. */ 86 static bool new_file; 87 88 /* When comparing directories, if a file appears only in the second 89 directory of the two, treat it as present but empty in the other 90 (--unidirectional-new-file). 91 Then `patch' would create the file with appropriate contents. */ 92 static bool unidirectional_new_file; 93 94 /* Report files compared that are the same (-s). 95 Normally nothing is output when that happens. */ 96 static bool report_identical_files; 97 98 99 /* Return a string containing the command options with which diff was invoked. 100 Spaces appear between what were separate ARGV-elements. 101 There is a space at the beginning but none at the end. 
102 If there were no options, the result is an empty string. 103 104 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, 105 the length of that vector. */ 106 107 static char * 108 option_list (char **optionvec, int count) 109 { 110 int i; 111 size_t size = 1; 112 char *result; 113 char *p; 114 115 for (i = 0; i < count; i++) 116 size += 1 + quote_system_arg ((char *) 0, optionvec[i]); 117 118 p = result = xmalloc (size); 119 120 for (i = 0; i < count; i++) 121 { 122 *p++ = ' '; 123 p += quote_system_arg (p, optionvec[i]); 124 } 125 126 *p = 0; 127 return result; 128 } 129 130 131 /* Return an option value suitable for add_exclude. */ 132 133 static int 134 exclude_options (void) 135 { 136 return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0); 137 } 138 139 static char const shortopts[] = 140 "0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y"; 141 142 /* Values for long options that do not have single-letter equivalents. */ 143 enum 144 { 145 BINARY_OPTION = CHAR_MAX + 1, 146 FROM_FILE_OPTION, 147 HELP_OPTION, 148 HORIZON_LINES_OPTION, 149 IGNORE_FILE_NAME_CASE_OPTION, 150 INHIBIT_HUNK_MERGE_OPTION, 151 LEFT_COLUMN_OPTION, 152 LINE_FORMAT_OPTION, 153 NO_IGNORE_FILE_NAME_CASE_OPTION, 154 NORMAL_OPTION, 155 SDIFF_MERGE_ASSIST_OPTION, 156 STRIP_TRAILING_CR_OPTION, 157 SUPPRESS_COMMON_LINES_OPTION, 158 TABSIZE_OPTION, 159 TO_FILE_OPTION, 160 161 /* These options must be in sequence. */ 162 UNCHANGED_LINE_FORMAT_OPTION, 163 OLD_LINE_FORMAT_OPTION, 164 NEW_LINE_FORMAT_OPTION, 165 166 /* These options must be in sequence. 
*/ 167 UNCHANGED_GROUP_FORMAT_OPTION, 168 OLD_GROUP_FORMAT_OPTION, 169 NEW_GROUP_FORMAT_OPTION, 170 CHANGED_GROUP_FORMAT_OPTION 171 }; 172 173 static char const group_format_option[][sizeof "--unchanged-group-format"] = 174 { 175 "--unchanged-group-format", 176 "--old-group-format", 177 "--new-group-format", 178 "--changed-group-format" 179 }; 180 181 static char const line_format_option[][sizeof "--unchanged-line-format"] = 182 { 183 "--unchanged-line-format", 184 "--old-line-format", 185 "--new-line-format" 186 }; 187 188 static struct option const longopts[] = 189 { 190 {"binary", 0, 0, BINARY_OPTION}, 191 {"brief", 0, 0, 'q'}, 192 {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION}, 193 {"context", 2, 0, 'C'}, 194 {"ed", 0, 0, 'e'}, 195 {"exclude", 1, 0, 'x'}, 196 {"exclude-from", 1, 0, 'X'}, 197 {"expand-tabs", 0, 0, 't'}, 198 {"forward-ed", 0, 0, 'f'}, 199 {"from-file", 1, 0, FROM_FILE_OPTION}, 200 {"help", 0, 0, HELP_OPTION}, 201 {"horizon-lines", 1, 0, HORIZON_LINES_OPTION}, 202 {"ifdef", 1, 0, 'D'}, 203 {"ignore-all-space", 0, 0, 'w'}, 204 {"ignore-blank-lines", 0, 0, 'B'}, 205 {"ignore-case", 0, 0, 'i'}, 206 {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION}, 207 {"ignore-matching-lines", 1, 0, 'I'}, 208 {"ignore-space-change", 0, 0, 'b'}, 209 {"ignore-tab-expansion", 0, 0, 'E'}, 210 {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION}, 211 {"initial-tab", 0, 0, 'T'}, 212 {"label", 1, 0, 'L'}, 213 {"left-column", 0, 0, LEFT_COLUMN_OPTION}, 214 {"line-format", 1, 0, LINE_FORMAT_OPTION}, 215 {"minimal", 0, 0, 'd'}, 216 {"new-file", 0, 0, 'N'}, 217 {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION}, 218 {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION}, 219 {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION}, 220 {"normal", 0, 0, NORMAL_OPTION}, 221 {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION}, 222 {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION}, 223 {"paginate", 0, 0, 'l'}, 224 {"rcs", 0, 0, 'n'}, 225 
{"recursive", 0, 0, 'r'}, 226 {"report-identical-files", 0, 0, 's'}, 227 {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION}, 228 {"show-c-function", 0, 0, 'p'}, 229 {"show-function-line", 1, 0, 'F'}, 230 {"side-by-side", 0, 0, 'y'}, 231 {"speed-large-files", 0, 0, 'H'}, 232 {"starting-file", 1, 0, 'S'}, 233 {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION}, 234 {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION}, 235 {"tabsize", 1, 0, TABSIZE_OPTION}, 236 {"text", 0, 0, 'a'}, 237 {"to-file", 1, 0, TO_FILE_OPTION}, 238 {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION}, 239 {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION}, 240 {"unidirectional-new-file", 0, 0, 'P'}, 241 {"unified", 2, 0, 'U'}, 242 {"version", 0, 0, 'v'}, 243 {"width", 1, 0, 'W'}, 244 {0, 0, 0, 0} 245 }; 246 247 int 248 main (int argc, char **argv) 249 { 250 int exit_status = EXIT_SUCCESS; 251 int c; 252 int i; 253 int prev = -1; 254 lin ocontext = -1; 255 bool explicit_context = false; 256 size_t width = 0; 257 bool show_c_function = false; 258 char const *from_file = 0; 259 char const *to_file = 0; 260 uintmax_t numval; 261 char *numend; 262 263 /* Do our initializations. */ 264 exit_failure = 2; 265 initialize_main (&argc, &argv); 266 program_name = argv[0]; 267 setlocale (LC_ALL, ""); 268 textdomain (PACKAGE); 269 c_stack_action (0); 270 function_regexp_list.buf = &function_regexp; 271 ignore_regexp_list.buf = &ignore_regexp; 272 re_set_syntax (RE_SYNTAX_GREP); 273 excluded = new_exclude (); 274 275 /* Decode the options. */ 276 277 while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1) 278 { 279 switch (c) 280 { 281 case 0: 282 break; 283 284 case '0': 285 case '1': 286 case '2': 287 case '3': 288 case '4': 289 case '5': 290 case '6': 291 case '7': 292 case '8': 293 case '9': 294 if (! 
ISDIGIT (prev)) 295 ocontext = c - '0'; 296 else if (LIN_MAX / 10 < ocontext 297 || ((ocontext = 10 * ocontext + c - '0') < 0)) 298 ocontext = LIN_MAX; 299 break; 300 301 case 'a': 302 text = true; 303 break; 304 305 case 'b': 306 if (ignore_white_space < IGNORE_SPACE_CHANGE) 307 ignore_white_space = IGNORE_SPACE_CHANGE; 308 break; 309 310 case 'B': 311 ignore_blank_lines = true; 312 break; 313 314 case 'C': 315 case 'U': 316 { 317 if (optarg) 318 { 319 numval = strtoumax (optarg, &numend, 10); 320 if (*numend) 321 try_help ("invalid context length `%s'", optarg); 322 if (LIN_MAX < numval) 323 numval = LIN_MAX; 324 } 325 else 326 numval = 3; 327 328 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); 329 if (context < numval) 330 context = numval; 331 explicit_context = true; 332 } 333 break; 334 335 case 'c': 336 specify_style (OUTPUT_CONTEXT); 337 if (context < 3) 338 context = 3; 339 break; 340 341 case 'd': 342 minimal = true; 343 break; 344 345 case 'D': 346 specify_style (OUTPUT_IFDEF); 347 { 348 static char const C_ifdef_group_formats[] = 349 "%%=%c#ifndef %s\n%%<#endif /* ! 
%s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n"; 350 char *b = xmalloc (sizeof C_ifdef_group_formats 351 + 7 * strlen (optarg) - 14 /* 7*"%s" */ 352 - 8 /* 5*"%%" + 3*"%c" */); 353 sprintf (b, C_ifdef_group_formats, 354 0, 355 optarg, optarg, 0, 356 optarg, optarg, 0, 357 optarg, optarg, optarg); 358 for (i = 0; i < sizeof group_format / sizeof *group_format; i++) 359 { 360 specify_value (&group_format[i], b, "-D"); 361 b += strlen (b) + 1; 362 } 363 } 364 break; 365 366 case 'e': 367 specify_style (OUTPUT_ED); 368 break; 369 370 case 'E': 371 if (ignore_white_space < IGNORE_TAB_EXPANSION) 372 ignore_white_space = IGNORE_TAB_EXPANSION; 373 break; 374 375 case 'f': 376 specify_style (OUTPUT_FORWARD_ED); 377 break; 378 379 case 'F': 380 add_regexp (&function_regexp_list, optarg); 381 break; 382 383 case 'h': 384 /* Split the files into chunks for faster processing. 385 Usually does not change the result. 386 387 This currently has no effect. */ 388 break; 389 390 case 'H': 391 speed_large_files = true; 392 break; 393 394 case 'i': 395 ignore_case = true; 396 break; 397 398 case 'I': 399 add_regexp (&ignore_regexp_list, optarg); 400 break; 401 402 case 'l': 403 if (!pr_program[0]) 404 try_help ("pagination not supported on this host", 0); 405 paginate = true; 406 #ifdef SIGCHLD 407 /* Pagination requires forking and waiting, and 408 System V fork+wait does not work if SIGCHLD is ignored. 
*/ 409 signal (SIGCHLD, SIG_DFL); 410 #endif 411 break; 412 413 case 'L': 414 if (!file_label[0]) 415 file_label[0] = optarg; 416 else if (!file_label[1]) 417 file_label[1] = optarg; 418 else 419 fatal ("too many file label options"); 420 break; 421 422 case 'n': 423 specify_style (OUTPUT_RCS); 424 break; 425 426 case 'N': 427 new_file = true; 428 break; 429 430 case 'p': 431 show_c_function = true; 432 add_regexp (&function_regexp_list, "^[[:alpha:]$_]"); 433 break; 434 435 case 'P': 436 unidirectional_new_file = true; 437 break; 438 439 case 'q': 440 brief = true; 441 break; 442 443 case 'r': 444 recursive = true; 445 break; 446 447 case 's': 448 report_identical_files = true; 449 break; 450 451 case 'S': 452 specify_value (&starting_file, optarg, "-S"); 453 break; 454 455 case 't': 456 expand_tabs = true; 457 break; 458 459 case 'T': 460 initial_tab = true; 461 break; 462 463 case 'u': 464 specify_style (OUTPUT_UNIFIED); 465 if (context < 3) 466 context = 3; 467 break; 468 469 case 'v': 470 version_etc (stdout, "diff", PACKAGE_NAME, PACKAGE_VERSION, 471 "Paul Eggert", "Mike Haertel", "David Hayes", 472 "Richard Stallman", "Len Tower", (char *) 0); 473 check_stdout (); 474 return EXIT_SUCCESS; 475 476 case 'w': 477 ignore_white_space = IGNORE_ALL_SPACE; 478 break; 479 480 case 'x': 481 add_exclude (excluded, optarg, exclude_options ()); 482 break; 483 484 case 'X': 485 if (add_exclude_file (add_exclude, excluded, optarg, 486 exclude_options (), '\n')) 487 pfatal_with_name (optarg); 488 break; 489 490 case 'y': 491 specify_style (OUTPUT_SDIFF); 492 break; 493 494 case 'W': 495 numval = strtoumax (optarg, &numend, 10); 496 if (! 
(0 < numval && numval <= SIZE_MAX) || *numend) 497 try_help ("invalid width `%s'", optarg); 498 if (width != numval) 499 { 500 if (width) 501 fatal ("conflicting width options"); 502 width = numval; 503 } 504 break; 505 506 case BINARY_OPTION: 507 #if HAVE_SETMODE_DOS 508 binary = true; 509 set_binary_mode (STDOUT_FILENO, true); 510 #endif 511 break; 512 513 case FROM_FILE_OPTION: 514 specify_value (&from_file, optarg, "--from-file"); 515 break; 516 517 case HELP_OPTION: 518 usage (); 519 check_stdout (); 520 return EXIT_SUCCESS; 521 522 case HORIZON_LINES_OPTION: 523 numval = strtoumax (optarg, &numend, 10); 524 if (*numend) 525 try_help ("invalid horizon length `%s'", optarg); 526 horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX)); 527 break; 528 529 case IGNORE_FILE_NAME_CASE_OPTION: 530 ignore_file_name_case = true; 531 break; 532 533 case INHIBIT_HUNK_MERGE_OPTION: 534 /* This option is obsolete, but accept it for backward 535 compatibility. */ 536 break; 537 538 case LEFT_COLUMN_OPTION: 539 left_column = true; 540 break; 541 542 case LINE_FORMAT_OPTION: 543 specify_style (OUTPUT_IFDEF); 544 for (i = 0; i < sizeof line_format / sizeof *line_format; i++) 545 specify_value (&line_format[i], optarg, "--line-format"); 546 break; 547 548 case NO_IGNORE_FILE_NAME_CASE_OPTION: 549 ignore_file_name_case = false; 550 break; 551 552 case NORMAL_OPTION: 553 specify_style (OUTPUT_NORMAL); 554 break; 555 556 case SDIFF_MERGE_ASSIST_OPTION: 557 specify_style (OUTPUT_SDIFF); 558 sdiff_merge_assist = true; 559 break; 560 561 case STRIP_TRAILING_CR_OPTION: 562 strip_trailing_cr = true; 563 break; 564 565 case SUPPRESS_COMMON_LINES_OPTION: 566 suppress_common_lines = true; 567 break; 568 569 case TABSIZE_OPTION: 570 numval = strtoumax (optarg, &numend, 10); 571 if (! 
(0 < numval && numval <= SIZE_MAX) || *numend) 572 try_help ("invalid tabsize `%s'", optarg); 573 if (tabsize != numval) 574 { 575 if (tabsize) 576 fatal ("conflicting tabsize options"); 577 tabsize = numval; 578 } 579 break; 580 581 case TO_FILE_OPTION: 582 specify_value (&to_file, optarg, "--to-file"); 583 break; 584 585 case UNCHANGED_LINE_FORMAT_OPTION: 586 case OLD_LINE_FORMAT_OPTION: 587 case NEW_LINE_FORMAT_OPTION: 588 specify_style (OUTPUT_IFDEF); 589 c -= UNCHANGED_LINE_FORMAT_OPTION; 590 specify_value (&line_format[c], optarg, line_format_option[c]); 591 break; 592 593 case UNCHANGED_GROUP_FORMAT_OPTION: 594 case OLD_GROUP_FORMAT_OPTION: 595 case NEW_GROUP_FORMAT_OPTION: 596 case CHANGED_GROUP_FORMAT_OPTION: 597 specify_style (OUTPUT_IFDEF); 598 c -= UNCHANGED_GROUP_FORMAT_OPTION; 599 specify_value (&group_format[c], optarg, group_format_option[c]); 600 break; 601 602 default: 603 try_help (0, 0); 604 } 605 prev = c; 606 } 607 608 if (output_style == OUTPUT_UNSPECIFIED) 609 { 610 if (show_c_function) 611 { 612 specify_style (OUTPUT_CONTEXT); 613 if (ocontext < 0) 614 context = 3; 615 } 616 else 617 specify_style (OUTPUT_NORMAL); 618 } 619 620 if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME)) 621 { 622 #ifdef ST_MTIM_NSEC 623 time_format = "%Y-%m-%d %H:%M:%S.%N %z"; 624 #else 625 time_format = "%Y-%m-%d %H:%M:%S %z"; 626 #endif 627 } 628 else 629 { 630 /* See POSIX 1003.1-2001 for this format. */ 631 time_format = "%a %b %e %T %Y"; 632 } 633 634 if (0 <= ocontext) 635 { 636 bool modern_usage = 200112 <= posix2_version (); 637 638 if ((output_style == OUTPUT_CONTEXT 639 || output_style == OUTPUT_UNIFIED) 640 && (context < ocontext 641 || (ocontext < context && ! explicit_context))) 642 { 643 if (modern_usage) 644 { 645 error (0, 0, 646 _("`-%ld' option is obsolete; use `-%c %ld'"), 647 (long int) ocontext, 648 output_style == OUTPUT_CONTEXT ? 
'C' : 'U', 649 (long int) ocontext); 650 try_help (0, 0); 651 } 652 context = ocontext; 653 } 654 else 655 { 656 if (modern_usage) 657 { 658 error (0, 0, _("`-%ld' option is obsolete; omit it"), 659 (long int) ocontext); 660 try_help (0, 0); 661 } 662 } 663 } 664 665 if (! tabsize) 666 tabsize = 8; 667 if (! width) 668 width = 130; 669 670 { 671 /* Maximize first the half line width, and then the gutter width, 672 according to the following constraints: 673 674 1. Two half lines plus a gutter must fit in a line. 675 2. If the half line width is nonzero: 676 a. The gutter width is at least GUTTER_WIDTH_MINIMUM. 677 b. If tabs are not expanded to spaces, 678 a half line plus a gutter is an integral number of tabs, 679 so that tabs in the right column line up. */ 680 681 intmax_t t = expand_tabs ? 1 : tabsize; 682 intmax_t w = width; 683 intmax_t off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t; 684 sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)), 685 sdiff_column2_offset = sdiff_half_width ? off : w; 686 } 687 688 /* Make the horizon at least as large as the context, so that 689 shift_boundaries has more freedom to shift the first and last hunks. */ 690 if (horizon_lines < context) 691 horizon_lines = context; 692 693 summarize_regexp_list (&function_regexp_list); 694 summarize_regexp_list (&ignore_regexp_list); 695 696 if (output_style == OUTPUT_IFDEF) 697 { 698 for (i = 0; i < sizeof line_format / sizeof *line_format; i++) 699 if (!line_format[i]) 700 line_format[i] = "%l\n"; 701 if (!group_format[OLD]) 702 group_format[OLD] 703 = group_format[CHANGED] ? group_format[CHANGED] : "%<"; 704 if (!group_format[NEW]) 705 group_format[NEW] 706 = group_format[CHANGED] ? 
group_format[CHANGED] : "%>"; 707 if (!group_format[UNCHANGED]) 708 group_format[UNCHANGED] = "%="; 709 if (!group_format[CHANGED]) 710 group_format[CHANGED] = concat (group_format[OLD], 711 group_format[NEW], ""); 712 } 713 714 no_diff_means_no_output = 715 (output_style == OUTPUT_IFDEF ? 716 (!*group_format[UNCHANGED] 717 || (strcmp (group_format[UNCHANGED], "%=") == 0 718 && !*line_format[UNCHANGED])) 719 : (output_style != OUTPUT_SDIFF) | suppress_common_lines); 720 721 files_can_be_treated_as_binary = 722 (brief & binary 723 & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr 724 | (ignore_regexp_list.regexps || ignore_white_space))); 725 726 switch_string = option_list (argv + 1, optind - 1); 727 728 if (from_file) 729 { 730 if (to_file) 731 fatal ("--from-file and --to-file both specified"); 732 else 733 for (; optind < argc; optind++) 734 { 735 int status = compare_files ((struct comparison *) 0, 736 from_file, argv[optind]); 737 if (exit_status < status) 738 exit_status = status; 739 } 740 } 741 else 742 { 743 if (to_file) 744 for (; optind < argc; optind++) 745 { 746 int status = compare_files ((struct comparison *) 0, 747 argv[optind], to_file); 748 if (exit_status < status) 749 exit_status = status; 750 } 751 else 752 { 753 if (argc - optind != 2) 754 { 755 if (argc - optind < 2) 756 try_help ("missing operand after `%s'", argv[argc - 1]); 757 else 758 try_help ("extra operand `%s'", argv[optind + 2]); 759 } 760 761 exit_status = compare_files ((struct comparison *) 0, 762 argv[optind], argv[optind + 1]); 763 } 764 } 765 766 /* Print any messages that were saved up for last. */ 767 print_message_queue (); 768 769 check_stdout (); 770 exit (exit_status); 771 return exit_status; 772 } 773 774 /* Append to REGLIST the regexp PATTERN. 
*/ 775 776 static void 777 add_regexp (struct regexp_list *reglist, char const *pattern) 778 { 779 size_t patlen = strlen (pattern); 780 char const *m = re_compile_pattern (pattern, patlen, reglist->buf); 781 782 if (m != 0) 783 error (0, 0, "%s: %s", pattern, m); 784 else 785 { 786 char *regexps = reglist->regexps; 787 size_t len = reglist->len; 788 bool multiple_regexps = reglist->multiple_regexps = regexps != 0; 789 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; 790 size_t size = reglist->size; 791 792 if (size <= newlen) 793 { 794 if (!size) 795 size = 1; 796 797 do size *= 2; 798 while (size <= newlen); 799 800 reglist->size = size; 801 reglist->regexps = regexps = xrealloc (regexps, size); 802 } 803 if (multiple_regexps) 804 { 805 regexps[len++] = '\\'; 806 regexps[len++] = '|'; 807 } 808 memcpy (regexps + len, pattern, patlen + 1); 809 } 810 } 811 812 /* Ensure that REGLIST represents the disjunction of its regexps. 813 This is done here, rather than earlier, to avoid O(N^2) behavior. */ 814 815 static void 816 summarize_regexp_list (struct regexp_list *reglist) 817 { 818 if (reglist->regexps) 819 { 820 /* At least one regexp was specified. Allocate a fastmap for it. */ 821 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); 822 if (reglist->multiple_regexps) 823 { 824 /* Compile the disjunction of the regexps. 825 (If just one regexp was specified, it is already compiled.) 
*/ 826 char const *m = re_compile_pattern (reglist->regexps, reglist->len, 827 reglist->buf); 828 if (m != 0) 829 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); 830 } 831 } 832 } 833 834 static void 835 try_help (char const *reason_msgid, char const *operand) 836 { 837 if (reason_msgid) 838 error (0, 0, _(reason_msgid), operand); 839 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."), 840 program_name); 841 abort (); 842 } 843 844 static void 845 check_stdout (void) 846 { 847 if (ferror (stdout)) 848 fatal ("write failed"); 849 else if (fclose (stdout) != 0) 850 pfatal_with_name (_("standard output")); 851 } 852 853 static char const * const option_help_msgid[] = { 854 N_("Compare files line by line."), 855 "", 856 N_("-i --ignore-case Ignore case differences in file contents."), 857 N_("--ignore-file-name-case Ignore case when comparing file names."), 858 N_("--no-ignore-file-name-case Consider case when comparing file names."), 859 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."), 860 N_("-b --ignore-space-change Ignore changes in the amount of white space."), 861 N_("-w --ignore-all-space Ignore all white space."), 862 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."), 863 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."), 864 N_("--strip-trailing-cr Strip trailing carriage return on input."), 865 #if HAVE_SETMODE_DOS 866 N_("--binary Read and write data in binary mode."), 867 #endif 868 N_("-a --text Treat all files as text."), 869 "", 870 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\ 871 -u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\ 872 --label LABEL Use LABEL instead of file name.\n\ 873 -p --show-c-function Show which C function each change is in.\n\ 874 -F RE --show-function-line=RE Show the most recent line matching RE."), 875 N_("-q --brief Output only whether files differ."), 876 N_("-e --ed 
Output an ed script."), 877 N_("--normal Output a normal diff."), 878 N_("-n --rcs Output an RCS format diff."), 879 N_("-y --side-by-side Output in two columns.\n\ 880 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\ 881 --left-column Output only the left column of common lines.\n\ 882 --suppress-common-lines Do not output common lines."), 883 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."), 884 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."), 885 N_("--line-format=LFMT Similar, but format all input lines with LFMT."), 886 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."), 887 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."), 888 N_(" GFMT may contain:\n\ 889 %< lines from FILE1\n\ 890 %> lines from FILE2\n\ 891 %= lines common to FILE1 and FILE2\n\ 892 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ 893 LETTERs are as follows for new group, lower case for old group:\n\ 894 F first line number\n\ 895 L last line number\n\ 896 N number of lines = L-F+1\n\ 897 E F-1\n\ 898 M L+1"), 899 N_(" LFMT may contain:\n\ 900 %L contents of line\n\ 901 %l contents of line, excluding any trailing newline\n\ 902 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), 903 N_(" Either GFMT or LFMT may contain:\n\ 904 %% %\n\ 905 %c'C' the single character C\n\ 906 %c'\\OOO' the character with octal code OOO"), 907 "", 908 N_("-l --paginate Pass the output through `pr' to paginate it."), 909 N_("-t --expand-tabs Expand tabs to spaces in output."), 910 N_("-T --initial-tab Make tabs line up by prepending a tab."), 911 N_("--tabsize=NUM Tab stops are every NUM (default 8) print columns."), 912 "", 913 N_("-r --recursive Recursively compare any subdirectories found."), 914 N_("-N --new-file Treat absent files as empty."), 915 N_("--unidirectional-new-file Treat absent first files as empty."), 916 N_("-s 
--report-identical-files Report when two files are the same."), 917 N_("-x PAT --exclude=PAT Exclude files that match PAT."), 918 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."), 919 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."), 920 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."), 921 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."), 922 "", 923 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."), 924 N_("-d --minimal Try hard to find a smaller set of changes."), 925 N_("--speed-large-files Assume large files and many scattered small changes."), 926 "", 927 N_("-v --version Output version info."), 928 N_("--help Output this help."), 929 "", 930 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."), 931 N_("If --from-file or --to-file is given, there are no restrictions on FILES."), 932 N_("If a FILE is `-', read standard input."), 933 N_("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."), 934 "", 935 N_("Report bugs to <bug-gnu-utils@gnu.org>."), 936 0 937 }; 938 939 static void 940 usage (void) 941 { 942 char const * const *p; 943 944 printf (_("Usage: %s [OPTION]... FILES\n"), program_name); 945 946 for (p = option_help_msgid; *p; p++) 947 { 948 if (!**p) 949 putchar ('\n'); 950 else 951 { 952 char const *msg = _(*p); 953 char const *nl; 954 while ((nl = strchr (msg, '\n'))) 955 { 956 int msglen = nl + 1 - msg; 957 printf (" %.*s", msglen, msg); 958 msg = nl + 1; 959 } 960 961 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); 962 } 963 } 964 } 965 966 /* Set VAR to VALUE, reporting an OPTION error if this is a 967 conflict. 
*/ 968 static void 969 specify_value (char const **var, char const *value, char const *option) 970 { 971 if (*var && strcmp (*var, value) != 0) 972 { 973 error (0, 0, _("conflicting %s option value `%s'"), option, value); 974 try_help (0, 0); 975 } 976 *var = value; 977 } 978 979 /* Set the output style to STYLE, diagnosing conflicts. */ 980 static void 981 specify_style (enum output_style style) 982 { 983 if (output_style != style) 984 { 985 if (output_style != OUTPUT_UNSPECIFIED) 986 try_help ("conflicting output style options", 0); 987 output_style = style; 988 } 989 } 990 991 /* Set the last-modified time of *ST to be the current time. */ 992 993 static void 994 set_mtime_to_now (struct stat *st) 995 { 996 #ifdef ST_MTIM_NSEC 997 998 # if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME 999 if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0) 1000 return; 1001 # endif 1002 1003 # if HAVE_GETTIMEOFDAY 1004 { 1005 struct timeval timeval; 1006 if (gettimeofday (&timeval, 0) == 0) 1007 { 1008 st->st_mtime = timeval.tv_sec; 1009 st->st_mtim.ST_MTIM_NSEC = timeval.tv_usec * 1000; 1010 return; 1011 } 1012 } 1013 # endif 1014 1015 #endif /* ST_MTIM_NSEC */ 1016 1017 time (&st->st_mtime); 1018 } 1019 1020 /* Compare two files (or dirs) with parent comparison PARENT 1021 and names NAME0 and NAME1. 1022 (If PARENT is 0, then the first name is just NAME0, etc.) 1023 This is self-contained; it opens the files and closes them. 1024 1025 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if 1026 different, EXIT_TROUBLE if there is a problem opening them. 
*/

static int
compare_files (struct comparison const *parent,
               char const *name0,
               char const *name1)
{
  struct comparison cmp;
#define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
  int i;
  int status = EXIT_SUCCESS;
  char *free0 = 0;
  char *free1 = 0;

  /* A null name marks an entry that is missing on that side of a
     directory comparison.  Unless -N is in effect (or
     --unidirectional-new-file is and the second name is present),
     just report the lone entry and stop.  */

  if (! (name0 && name1)
      && ! (unidirectional_new_file && name1)
      && ! new_file)
    {
      /* Index of the side that actually holds the entry.  */
      int lone = (name0 == 0);

      /* See POSIX 1003.1-2001 for this format.  */
      message ("Only in %s: %s\n",
               parent->file[lone].name, lone ? name1 : name0);

      /* Return EXIT_FAILURE so that diff_dirs will return
         EXIT_FAILURE ("some files differ").  */
      return EXIT_FAILURE;
    }

  memset (cmp.file, 0, sizeof cmp.file);
  cmp.parent = parent;

  /* cmp.file[f].desc markers */
#define NONEXISTENT (-1) /* nonexistent file */
#define UNOPENED (-2) /* unopened file (e.g. directory) */
#define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */

#define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */

  cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
  cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;

  /* Give each side a full name, borrowing the other side's name for a
     side that has none.  */

  if (! name0)
    name0 = name1;
  if (! name1)
    name1 = name0;

  if (parent)
    {
      /* Names are relative to the parent directories; the joined
         paths are released at the end of this function.  */
      free0 = dir_file_pathname (parent->file[0].name, name0);
      cmp.file[0].name = free0;
      free1 = dir_file_pathname (parent->file[1].name, name1);
      cmp.file[1].name = free1;
    }
  else
    {
      cmp.file[0].name = name0;
      cmp.file[1].name = name1;
    }

  /* Collect stat information for every side that has a name.  A
     failure is remembered in DESC as an encoded errno value.  */

  for (i = 0; i < 2; i++)
    {
      struct stat *sb = &cmp.file[i].stat;

      if (cmp.file[i].desc == NONEXISTENT)
        continue;

      if (i != 0 && file_name_cmp (cmp.file[i].name, cmp.file[0].name) == 0)
        {
          /* Same name as the first file: reuse its results.  */
          cmp.file[i].desc = cmp.file[0].desc;
          *sb = cmp.file[0].stat;
          continue;
        }

      if (strcmp (cmp.file[i].name, "-") != 0)
        {
          if (stat (cmp.file[i].name, sb) != 0)
            cmp.file[i].desc = ERRNO_ENCODE (errno);
          continue;
        }

      /* "-" names standard input.  */
      cmp.file[i].desc = STDIN_FILENO;
      if (fstat (STDIN_FILENO, sb) != 0)
        {
          cmp.file[i].desc = ERRNO_ENCODE (errno);
          continue;
        }

      if (S_ISREG (sb->st_mode))
        {
          /* Only the bytes from the current offset onward count
             toward the size.  */
          off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR);
          if (pos < 0)
            cmp.file[i].desc = ERRNO_ENCODE (errno);
          else
            sb->st_size = MAX (0, sb->st_size - pos);
        }

      /* POSIX 1003.1-2001 requires current time for
         stdin.  */
      set_mtime_to_now (sb);
    }

  /* Mark files as nonexistent as needed for -N and -P, if they are
     inaccessible empty regular files (the kind of files that 'patch'
     creates to indicate nonexistent backups), or if they are
     top-level files that do not exist but their counterparts do
     exist.  Side 0 is settled before side 1 is examined.  */

  for (i = 0; i < 2; i++)
    {
      bool treat_as_absent;

      if (! new_file && ! (i == 0 && unidirectional_new_file))
        continue;

      if (cmp.file[i].desc == UNOPENED)
        treat_as_absent
          = (S_ISREG (cmp.file[i].stat.st_mode)
             && ! (cmp.file[i].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
             && cmp.file[i].stat.st_size == 0);
      else
        treat_as_absent
          = (cmp.file[i].desc == ERRNO_ENCODE (ENOENT)
             && ! parent
             && cmp.file[1 - i].desc == UNOPENED);

      if (treat_as_absent)
        cmp.file[i].desc = NONEXISTENT;
    }

  /* A nonexistent side gets an all-zero stat, except that it takes
     its mode from the other side.  */

  for (i = 0; i < 2; i++)
    {
      if (cmp.file[i].desc != NONEXISTENT)
        continue;
      memset (&cmp.file[i].stat, 0, sizeof cmp.file[i].stat);
      cmp.file[i].stat.st_mode = cmp.file[1 - i].stat.st_mode;
    }

  /* Report every stat failure recorded above.  */

  for (i = 0; i < 2; i++)
    {
      int e = ERRNO_DECODE (cmp.file[i].desc);
      if (e < 0)
        continue;               /* Not an encoded errno value.  */
      errno = e;
      perror_with_name (cmp.file[i].name);
      status = EXIT_TROUBLE;
    }

  if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1))
    {
      /* If one is a directory, and it was specified in the command line,
         use the file in that dir with the other file's basename.  */

      int fnm_arg = DIR_P (0);
      int dir_arg = 1 - fnm_arg;
      char const *fnm = cmp.file[fnm_arg].name;
      char const *dir = cmp.file[dir_arg].name;
      char const *filename;

      free0 = dir_file_pathname (dir, base_name (fnm));
      cmp.file[dir_arg].name = free0;
      filename = free0;

      if (strcmp (fnm, "-") == 0)
        fatal ("cannot compare `-' to a directory");

      if (stat (filename, &cmp.file[dir_arg].stat) != 0)
        {
          perror_with_name (filename);
          status = EXIT_TROUBLE;
        }
    }

  /* Nothing is compared when an error was already reported (one of
     the files should exist but does not) or when neither file
     "exists".  */

  if (status == EXIT_SUCCESS
      && ! (cmp.file[0].desc == NONEXISTENT
            && cmp.file[1].desc == NONEXISTENT))
    {
      bool const is_dir0 = DIR_P (0);
      bool const is_dir1 = DIR_P (1);
      bool const is_reg0 = S_ISREG (cmp.file[0].stat.st_mode) != 0;
      bool const is_reg1 = S_ISREG (cmp.file[1].stat.st_mode) != 0;
      char const *label0 = file_label[0] ? file_label[0] : cmp.file[0].name;
      char const *label1 = file_label[1] ? file_label[1] : cmp.file[1].name;
      bool same_files
        = (cmp.file[0].desc != NONEXISTENT
           && cmp.file[1].desc != NONEXISTENT
           && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
           && same_file_attributes (&cmp.file[0].stat,
                                    &cmp.file[1].stat));

      if (same_files && no_diff_means_no_output)
        {
          /* The two named files are actually the same physical file.
             We know they are identical without actually reading them.  */
        }
      else if (is_dir0 && is_dir1)
        {
          if (output_style == OUTPUT_IFDEF)
            fatal ("-D option not supported with directories");

          /* Both are directories: compare the files in them, except
             one level down when -r was not specified.  */

          if (! parent || recursive)
            status = diff_dirs (&cmp, compare_files);
          else
            {
              /* See POSIX 1003.1-2001 for this format.  */
              message ("Common subdirectories: %s and %s\n",
                       cmp.file[0].name, cmp.file[1].name);
            }
        }
      else if (is_dir0 || is_dir1
               || (parent && ! (is_reg0 && is_reg1)))
        {
          bool const missing0 = cmp.file[0].desc == NONEXISTENT;
          bool const missing1 = cmp.file[1].desc == NONEXISTENT;

          if (! missing0 && ! missing1)
            {
              /* We have two files that are not to be compared.  */

              /* See POSIX 1003.1-2001 for this format.  */
              message5 ("File %s is a %s while file %s is a %s\n",
                        label0,
                        file_type (&cmp.file[0].stat),
                        label1,
                        file_type (&cmp.file[1].stat));

              /* This is a difference.  */
              status = EXIT_FAILURE;
            }
          else if ((is_dir0 || is_dir1)
                   && recursive
                   && (new_file || (unidirectional_new_file && missing0)))
            {
              /* We have a subdirectory that exists only in one
                 directory, and the options ask for it to be
                 compared anyway.  */
              status = diff_dirs (&cmp, compare_files);
            }
          else
            {
              /* See POSIX 1003.1-2001 for this format.  */
              message ("Only in %s: %s\n",
                       parent->file[missing0 ? 1 : 0].name, name0);

              status = EXIT_FAILURE;
            }
        }
      else if (files_can_be_treated_as_binary
               && is_reg0
               && is_reg1
               && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size)
        {
          /* Regular files of different sizes differ; no need to read
             them.  */
          message ("Files %s and %s differ\n", label0, label1);
          status = EXIT_FAILURE;
        }
      else
        {
          /* Both exist and neither is a directory.  */

          /* Open the files and record their descriptors.  When both
             names denote the same file, the second side shares the
             first side's descriptor.  */

          for (i = 0; i < 2; i++)
            {
              if (cmp.file[i].desc != UNOPENED)
                continue;

              if (i == 1 && same_files)
                {
                  cmp.file[1].desc = cmp.file[0].desc;
                  continue;
                }

              cmp.file[i].desc = open (cmp.file[i].name, O_RDONLY, 0);
              if (cmp.file[i].desc < 0)
                {
                  perror_with_name (cmp.file[i].name);
                  status = EXIT_TROUBLE;
                }
            }

#if HAVE_SETMODE_DOS
          if (binary)
            for (i = 0; i < 2; i++)
              if (0 <= cmp.file[i].desc)
                set_binary_mode (cmp.file[i].desc, true);
#endif

          /* Compare the files, if no error was found.  */

          if (status == EXIT_SUCCESS)
            status = diff_2_files (&cmp);

          /* Close the file descriptors, closing a shared descriptor
             only once.  */

          for (i = 0; i < 2; i++)
            if (0 <= cmp.file[i].desc
                && (i == 0 || cmp.file[0].desc != cmp.file[1].desc)
                && close (cmp.file[i].desc) != 0)
              {
                perror_with_name (cmp.file[i].name);
                status = EXIT_TROUBLE;
              }
        }
    }

  /* Now the comparison has been done, if no error prevented it,
     and STATUS is the value this function will return.  */

  if (status != EXIT_SUCCESS)
    {
      /* Flush stdout so that the user sees differences immediately.
         This can hurt performance, unfortunately.  */
      if (fflush (stdout) != 0)
        pfatal_with_name (_("standard output"));
    }
  else if (report_identical_files && !DIR_P (0))
    {
      message ("Files %s and %s are identical\n",
               file_label[0] ? file_label[0] : cmp.file[0].name,
               file_label[1] ? file_label[1] : cmp.file[1].name);
    }

  if (free0)
    free (free0);
  if (free1)
    free (free1);

  return status;
}