/* $NetBSD: diff.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $ */

/* diff - compare files line by line

   Copyright (C) 1988, 1989, 1992, 1993, 1994, 1996, 1998, 2001, 2002
   Free Software Foundation, Inc.

   This file is part of GNU DIFF.

   GNU DIFF is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GNU DIFF is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   See the GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU DIFF; see the file COPYING.
   If not, write to the Free Software Foundation,
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#define GDIFF_MAIN
#include "diff.h"
#include <c-stack.h>
#include <dirname.h>
#include <error.h>
#include <exclude.h>
#include <exitfail.h>
#include <fnmatch.h>
#include <freesoft.h>
#include <getopt.h>
#include <hard-locale.h>
#include <prepargs.h>
#include <quotesys.h>
#include <regex.h>
#include <setmode.h>
#include <xalloc.h>

/* Author credit printed by the -v (--version) case in main.  */
static char const authorship_msgid[] =
  N_("Written by Paul Eggert, Mike Haertel, David Hayes,\n\
Richard Stallman, and Len Tower.");

/* Copyright line printed by the -v (--version) case in main.  */
static char const copyright_string[] =
  "Copyright (C) 2002 Free Software Foundation, Inc.";

/* Smallest gutter allowed between the two halves of side-by-side
   output; used by the width computation in main.  */
#ifndef GUTTER_WIDTH_MINIMUM
# define GUTTER_WIDTH_MINIMUM 3
#endif

/* Accumulator for the patterns given by repeated -F or -I options.
   add_regexp appends each pattern's text to REGEXPS (separated by
   "\|") and summarize_regexp_list later compiles the combined text
   into *BUF.  */
struct regexp_list
{
  char *regexps;        /* chars representing disjunction of the regexps */
  size_t len;           /* chars used in `regexps' */
  size_t size;          /* size malloc'ed for `regexps'; 0 if not malloc'ed */
  bool multiple_regexps;/* Does `regexps' represent a disjunction?  */
  struct re_pattern_buffer *buf; /* compiled pattern; set up in main */
};

/* Forward declarations of the static helpers defined later in this file.  */
static int compare_files (struct comparison const *, char const *, char const *);
static void add_regexp (struct regexp_list *, char const *);
static void summarize_regexp_list (struct regexp_list *);
static void specify_style (enum output_style);
static void specify_value (char const **, char const *, char const *);
static void try_help (char const *, char const *) __attribute__((noreturn));
static void check_stdout (void);
static void usage (void);

/* If comparing directories, compare their common subdirectories
   recursively.  */
static bool recursive;

/* In context diffs, show previous lines that match these regexps.  */
static struct regexp_list function_regexp_list;

/* Ignore changes affecting only lines that match these regexps.  */
static struct regexp_list ignore_regexp_list;

#if HAVE_SETMODE_DOS
/* Use binary I/O when reading and writing data (--binary).
   On POSIX hosts, this has no effect.  */
static bool binary;
#endif

/* When comparing directories, if a file appears only in one
   directory, treat it as present but empty in the other (-N).
   Then `patch' would create the file with appropriate contents.  */
static bool new_file;

/* When comparing directories, if a file appears only in the second
   directory of the two, treat it as present but empty in the other
   (--unidirectional-new-file).
   Then `patch' would create the file with appropriate contents.  */
static bool unidirectional_new_file;

/* Report files compared that are the same (-s).
   Normally nothing is output when that happens.  */
static bool report_identical_files;


/* Return a string containing the command options with which diff was invoked.
   Spaces appear between what were separate ARGV-elements.
   There is a space at the beginning but none at the end.
   If there were no options, the result is an empty string.

   Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT,
   the length of that vector.

   The result is obtained from xmalloc; main stores it in switch_string.  */

static char *
option_list (char **optionvec, int count)
{
  int i;
  size_t size = 1;      /* starts at 1 for the terminating null byte */
  char *result;
  char *p;

  /* First pass: add up one separator plus the quoted length of each
     element.  NOTE(review): this relies on quote_system_arg returning
     the quoted length when given a null destination -- confirm in
     quotesys.h.  */
  for (i = 0; i < count; i++)
    size += 1 + quote_system_arg ((char *) 0, optionvec[i]);

  p = result = xmalloc (size);

  /* Second pass: emit " " followed by each quoted element.  */
  for (i = 0; i < count; i++)
    {
      *p++ = ' ';
      p += quote_system_arg (p, optionvec[i]);
    }

  *p = 0;
  return result;
}


/* Return an option value suitable for add_exclude.
   The value reflects ignore_file_name_case at the time of the call,
   so the order of -x/-X relative to --ignore-file-name-case matters.  */

static int
exclude_options (void)
{
  return EXCLUDE_WILDCARDS | (ignore_file_name_case ? FNM_CASEFOLD : 0);
}

/* Short options accepted by getopt_long; a letter followed by `:'
   takes an argument.  The digits implement the obsolete -NUM context
   option handled in main.  */
static char const shortopts[] =
"0123456789abBcC:dD:eEfF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y";

/* Values for long options that do not have single-letter equivalents.  */
enum
{
  BINARY_OPTION = CHAR_MAX + 1,
  FROM_FILE_OPTION,
  HELP_OPTION,
  HORIZON_LINES_OPTION,
  IGNORE_FILE_NAME_CASE_OPTION,
  INHIBIT_HUNK_MERGE_OPTION,
  LEFT_COLUMN_OPTION,
  LINE_FORMAT_OPTION,
  NO_IGNORE_FILE_NAME_CASE_OPTION,
  NORMAL_OPTION,
  SDIFF_MERGE_ASSIST_OPTION,
  STRIP_TRAILING_CR_OPTION,
  SUPPRESS_COMMON_LINES_OPTION,
  TO_FILE_OPTION,

  /* These options must be in sequence.  */
  UNCHANGED_LINE_FORMAT_OPTION,
  OLD_LINE_FORMAT_OPTION,
  NEW_LINE_FORMAT_OPTION,

  /* These options must be in sequence.  */
  UNCHANGED_GROUP_FORMAT_OPTION,
  OLD_GROUP_FORMAT_OPTION,
  NEW_GROUP_FORMAT_OPTION,
  CHANGED_GROUP_FORMAT_OPTION
};

/* Option names used in conflict diagnostics, indexed by
   (option value - UNCHANGED_GROUP_FORMAT_OPTION); see main.  */
static char const group_format_option[][sizeof "--unchanged-group-format"] =
  {
    "--unchanged-group-format",
    "--old-group-format",
    "--new-group-format",
    "--changed-group-format"
  };

/* Option names used in conflict diagnostics, indexed by
   (option value - UNCHANGED_LINE_FORMAT_OPTION); see main.  */
static char const line_format_option[][sizeof "--unchanged-line-format"] =
  {
    "--unchanged-line-format",
    "--old-line-format",
    "--new-line-format"
  };

/* Long option table for getopt_long.  The second field is 0 for no
   argument, 1 for a required argument and 2 for an optional one; the
   last field is the value getopt_long returns for that option.  */
static struct option const longopts[] =
{
  {"binary", 0, 0, BINARY_OPTION},
  {"brief", 0, 0, 'q'},
  {"changed-group-format", 1, 0, CHANGED_GROUP_FORMAT_OPTION},
  {"context", 2, 0, 'C'},
  {"ed", 0, 0, 'e'},
  {"exclude", 1, 0, 'x'},
  {"exclude-from", 1, 0, 'X'},
  {"expand-tabs", 0, 0, 't'},
  {"forward-ed", 0, 0, 'f'},
  {"from-file", 1, 0, FROM_FILE_OPTION},
  {"help", 0, 0, HELP_OPTION},
  {"horizon-lines", 1, 0, HORIZON_LINES_OPTION},
  {"ifdef", 1, 0, 'D'},
  {"ignore-all-space", 0, 0, 'w'},
  {"ignore-blank-lines", 0, 0, 'B'},
  {"ignore-case", 0, 0, 'i'},
  {"ignore-file-name-case", 0, 0, IGNORE_FILE_NAME_CASE_OPTION},
  {"ignore-matching-lines", 1, 0, 'I'},
  {"ignore-space-change", 0, 0, 'b'},
  {"ignore-tab-expansion", 0, 0, 'E'},
  {"inhibit-hunk-merge", 0, 0, INHIBIT_HUNK_MERGE_OPTION},
  {"initial-tab", 0, 0, 'T'},
  {"label", 1, 0, 'L'},
  {"left-column", 0, 0, LEFT_COLUMN_OPTION},
  {"line-format", 1, 0, LINE_FORMAT_OPTION},
  {"minimal", 0, 0, 'd'},
  {"new-file", 0, 0, 'N'},
  {"new-group-format", 1, 0, NEW_GROUP_FORMAT_OPTION},
  {"new-line-format", 1, 0, NEW_LINE_FORMAT_OPTION},
  {"no-ignore-file-name-case", 0, 0, NO_IGNORE_FILE_NAME_CASE_OPTION},
  {"normal", 0, 0, NORMAL_OPTION},
  {"old-group-format", 1, 0, OLD_GROUP_FORMAT_OPTION},
  {"old-line-format", 1, 0, OLD_LINE_FORMAT_OPTION},
  {"paginate", 0, 0, 'l'},
  {"rcs", 0, 0, 'n'},
  {"recursive", 0, 0, 'r'},
  {"report-identical-files", 0, 0, 's'},
  {"sdiff-merge-assist", 0, 0, SDIFF_MERGE_ASSIST_OPTION},
  {"show-c-function", 0, 0, 'p'},
  {"show-function-line", 1, 0, 'F'},
  {"side-by-side", 0, 0, 'y'},
  {"speed-large-files", 0, 0, 'H'},
  {"starting-file", 1, 0, 'S'},
  {"strip-trailing-cr", 0, 0, STRIP_TRAILING_CR_OPTION},
  {"suppress-common-lines", 0, 0, SUPPRESS_COMMON_LINES_OPTION},
  {"text", 0, 0, 'a'},
  {"to-file", 1, 0, TO_FILE_OPTION},
  {"unchanged-group-format", 1, 0, UNCHANGED_GROUP_FORMAT_OPTION},
  {"unchanged-line-format", 1, 0, UNCHANGED_LINE_FORMAT_OPTION},
  {"unidirectional-new-file", 0, 0, 'P'},
  {"unified", 2, 0, 'U'},
  {"version", 0, 0, 'v'},
  {"width", 1, 0, 'W'},
  {0, 0, 0, 0}
};

/* Program entry point.  Decode the options in ARGV into this file's
   flags and the option globals, derive the settings that depend on
   several options at once, then run compare_files on the operands.

   With --from-file or --to-file every operand is compared against
   that file and the largest status seen is used; otherwise exactly
   two operands are required.  The process exits with that status
   after flushing queued messages and checking standard output.  */
int
main (int argc, char **argv)
{
  int exit_status = EXIT_SUCCESS;
  int c;
  int i;
  int prev = -1;                /* previous option value, for -NUM digits */
  lin ocontext = -1;            /* obsolete -NUM context; -1 if not given */
  bool explicit_context = 0;    /* was -C, -U, --context or --unified seen? */
  int width = 0;                /* -W value; 0 if not given */
  bool show_c_function = 0;
  char const *from_file = 0;
  char const *to_file = 0;
  uintmax_t numval;
  char *numend;

  /* Do our initializations.  */
  exit_failure = 2;
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
  c_stack_action (c_stack_die);
  function_regexp_list.buf = &function_regexp;
  ignore_regexp_list.buf = &ignore_regexp;
  re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
  excluded = new_exclude ();

  /* Decode the options.  */

  while ((c = getopt_long (argc, argv, shortopts, longopts, 0)) != -1)
    {
      switch (c)
        {
        case 0:
          break;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
          /* Obsolete -NUM context.  A digit that does not follow
             another digit option starts a new number; otherwise it
             extends the current one, saturating at LIN_MAX.  */
          if (! ISDIGIT (prev))
            ocontext = c - '0';
          else if (LIN_MAX / 10 < ocontext
                   || ((ocontext = 10 * ocontext + c - '0') < 0))
            ocontext = LIN_MAX;
          break;

        case 'a':
          text = 1;
          break;

        case 'b':
          /* Only ever raise the white-space setting.  */
          if (ignore_white_space < IGNORE_SPACE_CHANGE)
            ignore_white_space = IGNORE_SPACE_CHANGE;
          break;

        case 'B':
          ignore_blank_lines = 1;
          break;

        case 'C':               /* +context[=lines] */
        case 'U':               /* +unified[=lines] */
          {
            /* optarg is null only for the long forms given without
               `=NUM' (has_arg is 2 in longopts); default to 3 then.  */
            if (optarg)
              {
                numval = strtoumax (optarg, &numend, 10);
                if (*numend)
                  try_help ("invalid context length `%s'", optarg);
                if (LIN_MAX < numval)
                  numval = LIN_MAX;
              }
            else
              numval = 3;

            specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT);
            /* Repeated options keep the largest context requested.  */
            if (context < numval)
              context = numval;
            explicit_context = 1;
          }
          break;

        case 'c':
          specify_style (OUTPUT_CONTEXT);
          if (context < 3)
            context = 3;
          break;

        case 'd':
          minimal = 1;
          break;

        case 'D':
          specify_style (OUTPUT_IFDEF);
          {
            /* The three %c conversions are given 0 below, so sprintf
               writes null bytes that split B into four consecutive
               strings: the unchanged, old, new and changed group
               formats, in that order.  */
            static char const C_ifdef_group_formats[] =
              "%%=%c#ifndef %s\n%%<#endif /* ! %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n";
            char *b = xmalloc (sizeof C_ifdef_group_formats
                               + 7 * strlen (optarg) - 14 /* 7*"%s" */
                               - 8 /* 5*"%%" + 3*"%c" */);
            sprintf (b, C_ifdef_group_formats,
                     0,
                     optarg, optarg, 0,
                     optarg, optarg, 0,
                     optarg, optarg, optarg);
            /* Hand out one null-terminated piece per group format.  */
            for (i = 0; i < sizeof group_format / sizeof *group_format; i++)
              {
                specify_value (&group_format[i], b, "-D");
                b += strlen (b) + 1;
              }
          }
          break;

        case 'e':
          specify_style (OUTPUT_ED);
          break;

        case 'E':
          /* Only ever raise the white-space setting.  */
          if (ignore_white_space < IGNORE_TAB_EXPANSION)
            ignore_white_space = IGNORE_TAB_EXPANSION;
          break;

        case 'f':
          specify_style (OUTPUT_FORWARD_ED);
          break;

        case 'F':
          add_regexp (&function_regexp_list, optarg);
          break;

        case 'h':
          /* Split the files into chunks for faster processing.
             Usually does not change the result.

             This currently has no effect.  */
          break;

        case 'H':
          speed_large_files = 1;
          break;

        case 'i':
          ignore_case = 1;
          break;

        case 'I':
          add_regexp (&ignore_regexp_list, optarg);
          break;

        case 'l':
          /* An empty pr_program means no paginator is available.  */
          if (!pr_program[0])
            try_help ("pagination not supported on this host", 0);
          paginate = 1;
#ifdef SIGCHLD
          /* Pagination requires forking and waiting, and
             System V fork+wait does not work if SIGCHLD is ignored.  */
          signal (SIGCHLD, SIG_DFL);
#endif
          break;

        case 'L':
          /* The first -L labels the first file, the second -L the
             second file; a third is an error.  */
          if (!file_label[0])
            file_label[0] = optarg;
          else if (!file_label[1])
            file_label[1] = optarg;
          else
            fatal ("too many file label options");
          break;

        case 'n':
          specify_style (OUTPUT_RCS);
          break;

        case 'N':
          new_file = 1;
          break;

        case 'p':
          show_c_function = 1;
          add_regexp (&function_regexp_list, "^[[:alpha:]$_]");
          break;

        case 'P':
          unidirectional_new_file = 1;
          break;

        case 'q':
          brief = 1;
          break;

        case 'r':
          recursive = 1;
          break;

        case 's':
          report_identical_files = 1;
          break;

        case 'S':
          specify_value (&starting_file, optarg, "-S");
          break;

        case 't':
          expand_tabs = 1;
          break;

        case 'T':
          initial_tab = 1;
          break;

        case 'u':
          specify_style (OUTPUT_UNIFIED);
          if (context < 3)
            context = 3;
          break;

        case 'v':
          printf ("diff %s\n%s\n\n%s\n\n%s\n",
                  version_string, copyright_string,
                  _(free_software_msgid), _(authorship_msgid));
          check_stdout ();
          return EXIT_SUCCESS;

        case 'w':
          ignore_white_space = IGNORE_ALL_SPACE;
          break;

        case 'x':
          add_exclude (excluded, optarg, exclude_options ());
          break;

        case 'X':
          if (add_exclude_file (add_exclude, excluded, optarg,
                                exclude_options (), '\n'))
            pfatal_with_name (optarg);
          break;

        case 'y':
          specify_style (OUTPUT_SDIFF);
          break;

        case 'W':
          /* The width must be in 1..INT_MAX with no trailing text.
             Repeating the same width is accepted; a different one
             is a conflict.  */
          numval = strtoumax (optarg, &numend, 10);
          if (! (0 < numval && numval <= INT_MAX) || *numend)
            try_help ("invalid width `%s'", optarg);
          if (width != numval)
            {
              if (width)
                fatal ("conflicting width options");
              width = numval;
            }
          break;

        case BINARY_OPTION:
#if HAVE_SETMODE_DOS
          binary = 1;
          set_binary_mode (STDOUT_FILENO, 1);
#endif
          break;

        case FROM_FILE_OPTION:
          specify_value (&from_file, optarg, "--from-file");
          break;

        case HELP_OPTION:
          usage ();
          check_stdout ();
          return EXIT_SUCCESS;

        case HORIZON_LINES_OPTION:
          numval = strtoumax (optarg, &numend, 10);
          if (*numend)
            try_help ("invalid horizon length `%s'", optarg);
          /* Keep the largest horizon requested, capped at LIN_MAX.  */
          horizon_lines = MAX (horizon_lines, MIN (numval, LIN_MAX));
          break;

        case IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = 1;
          break;

        case INHIBIT_HUNK_MERGE_OPTION:
          /* This option is obsolete, but accept it for backward
             compatibility.  */
          break;

        case LEFT_COLUMN_OPTION:
          left_column = 1;
          break;

        case LINE_FORMAT_OPTION:
          /* --line-format sets every line format at once.  */
          specify_style (OUTPUT_IFDEF);
          for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
            specify_value (&line_format[i], optarg, "--line-format");
          break;

        case NO_IGNORE_FILE_NAME_CASE_OPTION:
          ignore_file_name_case = 0;
          break;

        case NORMAL_OPTION:
          specify_style (OUTPUT_NORMAL);
          break;

        case SDIFF_MERGE_ASSIST_OPTION:
          specify_style (OUTPUT_SDIFF);
          sdiff_merge_assist = 1;
          break;

        case STRIP_TRAILING_CR_OPTION:
          strip_trailing_cr = 1;
          break;

        case SUPPRESS_COMMON_LINES_OPTION:
          suppress_common_lines = 1;
          break;

        case TO_FILE_OPTION:
          specify_value (&to_file, optarg, "--to-file");
          break;

        case UNCHANGED_LINE_FORMAT_OPTION:
        case OLD_LINE_FORMAT_OPTION:
        case NEW_LINE_FORMAT_OPTION:
          /* Turn the option value into an index; this is why the
             enumerators must be in sequence.  */
          specify_style (OUTPUT_IFDEF);
          c -= UNCHANGED_LINE_FORMAT_OPTION;
          specify_value (&line_format[c], optarg, line_format_option[c]);
          break;

        case UNCHANGED_GROUP_FORMAT_OPTION:
        case OLD_GROUP_FORMAT_OPTION:
        case NEW_GROUP_FORMAT_OPTION:
        case CHANGED_GROUP_FORMAT_OPTION:
          /* Same index trick as for the line formats above.  */
          specify_style (OUTPUT_IFDEF);
          c -= UNCHANGED_GROUP_FORMAT_OPTION;
          specify_value (&group_format[c], optarg, group_format_option[c]);
          break;

        default:
          try_help (0, 0);
        }
      prev = c;
    }

  /* With no explicit style, -p selects context output (3 lines of
     context unless -NUM was given); otherwise use the normal style.  */
  if (output_style == OUTPUT_UNSPECIFIED)
    {
      if (show_c_function)
        {
          specify_style (OUTPUT_CONTEXT);
          if (ocontext < 0)
            context = 3;
        }
      else
        specify_style (OUTPUT_NORMAL);
    }

  if (output_style != OUTPUT_CONTEXT || hard_locale (LC_TIME))
    time_format = "%Y-%m-%d %H:%M:%S.%N %z";
  else
    {
      /* See POSIX 1003.1-2001 for this format.  */
      time_format = "%a %b %e %T %Y";
    }

  /* Reconcile the obsolete -NUM option with the chosen style.  When
     posix2_version reports 200112 or later the option is rejected.  */
  if (0 <= ocontext)
    {
      bool modern_usage = 200112 <= posix2_version ();

      if ((output_style == OUTPUT_CONTEXT
           || output_style == OUTPUT_UNIFIED)
          && (context < ocontext
              || (ocontext < context && ! explicit_context)))
        {
          if (modern_usage)
            {
              error (0, 0,
                     _("`-%ld' option is obsolete; use `-%c %ld'"),
                     (long) ocontext,
                     output_style == OUTPUT_CONTEXT ? 'C' : 'U',
                     (long) ocontext);
              try_help (0, 0);
            }
          context = ocontext;
        }
      else
        {
          if (modern_usage)
            {
              error (0, 0, _("`-%ld' option is obsolete; omit it"),
                     (long) ocontext);
              try_help (0, 0);
            }
        }
    }

  {
    /*
     * We maximize first the half line width, and then the gutter width,
     * according to the following constraints:
     * 1.  Two half lines plus a gutter must fit in a line.
     * 2.  If the half line width is nonzero:
     *     a.  The gutter width is at least GUTTER_WIDTH_MINIMUM.
     *     b.  If tabs are not expanded to spaces,
     *         a half line plus a gutter is an integral number of tabs,
     *         so that tabs in the right column line up.
     */
    unsigned int t = expand_tabs ? 1 : TAB_WIDTH;
    int w = width ? width : 130;
    int off = (w + t + GUTTER_WIDTH_MINIMUM) / (2 * t) * t;
    /* Note that the two assignments below are joined by a comma
       operator and so form a single statement.  */
    sdiff_half_width = MAX (0, MIN (off - GUTTER_WIDTH_MINIMUM, w - off)),
      sdiff_column2_offset = sdiff_half_width ? off : w;
  }

  /* Make the horizon at least as large as the context, so that
     shift_boundaries has more freedom to shift the first and last hunks.  */
  if (horizon_lines < context)
    horizon_lines = context;

  summarize_regexp_list (&function_regexp_list);
  summarize_regexp_list (&ignore_regexp_list);

  /* Fill in whichever formats the user left unspecified.  */
  if (output_style == OUTPUT_IFDEF)
    {
      for (i = 0; i < sizeof line_format / sizeof *line_format; i++)
        if (!line_format[i])
          line_format[i] = "%l\n";
      if (!group_format[OLD])
        group_format[OLD]
          = group_format[CHANGED] ? group_format[CHANGED] : "%<";
      if (!group_format[NEW])
        group_format[NEW]
          = group_format[CHANGED] ? group_format[CHANGED] : "%>";
      if (!group_format[UNCHANGED])
        group_format[UNCHANGED] = "%=";
      if (!group_format[CHANGED])
        group_format[CHANGED] = concat (group_format[OLD],
                                        group_format[NEW], "");
    }

  /* For the ifdef style, identical input produces no output only when
     the unchanged group format is empty, or is "%=" with an empty
     unchanged line format.  For other styles, only side-by-side
     output without --suppress-common-lines prints common lines.  */
  no_diff_means_no_output =
    (output_style == OUTPUT_IFDEF ?
      (!*group_format[UNCHANGED]
       || (strcmp (group_format[UNCHANGED], "%=") == 0
           && !*line_format[UNCHANGED]))
     : (output_style != OUTPUT_SDIFF) | suppress_common_lines);

  /* Set only for --brief with none of the ignore/strip options on.  */
  files_can_be_treated_as_binary =
    (brief
     & ~ (ignore_blank_lines | ignore_case | strip_trailing_cr
          | (ignore_regexp_list.regexps || ignore_white_space)));

  /* Record the options (everything before the first operand).  */
  switch_string = option_list (argv + 1, optind - 1);

  if (from_file)
    {
      if (to_file)
        fatal ("--from-file and --to-file both specified");
      else
        for (; optind < argc; optind++)
          {
            int status = compare_files ((struct comparison *) 0,
                                        from_file, argv[optind]);
            if (exit_status < status)
              exit_status = status;
          }
    }
  else
    {
      if (to_file)
        for (; optind < argc; optind++)
          {
            int status = compare_files ((struct comparison *) 0,
                                        argv[optind], to_file);
            if (exit_status < status)
              exit_status = status;
          }
      else
        {
          /* Without --from-file or --to-file, exactly two operands
             are required.  */
          if (argc - optind != 2)
            {
              if (argc - optind < 2)
                try_help ("missing operand after `%s'", argv[argc - 1]);
              else
                try_help ("extra operand `%s'", argv[optind + 2]);
            }

          exit_status = compare_files ((struct comparison *) 0,
                                       argv[optind], argv[optind + 1]);
        }
    }

  /* Print any messages that were saved up for last.  */
  print_message_queue ();

  check_stdout ();
  exit (exit_status);
  return exit_status;
}

/* Append to REGLIST the regexp PATTERN.
*/ 755 756 static void 757 add_regexp (struct regexp_list *reglist, char const *pattern) 758 { 759 size_t patlen = strlen (pattern); 760 char const *m = re_compile_pattern (pattern, patlen, reglist->buf); 761 762 if (m != 0) 763 error (0, 0, "%s: %s", pattern, m); 764 else 765 { 766 char *regexps = reglist->regexps; 767 size_t len = reglist->len; 768 bool multiple_regexps = reglist->multiple_regexps = regexps != 0; 769 size_t newlen = reglist->len = len + 2 * multiple_regexps + patlen; 770 size_t size = reglist->size; 771 772 if (size <= newlen) 773 { 774 if (!size) 775 size = 1; 776 777 do size *= 2; 778 while (size <= newlen); 779 780 reglist->size = size; 781 reglist->regexps = regexps = xrealloc (regexps, size); 782 } 783 if (multiple_regexps) 784 { 785 regexps[len++] = '\\'; 786 regexps[len++] = '|'; 787 } 788 memcpy (regexps + len, pattern, patlen + 1); 789 } 790 } 791 792 /* Ensure that REGLIST represents the disjunction of its regexps. 793 This is done here, rather than earlier, to avoid O(N^2) behavior. */ 794 795 static void 796 summarize_regexp_list (struct regexp_list *reglist) 797 { 798 if (reglist->regexps) 799 { 800 /* At least one regexp was specified. Allocate a fastmap for it. */ 801 reglist->buf->fastmap = xmalloc (1 << CHAR_BIT); 802 if (reglist->multiple_regexps) 803 { 804 /* Compile the disjunction of the regexps. 805 (If just one regexp was specified, it is already compiled.) 
*/ 806 char const *m = re_compile_pattern (reglist->regexps, reglist->len, 807 reglist->buf); 808 if (m != 0) 809 error (EXIT_TROUBLE, 0, "%s: %s", reglist->regexps, m); 810 } 811 } 812 } 813 814 static void 815 try_help (char const *reason_msgid, char const *operand) 816 { 817 if (reason_msgid) 818 error (0, 0, _(reason_msgid), operand); 819 error (EXIT_TROUBLE, 0, _("Try `%s --help' for more information."), 820 program_name); 821 abort (); 822 } 823 824 static void 825 check_stdout (void) 826 { 827 if (ferror (stdout)) 828 fatal ("write failed"); 829 else if (fclose (stdout) != 0) 830 pfatal_with_name (_("standard output")); 831 } 832 833 static char const * const option_help_msgid[] = { 834 N_("Compare files line by line."), 835 "", 836 N_("-i --ignore-case Ignore case differences in file contents."), 837 N_("--ignore-file-name-case Ignore case when comparing file names."), 838 N_("--no-ignore-file-name-case Consider case when comparing file names."), 839 N_("-E --ignore-tab-expansion Ignore changes due to tab expansion."), 840 N_("-b --ignore-space-change Ignore changes in the amount of white space."), 841 N_("-w --ignore-all-space Ignore all white space."), 842 N_("-B --ignore-blank-lines Ignore changes whose lines are all blank."), 843 N_("-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE."), 844 N_("--strip-trailing-cr Strip trailing carriage return on input."), 845 #if HAVE_SETMODE_DOS 846 N_("--binary Read and write data in binary mode."), 847 #endif 848 N_("-a --text Treat all files as text."), 849 "", 850 N_("-c -C NUM --context[=NUM] Output NUM (default 3) lines of copied context.\n\ 851 -u -U NUM --unified[=NUM] Output NUM (default 3) lines of unified context.\n\ 852 --label LABEL Use LABEL instead of file name.\n\ 853 -p --show-c-function Show which C function each change is in.\n\ 854 -F RE --show-function-line=RE Show the most recent line matching RE."), 855 N_("-q --brief Output only whether files differ."), 856 N_("-e --ed 
Output an ed script."), 857 N_("--normal Output a normal diff."), 858 N_("-n --rcs Output an RCS format diff."), 859 N_("-y --side-by-side Output in two columns.\n\ 860 -W NUM --width=NUM Output at most NUM (default 130) print columns.\n\ 861 --left-column Output only the left column of common lines.\n\ 862 --suppress-common-lines Do not output common lines."), 863 N_("-D NAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs."), 864 N_("--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT."), 865 N_("--line-format=LFMT Similar, but format all input lines with LFMT."), 866 N_("--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT."), 867 N_(" LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'."), 868 N_(" GFMT may contain:\n\ 869 %< lines from FILE1\n\ 870 %> lines from FILE2\n\ 871 %= lines common to FILE1 and FILE2\n\ 872 %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER\n\ 873 LETTERs are as follows for new group, lower case for old group:\n\ 874 F first line number\n\ 875 L last line number\n\ 876 N number of lines = L-F+1\n\ 877 E F-1\n\ 878 M L+1"), 879 N_(" LFMT may contain:\n\ 880 %L contents of line\n\ 881 %l contents of line, excluding any trailing newline\n\ 882 %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number"), 883 N_(" Either GFMT or LFMT may contain:\n\ 884 %% %\n\ 885 %c'C' the single character C\n\ 886 %c'\\OOO' the character with octal code OOO"), 887 "", 888 N_("-l --paginate Pass the output through `pr' to paginate it."), 889 N_("-t --expand-tabs Expand tabs to spaces in output."), 890 N_("-T --initial-tab Make tabs line up by prepending a tab."), 891 "", 892 N_("-r --recursive Recursively compare any subdirectories found."), 893 N_("-N --new-file Treat absent files as empty."), 894 N_("--unidirectional-new-file Treat absent first files as empty."), 895 N_("-s --report-identical-files Report when two files are the same."), 896 N_("-x PAT 
--exclude=PAT Exclude files that match PAT."), 897 N_("-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE."), 898 N_("-S FILE --starting-file=FILE Start with FILE when comparing directories."), 899 N_("--from-file=FILE1 Compare FILE1 to all operands. FILE1 can be a directory."), 900 N_("--to-file=FILE2 Compare all operands to FILE2. FILE2 can be a directory."), 901 "", 902 N_("--horizon-lines=NUM Keep NUM lines of the common prefix and suffix."), 903 N_("-d --minimal Try hard to find a smaller set of changes."), 904 N_("--speed-large-files Assume large files and many scattered small changes."), 905 "", 906 N_("-v --version Output version info."), 907 N_("--help Output this help."), 908 "", 909 N_("FILES are `FILE1 FILE2' or `DIR1 DIR2' or `DIR FILE...' or `FILE... DIR'."), 910 N_("If --from-file or --to-file is given, there are no restrictions on FILES."), 911 N_("If a FILE is `-', read standard input."), 912 "", 913 N_("Report bugs to <bug-gnu-utils@gnu.org>."), 914 0 915 }; 916 917 static void 918 usage (void) 919 { 920 char const * const *p; 921 922 printf (_("Usage: %s [OPTION]... FILES\n"), program_name); 923 924 for (p = option_help_msgid; *p; p++) 925 { 926 if (!**p) 927 putchar ('\n'); 928 else 929 { 930 char const *msg = _(*p); 931 char const *nl; 932 while ((nl = strchr (msg, '\n'))) 933 { 934 int msglen = nl + 1 - msg; 935 printf (" %.*s", msglen, msg); 936 msg = nl + 1; 937 } 938 939 printf (" %s\n" + 2 * (*msg != ' ' && *msg != '-'), msg); 940 } 941 } 942 } 943 944 /* Set VAR to VALUE, reporting an OPTION error if this is a 945 conflict. */ 946 static void 947 specify_value (char const **var, char const *value, char const *option) 948 { 949 if (*var && strcmp (*var, value) != 0) 950 { 951 error (0, 0, _("conflicting %s option value `%s'"), option, value); 952 try_help (0, 0); 953 } 954 *var = value; 955 } 956 957 /* Set the output style to STYLE, diagnosing conflicts. 
*/ 958 static void 959 specify_style (enum output_style style) 960 { 961 if (output_style != style) 962 { 963 if (output_style != OUTPUT_UNSPECIFIED) 964 try_help ("conflicting output style options", 0); 965 output_style = style; 966 } 967 } 968 969 static char const * 970 filetype (struct stat const *st) 971 { 972 /* See POSIX 1003.1-2001 for these formats. 973 974 To keep diagnostics grammatical in English, the returned string 975 must start with a consonant. */ 976 977 if (S_ISREG (st->st_mode)) 978 return st->st_size == 0 ? _("regular empty file") : _("regular file"); 979 980 if (S_ISDIR (st->st_mode)) return _("directory"); 981 982 #ifdef S_ISBLK 983 if (S_ISBLK (st->st_mode)) return _("block special file"); 984 #endif 985 #ifdef S_ISCHR 986 if (S_ISCHR (st->st_mode)) return _("character special file"); 987 #endif 988 #ifdef S_ISFIFO 989 if (S_ISFIFO (st->st_mode)) return _("fifo"); 990 #endif 991 /* S_ISLNK is impossible with `fstat' and `stat'. */ 992 #ifdef S_ISSOCK 993 if (S_ISSOCK (st->st_mode)) return _("socket"); 994 #endif 995 #ifdef S_TYPEISMQ 996 if (S_TYPEISMQ (st)) return _("message queue"); 997 #endif 998 #ifdef S_TYPEISSEM 999 if (S_TYPEISSEM (st)) return _("semaphore"); 1000 #endif 1001 #ifdef S_TYPEISSHM 1002 if (S_TYPEISSHM (st)) return _("shared memory object"); 1003 #endif 1004 #ifdef S_TYPEISTMO 1005 if (S_TYPEISTMO (st)) return _("typed memory object"); 1006 #endif 1007 1008 return _("weird file"); 1009 } 1010 1011 /* Set the last-modified time of *ST to be the current time. 
*/ 1012 1013 static void 1014 set_mtime_to_now (struct stat *st) 1015 { 1016 #ifdef ST_MTIM_NSEC 1017 1018 # if HAVE_CLOCK_GETTIME && defined CLOCK_REALTIME 1019 if (clock_gettime (CLOCK_REALTIME, &st->st_mtim) == 0) 1020 return; 1021 # endif 1022 1023 # if HAVE_GETTIMEOFDAY 1024 { 1025 struct timeval timeval; 1026 if (gettimeofday (&timeval, NULL) == 0) 1027 { 1028 st->st_mtime = timeval.tv_sec; 1029 st->st_mtim.ST_MTIM_NSEC = timeval.tv_usec * 1000; 1030 return; 1031 } 1032 } 1033 # endif 1034 1035 #endif /* ST_MTIM_NSEC */ 1036 1037 time (&st->st_mtime); 1038 } 1039 1040 /* Compare two files (or dirs) with parent comparison PARENT 1041 and names NAME0 and NAME1. 1042 (If PARENT is 0, then the first name is just NAME0, etc.) 1043 This is self-contained; it opens the files and closes them. 1044 1045 Value is EXIT_SUCCESS if files are the same, EXIT_FAILURE if 1046 different, EXIT_TROUBLE if there is a problem opening them. */ 1047 1048 static int 1049 compare_files (struct comparison const *parent, 1050 char const *name0, 1051 char const *name1) 1052 { 1053 struct comparison cmp; 1054 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0) 1055 register int f; 1056 int status = EXIT_SUCCESS; 1057 bool same_files; 1058 char *free0, *free1; 1059 1060 /* If this is directory comparison, perhaps we have a file 1061 that exists only in one of the directories. 1062 If so, just print a message to that effect. */ 1063 1064 if (! ((name0 && name1) 1065 || (unidirectional_new_file && name1) 1066 || new_file)) 1067 { 1068 char const *name = name0 == 0 ? name1 : name0; 1069 char const *dir = parent->file[name0 == 0].name; 1070 1071 /* See POSIX 1003.1-2001 for this format. */ 1072 message ("Only in %s: %s\n", dir, name); 1073 1074 /* Return EXIT_FAILURE so that diff_dirs will return 1075 EXIT_FAILURE ("some files differ"). 
*/ 1076 return EXIT_FAILURE; 1077 } 1078 1079 memset (cmp.file, 0, sizeof cmp.file); 1080 cmp.parent = parent; 1081 1082 /* cmp.file[f].desc markers */ 1083 #define NONEXISTENT (-1) /* nonexistent file */ 1084 #define UNOPENED (-2) /* unopened file (e.g. directory) */ 1085 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded errno value */ 1086 1087 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of ERRNO_ENCODE */ 1088 1089 cmp.file[0].desc = name0 == 0 ? NONEXISTENT : UNOPENED; 1090 cmp.file[1].desc = name1 == 0 ? NONEXISTENT : UNOPENED; 1091 1092 /* Now record the full name of each file, including nonexistent ones. */ 1093 1094 if (name0 == 0) 1095 name0 = name1; 1096 if (name1 == 0) 1097 name1 = name0; 1098 1099 if (!parent) 1100 { 1101 free0 = 0; 1102 free1 = 0; 1103 cmp.file[0].name = name0; 1104 cmp.file[1].name = name1; 1105 } 1106 else 1107 { 1108 cmp.file[0].name = free0 1109 = dir_file_pathname (parent->file[0].name, name0); 1110 cmp.file[1].name = free1 1111 = dir_file_pathname (parent->file[1].name, name1); 1112 } 1113 1114 /* Stat the files. */ 1115 1116 for (f = 0; f < 2; f++) 1117 { 1118 if (cmp.file[f].desc != NONEXISTENT) 1119 { 1120 if (f && file_name_cmp (cmp.file[f].name, cmp.file[0].name) == 0) 1121 { 1122 cmp.file[f].desc = cmp.file[0].desc; 1123 cmp.file[f].stat = cmp.file[0].stat; 1124 } 1125 else if (strcmp (cmp.file[f].name, "-") == 0) 1126 { 1127 cmp.file[f].desc = STDIN_FILENO; 1128 if (fstat (STDIN_FILENO, &cmp.file[f].stat) != 0) 1129 cmp.file[f].desc = ERRNO_ENCODE (errno); 1130 else 1131 { 1132 if (S_ISREG (cmp.file[f].stat.st_mode)) 1133 { 1134 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR); 1135 if (pos < 0) 1136 cmp.file[f].desc = ERRNO_ENCODE (errno); 1137 else 1138 cmp.file[f].stat.st_size = 1139 MAX (0, cmp.file[f].stat.st_size - pos); 1140 } 1141 1142 /* POSIX 1003.1-2001 requires current time for 1143 stdin. 
*/ 1144 set_mtime_to_now (&cmp.file[f].stat); 1145 } 1146 } 1147 else if (stat (cmp.file[f].name, &cmp.file[f].stat) != 0) 1148 cmp.file[f].desc = ERRNO_ENCODE (errno); 1149 } 1150 } 1151 1152 /* Mark files as nonexistent at the top level as needed for -N and 1153 --unidirectional-new-file. */ 1154 if (! parent) 1155 { 1156 if ((new_file | unidirectional_new_file) 1157 && cmp.file[0].desc == ERRNO_ENCODE (ENOENT) 1158 && cmp.file[1].desc == UNOPENED) 1159 cmp.file[0].desc = NONEXISTENT; 1160 1161 if (new_file 1162 && cmp.file[0].desc == UNOPENED 1163 && cmp.file[1].desc == ERRNO_ENCODE (ENOENT)) 1164 cmp.file[1].desc = NONEXISTENT; 1165 } 1166 1167 for (f = 0; f < 2; f++) 1168 if (cmp.file[f].desc == NONEXISTENT) 1169 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode; 1170 1171 for (f = 0; f < 2; f++) 1172 { 1173 int e = ERRNO_DECODE (cmp.file[f].desc); 1174 if (0 <= e) 1175 { 1176 errno = e; 1177 perror_with_name (cmp.file[f].name); 1178 status = EXIT_TROUBLE; 1179 } 1180 } 1181 1182 if (status == EXIT_SUCCESS && ! parent && DIR_P (0) != DIR_P (1)) 1183 { 1184 /* If one is a directory, and it was specified in the command line, 1185 use the file in that dir with the other file's basename. */ 1186 1187 int fnm_arg = DIR_P (0); 1188 int dir_arg = 1 - fnm_arg; 1189 char const *fnm = cmp.file[fnm_arg].name; 1190 char const *dir = cmp.file[dir_arg].name; 1191 char const *filename = cmp.file[dir_arg].name = free0 1192 = dir_file_pathname (dir, base_name (fnm)); 1193 1194 if (strcmp (fnm, "-") == 0) 1195 fatal ("cannot compare `-' to a directory"); 1196 1197 if (stat (filename, &cmp.file[dir_arg].stat) != 0) 1198 { 1199 perror_with_name (filename); 1200 status = EXIT_TROUBLE; 1201 } 1202 } 1203 1204 if (status != EXIT_SUCCESS) 1205 { 1206 /* One of the files should exist but does not. 
*/ 1207 } 1208 else if ((same_files 1209 = (cmp.file[0].desc != NONEXISTENT 1210 && cmp.file[1].desc != NONEXISTENT 1211 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat) 1212 && same_file_attributes (&cmp.file[0].stat, 1213 &cmp.file[1].stat))) 1214 && no_diff_means_no_output) 1215 { 1216 /* The two named files are actually the same physical file. 1217 We know they are identical without actually reading them. */ 1218 } 1219 else if (DIR_P (0) & DIR_P (1)) 1220 { 1221 if (output_style == OUTPUT_IFDEF) 1222 fatal ("-D option not supported with directories"); 1223 1224 /* If both are directories, compare the files in them. */ 1225 1226 if (parent && !recursive) 1227 { 1228 /* But don't compare dir contents one level down 1229 unless -r was specified. 1230 See POSIX 1003.1-2001 for this format. */ 1231 message ("Common subdirectories: %s and %s\n", 1232 cmp.file[0].name, cmp.file[1].name); 1233 } 1234 else 1235 status = diff_dirs (&cmp, compare_files); 1236 } 1237 else if ((DIR_P (0) | DIR_P (1)) 1238 || (parent 1239 && (! S_ISREG (cmp.file[0].stat.st_mode) 1240 || ! S_ISREG (cmp.file[1].stat.st_mode)))) 1241 { 1242 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) 1243 { 1244 /* We have a subdirectory that exists only in one directory. */ 1245 1246 if ((DIR_P (0) | DIR_P (1)) 1247 && recursive 1248 && (new_file 1249 || (unidirectional_new_file 1250 && cmp.file[0].desc == NONEXISTENT))) 1251 status = diff_dirs (&cmp, compare_files); 1252 else 1253 { 1254 char const *dir 1255 = parent->file[cmp.file[0].desc == NONEXISTENT].name; 1256 1257 /* See POSIX 1003.1-2001 for this format. */ 1258 message ("Only in %s: %s\n", dir, name0); 1259 1260 status = EXIT_FAILURE; 1261 } 1262 } 1263 else 1264 { 1265 /* We have two files that are not to be compared. */ 1266 1267 /* See POSIX 1003.1-2001 for this format. */ 1268 message5 ("File %s is a %s while file %s is a %s\n", 1269 file_label[0] ? 
file_label[0] : cmp.file[0].name, 1270 filetype (&cmp.file[0].stat), 1271 file_label[1] ? file_label[1] : cmp.file[1].name, 1272 filetype (&cmp.file[1].stat)); 1273 1274 /* This is a difference. */ 1275 status = EXIT_FAILURE; 1276 } 1277 } 1278 else if (files_can_be_treated_as_binary 1279 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size 1280 && (cmp.file[0].desc == NONEXISTENT 1281 || S_ISREG (cmp.file[0].stat.st_mode)) 1282 && (cmp.file[1].desc == NONEXISTENT 1283 || S_ISREG (cmp.file[1].stat.st_mode))) 1284 { 1285 message ("Files %s and %s differ\n", 1286 file_label[0] ? file_label[0] : cmp.file[0].name, 1287 file_label[1] ? file_label[1] : cmp.file[1].name); 1288 status = EXIT_FAILURE; 1289 } 1290 else 1291 { 1292 /* Both exist and neither is a directory. */ 1293 1294 /* Open the files and record their descriptors. */ 1295 1296 if (cmp.file[0].desc == UNOPENED) 1297 if ((cmp.file[0].desc = open (cmp.file[0].name, O_RDONLY, 0)) < 0) 1298 { 1299 perror_with_name (cmp.file[0].name); 1300 status = EXIT_TROUBLE; 1301 } 1302 if (cmp.file[1].desc == UNOPENED) 1303 { 1304 if (same_files) 1305 cmp.file[1].desc = cmp.file[0].desc; 1306 else if ((cmp.file[1].desc = open (cmp.file[1].name, O_RDONLY, 0)) 1307 < 0) 1308 { 1309 perror_with_name (cmp.file[1].name); 1310 status = EXIT_TROUBLE; 1311 } 1312 } 1313 1314 #if HAVE_SETMODE_DOS 1315 if (binary) 1316 for (f = 0; f < 2; f++) 1317 if (0 <= cmp.file[f].desc) 1318 set_binary_mode (cmp.file[f].desc, 1); 1319 #endif 1320 1321 /* Compare the files, if no error was found. */ 1322 1323 if (status == EXIT_SUCCESS) 1324 status = diff_2_files (&cmp); 1325 1326 /* Close the file descriptors. 
*/ 1327 1328 if (0 <= cmp.file[0].desc && close (cmp.file[0].desc) != 0) 1329 { 1330 perror_with_name (cmp.file[0].name); 1331 status = EXIT_TROUBLE; 1332 } 1333 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc 1334 && close (cmp.file[1].desc) != 0) 1335 { 1336 perror_with_name (cmp.file[1].name); 1337 status = EXIT_TROUBLE; 1338 } 1339 } 1340 1341 /* Now the comparison has been done, if no error prevented it, 1342 and STATUS is the value this function will return. */ 1343 1344 if (status == EXIT_SUCCESS) 1345 { 1346 if (report_identical_files && !DIR_P (0)) 1347 message ("Files %s and %s are identical\n", 1348 file_label[0] ? file_label[0] : cmp.file[0].name, 1349 file_label[1] ? file_label[1] : cmp.file[1].name); 1350 } 1351 else 1352 { 1353 /* Flush stdout so that the user sees differences immediately. 1354 This can hurt performance, unfortunately. */ 1355 if (fflush (stdout) != 0) 1356 pfatal_with_name (_("standard output")); 1357 } 1358 1359 if (free0) 1360 free (free0); 1361 if (free1) 1362 free (free1); 1363 1364 return status; 1365 } 1366