1 /* GNU DIFF entry routine. 2 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1997 Free Software Foundation, Inc. 3 4 This file is part of GNU DIFF. 5 6 GNU DIFF is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU DIFF is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU DIFF; see the file COPYING. If not, write to 18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ 19 20 /* GNU DIFF was written by Mike Haertel, David Hayes, 21 Richard Stallman, Len Tower, and Paul Eggert. */ 22 23 #define GDIFF_MAIN 24 #include "diff.h" 25 #include <signal.h> 26 #include "getopt.h" 27 #include "fnmatch.h" 28 29 #ifndef DEFAULT_WIDTH 30 #define DEFAULT_WIDTH 130 31 #endif 32 33 #ifndef GUTTER_WIDTH_MINIMUM 34 #define GUTTER_WIDTH_MINIMUM 3 35 #endif 36 37 /* diff.c has a real initialize_main function. */ 38 #ifdef initialize_main 39 #undef initialize_main 40 #endif 41 42 static char const *filetype PARAMS((struct stat const *)); 43 static char *option_list PARAMS((char **, int)); 44 static int add_exclude_file PARAMS((char const *)); 45 static int ck_atoi PARAMS((char const *, int *)); 46 static int compare_files PARAMS((char const *, char const *, char const *, char const *, int)); 47 static int specify_format PARAMS((char **, char *)); 48 static void add_exclude PARAMS((char const *)); 49 static void add_regexp PARAMS((struct regexp_list **, char const *)); 50 static void specify_style PARAMS((enum output_style)); 51 static int try_help PARAMS((char const *)); 52 static void check_output PARAMS((FILE *)); 53 static void usage PARAMS((void)); 54 static void initialize_main PARAMS((int *, char ***)); 55 56 /* Nonzero for -r: if comparing two directories, 57 compare their common subdirectories recursively. */ 58 59 static int recursive; 60 61 /* For debugging: don't do discard_confusing_lines. */ 62 63 int no_discards; 64 65 #if HAVE_SETMODE 66 /* I/O mode: nonzero only if using binary input/output. */ 67 static int binary_I_O; 68 #endif 69 70 /* Return a string containing the command options with which diff was invoked. 71 Spaces appear between what were separate ARGV-elements. 72 There is a space at the beginning but none at the end. 73 If there were no options, the result is an empty string. 74 75 Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, 76 the length of that vector. */ 77 78 static char * 79 option_list (optionvec, count) 80 char **optionvec; /* Was `vector', but that collides on Alliant. */ 81 int count; 82 { 83 int i; 84 size_t length = 0; 85 char *result; 86 87 for (i = 0; i < count; i++) 88 length += strlen (optionvec[i]) + 1; 89 90 result = xmalloc (length + 1); 91 result[0] = 0; 92 93 for (i = 0; i < count; i++) 94 { 95 strcat (result, " "); 96 strcat (result, optionvec[i]); 97 } 98 99 return result; 100 } 101 102 /* Convert STR to a positive integer, storing the result in *OUT. 103 If STR is not a valid integer, return -1 (otherwise 0). */ 104 static int 105 ck_atoi (str, out) 106 char const *str; 107 int *out; 108 { 109 char const *p; 110 for (p = str; *p; p++) 111 if (*p < '0' || *p > '9') 112 return -1; 113 114 *out = atoi (optarg); 115 return 0; 116 } 117 118 /* Keep track of excluded file name patterns. */ 119 120 static char const **exclude; 121 static int exclude_alloc, exclude_count; 122 123 int 124 excluded_filename (f) 125 char const *f; 126 { 127 int i; 128 for (i = 0; i < exclude_count; i++) 129 if (fnmatch (exclude[i], f, 0) == 0) 130 return 1; 131 return 0; 132 } 133 134 static void 135 add_exclude (pattern) 136 char const *pattern; 137 { 138 if (exclude_alloc <= exclude_count) 139 exclude = (char const **) 140 (exclude_alloc == 0 141 ? xmalloc ((exclude_alloc = 64) * sizeof (*exclude)) 142 : xrealloc (exclude, (exclude_alloc *= 2) * sizeof (*exclude))); 143 144 exclude[exclude_count++] = pattern; 145 } 146 147 static int 148 add_exclude_file (name) 149 char const *name; 150 { 151 struct file_data f; 152 char *p, *q, *lim; 153 154 f.name = optarg; 155 f.desc = (strcmp (optarg, "-") == 0 156 ? STDIN_FILENO 157 : open (optarg, O_RDONLY, 0)); 158 if (f.desc < 0 || fstat (f.desc, &f.stat) != 0) 159 return -1; 160 161 sip (&f, 1); 162 slurp (&f); 163 164 for (p = f.buffer, lim = p + f.buffered_chars; p < lim; p = q) 165 { 166 q = (char *) memchr (p, '\n', lim - p); 167 if (!q) 168 q = lim; 169 *q++ = 0; 170 add_exclude (p); 171 } 172 173 return close (f.desc); 174 } 175 176 /* The numbers 129- that appear in the fourth element of some entries 177 tell the big switch in `diff_run' how to process those options. */ 178 179 static struct option const longopts[] = 180 { 181 {"ignore-blank-lines", 0, 0, 'B'}, 182 {"context", 2, 0, 'C'}, 183 {"ifdef", 1, 0, 'D'}, 184 {"show-function-line", 1, 0, 'F'}, 185 {"speed-large-files", 0, 0, 'H'}, 186 {"ignore-matching-lines", 1, 0, 'I'}, 187 {"label", 1, 0, 'L'}, 188 {"file-label", 1, 0, 'L'}, /* An alias, no longer recommended */ 189 {"new-file", 0, 0, 'N'}, 190 {"entire-new-file", 0, 0, 'N'}, /* An alias, no longer recommended */ 191 {"unidirectional-new-file", 0, 0, 'P'}, 192 {"starting-file", 1, 0, 'S'}, 193 {"initial-tab", 0, 0, 'T'}, 194 {"width", 1, 0, 'W'}, 195 {"text", 0, 0, 'a'}, 196 {"ascii", 0, 0, 'a'}, /* An alias, no longer recommended */ 197 {"ignore-space-change", 0, 0, 'b'}, 198 {"minimal", 0, 0, 'd'}, 199 {"ed", 0, 0, 'e'}, 200 {"forward-ed", 0, 0, 'f'}, 201 {"ignore-case", 0, 0, 'i'}, 202 {"paginate", 0, 0, 'l'}, 203 {"print", 0, 0, 'l'}, /* An alias, no longer recommended */ 204 {"rcs", 0, 0, 'n'}, 205 {"show-c-function", 0, 0, 'p'}, 206 {"brief", 0, 0, 'q'}, 207 {"recursive", 0, 0, 'r'}, 208 {"report-identical-files", 0, 0, 's'}, 209 {"expand-tabs", 0, 0, 't'}, 210 {"version", 0, 0, 'v'}, 211 {"ignore-all-space", 0, 0, 'w'}, 212 {"exclude", 1, 0, 'x'}, 213 {"exclude-from", 1, 0, 'X'}, 214 {"side-by-side", 0, 0, 'y'}, 215 {"unified", 2, 0, 'U'}, 216 {"left-column", 0, 0, 129}, 217 {"suppress-common-lines", 0, 0, 130}, 218 {"sdiff-merge-assist", 0, 0, 131}, 219 {"old-line-format", 1, 0, 132}, 220 {"new-line-format", 1, 0, 133}, 221 {"unchanged-line-format", 1, 0, 134}, 222 {"line-format", 1, 0, 135}, 223 {"old-group-format", 1, 0, 136}, 224 {"new-group-format", 1, 0, 137}, 225 {"unchanged-group-format", 1, 0, 138}, 226 {"changed-group-format", 1, 0, 139}, 227 {"horizon-lines", 1, 0, 140}, 228 {"help", 0, 0, 141}, 229 {"binary", 0, 0, 142}, 230 {0, 0, 0, 0} 231 }; 232 233 int 234 diff_run (argc, argv, out) 235 int argc; 236 char *argv[]; 237 char *out; 238 { 239 int val; 240 int c; 241 int prev = -1; 242 int width = DEFAULT_WIDTH; 243 int show_c_function = 0; 244 int optind_old; 245 246 /* Do our initializations. */ 247 initialize_main (&argc, &argv); 248 249 /* Decode the options. */ 250 251 optind_old = optind; 252 optind = 0; 253 while ((c = getopt_long (argc, argv, 254 "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y", 255 longopts, 0)) != EOF) 256 { 257 switch (c) 258 { 259 /* All digits combine in decimal to specify the context-size. */ 260 case '1': 261 case '2': 262 case '3': 263 case '4': 264 case '5': 265 case '6': 266 case '7': 267 case '8': 268 case '9': 269 case '0': 270 if (context == -1) 271 context = 0; 272 /* If a context length has already been specified, 273 more digits allowed only if they follow right after the others. 274 Reject two separate runs of digits, or digits after -C. */ 275 else if (prev < '0' || prev > '9') 276 fatal ("context length specified twice"); 277 278 context = context * 10 + c - '0'; 279 break; 280 281 case 'a': 282 /* Treat all files as text files; never treat as binary. */ 283 always_text_flag = 1; 284 break; 285 286 case 'b': 287 /* Ignore changes in amount of white space. */ 288 ignore_space_change_flag = 1; 289 ignore_some_changes = 1; 290 ignore_some_line_changes = 1; 291 break; 292 293 case 'B': 294 /* Ignore changes affecting only blank lines. */ 295 ignore_blank_lines_flag = 1; 296 ignore_some_changes = 1; 297 break; 298 299 case 'C': /* +context[=lines] */ 300 case 'U': /* +unified[=lines] */ 301 if (optarg) 302 { 303 if (context >= 0) 304 fatal ("context length specified twice"); 305 306 if (ck_atoi (optarg, &context)) 307 fatal ("invalid context length argument"); 308 } 309 310 /* Falls through. */ 311 case 'c': 312 /* Make context-style output. */ 313 specify_style (c == 'U' ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); 314 break; 315 316 case 'd': 317 /* Don't discard lines. This makes things slower (sometimes much 318 slower) but will find a guaranteed minimal set of changes. */ 319 no_discards = 1; 320 break; 321 322 case 'D': 323 /* Make merged #ifdef output. */ 324 specify_style (OUTPUT_IFDEF); 325 { 326 int i, err = 0; 327 static char const C_ifdef_group_formats[] = 328 "#ifndef %s\n%%<#endif /* not %s */\n%c#ifdef %s\n%%>#endif /* %s */\n%c%%=%c#ifndef %s\n%%<#else /* %s */\n%%>#endif /* %s */\n"; 329 char *b = xmalloc (sizeof (C_ifdef_group_formats) 330 + 7 * strlen(optarg) - 14 /* 7*"%s" */ 331 - 8 /* 5*"%%" + 3*"%c" */); 332 sprintf (b, C_ifdef_group_formats, 333 optarg, optarg, 0, 334 optarg, optarg, 0, 0, 335 optarg, optarg, optarg); 336 for (i = 0; i < 4; i++) 337 { 338 err |= specify_format (&group_format[i], b); 339 b += strlen (b) + 1; 340 } 341 if (err) 342 diff_error ("conflicting #ifdef formats", 0, 0); 343 } 344 break; 345 346 case 'e': 347 /* Make output that is a valid `ed' script. */ 348 specify_style (OUTPUT_ED); 349 break; 350 351 case 'f': 352 /* Make output that looks vaguely like an `ed' script 353 but has changes in the order they appear in the file. */ 354 specify_style (OUTPUT_FORWARD_ED); 355 break; 356 357 case 'F': 358 /* Show, for each set of changes, the previous line that 359 matches the specified regexp. Currently affects only 360 context-style output. */ 361 add_regexp (&function_regexp_list, optarg); 362 break; 363 364 case 'h': 365 /* Split the files into chunks of around 1500 lines 366 for faster processing. Usually does not change the result. 367 368 This currently has no effect. */ 369 break; 370 371 case 'H': 372 /* Turn on heuristics that speed processing of large files 373 with a small density of changes. */ 374 heuristic = 1; 375 break; 376 377 case 'i': 378 /* Ignore changes in case. */ 379 ignore_case_flag = 1; 380 ignore_some_changes = 1; 381 ignore_some_line_changes = 1; 382 break; 383 384 case 'I': 385 /* Ignore changes affecting only lines that match the 386 specified regexp. */ 387 add_regexp (&ignore_regexp_list, optarg); 388 ignore_some_changes = 1; 389 break; 390 391 case 'l': 392 /* Pass the output through `pr' to paginate it. */ 393 paginate_flag = 1; 394 #if !defined(SIGCHLD) && defined(SIGCLD) 395 #define SIGCHLD SIGCLD 396 #endif 397 #ifdef SIGCHLD 398 /* Pagination requires forking and waiting, and 399 System V fork+wait does not work if SIGCHLD is ignored. */ 400 signal (SIGCHLD, SIG_DFL); 401 #endif 402 break; 403 404 case 'L': 405 /* Specify file labels for `-c' output headers. */ 406 if (!file_label[0]) 407 file_label[0] = optarg; 408 else if (!file_label[1]) 409 file_label[1] = optarg; 410 else 411 fatal ("too many file label options"); 412 break; 413 414 case 'n': 415 /* Output RCS-style diffs, like `-f' except that each command 416 specifies the number of lines affected. */ 417 specify_style (OUTPUT_RCS); 418 break; 419 420 case 'N': 421 /* When comparing directories, if a file appears only in one 422 directory, treat it as present but empty in the other. */ 423 entire_new_file_flag = 1; 424 break; 425 426 case 'p': 427 /* Make context-style output and show name of last C function. */ 428 show_c_function = 1; 429 add_regexp (&function_regexp_list, "^[_a-zA-Z$]"); 430 break; 431 432 case 'P': 433 /* When comparing directories, if a file appears only in 434 the second directory of the two, 435 treat it as present but empty in the other. */ 436 unidirectional_new_file_flag = 1; 437 break; 438 439 case 'q': 440 no_details_flag = 1; 441 break; 442 443 case 'r': 444 /* When comparing directories, 445 recursively compare any subdirectories found. */ 446 recursive = 1; 447 break; 448 449 case 's': 450 /* Print a message if the files are the same. */ 451 print_file_same_flag = 1; 452 break; 453 454 case 'S': 455 /* When comparing directories, start with the specified 456 file name. This is used for resuming an aborted comparison. */ 457 dir_start_file = optarg; 458 break; 459 460 case 't': 461 /* Expand tabs to spaces in the output so that it preserves 462 the alignment of the input files. */ 463 tab_expand_flag = 1; 464 break; 465 466 case 'T': 467 /* Use a tab in the output, rather than a space, before the 468 text of an input line, so as to keep the proper alignment 469 in the input line without changing the characters in it. */ 470 tab_align_flag = 1; 471 break; 472 473 case 'u': 474 /* Output the context diff in unidiff format. */ 475 specify_style (OUTPUT_UNIFIED); 476 break; 477 478 case 'v': 479 printf ("diff - GNU diffutils version %s\n", diff_version_string); 480 return 0; 481 482 case 'w': 483 /* Ignore horizontal white space when comparing lines. */ 484 ignore_all_space_flag = 1; 485 ignore_some_changes = 1; 486 ignore_some_line_changes = 1; 487 break; 488 489 case 'x': 490 add_exclude (optarg); 491 break; 492 493 case 'X': 494 if (add_exclude_file (optarg) != 0) 495 pfatal_with_name (optarg); 496 break; 497 498 case 'y': 499 /* Use side-by-side (sdiff-style) columnar output. */ 500 specify_style (OUTPUT_SDIFF); 501 break; 502 503 case 'W': 504 /* Set the line width for OUTPUT_SDIFF. */ 505 if (ck_atoi (optarg, &width) || width <= 0) 506 fatal ("column width must be a positive integer"); 507 break; 508 509 case 129: 510 sdiff_left_only = 1; 511 break; 512 513 case 130: 514 sdiff_skip_common_lines = 1; 515 break; 516 517 case 131: 518 /* sdiff-style columns output. */ 519 specify_style (OUTPUT_SDIFF); 520 sdiff_help_sdiff = 1; 521 break; 522 523 case 132: 524 case 133: 525 case 134: 526 specify_style (OUTPUT_IFDEF); 527 if (specify_format (&line_format[c - 132], optarg) != 0) 528 diff_error ("conflicting line format", 0, 0); 529 break; 530 531 case 135: 532 specify_style (OUTPUT_IFDEF); 533 { 534 int i, err = 0; 535 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++) 536 err |= specify_format (&line_format[i], optarg); 537 if (err) 538 diff_error ("conflicting line format", 0, 0); 539 } 540 break; 541 542 case 136: 543 case 137: 544 case 138: 545 case 139: 546 specify_style (OUTPUT_IFDEF); 547 if (specify_format (&group_format[c - 136], optarg) != 0) 548 diff_error ("conflicting group format", 0, 0); 549 break; 550 551 case 140: 552 if (ck_atoi (optarg, &horizon_lines) || horizon_lines < 0) 553 fatal ("horizon must be a nonnegative integer"); 554 break; 555 556 case 141: 557 usage (); 558 check_output (stdout); 559 return 0; 560 561 case 142: 562 /* Use binary I/O when reading and writing data. 563 On Posix hosts, this has no effect. */ 564 #if HAVE_SETMODE 565 binary_I_O = 1; 566 # if 0 567 /* Because this code is leftover from pre-library days, 568 there is no way to set stdout back to the default mode 569 when we are done. As it turns out, I think the only 570 parts of CVS that pass out == NULL, and thus cause diff 571 to write to stdout, are "cvs diff" and "cvs rdiff". So 572 I'm not going to worry about this too much yet. */ 573 setmode (STDOUT_FILENO, O_BINARY); 574 # else 575 if (out == NULL) 576 error (0, 0, "warning: did not set stdout to binary mode"); 577 # endif 578 #endif 579 break; 580 581 default: 582 return try_help (0); 583 } 584 prev = c; 585 } 586 587 if (argc - optind != 2) 588 return try_help (argc - optind < 2 ? "missing operand" : "extra operand"); 589 590 { 591 /* 592 * We maximize first the half line width, and then the gutter width, 593 * according to the following constraints: 594 * 1. Two half lines plus a gutter must fit in a line. 595 * 2. If the half line width is nonzero: 596 * a. The gutter width is at least GUTTER_WIDTH_MINIMUM. 597 * b. If tabs are not expanded to spaces, 598 * a half line plus a gutter is an integral number of tabs, 599 * so that tabs in the right column line up. 600 */ 601 int t = tab_expand_flag ? 1 : TAB_WIDTH; 602 int off = (width + t + GUTTER_WIDTH_MINIMUM) / (2*t) * t; 603 sdiff_half_width = max (0, min (off - GUTTER_WIDTH_MINIMUM, width - off)), 604 sdiff_column2_offset = sdiff_half_width ? off : width; 605 } 606 607 if (show_c_function && output_style != OUTPUT_UNIFIED) 608 specify_style (OUTPUT_CONTEXT); 609 610 if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED) 611 context = 0; 612 else if (context == -1) 613 /* Default amount of context for -c. */ 614 context = 3; 615 616 if (output_style == OUTPUT_IFDEF) 617 { 618 /* Format arrays are char *, not char const *, 619 because integer formats are temporarily modified. 620 But it is safe to assign a constant like "%=" to a format array, 621 since "%=" does not format any integers. */ 622 int i; 623 for (i = 0; i < sizeof (line_format) / sizeof (*line_format); i++) 624 if (!line_format[i]) 625 line_format[i] = "%l\n"; 626 if (!group_format[OLD]) 627 group_format[OLD] 628 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%<"; 629 if (!group_format[NEW]) 630 group_format[NEW] 631 = group_format[UNCHANGED] ? group_format[UNCHANGED] : "%>"; 632 if (!group_format[UNCHANGED]) 633 group_format[UNCHANGED] = "%="; 634 if (!group_format[CHANGED]) 635 group_format[CHANGED] = concat (group_format[OLD], 636 group_format[NEW], ""); 637 } 638 639 no_diff_means_no_output = 640 (output_style == OUTPUT_IFDEF ? 641 (!*group_format[UNCHANGED] 642 || (strcmp (group_format[UNCHANGED], "%=") == 0 643 && !*line_format[UNCHANGED])) 644 : output_style == OUTPUT_SDIFF ? sdiff_skip_common_lines : 1); 645 646 switch_string = option_list (argv + 1, optind - 1); 647 648 if (out == NULL) 649 outfile = stdout; 650 else 651 { 652 #if HAVE_SETMODE 653 /* A diff which is full of ^Z and such isn't going to work 654 very well in text mode. */ 655 if (binary_I_O) 656 outfile = fopen (out, "wb"); 657 else 658 #endif 659 outfile = fopen (out, "w"); 660 if (outfile == NULL) 661 { 662 perror_with_name ("could not open output file"); 663 return 2; 664 } 665 } 666 667 /* Set the jump buffer, so that diff may abort execution without 668 terminating the process. */ 669 if ((val = setjmp (diff_abort_buf)) != 0) 670 { 671 optind = optind_old; 672 if (outfile != stdout) 673 fclose (outfile); 674 return val; 675 } 676 677 val = compare_files (0, argv[optind], 0, argv[optind + 1], 0); 678 679 /* Print any messages that were saved up for last. */ 680 print_message_queue (); 681 682 free (switch_string); 683 684 optind = optind_old; 685 check_output (outfile); 686 if (outfile != stdout) 687 if (fclose (outfile) != 0) 688 perror ("close error on output file"); 689 return val; 690 } 691 692 /* Add the compiled form of regexp PATTERN to REGLIST. */ 693 694 static void 695 add_regexp (reglist, pattern) 696 struct regexp_list **reglist; 697 char const *pattern; 698 { 699 struct regexp_list *r; 700 char const *m; 701 702 r = (struct regexp_list *) xmalloc (sizeof (*r)); 703 bzero (r, sizeof (*r)); 704 r->buf.fastmap = xmalloc (256); 705 m = re_compile_pattern (pattern, strlen (pattern), &r->buf); 706 if (m != 0) 707 diff_error ("%s: %s", pattern, m); 708 709 /* Add to the start of the list, since it's easier than the end. */ 710 r->next = *reglist; 711 *reglist = r; 712 } 713 714 static int 715 try_help (reason) 716 char const *reason; 717 { 718 if (reason) 719 diff_error ("%s", reason, 0); 720 diff_error ("Try `%s --help' for more information.", diff_program_name, 0); 721 return 2; 722 } 723 724 static void 725 check_output (file) 726 FILE *file; 727 { 728 if (ferror (file) || fflush (file) != 0) 729 fatal ("write error"); 730 } 731 732 static char const * const option_help[] = { 733 "-i --ignore-case Consider upper- and lower-case to be the same.", 734 "-w --ignore-all-space Ignore all white space.", 735 "-b --ignore-space-change Ignore changes in the amount of white space.", 736 "-B --ignore-blank-lines Ignore changes whose lines are all blank.", 737 "-I RE --ignore-matching-lines=RE Ignore changes whose lines all match RE.", 738 #if HAVE_SETMODE 739 "--binary Read and write data in binary mode.", 740 #endif 741 "-a --text Treat all files as text.\n", 742 "-c -C NUM --context[=NUM] Output NUM (default 2) lines of copied context.", 743 "-u -U NUM --unified[=NUM] Output NUM (default 2) lines of unified context.", 744 " -NUM Use NUM context lines.", 745 " -L LABEL --label LABEL Use LABEL instead of file name.", 746 " -p --show-c-function Show which C function each change is in.", 747 " -F RE --show-function-line=RE Show the most recent line matching RE.", 748 "-q --brief Output only whether files differ.", 749 "-e --ed Output an ed script.", 750 "-n --rcs Output an RCS format diff.", 751 "-y --side-by-side Output in two columns.", 752 " -w NUM --width=NUM Output at most NUM (default 130) characters per line.", 753 " --left-column Output only the left column of common lines.", 754 " --suppress-common-lines Do not output common lines.", 755 "-DNAME --ifdef=NAME Output merged file to show `#ifdef NAME' diffs.", 756 "--GTYPE-group-format=GFMT Similar, but format GTYPE input groups with GFMT.", 757 "--line-format=LFMT Similar, but format all input lines with LFMT.", 758 "--LTYPE-line-format=LFMT Similar, but format LTYPE input lines with LFMT.", 759 " LTYPE is `old', `new', or `unchanged'. GTYPE is LTYPE or `changed'.", 760 " GFMT may contain:", 761 " %< lines from FILE1", 762 " %> lines from FILE2", 763 " %= lines common to FILE1 and FILE2", 764 " %[-][WIDTH][.[PREC]]{doxX}LETTER printf-style spec for LETTER", 765 " LETTERs are as follows for new group, lower case for old group:", 766 " F first line number", 767 " L last line number", 768 " N number of lines = L-F+1", 769 " E F-1", 770 " M L+1", 771 " LFMT may contain:", 772 " %L contents of line", 773 " %l contents of line, excluding any trailing newline", 774 " %[-][WIDTH][.[PREC]]{doxX}n printf-style spec for input line number", 775 " Either GFMT or LFMT may contain:", 776 " %% %", 777 " %c'C' the single character C", 778 " %c'\\OOO' the character with octal code OOO\n", 779 "-l --paginate Pass the output through `pr' to paginate it.", 780 "-t --expand-tabs Expand tabs to spaces in output.", 781 "-T --initial-tab Make tabs line up by prepending a tab.\n", 782 "-r --recursive Recursively compare any subdirectories found.", 783 "-N --new-file Treat absent files as empty.", 784 "-P --unidirectional-new-file Treat absent first files as empty.", 785 "-s --report-identical-files Report when two files are the same.", 786 "-x PAT --exclude=PAT Exclude files that match PAT.", 787 "-X FILE --exclude-from=FILE Exclude files that match any pattern in FILE.", 788 "-S FILE --starting-file=FILE Start with FILE when comparing directories.\n", 789 "--horizon-lines=NUM Keep NUM lines of the common prefix and suffix.", 790 "-d --minimal Try hard to find a smaller set of changes.", 791 "-H --speed-large-files Assume large files and many scattered small changes.\n", 792 "-v --version Output version info.", 793 "--help Output this help.", 794 0 795 }; 796 797 static void 798 usage () 799 { 800 char const * const *p; 801 802 printf ("Usage: %s [OPTION]... FILE1 FILE2\n\n", diff_program_name); 803 for (p = option_help; *p; p++) 804 printf (" %s\n", *p); 805 printf ("\nIf FILE1 or FILE2 is `-', read standard input.\n"); 806 } 807 808 static int 809 specify_format (var, value) 810 char **var; 811 char *value; 812 { 813 int err = *var ? strcmp (*var, value) : 0; 814 *var = value; 815 return err; 816 } 817 818 static void 819 specify_style (style) 820 enum output_style style; 821 { 822 if (output_style != OUTPUT_NORMAL 823 && output_style != style) 824 diff_error ("conflicting specifications of output style", 0, 0); 825 output_style = style; 826 } 827 828 static char const * 829 filetype (st) 830 struct stat const *st; 831 { 832 /* See Posix.2 section 4.17.6.1.1 and Table 5-1 for these formats. 833 To keep diagnostics grammatical, the returned string must start 834 with a consonant. */ 835 836 if (S_ISREG (st->st_mode)) 837 { 838 if (st->st_size == 0) 839 return "regular empty file"; 840 /* Posix.2 section 5.14.2 seems to suggest that we must read the file 841 and guess whether it's C, Fortran, etc., but this is somewhat useless 842 and doesn't reflect historical practice. We're allowed to guess 843 wrong, so we don't bother to read the file. */ 844 return "regular file"; 845 } 846 if (S_ISDIR (st->st_mode)) return "directory"; 847 848 /* other Posix.1 file types */ 849 #ifdef S_ISBLK 850 if (S_ISBLK (st->st_mode)) return "block special file"; 851 #endif 852 #ifdef S_ISCHR 853 if (S_ISCHR (st->st_mode)) return "character special file"; 854 #endif 855 #ifdef S_ISFIFO 856 if (S_ISFIFO (st->st_mode)) return "fifo"; 857 #endif 858 859 /* other Posix.1b file types */ 860 #ifdef S_TYPEISMQ 861 if (S_TYPEISMQ (st)) return "message queue"; 862 #endif 863 #ifdef S_TYPEISSEM 864 if (S_TYPEISSEM (st)) return "semaphore"; 865 #endif 866 #ifdef S_TYPEISSHM 867 if (S_TYPEISSHM (st)) return "shared memory object"; 868 #endif 869 870 /* other popular file types */ 871 /* S_ISLNK is impossible with `fstat' and `stat'. */ 872 #ifdef S_ISSOCK 873 if (S_ISSOCK (st->st_mode)) return "socket"; 874 #endif 875 876 return "weird file"; 877 } 878 879 /* Compare two files (or dirs) with specified names 880 DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion. 881 (if DIR0 is 0, then the name is just NAME0, etc.) 882 This is self-contained; it opens the files and closes them. 883 884 Value is 0 if files are the same, 1 if different, 885 2 if there is a problem opening them. */ 886 887 static int 888 compare_files (dir0, name0, dir1, name1, depth) 889 char const *dir0, *dir1; 890 char const *name0, *name1; 891 int depth; 892 { 893 struct file_data inf[2]; 894 register int i; 895 int val; 896 int same_files; 897 int failed = 0; 898 char *free0 = 0, *free1 = 0; 899 900 /* If this is directory comparison, perhaps we have a file 901 that exists only in one of the directories. 902 If so, just print a message to that effect. */ 903 904 if (! ((name0 != 0 && name1 != 0) 905 || (unidirectional_new_file_flag && name1 != 0) 906 || entire_new_file_flag)) 907 { 908 char const *name = name0 == 0 ? name1 : name0; 909 char const *dir = name0 == 0 ? dir1 : dir0; 910 message ("Only in %s: %s\n", dir, name); 911 /* Return 1 so that diff_dirs will return 1 ("some files differ"). */ 912 return 1; 913 } 914 915 bzero (inf, sizeof (inf)); 916 917 /* Mark any nonexistent file with -1 in the desc field. */ 918 /* Mark unopened files (e.g. directories) with -2. */ 919 920 inf[0].desc = name0 == 0 ? -1 : -2; 921 inf[1].desc = name1 == 0 ? -1 : -2; 922 923 /* Now record the full name of each file, including nonexistent ones. */ 924 925 if (name0 == 0) 926 name0 = name1; 927 if (name1 == 0) 928 name1 = name0; 929 930 inf[0].name = dir0 == 0 ? name0 : (free0 = dir_file_pathname (dir0, name0)); 931 inf[1].name = dir1 == 0 ? name1 : (free1 = dir_file_pathname (dir1, name1)); 932 933 /* Stat the files. Record whether they are directories. */ 934 935 for (i = 0; i <= 1; i++) 936 { 937 if (inf[i].desc != -1) 938 { 939 int stat_result; 940 941 if (i && filename_cmp (inf[i].name, inf[0].name) == 0) 942 { 943 inf[i].stat = inf[0].stat; 944 stat_result = 0; 945 } 946 else if (strcmp (inf[i].name, "-") == 0) 947 { 948 inf[i].desc = STDIN_FILENO; 949 stat_result = fstat (STDIN_FILENO, &inf[i].stat); 950 if (stat_result == 0 && S_ISREG (inf[i].stat.st_mode)) 951 { 952 off_t pos = lseek (STDIN_FILENO, (off_t) 0, SEEK_CUR); 953 if (pos == -1) 954 stat_result = -1; 955 else 956 { 957 if (pos <= inf[i].stat.st_size) 958 inf[i].stat.st_size -= pos; 959 else 960 inf[i].stat.st_size = 0; 961 /* Posix.2 4.17.6.1.4 requires current time for stdin. */ 962 time (&inf[i].stat.st_mtime); 963 } 964 } 965 } 966 else 967 stat_result = stat (inf[i].name, &inf[i].stat); 968 969 if (stat_result != 0) 970 { 971 perror_with_name (inf[i].name); 972 failed = 1; 973 } 974 else 975 { 976 inf[i].dir_p = S_ISDIR (inf[i].stat.st_mode) && inf[i].desc != 0; 977 if (inf[1 - i].desc == -1) 978 { 979 inf[1 - i].dir_p = inf[i].dir_p; 980 inf[1 - i].stat.st_mode = inf[i].stat.st_mode; 981 } 982 } 983 } 984 } 985 986 if (! failed && depth == 0 && inf[0].dir_p != inf[1].dir_p) 987 { 988 /* If one is a directory, and it was specified in the command line, 989 use the file in that dir with the other file's basename. */ 990 991 int fnm_arg = inf[0].dir_p; 992 int dir_arg = 1 - fnm_arg; 993 char const *fnm = inf[fnm_arg].name; 994 char const *dir = inf[dir_arg].name; 995 char const *p = filename_lastdirchar (fnm); 996 char const *filename = inf[dir_arg].name 997 = dir_file_pathname (dir, p ? p + 1 : fnm); 998 999 if (strcmp (fnm, "-") == 0) 1000 fatal ("can't compare - to a directory"); 1001 1002 if (stat (filename, &inf[dir_arg].stat) != 0) 1003 { 1004 perror_with_name (filename); 1005 failed = 1; 1006 } 1007 else 1008 inf[dir_arg].dir_p = S_ISDIR (inf[dir_arg].stat.st_mode); 1009 } 1010 1011 if (failed) 1012 { 1013 1014 /* If either file should exist but does not, return 2. */ 1015 1016 val = 2; 1017 1018 } 1019 else if ((same_files = inf[0].desc != -1 && inf[1].desc != -1 1020 && 0 < same_file (&inf[0].stat, &inf[1].stat)) 1021 && no_diff_means_no_output) 1022 { 1023 /* The two named files are actually the same physical file. 1024 We know they are identical without actually reading them. */ 1025 1026 val = 0; 1027 } 1028 else if (inf[0].dir_p & inf[1].dir_p) 1029 { 1030 if (output_style == OUTPUT_IFDEF) 1031 fatal ("-D option not supported with directories"); 1032 1033 /* If both are directories, compare the files in them. */ 1034 1035 if (depth > 0 && !recursive) 1036 { 1037 /* But don't compare dir contents one level down 1038 unless -r was specified. */ 1039 message ("Common subdirectories: %s and %s\n", 1040 inf[0].name, inf[1].name); 1041 val = 0; 1042 } 1043 else 1044 { 1045 val = diff_dirs (inf, compare_files, depth); 1046 } 1047 1048 } 1049 else if ((inf[0].dir_p | inf[1].dir_p) 1050 || (depth > 0 1051 && (! S_ISREG (inf[0].stat.st_mode) 1052 || ! S_ISREG (inf[1].stat.st_mode)))) 1053 { 1054 /* Perhaps we have a subdirectory that exists only in one directory. 1055 If so, just print a message to that effect. */ 1056 1057 if (inf[0].desc == -1 || inf[1].desc == -1) 1058 { 1059 if ((inf[0].dir_p | inf[1].dir_p) 1060 && recursive 1061 && (entire_new_file_flag 1062 || (unidirectional_new_file_flag && inf[0].desc == -1))) 1063 val = diff_dirs (inf, compare_files, depth); 1064 else 1065 { 1066 char const *dir = (inf[0].desc == -1) ? dir1 : dir0; 1067 /* See Posix.2 section 4.17.6.1.1 for this format. */ 1068 message ("Only in %s: %s\n", dir, name0); 1069 val = 1; 1070 } 1071 } 1072 else 1073 { 1074 /* We have two files that are not to be compared. */ 1075 1076 /* See Posix.2 section 4.17.6.1.1 for this format. */ 1077 message5 ("File %s is a %s while file %s is a %s\n", 1078 inf[0].name, filetype (&inf[0].stat), 1079 inf[1].name, filetype (&inf[1].stat)); 1080 1081 /* This is a difference. */ 1082 val = 1; 1083 } 1084 } 1085 else if ((no_details_flag & ~ignore_some_changes) 1086 && inf[0].stat.st_size != inf[1].stat.st_size 1087 && (inf[0].desc == -1 || S_ISREG (inf[0].stat.st_mode)) 1088 && (inf[1].desc == -1 || S_ISREG (inf[1].stat.st_mode))) 1089 { 1090 message ("Files %s and %s differ\n", inf[0].name, inf[1].name); 1091 val = 1; 1092 } 1093 else 1094 { 1095 /* Both exist and neither is a directory. */ 1096 1097 /* Open the files and record their descriptors. */ 1098 1099 if (inf[0].desc == -2) 1100 if ((inf[0].desc = open (inf[0].name, O_RDONLY, 0)) < 0) 1101 { 1102 perror_with_name (inf[0].name); 1103 failed = 1; 1104 } 1105 if (inf[1].desc == -2) 1106 if (same_files) 1107 inf[1].desc = inf[0].desc; 1108 else if ((inf[1].desc = open (inf[1].name, O_RDONLY, 0)) < 0) 1109 { 1110 perror_with_name (inf[1].name); 1111 failed = 1; 1112 } 1113 1114 #if HAVE_SETMODE 1115 if (binary_I_O) 1116 for (i = 0; i <= 1; i++) 1117 if (0 <= inf[i].desc) 1118 setmode (inf[i].desc, O_BINARY); 1119 #endif 1120 1121 /* Compare the files, if no error was found. */ 1122 1123 val = failed ? 2 : diff_2_files (inf, depth); 1124 1125 /* Close the file descriptors. */ 1126 1127 if (inf[0].desc >= 0 && close (inf[0].desc) != 0) 1128 { 1129 perror_with_name (inf[0].name); 1130 val = 2; 1131 } 1132 if (inf[1].desc >= 0 && inf[0].desc != inf[1].desc 1133 && close (inf[1].desc) != 0) 1134 { 1135 perror_with_name (inf[1].name); 1136 val = 2; 1137 } 1138 } 1139 1140 /* Now the comparison has been done, if no error prevented it, 1141 and VAL is the value this function will return. */ 1142 1143 if (val == 0 && !inf[0].dir_p) 1144 { 1145 if (print_file_same_flag) 1146 message ("Files %s and %s are identical\n", 1147 inf[0].name, inf[1].name); 1148 } 1149 else 1150 fflush (outfile); 1151 1152 if (free0) 1153 free (free0); 1154 if (free1) 1155 free (free1); 1156 1157 return val; 1158 } 1159 1160 /* Initialize status variables and flag variables used in libdiff, 1161 to permit repeated calls to diff_run. */ 1162 1163 static void 1164 initialize_main (argcp, argvp) 1165 int *argcp; 1166 char ***argvp; 1167 { 1168 /* These variables really must be reset each time diff_run is called. */ 1169 output_style = OUTPUT_NORMAL; 1170 context = -1; 1171 file_label[0] = NULL; 1172 file_label[1] = NULL; 1173 diff_program_name = (*argvp)[0]; 1174 outfile = NULL; 1175 1176 /* Reset these also, just for safety's sake. (If one invocation turns 1177 on ignore_case_flag, it must be turned off before diff_run is called 1178 again. But it is possible to make many diffs before encountering 1179 such a problem. */ 1180 recursive = 0; 1181 no_discards = 0; 1182 #if HAVE_SETMODE 1183 binary_I_O = 0; 1184 #endif 1185 no_diff_means_no_output = 0; 1186 always_text_flag = 0; 1187 horizon_lines = 0; 1188 ignore_space_change_flag = 0; 1189 ignore_all_space_flag = 0; 1190 ignore_blank_lines_flag = 0; 1191 ignore_some_line_changes = 0; 1192 ignore_some_changes = 0; 1193 ignore_case_flag = 0; 1194 function_regexp_list = NULL; 1195 ignore_regexp_list = NULL; 1196 no_details_flag = 0; 1197 print_file_same_flag = 0; 1198 tab_align_flag = 0; 1199 tab_expand_flag = 0; 1200 dir_start_file = NULL; 1201 entire_new_file_flag = 0; 1202 unidirectional_new_file_flag = 0; 1203 paginate_flag = 0; 1204 bzero (group_format, sizeof (group_format)); 1205 bzero (line_format, sizeof (line_format)); 1206 sdiff_help_sdiff = 0; 1207 sdiff_left_only = 0; 1208 sdiff_skip_common_lines = 0; 1209 sdiff_half_width = 0; 1210 sdiff_column2_offset = 0; 1211 switch_string = NULL; 1212 heuristic = 0; 1213 bzero (files, sizeof (files)); 1214 } 1215