1 /* Support routines for GNU DIFF. 2 3 Copyright (C) 1988-1989, 1992-1995, 1998, 2001-2002, 2004, 2006, 2009-2010 4 Free Software Foundation, Inc. 5 6 This file is part of GNU DIFF. 7 8 This program is free software: you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation, either version 3 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 20 21 #include "diff.h" 22 #include <dirname.h> 23 #include <error.h> 24 #include <sh-quote.h> 25 #include <xalloc.h> 26 27 char const pr_program[] = PR_PROGRAM; 28 29 /* Queue up one-line messages to be printed at the end, 30 when -l is specified. Each message is recorded with a `struct msg'. */ 31 32 struct msg 33 { 34 struct msg *next; 35 char args[1]; /* Format + 4 args, each '\0' terminated, concatenated. */ 36 }; 37 38 /* Head of the chain of queues messages. */ 39 40 static struct msg *msg_chain; 41 42 /* Tail of the chain of queues messages. */ 43 44 static struct msg **msg_chain_end = &msg_chain; 45 46 /* Use when a system call returns non-zero status. 47 NAME should normally be the file name. */ 48 49 void 50 perror_with_name (char const *name) 51 { 52 error (0, errno, "%s", name); 53 } 54 55 /* Use when a system call returns non-zero status and that is fatal. */ 56 57 void 58 pfatal_with_name (char const *name) 59 { 60 int e = errno; 61 print_message_queue (); 62 error (EXIT_TROUBLE, e, "%s", name); 63 abort (); 64 } 65 66 /* Print an error message containing MSGID, then exit. */ 67 68 void 69 fatal (char const *msgid) 70 { 71 print_message_queue (); 72 error (EXIT_TROUBLE, 0, "%s", _(msgid)); 73 abort (); 74 } 75 76 /* Like printf, except if -l in effect then save the message and print later. 77 This is used for things like "Only in ...". */ 78 79 void 80 message (char const *format_msgid, char const *arg1, char const *arg2) 81 { 82 message5 (format_msgid, arg1, arg2, 0, 0); 83 } 84 85 void 86 message5 (char const *format_msgid, char const *arg1, char const *arg2, 87 char const *arg3, char const *arg4) 88 { 89 if (paginate) 90 { 91 char *p; 92 char const *arg[5]; 93 int i; 94 size_t size[5]; 95 size_t total_size = offsetof (struct msg, args); 96 struct msg *new; 97 98 arg[0] = format_msgid; 99 arg[1] = arg1; 100 arg[2] = arg2; 101 arg[3] = arg3 ? arg3 : ""; 102 arg[4] = arg4 ? arg4 : ""; 103 104 for (i = 0; i < 5; i++) 105 total_size += size[i] = strlen (arg[i]) + 1; 106 107 new = xmalloc (total_size); 108 109 for (i = 0, p = new->args; i < 5; p += size[i++]) 110 memcpy (p, arg[i], size[i]); 111 112 *msg_chain_end = new; 113 new->next = 0; 114 msg_chain_end = &new->next; 115 } 116 else 117 { 118 if (sdiff_merge_assist) 119 putchar (' '); 120 printf (_(format_msgid), arg1, arg2, arg3, arg4); 121 } 122 } 123 124 /* Output all the messages that were saved up by calls to `message'. */ 125 126 void 127 print_message_queue (void) 128 { 129 char const *arg[5]; 130 int i; 131 struct msg *m = msg_chain; 132 133 while (m) 134 { 135 struct msg *next = m->next; 136 arg[0] = m->args; 137 for (i = 0; i < 4; i++) 138 arg[i + 1] = arg[i] + strlen (arg[i]) + 1; 139 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]); 140 free (m); 141 m = next; 142 } 143 } 144 145 /* Call before outputting the results of comparing files NAME0 and NAME1 146 to set up OUTFILE, the stdio stream for the output to go to. 147 148 Usually, OUTFILE is just stdout. But when -l was specified 149 we fork off a `pr' and make OUTFILE a pipe to it. 150 `pr' then outputs to our stdout. */ 151 152 static char const *current_name0; 153 static char const *current_name1; 154 static bool currently_recursive; 155 156 void 157 setup_output (char const *name0, char const *name1, bool recursive) 158 { 159 current_name0 = name0; 160 current_name1 = name1; 161 currently_recursive = recursive; 162 outfile = 0; 163 } 164 165 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK 166 static pid_t pr_pid; 167 #endif 168 169 void 170 begin_output (void) 171 { 172 char *name; 173 174 if (outfile != 0) 175 return; 176 177 /* Construct the header of this piece of diff. */ 178 name = xmalloc (strlen (current_name0) + strlen (current_name1) 179 + strlen (switch_string) + 7); 180 181 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in 182 the standard: it says that we must print only the last component 183 of the pathnames, and it requires two spaces after "diff" if 184 there are no options. These requirements are silly and do not 185 match historical practice. */ 186 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1); 187 188 if (paginate) 189 { 190 if (fflush (stdout) != 0) 191 pfatal_with_name (_("write failed")); 192 193 /* Make OUTFILE a pipe to a subsidiary `pr'. */ 194 { 195 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK 196 int pipes[2]; 197 198 if (pipe (pipes) != 0) 199 pfatal_with_name ("pipe"); 200 201 pr_pid = vfork (); 202 if (pr_pid < 0) 203 pfatal_with_name ("fork"); 204 205 if (pr_pid == 0) 206 { 207 close (pipes[1]); 208 if (pipes[0] != STDIN_FILENO) 209 { 210 if (dup2 (pipes[0], STDIN_FILENO) < 0) 211 pfatal_with_name ("dup2"); 212 close (pipes[0]); 213 } 214 215 execl (pr_program, pr_program, "-h", name, (char *) 0); 216 _exit (errno == ENOENT ? 127 : 126); 217 } 218 else 219 { 220 close (pipes[0]); 221 outfile = fdopen (pipes[1], "w"); 222 if (!outfile) 223 pfatal_with_name ("fdopen"); 224 } 225 #else 226 char *command = xmalloc (sizeof pr_program - 1 + 7 227 + shell_quote_length (name) + 1); 228 char *p; 229 sprintf (command, "%s -f -h ", pr_program); 230 p = command + sizeof pr_program - 1 + 7; 231 p = shell_quote_copy (p, name); 232 *p = 0; 233 errno = 0; 234 outfile = popen (command, "w"); 235 if (!outfile) 236 pfatal_with_name (command); 237 free (command); 238 #endif 239 } 240 } 241 else 242 { 243 244 /* If -l was not specified, output the diff straight to `stdout'. */ 245 246 outfile = stdout; 247 248 /* If handling multiple files (because scanning a directory), 249 print which files the following output is about. */ 250 if (currently_recursive) 251 printf ("%s\n", name); 252 } 253 254 free (name); 255 256 /* A special header is needed at the beginning of context output. */ 257 switch (output_style) 258 { 259 case OUTPUT_CONTEXT: 260 print_context_header (files, false); 261 break; 262 263 case OUTPUT_UNIFIED: 264 print_context_header (files, true); 265 break; 266 267 default: 268 break; 269 } 270 } 271 272 /* Call after the end of output of diffs for one file. 273 Close OUTFILE and get rid of the `pr' subfork. */ 274 275 void 276 finish_output (void) 277 { 278 if (outfile != 0 && outfile != stdout) 279 { 280 int status; 281 int wstatus; 282 int werrno = 0; 283 if (ferror (outfile)) 284 fatal ("write failed"); 285 #if ! (HAVE_WORKING_FORK || HAVE_WORKING_VFORK) 286 wstatus = pclose (outfile); 287 if (wstatus == -1) 288 werrno = errno; 289 #else 290 if (fclose (outfile) != 0) 291 pfatal_with_name (_("write failed")); 292 if (waitpid (pr_pid, &wstatus, 0) < 0) 293 pfatal_with_name ("waitpid"); 294 #endif 295 status = (! werrno && WIFEXITED (wstatus) 296 ? WEXITSTATUS (wstatus) 297 : INT_MAX); 298 if (status) 299 error (EXIT_TROUBLE, werrno, 300 _(status == 126 301 ? "subsidiary program `%s' could not be invoked" 302 : status == 127 303 ? "subsidiary program `%s' not found" 304 : status == INT_MAX 305 ? "subsidiary program `%s' failed" 306 : "subsidiary program `%s' failed (exit status %d)"), 307 pr_program, status); 308 } 309 310 outfile = 0; 311 } 312 313 /* Compare two lines (typically one from each input file) 314 according to the command line options. 315 For efficiency, this is invoked only when the lines do not match exactly 316 but an option like -i might cause us to ignore the difference. 317 Return nonzero if the lines differ. */ 318 319 bool 320 lines_differ (char const *s1, char const *s2) 321 { 322 register char const *t1 = s1; 323 register char const *t2 = s2; 324 size_t column = 0; 325 326 while (1) 327 { 328 register unsigned char c1 = *t1++; 329 register unsigned char c2 = *t2++; 330 331 /* Test for exact char equality first, since it's a common case. */ 332 if (c1 != c2) 333 { 334 switch (ignore_white_space) 335 { 336 case IGNORE_ALL_SPACE: 337 /* For -w, just skip past any white space. */ 338 while (isspace (c1) && c1 != '\n') c1 = *t1++; 339 while (isspace (c2) && c2 != '\n') c2 = *t2++; 340 break; 341 342 case IGNORE_SPACE_CHANGE: 343 /* For -b, advance past any sequence of white space in 344 line 1 and consider it just one space, or nothing at 345 all if it is at the end of the line. */ 346 if (isspace (c1)) 347 { 348 while (c1 != '\n') 349 { 350 c1 = *t1++; 351 if (! isspace (c1)) 352 { 353 --t1; 354 c1 = ' '; 355 break; 356 } 357 } 358 } 359 360 /* Likewise for line 2. */ 361 if (isspace (c2)) 362 { 363 while (c2 != '\n') 364 { 365 c2 = *t2++; 366 if (! isspace (c2)) 367 { 368 --t2; 369 c2 = ' '; 370 break; 371 } 372 } 373 } 374 375 if (c1 != c2) 376 { 377 /* If we went too far when doing the simple test 378 for equality, go back to the first non-white-space 379 character in both sides and try again. */ 380 if (c2 == ' ' && c1 != '\n' 381 && s1 + 1 < t1 382 && isspace ((unsigned char) t1[-2])) 383 { 384 --t1; 385 continue; 386 } 387 if (c1 == ' ' && c2 != '\n' 388 && s2 + 1 < t2 389 && isspace ((unsigned char) t2[-2])) 390 { 391 --t2; 392 continue; 393 } 394 } 395 396 break; 397 398 case IGNORE_TAB_EXPANSION: 399 if ((c1 == ' ' && c2 == '\t') 400 || (c1 == '\t' && c2 == ' ')) 401 { 402 size_t column2 = column; 403 for (;; c1 = *t1++) 404 { 405 if (c1 == ' ') 406 column++; 407 else if (c1 == '\t') 408 column += tabsize - column % tabsize; 409 else 410 break; 411 } 412 for (;; c2 = *t2++) 413 { 414 if (c2 == ' ') 415 column2++; 416 else if (c2 == '\t') 417 column2 += tabsize - column2 % tabsize; 418 else 419 break; 420 } 421 if (column != column2) 422 return true; 423 } 424 break; 425 426 case IGNORE_NO_WHITE_SPACE: 427 break; 428 } 429 430 /* Lowercase all letters if -i is specified. */ 431 432 if (ignore_case) 433 { 434 c1 = tolower (c1); 435 c2 = tolower (c2); 436 } 437 438 if (c1 != c2) 439 break; 440 } 441 if (c1 == '\n') 442 return false; 443 444 column += c1 == '\t' ? tabsize - column % tabsize : 1; 445 } 446 447 return true; 448 } 449 450 /* Find the consecutive changes at the start of the script START. 451 Return the last link before the first gap. */ 452 453 struct change * 454 find_change (struct change *start) 455 { 456 return start; 457 } 458 459 struct change * 460 find_reverse_change (struct change *start) 461 { 462 return start; 463 } 464 465 /* Divide SCRIPT into pieces by calling HUNKFUN and 466 print each piece with PRINTFUN. 467 Both functions take one arg, an edit script. 468 469 HUNKFUN is called with the tail of the script 470 and returns the last link that belongs together with the start 471 of the tail. 472 473 PRINTFUN takes a subscript which belongs together (with a null 474 link at the end) and prints it. */ 475 476 void 477 print_script (struct change *script, 478 struct change * (*hunkfun) (struct change *), 479 void (*printfun) (struct change *)) 480 { 481 struct change *next = script; 482 483 while (next) 484 { 485 struct change *this, *end; 486 487 /* Find a set of changes that belong together. */ 488 this = next; 489 end = (*hunkfun) (next); 490 491 /* Disconnect them from the rest of the changes, 492 making them a hunk, and remember the rest for next iteration. */ 493 next = end->link; 494 end->link = 0; 495 #ifdef DEBUG 496 debug_script (this); 497 #endif 498 499 /* Print this hunk. */ 500 (*printfun) (this); 501 502 /* Reconnect the script so it will all be freed properly. */ 503 end->link = next; 504 } 505 } 506 507 /* Print the text of a single line LINE, 508 flagging it with the characters in LINE_FLAG (which say whether 509 the line is inserted, deleted, changed, etc.). LINE_FLAG must not 510 end in a blank, unless it is a single blank. */ 511 512 void 513 print_1_line (char const *line_flag, char const *const *line) 514 { 515 char const *base = line[0], *limit = line[1]; /* Help the compiler. */ 516 FILE *out = outfile; /* Help the compiler some more. */ 517 char const *flag_format = 0; 518 519 /* If -T was specified, use a Tab between the line-flag and the text. 520 Otherwise use a Space (as Unix diff does). 521 Print neither space nor tab if line-flags are empty. 522 But omit trailing blanks if requested. */ 523 524 if (line_flag && *line_flag) 525 { 526 char const *flag_format_1 = flag_format = initial_tab ? "%s\t" : "%s "; 527 char const *line_flag_1 = line_flag; 528 529 if (suppress_blank_empty && **line == '\n') 530 { 531 flag_format_1 = "%s"; 532 533 /* This hack to omit trailing blanks takes advantage of the 534 fact that the only way that LINE_FLAG can end in a blank 535 is when LINE_FLAG consists of a single blank. */ 536 line_flag_1 += *line_flag_1 == ' '; 537 } 538 539 fprintf (out, flag_format_1, line_flag_1); 540 } 541 542 output_1_line (base, limit, flag_format, line_flag); 543 544 if ((!line_flag || line_flag[0]) && limit[-1] != '\n') 545 fprintf (out, "\n\\ %s\n", _("No newline at end of file")); 546 } 547 548 /* Output a line from BASE up to LIMIT. 549 With -t, expand white space characters to spaces, and if FLAG_FORMAT 550 is nonzero, output it with argument LINE_FLAG after every 551 internal carriage return, so that tab stops continue to line up. */ 552 553 void 554 output_1_line (char const *base, char const *limit, char const *flag_format, 555 char const *line_flag) 556 { 557 if (!expand_tabs) 558 fwrite (base, sizeof (char), limit - base, outfile); 559 else 560 { 561 register FILE *out = outfile; 562 register unsigned char c; 563 register char const *t = base; 564 register size_t column = 0; 565 size_t tab_size = tabsize; 566 567 while (t < limit) 568 switch ((c = *t++)) 569 { 570 case '\t': 571 { 572 size_t spaces = tab_size - column % tab_size; 573 column += spaces; 574 do 575 putc (' ', out); 576 while (--spaces); 577 } 578 break; 579 580 case '\r': 581 putc (c, out); 582 if (flag_format && t < limit && *t != '\n') 583 fprintf (out, flag_format, line_flag); 584 column = 0; 585 break; 586 587 case '\b': 588 if (column == 0) 589 continue; 590 column--; 591 putc (c, out); 592 break; 593 594 default: 595 column += isprint (c) != 0; 596 putc (c, out); 597 break; 598 } 599 } 600 } 601 602 char const change_letter[] = { 0, 'd', 'a', 'c' }; 603 604 /* Translate an internal line number (an index into diff's table of lines) 605 into an actual line number in the input file. 606 The internal line number is I. FILE points to the data on the file. 607 608 Internal line numbers count from 0 starting after the prefix. 609 Actual line numbers count from 1 within the entire file. */ 610 611 lin 612 translate_line_number (struct file_data const *file, lin i) 613 { 614 return i + file->prefix_lines + 1; 615 } 616 617 /* Translate a line number range. This is always done for printing, 618 so for convenience translate to long int rather than lin, so that the 619 caller can use printf with "%ld" without casting. */ 620 621 void 622 translate_range (struct file_data const *file, 623 lin a, lin b, 624 long int *aptr, long int *bptr) 625 { 626 *aptr = translate_line_number (file, a - 1) + 1; 627 *bptr = translate_line_number (file, b + 1) - 1; 628 } 629 630 /* Print a pair of line numbers with SEPCHAR, translated for file FILE. 631 If the two numbers are identical, print just one number. 632 633 Args A and B are internal line numbers. 634 We print the translated (real) line numbers. */ 635 636 void 637 print_number_range (char sepchar, struct file_data *file, lin a, lin b) 638 { 639 long int trans_a, trans_b; 640 translate_range (file, a, b, &trans_a, &trans_b); 641 642 /* Note: we can have B < A in the case of a range of no lines. 643 In this case, we should print the line number before the range, 644 which is B. */ 645 if (trans_b > trans_a) 646 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b); 647 else 648 fprintf (outfile, "%ld", trans_b); 649 } 650 651 /* Look at a hunk of edit script and report the range of lines in each file 652 that it applies to. HUNK is the start of the hunk, which is a chain 653 of `struct change'. The first and last line numbers of file 0 are stored in 654 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1. 655 Note that these are internal line numbers that count from 0. 656 657 If no lines from file 0 are deleted, then FIRST0 is LAST0+1. 658 659 Return UNCHANGED if only ignorable lines are inserted or deleted, 660 OLD if lines of file 0 are deleted, 661 NEW if lines of file 1 are inserted, 662 and CHANGED if both kinds of changes are found. */ 663 664 enum changes 665 analyze_hunk (struct change *hunk, 666 lin *first0, lin *last0, 667 lin *first1, lin *last1) 668 { 669 struct change *next; 670 lin l0, l1; 671 lin show_from, show_to; 672 lin i; 673 bool trivial = ignore_blank_lines || ignore_regexp.fastmap; 674 size_t trivial_length = ignore_blank_lines - 1; 675 /* If 0, ignore zero-length lines; 676 if SIZE_MAX, do not ignore lines just because of their length. */ 677 bool skip_leading_white_space = 678 (ignore_blank_lines && IGNORE_SPACE_CHANGE <= ignore_white_space); 679 680 char const * const *linbuf0 = files[0].linbuf; /* Help the compiler. */ 681 char const * const *linbuf1 = files[1].linbuf; 682 683 show_from = show_to = 0; 684 685 *first0 = hunk->line0; 686 *first1 = hunk->line1; 687 688 next = hunk; 689 do 690 { 691 l0 = next->line0 + next->deleted - 1; 692 l1 = next->line1 + next->inserted - 1; 693 show_from += next->deleted; 694 show_to += next->inserted; 695 696 for (i = next->line0; i <= l0 && trivial; i++) 697 { 698 char const *line = linbuf0[i]; 699 char const *newline = linbuf0[i + 1] - 1; 700 size_t len = newline - line; 701 char const *p = line; 702 if (skip_leading_white_space) 703 while (isspace ((unsigned char) *p) && *p != '\n') 704 p++; 705 if (newline - p != trivial_length 706 && (! ignore_regexp.fastmap 707 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) 708 trivial = 0; 709 } 710 711 for (i = next->line1; i <= l1 && trivial; i++) 712 { 713 char const *line = linbuf1[i]; 714 char const *newline = linbuf1[i + 1] - 1; 715 size_t len = newline - line; 716 char const *p = line; 717 if (skip_leading_white_space) 718 while (isspace ((unsigned char) *p) && *p != '\n') 719 p++; 720 if (newline - p != trivial_length 721 && (! ignore_regexp.fastmap 722 || re_search (&ignore_regexp, line, len, 0, len, 0) < 0)) 723 trivial = 0; 724 } 725 } 726 while ((next = next->link) != 0); 727 728 *last0 = l0; 729 *last1 = l1; 730 731 /* If all inserted or deleted lines are ignorable, 732 tell the caller to ignore this hunk. */ 733 734 if (trivial) 735 return UNCHANGED; 736 737 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED); 738 } 739 740 /* Concatenate three strings, returning a newly malloc'd string. */ 741 742 char * 743 concat (char const *s1, char const *s2, char const *s3) 744 { 745 char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1); 746 sprintf (new, "%s%s%s", s1, s2, s3); 747 return new; 748 } 749 750 /* Yield a new block of SIZE bytes, initialized to zero. */ 751 752 void * 753 zalloc (size_t size) 754 { 755 void *p = xmalloc (size); 756 memset (p, 0, size); 757 return p; 758 } 759 760 /* Yield the newly malloc'd pathname 761 of the file in DIR whose filename is FILE. */ 762 763 char * 764 dir_file_pathname (char const *dir, char const *file) 765 { 766 char const *base = last_component (dir); 767 size_t baselen = base_len (base); 768 bool omit_slash = baselen == 0 || base[baselen - 1] == '/'; 769 return concat (dir, "/" + omit_slash, file); 770 } 771 772 void 773 debug_script (struct change *sp) 774 { 775 fflush (stdout); 776 777 for (; sp; sp = sp->link) 778 { 779 long int line0 = sp->line0; 780 long int line1 = sp->line1; 781 long int deleted = sp->deleted; 782 long int inserted = sp->inserted; 783 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n", 784 line0, line1, deleted, inserted); 785 } 786 787 fflush (stderr); 788 } 789