1 /* $NetBSD: util.c,v 1.2 2020/12/13 00:04:40 roy Exp $ */ 2 3 /* Support routines for GNU DIFF. 4 5 Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002 6 Free Software Foundation, Inc. 7 8 This file is part of GNU DIFF. 9 10 GNU DIFF is free software; you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation; either version 2, or (at your option) 13 any later version. 14 15 GNU DIFF is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU General Public License 21 along with this program; see the file COPYING. 22 If not, write to the Free Software Foundation, 23 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 24 25 #include "diff.h" 26 #include <dirname.h> 27 #include <error.h> 28 #include <quotesys.h> 29 #include <regex.h> 30 #include <xalloc.h> 31 32 char const pr_program[] = PR_PROGRAM; 33 34 /* Queue up one-line messages to be printed at the end, 35 when -l is specified. Each message is recorded with a `struct msg'. */ 36 37 struct msg 38 { 39 struct msg *next; 40 char args[1]; /* Format + 4 args, each '\0' terminated, concatenated. */ 41 }; 42 43 /* Head of the chain of queues messages. */ 44 45 static struct msg *msg_chain; 46 47 /* Tail of the chain of queues messages. */ 48 49 static struct msg **msg_chain_end = &msg_chain; 50 51 /* Use when a system call returns non-zero status. 52 NAME should normally be the file name. */ 53 54 void 55 perror_with_name (char const *name) 56 { 57 error (0, errno, "%s", name); 58 } 59 60 /* Use when a system call returns non-zero status and that is fatal. 
*/ 61 62 void 63 pfatal_with_name (char const *name) 64 { 65 int e = errno; 66 print_message_queue (); 67 error (EXIT_TROUBLE, e, "%s", name); 68 abort (); 69 } 70 71 /* Print an error message containing MSGID, then exit. */ 72 73 void 74 fatal (char const *msgid) 75 { 76 print_message_queue (); 77 error (EXIT_TROUBLE, 0, "%s", _(msgid)); 78 abort (); 79 } 80 81 /* Like printf, except if -l in effect then save the message and print later. 82 This is used for things like "Only in ...". */ 83 84 void 85 message (char const *format_msgid, char const *arg1, char const *arg2) 86 { 87 message5 (format_msgid, arg1, arg2, 0, 0); 88 } 89 90 void 91 message5 (char const *format_msgid, char const *arg1, char const *arg2, 92 char const *arg3, char const *arg4) 93 { 94 if (paginate) 95 { 96 char *p; 97 char const *arg[5]; 98 int i; 99 size_t size[5]; 100 size_t total_size = offsetof (struct msg, args); 101 struct msg *new; 102 103 arg[0] = format_msgid; 104 arg[1] = arg1; 105 arg[2] = arg2; 106 arg[3] = arg3 ? arg3 : ""; 107 arg[4] = arg4 ? arg4 : ""; 108 109 for (i = 0; i < 5; i++) 110 total_size += size[i] = strlen (arg[i]) + 1; 111 112 new = xmalloc (total_size); 113 114 for (i = 0, p = new->args; i < 5; p += size[i++]) 115 memcpy (p, arg[i], size[i]); 116 117 *msg_chain_end = new; 118 new->next = 0; 119 msg_chain_end = &new->next; 120 } 121 else 122 { 123 if (sdiff_merge_assist) 124 putchar (' '); 125 printf (_(format_msgid), arg1, arg2, arg3, arg4); 126 } 127 } 128 129 /* Output all the messages that were saved up by calls to `message'. 
*/ 130 131 void 132 print_message_queue (void) 133 { 134 char const *arg[5]; 135 int i; 136 struct msg *m = msg_chain; 137 138 while (m) 139 { 140 struct msg *next = m->next; 141 arg[0] = m->args; 142 for (i = 0; i < 4; i++) 143 arg[i + 1] = arg[i] + strlen (arg[i]) + 1; 144 printf (_(arg[0]), arg[1], arg[2], arg[3], arg[4]); 145 free (m); 146 m = next; 147 } 148 } 149 150 /* Call before outputting the results of comparing files NAME0 and NAME1 151 to set up OUTFILE, the stdio stream for the output to go to. 152 153 Usually, OUTFILE is just stdout. But when -l was specified 154 we fork off a `pr' and make OUTFILE a pipe to it. 155 `pr' then outputs to our stdout. */ 156 157 static char const *current_name0; 158 static char const *current_name1; 159 static bool currently_recursive; 160 161 void 162 setup_output (char const *name0, char const *name1, bool recursive) 163 { 164 current_name0 = name0; 165 current_name1 = name1; 166 currently_recursive = recursive; 167 outfile = 0; 168 } 169 170 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK 171 static pid_t pr_pid; 172 #endif 173 174 void 175 begin_output (void) 176 { 177 char *name; 178 179 if (outfile != 0) 180 return; 181 182 /* Construct the header of this piece of diff. */ 183 name = xmalloc (strlen (current_name0) + strlen (current_name1) 184 + strlen (switch_string) + 7); 185 186 /* POSIX 1003.1-2001 specifies this format. But there are some bugs in 187 the standard: it says that we must print only the last component 188 of the pathnames, and it requires two spaces after "diff" if 189 there are no options. These requirements are silly and do not 190 match historical practice. */ 191 sprintf (name, "diff%s %s %s", switch_string, current_name0, current_name1); 192 193 if (paginate) 194 { 195 if (fflush (stdout) != 0) 196 pfatal_with_name (_("write failed")); 197 198 /* Make OUTFILE a pipe to a subsidiary `pr'. 
*/ 199 { 200 #if HAVE_WORKING_FORK || HAVE_WORKING_VFORK 201 int pipes[2]; 202 203 if (pipe (pipes) != 0) 204 pfatal_with_name ("pipe"); 205 206 pr_pid = vfork (); 207 if (pr_pid < 0) 208 pfatal_with_name ("fork"); 209 210 if (pr_pid == 0) 211 { 212 close (pipes[1]); 213 if (pipes[0] != STDIN_FILENO) 214 { 215 if (dup2 (pipes[0], STDIN_FILENO) < 0) 216 pfatal_with_name ("dup2"); 217 close (pipes[0]); 218 } 219 220 execl (pr_program, pr_program, "-h", name, NULL); 221 _exit (errno == ENOEXEC ? 126 : 127); 222 } 223 else 224 { 225 close (pipes[0]); 226 outfile = fdopen (pipes[1], "w"); 227 if (!outfile) 228 pfatal_with_name ("fdopen"); 229 } 230 #else 231 char *command = xmalloc (sizeof pr_program - 1 + 7 232 + quote_system_arg ((char *) 0, name) + 1); 233 char *p; 234 sprintf (command, "%s -f -h ", pr_program); 235 p = command + sizeof pr_program - 1 + 7; 236 p += quote_system_arg (p, name); 237 *p = 0; 238 errno = 0; 239 outfile = popen (command, "w"); 240 if (!outfile) 241 pfatal_with_name (command); 242 free (command); 243 #endif 244 } 245 } 246 else 247 { 248 249 /* If -l was not specified, output the diff straight to `stdout'. */ 250 251 outfile = stdout; 252 253 /* If handling multiple files (because scanning a directory), 254 print which files the following output is about. */ 255 if (currently_recursive) 256 printf ("%s\n", name); 257 } 258 259 free (name); 260 261 /* A special header is needed at the beginning of context output. */ 262 switch (output_style) 263 { 264 case OUTPUT_CONTEXT: 265 print_context_header (files, 0); 266 break; 267 268 case OUTPUT_UNIFIED: 269 print_context_header (files, 1); 270 break; 271 272 default: 273 break; 274 } 275 } 276 277 /* Call after the end of output of diffs for one file. 278 Close OUTFILE and get rid of the `pr' subfork. */ 279 280 void 281 finish_output (void) 282 { 283 if (outfile != 0 && outfile != stdout) 284 { 285 int wstatus; 286 int werrno = 0; 287 if (ferror (outfile)) 288 fatal ("write failed"); 289 #if ! 
(HAVE_WORKING_FORK || HAVE_WORKING_VFORK) 290 wstatus = pclose (outfile); 291 if (wstatus == -1) 292 werrno = errno; 293 #else 294 if (fclose (outfile) != 0) 295 pfatal_with_name (_("write failed")); 296 if (waitpid (pr_pid, &wstatus, 0) < 0) 297 pfatal_with_name ("waitpid"); 298 #endif 299 if (! werrno && WIFEXITED (wstatus) && WEXITSTATUS (wstatus) == 127) 300 error (EXIT_TROUBLE, 0, _("subsidiary program `%s' not found"), 301 pr_program); 302 if (wstatus != 0) 303 error (EXIT_TROUBLE, werrno, _("subsidiary program `%s' failed"), 304 pr_program); 305 } 306 307 outfile = 0; 308 } 309 310 /* Compare two lines (typically one from each input file) 311 according to the command line options. 312 For efficiency, this is invoked only when the lines do not match exactly 313 but an option like -i might cause us to ignore the difference. 314 Return nonzero if the lines differ. */ 315 316 bool 317 lines_differ (char const *s1, char const *s2) 318 { 319 register unsigned char const *t1 = (unsigned char const *) s1; 320 register unsigned char const *t2 = (unsigned char const *) s2; 321 size_t column = 0; 322 323 while (1) 324 { 325 register unsigned char c1 = *t1++; 326 register unsigned char c2 = *t2++; 327 328 /* Test for exact char equality first, since it's a common case. */ 329 if (c1 != c2) 330 { 331 switch (ignore_white_space) 332 { 333 case IGNORE_ALL_SPACE: 334 /* For -w, just skip past any white space. */ 335 while (ISSPACE (c1) && c1 != '\n') c1 = *t1++; 336 while (ISSPACE (c2) && c2 != '\n') c2 = *t2++; 337 break; 338 339 case IGNORE_SPACE_CHANGE: 340 /* For -b, advance past any sequence of white space in 341 line 1 and consider it just one space, or nothing at 342 all if it is at the end of the line. */ 343 if (ISSPACE (c1)) 344 { 345 while (c1 != '\n') 346 { 347 c1 = *t1++; 348 if (! ISSPACE (c1)) 349 { 350 --t1; 351 c1 = ' '; 352 break; 353 } 354 } 355 } 356 357 /* Likewise for line 2. 
*/ 358 if (ISSPACE (c2)) 359 { 360 while (c2 != '\n') 361 { 362 c2 = *t2++; 363 if (! ISSPACE (c2)) 364 { 365 --t2; 366 c2 = ' '; 367 break; 368 } 369 } 370 } 371 372 if (c1 != c2) 373 { 374 /* If we went too far when doing the simple test 375 for equality, go back to the first non-white-space 376 character in both sides and try again. */ 377 if (c2 == ' ' && c1 != '\n' 378 && (unsigned char const *) s1 + 1 < t1 379 && ISSPACE (t1[-2])) 380 { 381 --t1; 382 continue; 383 } 384 if (c1 == ' ' && c2 != '\n' 385 && (unsigned char const *) s2 + 1 < t2 386 && ISSPACE (t2[-2])) 387 { 388 --t2; 389 continue; 390 } 391 } 392 393 break; 394 395 case IGNORE_TAB_EXPANSION: 396 if ((c1 == ' ' && c2 == '\t') 397 || (c1 == '\t' && c2 == ' ')) 398 { 399 size_t column2 = column; 400 for (;; c1 = *t1++) 401 { 402 if (c1 == ' ') 403 column++; 404 else if (c1 == '\t') 405 column += TAB_WIDTH - column % TAB_WIDTH; 406 else 407 break; 408 } 409 for (;; c2 = *t2++) 410 { 411 if (c2 == ' ') 412 column2++; 413 else if (c2 == '\t') 414 column2 += TAB_WIDTH - column2 % TAB_WIDTH; 415 else 416 break; 417 } 418 if (column != column2) 419 return 1; 420 } 421 break; 422 423 case IGNORE_NO_WHITE_SPACE: 424 break; 425 } 426 427 /* Lowercase all letters if -i is specified. */ 428 429 if (ignore_case) 430 { 431 c1 = TOLOWER (c1); 432 c2 = TOLOWER (c2); 433 } 434 435 if (c1 != c2) 436 break; 437 } 438 if (c1 == '\n') 439 return 0; 440 441 column += c1 == '\t' ? TAB_WIDTH - column % TAB_WIDTH : 1; 442 } 443 444 return 1; 445 } 446 447 /* Find the consecutive changes at the start of the script START. 448 Return the last link before the first gap. */ 449 450 struct change * 451 find_change (struct change *start) 452 { 453 return start; 454 } 455 456 struct change * 457 find_reverse_change (struct change *start) 458 { 459 return start; 460 } 461 462 /* Divide SCRIPT into pieces by calling HUNKFUN and 463 print each piece with PRINTFUN. 464 Both functions take one arg, an edit script. 
465 466 HUNKFUN is called with the tail of the script 467 and returns the last link that belongs together with the start 468 of the tail. 469 470 PRINTFUN takes a subscript which belongs together (with a null 471 link at the end) and prints it. */ 472 473 void 474 print_script (struct change *script, 475 struct change * (*hunkfun) (struct change *), 476 void (*printfun) (struct change *)) 477 { 478 struct change *next = script; 479 480 while (next) 481 { 482 struct change *this, *end; 483 484 /* Find a set of changes that belong together. */ 485 this = next; 486 end = (*hunkfun) (next); 487 488 /* Disconnect them from the rest of the changes, 489 making them a hunk, and remember the rest for next iteration. */ 490 next = end->link; 491 end->link = 0; 492 #ifdef DEBUG 493 debug_script (this); 494 #endif 495 496 /* Print this hunk. */ 497 (*printfun) (this); 498 499 /* Reconnect the script so it will all be freed properly. */ 500 end->link = next; 501 } 502 } 503 504 /* Print the text of a single line LINE, 505 flagging it with the characters in LINE_FLAG (which say whether 506 the line is inserted, deleted, changed, etc.). */ 507 508 void 509 print_1_line (char const *line_flag, char const *const *line) 510 { 511 char const *base = line[0], *limit = line[1]; /* Help the compiler. */ 512 FILE *out = outfile; /* Help the compiler some more. */ 513 char const *flag_format = 0; 514 515 /* If -T was specified, use a Tab between the line-flag and the text. 516 Otherwise use a Space (as Unix diff does). 517 Print neither space nor tab if line-flags are empty. */ 518 519 if (line_flag && *line_flag) 520 { 521 flag_format = initial_tab ? "%s\t" : "%s "; 522 fprintf (out, flag_format, line_flag); 523 } 524 525 output_1_line (base, limit, flag_format, line_flag); 526 527 if ((!line_flag || line_flag[0]) && limit[-1] != '\n') 528 fprintf (out, "\n\\ %s\n", _("No newline at end of file")); 529 } 530 531 /* Output a line from BASE up to LIMIT. 
532 With -t, expand white space characters to spaces, and if FLAG_FORMAT 533 is nonzero, output it with argument LINE_FLAG after every 534 internal carriage return, so that tab stops continue to line up. */ 535 536 void 537 output_1_line (char const *base, char const *limit, char const *flag_format, 538 char const *line_flag) 539 { 540 if (!expand_tabs) 541 fwrite (base, limit - base, 1, outfile); 542 else 543 { 544 register FILE *out = outfile; 545 register unsigned char c; 546 register char const *t = base; 547 register unsigned int column = 0; 548 549 while (t < limit) 550 switch ((c = *t++)) 551 { 552 case '\t': 553 { 554 unsigned int spaces = TAB_WIDTH - column % TAB_WIDTH; 555 column += spaces; 556 do 557 putc (' ', out); 558 while (--spaces); 559 } 560 break; 561 562 case '\r': 563 putc (c, out); 564 if (flag_format && t < limit && *t != '\n') 565 fprintf (out, flag_format, line_flag); 566 column = 0; 567 break; 568 569 case '\b': 570 if (column == 0) 571 continue; 572 column--; 573 putc (c, out); 574 break; 575 576 default: 577 if (ISPRINT (c)) 578 column++; 579 putc (c, out); 580 break; 581 } 582 } 583 } 584 585 char const change_letter[] = { 0, 'd', 'a', 'c' }; 586 587 /* Translate an internal line number (an index into diff's table of lines) 588 into an actual line number in the input file. 589 The internal line number is I. FILE points to the data on the file. 590 591 Internal line numbers count from 0 starting after the prefix. 592 Actual line numbers count from 1 within the entire file. */ 593 594 lin 595 translate_line_number (struct file_data const *file, lin i) 596 { 597 return i + file->prefix_lines + 1; 598 } 599 600 /* Translate a line number range. This is always done for printing, 601 so for convenience translate to long rather than lin, so that the 602 caller can use printf with "%ld" without casting. 
*/ 603 604 void 605 translate_range (struct file_data const *file, 606 lin a, lin b, 607 long *aptr, long *bptr) 608 { 609 *aptr = translate_line_number (file, a - 1) + 1; 610 *bptr = translate_line_number (file, b + 1) - 1; 611 } 612 613 /* Print a pair of line numbers with SEPCHAR, translated for file FILE. 614 If the two numbers are identical, print just one number. 615 616 Args A and B are internal line numbers. 617 We print the translated (real) line numbers. */ 618 619 void 620 print_number_range (char sepchar, struct file_data *file, lin a, lin b) 621 { 622 long trans_a, trans_b; 623 translate_range (file, a, b, &trans_a, &trans_b); 624 625 /* Note: we can have B < A in the case of a range of no lines. 626 In this case, we should print the line number before the range, 627 which is B. */ 628 if (trans_b > trans_a) 629 fprintf (outfile, "%ld%c%ld", trans_a, sepchar, trans_b); 630 else 631 fprintf (outfile, "%ld", trans_b); 632 } 633 634 /* Look at a hunk of edit script and report the range of lines in each file 635 that it applies to. HUNK is the start of the hunk, which is a chain 636 of `struct change'. The first and last line numbers of file 0 are stored in 637 *FIRST0 and *LAST0, and likewise for file 1 in *FIRST1 and *LAST1. 638 Note that these are internal line numbers that count from 0. 639 640 If no lines from file 0 are deleted, then FIRST0 is LAST0+1. 641 642 Return UNCHANGED if only ignorable lines are inserted or deleted, 643 OLD if lines of file 0 are deleted, 644 NEW if lines of file 1 are inserted, 645 and CHANGED if both kinds of changes are found. 
*/

enum changes
analyze_hunk (struct change *hunk,
              lin *first0, lin *last0,
              lin *first1, lin *last1)
{
  char const * const *lines0 = files[0].linbuf;
  char const * const *lines1 = files[1].linbuf;

  /* The hunk can only be ignorable if blank lines or lines matching
     a regexp are being ignored at all.  */
  bool all_ignorable = ignore_blank_lines || ignore_regexp.fastmap;

  /* If 0, ignore zero-length lines;
     if SIZE_MAX, do not ignore lines just because of their length.  */
  size_t blank_length = (int) ignore_blank_lines - 1;

  lin deleted_total = 0;
  lin inserted_total = 0;
  lin end0, end1;
  struct change *piece = hunk;
  enum changes deleted_kind, inserted_kind;

  *first0 = hunk->line0;
  *first1 = hunk->line1;

  do
    {
      lin n;

      end0 = piece->line0 + piece->deleted - 1;
      end1 = piece->line1 + piece->inserted - 1;
      deleted_total += piece->deleted;
      inserted_total += piece->inserted;

      /* Check the lines deleted from file 0, stopping at the first
         one that cannot be ignored.  */
      for (n = piece->line0; all_ignorable && n <= end0; n++)
        {
          char const *text = lines0[n];
          size_t length = lines0[n + 1] - text - 1;

          if (length == blank_length)
            continue;
          if (ignore_regexp.fastmap
              && 0 <= re_search (&ignore_regexp, text, length, 0, length, 0))
            continue;
          all_ignorable = 0;
        }

      /* Likewise for the lines inserted from file 1.  */
      for (n = piece->line1; all_ignorable && n <= end1; n++)
        {
          char const *text = lines1[n];
          size_t length = lines1[n + 1] - text - 1;

          if (length == blank_length)
            continue;
          if (ignore_regexp.fastmap
              && 0 <= re_search (&ignore_regexp, text, length, 0, length, 0))
            continue;
          all_ignorable = 0;
        }

      piece = piece->link;
    }
  while (piece != 0);

  /* The last lines are those of the final change in the chain.  */
  *last0 = end0;
  *last1 = end1;

  /* If all inserted or deleted lines are ignorable,
     tell the caller to ignore this hunk.  */

  if (all_ignorable)
    return UNCHANGED;

  deleted_kind = deleted_total ? OLD : UNCHANGED;
  inserted_kind = inserted_total ? NEW : UNCHANGED;
  return deleted_kind | inserted_kind;
}

/* Concatenate three strings, returning a newly malloc'd string.
*/ 712 713 char * 714 concat (char const *s1, char const *s2, char const *s3) 715 { 716 char *new = xmalloc (strlen (s1) + strlen (s2) + strlen (s3) + 1); 717 sprintf (new, "%s%s%s", s1, s2, s3); 718 return new; 719 } 720 721 /* Yield a new block of SIZE bytes, initialized to zero. */ 722 723 void * 724 zalloc (size_t size) 725 { 726 void *p = xmalloc (size); 727 memset (p, 0, size); 728 return p; 729 } 730 731 /* Yield the newly malloc'd pathname 732 of the file in DIR whose filename is FILE. */ 733 734 char * 735 dir_file_pathname (char const *dir, char const *file) 736 { 737 char const *base = base_name (dir); 738 bool omit_slash = !*base || base[strlen (base) - 1] == '/'; 739 return concat (dir, "/" + omit_slash, file); 740 } 741 742 void 743 debug_script (struct change *sp) 744 { 745 fflush (stdout); 746 747 for (; sp; sp = sp->link) 748 { 749 long line0 = sp->line0; 750 long line1 = sp->line1; 751 long deleted = sp->deleted; 752 long inserted = sp->inserted; 753 fprintf (stderr, "%3ld %3ld delete %ld insert %ld\n", 754 line0, line1, deleted, inserted); 755 } 756 757 fflush (stderr); 758 } 759