1 /* Analyze file differences for GNU DIFF. 2 Copyright (C) 1988, 1989, 1992, 1993, 1997 Free Software Foundation, Inc. 3 4 This file is part of GNU DIFF. 5 6 GNU DIFF is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU DIFF is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 */ 17 18 /* The basic algorithm is described in: 19 "An O(ND) Difference Algorithm and its Variations", Eugene Myers, 20 Algorithmica Vol. 1 No. 2, 1986, pp. 251-266; 21 see especially section 4.2, which describes the variation used below. 22 Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE 23 heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N) 24 at the price of producing suboptimal output for large inputs with 25 many differences. 26 27 The basic algorithm was independently discovered as described in: 28 "Algorithms for Approximate String Matching", E. Ukkonen, 29 Information and Control Vol. 64, 1985, pp. 100-118. */ 30 31 #include "diff.h" 32 #include "cmpbuf.h" 33 34 extern int no_discards; 35 36 static int *xvec, *yvec; /* Vectors being compared. */ 37 static int *fdiag; /* Vector, indexed by diagonal, containing 38 1 + the X coordinate of the point furthest 39 along the given diagonal in the forward 40 search of the edit matrix. */ 41 static int *bdiag; /* Vector, indexed by diagonal, containing 42 the X coordinate of the point furthest 43 along the given diagonal in the backward 44 search of the edit matrix. */ 45 static int too_expensive; /* Edit scripts longer than this are too 46 expensive to compute. */ 47 48 #define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */ 49 50 struct partition 51 { 52 int xmid, ymid; /* Midpoints of this partition. */ 53 int lo_minimal; /* Nonzero if low half will be analyzed minimally. */ 54 int hi_minimal; /* Likewise for high half. */ 55 }; 56 57 static int diag PARAMS((int, int, int, int, int, struct partition *)); 58 static struct change *add_change PARAMS((int, int, int, int, struct change *)); 59 static struct change *build_reverse_script PARAMS((struct file_data const[])); 60 static struct change *build_script PARAMS((struct file_data const[])); 61 static void briefly_report PARAMS((int, struct file_data const[])); 62 static void compareseq PARAMS((int, int, int, int, int)); 63 static void discard_confusing_lines PARAMS((struct file_data[])); 64 static void shift_boundaries PARAMS((struct file_data[])); 65 66 /* Find the midpoint of the shortest edit script for a specified 67 portion of the two files. 68 69 Scan from the beginnings of the files, and simultaneously from the ends, 70 doing a breadth-first search through the space of edit-sequence. 71 When the two searches meet, we have found the midpoint of the shortest 72 edit sequence. 73 74 If MINIMAL is nonzero, find the minimal edit script regardless 75 of expense. Otherwise, if the search is too expensive, use 76 heuristics to stop the search and report a suboptimal answer. 77 78 Set PART->(XMID,YMID) to the midpoint (XMID,YMID). The diagonal number 79 XMID - YMID equals the number of inserted lines minus the number 80 of deleted lines (counting only lines before the midpoint). 81 Return the approximate edit cost; this is the total number of 82 lines inserted or deleted (counting only lines before the midpoint), 83 unless a heuristic is used to terminate the search prematurely. 84 85 Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script for the 86 left half of the partition is known; similarly for PART->RIGHT_MINIMAL. 87 88 This function assumes that the first lines of the specified portions 89 of the two files do not match, and likewise that the last lines do not 90 match. The caller must trim matching lines from the beginning and end 91 of the portions it is going to specify. 92 93 If we return the "wrong" partitions, 94 the worst this can do is cause suboptimal diff output. 95 It cannot cause incorrect diff output. */ 96 97 static int 98 diag (xoff, xlim, yoff, ylim, minimal, part) 99 int xoff, xlim, yoff, ylim, minimal; 100 struct partition *part; 101 { 102 int *const fd = fdiag; /* Give the compiler a chance. */ 103 int *const bd = bdiag; /* Additional help for the compiler. */ 104 int const *const xv = xvec; /* Still more help for the compiler. */ 105 int const *const yv = yvec; /* And more and more . . . */ 106 int const dmin = xoff - ylim; /* Minimum valid diagonal. */ 107 int const dmax = xlim - yoff; /* Maximum valid diagonal. */ 108 int const fmid = xoff - yoff; /* Center diagonal of top-down search. */ 109 int const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */ 110 int fmin = fmid, fmax = fmid; /* Limits of top-down search. */ 111 int bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */ 112 int c; /* Cost. */ 113 int odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd 114 diagonal with respect to the northwest. */ 115 116 fd[fmid] = xoff; 117 bd[bmid] = xlim; 118 119 for (c = 1;; ++c) 120 { 121 int d; /* Active diagonal. */ 122 int big_snake = 0; 123 124 /* Extend the top-down search by an edit step in each diagonal. */ 125 fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin; 126 fmax < dmax ? fd[++fmax + 1] = -1 : --fmax; 127 for (d = fmax; d >= fmin; d -= 2) 128 { 129 int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1]; 130 131 if (tlo >= thi) 132 x = tlo + 1; 133 else 134 x = thi; 135 oldx = x; 136 y = x - d; 137 while (x < xlim && y < ylim && xv[x] == yv[y]) 138 ++x, ++y; 139 if (x - oldx > SNAKE_LIMIT) 140 big_snake = 1; 141 fd[d] = x; 142 if (odd && bmin <= d && d <= bmax && bd[d] <= x) 143 { 144 part->xmid = x; 145 part->ymid = y; 146 part->lo_minimal = part->hi_minimal = 1; 147 return 2 * c - 1; 148 } 149 } 150 151 /* Similarly extend the bottom-up search. */ 152 bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin; 153 bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax; 154 for (d = bmax; d >= bmin; d -= 2) 155 { 156 int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1]; 157 158 if (tlo < thi) 159 x = tlo; 160 else 161 x = thi - 1; 162 oldx = x; 163 y = x - d; 164 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) 165 --x, --y; 166 if (oldx - x > SNAKE_LIMIT) 167 big_snake = 1; 168 bd[d] = x; 169 if (!odd && fmin <= d && d <= fmax && x <= fd[d]) 170 { 171 part->xmid = x; 172 part->ymid = y; 173 part->lo_minimal = part->hi_minimal = 1; 174 return 2 * c; 175 } 176 } 177 178 if (minimal) 179 continue; 180 181 /* Heuristic: check occasionally for a diagonal that has made 182 lots of progress compared with the edit distance. 183 If we have any such, find the one that has made the most 184 progress and return it as if it had succeeded. 185 186 With this heuristic, for files with a constant small density 187 of changes, the algorithm is linear in the file size. */ 188 189 if (c > 200 && big_snake && heuristic) 190 { 191 int best; 192 193 best = 0; 194 for (d = fmax; d >= fmin; d -= 2) 195 { 196 int dd = d - fmid; 197 int x = fd[d]; 198 int y = x - d; 199 int v = (x - xoff) * 2 - dd; 200 if (v > 12 * (c + (dd < 0 ? -dd : dd))) 201 { 202 if (v > best 203 && xoff + SNAKE_LIMIT <= x && x < xlim 204 && yoff + SNAKE_LIMIT <= y && y < ylim) 205 { 206 /* We have a good enough best diagonal; 207 now insist that it end with a significant snake. */ 208 int k; 209 210 for (k = 1; xv[x - k] == yv[y - k]; k++) 211 if (k == SNAKE_LIMIT) 212 { 213 best = v; 214 part->xmid = x; 215 part->ymid = y; 216 break; 217 } 218 } 219 } 220 } 221 if (best > 0) 222 { 223 part->lo_minimal = 1; 224 part->hi_minimal = 0; 225 return 2 * c - 1; 226 } 227 228 best = 0; 229 for (d = bmax; d >= bmin; d -= 2) 230 { 231 int dd = d - bmid; 232 int x = bd[d]; 233 int y = x - d; 234 int v = (xlim - x) * 2 + dd; 235 if (v > 12 * (c + (dd < 0 ? -dd : dd))) 236 { 237 if (v > best 238 && xoff < x && x <= xlim - SNAKE_LIMIT 239 && yoff < y && y <= ylim - SNAKE_LIMIT) 240 { 241 /* We have a good enough best diagonal; 242 now insist that it end with a significant snake. */ 243 int k; 244 245 for (k = 0; xv[x + k] == yv[y + k]; k++) 246 if (k == SNAKE_LIMIT - 1) 247 { 248 best = v; 249 part->xmid = x; 250 part->ymid = y; 251 break; 252 } 253 } 254 } 255 } 256 if (best > 0) 257 { 258 part->lo_minimal = 0; 259 part->hi_minimal = 1; 260 return 2 * c - 1; 261 } 262 } 263 264 /* Heuristic: if we've gone well beyond the call of duty, 265 give up and report halfway between our best results so far. */ 266 if (c >= too_expensive) 267 { 268 int fxybest, fxbest; 269 int bxybest, bxbest; 270 271 fxbest = bxbest = 0; /* Pacify `gcc -Wall'. */ 272 273 /* Find forward diagonal that maximizes X + Y. */ 274 fxybest = -1; 275 for (d = fmax; d >= fmin; d -= 2) 276 { 277 int x = min (fd[d], xlim); 278 int y = x - d; 279 if (ylim < y) 280 x = ylim + d, y = ylim; 281 if (fxybest < x + y) 282 { 283 fxybest = x + y; 284 fxbest = x; 285 } 286 } 287 288 /* Find backward diagonal that minimizes X + Y. */ 289 bxybest = INT_MAX; 290 for (d = bmax; d >= bmin; d -= 2) 291 { 292 int x = max (xoff, bd[d]); 293 int y = x - d; 294 if (y < yoff) 295 x = yoff + d, y = yoff; 296 if (x + y < bxybest) 297 { 298 bxybest = x + y; 299 bxbest = x; 300 } 301 } 302 303 /* Use the better of the two diagonals. */ 304 if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) 305 { 306 part->xmid = fxbest; 307 part->ymid = fxybest - fxbest; 308 part->lo_minimal = 1; 309 part->hi_minimal = 0; 310 } 311 else 312 { 313 part->xmid = bxbest; 314 part->ymid = bxybest - bxbest; 315 part->lo_minimal = 0; 316 part->hi_minimal = 1; 317 } 318 return 2 * c - 1; 319 } 320 } 321 } 322 323 /* Compare in detail contiguous subsequences of the two files 324 which are known, as a whole, to match each other. 325 326 The results are recorded in the vectors files[N].changed_flag, by 327 storing a 1 in the element for each line that is an insertion or deletion. 328 329 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1. 330 331 Note that XLIM, YLIM are exclusive bounds. 332 All line numbers are origin-0 and discarded lines are not counted. 333 334 If MINIMAL is nonzero, find a minimal difference no matter how 335 expensive it is. */ 336 337 static void 338 compareseq (xoff, xlim, yoff, ylim, minimal) 339 int xoff, xlim, yoff, ylim, minimal; 340 { 341 int * const xv = xvec; /* Help the compiler. */ 342 int * const yv = yvec; 343 344 /* Slide down the bottom initial diagonal. */ 345 while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff]) 346 ++xoff, ++yoff; 347 /* Slide up the top initial diagonal. */ 348 while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1]) 349 --xlim, --ylim; 350 351 /* Handle simple cases. */ 352 if (xoff == xlim) 353 while (yoff < ylim) 354 files[1].changed_flag[files[1].realindexes[yoff++]] = 1; 355 else if (yoff == ylim) 356 while (xoff < xlim) 357 files[0].changed_flag[files[0].realindexes[xoff++]] = 1; 358 else 359 { 360 int c; 361 struct partition part; 362 363 /* Find a point of correspondence in the middle of the files. */ 364 365 c = diag (xoff, xlim, yoff, ylim, minimal, &part); 366 367 if (c == 1) 368 { 369 /* This should be impossible, because it implies that 370 one of the two subsequences is empty, 371 and that case was handled above without calling `diag'. 372 Let's verify that this is true. */ 373 abort (); 374 #if 0 375 /* The two subsequences differ by a single insert or delete; 376 record it and we are done. */ 377 if (part.xmid - part.ymid < xoff - yoff) 378 files[1].changed_flag[files[1].realindexes[part.ymid - 1]] = 1; 379 else 380 files[0].changed_flag[files[0].realindexes[part.xmid]] = 1; 381 #endif 382 } 383 else 384 { 385 /* Use the partitions to split this problem into subproblems. */ 386 compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal); 387 compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal); 388 } 389 } 390 } 391 392 /* Discard lines from one file that have no matches in the other file. 393 394 A line which is discarded will not be considered by the actual 395 comparison algorithm; it will be as if that line were not in the file. 396 The file's `realindexes' table maps virtual line numbers 397 (which don't count the discarded lines) into real line numbers; 398 this is how the actual comparison algorithm produces results 399 that are comprehensible when the discarded lines are counted. 400 401 When we discard a line, we also mark it as a deletion or insertion 402 so that it will be printed in the output. */ 403 404 static void 405 discard_confusing_lines (filevec) 406 struct file_data filevec[]; 407 { 408 unsigned int f, i; 409 char *discarded[2]; 410 int *equiv_count[2]; 411 int *p; 412 413 /* Allocate our results. */ 414 p = (int *) xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines) 415 * (2 * sizeof (int))); 416 for (f = 0; f < 2; f++) 417 { 418 filevec[f].undiscarded = p; p += filevec[f].buffered_lines; 419 filevec[f].realindexes = p; p += filevec[f].buffered_lines; 420 } 421 422 /* Set up equiv_count[F][I] as the number of lines in file F 423 that fall in equivalence class I. */ 424 425 p = (int *) xmalloc (filevec[0].equiv_max * (2 * sizeof (int))); 426 equiv_count[0] = p; 427 equiv_count[1] = p + filevec[0].equiv_max; 428 bzero (p, filevec[0].equiv_max * (2 * sizeof (int))); 429 430 for (i = 0; i < filevec[0].buffered_lines; ++i) 431 ++equiv_count[0][filevec[0].equivs[i]]; 432 for (i = 0; i < filevec[1].buffered_lines; ++i) 433 ++equiv_count[1][filevec[1].equivs[i]]; 434 435 /* Set up tables of which lines are going to be discarded. */ 436 437 discarded[0] = xmalloc (sizeof (char) 438 * (filevec[0].buffered_lines 439 + filevec[1].buffered_lines)); 440 discarded[1] = discarded[0] + filevec[0].buffered_lines; 441 bzero (discarded[0], sizeof (char) * (filevec[0].buffered_lines 442 + filevec[1].buffered_lines)); 443 444 /* Mark to be discarded each line that matches no line of the other file. 445 If a line matches many lines, mark it as provisionally discardable. */ 446 447 for (f = 0; f < 2; f++) 448 { 449 unsigned int end = filevec[f].buffered_lines; 450 char *discards = discarded[f]; 451 int *counts = equiv_count[1 - f]; 452 int *equivs = filevec[f].equivs; 453 unsigned int many = 5; 454 unsigned int tem = end / 64; 455 456 /* Multiply MANY by approximate square root of number of lines. 457 That is the threshold for provisionally discardable lines. */ 458 while ((tem = tem >> 2) > 0) 459 many *= 2; 460 461 for (i = 0; i < end; i++) 462 { 463 int nmatch; 464 if (equivs[i] == 0) 465 continue; 466 nmatch = counts[equivs[i]]; 467 if (nmatch == 0) 468 discards[i] = 1; 469 else if (nmatch > many) 470 discards[i] = 2; 471 } 472 } 473 474 /* Don't really discard the provisional lines except when they occur 475 in a run of discardables, with nonprovisionals at the beginning 476 and end. */ 477 478 for (f = 0; f < 2; f++) 479 { 480 unsigned int end = filevec[f].buffered_lines; 481 register char *discards = discarded[f]; 482 483 for (i = 0; i < end; i++) 484 { 485 /* Cancel provisional discards not in middle of run of discards. */ 486 if (discards[i] == 2) 487 discards[i] = 0; 488 else if (discards[i] != 0) 489 { 490 /* We have found a nonprovisional discard. */ 491 register int j; 492 unsigned int length; 493 unsigned int provisional = 0; 494 495 /* Find end of this run of discardable lines. 496 Count how many are provisionally discardable. */ 497 for (j = i; j < end; j++) 498 { 499 if (discards[j] == 0) 500 break; 501 if (discards[j] == 2) 502 ++provisional; 503 } 504 505 /* Cancel provisional discards at end, and shrink the run. */ 506 while (j > i && discards[j - 1] == 2) 507 discards[--j] = 0, --provisional; 508 509 /* Now we have the length of a run of discardable lines 510 whose first and last are not provisional. */ 511 length = j - i; 512 513 /* If 1/4 of the lines in the run are provisional, 514 cancel discarding of all provisional lines in the run. */ 515 if (provisional * 4 > length) 516 { 517 while (j > i) 518 if (discards[--j] == 2) 519 discards[j] = 0; 520 } 521 else 522 { 523 register unsigned int consec; 524 unsigned int minimum = 1; 525 unsigned int tem = length / 4; 526 527 /* MINIMUM is approximate square root of LENGTH/4. 528 A subrun of two or more provisionals can stand 529 when LENGTH is at least 16. 530 A subrun of 4 or more can stand when LENGTH >= 64. */ 531 while ((tem = tem >> 2) > 0) 532 minimum *= 2; 533 minimum++; 534 535 /* Cancel any subrun of MINIMUM or more provisionals 536 within the larger run. */ 537 for (j = 0, consec = 0; j < length; j++) 538 if (discards[i + j] != 2) 539 consec = 0; 540 else if (minimum == ++consec) 541 /* Back up to start of subrun, to cancel it all. */ 542 j -= consec; 543 else if (minimum < consec) 544 discards[i + j] = 0; 545 546 /* Scan from beginning of run 547 until we find 3 or more nonprovisionals in a row 548 or until the first nonprovisional at least 8 lines in. 549 Until that point, cancel any provisionals. */ 550 for (j = 0, consec = 0; j < length; j++) 551 { 552 if (j >= 8 && discards[i + j] == 1) 553 break; 554 if (discards[i + j] == 2) 555 consec = 0, discards[i + j] = 0; 556 else if (discards[i + j] == 0) 557 consec = 0; 558 else 559 consec++; 560 if (consec == 3) 561 break; 562 } 563 564 /* I advances to the last line of the run. */ 565 i += length - 1; 566 567 /* Same thing, from end. */ 568 for (j = 0, consec = 0; j < length; j++) 569 { 570 if (j >= 8 && discards[i - j] == 1) 571 break; 572 if (discards[i - j] == 2) 573 consec = 0, discards[i - j] = 0; 574 else if (discards[i - j] == 0) 575 consec = 0; 576 else 577 consec++; 578 if (consec == 3) 579 break; 580 } 581 } 582 } 583 } 584 } 585 586 /* Actually discard the lines. */ 587 for (f = 0; f < 2; f++) 588 { 589 char *discards = discarded[f]; 590 unsigned int end = filevec[f].buffered_lines; 591 unsigned int j = 0; 592 for (i = 0; i < end; ++i) 593 if (no_discards || discards[i] == 0) 594 { 595 filevec[f].undiscarded[j] = filevec[f].equivs[i]; 596 filevec[f].realindexes[j++] = i; 597 } 598 else 599 filevec[f].changed_flag[i] = 1; 600 filevec[f].nondiscarded_lines = j; 601 } 602 603 free (discarded[0]); 604 free (equiv_count[0]); 605 } 606 607 /* Adjust inserts/deletes of identical lines to join changes 608 as much as possible. 609 610 We do something when a run of changed lines include a 611 line at one end and have an excluded, identical line at the other. 612 We are free to choose which identical line is included. 613 `compareseq' usually chooses the one at the beginning, 614 but usually it is cleaner to consider the following identical line 615 to be the "change". */ 616 617 int inhibit; 618 619 static void 620 shift_boundaries (filevec) 621 struct file_data filevec[]; 622 { 623 int f; 624 int inhibit_hunk_merge = horizon_lines != context; 625 626 for (f = 0; f < 2; f++) 627 { 628 char *changed = filevec[f].changed_flag; 629 char const *other_changed = filevec[1-f].changed_flag; 630 int const *equivs = filevec[f].equivs; 631 int i = 0; 632 int j = 0; 633 int i_end = filevec[f].buffered_lines; 634 635 while (1) 636 { 637 int runlength, start, corresponding; 638 639 /* Scan forwards to find beginning of another run of changes. 640 Also keep track of the corresponding point in the other file. */ 641 642 while (i < i_end && changed[i] == 0) 643 { 644 while (other_changed[j++]) 645 continue; 646 i++; 647 } 648 649 if (i == i_end) 650 break; 651 652 start = i; 653 654 /* Find the end of this run of changes. */ 655 656 while (changed[++i]) 657 continue; 658 while (other_changed[j]) 659 j++; 660 661 do 662 { 663 /* Record the length of this run of changes, so that 664 we can later determine whether the run has grown. */ 665 runlength = i - start; 666 667 if (! inhibit_hunk_merge) 668 { 669 /* Move the changed region back, so long as the 670 previous unchanged line matches the last changed one. 671 This merges with previous changed regions. */ 672 673 while (start && equivs[start - 1] == equivs[i - 1]) 674 { 675 changed[--start] = 1; 676 changed[--i] = 0; 677 while (changed[start - 1]) 678 start--; 679 while (other_changed[--j]) 680 continue; 681 } 682 } 683 684 /* Set CORRESPONDING to the end of the changed run, at the last 685 point where it corresponds to a changed run in the other file. 686 CORRESPONDING == I_END means no such point has been found. */ 687 corresponding = other_changed[j - 1] ? i : i_end; 688 689 /* Shift the changed region forward, so long as the 690 first changed line matches the following unchanged one, 691 but if INHIBIT_HUNK_MERGE is 1 do not shift if 692 this would merge with another changed region. 693 Do this second, so that if there are no merges, 694 the changed region is moved forward as far as possible. */ 695 696 while (i != i_end && equivs[start] == equivs[i] 697 && ! (inhibit_hunk_merge & other_changed[j + 1])) 698 { 699 changed[start++] = 0; 700 changed[i++] = 1; 701 while (changed[i]) 702 i++; 703 while (other_changed[++j]) 704 corresponding = i; 705 } 706 } 707 while (runlength != i - start); 708 709 /* If possible, move the fully-merged run of changes 710 back to a corresponding run in the other file. */ 711 712 while (corresponding < i) 713 { 714 changed[--start] = 1; 715 changed[--i] = 0; 716 while (other_changed[--j]) 717 continue; 718 } 719 } 720 } 721 } 722 723 /* Cons an additional entry onto the front of an edit script OLD. 724 LINE0 and LINE1 are the first affected lines in the two files (origin 0). 725 DELETED is the number of lines deleted here from file 0. 726 INSERTED is the number of lines inserted here in file 1. 727 728 If DELETED is 0 then LINE0 is the number of the line before 729 which the insertion was done; vice versa for INSERTED and LINE1. */ 730 731 static struct change * 732 add_change (line0, line1, deleted, inserted, old) 733 int line0, line1, deleted, inserted; 734 struct change *old; 735 { 736 struct change *new = (struct change *) xmalloc (sizeof (struct change)); 737 738 new->line0 = line0; 739 new->line1 = line1; 740 new->inserted = inserted; 741 new->deleted = deleted; 742 new->link = old; 743 return new; 744 } 745 746 /* Scan the tables of which lines are inserted and deleted, 747 producing an edit script in reverse order. */ 748 749 static struct change * 750 build_reverse_script (filevec) 751 struct file_data const filevec[]; 752 { 753 struct change *script = 0; 754 char *changed0 = filevec[0].changed_flag; 755 char *changed1 = filevec[1].changed_flag; 756 int len0 = filevec[0].buffered_lines; 757 int len1 = filevec[1].buffered_lines; 758 759 /* Note that changedN[len0] does exist, and contains 0. */ 760 761 int i0 = 0, i1 = 0; 762 763 while (i0 < len0 || i1 < len1) 764 { 765 if (changed0[i0] || changed1[i1]) 766 { 767 int line0 = i0, line1 = i1; 768 769 /* Find # lines changed here in each file. */ 770 while (changed0[i0]) ++i0; 771 while (changed1[i1]) ++i1; 772 773 /* Record this change. */ 774 script = add_change (line0, line1, i0 - line0, i1 - line1, script); 775 } 776 777 /* We have reached lines in the two files that match each other. */ 778 i0++, i1++; 779 } 780 781 return script; 782 } 783 784 /* Scan the tables of which lines are inserted and deleted, 785 producing an edit script in forward order. */ 786 787 static struct change * 788 build_script (filevec) 789 struct file_data const filevec[]; 790 { 791 struct change *script = 0; 792 char *changed0 = filevec[0].changed_flag; 793 char *changed1 = filevec[1].changed_flag; 794 int i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines; 795 796 /* Note that changedN[-1] does exist, and contains 0. */ 797 798 while (i0 >= 0 || i1 >= 0) 799 { 800 if (changed0[i0 - 1] || changed1[i1 - 1]) 801 { 802 int line0 = i0, line1 = i1; 803 804 /* Find # lines changed here in each file. */ 805 while (changed0[i0 - 1]) --i0; 806 while (changed1[i1 - 1]) --i1; 807 808 /* Record this change. */ 809 script = add_change (i0, i1, line0 - i0, line1 - i1, script); 810 } 811 812 /* We have reached lines in the two files that match each other. */ 813 i0--, i1--; 814 } 815 816 return script; 817 } 818 819 /* If CHANGES, briefly report that two files differed. */ 820 static void 821 briefly_report (changes, filevec) 822 int changes; 823 struct file_data const filevec[]; 824 { 825 if (changes) 826 message (no_details_flag ? "Files %s and %s differ\n" 827 : "Binary files %s and %s differ\n", 828 filevec[0].name, filevec[1].name); 829 } 830 831 /* Report the differences of two files. DEPTH is the current directory 832 depth. */ 833 int 834 diff_2_files (filevec, depth) 835 struct file_data filevec[]; 836 int depth; 837 { 838 int diags; 839 int i; 840 struct change *e, *p; 841 struct change *script; 842 int changes; 843 844 845 /* If we have detected that either file is binary, 846 compare the two files as binary. This can happen 847 only when the first chunk is read. 848 Also, --brief without any --ignore-* options means 849 we can speed things up by treating the files as binary. */ 850 851 if (read_files (filevec, no_details_flag & ~ignore_some_changes)) 852 { 853 /* Files with different lengths must be different. */ 854 if (filevec[0].stat.st_size != filevec[1].stat.st_size 855 && (filevec[0].desc < 0 || S_ISREG (filevec[0].stat.st_mode)) 856 && (filevec[1].desc < 0 || S_ISREG (filevec[1].stat.st_mode))) 857 changes = 1; 858 859 /* Standard input equals itself. */ 860 else if (filevec[0].desc == filevec[1].desc) 861 changes = 0; 862 863 else 864 /* Scan both files, a buffer at a time, looking for a difference. */ 865 { 866 /* Allocate same-sized buffers for both files. */ 867 size_t buffer_size = buffer_lcm (STAT_BLOCKSIZE (filevec[0].stat), 868 STAT_BLOCKSIZE (filevec[1].stat)); 869 for (i = 0; i < 2; i++) 870 filevec[i].buffer = xrealloc (filevec[i].buffer, buffer_size); 871 872 for (;; filevec[0].buffered_chars = filevec[1].buffered_chars = 0) 873 { 874 /* Read a buffer's worth from both files. */ 875 for (i = 0; i < 2; i++) 876 if (0 <= filevec[i].desc) 877 while (filevec[i].buffered_chars != buffer_size) 878 { 879 int r = read (filevec[i].desc, 880 filevec[i].buffer 881 + filevec[i].buffered_chars, 882 buffer_size - filevec[i].buffered_chars); 883 if (r == 0) 884 break; 885 if (r < 0) 886 pfatal_with_name (filevec[i].name); 887 filevec[i].buffered_chars += r; 888 } 889 890 /* If the buffers differ, the files differ. */ 891 if (filevec[0].buffered_chars != filevec[1].buffered_chars 892 || (filevec[0].buffered_chars != 0 893 && memcmp (filevec[0].buffer, 894 filevec[1].buffer, 895 filevec[0].buffered_chars) != 0)) 896 { 897 changes = 1; 898 break; 899 } 900 901 /* If we reach end of file, the files are the same. */ 902 if (filevec[0].buffered_chars != buffer_size) 903 { 904 changes = 0; 905 break; 906 } 907 } 908 } 909 910 briefly_report (changes, filevec); 911 } 912 else 913 { 914 /* Allocate vectors for the results of comparison: 915 a flag for each line of each file, saying whether that line 916 is an insertion or deletion. 917 Allocate an extra element, always zero, at each end of each vector. */ 918 919 size_t s = filevec[0].buffered_lines + filevec[1].buffered_lines + 4; 920 filevec[0].changed_flag = xmalloc (s); 921 bzero (filevec[0].changed_flag, s); 922 filevec[0].changed_flag++; 923 filevec[1].changed_flag = filevec[0].changed_flag 924 + filevec[0].buffered_lines + 2; 925 926 /* Some lines are obviously insertions or deletions 927 because they don't match anything. Detect them now, and 928 avoid even thinking about them in the main comparison algorithm. */ 929 930 discard_confusing_lines (filevec); 931 932 /* Now do the main comparison algorithm, considering just the 933 undiscarded lines. */ 934 935 xvec = filevec[0].undiscarded; 936 yvec = filevec[1].undiscarded; 937 diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3; 938 fdiag = (int *) xmalloc (diags * (2 * sizeof (int))); 939 bdiag = fdiag + diags; 940 fdiag += filevec[1].nondiscarded_lines + 1; 941 bdiag += filevec[1].nondiscarded_lines + 1; 942 943 /* Set TOO_EXPENSIVE to be approximate square root of input size, 944 bounded below by 256. */ 945 too_expensive = 1; 946 for (i = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines; 947 i != 0; i >>= 2) 948 too_expensive <<= 1; 949 too_expensive = max (256, too_expensive); 950 951 files[0] = filevec[0]; 952 files[1] = filevec[1]; 953 954 compareseq (0, filevec[0].nondiscarded_lines, 955 0, filevec[1].nondiscarded_lines, no_discards); 956 957 free (fdiag - (filevec[1].nondiscarded_lines + 1)); 958 959 /* Modify the results slightly to make them prettier 960 in cases where that can validly be done. */ 961 962 shift_boundaries (filevec); 963 964 /* Get the results of comparison in the form of a chain 965 of `struct change's -- an edit script. */ 966 967 if (output_style == OUTPUT_ED) 968 script = build_reverse_script (filevec); 969 else 970 script = build_script (filevec); 971 972 /* Set CHANGES if we had any diffs. 973 If some changes are ignored, we must scan the script to decide. */ 974 if (ignore_blank_lines_flag || ignore_regexp_list) 975 { 976 struct change *next = script; 977 changes = 0; 978 979 while (next && changes == 0) 980 { 981 struct change *this, *end; 982 int first0, last0, first1, last1, deletes, inserts; 983 984 /* Find a set of changes that belong together. */ 985 this = next; 986 end = find_change (next); 987 988 /* Disconnect them from the rest of the changes, making them 989 a hunk, and remember the rest for next iteration. */ 990 next = end->link; 991 end->link = 0; 992 993 /* Determine whether this hunk is really a difference. */ 994 analyze_hunk (this, &first0, &last0, &first1, &last1, 995 &deletes, &inserts); 996 997 /* Reconnect the script so it will all be freed properly. */ 998 end->link = next; 999 1000 if (deletes || inserts) 1001 changes = 1; 1002 } 1003 } 1004 else 1005 changes = (script != 0); 1006 1007 if (no_details_flag) 1008 briefly_report (changes, filevec); 1009 else 1010 { 1011 if (changes || ! no_diff_means_no_output) 1012 { 1013 /* Record info for starting up output, 1014 to be used if and when we have some output to print. */ 1015 setup_output (files[0].name, files[1].name, depth); 1016 1017 switch (output_style) 1018 { 1019 case OUTPUT_CONTEXT: 1020 print_context_script (script, 0); 1021 break; 1022 1023 case OUTPUT_UNIFIED: 1024 print_context_script (script, 1); 1025 break; 1026 1027 case OUTPUT_ED: 1028 print_ed_script (script); 1029 break; 1030 1031 case OUTPUT_FORWARD_ED: 1032 pr_forward_ed_script (script); 1033 break; 1034 1035 case OUTPUT_RCS: 1036 print_rcs_script (script); 1037 break; 1038 1039 case OUTPUT_NORMAL: 1040 print_normal_script (script); 1041 break; 1042 1043 case OUTPUT_IFDEF: 1044 print_ifdef_script (script); 1045 break; 1046 1047 case OUTPUT_SDIFF: 1048 print_sdiff_script (script); 1049 } 1050 1051 finish_output (); 1052 } 1053 } 1054 1055 free (filevec[0].undiscarded); 1056 1057 free (filevec[0].changed_flag - 1); 1058 1059 for (i = 1; i >= 0; --i) 1060 free (filevec[i].equivs); 1061 1062 for (i = 0; i < 2; ++i) 1063 free (filevec[i].linbuf + filevec[i].linbuf_base); 1064 1065 for (e = script; e; e = p) 1066 { 1067 p = e->link; 1068 free (e); 1069 } 1070 1071 if (! ROBUST_OUTPUT_STYLE (output_style)) 1072 for (i = 0; i < 2; ++i) 1073 if (filevec[i].missing_newline) 1074 { 1075 diff_error ("No newline at end of file %s", filevec[i].name, ""); 1076 changes = 2; 1077 } 1078 } 1079 1080 if (filevec[0].buffer != filevec[1].buffer) 1081 free (filevec[0].buffer); 1082 free (filevec[1].buffer); 1083 1084 return changes; 1085 } 1086