xref: /openbsd-src/gnu/usr.bin/cvs/diff/analyze.c (revision 43c1707e6f6829177cb1974ee6615ce6c1307689)
12286d8edStholo /* Analyze file differences for GNU DIFF.
22286d8edStholo    Copyright (C) 1988, 1989, 1992, 1993, 1997 Free Software Foundation, Inc.
32286d8edStholo 
42286d8edStholo This file is part of GNU DIFF.
52286d8edStholo 
62286d8edStholo GNU DIFF is free software; you can redistribute it and/or modify
72286d8edStholo it under the terms of the GNU General Public License as published by
82286d8edStholo the Free Software Foundation; either version 2, or (at your option)
92286d8edStholo any later version.
102286d8edStholo 
112286d8edStholo GNU DIFF is distributed in the hope that it will be useful,
122286d8edStholo but WITHOUT ANY WARRANTY; without even the implied warranty of
132286d8edStholo MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
142286d8edStholo GNU General Public License for more details.
152286d8edStholo 
16c71bc7e2Stholo */
172286d8edStholo 
182286d8edStholo /* The basic algorithm is described in:
192286d8edStholo    "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
202286d8edStholo    Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
212286d8edStholo    see especially section 4.2, which describes the variation used below.
222286d8edStholo    Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
232286d8edStholo    heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
242286d8edStholo    at the price of producing suboptimal output for large inputs with
252286d8edStholo    many differences.
262286d8edStholo 
272286d8edStholo    The basic algorithm was independently discovered as described in:
282286d8edStholo    "Algorithms for Approximate String Matching", E. Ukkonen,
292286d8edStholo    Information and Control Vol. 64, 1985, pp. 100-118.  */
302286d8edStholo 
312286d8edStholo #include "diff.h"
322286d8edStholo #include "cmpbuf.h"
332286d8edStholo 
342286d8edStholo extern int no_discards;
352286d8edStholo 
362286d8edStholo static int *xvec, *yvec;	/* Vectors being compared. */
372286d8edStholo static int *fdiag;		/* Vector, indexed by diagonal, containing
382286d8edStholo 				   1 + the X coordinate of the point furthest
392286d8edStholo 				   along the given diagonal in the forward
402286d8edStholo 				   search of the edit matrix. */
412286d8edStholo static int *bdiag;		/* Vector, indexed by diagonal, containing
422286d8edStholo 				   the X coordinate of the point furthest
432286d8edStholo 				   along the given diagonal in the backward
442286d8edStholo 				   search of the edit matrix. */
452286d8edStholo static int too_expensive;	/* Edit scripts longer than this are too
462286d8edStholo 				   expensive to compute.  */
472286d8edStholo 
482286d8edStholo #define SNAKE_LIMIT 20	/* Snakes bigger than this are considered `big'.  */
492286d8edStholo 
502286d8edStholo struct partition
512286d8edStholo {
522286d8edStholo   int xmid, ymid;	/* Midpoints of this partition.  */
532286d8edStholo   int lo_minimal;	/* Nonzero if low half will be analyzed minimally.  */
542286d8edStholo   int hi_minimal;	/* Likewise for high half.  */
552286d8edStholo };
562286d8edStholo 
572286d8edStholo static int diag PARAMS((int, int, int, int, int, struct partition *));
582286d8edStholo static struct change *add_change PARAMS((int, int, int, int, struct change *));
592286d8edStholo static struct change *build_reverse_script PARAMS((struct file_data const[]));
602286d8edStholo static struct change *build_script PARAMS((struct file_data const[]));
612286d8edStholo static void briefly_report PARAMS((int, struct file_data const[]));
622286d8edStholo static void compareseq PARAMS((int, int, int, int, int));
632286d8edStholo static void discard_confusing_lines PARAMS((struct file_data[]));
642286d8edStholo static void shift_boundaries PARAMS((struct file_data[]));
652286d8edStholo 
662286d8edStholo /* Find the midpoint of the shortest edit script for a specified
672286d8edStholo    portion of the two files.
682286d8edStholo 
692286d8edStholo    Scan from the beginnings of the files, and simultaneously from the ends,
702286d8edStholo    doing a breadth-first search through the space of edit-sequence.
712286d8edStholo    When the two searches meet, we have found the midpoint of the shortest
722286d8edStholo    edit sequence.
732286d8edStholo 
742286d8edStholo    If MINIMAL is nonzero, find the minimal edit script regardless
752286d8edStholo    of expense.  Otherwise, if the search is too expensive, use
762286d8edStholo    heuristics to stop the search and report a suboptimal answer.
772286d8edStholo 
782286d8edStholo    Set PART->(XMID,YMID) to the midpoint (XMID,YMID).  The diagonal number
792286d8edStholo    XMID - YMID equals the number of inserted lines minus the number
802286d8edStholo    of deleted lines (counting only lines before the midpoint).
812286d8edStholo    Return the approximate edit cost; this is the total number of
822286d8edStholo    lines inserted or deleted (counting only lines before the midpoint),
832286d8edStholo    unless a heuristic is used to terminate the search prematurely.
842286d8edStholo 
852286d8edStholo    Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script for the
862286d8edStholo    left half of the partition is known; similarly for PART->RIGHT_MINIMAL.
872286d8edStholo 
882286d8edStholo    This function assumes that the first lines of the specified portions
892286d8edStholo    of the two files do not match, and likewise that the last lines do not
902286d8edStholo    match.  The caller must trim matching lines from the beginning and end
912286d8edStholo    of the portions it is going to specify.
922286d8edStholo 
932286d8edStholo    If we return the "wrong" partitions,
942286d8edStholo    the worst this can do is cause suboptimal diff output.
952286d8edStholo    It cannot cause incorrect diff output.  */
962286d8edStholo 
972286d8edStholo static int
diag(xoff,xlim,yoff,ylim,minimal,part)982286d8edStholo diag (xoff, xlim, yoff, ylim, minimal, part)
992286d8edStholo      int xoff, xlim, yoff, ylim, minimal;
1002286d8edStholo      struct partition *part;
1012286d8edStholo {
1022286d8edStholo   int *const fd = fdiag;	/* Give the compiler a chance. */
1032286d8edStholo   int *const bd = bdiag;	/* Additional help for the compiler. */
1042286d8edStholo   int const *const xv = xvec;	/* Still more help for the compiler. */
1052286d8edStholo   int const *const yv = yvec;	/* And more and more . . . */
1062286d8edStholo   int const dmin = xoff - ylim;	/* Minimum valid diagonal. */
1072286d8edStholo   int const dmax = xlim - yoff;	/* Maximum valid diagonal. */
1082286d8edStholo   int const fmid = xoff - yoff;	/* Center diagonal of top-down search. */
1092286d8edStholo   int const bmid = xlim - ylim;	/* Center diagonal of bottom-up search. */
1102286d8edStholo   int fmin = fmid, fmax = fmid;	/* Limits of top-down search. */
1112286d8edStholo   int bmin = bmid, bmax = bmid;	/* Limits of bottom-up search. */
1122286d8edStholo   int c;			/* Cost. */
1132286d8edStholo   int odd = (fmid - bmid) & 1;	/* True if southeast corner is on an odd
1142286d8edStholo 				   diagonal with respect to the northwest. */
1152286d8edStholo 
1162286d8edStholo   fd[fmid] = xoff;
1172286d8edStholo   bd[bmid] = xlim;
1182286d8edStholo 
1192286d8edStholo   for (c = 1;; ++c)
1202286d8edStholo     {
1212286d8edStholo       int d;			/* Active diagonal. */
1222286d8edStholo       int big_snake = 0;
1232286d8edStholo 
1242286d8edStholo       /* Extend the top-down search by an edit step in each diagonal. */
1252286d8edStholo       fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
1262286d8edStholo       fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
1272286d8edStholo       for (d = fmax; d >= fmin; d -= 2)
1282286d8edStholo 	{
1292286d8edStholo 	  int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
1302286d8edStholo 
1312286d8edStholo 	  if (tlo >= thi)
1322286d8edStholo 	    x = tlo + 1;
1332286d8edStholo 	  else
1342286d8edStholo 	    x = thi;
1352286d8edStholo 	  oldx = x;
1362286d8edStholo 	  y = x - d;
1372286d8edStholo 	  while (x < xlim && y < ylim && xv[x] == yv[y])
1382286d8edStholo 	    ++x, ++y;
1392286d8edStholo 	  if (x - oldx > SNAKE_LIMIT)
1402286d8edStholo 	    big_snake = 1;
1412286d8edStholo 	  fd[d] = x;
1422286d8edStholo 	  if (odd && bmin <= d && d <= bmax && bd[d] <= x)
1432286d8edStholo 	    {
1442286d8edStholo 	      part->xmid = x;
1452286d8edStholo 	      part->ymid = y;
1462286d8edStholo 	      part->lo_minimal = part->hi_minimal = 1;
1472286d8edStholo 	      return 2 * c - 1;
1482286d8edStholo 	    }
1492286d8edStholo 	}
1502286d8edStholo 
1512286d8edStholo       /* Similarly extend the bottom-up search.  */
1522286d8edStholo       bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin;
1532286d8edStholo       bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax;
1542286d8edStholo       for (d = bmax; d >= bmin; d -= 2)
1552286d8edStholo 	{
1562286d8edStholo 	  int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
1572286d8edStholo 
1582286d8edStholo 	  if (tlo < thi)
1592286d8edStholo 	    x = tlo;
1602286d8edStholo 	  else
1612286d8edStholo 	    x = thi - 1;
1622286d8edStholo 	  oldx = x;
1632286d8edStholo 	  y = x - d;
1642286d8edStholo 	  while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
1652286d8edStholo 	    --x, --y;
1662286d8edStholo 	  if (oldx - x > SNAKE_LIMIT)
1672286d8edStholo 	    big_snake = 1;
1682286d8edStholo 	  bd[d] = x;
1692286d8edStholo 	  if (!odd && fmin <= d && d <= fmax && x <= fd[d])
1702286d8edStholo 	    {
1712286d8edStholo 	      part->xmid = x;
1722286d8edStholo 	      part->ymid = y;
1732286d8edStholo 	      part->lo_minimal = part->hi_minimal = 1;
1742286d8edStholo 	      return 2 * c;
1752286d8edStholo 	    }
1762286d8edStholo 	}
1772286d8edStholo 
1782286d8edStholo       if (minimal)
1792286d8edStholo 	continue;
1802286d8edStholo 
1812286d8edStholo       /* Heuristic: check occasionally for a diagonal that has made
1822286d8edStholo 	 lots of progress compared with the edit distance.
1832286d8edStholo 	 If we have any such, find the one that has made the most
1842286d8edStholo 	 progress and return it as if it had succeeded.
1852286d8edStholo 
1862286d8edStholo 	 With this heuristic, for files with a constant small density
1872286d8edStholo 	 of changes, the algorithm is linear in the file size.  */
1882286d8edStholo 
1892286d8edStholo       if (c > 200 && big_snake && heuristic)
1902286d8edStholo 	{
1912286d8edStholo 	  int best;
1922286d8edStholo 
1932286d8edStholo 	  best = 0;
1942286d8edStholo 	  for (d = fmax; d >= fmin; d -= 2)
1952286d8edStholo 	    {
1962286d8edStholo 	      int dd = d - fmid;
1972286d8edStholo 	      int x = fd[d];
1982286d8edStholo 	      int y = x - d;
1992286d8edStholo 	      int v = (x - xoff) * 2 - dd;
2002286d8edStholo 	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
2012286d8edStholo 		{
2022286d8edStholo 		  if (v > best
2032286d8edStholo 		      && xoff + SNAKE_LIMIT <= x && x < xlim
2042286d8edStholo 		      && yoff + SNAKE_LIMIT <= y && y < ylim)
2052286d8edStholo 		    {
2062286d8edStholo 		      /* We have a good enough best diagonal;
2072286d8edStholo 			 now insist that it end with a significant snake.  */
2082286d8edStholo 		      int k;
2092286d8edStholo 
2102286d8edStholo 		      for (k = 1; xv[x - k] == yv[y - k]; k++)
2112286d8edStholo 			if (k == SNAKE_LIMIT)
2122286d8edStholo 			  {
2132286d8edStholo 			    best = v;
2142286d8edStholo 			    part->xmid = x;
2152286d8edStholo 			    part->ymid = y;
2162286d8edStholo 			    break;
2172286d8edStholo 			  }
2182286d8edStholo 		    }
2192286d8edStholo 		}
2202286d8edStholo 	    }
2212286d8edStholo 	  if (best > 0)
2222286d8edStholo 	    {
2232286d8edStholo 	      part->lo_minimal = 1;
2242286d8edStholo 	      part->hi_minimal = 0;
2252286d8edStholo 	      return 2 * c - 1;
2262286d8edStholo 	    }
2272286d8edStholo 
2282286d8edStholo 	  best = 0;
2292286d8edStholo 	  for (d = bmax; d >= bmin; d -= 2)
2302286d8edStholo 	    {
2312286d8edStholo 	      int dd = d - bmid;
2322286d8edStholo 	      int x = bd[d];
2332286d8edStholo 	      int y = x - d;
2342286d8edStholo 	      int v = (xlim - x) * 2 + dd;
2352286d8edStholo 	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
2362286d8edStholo 		{
2372286d8edStholo 		  if (v > best
2382286d8edStholo 		      && xoff < x && x <= xlim - SNAKE_LIMIT
2392286d8edStholo 		      && yoff < y && y <= ylim - SNAKE_LIMIT)
2402286d8edStholo 		    {
2412286d8edStholo 		      /* We have a good enough best diagonal;
2422286d8edStholo 			 now insist that it end with a significant snake.  */
2432286d8edStholo 		      int k;
2442286d8edStholo 
2452286d8edStholo 		      for (k = 0; xv[x + k] == yv[y + k]; k++)
2462286d8edStholo 			if (k == SNAKE_LIMIT - 1)
2472286d8edStholo 			  {
2482286d8edStholo 			    best = v;
2492286d8edStholo 			    part->xmid = x;
2502286d8edStholo 			    part->ymid = y;
2512286d8edStholo 			    break;
2522286d8edStholo 			  }
2532286d8edStholo 		    }
2542286d8edStholo 		}
2552286d8edStholo 	    }
2562286d8edStholo 	  if (best > 0)
2572286d8edStholo 	    {
2582286d8edStholo 	      part->lo_minimal = 0;
2592286d8edStholo 	      part->hi_minimal = 1;
2602286d8edStholo 	      return 2 * c - 1;
2612286d8edStholo 	    }
2622286d8edStholo 	}
2632286d8edStholo 
2642286d8edStholo       /* Heuristic: if we've gone well beyond the call of duty,
2652286d8edStholo 	 give up and report halfway between our best results so far.  */
2662286d8edStholo       if (c >= too_expensive)
2672286d8edStholo 	{
2682286d8edStholo 	  int fxybest, fxbest;
2692286d8edStholo 	  int bxybest, bxbest;
2702286d8edStholo 
2712286d8edStholo 	  fxbest = bxbest = 0;  /* Pacify `gcc -Wall'.  */
2722286d8edStholo 
2732286d8edStholo 	  /* Find forward diagonal that maximizes X + Y.  */
2742286d8edStholo 	  fxybest = -1;
2752286d8edStholo 	  for (d = fmax; d >= fmin; d -= 2)
2762286d8edStholo 	    {
2772286d8edStholo 	      int x = min (fd[d], xlim);
2782286d8edStholo 	      int y = x - d;
2792286d8edStholo 	      if (ylim < y)
2802286d8edStholo 		x = ylim + d, y = ylim;
2812286d8edStholo 	      if (fxybest < x + y)
2822286d8edStholo 		{
2832286d8edStholo 		  fxybest = x + y;
2842286d8edStholo 		  fxbest = x;
2852286d8edStholo 		}
2862286d8edStholo 	    }
2872286d8edStholo 
2882286d8edStholo 	  /* Find backward diagonal that minimizes X + Y.  */
2892286d8edStholo 	  bxybest = INT_MAX;
2902286d8edStholo 	  for (d = bmax; d >= bmin; d -= 2)
2912286d8edStholo 	    {
2922286d8edStholo 	      int x = max (xoff, bd[d]);
2932286d8edStholo 	      int y = x - d;
2942286d8edStholo 	      if (y < yoff)
2952286d8edStholo 		x = yoff + d, y = yoff;
2962286d8edStholo 	      if (x + y < bxybest)
2972286d8edStholo 		{
2982286d8edStholo 		  bxybest = x + y;
2992286d8edStholo 		  bxbest = x;
3002286d8edStholo 		}
3012286d8edStholo 	    }
3022286d8edStholo 
3032286d8edStholo 	  /* Use the better of the two diagonals.  */
3042286d8edStholo 	  if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
3052286d8edStholo 	    {
3062286d8edStholo 	      part->xmid = fxbest;
3072286d8edStholo 	      part->ymid = fxybest - fxbest;
3082286d8edStholo 	      part->lo_minimal = 1;
3092286d8edStholo 	      part->hi_minimal = 0;
3102286d8edStholo 	    }
3112286d8edStholo 	  else
3122286d8edStholo 	    {
3132286d8edStholo 	      part->xmid = bxbest;
3142286d8edStholo 	      part->ymid = bxybest - bxbest;
3152286d8edStholo 	      part->lo_minimal = 0;
3162286d8edStholo 	      part->hi_minimal = 1;
3172286d8edStholo 	    }
3182286d8edStholo 	  return 2 * c - 1;
3192286d8edStholo 	}
3202286d8edStholo     }
3212286d8edStholo }
3222286d8edStholo 
3232286d8edStholo /* Compare in detail contiguous subsequences of the two files
3242286d8edStholo    which are known, as a whole, to match each other.
3252286d8edStholo 
3262286d8edStholo    The results are recorded in the vectors files[N].changed_flag, by
3272286d8edStholo    storing a 1 in the element for each line that is an insertion or deletion.
3282286d8edStholo 
3292286d8edStholo    The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
3302286d8edStholo 
3312286d8edStholo    Note that XLIM, YLIM are exclusive bounds.
3322286d8edStholo    All line numbers are origin-0 and discarded lines are not counted.
3332286d8edStholo 
3342286d8edStholo    If MINIMAL is nonzero, find a minimal difference no matter how
3352286d8edStholo    expensive it is.  */
3362286d8edStholo 
3372286d8edStholo static void
compareseq(xoff,xlim,yoff,ylim,minimal)3382286d8edStholo compareseq (xoff, xlim, yoff, ylim, minimal)
3392286d8edStholo      int xoff, xlim, yoff, ylim, minimal;
3402286d8edStholo {
3412286d8edStholo   int * const xv = xvec; /* Help the compiler.  */
3422286d8edStholo   int * const yv = yvec;
3432286d8edStholo 
3442286d8edStholo   /* Slide down the bottom initial diagonal. */
3452286d8edStholo   while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
3462286d8edStholo     ++xoff, ++yoff;
3472286d8edStholo   /* Slide up the top initial diagonal. */
3482286d8edStholo   while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
3492286d8edStholo     --xlim, --ylim;
3502286d8edStholo 
3512286d8edStholo   /* Handle simple cases. */
3522286d8edStholo   if (xoff == xlim)
3532286d8edStholo     while (yoff < ylim)
3542286d8edStholo       files[1].changed_flag[files[1].realindexes[yoff++]] = 1;
3552286d8edStholo   else if (yoff == ylim)
3562286d8edStholo     while (xoff < xlim)
3572286d8edStholo       files[0].changed_flag[files[0].realindexes[xoff++]] = 1;
3582286d8edStholo   else
3592286d8edStholo     {
3602286d8edStholo       int c;
3612286d8edStholo       struct partition part;
3622286d8edStholo 
3632286d8edStholo       /* Find a point of correspondence in the middle of the files.  */
3642286d8edStholo 
3652286d8edStholo       c = diag (xoff, xlim, yoff, ylim, minimal, &part);
3662286d8edStholo 
3672286d8edStholo       if (c == 1)
3682286d8edStholo 	{
3692286d8edStholo 	  /* This should be impossible, because it implies that
3702286d8edStholo 	     one of the two subsequences is empty,
3712286d8edStholo 	     and that case was handled above without calling `diag'.
3722286d8edStholo 	     Let's verify that this is true.  */
3732286d8edStholo 	  abort ();
3742286d8edStholo #if 0
3752286d8edStholo 	  /* The two subsequences differ by a single insert or delete;
3762286d8edStholo 	     record it and we are done.  */
3772286d8edStholo 	  if (part.xmid - part.ymid < xoff - yoff)
3782286d8edStholo 	    files[1].changed_flag[files[1].realindexes[part.ymid - 1]] = 1;
3792286d8edStholo 	  else
3802286d8edStholo 	    files[0].changed_flag[files[0].realindexes[part.xmid]] = 1;
3812286d8edStholo #endif
3822286d8edStholo 	}
3832286d8edStholo       else
3842286d8edStholo 	{
3852286d8edStholo 	  /* Use the partitions to split this problem into subproblems.  */
3862286d8edStholo 	  compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
3872286d8edStholo 	  compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
3882286d8edStholo 	}
3892286d8edStholo     }
3902286d8edStholo }
3912286d8edStholo 
3922286d8edStholo /* Discard lines from one file that have no matches in the other file.
3932286d8edStholo 
3942286d8edStholo    A line which is discarded will not be considered by the actual
3952286d8edStholo    comparison algorithm; it will be as if that line were not in the file.
3962286d8edStholo    The file's `realindexes' table maps virtual line numbers
3972286d8edStholo    (which don't count the discarded lines) into real line numbers;
3982286d8edStholo    this is how the actual comparison algorithm produces results
3992286d8edStholo    that are comprehensible when the discarded lines are counted.
4002286d8edStholo 
4012286d8edStholo    When we discard a line, we also mark it as a deletion or insertion
4022286d8edStholo    so that it will be printed in the output.  */
4032286d8edStholo 
4042286d8edStholo static void
discard_confusing_lines(filevec)4052286d8edStholo discard_confusing_lines (filevec)
4062286d8edStholo      struct file_data filevec[];
4072286d8edStholo {
4082286d8edStholo   unsigned int f, i;
4092286d8edStholo   char *discarded[2];
4102286d8edStholo   int *equiv_count[2];
4112286d8edStholo   int *p;
4122286d8edStholo 
4132286d8edStholo   /* Allocate our results.  */
4142286d8edStholo   p = (int *) xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
4152286d8edStholo 		       * (2 * sizeof (int)));
4162286d8edStholo   for (f = 0; f < 2; f++)
4172286d8edStholo     {
4182286d8edStholo       filevec[f].undiscarded = p;  p += filevec[f].buffered_lines;
4192286d8edStholo       filevec[f].realindexes = p;  p += filevec[f].buffered_lines;
4202286d8edStholo     }
4212286d8edStholo 
4222286d8edStholo   /* Set up equiv_count[F][I] as the number of lines in file F
4232286d8edStholo      that fall in equivalence class I.  */
4242286d8edStholo 
4252286d8edStholo   p = (int *) xmalloc (filevec[0].equiv_max * (2 * sizeof (int)));
4262286d8edStholo   equiv_count[0] = p;
4272286d8edStholo   equiv_count[1] = p + filevec[0].equiv_max;
4282286d8edStholo   bzero (p, filevec[0].equiv_max * (2 * sizeof (int)));
4292286d8edStholo 
4302286d8edStholo   for (i = 0; i < filevec[0].buffered_lines; ++i)
4312286d8edStholo     ++equiv_count[0][filevec[0].equivs[i]];
4322286d8edStholo   for (i = 0; i < filevec[1].buffered_lines; ++i)
4332286d8edStholo     ++equiv_count[1][filevec[1].equivs[i]];
4342286d8edStholo 
4352286d8edStholo   /* Set up tables of which lines are going to be discarded.  */
4362286d8edStholo 
4372286d8edStholo   discarded[0] = xmalloc (sizeof (char)
4382286d8edStholo 			  * (filevec[0].buffered_lines
4392286d8edStholo 			     + filevec[1].buffered_lines));
4402286d8edStholo   discarded[1] = discarded[0] + filevec[0].buffered_lines;
4412286d8edStholo   bzero (discarded[0], sizeof (char) * (filevec[0].buffered_lines
4422286d8edStholo 					+ filevec[1].buffered_lines));
4432286d8edStholo 
4442286d8edStholo   /* Mark to be discarded each line that matches no line of the other file.
4452286d8edStholo      If a line matches many lines, mark it as provisionally discardable.  */
4462286d8edStholo 
4472286d8edStholo   for (f = 0; f < 2; f++)
4482286d8edStholo     {
4492286d8edStholo       unsigned int end = filevec[f].buffered_lines;
4502286d8edStholo       char *discards = discarded[f];
4512286d8edStholo       int *counts = equiv_count[1 - f];
4522286d8edStholo       int *equivs = filevec[f].equivs;
4532286d8edStholo       unsigned int many = 5;
4542286d8edStholo       unsigned int tem = end / 64;
4552286d8edStholo 
4562286d8edStholo       /* Multiply MANY by approximate square root of number of lines.
4572286d8edStholo 	 That is the threshold for provisionally discardable lines.  */
4582286d8edStholo       while ((tem = tem >> 2) > 0)
4592286d8edStholo 	many *= 2;
4602286d8edStholo 
4612286d8edStholo       for (i = 0; i < end; i++)
4622286d8edStholo 	{
4632286d8edStholo 	  int nmatch;
4642286d8edStholo 	  if (equivs[i] == 0)
4652286d8edStholo 	    continue;
4662286d8edStholo 	  nmatch = counts[equivs[i]];
4672286d8edStholo 	  if (nmatch == 0)
4682286d8edStholo 	    discards[i] = 1;
4692286d8edStholo 	  else if (nmatch > many)
4702286d8edStholo 	    discards[i] = 2;
4712286d8edStholo 	}
4722286d8edStholo     }
4732286d8edStholo 
4742286d8edStholo   /* Don't really discard the provisional lines except when they occur
4752286d8edStholo      in a run of discardables, with nonprovisionals at the beginning
4762286d8edStholo      and end.  */
4772286d8edStholo 
4782286d8edStholo   for (f = 0; f < 2; f++)
4792286d8edStholo     {
4802286d8edStholo       unsigned int end = filevec[f].buffered_lines;
4812286d8edStholo       register char *discards = discarded[f];
4822286d8edStholo 
4832286d8edStholo       for (i = 0; i < end; i++)
4842286d8edStholo 	{
4852286d8edStholo 	  /* Cancel provisional discards not in middle of run of discards.  */
4862286d8edStholo 	  if (discards[i] == 2)
4872286d8edStholo 	    discards[i] = 0;
4882286d8edStholo 	  else if (discards[i] != 0)
4892286d8edStholo 	    {
4902286d8edStholo 	      /* We have found a nonprovisional discard.  */
4912286d8edStholo 	      register int j;
4922286d8edStholo 	      unsigned int length;
4932286d8edStholo 	      unsigned int provisional = 0;
4942286d8edStholo 
4952286d8edStholo 	      /* Find end of this run of discardable lines.
4962286d8edStholo 		 Count how many are provisionally discardable.  */
4972286d8edStholo 	      for (j = i; j < end; j++)
4982286d8edStholo 		{
4992286d8edStholo 		  if (discards[j] == 0)
5002286d8edStholo 		    break;
5012286d8edStholo 		  if (discards[j] == 2)
5022286d8edStholo 		    ++provisional;
5032286d8edStholo 		}
5042286d8edStholo 
5052286d8edStholo 	      /* Cancel provisional discards at end, and shrink the run.  */
5062286d8edStholo 	      while (j > i && discards[j - 1] == 2)
5072286d8edStholo 		discards[--j] = 0, --provisional;
5082286d8edStholo 
5092286d8edStholo 	      /* Now we have the length of a run of discardable lines
5102286d8edStholo 		 whose first and last are not provisional.  */
5112286d8edStholo 	      length = j - i;
5122286d8edStholo 
5132286d8edStholo 	      /* If 1/4 of the lines in the run are provisional,
5142286d8edStholo 		 cancel discarding of all provisional lines in the run.  */
5152286d8edStholo 	      if (provisional * 4 > length)
5162286d8edStholo 		{
5172286d8edStholo 		  while (j > i)
5182286d8edStholo 		    if (discards[--j] == 2)
5192286d8edStholo 		      discards[j] = 0;
5202286d8edStholo 		}
5212286d8edStholo 	      else
5222286d8edStholo 		{
5232286d8edStholo 		  register unsigned int consec;
5242286d8edStholo 		  unsigned int minimum = 1;
5252286d8edStholo 		  unsigned int tem = length / 4;
5262286d8edStholo 
5272286d8edStholo 		  /* MINIMUM is approximate square root of LENGTH/4.
5282286d8edStholo 		     A subrun of two or more provisionals can stand
5292286d8edStholo 		     when LENGTH is at least 16.
5302286d8edStholo 		     A subrun of 4 or more can stand when LENGTH >= 64.  */
5312286d8edStholo 		  while ((tem = tem >> 2) > 0)
5322286d8edStholo 		    minimum *= 2;
5332286d8edStholo 		  minimum++;
5342286d8edStholo 
5352286d8edStholo 		  /* Cancel any subrun of MINIMUM or more provisionals
5362286d8edStholo 		     within the larger run.  */
5372286d8edStholo 		  for (j = 0, consec = 0; j < length; j++)
5382286d8edStholo 		    if (discards[i + j] != 2)
5392286d8edStholo 		      consec = 0;
5402286d8edStholo 		    else if (minimum == ++consec)
5412286d8edStholo 		      /* Back up to start of subrun, to cancel it all.  */
5422286d8edStholo 		      j -= consec;
5432286d8edStholo 		    else if (minimum < consec)
5442286d8edStholo 		      discards[i + j] = 0;
5452286d8edStholo 
5462286d8edStholo 		  /* Scan from beginning of run
5472286d8edStholo 		     until we find 3 or more nonprovisionals in a row
5482286d8edStholo 		     or until the first nonprovisional at least 8 lines in.
5492286d8edStholo 		     Until that point, cancel any provisionals.  */
5502286d8edStholo 		  for (j = 0, consec = 0; j < length; j++)
5512286d8edStholo 		    {
5522286d8edStholo 		      if (j >= 8 && discards[i + j] == 1)
5532286d8edStholo 			break;
5542286d8edStholo 		      if (discards[i + j] == 2)
5552286d8edStholo 			consec = 0, discards[i + j] = 0;
5562286d8edStholo 		      else if (discards[i + j] == 0)
5572286d8edStholo 			consec = 0;
5582286d8edStholo 		      else
5592286d8edStholo 			consec++;
5602286d8edStholo 		      if (consec == 3)
5612286d8edStholo 			break;
5622286d8edStholo 		    }
5632286d8edStholo 
5642286d8edStholo 		  /* I advances to the last line of the run.  */
5652286d8edStholo 		  i += length - 1;
5662286d8edStholo 
5672286d8edStholo 		  /* Same thing, from end.  */
5682286d8edStholo 		  for (j = 0, consec = 0; j < length; j++)
5692286d8edStholo 		    {
5702286d8edStholo 		      if (j >= 8 && discards[i - j] == 1)
5712286d8edStholo 			break;
5722286d8edStholo 		      if (discards[i - j] == 2)
5732286d8edStholo 			consec = 0, discards[i - j] = 0;
5742286d8edStholo 		      else if (discards[i - j] == 0)
5752286d8edStholo 			consec = 0;
5762286d8edStholo 		      else
5772286d8edStholo 			consec++;
5782286d8edStholo 		      if (consec == 3)
5792286d8edStholo 			break;
5802286d8edStholo 		    }
5812286d8edStholo 		}
5822286d8edStholo 	    }
5832286d8edStholo 	}
5842286d8edStholo     }
5852286d8edStholo 
5862286d8edStholo   /* Actually discard the lines. */
5872286d8edStholo   for (f = 0; f < 2; f++)
5882286d8edStholo     {
5892286d8edStholo       char *discards = discarded[f];
5902286d8edStholo       unsigned int end = filevec[f].buffered_lines;
5912286d8edStholo       unsigned int j = 0;
5922286d8edStholo       for (i = 0; i < end; ++i)
5932286d8edStholo 	if (no_discards || discards[i] == 0)
5942286d8edStholo 	  {
5952286d8edStholo 	    filevec[f].undiscarded[j] = filevec[f].equivs[i];
5962286d8edStholo 	    filevec[f].realindexes[j++] = i;
5972286d8edStholo 	  }
5982286d8edStholo 	else
5992286d8edStholo 	  filevec[f].changed_flag[i] = 1;
6002286d8edStholo       filevec[f].nondiscarded_lines = j;
6012286d8edStholo     }
6022286d8edStholo 
6032286d8edStholo   free (discarded[0]);
6042286d8edStholo   free (equiv_count[0]);
6052286d8edStholo }
6062286d8edStholo 
6072286d8edStholo /* Adjust inserts/deletes of identical lines to join changes
6082286d8edStholo    as much as possible.
6092286d8edStholo 
6102286d8edStholo    We do something when a run of changed lines include a
6112286d8edStholo    line at one end and have an excluded, identical line at the other.
6122286d8edStholo    We are free to choose which identical line is included.
6132286d8edStholo    `compareseq' usually chooses the one at the beginning,
6142286d8edStholo    but usually it is cleaner to consider the following identical line
6152286d8edStholo    to be the "change".  */
6162286d8edStholo 
6172286d8edStholo int inhibit;
6182286d8edStholo 
6192286d8edStholo static void
shift_boundaries(filevec)6202286d8edStholo shift_boundaries (filevec)
6212286d8edStholo      struct file_data filevec[];
6222286d8edStholo {
6232286d8edStholo   int f;
624*43c1707eStholo 
625*43c1707eStholo   if (inhibit)
626*43c1707eStholo     return;
6272286d8edStholo 
6282286d8edStholo   for (f = 0; f < 2; f++)
6292286d8edStholo     {
6302286d8edStholo       char *changed = filevec[f].changed_flag;
6312286d8edStholo       char const *other_changed = filevec[1-f].changed_flag;
6322286d8edStholo       int const *equivs = filevec[f].equivs;
6332286d8edStholo       int i = 0;
6342286d8edStholo       int j = 0;
6352286d8edStholo       int i_end = filevec[f].buffered_lines;
6362286d8edStholo 
6372286d8edStholo       while (1)
6382286d8edStholo 	{
6392286d8edStholo 	  int runlength, start, corresponding;
6402286d8edStholo 
6412286d8edStholo 	  /* Scan forwards to find beginning of another run of changes.
6422286d8edStholo 	     Also keep track of the corresponding point in the other file.  */
6432286d8edStholo 
6442286d8edStholo 	  while (i < i_end && changed[i] == 0)
6452286d8edStholo 	    {
6462286d8edStholo 	      while (other_changed[j++])
6472286d8edStholo 		continue;
6482286d8edStholo 	      i++;
6492286d8edStholo 	    }
6502286d8edStholo 
6512286d8edStholo 	  if (i == i_end)
6522286d8edStholo 	    break;
6532286d8edStholo 
6542286d8edStholo 	  start = i;
6552286d8edStholo 
6562286d8edStholo 	  /* Find the end of this run of changes.  */
6572286d8edStholo 
6582286d8edStholo 	  while (changed[++i])
6592286d8edStholo 	    continue;
6602286d8edStholo 	  while (other_changed[j])
6612286d8edStholo 	    j++;
6622286d8edStholo 
6632286d8edStholo 	  do
6642286d8edStholo 	    {
6652286d8edStholo 	      /* Record the length of this run of changes, so that
6662286d8edStholo 		 we can later determine whether the run has grown.  */
6672286d8edStholo 	      runlength = i - start;
6682286d8edStholo 
6692286d8edStholo 	      /* Move the changed region back, so long as the
6702286d8edStholo 		 previous unchanged line matches the last changed one.
6712286d8edStholo 		 This merges with previous changed regions.  */
6722286d8edStholo 
6732286d8edStholo 	      while (start && equivs[start - 1] == equivs[i - 1])
6742286d8edStholo 		{
6752286d8edStholo 		  changed[--start] = 1;
6762286d8edStholo 		  changed[--i] = 0;
6772286d8edStholo 		  while (changed[start - 1])
6782286d8edStholo 		    start--;
6792286d8edStholo 		  while (other_changed[--j])
6802286d8edStholo 		    continue;
6812286d8edStholo 		}
6822286d8edStholo 
6832286d8edStholo 	      /* Set CORRESPONDING to the end of the changed run, at the last
6842286d8edStholo 		 point where it corresponds to a changed run in the other file.
6852286d8edStholo 		 CORRESPONDING == I_END means no such point has been found.  */
6862286d8edStholo 	      corresponding = other_changed[j - 1] ? i : i_end;
6872286d8edStholo 
688*43c1707eStholo 	      /* Move the changed region forward, so long as the
689*43c1707eStholo 		 first changed line matches the following unchanged one.
690*43c1707eStholo 		 This merges with following changed regions.
6912286d8edStholo 		 Do this second, so that if there are no merges,
6922286d8edStholo 		 the changed region is moved forward as far as possible.  */
6932286d8edStholo 
694*43c1707eStholo 	      while (i != i_end && equivs[start] == equivs[i])
6952286d8edStholo 		{
6962286d8edStholo 		  changed[start++] = 0;
6972286d8edStholo 		  changed[i++] = 1;
6982286d8edStholo 		  while (changed[i])
6992286d8edStholo 		    i++;
7002286d8edStholo 		  while (other_changed[++j])
7012286d8edStholo 		    corresponding = i;
7022286d8edStholo 		}
7032286d8edStholo 	    }
7042286d8edStholo 	  while (runlength != i - start);
7052286d8edStholo 
7062286d8edStholo 	  /* If possible, move the fully-merged run of changes
7072286d8edStholo 	     back to a corresponding run in the other file.  */
7082286d8edStholo 
7092286d8edStholo 	  while (corresponding < i)
7102286d8edStholo 	    {
7112286d8edStholo 	      changed[--start] = 1;
7122286d8edStholo 	      changed[--i] = 0;
7132286d8edStholo 	      while (other_changed[--j])
7142286d8edStholo 		continue;
7152286d8edStholo 	    }
7162286d8edStholo 	}
7172286d8edStholo     }
7182286d8edStholo }
7192286d8edStholo 
7202286d8edStholo /* Cons an additional entry onto the front of an edit script OLD.
7212286d8edStholo    LINE0 and LINE1 are the first affected lines in the two files (origin 0).
7222286d8edStholo    DELETED is the number of lines deleted here from file 0.
7232286d8edStholo    INSERTED is the number of lines inserted here in file 1.
7242286d8edStholo 
7252286d8edStholo    If DELETED is 0 then LINE0 is the number of the line before
7262286d8edStholo    which the insertion was done; vice versa for INSERTED and LINE1.  */
7272286d8edStholo 
7282286d8edStholo static struct change *
add_change(line0,line1,deleted,inserted,old)7292286d8edStholo add_change (line0, line1, deleted, inserted, old)
7302286d8edStholo      int line0, line1, deleted, inserted;
7312286d8edStholo      struct change *old;
7322286d8edStholo {
7332286d8edStholo   struct change *new = (struct change *) xmalloc (sizeof (struct change));
7342286d8edStholo 
7352286d8edStholo   new->line0 = line0;
7362286d8edStholo   new->line1 = line1;
7372286d8edStholo   new->inserted = inserted;
7382286d8edStholo   new->deleted = deleted;
7392286d8edStholo   new->link = old;
7402286d8edStholo   return new;
7412286d8edStholo }
7422286d8edStholo 
7432286d8edStholo /* Scan the tables of which lines are inserted and deleted,
7442286d8edStholo    producing an edit script in reverse order.  */
7452286d8edStholo 
7462286d8edStholo static struct change *
build_reverse_script(filevec)7472286d8edStholo build_reverse_script (filevec)
7482286d8edStholo      struct file_data const filevec[];
7492286d8edStholo {
7502286d8edStholo   struct change *script = 0;
7512286d8edStholo   char *changed0 = filevec[0].changed_flag;
7522286d8edStholo   char *changed1 = filevec[1].changed_flag;
7532286d8edStholo   int len0 = filevec[0].buffered_lines;
7542286d8edStholo   int len1 = filevec[1].buffered_lines;
7552286d8edStholo 
7562286d8edStholo   /* Note that changedN[len0] does exist, and contains 0.  */
7572286d8edStholo 
7582286d8edStholo   int i0 = 0, i1 = 0;
7592286d8edStholo 
7602286d8edStholo   while (i0 < len0 || i1 < len1)
7612286d8edStholo     {
7622286d8edStholo       if (changed0[i0] || changed1[i1])
7632286d8edStholo 	{
7642286d8edStholo 	  int line0 = i0, line1 = i1;
7652286d8edStholo 
7662286d8edStholo 	  /* Find # lines changed here in each file.  */
7672286d8edStholo 	  while (changed0[i0]) ++i0;
7682286d8edStholo 	  while (changed1[i1]) ++i1;
7692286d8edStholo 
7702286d8edStholo 	  /* Record this change.  */
7712286d8edStholo 	  script = add_change (line0, line1, i0 - line0, i1 - line1, script);
7722286d8edStholo 	}
7732286d8edStholo 
7742286d8edStholo       /* We have reached lines in the two files that match each other.  */
7752286d8edStholo       i0++, i1++;
7762286d8edStholo     }
7772286d8edStholo 
7782286d8edStholo   return script;
7792286d8edStholo }
7802286d8edStholo 
7812286d8edStholo /* Scan the tables of which lines are inserted and deleted,
7822286d8edStholo    producing an edit script in forward order.  */
7832286d8edStholo 
7842286d8edStholo static struct change *
build_script(filevec)7852286d8edStholo build_script (filevec)
7862286d8edStholo      struct file_data const filevec[];
7872286d8edStholo {
7882286d8edStholo   struct change *script = 0;
7892286d8edStholo   char *changed0 = filevec[0].changed_flag;
7902286d8edStholo   char *changed1 = filevec[1].changed_flag;
7912286d8edStholo   int i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
7922286d8edStholo 
7932286d8edStholo   /* Note that changedN[-1] does exist, and contains 0.  */
7942286d8edStholo 
7952286d8edStholo   while (i0 >= 0 || i1 >= 0)
7962286d8edStholo     {
7972286d8edStholo       if (changed0[i0 - 1] || changed1[i1 - 1])
7982286d8edStholo 	{
7992286d8edStholo 	  int line0 = i0, line1 = i1;
8002286d8edStholo 
8012286d8edStholo 	  /* Find # lines changed here in each file.  */
8022286d8edStholo 	  while (changed0[i0 - 1]) --i0;
8032286d8edStholo 	  while (changed1[i1 - 1]) --i1;
8042286d8edStholo 
8052286d8edStholo 	  /* Record this change.  */
8062286d8edStholo 	  script = add_change (i0, i1, line0 - i0, line1 - i1, script);
8072286d8edStholo 	}
8082286d8edStholo 
8092286d8edStholo       /* We have reached lines in the two files that match each other.  */
8102286d8edStholo       i0--, i1--;
8112286d8edStholo     }
8122286d8edStholo 
8132286d8edStholo   return script;
8142286d8edStholo }
8152286d8edStholo 
8162286d8edStholo /* If CHANGES, briefly report that two files differed.  */
8172286d8edStholo static void
briefly_report(changes,filevec)8182286d8edStholo briefly_report (changes, filevec)
8192286d8edStholo      int changes;
8202286d8edStholo      struct file_data const filevec[];
8212286d8edStholo {
8222286d8edStholo   if (changes)
8232286d8edStholo     message (no_details_flag ? "Files %s and %s differ\n"
8242286d8edStholo 	     : "Binary files %s and %s differ\n",
8252286d8edStholo 	     filevec[0].name, filevec[1].name);
8262286d8edStholo }
8272286d8edStholo 
8282286d8edStholo /* Report the differences of two files.  DEPTH is the current directory
8292286d8edStholo    depth. */
8302286d8edStholo int
diff_2_files(filevec,depth)8312286d8edStholo diff_2_files (filevec, depth)
8322286d8edStholo      struct file_data filevec[];
8332286d8edStholo      int depth;
8342286d8edStholo {
8352286d8edStholo   int diags;
8362286d8edStholo   int i;
8372286d8edStholo   struct change *e, *p;
8382286d8edStholo   struct change *script;
8392286d8edStholo   int changes;
8402286d8edStholo 
8412286d8edStholo 
8422286d8edStholo   /* If we have detected that either file is binary,
8432286d8edStholo      compare the two files as binary.  This can happen
8442286d8edStholo      only when the first chunk is read.
8452286d8edStholo      Also, --brief without any --ignore-* options means
8462286d8edStholo      we can speed things up by treating the files as binary.  */
8472286d8edStholo 
8482286d8edStholo   if (read_files (filevec, no_details_flag & ~ignore_some_changes))
8492286d8edStholo     {
8502286d8edStholo       /* Files with different lengths must be different.  */
8512286d8edStholo       if (filevec[0].stat.st_size != filevec[1].stat.st_size
8522286d8edStholo 	  && (filevec[0].desc < 0 || S_ISREG (filevec[0].stat.st_mode))
8532286d8edStholo 	  && (filevec[1].desc < 0 || S_ISREG (filevec[1].stat.st_mode)))
8542286d8edStholo 	changes = 1;
8552286d8edStholo 
8562286d8edStholo       /* Standard input equals itself.  */
8572286d8edStholo       else if (filevec[0].desc == filevec[1].desc)
8582286d8edStholo 	changes = 0;
8592286d8edStholo 
8602286d8edStholo       else
8612286d8edStholo 	/* Scan both files, a buffer at a time, looking for a difference.  */
8622286d8edStholo 	{
8632286d8edStholo 	  /* Allocate same-sized buffers for both files.  */
8642286d8edStholo 	  size_t buffer_size = buffer_lcm (STAT_BLOCKSIZE (filevec[0].stat),
8652286d8edStholo 					   STAT_BLOCKSIZE (filevec[1].stat));
8662286d8edStholo 	  for (i = 0; i < 2; i++)
8672286d8edStholo 	    filevec[i].buffer = xrealloc (filevec[i].buffer, buffer_size);
8682286d8edStholo 
8692286d8edStholo 	  for (;;  filevec[0].buffered_chars = filevec[1].buffered_chars = 0)
8702286d8edStholo 	    {
8712286d8edStholo 	      /* Read a buffer's worth from both files.  */
8722286d8edStholo 	      for (i = 0; i < 2; i++)
8732286d8edStholo 		if (0 <= filevec[i].desc)
8742286d8edStholo 		  while (filevec[i].buffered_chars != buffer_size)
8752286d8edStholo 		    {
8762286d8edStholo 		      int r = read (filevec[i].desc,
8772286d8edStholo 				    filevec[i].buffer
8782286d8edStholo 				    + filevec[i].buffered_chars,
8792286d8edStholo 				    buffer_size - filevec[i].buffered_chars);
8802286d8edStholo 		      if (r == 0)
8812286d8edStholo 			break;
8822286d8edStholo 		      if (r < 0)
8832286d8edStholo 			pfatal_with_name (filevec[i].name);
8842286d8edStholo 		      filevec[i].buffered_chars += r;
8852286d8edStholo 		    }
8862286d8edStholo 
8872286d8edStholo 	      /* If the buffers differ, the files differ.  */
8882286d8edStholo 	      if (filevec[0].buffered_chars != filevec[1].buffered_chars
8892286d8edStholo 		  || (filevec[0].buffered_chars != 0
8902286d8edStholo 		      && memcmp (filevec[0].buffer,
8912286d8edStholo 				 filevec[1].buffer,
8922286d8edStholo 				 filevec[0].buffered_chars) != 0))
8932286d8edStholo 		{
8942286d8edStholo 		  changes = 1;
8952286d8edStholo 		  break;
8962286d8edStholo 		}
8972286d8edStholo 
8982286d8edStholo 	      /* If we reach end of file, the files are the same.  */
8992286d8edStholo 	      if (filevec[0].buffered_chars != buffer_size)
9002286d8edStholo 		{
9012286d8edStholo 		  changes = 0;
9022286d8edStholo 		  break;
9032286d8edStholo 		}
9042286d8edStholo 	    }
9052286d8edStholo 	}
9062286d8edStholo 
9072286d8edStholo       briefly_report (changes, filevec);
9082286d8edStholo     }
9092286d8edStholo   else
9102286d8edStholo     {
9112286d8edStholo       /* Allocate vectors for the results of comparison:
9122286d8edStholo 	 a flag for each line of each file, saying whether that line
9132286d8edStholo 	 is an insertion or deletion.
9142286d8edStholo 	 Allocate an extra element, always zero, at each end of each vector.  */
9152286d8edStholo 
9162286d8edStholo       size_t s = filevec[0].buffered_lines + filevec[1].buffered_lines + 4;
9172286d8edStholo       filevec[0].changed_flag = xmalloc (s);
9182286d8edStholo       bzero (filevec[0].changed_flag, s);
9192286d8edStholo       filevec[0].changed_flag++;
9202286d8edStholo       filevec[1].changed_flag = filevec[0].changed_flag
9212286d8edStholo 				+ filevec[0].buffered_lines + 2;
9222286d8edStholo 
9232286d8edStholo       /* Some lines are obviously insertions or deletions
9242286d8edStholo 	 because they don't match anything.  Detect them now, and
9252286d8edStholo 	 avoid even thinking about them in the main comparison algorithm.  */
9262286d8edStholo 
9272286d8edStholo       discard_confusing_lines (filevec);
9282286d8edStholo 
9292286d8edStholo       /* Now do the main comparison algorithm, considering just the
9302286d8edStholo 	 undiscarded lines.  */
9312286d8edStholo 
9322286d8edStholo       xvec = filevec[0].undiscarded;
9332286d8edStholo       yvec = filevec[1].undiscarded;
9342286d8edStholo       diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
9352286d8edStholo       fdiag = (int *) xmalloc (diags * (2 * sizeof (int)));
9362286d8edStholo       bdiag = fdiag + diags;
9372286d8edStholo       fdiag += filevec[1].nondiscarded_lines + 1;
9382286d8edStholo       bdiag += filevec[1].nondiscarded_lines + 1;
9392286d8edStholo 
9402286d8edStholo       /* Set TOO_EXPENSIVE to be approximate square root of input size,
9412286d8edStholo 	 bounded below by 256.  */
9422286d8edStholo       too_expensive = 1;
9432286d8edStholo       for (i = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines;
9442286d8edStholo 	   i != 0; i >>= 2)
9452286d8edStholo 	too_expensive <<= 1;
9462286d8edStholo       too_expensive = max (256, too_expensive);
9472286d8edStholo 
9482286d8edStholo       files[0] = filevec[0];
9492286d8edStholo       files[1] = filevec[1];
9502286d8edStholo 
9512286d8edStholo       compareseq (0, filevec[0].nondiscarded_lines,
9522286d8edStholo 		  0, filevec[1].nondiscarded_lines, no_discards);
9532286d8edStholo 
9542286d8edStholo       free (fdiag - (filevec[1].nondiscarded_lines + 1));
9552286d8edStholo 
9562286d8edStholo       /* Modify the results slightly to make them prettier
9572286d8edStholo 	 in cases where that can validly be done.  */
9582286d8edStholo 
9592286d8edStholo       shift_boundaries (filevec);
9602286d8edStholo 
9612286d8edStholo       /* Get the results of comparison in the form of a chain
9622286d8edStholo 	 of `struct change's -- an edit script.  */
9632286d8edStholo 
9642286d8edStholo       if (output_style == OUTPUT_ED)
9652286d8edStholo 	script = build_reverse_script (filevec);
9662286d8edStholo       else
9672286d8edStholo 	script = build_script (filevec);
9682286d8edStholo 
9692286d8edStholo       /* Set CHANGES if we had any diffs.
9702286d8edStholo 	 If some changes are ignored, we must scan the script to decide.  */
9712286d8edStholo       if (ignore_blank_lines_flag || ignore_regexp_list)
9722286d8edStholo 	{
9732286d8edStholo 	  struct change *next = script;
9742286d8edStholo 	  changes = 0;
9752286d8edStholo 
9762286d8edStholo 	  while (next && changes == 0)
9772286d8edStholo 	    {
9782286d8edStholo 	      struct change *this, *end;
9792286d8edStholo 	      int first0, last0, first1, last1, deletes, inserts;
9802286d8edStholo 
9812286d8edStholo 	      /* Find a set of changes that belong together.  */
9822286d8edStholo 	      this = next;
9832286d8edStholo 	      end = find_change (next);
9842286d8edStholo 
9852286d8edStholo 	      /* Disconnect them from the rest of the changes, making them
9862286d8edStholo 		 a hunk, and remember the rest for next iteration.  */
9872286d8edStholo 	      next = end->link;
9882286d8edStholo 	      end->link = 0;
9892286d8edStholo 
9902286d8edStholo 	      /* Determine whether this hunk is really a difference.  */
9912286d8edStholo 	      analyze_hunk (this, &first0, &last0, &first1, &last1,
9922286d8edStholo 			    &deletes, &inserts);
9932286d8edStholo 
9942286d8edStholo 	      /* Reconnect the script so it will all be freed properly.  */
9952286d8edStholo 	      end->link = next;
9962286d8edStholo 
9972286d8edStholo 	      if (deletes || inserts)
9982286d8edStholo 		changes = 1;
9992286d8edStholo 	    }
10002286d8edStholo 	}
10012286d8edStholo       else
10022286d8edStholo 	changes = (script != 0);
10032286d8edStholo 
10042286d8edStholo       if (no_details_flag)
10052286d8edStholo 	briefly_report (changes, filevec);
10062286d8edStholo       else
10072286d8edStholo 	{
10082286d8edStholo 	  if (changes || ! no_diff_means_no_output)
10092286d8edStholo 	    {
10102286d8edStholo 	      /* Record info for starting up output,
10112286d8edStholo 		 to be used if and when we have some output to print.  */
10122286d8edStholo 	      setup_output (files[0].name, files[1].name, depth);
10132286d8edStholo 
10142286d8edStholo 	      switch (output_style)
10152286d8edStholo 		{
10162286d8edStholo 		case OUTPUT_CONTEXT:
10172286d8edStholo 		  print_context_script (script, 0);
10182286d8edStholo 		  break;
10192286d8edStholo 
10202286d8edStholo 		case OUTPUT_UNIFIED:
10212286d8edStholo 		  print_context_script (script, 1);
10222286d8edStholo 		  break;
10232286d8edStholo 
10242286d8edStholo 		case OUTPUT_ED:
10252286d8edStholo 		  print_ed_script (script);
10262286d8edStholo 		  break;
10272286d8edStholo 
10282286d8edStholo 		case OUTPUT_FORWARD_ED:
10292286d8edStholo 		  pr_forward_ed_script (script);
10302286d8edStholo 		  break;
10312286d8edStholo 
10322286d8edStholo 		case OUTPUT_RCS:
10332286d8edStholo 		  print_rcs_script (script);
10342286d8edStholo 		  break;
10352286d8edStholo 
10362286d8edStholo 		case OUTPUT_NORMAL:
10372286d8edStholo 		  print_normal_script (script);
10382286d8edStholo 		  break;
10392286d8edStholo 
10402286d8edStholo 		case OUTPUT_IFDEF:
10412286d8edStholo 		  print_ifdef_script (script);
10422286d8edStholo 		  break;
10432286d8edStholo 
10442286d8edStholo 		case OUTPUT_SDIFF:
10452286d8edStholo 		  print_sdiff_script (script);
10462286d8edStholo 		}
10472286d8edStholo 
10482286d8edStholo 	      finish_output ();
10492286d8edStholo 	    }
10502286d8edStholo 	}
10512286d8edStholo 
10522286d8edStholo       free (filevec[0].undiscarded);
10532286d8edStholo 
10542286d8edStholo       free (filevec[0].changed_flag - 1);
10552286d8edStholo 
10562286d8edStholo       for (i = 1; i >= 0; --i)
10572286d8edStholo 	free (filevec[i].equivs);
10582286d8edStholo 
10592286d8edStholo       for (i = 0; i < 2; ++i)
10602286d8edStholo 	free (filevec[i].linbuf + filevec[i].linbuf_base);
10612286d8edStholo 
10622286d8edStholo       for (e = script; e; e = p)
10632286d8edStholo 	{
10642286d8edStholo 	  p = e->link;
10652286d8edStholo 	  free (e);
10662286d8edStholo 	}
10672286d8edStholo 
10682286d8edStholo       if (! ROBUST_OUTPUT_STYLE (output_style))
10692286d8edStholo 	for (i = 0; i < 2; ++i)
10702286d8edStholo 	  if (filevec[i].missing_newline)
10712286d8edStholo 	    {
10722286d8edStholo 	      diff_error ("No newline at end of file %s", filevec[i].name, "");
10732286d8edStholo 	      changes = 2;
10742286d8edStholo 	    }
10752286d8edStholo     }
10762286d8edStholo 
10772286d8edStholo   if (filevec[0].buffer != filevec[1].buffer)
10782286d8edStholo     free (filevec[0].buffer);
10792286d8edStholo   free (filevec[1].buffer);
10802286d8edStholo 
10812286d8edStholo   return changes;
10822286d8edStholo }
1083