1*75f6d617Schristos /* $NetBSD: analyze.c,v 1.1.1.1 2016/01/13 03:15:30 christos Exp $ */
2*75f6d617Schristos
3*75f6d617Schristos /* Analyze file differences for GNU DIFF.
4*75f6d617Schristos
5*75f6d617Schristos Copyright (C) 1988, 1989, 1992, 1993, 1994, 1995, 1998, 2001, 2002
6*75f6d617Schristos Free Software Foundation, Inc.
7*75f6d617Schristos
8*75f6d617Schristos This file is part of GNU DIFF.
9*75f6d617Schristos
10*75f6d617Schristos GNU DIFF is free software; you can redistribute it and/or modify
11*75f6d617Schristos it under the terms of the GNU General Public License as published by
12*75f6d617Schristos the Free Software Foundation; either version 2, or (at your option)
13*75f6d617Schristos any later version.
14*75f6d617Schristos
15*75f6d617Schristos GNU DIFF is distributed in the hope that it will be useful,
16*75f6d617Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of
17*75f6d617Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18*75f6d617Schristos GNU General Public License for more details.
19*75f6d617Schristos
20*75f6d617Schristos You should have received a copy of the GNU General Public License
21*75f6d617Schristos along with this program; see the file COPYING.
22*75f6d617Schristos If not, write to the Free Software Foundation,
23*75f6d617Schristos 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24*75f6d617Schristos
25*75f6d617Schristos /* The basic algorithm is described in:
26*75f6d617Schristos "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
27*75f6d617Schristos Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
28*75f6d617Schristos see especially section 4.2, which describes the variation used below.
29*75f6d617Schristos Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
30*75f6d617Schristos heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
31*75f6d617Schristos at the price of producing suboptimal output for large inputs with
32*75f6d617Schristos many differences.
33*75f6d617Schristos
34*75f6d617Schristos The basic algorithm was independently discovered as described in:
35*75f6d617Schristos "Algorithms for Approximate String Matching", E. Ukkonen,
36*75f6d617Schristos Information and Control Vol. 64, 1985, pp. 100-118. */
37*75f6d617Schristos
38*75f6d617Schristos #include "diff.h"
39*75f6d617Schristos #include <cmpbuf.h>
40*75f6d617Schristos #include <error.h>
41*75f6d617Schristos #include <regex.h>
42*75f6d617Schristos #include <xalloc.h>
43*75f6d617Schristos
44*75f6d617Schristos static lin *xvec, *yvec; /* Vectors being compared. */
45*75f6d617Schristos static lin *fdiag; /* Vector, indexed by diagonal, containing
46*75f6d617Schristos 1 + the X coordinate of the point furthest
47*75f6d617Schristos along the given diagonal in the forward
48*75f6d617Schristos search of the edit matrix. */
49*75f6d617Schristos static lin *bdiag; /* Vector, indexed by diagonal, containing
50*75f6d617Schristos the X coordinate of the point furthest
51*75f6d617Schristos along the given diagonal in the backward
52*75f6d617Schristos search of the edit matrix. */
53*75f6d617Schristos static lin too_expensive; /* Edit scripts longer than this are too
54*75f6d617Schristos expensive to compute. */
55*75f6d617Schristos
56*75f6d617Schristos #define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */
57*75f6d617Schristos
58*75f6d617Schristos struct partition
59*75f6d617Schristos {
60*75f6d617Schristos lin xmid, ymid; /* Midpoints of this partition. */
61*75f6d617Schristos bool lo_minimal; /* Nonzero if low half will be analyzed minimally. */
62*75f6d617Schristos bool hi_minimal; /* Likewise for high half. */
63*75f6d617Schristos };
64*75f6d617Schristos
65*75f6d617Schristos /* Find the midpoint of the shortest edit script for a specified
66*75f6d617Schristos portion of the two files.
67*75f6d617Schristos
68*75f6d617Schristos Scan from the beginnings of the files, and simultaneously from the ends,
69*75f6d617Schristos doing a breadth-first search through the space of edit-sequence.
70*75f6d617Schristos When the two searches meet, we have found the midpoint of the shortest
71*75f6d617Schristos edit sequence.
72*75f6d617Schristos
73*75f6d617Schristos If FIND_MINIMAL is nonzero, find the minimal edit script regardless
74*75f6d617Schristos of expense. Otherwise, if the search is too expensive, use
75*75f6d617Schristos heuristics to stop the search and report a suboptimal answer.
76*75f6d617Schristos
77*75f6d617Schristos Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number
78*75f6d617Schristos XMID - YMID equals the number of inserted lines minus the number
79*75f6d617Schristos of deleted lines (counting only lines before the midpoint).
80*75f6d617Schristos Return the approximate edit cost; this is the total number of
81*75f6d617Schristos lines inserted or deleted (counting only lines before the midpoint),
82*75f6d617Schristos unless a heuristic is used to terminate the search prematurely.
83*75f6d617Schristos
84*75f6d617Schristos Set PART->lo_minimal to true iff the minimal edit script for the
85*75f6d617Schristos left half of the partition is known; similarly for PART->hi_minimal.
86*75f6d617Schristos
87*75f6d617Schristos This function assumes that the first lines of the specified portions
88*75f6d617Schristos of the two files do not match, and likewise that the last lines do not
89*75f6d617Schristos match. The caller must trim matching lines from the beginning and end
90*75f6d617Schristos of the portions it is going to specify.
91*75f6d617Schristos
92*75f6d617Schristos If we return the "wrong" partitions,
93*75f6d617Schristos the worst this can do is cause suboptimal diff output.
94*75f6d617Schristos It cannot cause incorrect diff output. */
95*75f6d617Schristos
96*75f6d617Schristos static lin
diag(lin xoff,lin xlim,lin yoff,lin ylim,bool find_minimal,struct partition * part)97*75f6d617Schristos diag (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal,
98*75f6d617Schristos struct partition *part)
99*75f6d617Schristos {
100*75f6d617Schristos lin *const fd = fdiag; /* Give the compiler a chance. */
101*75f6d617Schristos lin *const bd = bdiag; /* Additional help for the compiler. */
102*75f6d617Schristos lin const *const xv = xvec; /* Still more help for the compiler. */
103*75f6d617Schristos lin const *const yv = yvec; /* And more and more . . . */
104*75f6d617Schristos lin const dmin = xoff - ylim; /* Minimum valid diagonal. */
105*75f6d617Schristos lin const dmax = xlim - yoff; /* Maximum valid diagonal. */
106*75f6d617Schristos lin const fmid = xoff - yoff; /* Center diagonal of top-down search. */
107*75f6d617Schristos lin const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */
108*75f6d617Schristos lin fmin = fmid, fmax = fmid; /* Limits of top-down search. */
109*75f6d617Schristos lin bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */
110*75f6d617Schristos lin c; /* Cost. */
111*75f6d617Schristos bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
112*75f6d617Schristos diagonal with respect to the northwest. */
113*75f6d617Schristos
114*75f6d617Schristos fd[fmid] = xoff;
115*75f6d617Schristos bd[bmid] = xlim;
116*75f6d617Schristos
117*75f6d617Schristos for (c = 1;; ++c)
118*75f6d617Schristos {
119*75f6d617Schristos lin d; /* Active diagonal. */
120*75f6d617Schristos bool big_snake = 0;
121*75f6d617Schristos
122*75f6d617Schristos /* Extend the top-down search by an edit step in each diagonal. */
123*75f6d617Schristos fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
124*75f6d617Schristos fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
125*75f6d617Schristos for (d = fmax; d >= fmin; d -= 2)
126*75f6d617Schristos {
127*75f6d617Schristos lin x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
128*75f6d617Schristos
129*75f6d617Schristos if (tlo >= thi)
130*75f6d617Schristos x = tlo + 1;
131*75f6d617Schristos else
132*75f6d617Schristos x = thi;
133*75f6d617Schristos oldx = x;
134*75f6d617Schristos y = x - d;
135*75f6d617Schristos while (x < xlim && y < ylim && xv[x] == yv[y])
136*75f6d617Schristos ++x, ++y;
137*75f6d617Schristos if (x - oldx > SNAKE_LIMIT)
138*75f6d617Schristos big_snake = 1;
139*75f6d617Schristos fd[d] = x;
140*75f6d617Schristos if (odd && bmin <= d && d <= bmax && bd[d] <= x)
141*75f6d617Schristos {
142*75f6d617Schristos part->xmid = x;
143*75f6d617Schristos part->ymid = y;
144*75f6d617Schristos part->lo_minimal = part->hi_minimal = 1;
145*75f6d617Schristos return 2 * c - 1;
146*75f6d617Schristos }
147*75f6d617Schristos }
148*75f6d617Schristos
149*75f6d617Schristos /* Similarly extend the bottom-up search. */
150*75f6d617Schristos bmin > dmin ? bd[--bmin - 1] = LIN_MAX : ++bmin;
151*75f6d617Schristos bmax < dmax ? bd[++bmax + 1] = LIN_MAX : --bmax;
152*75f6d617Schristos for (d = bmax; d >= bmin; d -= 2)
153*75f6d617Schristos {
154*75f6d617Schristos lin x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
155*75f6d617Schristos
156*75f6d617Schristos if (tlo < thi)
157*75f6d617Schristos x = tlo;
158*75f6d617Schristos else
159*75f6d617Schristos x = thi - 1;
160*75f6d617Schristos oldx = x;
161*75f6d617Schristos y = x - d;
162*75f6d617Schristos while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
163*75f6d617Schristos --x, --y;
164*75f6d617Schristos if (oldx - x > SNAKE_LIMIT)
165*75f6d617Schristos big_snake = 1;
166*75f6d617Schristos bd[d] = x;
167*75f6d617Schristos if (!odd && fmin <= d && d <= fmax && x <= fd[d])
168*75f6d617Schristos {
169*75f6d617Schristos part->xmid = x;
170*75f6d617Schristos part->ymid = y;
171*75f6d617Schristos part->lo_minimal = part->hi_minimal = 1;
172*75f6d617Schristos return 2 * c;
173*75f6d617Schristos }
174*75f6d617Schristos }
175*75f6d617Schristos
176*75f6d617Schristos if (find_minimal)
177*75f6d617Schristos continue;
178*75f6d617Schristos
179*75f6d617Schristos /* Heuristic: check occasionally for a diagonal that has made
180*75f6d617Schristos lots of progress compared with the edit distance.
181*75f6d617Schristos If we have any such, find the one that has made the most
182*75f6d617Schristos progress and return it as if it had succeeded.
183*75f6d617Schristos
184*75f6d617Schristos With this heuristic, for files with a constant small density
185*75f6d617Schristos of changes, the algorithm is linear in the file size. */
186*75f6d617Schristos
187*75f6d617Schristos if (200 < c && big_snake && speed_large_files)
188*75f6d617Schristos {
189*75f6d617Schristos lin best;
190*75f6d617Schristos
191*75f6d617Schristos best = 0;
192*75f6d617Schristos for (d = fmax; d >= fmin; d -= 2)
193*75f6d617Schristos {
194*75f6d617Schristos lin dd = d - fmid;
195*75f6d617Schristos lin x = fd[d];
196*75f6d617Schristos lin y = x - d;
197*75f6d617Schristos lin v = (x - xoff) * 2 - dd;
198*75f6d617Schristos if (v > 12 * (c + (dd < 0 ? -dd : dd)))
199*75f6d617Schristos {
200*75f6d617Schristos if (v > best
201*75f6d617Schristos && xoff + SNAKE_LIMIT <= x && x < xlim
202*75f6d617Schristos && yoff + SNAKE_LIMIT <= y && y < ylim)
203*75f6d617Schristos {
204*75f6d617Schristos /* We have a good enough best diagonal;
205*75f6d617Schristos now insist that it end with a significant snake. */
206*75f6d617Schristos int k;
207*75f6d617Schristos
208*75f6d617Schristos for (k = 1; xv[x - k] == yv[y - k]; k++)
209*75f6d617Schristos if (k == SNAKE_LIMIT)
210*75f6d617Schristos {
211*75f6d617Schristos best = v;
212*75f6d617Schristos part->xmid = x;
213*75f6d617Schristos part->ymid = y;
214*75f6d617Schristos break;
215*75f6d617Schristos }
216*75f6d617Schristos }
217*75f6d617Schristos }
218*75f6d617Schristos }
219*75f6d617Schristos if (best > 0)
220*75f6d617Schristos {
221*75f6d617Schristos part->lo_minimal = 1;
222*75f6d617Schristos part->hi_minimal = 0;
223*75f6d617Schristos return 2 * c - 1;
224*75f6d617Schristos }
225*75f6d617Schristos
226*75f6d617Schristos best = 0;
227*75f6d617Schristos for (d = bmax; d >= bmin; d -= 2)
228*75f6d617Schristos {
229*75f6d617Schristos lin dd = d - bmid;
230*75f6d617Schristos lin x = bd[d];
231*75f6d617Schristos lin y = x - d;
232*75f6d617Schristos lin v = (xlim - x) * 2 + dd;
233*75f6d617Schristos if (v > 12 * (c + (dd < 0 ? -dd : dd)))
234*75f6d617Schristos {
235*75f6d617Schristos if (v > best
236*75f6d617Schristos && xoff < x && x <= xlim - SNAKE_LIMIT
237*75f6d617Schristos && yoff < y && y <= ylim - SNAKE_LIMIT)
238*75f6d617Schristos {
239*75f6d617Schristos /* We have a good enough best diagonal;
240*75f6d617Schristos now insist that it end with a significant snake. */
241*75f6d617Schristos int k;
242*75f6d617Schristos
243*75f6d617Schristos for (k = 0; xv[x + k] == yv[y + k]; k++)
244*75f6d617Schristos if (k == SNAKE_LIMIT - 1)
245*75f6d617Schristos {
246*75f6d617Schristos best = v;
247*75f6d617Schristos part->xmid = x;
248*75f6d617Schristos part->ymid = y;
249*75f6d617Schristos break;
250*75f6d617Schristos }
251*75f6d617Schristos }
252*75f6d617Schristos }
253*75f6d617Schristos }
254*75f6d617Schristos if (best > 0)
255*75f6d617Schristos {
256*75f6d617Schristos part->lo_minimal = 0;
257*75f6d617Schristos part->hi_minimal = 1;
258*75f6d617Schristos return 2 * c - 1;
259*75f6d617Schristos }
260*75f6d617Schristos }
261*75f6d617Schristos
262*75f6d617Schristos /* Heuristic: if we've gone well beyond the call of duty,
263*75f6d617Schristos give up and report halfway between our best results so far. */
264*75f6d617Schristos if (c >= too_expensive)
265*75f6d617Schristos {
266*75f6d617Schristos lin fxybest, fxbest;
267*75f6d617Schristos lin bxybest, bxbest;
268*75f6d617Schristos
269*75f6d617Schristos fxbest = bxbest = 0; /* Pacify `gcc -Wall'. */
270*75f6d617Schristos
271*75f6d617Schristos /* Find forward diagonal that maximizes X + Y. */
272*75f6d617Schristos fxybest = -1;
273*75f6d617Schristos for (d = fmax; d >= fmin; d -= 2)
274*75f6d617Schristos {
275*75f6d617Schristos lin x = MIN (fd[d], xlim);
276*75f6d617Schristos lin y = x - d;
277*75f6d617Schristos if (ylim < y)
278*75f6d617Schristos x = ylim + d, y = ylim;
279*75f6d617Schristos if (fxybest < x + y)
280*75f6d617Schristos {
281*75f6d617Schristos fxybest = x + y;
282*75f6d617Schristos fxbest = x;
283*75f6d617Schristos }
284*75f6d617Schristos }
285*75f6d617Schristos
286*75f6d617Schristos /* Find backward diagonal that minimizes X + Y. */
287*75f6d617Schristos bxybest = LIN_MAX;
288*75f6d617Schristos for (d = bmax; d >= bmin; d -= 2)
289*75f6d617Schristos {
290*75f6d617Schristos lin x = MAX (xoff, bd[d]);
291*75f6d617Schristos lin y = x - d;
292*75f6d617Schristos if (y < yoff)
293*75f6d617Schristos x = yoff + d, y = yoff;
294*75f6d617Schristos if (x + y < bxybest)
295*75f6d617Schristos {
296*75f6d617Schristos bxybest = x + y;
297*75f6d617Schristos bxbest = x;
298*75f6d617Schristos }
299*75f6d617Schristos }
300*75f6d617Schristos
301*75f6d617Schristos /* Use the better of the two diagonals. */
302*75f6d617Schristos if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
303*75f6d617Schristos {
304*75f6d617Schristos part->xmid = fxbest;
305*75f6d617Schristos part->ymid = fxybest - fxbest;
306*75f6d617Schristos part->lo_minimal = 1;
307*75f6d617Schristos part->hi_minimal = 0;
308*75f6d617Schristos }
309*75f6d617Schristos else
310*75f6d617Schristos {
311*75f6d617Schristos part->xmid = bxbest;
312*75f6d617Schristos part->ymid = bxybest - bxbest;
313*75f6d617Schristos part->lo_minimal = 0;
314*75f6d617Schristos part->hi_minimal = 1;
315*75f6d617Schristos }
316*75f6d617Schristos return 2 * c - 1;
317*75f6d617Schristos }
318*75f6d617Schristos }
319*75f6d617Schristos }
320*75f6d617Schristos
321*75f6d617Schristos /* Compare in detail contiguous subsequences of the two files
322*75f6d617Schristos which are known, as a whole, to match each other.
323*75f6d617Schristos
324*75f6d617Schristos The results are recorded in the vectors files[N].changed, by
325*75f6d617Schristos storing 1 in the element for each line that is an insertion or deletion.
326*75f6d617Schristos
327*75f6d617Schristos The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
328*75f6d617Schristos
329*75f6d617Schristos Note that XLIM, YLIM are exclusive bounds.
330*75f6d617Schristos All line numbers are origin-0 and discarded lines are not counted.
331*75f6d617Schristos
332*75f6d617Schristos If FIND_MINIMAL, find a minimal difference no matter how
333*75f6d617Schristos expensive it is. */
334*75f6d617Schristos
335*75f6d617Schristos static void
compareseq(lin xoff,lin xlim,lin yoff,lin ylim,bool find_minimal)336*75f6d617Schristos compareseq (lin xoff, lin xlim, lin yoff, lin ylim, bool find_minimal)
337*75f6d617Schristos {
338*75f6d617Schristos lin * const xv = xvec; /* Help the compiler. */
339*75f6d617Schristos lin * const yv = yvec;
340*75f6d617Schristos
341*75f6d617Schristos /* Slide down the bottom initial diagonal. */
342*75f6d617Schristos while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
343*75f6d617Schristos ++xoff, ++yoff;
344*75f6d617Schristos /* Slide up the top initial diagonal. */
345*75f6d617Schristos while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
346*75f6d617Schristos --xlim, --ylim;
347*75f6d617Schristos
348*75f6d617Schristos /* Handle simple cases. */
349*75f6d617Schristos if (xoff == xlim)
350*75f6d617Schristos while (yoff < ylim)
351*75f6d617Schristos files[1].changed[files[1].realindexes[yoff++]] = 1;
352*75f6d617Schristos else if (yoff == ylim)
353*75f6d617Schristos while (xoff < xlim)
354*75f6d617Schristos files[0].changed[files[0].realindexes[xoff++]] = 1;
355*75f6d617Schristos else
356*75f6d617Schristos {
357*75f6d617Schristos lin c;
358*75f6d617Schristos struct partition part;
359*75f6d617Schristos
360*75f6d617Schristos /* Find a point of correspondence in the middle of the files. */
361*75f6d617Schristos
362*75f6d617Schristos c = diag (xoff, xlim, yoff, ylim, find_minimal, &part);
363*75f6d617Schristos
364*75f6d617Schristos if (c == 1)
365*75f6d617Schristos {
366*75f6d617Schristos /* This should be impossible, because it implies that
367*75f6d617Schristos one of the two subsequences is empty,
368*75f6d617Schristos and that case was handled above without calling `diag'.
369*75f6d617Schristos Let's verify that this is true. */
370*75f6d617Schristos abort ();
371*75f6d617Schristos #if 0
372*75f6d617Schristos /* The two subsequences differ by a single insert or delete;
373*75f6d617Schristos record it and we are done. */
374*75f6d617Schristos if (part.xmid - part.ymid < xoff - yoff)
375*75f6d617Schristos files[1].changed[files[1].realindexes[part.ymid - 1]] = 1;
376*75f6d617Schristos else
377*75f6d617Schristos files[0].changed[files[0].realindexes[part.xmid]] = 1;
378*75f6d617Schristos #endif
379*75f6d617Schristos }
380*75f6d617Schristos else
381*75f6d617Schristos {
382*75f6d617Schristos /* Use the partitions to split this problem into subproblems. */
383*75f6d617Schristos compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
384*75f6d617Schristos compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
385*75f6d617Schristos }
386*75f6d617Schristos }
387*75f6d617Schristos }
388*75f6d617Schristos
389*75f6d617Schristos /* Discard lines from one file that have no matches in the other file.
390*75f6d617Schristos
391*75f6d617Schristos A line which is discarded will not be considered by the actual
392*75f6d617Schristos comparison algorithm; it will be as if that line were not in the file.
393*75f6d617Schristos The file's `realindexes' table maps virtual line numbers
394*75f6d617Schristos (which don't count the discarded lines) into real line numbers;
395*75f6d617Schristos this is how the actual comparison algorithm produces results
396*75f6d617Schristos that are comprehensible when the discarded lines are counted.
397*75f6d617Schristos
398*75f6d617Schristos When we discard a line, we also mark it as a deletion or insertion
399*75f6d617Schristos so that it will be printed in the output. */
400*75f6d617Schristos
401*75f6d617Schristos static void
discard_confusing_lines(struct file_data filevec[])402*75f6d617Schristos discard_confusing_lines (struct file_data filevec[])
403*75f6d617Schristos {
404*75f6d617Schristos int f;
405*75f6d617Schristos lin i;
406*75f6d617Schristos char *discarded[2];
407*75f6d617Schristos lin *equiv_count[2];
408*75f6d617Schristos lin *p;
409*75f6d617Schristos
410*75f6d617Schristos /* Allocate our results. */
411*75f6d617Schristos p = xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
412*75f6d617Schristos * (2 * sizeof *p));
413*75f6d617Schristos for (f = 0; f < 2; f++)
414*75f6d617Schristos {
415*75f6d617Schristos filevec[f].undiscarded = p; p += filevec[f].buffered_lines;
416*75f6d617Schristos filevec[f].realindexes = p; p += filevec[f].buffered_lines;
417*75f6d617Schristos }
418*75f6d617Schristos
419*75f6d617Schristos /* Set up equiv_count[F][I] as the number of lines in file F
420*75f6d617Schristos that fall in equivalence class I. */
421*75f6d617Schristos
422*75f6d617Schristos p = zalloc (filevec[0].equiv_max * (2 * sizeof *p));
423*75f6d617Schristos equiv_count[0] = p;
424*75f6d617Schristos equiv_count[1] = p + filevec[0].equiv_max;
425*75f6d617Schristos
426*75f6d617Schristos for (i = 0; i < filevec[0].buffered_lines; ++i)
427*75f6d617Schristos ++equiv_count[0][filevec[0].equivs[i]];
428*75f6d617Schristos for (i = 0; i < filevec[1].buffered_lines; ++i)
429*75f6d617Schristos ++equiv_count[1][filevec[1].equivs[i]];
430*75f6d617Schristos
431*75f6d617Schristos /* Set up tables of which lines are going to be discarded. */
432*75f6d617Schristos
433*75f6d617Schristos discarded[0] = zalloc (filevec[0].buffered_lines
434*75f6d617Schristos + filevec[1].buffered_lines);
435*75f6d617Schristos discarded[1] = discarded[0] + filevec[0].buffered_lines;
436*75f6d617Schristos
437*75f6d617Schristos /* Mark to be discarded each line that matches no line of the other file.
438*75f6d617Schristos If a line matches many lines, mark it as provisionally discardable. */
439*75f6d617Schristos
440*75f6d617Schristos for (f = 0; f < 2; f++)
441*75f6d617Schristos {
442*75f6d617Schristos size_t end = filevec[f].buffered_lines;
443*75f6d617Schristos char *discards = discarded[f];
444*75f6d617Schristos lin *counts = equiv_count[1 - f];
445*75f6d617Schristos lin *equivs = filevec[f].equivs;
446*75f6d617Schristos size_t many = 5;
447*75f6d617Schristos size_t tem = end / 64;
448*75f6d617Schristos
449*75f6d617Schristos /* Multiply MANY by approximate square root of number of lines.
450*75f6d617Schristos That is the threshold for provisionally discardable lines. */
451*75f6d617Schristos while ((tem = tem >> 2) > 0)
452*75f6d617Schristos many *= 2;
453*75f6d617Schristos
454*75f6d617Schristos for (i = 0; i < end; i++)
455*75f6d617Schristos {
456*75f6d617Schristos lin nmatch;
457*75f6d617Schristos if (equivs[i] == 0)
458*75f6d617Schristos continue;
459*75f6d617Schristos nmatch = counts[equivs[i]];
460*75f6d617Schristos if (nmatch == 0)
461*75f6d617Schristos discards[i] = 1;
462*75f6d617Schristos else if (nmatch > many)
463*75f6d617Schristos discards[i] = 2;
464*75f6d617Schristos }
465*75f6d617Schristos }
466*75f6d617Schristos
467*75f6d617Schristos /* Don't really discard the provisional lines except when they occur
468*75f6d617Schristos in a run of discardables, with nonprovisionals at the beginning
469*75f6d617Schristos and end. */
470*75f6d617Schristos
471*75f6d617Schristos for (f = 0; f < 2; f++)
472*75f6d617Schristos {
473*75f6d617Schristos lin end = filevec[f].buffered_lines;
474*75f6d617Schristos register char *discards = discarded[f];
475*75f6d617Schristos
476*75f6d617Schristos for (i = 0; i < end; i++)
477*75f6d617Schristos {
478*75f6d617Schristos /* Cancel provisional discards not in middle of run of discards. */
479*75f6d617Schristos if (discards[i] == 2)
480*75f6d617Schristos discards[i] = 0;
481*75f6d617Schristos else if (discards[i] != 0)
482*75f6d617Schristos {
483*75f6d617Schristos /* We have found a nonprovisional discard. */
484*75f6d617Schristos register lin j;
485*75f6d617Schristos lin length;
486*75f6d617Schristos lin provisional = 0;
487*75f6d617Schristos
488*75f6d617Schristos /* Find end of this run of discardable lines.
489*75f6d617Schristos Count how many are provisionally discardable. */
490*75f6d617Schristos for (j = i; j < end; j++)
491*75f6d617Schristos {
492*75f6d617Schristos if (discards[j] == 0)
493*75f6d617Schristos break;
494*75f6d617Schristos if (discards[j] == 2)
495*75f6d617Schristos ++provisional;
496*75f6d617Schristos }
497*75f6d617Schristos
498*75f6d617Schristos /* Cancel provisional discards at end, and shrink the run. */
499*75f6d617Schristos while (j > i && discards[j - 1] == 2)
500*75f6d617Schristos discards[--j] = 0, --provisional;
501*75f6d617Schristos
502*75f6d617Schristos /* Now we have the length of a run of discardable lines
503*75f6d617Schristos whose first and last are not provisional. */
504*75f6d617Schristos length = j - i;
505*75f6d617Schristos
506*75f6d617Schristos /* If 1/4 of the lines in the run are provisional,
507*75f6d617Schristos cancel discarding of all provisional lines in the run. */
508*75f6d617Schristos if (provisional * 4 > length)
509*75f6d617Schristos {
510*75f6d617Schristos while (j > i)
511*75f6d617Schristos if (discards[--j] == 2)
512*75f6d617Schristos discards[j] = 0;
513*75f6d617Schristos }
514*75f6d617Schristos else
515*75f6d617Schristos {
516*75f6d617Schristos register lin consec;
517*75f6d617Schristos lin minimum = 1;
518*75f6d617Schristos lin tem = length >> 2;
519*75f6d617Schristos
520*75f6d617Schristos /* MINIMUM is approximate square root of LENGTH/4.
521*75f6d617Schristos A subrun of two or more provisionals can stand
522*75f6d617Schristos when LENGTH is at least 16.
523*75f6d617Schristos A subrun of 4 or more can stand when LENGTH >= 64. */
524*75f6d617Schristos while (0 < (tem >>= 2))
525*75f6d617Schristos minimum <<= 1;
526*75f6d617Schristos minimum++;
527*75f6d617Schristos
528*75f6d617Schristos /* Cancel any subrun of MINIMUM or more provisionals
529*75f6d617Schristos within the larger run. */
530*75f6d617Schristos for (j = 0, consec = 0; j < length; j++)
531*75f6d617Schristos if (discards[i + j] != 2)
532*75f6d617Schristos consec = 0;
533*75f6d617Schristos else if (minimum == ++consec)
534*75f6d617Schristos /* Back up to start of subrun, to cancel it all. */
535*75f6d617Schristos j -= consec;
536*75f6d617Schristos else if (minimum < consec)
537*75f6d617Schristos discards[i + j] = 0;
538*75f6d617Schristos
539*75f6d617Schristos /* Scan from beginning of run
540*75f6d617Schristos until we find 3 or more nonprovisionals in a row
541*75f6d617Schristos or until the first nonprovisional at least 8 lines in.
542*75f6d617Schristos Until that point, cancel any provisionals. */
543*75f6d617Schristos for (j = 0, consec = 0; j < length; j++)
544*75f6d617Schristos {
545*75f6d617Schristos if (j >= 8 && discards[i + j] == 1)
546*75f6d617Schristos break;
547*75f6d617Schristos if (discards[i + j] == 2)
548*75f6d617Schristos consec = 0, discards[i + j] = 0;
549*75f6d617Schristos else if (discards[i + j] == 0)
550*75f6d617Schristos consec = 0;
551*75f6d617Schristos else
552*75f6d617Schristos consec++;
553*75f6d617Schristos if (consec == 3)
554*75f6d617Schristos break;
555*75f6d617Schristos }
556*75f6d617Schristos
557*75f6d617Schristos /* I advances to the last line of the run. */
558*75f6d617Schristos i += length - 1;
559*75f6d617Schristos
560*75f6d617Schristos /* Same thing, from end. */
561*75f6d617Schristos for (j = 0, consec = 0; j < length; j++)
562*75f6d617Schristos {
563*75f6d617Schristos if (j >= 8 && discards[i - j] == 1)
564*75f6d617Schristos break;
565*75f6d617Schristos if (discards[i - j] == 2)
566*75f6d617Schristos consec = 0, discards[i - j] = 0;
567*75f6d617Schristos else if (discards[i - j] == 0)
568*75f6d617Schristos consec = 0;
569*75f6d617Schristos else
570*75f6d617Schristos consec++;
571*75f6d617Schristos if (consec == 3)
572*75f6d617Schristos break;
573*75f6d617Schristos }
574*75f6d617Schristos }
575*75f6d617Schristos }
576*75f6d617Schristos }
577*75f6d617Schristos }
578*75f6d617Schristos
579*75f6d617Schristos /* Actually discard the lines. */
580*75f6d617Schristos for (f = 0; f < 2; f++)
581*75f6d617Schristos {
582*75f6d617Schristos char *discards = discarded[f];
583*75f6d617Schristos lin end = filevec[f].buffered_lines;
584*75f6d617Schristos lin j = 0;
585*75f6d617Schristos for (i = 0; i < end; ++i)
586*75f6d617Schristos if (minimal || discards[i] == 0)
587*75f6d617Schristos {
588*75f6d617Schristos filevec[f].undiscarded[j] = filevec[f].equivs[i];
589*75f6d617Schristos filevec[f].realindexes[j++] = i;
590*75f6d617Schristos }
591*75f6d617Schristos else
592*75f6d617Schristos filevec[f].changed[i] = 1;
593*75f6d617Schristos filevec[f].nondiscarded_lines = j;
594*75f6d617Schristos }
595*75f6d617Schristos
596*75f6d617Schristos free (discarded[0]);
597*75f6d617Schristos free (equiv_count[0]);
598*75f6d617Schristos }
599*75f6d617Schristos
600*75f6d617Schristos /* Adjust inserts/deletes of identical lines to join changes
601*75f6d617Schristos as much as possible.
602*75f6d617Schristos
603*75f6d617Schristos We do something when a run of changed lines include a
604*75f6d617Schristos line at one end and have an excluded, identical line at the other.
605*75f6d617Schristos We are free to choose which identical line is included.
606*75f6d617Schristos `compareseq' usually chooses the one at the beginning,
607*75f6d617Schristos but usually it is cleaner to consider the following identical line
608*75f6d617Schristos to be the "change". */
609*75f6d617Schristos
610*75f6d617Schristos static void
shift_boundaries(struct file_data filevec[])611*75f6d617Schristos shift_boundaries (struct file_data filevec[])
612*75f6d617Schristos {
613*75f6d617Schristos int f;
614*75f6d617Schristos
615*75f6d617Schristos for (f = 0; f < 2; f++)
616*75f6d617Schristos {
617*75f6d617Schristos bool *changed = filevec[f].changed;
618*75f6d617Schristos bool const *other_changed = filevec[1 - f].changed;
619*75f6d617Schristos lin const *equivs = filevec[f].equivs;
620*75f6d617Schristos lin i = 0;
621*75f6d617Schristos lin j = 0;
622*75f6d617Schristos lin i_end = filevec[f].buffered_lines;
623*75f6d617Schristos
624*75f6d617Schristos while (1)
625*75f6d617Schristos {
626*75f6d617Schristos lin runlength, start, corresponding;
627*75f6d617Schristos
628*75f6d617Schristos /* Scan forwards to find beginning of another run of changes.
629*75f6d617Schristos Also keep track of the corresponding point in the other file. */
630*75f6d617Schristos
631*75f6d617Schristos while (i < i_end && !changed[i])
632*75f6d617Schristos {
633*75f6d617Schristos while (other_changed[j++])
634*75f6d617Schristos continue;
635*75f6d617Schristos i++;
636*75f6d617Schristos }
637*75f6d617Schristos
638*75f6d617Schristos if (i == i_end)
639*75f6d617Schristos break;
640*75f6d617Schristos
641*75f6d617Schristos start = i;
642*75f6d617Schristos
643*75f6d617Schristos /* Find the end of this run of changes. */
644*75f6d617Schristos
645*75f6d617Schristos while (changed[++i])
646*75f6d617Schristos continue;
647*75f6d617Schristos while (other_changed[j])
648*75f6d617Schristos j++;
649*75f6d617Schristos
650*75f6d617Schristos do
651*75f6d617Schristos {
652*75f6d617Schristos /* Record the length of this run of changes, so that
653*75f6d617Schristos we can later determine whether the run has grown. */
654*75f6d617Schristos runlength = i - start;
655*75f6d617Schristos
656*75f6d617Schristos /* Move the changed region back, so long as the
657*75f6d617Schristos previous unchanged line matches the last changed one.
658*75f6d617Schristos This merges with previous changed regions. */
659*75f6d617Schristos
660*75f6d617Schristos while (start && equivs[start - 1] == equivs[i - 1])
661*75f6d617Schristos {
662*75f6d617Schristos changed[--start] = 1;
663*75f6d617Schristos changed[--i] = 0;
664*75f6d617Schristos while (changed[start - 1])
665*75f6d617Schristos start--;
666*75f6d617Schristos while (other_changed[--j])
667*75f6d617Schristos continue;
668*75f6d617Schristos }
669*75f6d617Schristos
670*75f6d617Schristos /* Set CORRESPONDING to the end of the changed run, at the last
671*75f6d617Schristos point where it corresponds to a changed run in the other file.
672*75f6d617Schristos CORRESPONDING == I_END means no such point has been found. */
673*75f6d617Schristos corresponding = other_changed[j - 1] ? i : i_end;
674*75f6d617Schristos
675*75f6d617Schristos /* Move the changed region forward, so long as the
676*75f6d617Schristos first changed line matches the following unchanged one.
677*75f6d617Schristos This merges with following changed regions.
678*75f6d617Schristos Do this second, so that if there are no merges,
679*75f6d617Schristos the changed region is moved forward as far as possible. */
680*75f6d617Schristos
681*75f6d617Schristos while (i != i_end && equivs[start] == equivs[i])
682*75f6d617Schristos {
683*75f6d617Schristos changed[start++] = 0;
684*75f6d617Schristos changed[i++] = 1;
685*75f6d617Schristos while (changed[i])
686*75f6d617Schristos i++;
687*75f6d617Schristos while (other_changed[++j])
688*75f6d617Schristos corresponding = i;
689*75f6d617Schristos }
690*75f6d617Schristos }
691*75f6d617Schristos while (runlength != i - start);
692*75f6d617Schristos
693*75f6d617Schristos /* If possible, move the fully-merged run of changes
694*75f6d617Schristos back to a corresponding run in the other file. */
695*75f6d617Schristos
696*75f6d617Schristos while (corresponding < i)
697*75f6d617Schristos {
698*75f6d617Schristos changed[--start] = 1;
699*75f6d617Schristos changed[--i] = 0;
700*75f6d617Schristos while (other_changed[--j])
701*75f6d617Schristos continue;
702*75f6d617Schristos }
703*75f6d617Schristos }
704*75f6d617Schristos }
705*75f6d617Schristos }
706*75f6d617Schristos
707*75f6d617Schristos /* Cons an additional entry onto the front of an edit script OLD.
708*75f6d617Schristos LINE0 and LINE1 are the first affected lines in the two files (origin 0).
709*75f6d617Schristos DELETED is the number of lines deleted here from file 0.
710*75f6d617Schristos INSERTED is the number of lines inserted here in file 1.
711*75f6d617Schristos
712*75f6d617Schristos If DELETED is 0 then LINE0 is the number of the line before
713*75f6d617Schristos which the insertion was done; vice versa for INSERTED and LINE1. */
714*75f6d617Schristos
715*75f6d617Schristos static struct change *
add_change(lin line0,lin line1,lin deleted,lin inserted,struct change * old)716*75f6d617Schristos add_change (lin line0, lin line1, lin deleted, lin inserted,
717*75f6d617Schristos struct change *old)
718*75f6d617Schristos {
719*75f6d617Schristos struct change *new = xmalloc (sizeof *new);
720*75f6d617Schristos
721*75f6d617Schristos new->line0 = line0;
722*75f6d617Schristos new->line1 = line1;
723*75f6d617Schristos new->inserted = inserted;
724*75f6d617Schristos new->deleted = deleted;
725*75f6d617Schristos new->link = old;
726*75f6d617Schristos return new;
727*75f6d617Schristos }
728*75f6d617Schristos
729*75f6d617Schristos /* Scan the tables of which lines are inserted and deleted,
730*75f6d617Schristos producing an edit script in reverse order. */
731*75f6d617Schristos
732*75f6d617Schristos static struct change *
build_reverse_script(struct file_data const filevec[])733*75f6d617Schristos build_reverse_script (struct file_data const filevec[])
734*75f6d617Schristos {
735*75f6d617Schristos struct change *script = 0;
736*75f6d617Schristos bool *changed0 = filevec[0].changed;
737*75f6d617Schristos bool *changed1 = filevec[1].changed;
738*75f6d617Schristos lin len0 = filevec[0].buffered_lines;
739*75f6d617Schristos lin len1 = filevec[1].buffered_lines;
740*75f6d617Schristos
741*75f6d617Schristos /* Note that changedN[len0] does exist, and is 0. */
742*75f6d617Schristos
743*75f6d617Schristos lin i0 = 0, i1 = 0;
744*75f6d617Schristos
745*75f6d617Schristos while (i0 < len0 || i1 < len1)
746*75f6d617Schristos {
747*75f6d617Schristos if (changed0[i0] | changed1[i1])
748*75f6d617Schristos {
749*75f6d617Schristos lin line0 = i0, line1 = i1;
750*75f6d617Schristos
751*75f6d617Schristos /* Find # lines changed here in each file. */
752*75f6d617Schristos while (changed0[i0]) ++i0;
753*75f6d617Schristos while (changed1[i1]) ++i1;
754*75f6d617Schristos
755*75f6d617Schristos /* Record this change. */
756*75f6d617Schristos script = add_change (line0, line1, i0 - line0, i1 - line1, script);
757*75f6d617Schristos }
758*75f6d617Schristos
759*75f6d617Schristos /* We have reached lines in the two files that match each other. */
760*75f6d617Schristos i0++, i1++;
761*75f6d617Schristos }
762*75f6d617Schristos
763*75f6d617Schristos return script;
764*75f6d617Schristos }
765*75f6d617Schristos
766*75f6d617Schristos /* Scan the tables of which lines are inserted and deleted,
767*75f6d617Schristos producing an edit script in forward order. */
768*75f6d617Schristos
769*75f6d617Schristos static struct change *
build_script(struct file_data const filevec[])770*75f6d617Schristos build_script (struct file_data const filevec[])
771*75f6d617Schristos {
772*75f6d617Schristos struct change *script = 0;
773*75f6d617Schristos bool *changed0 = filevec[0].changed;
774*75f6d617Schristos bool *changed1 = filevec[1].changed;
775*75f6d617Schristos lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
776*75f6d617Schristos
777*75f6d617Schristos /* Note that changedN[-1] does exist, and is 0. */
778*75f6d617Schristos
779*75f6d617Schristos while (i0 >= 0 || i1 >= 0)
780*75f6d617Schristos {
781*75f6d617Schristos if (changed0[i0 - 1] | changed1[i1 - 1])
782*75f6d617Schristos {
783*75f6d617Schristos lin line0 = i0, line1 = i1;
784*75f6d617Schristos
785*75f6d617Schristos /* Find # lines changed here in each file. */
786*75f6d617Schristos while (changed0[i0 - 1]) --i0;
787*75f6d617Schristos while (changed1[i1 - 1]) --i1;
788*75f6d617Schristos
789*75f6d617Schristos /* Record this change. */
790*75f6d617Schristos script = add_change (i0, i1, line0 - i0, line1 - i1, script);
791*75f6d617Schristos }
792*75f6d617Schristos
793*75f6d617Schristos /* We have reached lines in the two files that match each other. */
794*75f6d617Schristos i0--, i1--;
795*75f6d617Schristos }
796*75f6d617Schristos
797*75f6d617Schristos return script;
798*75f6d617Schristos }
799*75f6d617Schristos
800*75f6d617Schristos /* If CHANGES, briefly report that two files differed.
801*75f6d617Schristos Return 2 if trouble, CHANGES otherwise. */
802*75f6d617Schristos static int
briefly_report(int changes,struct file_data const filevec[])803*75f6d617Schristos briefly_report (int changes, struct file_data const filevec[])
804*75f6d617Schristos {
805*75f6d617Schristos if (changes)
806*75f6d617Schristos {
807*75f6d617Schristos char const *label0 = file_label[0] ? file_label[0] : filevec[0].name;
808*75f6d617Schristos char const *label1 = file_label[1] ? file_label[1] : filevec[1].name;
809*75f6d617Schristos
810*75f6d617Schristos if (brief)
811*75f6d617Schristos message ("Files %s and %s differ\n", label0, label1);
812*75f6d617Schristos else
813*75f6d617Schristos {
814*75f6d617Schristos message ("Binary files %s and %s differ\n", label0, label1);
815*75f6d617Schristos changes = 2;
816*75f6d617Schristos }
817*75f6d617Schristos }
818*75f6d617Schristos
819*75f6d617Schristos return changes;
820*75f6d617Schristos }
821*75f6d617Schristos
822*75f6d617Schristos /* Report the differences of two files. */
823*75f6d617Schristos int
diff_2_files(struct comparison * cmp)824*75f6d617Schristos diff_2_files (struct comparison *cmp)
825*75f6d617Schristos {
826*75f6d617Schristos lin diags;
827*75f6d617Schristos int f;
828*75f6d617Schristos struct change *e, *p;
829*75f6d617Schristos struct change *script;
830*75f6d617Schristos int changes;
831*75f6d617Schristos
832*75f6d617Schristos
833*75f6d617Schristos /* If we have detected that either file is binary,
834*75f6d617Schristos compare the two files as binary. This can happen
835*75f6d617Schristos only when the first chunk is read.
836*75f6d617Schristos Also, --brief without any --ignore-* options means
837*75f6d617Schristos we can speed things up by treating the files as binary. */
838*75f6d617Schristos
839*75f6d617Schristos if (read_files (cmp->file, files_can_be_treated_as_binary))
840*75f6d617Schristos {
841*75f6d617Schristos /* Files with different lengths must be different. */
842*75f6d617Schristos if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
843*75f6d617Schristos && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
844*75f6d617Schristos && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
845*75f6d617Schristos changes = 1;
846*75f6d617Schristos
847*75f6d617Schristos /* Standard input equals itself. */
848*75f6d617Schristos else if (cmp->file[0].desc == cmp->file[1].desc)
849*75f6d617Schristos changes = 0;
850*75f6d617Schristos
851*75f6d617Schristos else
852*75f6d617Schristos /* Scan both files, a buffer at a time, looking for a difference. */
853*75f6d617Schristos {
854*75f6d617Schristos /* Allocate same-sized buffers for both files. */
855*75f6d617Schristos size_t lcm_max = PTRDIFF_MAX - 1;
856*75f6d617Schristos size_t buffer_size =
857*75f6d617Schristos buffer_lcm (sizeof (word),
858*75f6d617Schristos buffer_lcm (STAT_BLOCKSIZE (cmp->file[0].stat),
859*75f6d617Schristos STAT_BLOCKSIZE (cmp->file[1].stat),
860*75f6d617Schristos lcm_max),
861*75f6d617Schristos lcm_max);
862*75f6d617Schristos for (f = 0; f < 2; f++)
863*75f6d617Schristos cmp->file[f].buffer = xrealloc (cmp->file[f].buffer, buffer_size);
864*75f6d617Schristos
865*75f6d617Schristos for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0)
866*75f6d617Schristos {
867*75f6d617Schristos /* Read a buffer's worth from both files. */
868*75f6d617Schristos for (f = 0; f < 2; f++)
869*75f6d617Schristos if (0 <= cmp->file[f].desc)
870*75f6d617Schristos file_block_read (&cmp->file[f],
871*75f6d617Schristos buffer_size - cmp->file[f].buffered);
872*75f6d617Schristos
873*75f6d617Schristos /* If the buffers differ, the files differ. */
874*75f6d617Schristos if (cmp->file[0].buffered != cmp->file[1].buffered
875*75f6d617Schristos || memcmp (cmp->file[0].buffer,
876*75f6d617Schristos cmp->file[1].buffer,
877*75f6d617Schristos cmp->file[0].buffered))
878*75f6d617Schristos {
879*75f6d617Schristos changes = 1;
880*75f6d617Schristos break;
881*75f6d617Schristos }
882*75f6d617Schristos
883*75f6d617Schristos /* If we reach end of file, the files are the same. */
884*75f6d617Schristos if (cmp->file[0].buffered != buffer_size)
885*75f6d617Schristos {
886*75f6d617Schristos changes = 0;
887*75f6d617Schristos break;
888*75f6d617Schristos }
889*75f6d617Schristos }
890*75f6d617Schristos }
891*75f6d617Schristos
892*75f6d617Schristos changes = briefly_report (changes, cmp->file);
893*75f6d617Schristos }
894*75f6d617Schristos else
895*75f6d617Schristos {
896*75f6d617Schristos /* Allocate vectors for the results of comparison:
897*75f6d617Schristos a flag for each line of each file, saying whether that line
898*75f6d617Schristos is an insertion or deletion.
899*75f6d617Schristos Allocate an extra element, always 0, at each end of each vector. */
900*75f6d617Schristos
901*75f6d617Schristos size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
902*75f6d617Schristos bool *flag_space = zalloc (s * sizeof *flag_space);
903*75f6d617Schristos cmp->file[0].changed = flag_space + 1;
904*75f6d617Schristos cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
905*75f6d617Schristos
906*75f6d617Schristos /* Some lines are obviously insertions or deletions
907*75f6d617Schristos because they don't match anything. Detect them now, and
908*75f6d617Schristos avoid even thinking about them in the main comparison algorithm. */
909*75f6d617Schristos
910*75f6d617Schristos discard_confusing_lines (cmp->file);
911*75f6d617Schristos
912*75f6d617Schristos /* Now do the main comparison algorithm, considering just the
913*75f6d617Schristos undiscarded lines. */
914*75f6d617Schristos
915*75f6d617Schristos xvec = cmp->file[0].undiscarded;
916*75f6d617Schristos yvec = cmp->file[1].undiscarded;
917*75f6d617Schristos diags = (cmp->file[0].nondiscarded_lines
918*75f6d617Schristos + cmp->file[1].nondiscarded_lines + 3);
919*75f6d617Schristos fdiag = xmalloc (diags * (2 * sizeof *fdiag));
920*75f6d617Schristos bdiag = fdiag + diags;
921*75f6d617Schristos fdiag += cmp->file[1].nondiscarded_lines + 1;
922*75f6d617Schristos bdiag += cmp->file[1].nondiscarded_lines + 1;
923*75f6d617Schristos
924*75f6d617Schristos /* Set TOO_EXPENSIVE to be approximate square root of input size,
925*75f6d617Schristos bounded below by 256. */
926*75f6d617Schristos too_expensive = 1;
927*75f6d617Schristos for (; diags != 0; diags >>= 2)
928*75f6d617Schristos too_expensive <<= 1;
929*75f6d617Schristos too_expensive = MAX (256, too_expensive);
930*75f6d617Schristos
931*75f6d617Schristos files[0] = cmp->file[0];
932*75f6d617Schristos files[1] = cmp->file[1];
933*75f6d617Schristos
934*75f6d617Schristos compareseq (0, cmp->file[0].nondiscarded_lines,
935*75f6d617Schristos 0, cmp->file[1].nondiscarded_lines, minimal);
936*75f6d617Schristos
937*75f6d617Schristos free (fdiag - (cmp->file[1].nondiscarded_lines + 1));
938*75f6d617Schristos
939*75f6d617Schristos /* Modify the results slightly to make them prettier
940*75f6d617Schristos in cases where that can validly be done. */
941*75f6d617Schristos
942*75f6d617Schristos shift_boundaries (cmp->file);
943*75f6d617Schristos
944*75f6d617Schristos /* Get the results of comparison in the form of a chain
945*75f6d617Schristos of `struct change's -- an edit script. */
946*75f6d617Schristos
947*75f6d617Schristos if (output_style == OUTPUT_ED)
948*75f6d617Schristos script = build_reverse_script (cmp->file);
949*75f6d617Schristos else
950*75f6d617Schristos script = build_script (cmp->file);
951*75f6d617Schristos
952*75f6d617Schristos /* Set CHANGES if we had any diffs.
953*75f6d617Schristos If some changes are ignored, we must scan the script to decide. */
954*75f6d617Schristos if (ignore_blank_lines || ignore_regexp.fastmap)
955*75f6d617Schristos {
956*75f6d617Schristos struct change *next = script;
957*75f6d617Schristos changes = 0;
958*75f6d617Schristos
959*75f6d617Schristos while (next && changes == 0)
960*75f6d617Schristos {
961*75f6d617Schristos struct change *this, *end;
962*75f6d617Schristos lin first0, last0, first1, last1;
963*75f6d617Schristos
964*75f6d617Schristos /* Find a set of changes that belong together. */
965*75f6d617Schristos this = next;
966*75f6d617Schristos end = find_change (next);
967*75f6d617Schristos
968*75f6d617Schristos /* Disconnect them from the rest of the changes, making them
969*75f6d617Schristos a hunk, and remember the rest for next iteration. */
970*75f6d617Schristos next = end->link;
971*75f6d617Schristos end->link = 0;
972*75f6d617Schristos
973*75f6d617Schristos /* Determine whether this hunk is really a difference. */
974*75f6d617Schristos if (analyze_hunk (this, &first0, &last0, &first1, &last1))
975*75f6d617Schristos changes = 1;
976*75f6d617Schristos
977*75f6d617Schristos /* Reconnect the script so it will all be freed properly. */
978*75f6d617Schristos end->link = next;
979*75f6d617Schristos }
980*75f6d617Schristos }
981*75f6d617Schristos else
982*75f6d617Schristos changes = (script != 0);
983*75f6d617Schristos
984*75f6d617Schristos if (brief)
985*75f6d617Schristos changes = briefly_report (changes, cmp->file);
986*75f6d617Schristos else
987*75f6d617Schristos {
988*75f6d617Schristos if (changes | !no_diff_means_no_output)
989*75f6d617Schristos {
990*75f6d617Schristos /* Record info for starting up output,
991*75f6d617Schristos to be used if and when we have some output to print. */
992*75f6d617Schristos setup_output (file_label[0] ? file_label[0] : cmp->file[0].name,
993*75f6d617Schristos file_label[1] ? file_label[1] : cmp->file[1].name,
994*75f6d617Schristos cmp->parent != 0);
995*75f6d617Schristos
996*75f6d617Schristos switch (output_style)
997*75f6d617Schristos {
998*75f6d617Schristos case OUTPUT_CONTEXT:
999*75f6d617Schristos print_context_script (script, 0);
1000*75f6d617Schristos break;
1001*75f6d617Schristos
1002*75f6d617Schristos case OUTPUT_UNIFIED:
1003*75f6d617Schristos print_context_script (script, 1);
1004*75f6d617Schristos break;
1005*75f6d617Schristos
1006*75f6d617Schristos case OUTPUT_ED:
1007*75f6d617Schristos print_ed_script (script);
1008*75f6d617Schristos break;
1009*75f6d617Schristos
1010*75f6d617Schristos case OUTPUT_FORWARD_ED:
1011*75f6d617Schristos pr_forward_ed_script (script);
1012*75f6d617Schristos break;
1013*75f6d617Schristos
1014*75f6d617Schristos case OUTPUT_RCS:
1015*75f6d617Schristos print_rcs_script (script);
1016*75f6d617Schristos break;
1017*75f6d617Schristos
1018*75f6d617Schristos case OUTPUT_NORMAL:
1019*75f6d617Schristos print_normal_script (script);
1020*75f6d617Schristos break;
1021*75f6d617Schristos
1022*75f6d617Schristos case OUTPUT_IFDEF:
1023*75f6d617Schristos print_ifdef_script (script);
1024*75f6d617Schristos break;
1025*75f6d617Schristos
1026*75f6d617Schristos case OUTPUT_SDIFF:
1027*75f6d617Schristos print_sdiff_script (script);
1028*75f6d617Schristos break;
1029*75f6d617Schristos
1030*75f6d617Schristos default:
1031*75f6d617Schristos abort ();
1032*75f6d617Schristos }
1033*75f6d617Schristos
1034*75f6d617Schristos finish_output ();
1035*75f6d617Schristos }
1036*75f6d617Schristos }
1037*75f6d617Schristos
1038*75f6d617Schristos free (cmp->file[0].undiscarded);
1039*75f6d617Schristos
1040*75f6d617Schristos free (flag_space);
1041*75f6d617Schristos
1042*75f6d617Schristos for (f = 0; f < 2; f++)
1043*75f6d617Schristos {
1044*75f6d617Schristos free (cmp->file[f].equivs);
1045*75f6d617Schristos free (cmp->file[f].linbuf + cmp->file[f].linbuf_base);
1046*75f6d617Schristos }
1047*75f6d617Schristos
1048*75f6d617Schristos for (e = script; e; e = p)
1049*75f6d617Schristos {
1050*75f6d617Schristos p = e->link;
1051*75f6d617Schristos free (e);
1052*75f6d617Schristos }
1053*75f6d617Schristos
1054*75f6d617Schristos if (! ROBUST_OUTPUT_STYLE (output_style))
1055*75f6d617Schristos for (f = 0; f < 2; ++f)
1056*75f6d617Schristos if (cmp->file[f].missing_newline)
1057*75f6d617Schristos {
1058*75f6d617Schristos error (0, 0, "%s: %s\n",
1059*75f6d617Schristos file_label[f] ? file_label[f] : cmp->file[f].name,
1060*75f6d617Schristos _("No newline at end of file"));
1061*75f6d617Schristos changes = 2;
1062*75f6d617Schristos }
1063*75f6d617Schristos }
1064*75f6d617Schristos
1065*75f6d617Schristos if (cmp->file[0].buffer != cmp->file[1].buffer)
1066*75f6d617Schristos free (cmp->file[0].buffer);
1067*75f6d617Schristos free (cmp->file[1].buffer);
1068*75f6d617Schristos
1069*75f6d617Schristos return changes;
1070*75f6d617Schristos }
1071