xref: /openbsd-src/usr.bin/diff/diffreg.c (revision f02e3d868a55cf99e8681a219961f546974d2c4b)
1 /*	$OpenBSD: diffreg.c,v 1.56 2004/06/18 07:23:50 otto Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.56 2004/06/18 07:23:50 otto Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 
88 /*
89  * diff - compare two files.
90  */
91 
92 /*
93  *	Uses an algorithm due to Harold Stone, which finds
94  *	a pair of longest identical subsequences in the two
95  *	files.
96  *
97  *	The major goal is to generate the match vector J.
98  *	J[i] is the index of the line in file1 corresponding
99  *	to line i file0. J[i] = 0 if there is no
100  *	such line in file1.
101  *
102  *	Lines are hashed so as to work in core. All potential
103  *	matches are located by sorting the lines of each file
104  *	on the hash (called ``value''). In particular, this
105  *	collects the equivalence classes in file1 together.
106  *	Subroutine equiv replaces the value of each line in
107  *	file0 by the index of the first element of its
108  *	matching equivalence in (the reordered) file1.
109  *	To save space equiv squeezes file1 into a single
110  *	array member in which the equivalence classes
111  *	are simply concatenated, except that their first
112  *	members are flagged by changing sign.
113  *
114  *	Next the indices that point into member are unsorted into
115  *	array class according to the original order of file0.
116  *
117  *	The cleverness lies in routine stone. This marches
118  *	through the lines of file0, developing a vector klist
119  *	of "k-candidates". At step i a k-candidate is a matched
120  *	pair of lines x,y (x in file0 y in file1) such that
121  *	there is a common subsequence of length k
122  *	between the first i lines of file0 and the first y
123  *	lines of file1, but there is no such subsequence for
124  *	any smaller y. x is the earliest possible mate to y
125  *	that occurs in such a subsequence.
126  *
127  *	Whenever any of the members of the equivalence class of
128  *	lines in file1 matable to a line in file0 has serial number
129  *	less than the y of some k-candidate, that k-candidate
130  *	with the smallest such y is replaced. The new
131  *	k-candidate is chained (via pred) to the current
132  *	k-1 candidate so that the actual subsequence can
133  *	be recovered. When a member has serial number greater
134  *	that the y of all k-candidates, the klist is extended.
135  *	At the end, the longest subsequence is pulled out
136  *	and placed in the array J by unravel
137  *
138  *	With J in hand, the matches there recorded are
139  *	check'ed against reality to assure that no spurious
140  *	matches have crept in due to hashing. If they have,
141  *	they are broken, and "jackpot" is recorded--a harmless
142  *	matter except that a true match for a spuriously
143  *	mated line may now be unnecessarily reported as a change.
144  *
145  *	Much of the complexity of the program comes simply
146  *	from trying to minimize core utilization and
147  *	maximize the range of doable problems by dynamically
148  *	allocating what is needed and reusing what is not.
149  *	The core requirements for problems larger than somewhat
150  *	are (in words) 2*length(file0) + length(file1) +
151  *	3*(number of k-candidates installed),  typically about
152  *	6n words for files of length n.
153  */
154 
155 struct cand {
156 	int x;
157 	int y;
158 	int pred;
159 } cand;
160 
161 struct line {
162 	int serial;
163 	int value;
164 } *file[2];
165 
166 /*
167  * The following struct is used to record change information when
168  * doing a "context" or "unified" diff.  (see routine "change" to
169  * understand the highly mnemonic field names)
170  */
171 struct context_vec {
172 	int a;			/* start line in old file */
173 	int b;			/* end line in old file */
174 	int c;			/* start line in new file */
175 	int d;			/* end line in new file */
176 };
177 
178 static int  *J;			/* will be overlaid on class */
179 static int  *class;		/* will be overlaid on file[0] */
180 static int  *klist;		/* will be overlaid on file[0] after class */
181 static int  *member;		/* will be overlaid on file[1] */
182 static int   clen;
183 static int   inifdef;		/* whether or not we are in a #ifdef block */
184 static int   len[2];
185 static int   pref, suff;	/* length of prefix and suffix */
186 static int   slen[2];
187 static int   anychange;
188 static long *ixnew;		/* will be overlaid on file[1] */
189 static long *ixold;		/* will be overlaid on klist */
190 static struct cand *clist;	/* merely a free storage pot for candidates */
191 static int   clistlen;		/* the length of clist */
192 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
193 static u_char *chrtran;		/* translation table for case-folding */
194 static struct context_vec *context_vec_start;
195 static struct context_vec *context_vec_end;
196 static struct context_vec *context_vec_ptr;
197 
198 #define FUNCTION_CONTEXT_SIZE	41
199 static char lastbuf[FUNCTION_CONTEXT_SIZE];
200 static int lastline;
201 static int lastmatchline;
202 
203 static FILE *opentemp(const char *);
204 static void output(char *, FILE *, char *, FILE *);
205 static void check(char *, FILE *, char *, FILE *);
206 static void range(int, int, char *);
207 static void uni_range(int, int);
208 static void dump_context_vec(FILE *, FILE *);
209 static void dump_unified_vec(FILE *, FILE *);
210 static void prepare(int, FILE *, off_t);
211 static void prune(void);
212 static void equiv(struct line *, int, struct line *, int, int *);
213 static void unravel(int);
214 static void unsort(struct line *, int, int *);
215 static void change(char *, FILE *, char *, FILE *, int, int, int, int);
216 static void sort(struct line *, int);
217 static int  asciifile(FILE *);
218 static int  fetch(long *, int, int, FILE *, int, int);
219 static int  newcand(int, int, int);
220 static int  search(int *, int, int);
221 static int  skipline(FILE *);
222 static int  isqrt(int);
223 static int  stone(int *, int, int *, int *);
224 static int  readhash(FILE *);
225 static int  files_differ(FILE *, FILE *, int);
226 static __inline int min(int, int);
227 static __inline int max(int, int);
228 static char *match_function(const long *, int, FILE *);
229 
230 
231 /*
232  * chrtran points to one of 2 translation tables: cup2low if folding upper to
233  * lower case clow2low if not folding case
234  */
235 u_char clow2low[256] = {
236 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
237 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
238 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
239 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
240 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
241 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
242 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
243 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
244 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
245 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
246 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
247 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
248 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
249 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
250 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
251 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
252 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
253 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
254 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
255 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
256 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
257 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
258 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
259 	0xfd, 0xfe, 0xff
260 };
261 
262 u_char cup2low[256] = {
263 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
264 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
265 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
266 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
267 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
268 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
269 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
270 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
271 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
272 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
273 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
274 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
275 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
276 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
277 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
278 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
279 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
280 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
281 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
282 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
283 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
284 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
285 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
286 	0xfd, 0xfe, 0xff
287 };
288 
289 int
290 diffreg(char *ofile1, char *ofile2, int flags)
291 {
292 	char *file1 = ofile1;
293 	char *file2 = ofile2;
294 	FILE *f1 = NULL;
295 	FILE *f2 = NULL;
296 	int rval = D_SAME;
297 	int i, ostdout = -1;
298 	pid_t pid = -1;
299 
300 	anychange = 0;
301 	lastline = 0;
302 	lastmatchline = 0;
303 	context_vec_ptr = context_vec_start - 1;
304 	chrtran = (iflag ? cup2low : clow2low);
305 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
306 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
308 		goto closem;
309 
310 	if (flags & D_EMPTY1)
311 		f1 = fopen(_PATH_DEVNULL, "r");
312 	else {
313 		if (!S_ISREG(stb1.st_mode)) {
314 			if ((f1 = opentemp(file1)) == NULL ||
315 			    fstat(fileno(f1), &stb1) < 0) {
316 				warn("%s", file1);
317 				status |= 2;
318 				goto closem;
319 			}
320 		} else if (strcmp(file1, "-") == 0)
321 			f1 = stdin;
322 		else
323 			f1 = fopen(file1, "r");
324 	}
325 	if (f1 == NULL) {
326 		warn("%s", file1);
327 		status |= 2;
328 		goto closem;
329 	}
330 
331 	if (flags & D_EMPTY2)
332 		f2 = fopen(_PATH_DEVNULL, "r");
333 	else {
334 		if (!S_ISREG(stb2.st_mode)) {
335 			if ((f2 = opentemp(file2)) == NULL ||
336 			    fstat(fileno(f2), &stb2) < 0) {
337 				warn("%s", file2);
338 				status |= 2;
339 				goto closem;
340 			}
341 		} else if (strcmp(file2, "-") == 0)
342 			f2 = stdin;
343 		else
344 			f2 = fopen(file2, "r");
345 	}
346 	if (f2 == NULL) {
347 		warn("%s", file2);
348 		status |= 2;
349 		goto closem;
350 	}
351 
352 	switch (files_differ(f1, f2, flags)) {
353 	case 0:
354 		goto closem;
355 	case 1:
356 		break;
357 	default:
358 		/* error */
359 		status |= 2;
360 		goto closem;
361 	}
362 
363 	if (!asciifile(f1) || !asciifile(f2)) {
364 		rval = D_BINARY;
365 		status |= 1;
366 		goto closem;
367 	}
368 	if (lflag) {
369 		/* redirect stdout to pr */
370 		int pfd[2];
371 		char *header;
372 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
373 
374 		easprintf(&header, "%s %s %s", diffargs, file1, file2);
375 		prargv[2] = header;
376 		fflush(stdout);
377 		rewind(stdout);
378 		pipe(pfd);
379 		switch ((pid = fork())) {
380 		case -1:
381 			warnx("No more processes");
382 			status |= 2;
383 			free(header);
384 			return (D_ERROR);
385 		case 0:
386 			/* child */
387 			if (pfd[0] != STDIN_FILENO) {
388 				dup2(pfd[0], STDIN_FILENO);
389 				close(pfd[0]);
390 			}
391 			close(pfd[1]);
392 			execv(_PATH_PR, prargv);
393 			_exit(127);
394 		default:
395 			/* parent */
396 			if (pfd[1] != STDOUT_FILENO) {
397 				ostdout = dup(STDOUT_FILENO);
398 				dup2(pfd[1], STDOUT_FILENO);
399 				close(pfd[1]);
400 			}
401 			close(pfd[0]);
402 			rewind(stdout);
403 			free(header);
404 		}
405 	} else if (flags & D_HEADER)
406 		printf("%s %s %s\n", diffargs, file1, file2);
407 	prepare(0, f1, stb1.st_size);
408 	prepare(1, f2, stb2.st_size);
409 	prune();
410 	sort(sfile[0], slen[0]);
411 	sort(sfile[1], slen[1]);
412 
413 	member = (int *)file[1];
414 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
415 	member = erealloc(member, (slen[1] + 2) * sizeof(int));
416 
417 	class = (int *)file[0];
418 	unsort(sfile[0], slen[0], class);
419 	class = erealloc(class, (slen[0] + 2) * sizeof(int));
420 
421 	klist = emalloc((slen[0] + 2) * sizeof(int));
422 	clen = 0;
423 	clistlen = 100;
424 	clist = emalloc(clistlen * sizeof(cand));
425 	i = stone(class, slen[0], member, klist);
426 	free(member);
427 	free(class);
428 
429 	J = erealloc(J, (len[0] + 2) * sizeof(int));
430 	unravel(klist[i]);
431 	free(clist);
432 	free(klist);
433 
434 	ixold = erealloc(ixold, (len[0] + 2) * sizeof(long));
435 	ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long));
436 	check(file1, f1, file2, f2);
437 	output(file1, f1, file2, f2);
438 	if (ostdout != -1) {
439 		int wstatus;
440 
441 		/* close the pipe to pr and restore stdout */
442 		fflush(stdout);
443 		rewind(stdout);
444 		if (ostdout != STDOUT_FILENO) {
445 			close(STDOUT_FILENO);
446 			dup2(ostdout, STDOUT_FILENO);
447 			close(ostdout);
448 		}
449 		waitpid(pid, &wstatus, 0);
450 	}
451 closem:
452 	if (anychange) {
453 		status |= 1;
454 		if (rval == D_SAME)
455 			rval = D_DIFFER;
456 	}
457 	if (f1 != NULL)
458 		fclose(f1);
459 	if (f2 != NULL)
460 		fclose(f2);
461 	if (file1 != ofile1)
462 		free(file1);
463 	if (file2 != ofile2)
464 		free(file2);
465 	return (rval);
466 }
467 
468 /*
469  * Check to see if the given files differ.
470  * Returns 0 if they are the same, 1 if different, and -1 on error.
471  * XXX - could use code from cmp(1) [faster]
472  */
473 static int
474 files_differ(FILE *f1, FILE *f2, int flags)
475 {
476 	char buf1[BUFSIZ], buf2[BUFSIZ];
477 	size_t i, j;
478 
479 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
480 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
481 		return (1);
482 	for (;;) {
483 		i = fread(buf1, 1, sizeof(buf1), f1);
484 		j = fread(buf2, 1, sizeof(buf2), f2);
485 		if (i != j)
486 			return (1);
487 		if (i == 0 && j == 0) {
488 			if (ferror(f1) || ferror(f2))
489 				return (1);
490 			return (0);
491 		}
492 		if (memcmp(buf1, buf2, i) != 0)
493 			return (1);
494 	}
495 }
496 
497 static FILE *
498 opentemp(const char *file)
499 {
500 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
501 	ssize_t nread;
502 	int ifd, ofd;
503 
504 	if (strcmp(file, "-") == 0)
505 		ifd = STDIN_FILENO;
506 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
507 		return (NULL);
508 
509 	if ((tempdir = getenv("TMPDIR")) == NULL)
510 		tempdir = _PATH_TMP;
511 	if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX",
512 	    tempdir) >= sizeof(tempfile)) {
513 		close(ifd);
514 		errno = ENAMETOOLONG;
515 		return (NULL);
516 	}
517 
518 	if ((ofd = mkstemp(tempfile)) < 0)
519 		return (NULL);
520 	unlink(tempfile);
521 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
522 		if (write(ofd, buf, nread) != nread) {
523 			close(ifd);
524 			close(ofd);
525 			return (NULL);
526 		}
527 	}
528 	close(ifd);
529 	lseek(ofd, (off_t)0, SEEK_SET);
530 	return (fdopen(ofd, "r"));
531 }
532 
533 char *
534 splice(char *dir, char *file)
535 {
536 	char *tail, *buf;
537 
538 	if ((tail = strrchr(file, '/')) == NULL)
539 		tail = file;
540 	else
541 		tail++;
542 	easprintf(&buf, "%s/%s", dir, tail);
543 	return (buf);
544 }
545 
546 static void
547 prepare(int i, FILE *fd, off_t filesize)
548 {
549 	struct line *p;
550 	int j, h;
551 	size_t sz;
552 
553 	rewind(fd);
554 
555 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
556 	if (sz < 100)
557 		sz = 100;
558 
559 	p = emalloc((sz + 3) * sizeof(struct line));
560 	for (j = 0; (h = readhash(fd));) {
561 		if (j == sz) {
562 			sz = sz * 3 / 2;
563 			p = erealloc(p, (sz + 3) * sizeof(struct line));
564 		}
565 		p[++j].value = h;
566 	}
567 	len[i] = j;
568 	file[i] = p;
569 }
570 
571 static void
572 prune(void)
573 {
574 	int i, j;
575 
576 	for (pref = 0; pref < len[0] && pref < len[1] &&
577 	    file[0][pref + 1].value == file[1][pref + 1].value;
578 	    pref++)
579 		;
580 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
581 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
582 	    suff++)
583 		;
584 	for (j = 0; j < 2; j++) {
585 		sfile[j] = file[j] + pref;
586 		slen[j] = len[j] - pref - suff;
587 		for (i = 0; i <= slen[j]; i++)
588 			sfile[j][i].serial = i;
589 	}
590 }
591 
592 static void
593 equiv(struct line *a, int n, struct line *b, int m, int *c)
594 {
595 	int i, j;
596 
597 	i = j = 1;
598 	while (i <= n && j <= m) {
599 		if (a[i].value < b[j].value)
600 			a[i++].value = 0;
601 		else if (a[i].value == b[j].value)
602 			a[i++].value = j;
603 		else
604 			j++;
605 	}
606 	while (i <= n)
607 		a[i++].value = 0;
608 	b[m + 1].value = 0;
609 	j = 0;
610 	while (++j <= m) {
611 		c[j] = -b[j].serial;
612 		while (b[j + 1].value == b[j].value) {
613 			j++;
614 			c[j] = b[j].serial;
615 		}
616 	}
617 	c[j] = -1;
618 }
619 
620 /* Code taken from ping.c */
621 static int
622 isqrt(int n)
623 {
624 	int y, x = 1;
625 
626 	if (n == 0)
627 		return(0);
628 
629 	do { /* newton was a stinker */
630 		y = x;
631 		x = n / x;
632 		x += y;
633 		x /= 2;
634 	} while ((x - y) > 1 || (x - y) < -1);
635 
636 	return (x);
637 }
638 
639 static int
640 stone(int *a, int n, int *b, int *c)
641 {
642 	int i, k, y, j, l;
643 	int oldc, tc, oldl;
644 	u_int numtries;
645 
646 	const u_int bound = dflag ? UINT_MAX : max(256, isqrt(n));
647 
648 	k = 0;
649 	c[0] = newcand(0, 0, 0);
650 	for (i = 1; i <= n; i++) {
651 		j = a[i];
652 		if (j == 0)
653 			continue;
654 		y = -b[j];
655 		oldl = 0;
656 		oldc = c[0];
657 		numtries = 0;
658 		do {
659 			if (y <= clist[oldc].y)
660 				continue;
661 			l = search(c, k, y);
662 			if (l != oldl + 1)
663 				oldc = c[l - 1];
664 			if (l <= k) {
665 				if (clist[c[l]].y <= y)
666 					continue;
667 				tc = c[l];
668 				c[l] = newcand(i, y, oldc);
669 				oldc = tc;
670 				oldl = l;
671 				numtries++;
672 			} else {
673 				c[l] = newcand(i, y, oldc);
674 				k++;
675 				break;
676 			}
677 		} while ((y = b[++j]) > 0 && numtries < bound);
678 	}
679 	return (k);
680 }
681 
682 static int
683 newcand(int x, int y, int pred)
684 {
685 	struct cand *q;
686 
687 	if (clen == clistlen) {
688 		clistlen = clistlen * 11 / 10;
689 		clist = erealloc(clist, clistlen * sizeof(cand));
690 	}
691 	q = clist + clen;
692 	q->x = x;
693 	q->y = y;
694 	q->pred = pred;
695 	return (clen++);
696 }
697 
698 static int
699 search(int *c, int k, int y)
700 {
701 	int i, j, l, t;
702 
703 	if (clist[c[k]].y < y)	/* quick look for typical case */
704 		return (k + 1);
705 	i = 0;
706 	j = k + 1;
707 	while (1) {
708 		l = i + j;
709 		if ((l >>= 1) <= i)
710 			break;
711 		t = clist[c[l]].y;
712 		if (t > y)
713 			j = l;
714 		else if (t < y)
715 			i = l;
716 		else
717 			return (l);
718 	}
719 	return (l + 1);
720 }
721 
722 static void
723 unravel(int p)
724 {
725 	struct cand *q;
726 	int i;
727 
728 	for (i = 0; i <= len[0]; i++)
729 		J[i] = i <= pref ? i :
730 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
731 	for (q = clist + p; q->y != 0; q = clist + q->pred)
732 		J[q->x + pref] = q->y + pref;
733 }
734 
735 /*
736  * Check does double duty:
737  *  1.	ferret out any fortuitous correspondences due
738  *	to confounding by hashing (which result in "jackpot")
739  *  2.  collect random access indexes to the two files
740  */
741 static void
742 check(char *file1, FILE *f1, char *file2, FILE *f2)
743 {
744 	int i, j, jackpot, c, d;
745 	long ctold, ctnew;
746 
747 	rewind(f1);
748 	rewind(f2);
749 	j = 1;
750 	ixold[0] = ixnew[0] = 0;
751 	jackpot = 0;
752 	ctold = ctnew = 0;
753 	for (i = 1; i <= len[0]; i++) {
754 		if (J[i] == 0) {
755 			ixold[i] = ctold += skipline(f1);
756 			continue;
757 		}
758 		while (j < J[i]) {
759 			ixnew[j] = ctnew += skipline(f2);
760 			j++;
761 		}
762 		if (bflag || wflag || iflag) {
763 			for (;;) {
764 				c = getc(f1);
765 				d = getc(f2);
766 				/*
767 				 * GNU diff ignores a missing newline
768 				 * in one file if bflag || wflag.
769 				 */
770 				if ((bflag || wflag) &&
771 				    ((c == EOF && d == '\n') ||
772 				    (c == '\n' && d == EOF))) {
773 					break;
774 				}
775 				ctold++;
776 				ctnew++;
777 				if (bflag && isspace(c) && isspace(d)) {
778 					do {
779 						if (c == '\n')
780 							break;
781 						ctold++;
782 					} while (isspace(c = getc(f1)));
783 					do {
784 						if (d == '\n')
785 							break;
786 						ctnew++;
787 					} while (isspace(d = getc(f2)));
788 				} else if (wflag) {
789 					while (isspace(c) && c != '\n') {
790 						c = getc(f1);
791 						ctold++;
792 					}
793 					while (isspace(d) && d != '\n') {
794 						d = getc(f2);
795 						ctnew++;
796 					}
797 				}
798 				if (chrtran[c] != chrtran[d]) {
799 					jackpot++;
800 					J[i] = 0;
801 					if (c != '\n' && c != EOF)
802 						ctold += skipline(f1);
803 					if (d != '\n' && c != EOF)
804 						ctnew += skipline(f2);
805 					break;
806 				}
807 				if (c == '\n' || c == EOF)
808 					break;
809 			}
810 		} else {
811 			for (;;) {
812 				ctold++;
813 				ctnew++;
814 				if ((c = getc(f1)) != (d = getc(f2))) {
815 					/* jackpot++; */
816 					J[i] = 0;
817 					if (c != '\n' && c != EOF)
818 						ctold += skipline(f1);
819 					if (d != '\n' && c != EOF)
820 						ctnew += skipline(f2);
821 					break;
822 				}
823 				if (c == '\n' || c == EOF)
824 					break;
825 			}
826 		}
827 		ixold[i] = ctold;
828 		ixnew[j] = ctnew;
829 		j++;
830 	}
831 	for (; j <= len[1]; j++)
832 		ixnew[j] = ctnew += skipline(f2);
833 	/*
834 	 * if (jackpot)
835 	 *	fprintf(stderr, "jackpot\n");
836 	 */
837 }
838 
839 /* shellsort CACM #201 */
840 static void
841 sort(struct line *a, int n)
842 {
843 	struct line *ai, *aim, w;
844 	int j, m = 0, k;
845 
846 	if (n == 0)
847 		return;
848 	for (j = 1; j <= n; j *= 2)
849 		m = 2 * j - 1;
850 	for (m /= 2; m != 0; m /= 2) {
851 		k = n - m;
852 		for (j = 1; j <= k; j++) {
853 			for (ai = &a[j]; ai > a; ai -= m) {
854 				aim = &ai[m];
855 				if (aim < ai)
856 					break;	/* wraparound */
857 				if (aim->value > ai[0].value ||
858 				    (aim->value == ai[0].value &&
859 					aim->serial > ai[0].serial))
860 					break;
861 				w.value = ai[0].value;
862 				ai[0].value = aim->value;
863 				aim->value = w.value;
864 				w.serial = ai[0].serial;
865 				ai[0].serial = aim->serial;
866 				aim->serial = w.serial;
867 			}
868 		}
869 	}
870 }
871 
872 static void
873 unsort(struct line *f, int l, int *b)
874 {
875 	int *a, i;
876 
877 	a = emalloc((l + 1) * sizeof(int));
878 	for (i = 1; i <= l; i++)
879 		a[f[i].serial] = f[i].value;
880 	for (i = 1; i <= l; i++)
881 		b[i] = a[i];
882 	free(a);
883 }
884 
885 static int
886 skipline(FILE *f)
887 {
888 	int i, c;
889 
890 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
891 		continue;
892 	return (i);
893 }
894 
895 static void
896 output(char *file1, FILE *f1, char *file2, FILE *f2)
897 {
898 	int m, i0, i1, j0, j1;
899 
900 	rewind(f1);
901 	rewind(f2);
902 	m = len[0];
903 	J[0] = 0;
904 	J[m + 1] = len[1] + 1;
905 	if (format != D_EDIT) {
906 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
907 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
908 				i0++;
909 			j0 = J[i0 - 1] + 1;
910 			i1 = i0 - 1;
911 			while (i1 < m && J[i1 + 1] == 0)
912 				i1++;
913 			j1 = J[i1 + 1] - 1;
914 			J[i1] = j1;
915 			change(file1, f1, file2, f2, i0, i1, j0, j1);
916 		}
917 	} else {
918 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
919 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
920 				i0--;
921 			j0 = J[i0 + 1] - 1;
922 			i1 = i0 + 1;
923 			while (i1 > 1 && J[i1 - 1] == 0)
924 				i1--;
925 			j1 = J[i1 - 1] + 1;
926 			J[i1] = j1;
927 			change(file1, f1, file2, f2, i1, i0, j1, j0);
928 		}
929 	}
930 	if (m == 0)
931 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
932 	if (format == D_IFDEF) {
933 		for (;;) {
934 #define	c i0
935 			if ((c = getc(f1)) == EOF)
936 				return;
937 			putchar(c);
938 		}
939 #undef c
940 	}
941 	if (anychange != 0) {
942 		if (format == D_CONTEXT)
943 			dump_context_vec(f1, f2);
944 		else if (format == D_UNIFIED)
945 			dump_unified_vec(f1, f2);
946 	}
947 }
948 
949 static __inline void
950 range(int a, int b, char *separator)
951 {
952 	printf("%d", a > b ? b : a);
953 	if (a < b)
954 		printf("%s%d", separator, b);
955 }
956 
957 static __inline void
958 uni_range(int a, int b)
959 {
960 	if (a < b)
961 		printf("%d,%d", a, b - a + 1);
962 	else if (a == b)
963 		printf("%d", b);
964 	else
965 		printf("%d,0", b);
966 }
967 
968 /*
969  * Indicate that there is a difference between lines a and b of the from file
970  * to get to lines c to d of the to file.  If a is greater then b then there
971  * are no lines in the from file involved and this means that there were
972  * lines appended (beginning at b).  If c is greater than d then there are
973  * lines missing from the to file.
974  */
975 static void
976 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
977 {
978 	static size_t max_context = 64;
979 	int i;
980 
981 restart:
982 	if (format != D_IFDEF && a > b && c > d)
983 		return;
984 	if (format == D_CONTEXT || format == D_UNIFIED) {
985 		/*
986 		 * Allocate change records as needed.
987 		 */
988 		if (context_vec_ptr == context_vec_end - 1) {
989 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
990 			max_context <<= 1;
991 			context_vec_start = erealloc(context_vec_start,
992 			    max_context * sizeof(struct context_vec));
993 			context_vec_end = context_vec_start + max_context;
994 			context_vec_ptr = context_vec_start + offset;
995 		}
996 		if (anychange == 0) {
997 			/*
998 			 * Print the context/unidiff header first time through.
999 			 */
1000 			if (label != NULL)
1001 				printf("%s %s\n",
1002 				    format == D_CONTEXT ? "***" : "---", label);
1003 			else
1004 				printf("%s %s	%s",
1005 				    format == D_CONTEXT ? "***" : "---", file1,
1006 				    ctime(&stb1.st_mtime));
1007 			printf("%s %s	%s",
1008 			    format == D_CONTEXT ? "---" : "+++", file2,
1009 			    ctime(&stb2.st_mtime));
1010 			anychange = 1;
1011 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1012 		    c > context_vec_ptr->d + (2 * context) + 1) {
1013 			/*
1014 			 * If this change is more than 'context' lines from the
1015 			 * previous change, dump the record and reset it.
1016 			 */
1017 			if (format == D_CONTEXT)
1018 				dump_context_vec(f1, f2);
1019 			else
1020 				dump_unified_vec(f1, f2);
1021 		}
1022 		context_vec_ptr++;
1023 		context_vec_ptr->a = a;
1024 		context_vec_ptr->b = b;
1025 		context_vec_ptr->c = c;
1026 		context_vec_ptr->d = d;
1027 		return;
1028 	}
1029 	if (anychange == 0)
1030 		anychange = 1;
1031 	switch (format) {
1032 
1033 	case D_BRIEF:
1034 		return;
1035 	case D_NORMAL:
1036 	case D_EDIT:
1037 		range(a, b, ",");
1038 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1039 		if (format == D_NORMAL)
1040 			range(c, d, ",");
1041 		putchar('\n');
1042 		break;
1043 	case D_REVERSE:
1044 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1045 		range(a, b, " ");
1046 		putchar('\n');
1047 		break;
1048 	case D_NREVERSE:
1049 		if (a > b)
1050 			printf("a%d %d\n", b, d - c + 1);
1051 		else {
1052 			printf("d%d %d\n", a, b - a + 1);
1053 			if (!(c > d))
1054 				/* add changed lines */
1055 				printf("a%d %d\n", b, d - c + 1);
1056 		}
1057 		break;
1058 	}
1059 	if (format == D_NORMAL || format == D_IFDEF) {
1060 		fetch(ixold, a, b, f1, '<', 1);
1061 		if (a <= b && c <= d && format == D_NORMAL)
1062 			puts("---");
1063 	}
1064 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1065 	if (i != 0 && format == D_EDIT) {
1066 		/*
1067 		 * A non-zero return value for D_EDIT indicates that the
1068 		 * last line printed was a bare dot (".") that has been
1069 		 * escaped as ".." to prevent ed(1) from misinterpreting
1070 		 * it.  We have to add a substitute command to change this
1071 		 * back and restart where we left off.
1072 		 */
1073 		puts(".");
1074 		printf("%ds/^\\.\\././\n", a);
1075 		a += i;
1076 		c += i;
1077 		goto restart;
1078 	}
1079 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1080 		puts(".");
1081 	if (inifdef) {
1082 		printf("#endif /* %s */\n", ifdefname);
1083 		inifdef = 0;
1084 	}
1085 }
1086 
1087 static int
1088 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1089 {
1090 	int i, j, c, lastc, col, nc;
1091 
1092 	/*
1093 	 * When doing #ifdef's, copy down to current line
1094 	 * if this is the first file, so that stuff makes it to output.
1095 	 */
1096 	if (format == D_IFDEF && oldfile) {
1097 		long curpos = ftell(lb);
1098 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1099 		nc = f[a > b ? b : a - 1] - curpos;
1100 		for (i = 0; i < nc; i++)
1101 			putchar(getc(lb));
1102 	}
1103 	if (a > b)
1104 		return (0);
1105 	if (format == D_IFDEF) {
1106 		if (inifdef) {
1107 			printf("#else /* %s%s */\n",
1108 			    oldfile == 1 ? "!" : "", ifdefname);
1109 		} else {
1110 			if (oldfile)
1111 				printf("#ifndef %s\n", ifdefname);
1112 			else
1113 				printf("#ifdef %s\n", ifdefname);
1114 		}
1115 		inifdef = 1 + oldfile;
1116 	}
1117 	for (i = a; i <= b; i++) {
1118 		fseek(lb, f[i - 1], SEEK_SET);
1119 		nc = f[i] - f[i - 1];
1120 		if (format != D_IFDEF && ch != '\0') {
1121 			putchar(ch);
1122 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1123 			    || format == D_UNIFIED))
1124 				putchar('\t');
1125 			else if (format != D_UNIFIED)
1126 				putchar(' ');
1127 		}
1128 		col = 0;
1129 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1130 			if ((c = getc(lb)) == EOF) {
1131 				if (format == D_EDIT || format == D_REVERSE ||
1132 				    format == D_NREVERSE)
1133 					warnx("No newline at end of file");
1134 				else
1135 					puts("\n\\ No newline at end of file");
1136 				return (0);
1137 			}
1138 			if (c == '\t' && tflag) {
1139 				do {
1140 					putchar(' ');
1141 				} while (++col & 7);
1142 			} else {
1143 				if (format == D_EDIT && j == 1 && c == '\n'
1144 				    && lastc == '.') {
1145 					/*
1146 					 * Don't print a bare "." line
1147 					 * since that will confuse ed(1).
1148 					 * Print ".." instead and return,
1149 					 * giving the caller an offset
1150 					 * from which to restart.
1151 					 */
1152 					puts(".");
1153 					return (i - a + 1);
1154 				}
1155 				putchar(c);
1156 				col++;
1157 			}
1158 		}
1159 	}
1160 	return (0);
1161 }
1162 
1163 /*
1164  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1165  */
1166 static int
1167 readhash(FILE *f)
1168 {
1169 	int i, t, space;
1170 	int sum;
1171 
1172 	sum = 1;
1173 	space = 0;
1174 	if (!bflag && !wflag) {
1175 		if (iflag)
1176 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1177 				if (t == EOF) {
1178 					if (i == 0)
1179 						return (0);
1180 					break;
1181 				}
1182 				sum = sum * 127 + chrtran[t];
1183 			}
1184 		else
1185 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1186 				if (t == EOF) {
1187 					if (i == 0)
1188 						return (0);
1189 					break;
1190 				}
1191 				sum = sum * 127 + t;
1192 			}
1193 	} else {
1194 		for (i = 0;;) {
1195 			switch (t = getc(f)) {
1196 			case '\t':
1197 			case ' ':
1198 				space++;
1199 				continue;
1200 			default:
1201 				if (space && !wflag) {
1202 					i++;
1203 					space = 0;
1204 				}
1205 				sum = sum * 127 + chrtran[t];
1206 				i++;
1207 				continue;
1208 			case EOF:
1209 				if (i == 0)
1210 					return (0);
1211 				/* FALLTHROUGH */
1212 			case '\n':
1213 				break;
1214 			}
1215 			break;
1216 		}
1217 	}
1218 	/*
1219 	 * There is a remote possibility that we end up with a zero sum.
1220 	 * Zero is used as an EOF marker, so return 1 instead.
1221 	 */
1222 	return (sum == 0 ? 1 : sum);
1223 }
1224 
1225 static int
1226 asciifile(FILE *f)
1227 {
1228 	char buf[BUFSIZ];
1229 	int i, cnt;
1230 
1231 	if (aflag || f == NULL)
1232 		return (1);
1233 
1234 	rewind(f);
1235 	cnt = fread(buf, 1, sizeof(buf), f);
1236 	for (i = 0; i < cnt; i++)
1237 		if (!isprint(buf[i]) && !isspace(buf[i]))
1238 			return (0);
1239 	return (1);
1240 }
1241 
1242 static __inline int min(int a, int b)
1243 {
1244 	return (a < b ? a : b);
1245 }
1246 
1247 static __inline int max(int a, int b)
1248 {
1249 	return (a > b ? a : b);
1250 }
1251 
1252 static char *
1253 match_function(const long *f, int pos, FILE *file)
1254 {
1255 	char buf[FUNCTION_CONTEXT_SIZE];
1256 	size_t nc;
1257 	int last = lastline;
1258 	char *p;
1259 
1260 	lastline = pos;
1261 	while (pos > last) {
1262 		fseek(file, f[pos - 1], SEEK_SET);
1263 		nc = f[pos] - f[pos - 1];
1264 		if (nc >= sizeof(buf))
1265 			nc = sizeof(buf) - 1;
1266 		nc = fread(buf, 1, nc, file);
1267 		if (nc > 0) {
1268 			buf[nc] = '\0';
1269 			p = strchr(buf, '\n');
1270 			if (p != NULL)
1271 				*p = '\0';
1272 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1273 				strlcpy(lastbuf, buf, sizeof lastbuf);
1274 				lastmatchline = pos;
1275 				return lastbuf;
1276 			}
1277 		}
1278 		pos--;
1279 	}
1280 	return lastmatchline > 0 ? lastbuf : NULL;
1281 }
1282 
1283 /* dump accumulated "context" diff changes */
1284 static void
1285 dump_context_vec(FILE *f1, FILE *f2)
1286 {
1287 	struct context_vec *cvp = context_vec_start;
1288 	int lowa, upb, lowc, upd, do_output;
1289 	int a, b, c, d;
1290 	char ch, *f;
1291 
1292 	if (context_vec_start > context_vec_ptr)
1293 		return;
1294 
1295 	b = d = 0;		/* gcc */
1296 	lowa = max(1, cvp->a - context);
1297 	upb = min(len[0], context_vec_ptr->b + context);
1298 	lowc = max(1, cvp->c - context);
1299 	upd = min(len[1], context_vec_ptr->d + context);
1300 
1301 	printf("***************");
1302 	if (pflag) {
1303 		f = match_function(ixold, lowa-1, f1);
1304 		if (f != NULL) {
1305 			putchar(' ');
1306 			fputs(f, stdout);
1307 		}
1308 	}
1309 	printf("\n*** ");
1310 	range(lowa, upb, ",");
1311 	printf(" ****\n");
1312 
1313 	/*
1314 	 * Output changes to the "old" file.  The first loop suppresses
1315 	 * output if there were no changes to the "old" file (we'll see
1316 	 * the "old" lines as context in the "new" list).
1317 	 */
1318 	do_output = 0;
1319 	for (; cvp <= context_vec_ptr; cvp++)
1320 		if (cvp->a <= cvp->b) {
1321 			cvp = context_vec_start;
1322 			do_output++;
1323 			break;
1324 		}
1325 	if (do_output) {
1326 		while (cvp <= context_vec_ptr) {
1327 			a = cvp->a;
1328 			b = cvp->b;
1329 			c = cvp->c;
1330 			d = cvp->d;
1331 
1332 			if (a <= b && c <= d)
1333 				ch = 'c';
1334 			else
1335 				ch = (a <= b) ? 'd' : 'a';
1336 
1337 			if (ch == 'a')
1338 				fetch(ixold, lowa, b, f1, ' ', 0);
1339 			else {
1340 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1341 				fetch(ixold, a, b, f1,
1342 				    ch == 'c' ? '!' : '-', 0);
1343 			}
1344 			lowa = b + 1;
1345 			cvp++;
1346 		}
1347 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1348 	}
1349 	/* output changes to the "new" file */
1350 	printf("--- ");
1351 	range(lowc, upd, ",");
1352 	printf(" ----\n");
1353 
1354 	do_output = 0;
1355 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1356 		if (cvp->c <= cvp->d) {
1357 			cvp = context_vec_start;
1358 			do_output++;
1359 			break;
1360 		}
1361 	if (do_output) {
1362 		while (cvp <= context_vec_ptr) {
1363 			a = cvp->a;
1364 			b = cvp->b;
1365 			c = cvp->c;
1366 			d = cvp->d;
1367 
1368 			if (a <= b && c <= d)
1369 				ch = 'c';
1370 			else
1371 				ch = (a <= b) ? 'd' : 'a';
1372 
1373 			if (ch == 'd')
1374 				fetch(ixnew, lowc, d, f2, ' ', 0);
1375 			else {
1376 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1377 				fetch(ixnew, c, d, f2,
1378 				    ch == 'c' ? '!' : '+', 0);
1379 			}
1380 			lowc = d + 1;
1381 			cvp++;
1382 		}
1383 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1384 	}
1385 	context_vec_ptr = context_vec_start - 1;
1386 }
1387 
1388 /* dump accumulated "unified" diff changes */
1389 static void
1390 dump_unified_vec(FILE *f1, FILE *f2)
1391 {
1392 	struct context_vec *cvp = context_vec_start;
1393 	int lowa, upb, lowc, upd;
1394 	int a, b, c, d;
1395 	char ch, *f;
1396 
1397 	if (context_vec_start > context_vec_ptr)
1398 		return;
1399 
1400 	b = d = 0;		/* gcc */
1401 	lowa = max(1, cvp->a - context);
1402 	upb = min(len[0], context_vec_ptr->b + context);
1403 	lowc = max(1, cvp->c - context);
1404 	upd = min(len[1], context_vec_ptr->d + context);
1405 
1406 	fputs("@@ -", stdout);
1407 	uni_range(lowa, upb);
1408 	fputs(" +", stdout);
1409 	uni_range(lowc, upd);
1410 	fputs(" @@", stdout);
1411 	if (pflag) {
1412 		f = match_function(ixold, lowa-1, f1);
1413 		if (f != NULL) {
1414 			putchar(' ');
1415 			fputs(f, stdout);
1416 		}
1417 	}
1418 	putchar('\n');
1419 
1420 	/*
1421 	 * Output changes in "unified" diff format--the old and new lines
1422 	 * are printed together.
1423 	 */
1424 	for (; cvp <= context_vec_ptr; cvp++) {
1425 		a = cvp->a;
1426 		b = cvp->b;
1427 		c = cvp->c;
1428 		d = cvp->d;
1429 
1430 		/*
1431 		 * c: both new and old changes
1432 		 * d: only changes in the old file
1433 		 * a: only changes in the new file
1434 		 */
1435 		if (a <= b && c <= d)
1436 			ch = 'c';
1437 		else
1438 			ch = (a <= b) ? 'd' : 'a';
1439 
1440 		switch (ch) {
1441 		case 'c':
1442 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1443 			fetch(ixold, a, b, f1, '-', 0);
1444 			fetch(ixnew, c, d, f2, '+', 0);
1445 			break;
1446 		case 'd':
1447 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1448 			fetch(ixold, a, b, f1, '-', 0);
1449 			break;
1450 		case 'a':
1451 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1452 			fetch(ixnew, c, d, f2, '+', 0);
1453 			break;
1454 		}
1455 		lowa = b + 1;
1456 		lowc = d + 1;
1457 	}
1458 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1459 
1460 	context_vec_ptr = context_vec_start - 1;
1461 }
1462