xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 8d981b0011332816f7aaa62238a8165c5a45cfbe)
1 /*	$OpenBSD: diffreg.c,v 1.63 2006/02/16 08:15:05 otto Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.63 2006/02/16 08:15:05 otto Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 
88 /*
89  * diff - compare two files.
90  */
91 
92 /*
93  *	Uses an algorithm due to Harold Stone, which finds
94  *	a pair of longest identical subsequences in the two
95  *	files.
96  *
97  *	The major goal is to generate the match vector J.
98  *	J[i] is the index of the line in file1 corresponding
99  *	to line i file0. J[i] = 0 if there is no
100  *	such line in file1.
101  *
102  *	Lines are hashed so as to work in core. All potential
103  *	matches are located by sorting the lines of each file
104  *	on the hash (called ``value''). In particular, this
105  *	collects the equivalence classes in file1 together.
106  *	Subroutine equiv replaces the value of each line in
107  *	file0 by the index of the first element of its
108  *	matching equivalence in (the reordered) file1.
109  *	To save space equiv squeezes file1 into a single
110  *	array member in which the equivalence classes
111  *	are simply concatenated, except that their first
112  *	members are flagged by changing sign.
113  *
114  *	Next the indices that point into member are unsorted into
115  *	array class according to the original order of file0.
116  *
117  *	The cleverness lies in routine stone. This marches
118  *	through the lines of file0, developing a vector klist
119  *	of "k-candidates". At step i a k-candidate is a matched
120  *	pair of lines x,y (x in file0 y in file1) such that
121  *	there is a common subsequence of length k
122  *	between the first i lines of file0 and the first y
123  *	lines of file1, but there is no such subsequence for
124  *	any smaller y. x is the earliest possible mate to y
125  *	that occurs in such a subsequence.
126  *
127  *	Whenever any of the members of the equivalence class of
128  *	lines in file1 matable to a line in file0 has serial number
129  *	less than the y of some k-candidate, that k-candidate
130  *	with the smallest such y is replaced. The new
131  *	k-candidate is chained (via pred) to the current
132  *	k-1 candidate so that the actual subsequence can
133  *	be recovered. When a member has serial number greater
134  *	that the y of all k-candidates, the klist is extended.
135  *	At the end, the longest subsequence is pulled out
136  *	and placed in the array J by unravel
137  *
138  *	With J in hand, the matches there recorded are
139  *	check'ed against reality to assure that no spurious
140  *	matches have crept in due to hashing. If they have,
141  *	they are broken, and "jackpot" is recorded--a harmless
142  *	matter except that a true match for a spuriously
143  *	mated line may now be unnecessarily reported as a change.
144  *
145  *	Much of the complexity of the program comes simply
146  *	from trying to minimize core utilization and
147  *	maximize the range of doable problems by dynamically
148  *	allocating what is needed and reusing what is not.
149  *	The core requirements for problems larger than somewhat
150  *	are (in words) 2*length(file0) + length(file1) +
151  *	3*(number of k-candidates installed),  typically about
152  *	6n words for files of length n.
153  */
154 
155 struct cand {
156 	int x;
157 	int y;
158 	int pred;
159 };
160 
161 struct line {
162 	int serial;
163 	int value;
164 } *file[2];
165 
166 /*
167  * The following struct is used to record change information when
168  * doing a "context" or "unified" diff.  (see routine "change" to
169  * understand the highly mnemonic field names)
170  */
171 struct context_vec {
172 	int a;			/* start line in old file */
173 	int b;			/* end line in old file */
174 	int c;			/* start line in new file */
175 	int d;			/* end line in new file */
176 };
177 
178 static int  *J;			/* will be overlaid on class */
179 static int  *class;		/* will be overlaid on file[0] */
180 static int  *klist;		/* will be overlaid on file[0] after class */
181 static int  *member;		/* will be overlaid on file[1] */
182 static int   clen;
183 static int   inifdef;		/* whether or not we are in a #ifdef block */
184 static int   len[2];
185 static int   pref, suff;	/* length of prefix and suffix */
186 static int   slen[2];
187 static int   anychange;
188 static long *ixnew;		/* will be overlaid on file[1] */
189 static long *ixold;		/* will be overlaid on klist */
190 static struct cand *clist;	/* merely a free storage pot for candidates */
191 static int   clistlen;		/* the length of clist */
192 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
193 static u_char *chrtran;		/* translation table for case-folding */
194 static struct context_vec *context_vec_start;
195 static struct context_vec *context_vec_end;
196 static struct context_vec *context_vec_ptr;
197 
198 #define FUNCTION_CONTEXT_SIZE	41
199 static char lastbuf[FUNCTION_CONTEXT_SIZE];
200 static int lastline;
201 static int lastmatchline;
202 
203 static FILE *opentemp(const char *);
204 static void output(char *, FILE *, char *, FILE *);
205 static void check(char *, FILE *, char *, FILE *);
206 static void range(int, int, char *);
207 static void uni_range(int, int);
208 static void dump_context_vec(FILE *, FILE *);
209 static void dump_unified_vec(FILE *, FILE *);
210 static void prepare(int, FILE *, off_t);
211 static void prune(void);
212 static void equiv(struct line *, int, struct line *, int, int *);
213 static void unravel(int);
214 static void unsort(struct line *, int, int *);
215 static void change(char *, FILE *, char *, FILE *, int, int, int, int);
216 static void sort(struct line *, int);
217 static void print_header(const char *, const char *);
218 static int  ignoreline(char *);
219 static int  asciifile(FILE *);
220 static int  fetch(long *, int, int, FILE *, int, int);
221 static int  newcand(int, int, int);
222 static int  search(int *, int, int);
223 static int  skipline(FILE *);
224 static int  isqrt(int);
225 static int  stone(int *, int, int *, int *);
226 static int  readhash(FILE *);
227 static int  files_differ(FILE *, FILE *, int);
228 static __inline int min(int, int);
229 static __inline int max(int, int);
230 static char *match_function(const long *, int, FILE *);
231 static char *preadline(int, size_t, off_t);
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
293 diffreg(char *ofile1, char *ofile2, int flags)
294 {
295 	char *file1 = ofile1;
296 	char *file2 = ofile2;
297 	FILE *f1 = NULL;
298 	FILE *f2 = NULL;
299 	int rval = D_SAME;
300 	int i, ostdout = -1;
301 	pid_t pid = -1;
302 
303 	anychange = 0;
304 	lastline = 0;
305 	lastmatchline = 0;
306 	context_vec_ptr = context_vec_start - 1;
307 	chrtran = (iflag ? cup2low : clow2low);
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if (!asciifile(f1) || !asciifile(f2)) {
367 		rval = D_BINARY;
368 		status |= 1;
369 		goto closem;
370 	}
371 	if (lflag) {
372 		/* redirect stdout to pr */
373 		int pfd[2];
374 		char *header;
375 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
376 
377 		easprintf(&header, "%s %s %s", diffargs, file1, file2);
378 		prargv[2] = header;
379 		fflush(stdout);
380 		rewind(stdout);
381 		pipe(pfd);
382 		switch ((pid = fork())) {
383 		case -1:
384 			warnx("No more processes");
385 			status |= 2;
386 			free(header);
387 			return (D_ERROR);
388 		case 0:
389 			/* child */
390 			if (pfd[0] != STDIN_FILENO) {
391 				dup2(pfd[0], STDIN_FILENO);
392 				close(pfd[0]);
393 			}
394 			close(pfd[1]);
395 			execv(_PATH_PR, prargv);
396 			_exit(127);
397 		default:
398 			/* parent */
399 			if (pfd[1] != STDOUT_FILENO) {
400 				ostdout = dup(STDOUT_FILENO);
401 				dup2(pfd[1], STDOUT_FILENO);
402 				close(pfd[1]);
403 			}
404 			close(pfd[0]);
405 			rewind(stdout);
406 			free(header);
407 		}
408 	} else if (flags & D_HEADER)
409 		printf("%s %s %s\n", diffargs, file1, file2);
410 	prepare(0, f1, stb1.st_size);
411 	prepare(1, f2, stb2.st_size);
412 	prune();
413 	sort(sfile[0], slen[0]);
414 	sort(sfile[1], slen[1]);
415 
416 	member = (int *)file[1];
417 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
418 	member = erealloc(member, (slen[1] + 2) * sizeof(int));
419 
420 	class = (int *)file[0];
421 	unsort(sfile[0], slen[0], class);
422 	class = erealloc(class, (slen[0] + 2) * sizeof(int));
423 
424 	klist = emalloc((slen[0] + 2) * sizeof(int));
425 	clen = 0;
426 	clistlen = 100;
427 	clist = emalloc(clistlen * sizeof(struct cand));
428 	i = stone(class, slen[0], member, klist);
429 	free(member);
430 	free(class);
431 
432 	J = erealloc(J, (len[0] + 2) * sizeof(int));
433 	unravel(klist[i]);
434 	free(clist);
435 	free(klist);
436 
437 	ixold = erealloc(ixold, (len[0] + 2) * sizeof(long));
438 	ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long));
439 	check(file1, f1, file2, f2);
440 	output(file1, f1, file2, f2);
441 	if (ostdout != -1) {
442 		int wstatus;
443 
444 		/* close the pipe to pr and restore stdout */
445 		fflush(stdout);
446 		rewind(stdout);
447 		if (ostdout != STDOUT_FILENO) {
448 			close(STDOUT_FILENO);
449 			dup2(ostdout, STDOUT_FILENO);
450 			close(ostdout);
451 		}
452 		waitpid(pid, &wstatus, 0);
453 	}
454 closem:
455 	if (anychange) {
456 		status |= 1;
457 		if (rval == D_SAME)
458 			rval = D_DIFFER;
459 	}
460 	if (f1 != NULL)
461 		fclose(f1);
462 	if (f2 != NULL)
463 		fclose(f2);
464 	if (file1 != ofile1)
465 		free(file1);
466 	if (file2 != ofile2)
467 		free(file2);
468 	return (rval);
469 }
470 
471 /*
472  * Check to see if the given files differ.
473  * Returns 0 if they are the same, 1 if different, and -1 on error.
474  * XXX - could use code from cmp(1) [faster]
475  */
476 static int
477 files_differ(FILE *f1, FILE *f2, int flags)
478 {
479 	char buf1[BUFSIZ], buf2[BUFSIZ];
480 	size_t i, j;
481 
482 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
483 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
484 		return (1);
485 	for (;;) {
486 		i = fread(buf1, 1, sizeof(buf1), f1);
487 		j = fread(buf2, 1, sizeof(buf2), f2);
488 		if (i != j)
489 			return (1);
490 		if (i == 0 && j == 0) {
491 			if (ferror(f1) || ferror(f2))
492 				return (1);
493 			return (0);
494 		}
495 		if (memcmp(buf1, buf2, i) != 0)
496 			return (1);
497 	}
498 }
499 
500 static FILE *
501 opentemp(const char *file)
502 {
503 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
504 	ssize_t nread;
505 	int ifd, ofd;
506 
507 	if (strcmp(file, "-") == 0)
508 		ifd = STDIN_FILENO;
509 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
510 		return (NULL);
511 
512 	if ((tempdir = getenv("TMPDIR")) == NULL)
513 		tempdir = _PATH_TMP;
514 	if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX",
515 	    tempdir) >= sizeof(tempfile)) {
516 		close(ifd);
517 		errno = ENAMETOOLONG;
518 		return (NULL);
519 	}
520 
521 	if ((ofd = mkstemp(tempfile)) < 0)
522 		return (NULL);
523 	unlink(tempfile);
524 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
525 		if (write(ofd, buf, nread) != nread) {
526 			close(ifd);
527 			close(ofd);
528 			return (NULL);
529 		}
530 	}
531 	close(ifd);
532 	lseek(ofd, (off_t)0, SEEK_SET);
533 	return (fdopen(ofd, "r"));
534 }
535 
536 char *
537 splice(char *dir, char *file)
538 {
539 	char *tail, *buf;
540 
541 	if ((tail = strrchr(file, '/')) == NULL)
542 		tail = file;
543 	else
544 		tail++;
545 	easprintf(&buf, "%s/%s", dir, tail);
546 	return (buf);
547 }
548 
549 static void
550 prepare(int i, FILE *fd, off_t filesize)
551 {
552 	struct line *p;
553 	int j, h;
554 	size_t sz;
555 
556 	rewind(fd);
557 
558 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
559 	if (sz < 100)
560 		sz = 100;
561 
562 	p = emalloc((sz + 3) * sizeof(struct line));
563 	for (j = 0; (h = readhash(fd));) {
564 		if (j == sz) {
565 			sz = sz * 3 / 2;
566 			p = erealloc(p, (sz + 3) * sizeof(struct line));
567 		}
568 		p[++j].value = h;
569 	}
570 	len[i] = j;
571 	file[i] = p;
572 }
573 
574 static void
575 prune(void)
576 {
577 	int i, j;
578 
579 	for (pref = 0; pref < len[0] && pref < len[1] &&
580 	    file[0][pref + 1].value == file[1][pref + 1].value;
581 	    pref++)
582 		;
583 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
584 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
585 	    suff++)
586 		;
587 	for (j = 0; j < 2; j++) {
588 		sfile[j] = file[j] + pref;
589 		slen[j] = len[j] - pref - suff;
590 		for (i = 0; i <= slen[j]; i++)
591 			sfile[j][i].serial = i;
592 	}
593 }
594 
595 static void
596 equiv(struct line *a, int n, struct line *b, int m, int *c)
597 {
598 	int i, j;
599 
600 	i = j = 1;
601 	while (i <= n && j <= m) {
602 		if (a[i].value < b[j].value)
603 			a[i++].value = 0;
604 		else if (a[i].value == b[j].value)
605 			a[i++].value = j;
606 		else
607 			j++;
608 	}
609 	while (i <= n)
610 		a[i++].value = 0;
611 	b[m + 1].value = 0;
612 	j = 0;
613 	while (++j <= m) {
614 		c[j] = -b[j].serial;
615 		while (b[j + 1].value == b[j].value) {
616 			j++;
617 			c[j] = b[j].serial;
618 		}
619 	}
620 	c[j] = -1;
621 }
622 
623 /* Code taken from ping.c */
624 static int
625 isqrt(int n)
626 {
627 	int y, x = 1;
628 
629 	if (n == 0)
630 		return(0);
631 
632 	do { /* newton was a stinker */
633 		y = x;
634 		x = n / x;
635 		x += y;
636 		x /= 2;
637 	} while ((x - y) > 1 || (x - y) < -1);
638 
639 	return (x);
640 }
641 
642 static int
643 stone(int *a, int n, int *b, int *c)
644 {
645 	int i, k, y, j, l;
646 	int oldc, tc, oldl;
647 	u_int numtries;
648 
649 	const u_int bound = dflag ? UINT_MAX : max(256, isqrt(n));
650 
651 	k = 0;
652 	c[0] = newcand(0, 0, 0);
653 	for (i = 1; i <= n; i++) {
654 		j = a[i];
655 		if (j == 0)
656 			continue;
657 		y = -b[j];
658 		oldl = 0;
659 		oldc = c[0];
660 		numtries = 0;
661 		do {
662 			if (y <= clist[oldc].y)
663 				continue;
664 			l = search(c, k, y);
665 			if (l != oldl + 1)
666 				oldc = c[l - 1];
667 			if (l <= k) {
668 				if (clist[c[l]].y <= y)
669 					continue;
670 				tc = c[l];
671 				c[l] = newcand(i, y, oldc);
672 				oldc = tc;
673 				oldl = l;
674 				numtries++;
675 			} else {
676 				c[l] = newcand(i, y, oldc);
677 				k++;
678 				break;
679 			}
680 		} while ((y = b[++j]) > 0 && numtries < bound);
681 	}
682 	return (k);
683 }
684 
685 static int
686 newcand(int x, int y, int pred)
687 {
688 	struct cand *q;
689 
690 	if (clen == clistlen) {
691 		clistlen = clistlen * 11 / 10;
692 		clist = erealloc(clist, clistlen * sizeof(struct cand));
693 	}
694 	q = clist + clen;
695 	q->x = x;
696 	q->y = y;
697 	q->pred = pred;
698 	return (clen++);
699 }
700 
701 static int
702 search(int *c, int k, int y)
703 {
704 	int i, j, l, t;
705 
706 	if (clist[c[k]].y < y)	/* quick look for typical case */
707 		return (k + 1);
708 	i = 0;
709 	j = k + 1;
710 	while (1) {
711 		l = i + j;
712 		if ((l >>= 1) <= i)
713 			break;
714 		t = clist[c[l]].y;
715 		if (t > y)
716 			j = l;
717 		else if (t < y)
718 			i = l;
719 		else
720 			return (l);
721 	}
722 	return (l + 1);
723 }
724 
725 static void
726 unravel(int p)
727 {
728 	struct cand *q;
729 	int i;
730 
731 	for (i = 0; i <= len[0]; i++)
732 		J[i] = i <= pref ? i :
733 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
734 	for (q = clist + p; q->y != 0; q = clist + q->pred)
735 		J[q->x + pref] = q->y + pref;
736 }
737 
738 /*
739  * Check does double duty:
740  *  1.	ferret out any fortuitous correspondences due
741  *	to confounding by hashing (which result in "jackpot")
742  *  2.  collect random access indexes to the two files
743  */
744 static void
745 check(char *file1, FILE *f1, char *file2, FILE *f2)
746 {
747 	int i, j, jackpot, c, d;
748 	long ctold, ctnew;
749 
750 	rewind(f1);
751 	rewind(f2);
752 	j = 1;
753 	ixold[0] = ixnew[0] = 0;
754 	jackpot = 0;
755 	ctold = ctnew = 0;
756 	for (i = 1; i <= len[0]; i++) {
757 		if (J[i] == 0) {
758 			ixold[i] = ctold += skipline(f1);
759 			continue;
760 		}
761 		while (j < J[i]) {
762 			ixnew[j] = ctnew += skipline(f2);
763 			j++;
764 		}
765 		if (bflag || wflag || iflag) {
766 			for (;;) {
767 				c = getc(f1);
768 				d = getc(f2);
769 				/*
770 				 * GNU diff ignores a missing newline
771 				 * in one file if bflag || wflag.
772 				 */
773 				if ((bflag || wflag) &&
774 				    ((c == EOF && d == '\n') ||
775 				    (c == '\n' && d == EOF))) {
776 					break;
777 				}
778 				ctold++;
779 				ctnew++;
780 				if (bflag && isspace(c) && isspace(d)) {
781 					do {
782 						if (c == '\n')
783 							break;
784 						ctold++;
785 					} while (isspace(c = getc(f1)));
786 					do {
787 						if (d == '\n')
788 							break;
789 						ctnew++;
790 					} while (isspace(d = getc(f2)));
791 				} else if (wflag) {
792 					while (isspace(c) && c != '\n') {
793 						c = getc(f1);
794 						ctold++;
795 					}
796 					while (isspace(d) && d != '\n') {
797 						d = getc(f2);
798 						ctnew++;
799 					}
800 				}
801 				if (chrtran[c] != chrtran[d]) {
802 					jackpot++;
803 					J[i] = 0;
804 					if (c != '\n' && c != EOF)
805 						ctold += skipline(f1);
806 					if (d != '\n' && c != EOF)
807 						ctnew += skipline(f2);
808 					break;
809 				}
810 				if (c == '\n' || c == EOF)
811 					break;
812 			}
813 		} else {
814 			for (;;) {
815 				ctold++;
816 				ctnew++;
817 				if ((c = getc(f1)) != (d = getc(f2))) {
818 					/* jackpot++; */
819 					J[i] = 0;
820 					if (c != '\n' && c != EOF)
821 						ctold += skipline(f1);
822 					if (d != '\n' && c != EOF)
823 						ctnew += skipline(f2);
824 					break;
825 				}
826 				if (c == '\n' || c == EOF)
827 					break;
828 			}
829 		}
830 		ixold[i] = ctold;
831 		ixnew[j] = ctnew;
832 		j++;
833 	}
834 	for (; j <= len[1]; j++)
835 		ixnew[j] = ctnew += skipline(f2);
836 	/*
837 	 * if (jackpot)
838 	 *	fprintf(stderr, "jackpot\n");
839 	 */
840 }
841 
842 /* shellsort CACM #201 */
843 static void
844 sort(struct line *a, int n)
845 {
846 	struct line *ai, *aim, w;
847 	int j, m = 0, k;
848 
849 	if (n == 0)
850 		return;
851 	for (j = 1; j <= n; j *= 2)
852 		m = 2 * j - 1;
853 	for (m /= 2; m != 0; m /= 2) {
854 		k = n - m;
855 		for (j = 1; j <= k; j++) {
856 			for (ai = &a[j]; ai > a; ai -= m) {
857 				aim = &ai[m];
858 				if (aim < ai)
859 					break;	/* wraparound */
860 				if (aim->value > ai[0].value ||
861 				    (aim->value == ai[0].value &&
862 					aim->serial > ai[0].serial))
863 					break;
864 				w.value = ai[0].value;
865 				ai[0].value = aim->value;
866 				aim->value = w.value;
867 				w.serial = ai[0].serial;
868 				ai[0].serial = aim->serial;
869 				aim->serial = w.serial;
870 			}
871 		}
872 	}
873 }
874 
875 static void
876 unsort(struct line *f, int l, int *b)
877 {
878 	int *a, i;
879 
880 	a = emalloc((l + 1) * sizeof(int));
881 	for (i = 1; i <= l; i++)
882 		a[f[i].serial] = f[i].value;
883 	for (i = 1; i <= l; i++)
884 		b[i] = a[i];
885 	free(a);
886 }
887 
888 static int
889 skipline(FILE *f)
890 {
891 	int i, c;
892 
893 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
894 		continue;
895 	return (i);
896 }
897 
898 static void
899 output(char *file1, FILE *f1, char *file2, FILE *f2)
900 {
901 	int m, i0, i1, j0, j1;
902 
903 	rewind(f1);
904 	rewind(f2);
905 	m = len[0];
906 	J[0] = 0;
907 	J[m + 1] = len[1] + 1;
908 	if (format != D_EDIT) {
909 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
910 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
911 				i0++;
912 			j0 = J[i0 - 1] + 1;
913 			i1 = i0 - 1;
914 			while (i1 < m && J[i1 + 1] == 0)
915 				i1++;
916 			j1 = J[i1 + 1] - 1;
917 			J[i1] = j1;
918 			change(file1, f1, file2, f2, i0, i1, j0, j1);
919 		}
920 	} else {
921 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
922 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
923 				i0--;
924 			j0 = J[i0 + 1] - 1;
925 			i1 = i0 + 1;
926 			while (i1 > 1 && J[i1 - 1] == 0)
927 				i1--;
928 			j1 = J[i1 - 1] + 1;
929 			J[i1] = j1;
930 			change(file1, f1, file2, f2, i1, i0, j1, j0);
931 		}
932 	}
933 	if (m == 0)
934 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
935 	if (format == D_IFDEF) {
936 		for (;;) {
937 #define	c i0
938 			if ((c = getc(f1)) == EOF)
939 				return;
940 			putchar(c);
941 		}
942 #undef c
943 	}
944 	if (anychange != 0) {
945 		if (format == D_CONTEXT)
946 			dump_context_vec(f1, f2);
947 		else if (format == D_UNIFIED)
948 			dump_unified_vec(f1, f2);
949 	}
950 }
951 
952 static __inline void
953 range(int a, int b, char *separator)
954 {
955 	printf("%d", a > b ? b : a);
956 	if (a < b)
957 		printf("%s%d", separator, b);
958 }
959 
960 static __inline void
961 uni_range(int a, int b)
962 {
963 	if (a < b)
964 		printf("%d,%d", a, b - a + 1);
965 	else if (a == b)
966 		printf("%d", b);
967 	else
968 		printf("%d,0", b);
969 }
970 
971 static char *
972 preadline(int fd, size_t len, off_t off)
973 {
974 	char *line;
975 	ssize_t nr;
976 
977 	line = emalloc(len + 1);
978 	if ((nr = pread(fd, line, len, off)) < 0)
979 		err(1, "preadline");
980 	if (nr > 0 && line[nr-1] == '\n')
981 		nr--;
982 	line[nr] = '\0';
983 	return (line);
984 }
985 
986 static int
987 ignoreline(char *line)
988 {
989 	int ret;
990 
991 	ret = regexec(&ignore_re, line, 0, NULL, 0);
992 	free(line);
993 	return (ret == 0);	/* if it matched, it should be ignored. */
994 }
995 
996 /*
997  * Indicate that there is a difference between lines a and b of the from file
998  * to get to lines c to d of the to file.  If a is greater then b then there
999  * are no lines in the from file involved and this means that there were
1000  * lines appended (beginning at b).  If c is greater than d then there are
1001  * lines missing from the to file.
1002  */
1003 static void
1004 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
1005 {
1006 	static size_t max_context = 64;
1007 	int i;
1008 
1009 restart:
1010 	if (format != D_IFDEF && a > b && c > d)
1011 		return;
1012 	if (ignore_pats != NULL) {
1013 		char *line;
1014 		/*
1015 		 * All lines in the change, insert, or delete must
1016 		 * match an ignore pattern for the change to be
1017 		 * ignored.
1018 		 */
1019 		if (a <= b) {		/* Changes and deletes. */
1020 			for (i = a; i <= b; i++) {
1021 				line = preadline(fileno(f1),
1022 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1023 				if (!ignoreline(line))
1024 					goto proceed;
1025 			}
1026 		}
1027 		if (a > b || c <= d) {	/* Changes and inserts. */
1028 			for (i = c; i <= d; i++) {
1029 				line = preadline(fileno(f2),
1030 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1031 				if (!ignoreline(line))
1032 					goto proceed;
1033 			}
1034 		}
1035 		return;
1036 	}
1037 proceed:
1038 	if (format == D_CONTEXT || format == D_UNIFIED) {
1039 		/*
1040 		 * Allocate change records as needed.
1041 		 */
1042 		if (context_vec_ptr == context_vec_end - 1) {
1043 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1044 			max_context <<= 1;
1045 			context_vec_start = erealloc(context_vec_start,
1046 			    max_context * sizeof(struct context_vec));
1047 			context_vec_end = context_vec_start + max_context;
1048 			context_vec_ptr = context_vec_start + offset;
1049 		}
1050 		if (anychange == 0) {
1051 			/*
1052 			 * Print the context/unidiff header first time through.
1053 			 */
1054 			print_header(file1, file2);
1055 			anychange = 1;
1056 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1057 		    c > context_vec_ptr->d + (2 * context) + 1) {
1058 			/*
1059 			 * If this change is more than 'context' lines from the
1060 			 * previous change, dump the record and reset it.
1061 			 */
1062 			if (format == D_CONTEXT)
1063 				dump_context_vec(f1, f2);
1064 			else
1065 				dump_unified_vec(f1, f2);
1066 		}
1067 		context_vec_ptr++;
1068 		context_vec_ptr->a = a;
1069 		context_vec_ptr->b = b;
1070 		context_vec_ptr->c = c;
1071 		context_vec_ptr->d = d;
1072 		return;
1073 	}
1074 	if (anychange == 0)
1075 		anychange = 1;
1076 	switch (format) {
1077 
1078 	case D_BRIEF:
1079 		return;
1080 	case D_NORMAL:
1081 	case D_EDIT:
1082 		range(a, b, ",");
1083 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1084 		if (format == D_NORMAL)
1085 			range(c, d, ",");
1086 		putchar('\n');
1087 		break;
1088 	case D_REVERSE:
1089 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1090 		range(a, b, " ");
1091 		putchar('\n');
1092 		break;
1093 	case D_NREVERSE:
1094 		if (a > b)
1095 			printf("a%d %d\n", b, d - c + 1);
1096 		else {
1097 			printf("d%d %d\n", a, b - a + 1);
1098 			if (!(c > d))
1099 				/* add changed lines */
1100 				printf("a%d %d\n", b, d - c + 1);
1101 		}
1102 		break;
1103 	}
1104 	if (format == D_NORMAL || format == D_IFDEF) {
1105 		fetch(ixold, a, b, f1, '<', 1);
1106 		if (a <= b && c <= d && format == D_NORMAL)
1107 			puts("---");
1108 	}
1109 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1110 	if (i != 0 && format == D_EDIT) {
1111 		/*
1112 		 * A non-zero return value for D_EDIT indicates that the
1113 		 * last line printed was a bare dot (".") that has been
1114 		 * escaped as ".." to prevent ed(1) from misinterpreting
1115 		 * it.  We have to add a substitute command to change this
1116 		 * back and restart where we left off.
1117 		 */
1118 		puts(".");
1119 		printf("%ds/^\\.\\././\n", a);
1120 		a += i;
1121 		c += i;
1122 		goto restart;
1123 	}
1124 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1125 		puts(".");
1126 	if (inifdef) {
1127 		printf("#endif /* %s */\n", ifdefname);
1128 		inifdef = 0;
1129 	}
1130 }
1131 
1132 static int
1133 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1134 {
1135 	int i, j, c, lastc, col, nc;
1136 
1137 	/*
1138 	 * When doing #ifdef's, copy down to current line
1139 	 * if this is the first file, so that stuff makes it to output.
1140 	 */
1141 	if (format == D_IFDEF && oldfile) {
1142 		long curpos = ftell(lb);
1143 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1144 		nc = f[a > b ? b : a - 1] - curpos;
1145 		for (i = 0; i < nc; i++)
1146 			putchar(getc(lb));
1147 	}
1148 	if (a > b)
1149 		return (0);
1150 	if (format == D_IFDEF) {
1151 		if (inifdef) {
1152 			printf("#else /* %s%s */\n",
1153 			    oldfile == 1 ? "!" : "", ifdefname);
1154 		} else {
1155 			if (oldfile)
1156 				printf("#ifndef %s\n", ifdefname);
1157 			else
1158 				printf("#ifdef %s\n", ifdefname);
1159 		}
1160 		inifdef = 1 + oldfile;
1161 	}
1162 	for (i = a; i <= b; i++) {
1163 		fseek(lb, f[i - 1], SEEK_SET);
1164 		nc = f[i] - f[i - 1];
1165 		if (format != D_IFDEF && ch != '\0') {
1166 			putchar(ch);
1167 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1168 			    || format == D_UNIFIED))
1169 				putchar('\t');
1170 			else if (format != D_UNIFIED)
1171 				putchar(' ');
1172 		}
1173 		col = 0;
1174 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1175 			if ((c = getc(lb)) == EOF) {
1176 				if (format == D_EDIT || format == D_REVERSE ||
1177 				    format == D_NREVERSE)
1178 					warnx("No newline at end of file");
1179 				else
1180 					puts("\n\\ No newline at end of file");
1181 				return (0);
1182 			}
1183 			if (c == '\t' && tflag) {
1184 				do {
1185 					putchar(' ');
1186 				} while (++col & 7);
1187 			} else {
1188 				if (format == D_EDIT && j == 1 && c == '\n'
1189 				    && lastc == '.') {
1190 					/*
1191 					 * Don't print a bare "." line
1192 					 * since that will confuse ed(1).
1193 					 * Print ".." instead and return,
1194 					 * giving the caller an offset
1195 					 * from which to restart.
1196 					 */
1197 					puts(".");
1198 					return (i - a + 1);
1199 				}
1200 				putchar(c);
1201 				col++;
1202 			}
1203 		}
1204 	}
1205 	return (0);
1206 }
1207 
1208 /*
1209  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1210  */
1211 static int
1212 readhash(FILE *f)
1213 {
1214 	int i, t, space;
1215 	int sum;
1216 
1217 	sum = 1;
1218 	space = 0;
1219 	if (!bflag && !wflag) {
1220 		if (iflag)
1221 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1222 				if (t == EOF) {
1223 					if (i == 0)
1224 						return (0);
1225 					break;
1226 				}
1227 				sum = sum * 127 + chrtran[t];
1228 			}
1229 		else
1230 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1231 				if (t == EOF) {
1232 					if (i == 0)
1233 						return (0);
1234 					break;
1235 				}
1236 				sum = sum * 127 + t;
1237 			}
1238 	} else {
1239 		for (i = 0;;) {
1240 			switch (t = getc(f)) {
1241 			case '\t':
1242 			case '\r':
1243 			case '\v':
1244 			case '\f':
1245 			case ' ':
1246 				space++;
1247 				continue;
1248 			default:
1249 				if (space && !wflag) {
1250 					i++;
1251 					space = 0;
1252 				}
1253 				sum = sum * 127 + chrtran[t];
1254 				i++;
1255 				continue;
1256 			case EOF:
1257 				if (i == 0)
1258 					return (0);
1259 				/* FALLTHROUGH */
1260 			case '\n':
1261 				break;
1262 			}
1263 			break;
1264 		}
1265 	}
1266 	/*
1267 	 * There is a remote possibility that we end up with a zero sum.
1268 	 * Zero is used as an EOF marker, so return 1 instead.
1269 	 */
1270 	return (sum == 0 ? 1 : sum);
1271 }
1272 
1273 static int
1274 asciifile(FILE *f)
1275 {
1276 	unsigned char buf[BUFSIZ];
1277 	int i, cnt;
1278 
1279 	if (aflag || f == NULL)
1280 		return (1);
1281 
1282 	rewind(f);
1283 	cnt = fread(buf, 1, sizeof(buf), f);
1284 	for (i = 0; i < cnt; i++)
1285 		if (!isprint(buf[i]) && !isspace(buf[i]))
1286 			return (0);
1287 	return (1);
1288 }
1289 
1290 static __inline int min(int a, int b)
1291 {
1292 	return (a < b ? a : b);
1293 }
1294 
1295 static __inline int max(int a, int b)
1296 {
1297 	return (a > b ? a : b);
1298 }
1299 
1300 static char *
1301 match_function(const long *f, int pos, FILE *file)
1302 {
1303 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1304 	size_t nc;
1305 	int last = lastline;
1306 	char *p;
1307 
1308 	lastline = pos;
1309 	while (pos > last) {
1310 		fseek(file, f[pos - 1], SEEK_SET);
1311 		nc = f[pos] - f[pos - 1];
1312 		if (nc >= sizeof(buf))
1313 			nc = sizeof(buf) - 1;
1314 		nc = fread(buf, 1, nc, file);
1315 		if (nc > 0) {
1316 			buf[nc] = '\0';
1317 			p = strchr(buf, '\n');
1318 			if (p != NULL)
1319 				*p = '\0';
1320 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1321 				strlcpy(lastbuf, buf, sizeof lastbuf);
1322 				lastmatchline = pos;
1323 				return lastbuf;
1324 			}
1325 		}
1326 		pos--;
1327 	}
1328 	return lastmatchline > 0 ? lastbuf : NULL;
1329 }
1330 
1331 /* dump accumulated "context" diff changes */
1332 static void
1333 dump_context_vec(FILE *f1, FILE *f2)
1334 {
1335 	struct context_vec *cvp = context_vec_start;
1336 	int lowa, upb, lowc, upd, do_output;
1337 	int a, b, c, d;
1338 	char ch, *f;
1339 
1340 	if (context_vec_start > context_vec_ptr)
1341 		return;
1342 
1343 	b = d = 0;		/* gcc */
1344 	lowa = max(1, cvp->a - context);
1345 	upb = min(len[0], context_vec_ptr->b + context);
1346 	lowc = max(1, cvp->c - context);
1347 	upd = min(len[1], context_vec_ptr->d + context);
1348 
1349 	printf("***************");
1350 	if (pflag) {
1351 		f = match_function(ixold, lowa-1, f1);
1352 		if (f != NULL) {
1353 			putchar(' ');
1354 			fputs(f, stdout);
1355 		}
1356 	}
1357 	printf("\n*** ");
1358 	range(lowa, upb, ",");
1359 	printf(" ****\n");
1360 
1361 	/*
1362 	 * Output changes to the "old" file.  The first loop suppresses
1363 	 * output if there were no changes to the "old" file (we'll see
1364 	 * the "old" lines as context in the "new" list).
1365 	 */
1366 	do_output = 0;
1367 	for (; cvp <= context_vec_ptr; cvp++)
1368 		if (cvp->a <= cvp->b) {
1369 			cvp = context_vec_start;
1370 			do_output++;
1371 			break;
1372 		}
1373 	if (do_output) {
1374 		while (cvp <= context_vec_ptr) {
1375 			a = cvp->a;
1376 			b = cvp->b;
1377 			c = cvp->c;
1378 			d = cvp->d;
1379 
1380 			if (a <= b && c <= d)
1381 				ch = 'c';
1382 			else
1383 				ch = (a <= b) ? 'd' : 'a';
1384 
1385 			if (ch == 'a')
1386 				fetch(ixold, lowa, b, f1, ' ', 0);
1387 			else {
1388 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1389 				fetch(ixold, a, b, f1,
1390 				    ch == 'c' ? '!' : '-', 0);
1391 			}
1392 			lowa = b + 1;
1393 			cvp++;
1394 		}
1395 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1396 	}
1397 	/* output changes to the "new" file */
1398 	printf("--- ");
1399 	range(lowc, upd, ",");
1400 	printf(" ----\n");
1401 
1402 	do_output = 0;
1403 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1404 		if (cvp->c <= cvp->d) {
1405 			cvp = context_vec_start;
1406 			do_output++;
1407 			break;
1408 		}
1409 	if (do_output) {
1410 		while (cvp <= context_vec_ptr) {
1411 			a = cvp->a;
1412 			b = cvp->b;
1413 			c = cvp->c;
1414 			d = cvp->d;
1415 
1416 			if (a <= b && c <= d)
1417 				ch = 'c';
1418 			else
1419 				ch = (a <= b) ? 'd' : 'a';
1420 
1421 			if (ch == 'd')
1422 				fetch(ixnew, lowc, d, f2, ' ', 0);
1423 			else {
1424 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1425 				fetch(ixnew, c, d, f2,
1426 				    ch == 'c' ? '!' : '+', 0);
1427 			}
1428 			lowc = d + 1;
1429 			cvp++;
1430 		}
1431 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1432 	}
1433 	context_vec_ptr = context_vec_start - 1;
1434 }
1435 
1436 /* dump accumulated "unified" diff changes */
1437 static void
1438 dump_unified_vec(FILE *f1, FILE *f2)
1439 {
1440 	struct context_vec *cvp = context_vec_start;
1441 	int lowa, upb, lowc, upd;
1442 	int a, b, c, d;
1443 	char ch, *f;
1444 
1445 	if (context_vec_start > context_vec_ptr)
1446 		return;
1447 
1448 	b = d = 0;		/* gcc */
1449 	lowa = max(1, cvp->a - context);
1450 	upb = min(len[0], context_vec_ptr->b + context);
1451 	lowc = max(1, cvp->c - context);
1452 	upd = min(len[1], context_vec_ptr->d + context);
1453 
1454 	fputs("@@ -", stdout);
1455 	uni_range(lowa, upb);
1456 	fputs(" +", stdout);
1457 	uni_range(lowc, upd);
1458 	fputs(" @@", stdout);
1459 	if (pflag) {
1460 		f = match_function(ixold, lowa-1, f1);
1461 		if (f != NULL) {
1462 			putchar(' ');
1463 			fputs(f, stdout);
1464 		}
1465 	}
1466 	putchar('\n');
1467 
1468 	/*
1469 	 * Output changes in "unified" diff format--the old and new lines
1470 	 * are printed together.
1471 	 */
1472 	for (; cvp <= context_vec_ptr; cvp++) {
1473 		a = cvp->a;
1474 		b = cvp->b;
1475 		c = cvp->c;
1476 		d = cvp->d;
1477 
1478 		/*
1479 		 * c: both new and old changes
1480 		 * d: only changes in the old file
1481 		 * a: only changes in the new file
1482 		 */
1483 		if (a <= b && c <= d)
1484 			ch = 'c';
1485 		else
1486 			ch = (a <= b) ? 'd' : 'a';
1487 
1488 		switch (ch) {
1489 		case 'c':
1490 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1491 			fetch(ixold, a, b, f1, '-', 0);
1492 			fetch(ixnew, c, d, f2, '+', 0);
1493 			break;
1494 		case 'd':
1495 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1496 			fetch(ixold, a, b, f1, '-', 0);
1497 			break;
1498 		case 'a':
1499 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1500 			fetch(ixnew, c, d, f2, '+', 0);
1501 			break;
1502 		}
1503 		lowa = b + 1;
1504 		lowc = d + 1;
1505 	}
1506 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1507 
1508 	context_vec_ptr = context_vec_start - 1;
1509 }
1510 
1511 static void
1512 print_header(const char *file1, const char *file2)
1513 {
1514 	if (label[0] != NULL)
1515 		printf("%s %s\n", format == D_CONTEXT ? "***" : "---",
1516 		    label[0]);
1517 	else
1518 		printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---",
1519 		    file1, ctime(&stb1.st_mtime));
1520 	if (label[1] != NULL)
1521 		printf("%s %s\n", format == D_CONTEXT ? "---" : "+++",
1522 		    label[1]);
1523 	else
1524 		printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++",
1525 		    file2, ctime(&stb2.st_mtime));
1526 }
1527