xref: /openbsd-src/usr.bin/diff/diffreg.c (revision c03f4b43f090ea639501a5e28eae384f0b3fa5e8)
1 /*	$OpenBSD: diffreg.c,v 1.59 2004/10/20 08:52:19 otto Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.59 2004/10/20 08:52:19 otto Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 
88 /*
89  * diff - compare two files.
90  */
91 
92 /*
93  *	Uses an algorithm due to Harold Stone, which finds
94  *	a pair of longest identical subsequences in the two
95  *	files.
96  *
97  *	The major goal is to generate the match vector J.
98  *	J[i] is the index of the line in file1 corresponding
99  *	to line i file0. J[i] = 0 if there is no
100  *	such line in file1.
101  *
102  *	Lines are hashed so as to work in core. All potential
103  *	matches are located by sorting the lines of each file
104  *	on the hash (called ``value''). In particular, this
105  *	collects the equivalence classes in file1 together.
106  *	Subroutine equiv replaces the value of each line in
107  *	file0 by the index of the first element of its
108  *	matching equivalence in (the reordered) file1.
109  *	To save space equiv squeezes file1 into a single
110  *	array member in which the equivalence classes
111  *	are simply concatenated, except that their first
112  *	members are flagged by changing sign.
113  *
114  *	Next the indices that point into member are unsorted into
115  *	array class according to the original order of file0.
116  *
117  *	The cleverness lies in routine stone. This marches
118  *	through the lines of file0, developing a vector klist
119  *	of "k-candidates". At step i a k-candidate is a matched
120  *	pair of lines x,y (x in file0 y in file1) such that
121  *	there is a common subsequence of length k
122  *	between the first i lines of file0 and the first y
123  *	lines of file1, but there is no such subsequence for
124  *	any smaller y. x is the earliest possible mate to y
125  *	that occurs in such a subsequence.
126  *
127  *	Whenever any of the members of the equivalence class of
128  *	lines in file1 matable to a line in file0 has serial number
129  *	less than the y of some k-candidate, that k-candidate
130  *	with the smallest such y is replaced. The new
131  *	k-candidate is chained (via pred) to the current
132  *	k-1 candidate so that the actual subsequence can
133  *	be recovered. When a member has serial number greater
134  *	that the y of all k-candidates, the klist is extended.
135  *	At the end, the longest subsequence is pulled out
136  *	and placed in the array J by unravel
137  *
138  *	With J in hand, the matches there recorded are
139  *	check'ed against reality to assure that no spurious
140  *	matches have crept in due to hashing. If they have,
141  *	they are broken, and "jackpot" is recorded--a harmless
142  *	matter except that a true match for a spuriously
143  *	mated line may now be unnecessarily reported as a change.
144  *
145  *	Much of the complexity of the program comes simply
146  *	from trying to minimize core utilization and
147  *	maximize the range of doable problems by dynamically
148  *	allocating what is needed and reusing what is not.
149  *	The core requirements for problems larger than somewhat
150  *	are (in words) 2*length(file0) + length(file1) +
151  *	3*(number of k-candidates installed),  typically about
152  *	6n words for files of length n.
153  */
154 
155 struct cand {
156 	int x;
157 	int y;
158 	int pred;
159 };
160 
161 struct line {
162 	int serial;
163 	int value;
164 } *file[2];
165 
166 /*
167  * The following struct is used to record change information when
168  * doing a "context" or "unified" diff.  (see routine "change" to
169  * understand the highly mnemonic field names)
170  */
171 struct context_vec {
172 	int a;			/* start line in old file */
173 	int b;			/* end line in old file */
174 	int c;			/* start line in new file */
175 	int d;			/* end line in new file */
176 };
177 
178 static int  *J;			/* will be overlaid on class */
179 static int  *class;		/* will be overlaid on file[0] */
180 static int  *klist;		/* will be overlaid on file[0] after class */
181 static int  *member;		/* will be overlaid on file[1] */
182 static int   clen;
183 static int   inifdef;		/* whether or not we are in a #ifdef block */
184 static int   len[2];
185 static int   pref, suff;	/* length of prefix and suffix */
186 static int   slen[2];
187 static int   anychange;
188 static long *ixnew;		/* will be overlaid on file[1] */
189 static long *ixold;		/* will be overlaid on klist */
190 static struct cand *clist;	/* merely a free storage pot for candidates */
191 static int   clistlen;		/* the length of clist */
192 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
193 static u_char *chrtran;		/* translation table for case-folding */
194 static struct context_vec *context_vec_start;
195 static struct context_vec *context_vec_end;
196 static struct context_vec *context_vec_ptr;
197 
198 #define FUNCTION_CONTEXT_SIZE	41
199 static char lastbuf[FUNCTION_CONTEXT_SIZE];
200 static int lastline;
201 static int lastmatchline;
202 
203 static FILE *opentemp(const char *);
204 static void output(char *, FILE *, char *, FILE *);
205 static void check(char *, FILE *, char *, FILE *);
206 static void range(int, int, char *);
207 static void uni_range(int, int);
208 static void dump_context_vec(FILE *, FILE *);
209 static void dump_unified_vec(FILE *, FILE *);
210 static void prepare(int, FILE *, off_t);
211 static void prune(void);
212 static void equiv(struct line *, int, struct line *, int, int *);
213 static void unravel(int);
214 static void unsort(struct line *, int, int *);
215 static void change(char *, FILE *, char *, FILE *, int, int, int, int);
216 static void sort(struct line *, int);
217 static int  ignoreline(char *);
218 static int  asciifile(FILE *);
219 static int  fetch(long *, int, int, FILE *, int, int);
220 static int  newcand(int, int, int);
221 static int  search(int *, int, int);
222 static int  skipline(FILE *);
223 static int  isqrt(int);
224 static int  stone(int *, int, int *, int *);
225 static int  readhash(FILE *);
226 static int  files_differ(FILE *, FILE *, int);
227 static __inline int min(int, int);
228 static __inline int max(int, int);
229 static char *match_function(const long *, int, FILE *);
230 static char *preadline(int, size_t, off_t);
231 
232 
233 /*
234  * chrtran points to one of 2 translation tables: cup2low if folding upper to
235  * lower case clow2low if not folding case
236  */
237 u_char clow2low[256] = {
238 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
239 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
240 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
241 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
242 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
243 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
244 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
245 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
246 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
247 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
248 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
249 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
250 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
251 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
252 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
253 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
254 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
255 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
256 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
257 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
258 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
259 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
260 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
261 	0xfd, 0xfe, 0xff
262 };
263 
264 u_char cup2low[256] = {
265 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
266 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
267 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
268 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
269 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
270 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
271 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
272 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
274 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
275 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
276 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
277 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
278 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
279 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
280 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
281 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
282 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
283 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
284 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
285 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
286 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
287 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
288 	0xfd, 0xfe, 0xff
289 };
290 
291 int
292 diffreg(char *ofile1, char *ofile2, int flags)
293 {
294 	char *file1 = ofile1;
295 	char *file2 = ofile2;
296 	FILE *f1 = NULL;
297 	FILE *f2 = NULL;
298 	int rval = D_SAME;
299 	int i, ostdout = -1;
300 	pid_t pid = -1;
301 
302 	anychange = 0;
303 	lastline = 0;
304 	lastmatchline = 0;
305 	context_vec_ptr = context_vec_start - 1;
306 	chrtran = (iflag ? cup2low : clow2low);
307 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
308 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
309 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
310 		goto closem;
311 
312 	if (flags & D_EMPTY1)
313 		f1 = fopen(_PATH_DEVNULL, "r");
314 	else {
315 		if (!S_ISREG(stb1.st_mode)) {
316 			if ((f1 = opentemp(file1)) == NULL ||
317 			    fstat(fileno(f1), &stb1) < 0) {
318 				warn("%s", file1);
319 				status |= 2;
320 				goto closem;
321 			}
322 		} else if (strcmp(file1, "-") == 0)
323 			f1 = stdin;
324 		else
325 			f1 = fopen(file1, "r");
326 	}
327 	if (f1 == NULL) {
328 		warn("%s", file1);
329 		status |= 2;
330 		goto closem;
331 	}
332 
333 	if (flags & D_EMPTY2)
334 		f2 = fopen(_PATH_DEVNULL, "r");
335 	else {
336 		if (!S_ISREG(stb2.st_mode)) {
337 			if ((f2 = opentemp(file2)) == NULL ||
338 			    fstat(fileno(f2), &stb2) < 0) {
339 				warn("%s", file2);
340 				status |= 2;
341 				goto closem;
342 			}
343 		} else if (strcmp(file2, "-") == 0)
344 			f2 = stdin;
345 		else
346 			f2 = fopen(file2, "r");
347 	}
348 	if (f2 == NULL) {
349 		warn("%s", file2);
350 		status |= 2;
351 		goto closem;
352 	}
353 
354 	switch (files_differ(f1, f2, flags)) {
355 	case 0:
356 		goto closem;
357 	case 1:
358 		break;
359 	default:
360 		/* error */
361 		status |= 2;
362 		goto closem;
363 	}
364 
365 	if (!asciifile(f1) || !asciifile(f2)) {
366 		rval = D_BINARY;
367 		status |= 1;
368 		goto closem;
369 	}
370 	if (lflag) {
371 		/* redirect stdout to pr */
372 		int pfd[2];
373 		char *header;
374 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
375 
376 		easprintf(&header, "%s %s %s", diffargs, file1, file2);
377 		prargv[2] = header;
378 		fflush(stdout);
379 		rewind(stdout);
380 		pipe(pfd);
381 		switch ((pid = fork())) {
382 		case -1:
383 			warnx("No more processes");
384 			status |= 2;
385 			free(header);
386 			return (D_ERROR);
387 		case 0:
388 			/* child */
389 			if (pfd[0] != STDIN_FILENO) {
390 				dup2(pfd[0], STDIN_FILENO);
391 				close(pfd[0]);
392 			}
393 			close(pfd[1]);
394 			execv(_PATH_PR, prargv);
395 			_exit(127);
396 		default:
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			free(header);
406 		}
407 	} else if (flags & D_HEADER)
408 		printf("%s %s %s\n", diffargs, file1, file2);
409 	prepare(0, f1, stb1.st_size);
410 	prepare(1, f2, stb2.st_size);
411 	prune();
412 	sort(sfile[0], slen[0]);
413 	sort(sfile[1], slen[1]);
414 
415 	member = (int *)file[1];
416 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
417 	member = erealloc(member, (slen[1] + 2) * sizeof(int));
418 
419 	class = (int *)file[0];
420 	unsort(sfile[0], slen[0], class);
421 	class = erealloc(class, (slen[0] + 2) * sizeof(int));
422 
423 	klist = emalloc((slen[0] + 2) * sizeof(int));
424 	clen = 0;
425 	clistlen = 100;
426 	clist = emalloc(clistlen * sizeof(struct cand));
427 	i = stone(class, slen[0], member, klist);
428 	free(member);
429 	free(class);
430 
431 	J = erealloc(J, (len[0] + 2) * sizeof(int));
432 	unravel(klist[i]);
433 	free(clist);
434 	free(klist);
435 
436 	ixold = erealloc(ixold, (len[0] + 2) * sizeof(long));
437 	ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long));
438 	check(file1, f1, file2, f2);
439 	output(file1, f1, file2, f2);
440 	if (ostdout != -1) {
441 		int wstatus;
442 
443 		/* close the pipe to pr and restore stdout */
444 		fflush(stdout);
445 		rewind(stdout);
446 		if (ostdout != STDOUT_FILENO) {
447 			close(STDOUT_FILENO);
448 			dup2(ostdout, STDOUT_FILENO);
449 			close(ostdout);
450 		}
451 		waitpid(pid, &wstatus, 0);
452 	}
453 closem:
454 	if (anychange) {
455 		status |= 1;
456 		if (rval == D_SAME)
457 			rval = D_DIFFER;
458 	}
459 	if (f1 != NULL)
460 		fclose(f1);
461 	if (f2 != NULL)
462 		fclose(f2);
463 	if (file1 != ofile1)
464 		free(file1);
465 	if (file2 != ofile2)
466 		free(file2);
467 	return (rval);
468 }
469 
470 /*
471  * Check to see if the given files differ.
472  * Returns 0 if they are the same, 1 if different, and -1 on error.
473  * XXX - could use code from cmp(1) [faster]
474  */
475 static int
476 files_differ(FILE *f1, FILE *f2, int flags)
477 {
478 	char buf1[BUFSIZ], buf2[BUFSIZ];
479 	size_t i, j;
480 
481 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
482 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
483 		return (1);
484 	for (;;) {
485 		i = fread(buf1, 1, sizeof(buf1), f1);
486 		j = fread(buf2, 1, sizeof(buf2), f2);
487 		if (i != j)
488 			return (1);
489 		if (i == 0 && j == 0) {
490 			if (ferror(f1) || ferror(f2))
491 				return (1);
492 			return (0);
493 		}
494 		if (memcmp(buf1, buf2, i) != 0)
495 			return (1);
496 	}
497 }
498 
499 static FILE *
500 opentemp(const char *file)
501 {
502 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
503 	ssize_t nread;
504 	int ifd, ofd;
505 
506 	if (strcmp(file, "-") == 0)
507 		ifd = STDIN_FILENO;
508 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
509 		return (NULL);
510 
511 	if ((tempdir = getenv("TMPDIR")) == NULL)
512 		tempdir = _PATH_TMP;
513 	if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX",
514 	    tempdir) >= sizeof(tempfile)) {
515 		close(ifd);
516 		errno = ENAMETOOLONG;
517 		return (NULL);
518 	}
519 
520 	if ((ofd = mkstemp(tempfile)) < 0)
521 		return (NULL);
522 	unlink(tempfile);
523 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
524 		if (write(ofd, buf, nread) != nread) {
525 			close(ifd);
526 			close(ofd);
527 			return (NULL);
528 		}
529 	}
530 	close(ifd);
531 	lseek(ofd, (off_t)0, SEEK_SET);
532 	return (fdopen(ofd, "r"));
533 }
534 
535 char *
536 splice(char *dir, char *file)
537 {
538 	char *tail, *buf;
539 
540 	if ((tail = strrchr(file, '/')) == NULL)
541 		tail = file;
542 	else
543 		tail++;
544 	easprintf(&buf, "%s/%s", dir, tail);
545 	return (buf);
546 }
547 
548 static void
549 prepare(int i, FILE *fd, off_t filesize)
550 {
551 	struct line *p;
552 	int j, h;
553 	size_t sz;
554 
555 	rewind(fd);
556 
557 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
558 	if (sz < 100)
559 		sz = 100;
560 
561 	p = emalloc((sz + 3) * sizeof(struct line));
562 	for (j = 0; (h = readhash(fd));) {
563 		if (j == sz) {
564 			sz = sz * 3 / 2;
565 			p = erealloc(p, (sz + 3) * sizeof(struct line));
566 		}
567 		p[++j].value = h;
568 	}
569 	len[i] = j;
570 	file[i] = p;
571 }
572 
573 static void
574 prune(void)
575 {
576 	int i, j;
577 
578 	for (pref = 0; pref < len[0] && pref < len[1] &&
579 	    file[0][pref + 1].value == file[1][pref + 1].value;
580 	    pref++)
581 		;
582 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
583 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
584 	    suff++)
585 		;
586 	for (j = 0; j < 2; j++) {
587 		sfile[j] = file[j] + pref;
588 		slen[j] = len[j] - pref - suff;
589 		for (i = 0; i <= slen[j]; i++)
590 			sfile[j][i].serial = i;
591 	}
592 }
593 
594 static void
595 equiv(struct line *a, int n, struct line *b, int m, int *c)
596 {
597 	int i, j;
598 
599 	i = j = 1;
600 	while (i <= n && j <= m) {
601 		if (a[i].value < b[j].value)
602 			a[i++].value = 0;
603 		else if (a[i].value == b[j].value)
604 			a[i++].value = j;
605 		else
606 			j++;
607 	}
608 	while (i <= n)
609 		a[i++].value = 0;
610 	b[m + 1].value = 0;
611 	j = 0;
612 	while (++j <= m) {
613 		c[j] = -b[j].serial;
614 		while (b[j + 1].value == b[j].value) {
615 			j++;
616 			c[j] = b[j].serial;
617 		}
618 	}
619 	c[j] = -1;
620 }
621 
622 /* Code taken from ping.c */
623 static int
624 isqrt(int n)
625 {
626 	int y, x = 1;
627 
628 	if (n == 0)
629 		return(0);
630 
631 	do { /* newton was a stinker */
632 		y = x;
633 		x = n / x;
634 		x += y;
635 		x /= 2;
636 	} while ((x - y) > 1 || (x - y) < -1);
637 
638 	return (x);
639 }
640 
641 static int
642 stone(int *a, int n, int *b, int *c)
643 {
644 	int i, k, y, j, l;
645 	int oldc, tc, oldl;
646 	u_int numtries;
647 
648 	const u_int bound = dflag ? UINT_MAX : max(256, isqrt(n));
649 
650 	k = 0;
651 	c[0] = newcand(0, 0, 0);
652 	for (i = 1; i <= n; i++) {
653 		j = a[i];
654 		if (j == 0)
655 			continue;
656 		y = -b[j];
657 		oldl = 0;
658 		oldc = c[0];
659 		numtries = 0;
660 		do {
661 			if (y <= clist[oldc].y)
662 				continue;
663 			l = search(c, k, y);
664 			if (l != oldl + 1)
665 				oldc = c[l - 1];
666 			if (l <= k) {
667 				if (clist[c[l]].y <= y)
668 					continue;
669 				tc = c[l];
670 				c[l] = newcand(i, y, oldc);
671 				oldc = tc;
672 				oldl = l;
673 				numtries++;
674 			} else {
675 				c[l] = newcand(i, y, oldc);
676 				k++;
677 				break;
678 			}
679 		} while ((y = b[++j]) > 0 && numtries < bound);
680 	}
681 	return (k);
682 }
683 
684 static int
685 newcand(int x, int y, int pred)
686 {
687 	struct cand *q;
688 
689 	if (clen == clistlen) {
690 		clistlen = clistlen * 11 / 10;
691 		clist = erealloc(clist, clistlen * sizeof(struct cand));
692 	}
693 	q = clist + clen;
694 	q->x = x;
695 	q->y = y;
696 	q->pred = pred;
697 	return (clen++);
698 }
699 
700 static int
701 search(int *c, int k, int y)
702 {
703 	int i, j, l, t;
704 
705 	if (clist[c[k]].y < y)	/* quick look for typical case */
706 		return (k + 1);
707 	i = 0;
708 	j = k + 1;
709 	while (1) {
710 		l = i + j;
711 		if ((l >>= 1) <= i)
712 			break;
713 		t = clist[c[l]].y;
714 		if (t > y)
715 			j = l;
716 		else if (t < y)
717 			i = l;
718 		else
719 			return (l);
720 	}
721 	return (l + 1);
722 }
723 
724 static void
725 unravel(int p)
726 {
727 	struct cand *q;
728 	int i;
729 
730 	for (i = 0; i <= len[0]; i++)
731 		J[i] = i <= pref ? i :
732 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
733 	for (q = clist + p; q->y != 0; q = clist + q->pred)
734 		J[q->x + pref] = q->y + pref;
735 }
736 
737 /*
738  * Check does double duty:
739  *  1.	ferret out any fortuitous correspondences due
740  *	to confounding by hashing (which result in "jackpot")
741  *  2.  collect random access indexes to the two files
742  */
743 static void
744 check(char *file1, FILE *f1, char *file2, FILE *f2)
745 {
746 	int i, j, jackpot, c, d;
747 	long ctold, ctnew;
748 
749 	rewind(f1);
750 	rewind(f2);
751 	j = 1;
752 	ixold[0] = ixnew[0] = 0;
753 	jackpot = 0;
754 	ctold = ctnew = 0;
755 	for (i = 1; i <= len[0]; i++) {
756 		if (J[i] == 0) {
757 			ixold[i] = ctold += skipline(f1);
758 			continue;
759 		}
760 		while (j < J[i]) {
761 			ixnew[j] = ctnew += skipline(f2);
762 			j++;
763 		}
764 		if (bflag || wflag || iflag) {
765 			for (;;) {
766 				c = getc(f1);
767 				d = getc(f2);
768 				/*
769 				 * GNU diff ignores a missing newline
770 				 * in one file if bflag || wflag.
771 				 */
772 				if ((bflag || wflag) &&
773 				    ((c == EOF && d == '\n') ||
774 				    (c == '\n' && d == EOF))) {
775 					break;
776 				}
777 				ctold++;
778 				ctnew++;
779 				if (bflag && isspace(c) && isspace(d)) {
780 					do {
781 						if (c == '\n')
782 							break;
783 						ctold++;
784 					} while (isspace(c = getc(f1)));
785 					do {
786 						if (d == '\n')
787 							break;
788 						ctnew++;
789 					} while (isspace(d = getc(f2)));
790 				} else if (wflag) {
791 					while (isspace(c) && c != '\n') {
792 						c = getc(f1);
793 						ctold++;
794 					}
795 					while (isspace(d) && d != '\n') {
796 						d = getc(f2);
797 						ctnew++;
798 					}
799 				}
800 				if (chrtran[c] != chrtran[d]) {
801 					jackpot++;
802 					J[i] = 0;
803 					if (c != '\n' && c != EOF)
804 						ctold += skipline(f1);
805 					if (d != '\n' && c != EOF)
806 						ctnew += skipline(f2);
807 					break;
808 				}
809 				if (c == '\n' || c == EOF)
810 					break;
811 			}
812 		} else {
813 			for (;;) {
814 				ctold++;
815 				ctnew++;
816 				if ((c = getc(f1)) != (d = getc(f2))) {
817 					/* jackpot++; */
818 					J[i] = 0;
819 					if (c != '\n' && c != EOF)
820 						ctold += skipline(f1);
821 					if (d != '\n' && c != EOF)
822 						ctnew += skipline(f2);
823 					break;
824 				}
825 				if (c == '\n' || c == EOF)
826 					break;
827 			}
828 		}
829 		ixold[i] = ctold;
830 		ixnew[j] = ctnew;
831 		j++;
832 	}
833 	for (; j <= len[1]; j++)
834 		ixnew[j] = ctnew += skipline(f2);
835 	/*
836 	 * if (jackpot)
837 	 *	fprintf(stderr, "jackpot\n");
838 	 */
839 }
840 
841 /* shellsort CACM #201 */
842 static void
843 sort(struct line *a, int n)
844 {
845 	struct line *ai, *aim, w;
846 	int j, m = 0, k;
847 
848 	if (n == 0)
849 		return;
850 	for (j = 1; j <= n; j *= 2)
851 		m = 2 * j - 1;
852 	for (m /= 2; m != 0; m /= 2) {
853 		k = n - m;
854 		for (j = 1; j <= k; j++) {
855 			for (ai = &a[j]; ai > a; ai -= m) {
856 				aim = &ai[m];
857 				if (aim < ai)
858 					break;	/* wraparound */
859 				if (aim->value > ai[0].value ||
860 				    (aim->value == ai[0].value &&
861 					aim->serial > ai[0].serial))
862 					break;
863 				w.value = ai[0].value;
864 				ai[0].value = aim->value;
865 				aim->value = w.value;
866 				w.serial = ai[0].serial;
867 				ai[0].serial = aim->serial;
868 				aim->serial = w.serial;
869 			}
870 		}
871 	}
872 }
873 
874 static void
875 unsort(struct line *f, int l, int *b)
876 {
877 	int *a, i;
878 
879 	a = emalloc((l + 1) * sizeof(int));
880 	for (i = 1; i <= l; i++)
881 		a[f[i].serial] = f[i].value;
882 	for (i = 1; i <= l; i++)
883 		b[i] = a[i];
884 	free(a);
885 }
886 
887 static int
888 skipline(FILE *f)
889 {
890 	int i, c;
891 
892 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
893 		continue;
894 	return (i);
895 }
896 
897 static void
898 output(char *file1, FILE *f1, char *file2, FILE *f2)
899 {
900 	int m, i0, i1, j0, j1;
901 
902 	rewind(f1);
903 	rewind(f2);
904 	m = len[0];
905 	J[0] = 0;
906 	J[m + 1] = len[1] + 1;
907 	if (format != D_EDIT) {
908 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
909 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
910 				i0++;
911 			j0 = J[i0 - 1] + 1;
912 			i1 = i0 - 1;
913 			while (i1 < m && J[i1 + 1] == 0)
914 				i1++;
915 			j1 = J[i1 + 1] - 1;
916 			J[i1] = j1;
917 			change(file1, f1, file2, f2, i0, i1, j0, j1);
918 		}
919 	} else {
920 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
921 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
922 				i0--;
923 			j0 = J[i0 + 1] - 1;
924 			i1 = i0 + 1;
925 			while (i1 > 1 && J[i1 - 1] == 0)
926 				i1--;
927 			j1 = J[i1 - 1] + 1;
928 			J[i1] = j1;
929 			change(file1, f1, file2, f2, i1, i0, j1, j0);
930 		}
931 	}
932 	if (m == 0)
933 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
934 	if (format == D_IFDEF) {
935 		for (;;) {
936 #define	c i0
937 			if ((c = getc(f1)) == EOF)
938 				return;
939 			putchar(c);
940 		}
941 #undef c
942 	}
943 	if (anychange != 0) {
944 		if (format == D_CONTEXT)
945 			dump_context_vec(f1, f2);
946 		else if (format == D_UNIFIED)
947 			dump_unified_vec(f1, f2);
948 	}
949 }
950 
951 static __inline void
952 range(int a, int b, char *separator)
953 {
954 	printf("%d", a > b ? b : a);
955 	if (a < b)
956 		printf("%s%d", separator, b);
957 }
958 
959 static __inline void
960 uni_range(int a, int b)
961 {
962 	if (a < b)
963 		printf("%d,%d", a, b - a + 1);
964 	else if (a == b)
965 		printf("%d", b);
966 	else
967 		printf("%d,0", b);
968 }
969 
970 static char *
971 preadline(int fd, size_t len, off_t off)
972 {
973 	char *line;
974 	ssize_t nr;
975 
976 	line = emalloc(len + 1);
977 	if ((nr = pread(fd, line, len, off)) < 0)
978 		err(1, "preadline");
979 	line[nr] = '\0';
980 	return (line);
981 }
982 
983 static int
984 ignoreline(char *line)
985 {
986 	int ret;
987 
988 	ret = regexec(&ignore_re, line, 0, NULL, 0);
989 	free(line);
990 	return (ret == 0);	/* if it matched, it should be ignored. */
991 }
992 
993 /*
994  * Indicate that there is a difference between lines a and b of the from file
995  * to get to lines c to d of the to file.  If a is greater then b then there
996  * are no lines in the from file involved and this means that there were
997  * lines appended (beginning at b).  If c is greater than d then there are
998  * lines missing from the to file.
999  */
1000 static void
1001 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
1002 {
1003 	static size_t max_context = 64;
1004 	int i;
1005 
1006 restart:
1007 	if (format != D_IFDEF && a > b && c > d)
1008 		return;
1009 	if (ignore_pats != NULL) {
1010 		char *line;
1011 		/*
1012 		 * All lines in the change, insert, or delete must
1013 		 * match an ignore pattern for the change to be
1014 		 * ignored.
1015 		 */
1016 		if (a <= b) {		/* Changes and deletes. */
1017 			for (i = a; i <= b; i++) {
1018 				line = preadline(fileno(f1),
1019 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1020 				if (!ignoreline(line))
1021 					goto proceed;
1022 			}
1023 		}
1024 		if (a > b || c <= d) {	/* Changes and inserts. */
1025 			for (i = c; i <= d; i++) {
1026 				line = preadline(fileno(f2),
1027 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1028 				if (!ignoreline(line))
1029 					goto proceed;
1030 			}
1031 		}
1032 		return;
1033 	}
1034 proceed:
1035 	if (format == D_CONTEXT || format == D_UNIFIED) {
1036 		/*
1037 		 * Allocate change records as needed.
1038 		 */
1039 		if (context_vec_ptr == context_vec_end - 1) {
1040 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1041 			max_context <<= 1;
1042 			context_vec_start = erealloc(context_vec_start,
1043 			    max_context * sizeof(struct context_vec));
1044 			context_vec_end = context_vec_start + max_context;
1045 			context_vec_ptr = context_vec_start + offset;
1046 		}
1047 		if (anychange == 0) {
1048 			/*
1049 			 * Print the context/unidiff header first time through.
1050 			 */
1051 			if (label != NULL)
1052 				printf("%s %s\n",
1053 				    format == D_CONTEXT ? "***" : "---", label);
1054 			else
1055 				printf("%s %s\t%s",
1056 				    format == D_CONTEXT ? "***" : "---", file1,
1057 				    ctime(&stb1.st_mtime));
1058 			printf("%s %s\t%s",
1059 			    format == D_CONTEXT ? "---" : "+++", file2,
1060 			    ctime(&stb2.st_mtime));
1061 			anychange = 1;
1062 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1063 		    c > context_vec_ptr->d + (2 * context) + 1) {
1064 			/*
1065 			 * If this change is more than 'context' lines from the
1066 			 * previous change, dump the record and reset it.
1067 			 */
1068 			if (format == D_CONTEXT)
1069 				dump_context_vec(f1, f2);
1070 			else
1071 				dump_unified_vec(f1, f2);
1072 		}
1073 		context_vec_ptr++;
1074 		context_vec_ptr->a = a;
1075 		context_vec_ptr->b = b;
1076 		context_vec_ptr->c = c;
1077 		context_vec_ptr->d = d;
1078 		return;
1079 	}
1080 	if (anychange == 0)
1081 		anychange = 1;
1082 	switch (format) {
1083 
1084 	case D_BRIEF:
1085 		return;
1086 	case D_NORMAL:
1087 	case D_EDIT:
1088 		range(a, b, ",");
1089 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1090 		if (format == D_NORMAL)
1091 			range(c, d, ",");
1092 		putchar('\n');
1093 		break;
1094 	case D_REVERSE:
1095 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1096 		range(a, b, " ");
1097 		putchar('\n');
1098 		break;
1099 	case D_NREVERSE:
1100 		if (a > b)
1101 			printf("a%d %d\n", b, d - c + 1);
1102 		else {
1103 			printf("d%d %d\n", a, b - a + 1);
1104 			if (!(c > d))
1105 				/* add changed lines */
1106 				printf("a%d %d\n", b, d - c + 1);
1107 		}
1108 		break;
1109 	}
1110 	if (format == D_NORMAL || format == D_IFDEF) {
1111 		fetch(ixold, a, b, f1, '<', 1);
1112 		if (a <= b && c <= d && format == D_NORMAL)
1113 			puts("---");
1114 	}
1115 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1116 	if (i != 0 && format == D_EDIT) {
1117 		/*
1118 		 * A non-zero return value for D_EDIT indicates that the
1119 		 * last line printed was a bare dot (".") that has been
1120 		 * escaped as ".." to prevent ed(1) from misinterpreting
1121 		 * it.  We have to add a substitute command to change this
1122 		 * back and restart where we left off.
1123 		 */
1124 		puts(".");
1125 		printf("%ds/^\\.\\././\n", a);
1126 		a += i;
1127 		c += i;
1128 		goto restart;
1129 	}
1130 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1131 		puts(".");
1132 	if (inifdef) {
1133 		printf("#endif /* %s */\n", ifdefname);
1134 		inifdef = 0;
1135 	}
1136 }
1137 
1138 static int
1139 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1140 {
1141 	int i, j, c, lastc, col, nc;
1142 
1143 	/*
1144 	 * When doing #ifdef's, copy down to current line
1145 	 * if this is the first file, so that stuff makes it to output.
1146 	 */
1147 	if (format == D_IFDEF && oldfile) {
1148 		long curpos = ftell(lb);
1149 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1150 		nc = f[a > b ? b : a - 1] - curpos;
1151 		for (i = 0; i < nc; i++)
1152 			putchar(getc(lb));
1153 	}
1154 	if (a > b)
1155 		return (0);
1156 	if (format == D_IFDEF) {
1157 		if (inifdef) {
1158 			printf("#else /* %s%s */\n",
1159 			    oldfile == 1 ? "!" : "", ifdefname);
1160 		} else {
1161 			if (oldfile)
1162 				printf("#ifndef %s\n", ifdefname);
1163 			else
1164 				printf("#ifdef %s\n", ifdefname);
1165 		}
1166 		inifdef = 1 + oldfile;
1167 	}
1168 	for (i = a; i <= b; i++) {
1169 		fseek(lb, f[i - 1], SEEK_SET);
1170 		nc = f[i] - f[i - 1];
1171 		if (format != D_IFDEF && ch != '\0') {
1172 			putchar(ch);
1173 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1174 			    || format == D_UNIFIED))
1175 				putchar('\t');
1176 			else if (format != D_UNIFIED)
1177 				putchar(' ');
1178 		}
1179 		col = 0;
1180 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1181 			if ((c = getc(lb)) == EOF) {
1182 				if (format == D_EDIT || format == D_REVERSE ||
1183 				    format == D_NREVERSE)
1184 					warnx("No newline at end of file");
1185 				else
1186 					puts("\n\\ No newline at end of file");
1187 				return (0);
1188 			}
1189 			if (c == '\t' && tflag) {
1190 				do {
1191 					putchar(' ');
1192 				} while (++col & 7);
1193 			} else {
1194 				if (format == D_EDIT && j == 1 && c == '\n'
1195 				    && lastc == '.') {
1196 					/*
1197 					 * Don't print a bare "." line
1198 					 * since that will confuse ed(1).
1199 					 * Print ".." instead and return,
1200 					 * giving the caller an offset
1201 					 * from which to restart.
1202 					 */
1203 					puts(".");
1204 					return (i - a + 1);
1205 				}
1206 				putchar(c);
1207 				col++;
1208 			}
1209 		}
1210 	}
1211 	return (0);
1212 }
1213 
1214 /*
1215  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1216  */
1217 static int
1218 readhash(FILE *f)
1219 {
1220 	int i, t, space;
1221 	int sum;
1222 
1223 	sum = 1;
1224 	space = 0;
1225 	if (!bflag && !wflag) {
1226 		if (iflag)
1227 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1228 				if (t == EOF) {
1229 					if (i == 0)
1230 						return (0);
1231 					break;
1232 				}
1233 				sum = sum * 127 + chrtran[t];
1234 			}
1235 		else
1236 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1237 				if (t == EOF) {
1238 					if (i == 0)
1239 						return (0);
1240 					break;
1241 				}
1242 				sum = sum * 127 + t;
1243 			}
1244 	} else {
1245 		for (i = 0;;) {
1246 			switch (t = getc(f)) {
1247 			case '\t':
1248 			case ' ':
1249 				space++;
1250 				continue;
1251 			default:
1252 				if (space && !wflag) {
1253 					i++;
1254 					space = 0;
1255 				}
1256 				sum = sum * 127 + chrtran[t];
1257 				i++;
1258 				continue;
1259 			case EOF:
1260 				if (i == 0)
1261 					return (0);
1262 				/* FALLTHROUGH */
1263 			case '\n':
1264 				break;
1265 			}
1266 			break;
1267 		}
1268 	}
1269 	/*
1270 	 * There is a remote possibility that we end up with a zero sum.
1271 	 * Zero is used as an EOF marker, so return 1 instead.
1272 	 */
1273 	return (sum == 0 ? 1 : sum);
1274 }
1275 
1276 static int
1277 asciifile(FILE *f)
1278 {
1279 	char buf[BUFSIZ];
1280 	int i, cnt;
1281 
1282 	if (aflag || f == NULL)
1283 		return (1);
1284 
1285 	rewind(f);
1286 	cnt = fread(buf, 1, sizeof(buf), f);
1287 	for (i = 0; i < cnt; i++)
1288 		if (!isprint(buf[i]) && !isspace(buf[i]))
1289 			return (0);
1290 	return (1);
1291 }
1292 
1293 static __inline int min(int a, int b)
1294 {
1295 	return (a < b ? a : b);
1296 }
1297 
1298 static __inline int max(int a, int b)
1299 {
1300 	return (a > b ? a : b);
1301 }
1302 
1303 static char *
1304 match_function(const long *f, int pos, FILE *file)
1305 {
1306 	char buf[FUNCTION_CONTEXT_SIZE];
1307 	size_t nc;
1308 	int last = lastline;
1309 	char *p;
1310 
1311 	lastline = pos;
1312 	while (pos > last) {
1313 		fseek(file, f[pos - 1], SEEK_SET);
1314 		nc = f[pos] - f[pos - 1];
1315 		if (nc >= sizeof(buf))
1316 			nc = sizeof(buf) - 1;
1317 		nc = fread(buf, 1, nc, file);
1318 		if (nc > 0) {
1319 			buf[nc] = '\0';
1320 			p = strchr(buf, '\n');
1321 			if (p != NULL)
1322 				*p = '\0';
1323 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1324 				strlcpy(lastbuf, buf, sizeof lastbuf);
1325 				lastmatchline = pos;
1326 				return lastbuf;
1327 			}
1328 		}
1329 		pos--;
1330 	}
1331 	return lastmatchline > 0 ? lastbuf : NULL;
1332 }
1333 
1334 /* dump accumulated "context" diff changes */
1335 static void
1336 dump_context_vec(FILE *f1, FILE *f2)
1337 {
1338 	struct context_vec *cvp = context_vec_start;
1339 	int lowa, upb, lowc, upd, do_output;
1340 	int a, b, c, d;
1341 	char ch, *f;
1342 
1343 	if (context_vec_start > context_vec_ptr)
1344 		return;
1345 
1346 	b = d = 0;		/* gcc */
1347 	lowa = max(1, cvp->a - context);
1348 	upb = min(len[0], context_vec_ptr->b + context);
1349 	lowc = max(1, cvp->c - context);
1350 	upd = min(len[1], context_vec_ptr->d + context);
1351 
1352 	printf("***************");
1353 	if (pflag) {
1354 		f = match_function(ixold, lowa-1, f1);
1355 		if (f != NULL) {
1356 			putchar(' ');
1357 			fputs(f, stdout);
1358 		}
1359 	}
1360 	printf("\n*** ");
1361 	range(lowa, upb, ",");
1362 	printf(" ****\n");
1363 
1364 	/*
1365 	 * Output changes to the "old" file.  The first loop suppresses
1366 	 * output if there were no changes to the "old" file (we'll see
1367 	 * the "old" lines as context in the "new" list).
1368 	 */
1369 	do_output = 0;
1370 	for (; cvp <= context_vec_ptr; cvp++)
1371 		if (cvp->a <= cvp->b) {
1372 			cvp = context_vec_start;
1373 			do_output++;
1374 			break;
1375 		}
1376 	if (do_output) {
1377 		while (cvp <= context_vec_ptr) {
1378 			a = cvp->a;
1379 			b = cvp->b;
1380 			c = cvp->c;
1381 			d = cvp->d;
1382 
1383 			if (a <= b && c <= d)
1384 				ch = 'c';
1385 			else
1386 				ch = (a <= b) ? 'd' : 'a';
1387 
1388 			if (ch == 'a')
1389 				fetch(ixold, lowa, b, f1, ' ', 0);
1390 			else {
1391 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1392 				fetch(ixold, a, b, f1,
1393 				    ch == 'c' ? '!' : '-', 0);
1394 			}
1395 			lowa = b + 1;
1396 			cvp++;
1397 		}
1398 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1399 	}
1400 	/* output changes to the "new" file */
1401 	printf("--- ");
1402 	range(lowc, upd, ",");
1403 	printf(" ----\n");
1404 
1405 	do_output = 0;
1406 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1407 		if (cvp->c <= cvp->d) {
1408 			cvp = context_vec_start;
1409 			do_output++;
1410 			break;
1411 		}
1412 	if (do_output) {
1413 		while (cvp <= context_vec_ptr) {
1414 			a = cvp->a;
1415 			b = cvp->b;
1416 			c = cvp->c;
1417 			d = cvp->d;
1418 
1419 			if (a <= b && c <= d)
1420 				ch = 'c';
1421 			else
1422 				ch = (a <= b) ? 'd' : 'a';
1423 
1424 			if (ch == 'd')
1425 				fetch(ixnew, lowc, d, f2, ' ', 0);
1426 			else {
1427 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1428 				fetch(ixnew, c, d, f2,
1429 				    ch == 'c' ? '!' : '+', 0);
1430 			}
1431 			lowc = d + 1;
1432 			cvp++;
1433 		}
1434 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1435 	}
1436 	context_vec_ptr = context_vec_start - 1;
1437 }
1438 
1439 /* dump accumulated "unified" diff changes */
1440 static void
1441 dump_unified_vec(FILE *f1, FILE *f2)
1442 {
1443 	struct context_vec *cvp = context_vec_start;
1444 	int lowa, upb, lowc, upd;
1445 	int a, b, c, d;
1446 	char ch, *f;
1447 
1448 	if (context_vec_start > context_vec_ptr)
1449 		return;
1450 
1451 	b = d = 0;		/* gcc */
1452 	lowa = max(1, cvp->a - context);
1453 	upb = min(len[0], context_vec_ptr->b + context);
1454 	lowc = max(1, cvp->c - context);
1455 	upd = min(len[1], context_vec_ptr->d + context);
1456 
1457 	fputs("@@ -", stdout);
1458 	uni_range(lowa, upb);
1459 	fputs(" +", stdout);
1460 	uni_range(lowc, upd);
1461 	fputs(" @@", stdout);
1462 	if (pflag) {
1463 		f = match_function(ixold, lowa-1, f1);
1464 		if (f != NULL) {
1465 			putchar(' ');
1466 			fputs(f, stdout);
1467 		}
1468 	}
1469 	putchar('\n');
1470 
1471 	/*
1472 	 * Output changes in "unified" diff format--the old and new lines
1473 	 * are printed together.
1474 	 */
1475 	for (; cvp <= context_vec_ptr; cvp++) {
1476 		a = cvp->a;
1477 		b = cvp->b;
1478 		c = cvp->c;
1479 		d = cvp->d;
1480 
1481 		/*
1482 		 * c: both new and old changes
1483 		 * d: only changes in the old file
1484 		 * a: only changes in the new file
1485 		 */
1486 		if (a <= b && c <= d)
1487 			ch = 'c';
1488 		else
1489 			ch = (a <= b) ? 'd' : 'a';
1490 
1491 		switch (ch) {
1492 		case 'c':
1493 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1494 			fetch(ixold, a, b, f1, '-', 0);
1495 			fetch(ixnew, c, d, f2, '+', 0);
1496 			break;
1497 		case 'd':
1498 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1499 			fetch(ixold, a, b, f1, '-', 0);
1500 			break;
1501 		case 'a':
1502 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1503 			fetch(ixnew, c, d, f2, '+', 0);
1504 			break;
1505 		}
1506 		lowa = b + 1;
1507 		lowc = d + 1;
1508 	}
1509 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1510 
1511 	context_vec_ptr = context_vec_start - 1;
1512 }
1513