xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 60b54a0e2d70d765fdc7eacff3d25d38e476538e)
1 /*	$OpenBSD: diffreg.c,v 1.78 2010/07/16 08:06:00 ray Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 #define	diff_output	printf
176 static FILE	*opentemp(const char *);
177 static void	 output(char *, FILE *, char *, FILE *, int);
178 static void	 check(FILE *, FILE *, int);
179 static void	 range(int, int, char *);
180 static void	 uni_range(int, int);
181 static void	 dump_context_vec(FILE *, FILE *, int);
182 static void	 dump_unified_vec(FILE *, FILE *, int);
183 static void	 prepare(int, FILE *, off_t, int);
184 static void	 prune(void);
185 static void	 equiv(struct line *, int, struct line *, int, int *);
186 static void	 unravel(int);
187 static void	 unsort(struct line *, int, int *);
188 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
189 static void	 sort(struct line *, int);
190 static void	 print_header(const char *, const char *);
191 static int	 ignoreline(char *);
192 static int	 asciifile(FILE *);
193 static int	 fetch(long *, int, int, FILE *, int, int, int);
194 static int	 newcand(int, int, int);
195 static int	 search(int *, int, int);
196 static int	 skipline(FILE *);
197 static int	 isqrt(int);
198 static int	 stone(int *, int, int *, int *, int);
199 static int	 readhash(FILE *, int);
200 static int	 files_differ(FILE *, FILE *, int);
201 static char	*match_function(const long *, int, FILE *);
202 static char	*preadline(int, size_t, off_t);
203 
204 static int  *J;			/* will be overlaid on class */
205 static int  *class;		/* will be overlaid on file[0] */
206 static int  *klist;		/* will be overlaid on file[0] after class */
207 static int  *member;		/* will be overlaid on file[1] */
208 static int   clen;
209 static int   inifdef;		/* whether or not we are in a #ifdef block */
210 static int   len[2];
211 static int   pref, suff;	/* length of prefix and suffix */
212 static int   slen[2];
213 static int   anychange;
214 static long *ixnew;		/* will be overlaid on file[1] */
215 static long *ixold;		/* will be overlaid on klist */
216 static struct cand *clist;	/* merely a free storage pot for candidates */
217 static int   clistlen;		/* the length of clist */
218 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
219 static u_char *chrtran;		/* translation table for case-folding */
220 static struct context_vec *context_vec_start;
221 static struct context_vec *context_vec_end;
222 static struct context_vec *context_vec_ptr;
223 
224 #define FUNCTION_CONTEXT_SIZE	55
225 static char lastbuf[FUNCTION_CONTEXT_SIZE];
226 static int lastline;
227 static int lastmatchline;
228 
229 
230 /*
231  * chrtran points to one of 2 translation tables: cup2low if folding upper to
232  * lower case clow2low if not folding case
233  */
234 u_char clow2low[256] = {
235 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
236 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
237 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
238 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
239 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
240 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
241 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
242 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
243 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
244 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
245 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
246 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
247 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
248 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
249 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
250 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
251 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
252 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
253 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
254 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
255 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
256 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
257 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 	0xfd, 0xfe, 0xff
259 };
260 
261 u_char cup2low[256] = {
262 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
263 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
264 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
265 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
266 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
267 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
268 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
269 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
270 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
271 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
272 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
273 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
274 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
275 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
276 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
277 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
278 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
279 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
280 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
281 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
282 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
283 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
284 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
285 	0xfd, 0xfe, 0xff
286 };
287 
288 int
289 diffreg(char *file1, char *file2, int flags)
290 {
291 	FILE *f1, *f2;
292 	int i, rval, ostdout = -1;
293 	pid_t pid = -1;
294 
295 	f1 = f2 = NULL;
296 	rval = D_SAME;
297 	anychange = 0;
298 	lastline = 0;
299 	lastmatchline = 0;
300 	context_vec_ptr = context_vec_start - 1;
301 	if (flags & D_IGNORECASE)
302 		chrtran = cup2low;
303 	else
304 		chrtran = clow2low;
305 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
306 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
308 		goto closem;
309 
310 	if (flags & D_EMPTY1)
311 		f1 = fopen(_PATH_DEVNULL, "r");
312 	else {
313 		if (!S_ISREG(stb1.st_mode)) {
314 			if ((f1 = opentemp(file1)) == NULL ||
315 			    fstat(fileno(f1), &stb1) < 0) {
316 				warn("%s", file1);
317 				status |= 2;
318 				goto closem;
319 			}
320 		} else if (strcmp(file1, "-") == 0)
321 			f1 = stdin;
322 		else
323 			f1 = fopen(file1, "r");
324 	}
325 	if (f1 == NULL) {
326 		warn("%s", file1);
327 		status |= 2;
328 		goto closem;
329 	}
330 
331 	if (flags & D_EMPTY2)
332 		f2 = fopen(_PATH_DEVNULL, "r");
333 	else {
334 		if (!S_ISREG(stb2.st_mode)) {
335 			if ((f2 = opentemp(file2)) == NULL ||
336 			    fstat(fileno(f2), &stb2) < 0) {
337 				warn("%s", file2);
338 				status |= 2;
339 				goto closem;
340 			}
341 		} else if (strcmp(file2, "-") == 0)
342 			f2 = stdin;
343 		else
344 			f2 = fopen(file2, "r");
345 	}
346 	if (f2 == NULL) {
347 		warn("%s", file2);
348 		status |= 2;
349 		goto closem;
350 	}
351 
352 	switch (files_differ(f1, f2, flags)) {
353 	case 0:
354 		goto closem;
355 	case 1:
356 		break;
357 	default:
358 		/* error */
359 		status |= 2;
360 		goto closem;
361 	}
362 
363 	if ((flags & D_FORCEASCII) == 0 &&
364 	    (!asciifile(f1) || !asciifile(f2))) {
365 		rval = D_BINARY;
366 		status |= 1;
367 		goto closem;
368 	}
369 	if (lflag) {
370 		/* redirect stdout to pr */
371 		int pfd[2];
372 		char *header;
373 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
374 
375 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
376 		prargv[2] = header;
377 		fflush(stdout);
378 		rewind(stdout);
379 		pipe(pfd);
380 		switch ((pid = fork())) {
381 		case -1:
382 			warnx("No more processes");
383 			status |= 2;
384 			xfree(header);
385 			return (D_ERROR);
386 		case 0:
387 			/* child */
388 			if (pfd[0] != STDIN_FILENO) {
389 				dup2(pfd[0], STDIN_FILENO);
390 				close(pfd[0]);
391 			}
392 			close(pfd[1]);
393 			execv(_PATH_PR, prargv);
394 			_exit(127);
395 		default:
396 			/* parent */
397 			if (pfd[1] != STDOUT_FILENO) {
398 				ostdout = dup(STDOUT_FILENO);
399 				dup2(pfd[1], STDOUT_FILENO);
400 				close(pfd[1]);
401 			}
402 			close(pfd[0]);
403 			rewind(stdout);
404 			xfree(header);
405 		}
406 	}
407 	prepare(0, f1, stb1.st_size, flags);
408 	prepare(1, f2, stb2.st_size, flags);
409 
410 	prune();
411 	sort(sfile[0], slen[0]);
412 	sort(sfile[1], slen[1]);
413 
414 	member = (int *)file[1];
415 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
416 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
417 
418 	class = (int *)file[0];
419 	unsort(sfile[0], slen[0], class);
420 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
421 
422 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
423 	clen = 0;
424 	clistlen = 100;
425 	clist = xcalloc(clistlen, sizeof(*clist));
426 	i = stone(class, slen[0], member, klist, flags);
427 	xfree(member);
428 	xfree(class);
429 
430 	J = xrealloc(J, len[0] + 2, sizeof(*J));
431 	unravel(klist[i]);
432 	xfree(clist);
433 	xfree(klist);
434 
435 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
436 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
437 	check(f1, f2, flags);
438 	output(file1, f1, file2, f2, flags);
439 	if (ostdout != -1) {
440 		int wstatus;
441 
442 		/* close the pipe to pr and restore stdout */
443 		fflush(stdout);
444 		rewind(stdout);
445 		if (ostdout != STDOUT_FILENO) {
446 			close(STDOUT_FILENO);
447 			dup2(ostdout, STDOUT_FILENO);
448 			close(ostdout);
449 		}
450 		waitpid(pid, &wstatus, 0);
451 	}
452 closem:
453 	if (anychange) {
454 		status |= 1;
455 		if (rval == D_SAME)
456 			rval = D_DIFFER;
457 	}
458 	if (f1 != NULL)
459 		fclose(f1);
460 	if (f2 != NULL)
461 		fclose(f2);
462 
463 	return (rval);
464 }
465 
466 /*
467  * Check to see if the given files differ.
468  * Returns 0 if they are the same, 1 if different, and -1 on error.
469  * XXX - could use code from cmp(1) [faster]
470  */
471 static int
472 files_differ(FILE *f1, FILE *f2, int flags)
473 {
474 	char buf1[BUFSIZ], buf2[BUFSIZ];
475 	size_t i, j;
476 
477 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
478 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
479 		return (1);
480 	for (;;) {
481 		i = fread(buf1, 1, sizeof(buf1), f1);
482 		j = fread(buf2, 1, sizeof(buf2), f2);
483 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
484 			return (-1);
485 		if (i != j)
486 			return (1);
487 		if (i == 0)
488 			return (0);
489 		if (memcmp(buf1, buf2, i) != 0)
490 			return (1);
491 	}
492 }
493 
494 static FILE *
495 opentemp(const char *file)
496 {
497 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
498 	ssize_t nread;
499 	int ifd, ofd;
500 
501 	if (strcmp(file, "-") == 0)
502 		ifd = STDIN_FILENO;
503 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
504 		return (NULL);
505 
506 	if ((tempdir = getenv("TMPDIR")) == NULL)
507 		tempdir = _PATH_TMP;
508 
509 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
510 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
511 	    sizeof(tempfile)) {
512 		close(ifd);
513 		errno = ENAMETOOLONG;
514 		return (NULL);
515 	}
516 
517 	if ((ofd = mkstemp(tempfile)) < 0) {
518 		close(ifd);
519 		return (NULL);
520 	}
521 	unlink(tempfile);
522 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
523 		if (write(ofd, buf, nread) != nread) {
524 			close(ifd);
525 			close(ofd);
526 			return (NULL);
527 		}
528 	}
529 	close(ifd);
530 	lseek(ofd, (off_t)0, SEEK_SET);
531 	return (fdopen(ofd, "r"));
532 }
533 
534 char *
535 splice(char *dir, char *file)
536 {
537 	char *tail, *buf;
538 
539 	if ((tail = strrchr(file, '/')) == NULL)
540 		tail = file;
541 	else
542 		tail++;
543 	xasprintf(&buf, "%s/%s", dir, tail);
544 	return (buf);
545 }
546 
547 static void
548 prepare(int i, FILE *fd, off_t filesize, int flags)
549 {
550 	struct line *p;
551 	int j, h;
552 	size_t sz;
553 
554 	rewind(fd);
555 
556 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
557 	if (sz < 100)
558 		sz = 100;
559 
560 	p = xcalloc(sz + 3, sizeof(*p));
561 	for (j = 0; (h = readhash(fd, flags));) {
562 		if (j == sz) {
563 			sz = sz * 3 / 2;
564 			p = xrealloc(p, sz + 3, sizeof(*p));
565 		}
566 		p[++j].value = h;
567 	}
568 	len[i] = j;
569 	file[i] = p;
570 }
571 
572 static void
573 prune(void)
574 {
575 	int i, j;
576 
577 	for (pref = 0; pref < len[0] && pref < len[1] &&
578 	    file[0][pref + 1].value == file[1][pref + 1].value;
579 	    pref++)
580 		;
581 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
582 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
583 	    suff++)
584 		;
585 	for (j = 0; j < 2; j++) {
586 		sfile[j] = file[j] + pref;
587 		slen[j] = len[j] - pref - suff;
588 		for (i = 0; i <= slen[j]; i++)
589 			sfile[j][i].serial = i;
590 	}
591 }
592 
593 static void
594 equiv(struct line *a, int n, struct line *b, int m, int *c)
595 {
596 	int i, j;
597 
598 	i = j = 1;
599 	while (i <= n && j <= m) {
600 		if (a[i].value < b[j].value)
601 			a[i++].value = 0;
602 		else if (a[i].value == b[j].value)
603 			a[i++].value = j;
604 		else
605 			j++;
606 	}
607 	while (i <= n)
608 		a[i++].value = 0;
609 	b[m + 1].value = 0;
610 	j = 0;
611 	while (++j <= m) {
612 		c[j] = -b[j].serial;
613 		while (b[j + 1].value == b[j].value) {
614 			j++;
615 			c[j] = b[j].serial;
616 		}
617 	}
618 	c[j] = -1;
619 }
620 
621 /* Code taken from ping.c */
622 static int
623 isqrt(int n)
624 {
625 	int y, x = 1;
626 
627 	if (n == 0)
628 		return (0);
629 
630 	do { /* newton was a stinker */
631 		y = x;
632 		x = n / x;
633 		x += y;
634 		x /= 2;
635 	} while ((x - y) > 1 || (x - y) < -1);
636 
637 	return (x);
638 }
639 
640 static int
641 stone(int *a, int n, int *b, int *c, int flags)
642 {
643 	int i, k, y, j, l;
644 	int oldc, tc, oldl;
645 	u_int numtries;
646 
647 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
648 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
649 	    MAX(256, isqrt(n));
650 
651 	k = 0;
652 	c[0] = newcand(0, 0, 0);
653 	for (i = 1; i <= n; i++) {
654 		j = a[i];
655 		if (j == 0)
656 			continue;
657 		y = -b[j];
658 		oldl = 0;
659 		oldc = c[0];
660 		numtries = 0;
661 		do {
662 			if (y <= clist[oldc].y)
663 				continue;
664 			l = search(c, k, y);
665 			if (l != oldl + 1)
666 				oldc = c[l - 1];
667 			if (l <= k) {
668 				if (clist[c[l]].y <= y)
669 					continue;
670 				tc = c[l];
671 				c[l] = newcand(i, y, oldc);
672 				oldc = tc;
673 				oldl = l;
674 				numtries++;
675 			} else {
676 				c[l] = newcand(i, y, oldc);
677 				k++;
678 				break;
679 			}
680 		} while ((y = b[++j]) > 0 && numtries < bound);
681 	}
682 	return (k);
683 }
684 
685 static int
686 newcand(int x, int y, int pred)
687 {
688 	struct cand *q;
689 
690 	if (clen == clistlen) {
691 		clistlen = clistlen * 11 / 10;
692 		clist = xrealloc(clist, clistlen, sizeof(*clist));
693 	}
694 	q = clist + clen;
695 	q->x = x;
696 	q->y = y;
697 	q->pred = pred;
698 	return (clen++);
699 }
700 
701 static int
702 search(int *c, int k, int y)
703 {
704 	int i, j, l, t;
705 
706 	if (clist[c[k]].y < y)	/* quick look for typical case */
707 		return (k + 1);
708 	i = 0;
709 	j = k + 1;
710 	for (;;) {
711 		l = (i + j) / 2;
712 		if (l <= i)
713 			break;
714 		t = clist[c[l]].y;
715 		if (t > y)
716 			j = l;
717 		else if (t < y)
718 			i = l;
719 		else
720 			return (l);
721 	}
722 	return (l + 1);
723 }
724 
725 static void
726 unravel(int p)
727 {
728 	struct cand *q;
729 	int i;
730 
731 	for (i = 0; i <= len[0]; i++)
732 		J[i] = i <= pref ? i :
733 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
734 	for (q = clist + p; q->y != 0; q = clist + q->pred)
735 		J[q->x + pref] = q->y + pref;
736 }
737 
738 /*
739  * Check does double duty:
740  *  1.	ferret out any fortuitous correspondences due
741  *	to confounding by hashing (which result in "jackpot")
742  *  2.  collect random access indexes to the two files
743  */
744 static void
745 check(FILE *f1, FILE *f2, int flags)
746 {
747 	int i, j, jackpot, c, d;
748 	long ctold, ctnew;
749 
750 	rewind(f1);
751 	rewind(f2);
752 	j = 1;
753 	ixold[0] = ixnew[0] = 0;
754 	jackpot = 0;
755 	ctold = ctnew = 0;
756 	for (i = 1; i <= len[0]; i++) {
757 		if (J[i] == 0) {
758 			ixold[i] = ctold += skipline(f1);
759 			continue;
760 		}
761 		while (j < J[i]) {
762 			ixnew[j] = ctnew += skipline(f2);
763 			j++;
764 		}
765 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
766 			for (;;) {
767 				c = getc(f1);
768 				d = getc(f2);
769 				/*
770 				 * GNU diff ignores a missing newline
771 				 * in one file for -b or -w.
772 				 */
773 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
774 				    ((c == EOF && d == '\n') ||
775 				    (c == '\n' && d == EOF))) {
776 					break;
777 				}
778 				ctold++;
779 				ctnew++;
780 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
781 				    isspace(d)) {
782 					do {
783 						if (c == '\n')
784 							break;
785 						ctold++;
786 					} while (isspace(c = getc(f1)));
787 					do {
788 						if (d == '\n')
789 							break;
790 						ctnew++;
791 					} while (isspace(d = getc(f2)));
792 				} else if ((flags & D_IGNOREBLANKS)) {
793 					while (isspace(c) && c != '\n') {
794 						c = getc(f1);
795 						ctold++;
796 					}
797 					while (isspace(d) && d != '\n') {
798 						d = getc(f2);
799 						ctnew++;
800 					}
801 				}
802 				if (chrtran[c] != chrtran[d]) {
803 					jackpot++;
804 					J[i] = 0;
805 					if (c != '\n' && c != EOF)
806 						ctold += skipline(f1);
807 					if (d != '\n' && c != EOF)
808 						ctnew += skipline(f2);
809 					break;
810 				}
811 				if (c == '\n' || c == EOF)
812 					break;
813 			}
814 		} else {
815 			for (;;) {
816 				ctold++;
817 				ctnew++;
818 				if ((c = getc(f1)) != (d = getc(f2))) {
819 					/* jackpot++; */
820 					J[i] = 0;
821 					if (c != '\n' && c != EOF)
822 						ctold += skipline(f1);
823 					if (d != '\n' && c != EOF)
824 						ctnew += skipline(f2);
825 					break;
826 				}
827 				if (c == '\n' || c == EOF)
828 					break;
829 			}
830 		}
831 		ixold[i] = ctold;
832 		ixnew[j] = ctnew;
833 		j++;
834 	}
835 	for (; j <= len[1]; j++)
836 		ixnew[j] = ctnew += skipline(f2);
837 	/*
838 	 * if (jackpot)
839 	 *	fprintf(stderr, "jackpot\n");
840 	 */
841 }
842 
843 /* shellsort CACM #201 */
844 static void
845 sort(struct line *a, int n)
846 {
847 	struct line *ai, *aim, w;
848 	int j, m = 0, k;
849 
850 	if (n == 0)
851 		return;
852 	for (j = 1; j <= n; j *= 2)
853 		m = 2 * j - 1;
854 	for (m /= 2; m != 0; m /= 2) {
855 		k = n - m;
856 		for (j = 1; j <= k; j++) {
857 			for (ai = &a[j]; ai > a; ai -= m) {
858 				aim = &ai[m];
859 				if (aim < ai)
860 					break;	/* wraparound */
861 				if (aim->value > ai[0].value ||
862 				    (aim->value == ai[0].value &&
863 					aim->serial > ai[0].serial))
864 					break;
865 				w.value = ai[0].value;
866 				ai[0].value = aim->value;
867 				aim->value = w.value;
868 				w.serial = ai[0].serial;
869 				ai[0].serial = aim->serial;
870 				aim->serial = w.serial;
871 			}
872 		}
873 	}
874 }
875 
876 static void
877 unsort(struct line *f, int l, int *b)
878 {
879 	int *a, i;
880 
881 	a = xcalloc(l + 1, sizeof(*a));
882 	for (i = 1; i <= l; i++)
883 		a[f[i].serial] = f[i].value;
884 	for (i = 1; i <= l; i++)
885 		b[i] = a[i];
886 	xfree(a);
887 }
888 
889 static int
890 skipline(FILE *f)
891 {
892 	int i, c;
893 
894 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
895 		continue;
896 	return (i);
897 }
898 
899 static void
900 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
901 {
902 	int m, i0, i1, j0, j1;
903 
904 	rewind(f1);
905 	rewind(f2);
906 	m = len[0];
907 	J[0] = 0;
908 	J[m + 1] = len[1] + 1;
909 	if (diff_format != D_EDIT) {
910 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
911 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
912 				i0++;
913 			j0 = J[i0 - 1] + 1;
914 			i1 = i0 - 1;
915 			while (i1 < m && J[i1 + 1] == 0)
916 				i1++;
917 			j1 = J[i1 + 1] - 1;
918 			J[i1] = j1;
919 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
920 		}
921 	} else {
922 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
923 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
924 				i0--;
925 			j0 = J[i0 + 1] - 1;
926 			i1 = i0 + 1;
927 			while (i1 > 1 && J[i1 - 1] == 0)
928 				i1--;
929 			j1 = J[i1 - 1] + 1;
930 			J[i1] = j1;
931 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
932 		}
933 	}
934 	if (m == 0)
935 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
936 	if (diff_format == D_IFDEF) {
937 		for (;;) {
938 #define	c i0
939 			if ((c = getc(f1)) == EOF)
940 				return;
941 			diff_output("%c", c);
942 		}
943 #undef c
944 	}
945 	if (anychange != 0) {
946 		if (diff_format == D_CONTEXT)
947 			dump_context_vec(f1, f2, flags);
948 		else if (diff_format == D_UNIFIED)
949 			dump_unified_vec(f1, f2, flags);
950 	}
951 }
952 
953 static void
954 range(int a, int b, char *separator)
955 {
956 	diff_output("%d", a > b ? b : a);
957 	if (a < b)
958 		diff_output("%s%d", separator, b);
959 }
960 
961 static void
962 uni_range(int a, int b)
963 {
964 	if (a < b)
965 		diff_output("%d,%d", a, b - a + 1);
966 	else if (a == b)
967 		diff_output("%d", b);
968 	else
969 		diff_output("%d,0", b);
970 }
971 
972 static char *
973 preadline(int fd, size_t rlen, off_t off)
974 {
975 	char *line;
976 	ssize_t nr;
977 
978 	line = xmalloc(rlen + 1);
979 	if ((nr = pread(fd, line, rlen, off)) < 0)
980 		err(2, "preadline");
981 	if (nr > 0 && line[nr-1] == '\n')
982 		nr--;
983 	line[nr] = '\0';
984 	return (line);
985 }
986 
987 static int
988 ignoreline(char *line)
989 {
990 	int ret;
991 
992 	ret = regexec(&ignore_re, line, 0, NULL, 0);
993 	xfree(line);
994 	return (ret == 0);	/* if it matched, it should be ignored. */
995 }
996 
997 /*
998  * Indicate that there is a difference between lines a and b of the from file
999  * to get to lines c to d of the to file.  If a is greater then b then there
1000  * are no lines in the from file involved and this means that there were
1001  * lines appended (beginning at b).  If c is greater than d then there are
1002  * lines missing from the to file.
1003  */
1004 static void
1005 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1006     int *pflags)
1007 {
1008 	static size_t max_context = 64;
1009 	int i;
1010 
1011 restart:
1012 	if (diff_format != D_IFDEF && a > b && c > d)
1013 		return;
1014 	if (ignore_pats != NULL) {
1015 		char *line;
1016 		/*
1017 		 * All lines in the change, insert, or delete must
1018 		 * match an ignore pattern for the change to be
1019 		 * ignored.
1020 		 */
1021 		if (a <= b) {		/* Changes and deletes. */
1022 			for (i = a; i <= b; i++) {
1023 				line = preadline(fileno(f1),
1024 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1025 				if (!ignoreline(line))
1026 					goto proceed;
1027 			}
1028 		}
1029 		if (a > b || c <= d) {	/* Changes and inserts. */
1030 			for (i = c; i <= d; i++) {
1031 				line = preadline(fileno(f2),
1032 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1033 				if (!ignoreline(line))
1034 					goto proceed;
1035 			}
1036 		}
1037 		return;
1038 	}
1039 proceed:
1040 	if (*pflags & D_HEADER) {
1041 		diff_output("%s %s %s\n", diffargs, file1, file2);
1042 		*pflags &= ~D_HEADER;
1043 	}
1044 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1045 		/*
1046 		 * Allocate change records as needed.
1047 		 */
1048 		if (context_vec_ptr == context_vec_end - 1) {
1049 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1050 			max_context <<= 1;
1051 			context_vec_start = xrealloc(context_vec_start,
1052 			    max_context, sizeof(*context_vec_start));
1053 			context_vec_end = context_vec_start + max_context;
1054 			context_vec_ptr = context_vec_start + offset;
1055 		}
1056 		if (anychange == 0) {
1057 			/*
1058 			 * Print the context/unidiff header first time through.
1059 			 */
1060 			print_header(file1, file2);
1061 			anychange = 1;
1062 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1063 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1064 			/*
1065 			 * If this change is more than 'diff_context' lines from the
1066 			 * previous change, dump the record and reset it.
1067 			 */
1068 			if (diff_format == D_CONTEXT)
1069 				dump_context_vec(f1, f2, *pflags);
1070 			else
1071 				dump_unified_vec(f1, f2, *pflags);
1072 		}
1073 		context_vec_ptr++;
1074 		context_vec_ptr->a = a;
1075 		context_vec_ptr->b = b;
1076 		context_vec_ptr->c = c;
1077 		context_vec_ptr->d = d;
1078 		return;
1079 	}
1080 	if (anychange == 0)
1081 		anychange = 1;
1082 	switch (diff_format) {
1083 	case D_BRIEF:
1084 		return;
1085 	case D_NORMAL:
1086 	case D_EDIT:
1087 		range(a, b, ",");
1088 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1089 		if (diff_format == D_NORMAL)
1090 			range(c, d, ",");
1091 		diff_output("\n");
1092 		break;
1093 	case D_REVERSE:
1094 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1095 		range(a, b, " ");
1096 		diff_output("\n");
1097 		break;
1098 	case D_NREVERSE:
1099 		if (a > b)
1100 			diff_output("a%d %d\n", b, d - c + 1);
1101 		else {
1102 			diff_output("d%d %d\n", a, b - a + 1);
1103 			if (!(c > d))
1104 				/* add changed lines */
1105 				diff_output("a%d %d\n", b, d - c + 1);
1106 		}
1107 		break;
1108 	}
1109 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1110 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1111 		if (a <= b && c <= d && diff_format == D_NORMAL)
1112 			diff_output("---\n");
1113 	}
1114 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1115 	if (i != 0 && diff_format == D_EDIT) {
1116 		/*
1117 		 * A non-zero return value for D_EDIT indicates that the
1118 		 * last line printed was a bare dot (".") that has been
1119 		 * escaped as ".." to prevent ed(1) from misinterpreting
1120 		 * it.  We have to add a substitute command to change this
1121 		 * back and restart where we left off.
1122 		 */
1123 		diff_output(".\n");
1124 		diff_output("%ds/^\\.\\././\n", a);
1125 		a += i;
1126 		c += i;
1127 		goto restart;
1128 	}
1129 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1130 		diff_output(".\n");
1131 	if (inifdef) {
1132 		diff_output("#endif /* %s */\n", ifdefname);
1133 		inifdef = 0;
1134 	}
1135 }
1136 
1137 static int
1138 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1139 {
1140 	int i, j, c, lastc, col, nc;
1141 
1142 	/*
1143 	 * When doing #ifdef's, copy down to current line
1144 	 * if this is the first file, so that stuff makes it to output.
1145 	 */
1146 	if (diff_format == D_IFDEF && oldfile) {
1147 		long curpos = ftell(lb);
1148 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1149 		nc = f[a > b ? b : a - 1] - curpos;
1150 		for (i = 0; i < nc; i++)
1151 			diff_output("%c", getc(lb));
1152 	}
1153 	if (a > b)
1154 		return (0);
1155 	if (diff_format == D_IFDEF) {
1156 		if (inifdef) {
1157 			diff_output("#else /* %s%s */\n",
1158 			    oldfile == 1 ? "!" : "", ifdefname);
1159 		} else {
1160 			if (oldfile)
1161 				diff_output("#ifndef %s\n", ifdefname);
1162 			else
1163 				diff_output("#ifdef %s\n", ifdefname);
1164 		}
1165 		inifdef = 1 + oldfile;
1166 	}
1167 	for (i = a; i <= b; i++) {
1168 		fseek(lb, f[i - 1], SEEK_SET);
1169 		nc = f[i] - f[i - 1];
1170 		if (diff_format != D_IFDEF && ch != '\0') {
1171 			diff_output("%c", ch);
1172 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1173 			    || diff_format == D_UNIFIED))
1174 				diff_output("\t");
1175 			else if (diff_format != D_UNIFIED)
1176 				diff_output(" ");
1177 		}
1178 		col = 0;
1179 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1180 			if ((c = getc(lb)) == EOF) {
1181 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1182 				    diff_format == D_NREVERSE)
1183 					warnx("No newline at end of file");
1184 				else
1185 					diff_output("\n\\ No newline at end of "
1186 					    "file\n");
1187 				return (0);
1188 			}
1189 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1190 				do {
1191 					diff_output(" ");
1192 				} while (++col & 7);
1193 			} else {
1194 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1195 				    && lastc == '.') {
1196 					/*
1197 					 * Don't print a bare "." line
1198 					 * since that will confuse ed(1).
1199 					 * Print ".." instead and return,
1200 					 * giving the caller an offset
1201 					 * from which to restart.
1202 					 */
1203 					diff_output(".\n");
1204 					return (i - a + 1);
1205 				}
1206 				diff_output("%c", c);
1207 				col++;
1208 			}
1209 		}
1210 	}
1211 	return (0);
1212 }
1213 
1214 /*
1215  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1216  */
1217 static int
1218 readhash(FILE *f, int flags)
1219 {
1220 	int i, t, space;
1221 	int sum;
1222 
1223 	sum = 1;
1224 	space = 0;
1225 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1226 		if (flags & D_IGNORECASE)
1227 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1228 				if (t == EOF) {
1229 					if (i == 0)
1230 						return (0);
1231 					break;
1232 				}
1233 				sum = sum * 127 + chrtran[t];
1234 			}
1235 		else
1236 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1237 				if (t == EOF) {
1238 					if (i == 0)
1239 						return (0);
1240 					break;
1241 				}
1242 				sum = sum * 127 + t;
1243 			}
1244 	} else {
1245 		for (i = 0;;) {
1246 			switch (t = getc(f)) {
1247 			case '\t':
1248 			case '\r':
1249 			case '\v':
1250 			case '\f':
1251 			case ' ':
1252 				space++;
1253 				continue;
1254 			default:
1255 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1256 					i++;
1257 					space = 0;
1258 				}
1259 				sum = sum * 127 + chrtran[t];
1260 				i++;
1261 				continue;
1262 			case EOF:
1263 				if (i == 0)
1264 					return (0);
1265 				/* FALLTHROUGH */
1266 			case '\n':
1267 				break;
1268 			}
1269 			break;
1270 		}
1271 	}
1272 	/*
1273 	 * There is a remote possibility that we end up with a zero sum.
1274 	 * Zero is used as an EOF marker, so return 1 instead.
1275 	 */
1276 	return (sum == 0 ? 1 : sum);
1277 }
1278 
1279 static int
1280 asciifile(FILE *f)
1281 {
1282 	unsigned char buf[BUFSIZ];
1283 	size_t i, cnt;
1284 
1285 	if (f == NULL)
1286 		return (1);
1287 
1288 	rewind(f);
1289 	cnt = fread(buf, 1, sizeof(buf), f);
1290 	for (i = 0; i < cnt; i++)
1291 		if (!isprint(buf[i]) && !isspace(buf[i]))
1292 			return (0);
1293 	return (1);
1294 }
1295 
1296 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1297 
1298 static char *
1299 match_function(const long *f, int pos, FILE *fp)
1300 {
1301 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1302 	size_t nc;
1303 	int last = lastline;
1304 	char *state = NULL;
1305 
1306 	lastline = pos;
1307 	while (pos > last) {
1308 		fseek(fp, f[pos - 1], SEEK_SET);
1309 		nc = f[pos] - f[pos - 1];
1310 		if (nc >= sizeof(buf))
1311 			nc = sizeof(buf) - 1;
1312 		nc = fread(buf, 1, nc, fp);
1313 		if (nc > 0) {
1314 			buf[nc] = '\0';
1315 			buf[strcspn(buf, "\n")] = '\0';
1316 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1317 				if (begins_with(buf, "private:")) {
1318 					if (!state)
1319 						state = " (private)";
1320 				} else if (begins_with(buf, "protected:")) {
1321 					if (!state)
1322 						state = " (protected)";
1323 				} else if (begins_with(buf, "public:")) {
1324 					if (!state)
1325 						state = " (public)";
1326 				} else {
1327 					strlcpy(lastbuf, buf, sizeof lastbuf);
1328 					if (state)
1329 						strlcat(lastbuf, state,
1330 						    sizeof lastbuf);
1331 					lastmatchline = pos;
1332 					return lastbuf;
1333 				}
1334 			}
1335 		}
1336 		pos--;
1337 	}
1338 	return lastmatchline > 0 ? lastbuf : NULL;
1339 }
1340 
1341 /* dump accumulated "context" diff changes */
1342 static void
1343 dump_context_vec(FILE *f1, FILE *f2, int flags)
1344 {
1345 	struct context_vec *cvp = context_vec_start;
1346 	int lowa, upb, lowc, upd, do_output;
1347 	int a, b, c, d;
1348 	char ch, *f;
1349 
1350 	if (context_vec_start > context_vec_ptr)
1351 		return;
1352 
1353 	b = d = 0;		/* gcc */
1354 	lowa = MAX(1, cvp->a - diff_context);
1355 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1356 	lowc = MAX(1, cvp->c - diff_context);
1357 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1358 
1359 	diff_output("***************");
1360 	if ((flags & D_PROTOTYPE)) {
1361 		f = match_function(ixold, lowa-1, f1);
1362 		if (f != NULL)
1363 			diff_output(" %s", f);
1364 	}
1365 	diff_output("\n*** ");
1366 	range(lowa, upb, ",");
1367 	diff_output(" ****\n");
1368 
1369 	/*
1370 	 * Output changes to the "old" file.  The first loop suppresses
1371 	 * output if there were no changes to the "old" file (we'll see
1372 	 * the "old" lines as context in the "new" list).
1373 	 */
1374 	do_output = 0;
1375 	for (; cvp <= context_vec_ptr; cvp++)
1376 		if (cvp->a <= cvp->b) {
1377 			cvp = context_vec_start;
1378 			do_output++;
1379 			break;
1380 		}
1381 	if (do_output) {
1382 		while (cvp <= context_vec_ptr) {
1383 			a = cvp->a;
1384 			b = cvp->b;
1385 			c = cvp->c;
1386 			d = cvp->d;
1387 
1388 			if (a <= b && c <= d)
1389 				ch = 'c';
1390 			else
1391 				ch = (a <= b) ? 'd' : 'a';
1392 
1393 			if (ch == 'a')
1394 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1395 			else {
1396 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1397 				fetch(ixold, a, b, f1,
1398 				    ch == 'c' ? '!' : '-', 0, flags);
1399 			}
1400 			lowa = b + 1;
1401 			cvp++;
1402 		}
1403 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1404 	}
1405 	/* output changes to the "new" file */
1406 	diff_output("--- ");
1407 	range(lowc, upd, ",");
1408 	diff_output(" ----\n");
1409 
1410 	do_output = 0;
1411 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1412 		if (cvp->c <= cvp->d) {
1413 			cvp = context_vec_start;
1414 			do_output++;
1415 			break;
1416 		}
1417 	if (do_output) {
1418 		while (cvp <= context_vec_ptr) {
1419 			a = cvp->a;
1420 			b = cvp->b;
1421 			c = cvp->c;
1422 			d = cvp->d;
1423 
1424 			if (a <= b && c <= d)
1425 				ch = 'c';
1426 			else
1427 				ch = (a <= b) ? 'd' : 'a';
1428 
1429 			if (ch == 'd')
1430 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1431 			else {
1432 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1433 				fetch(ixnew, c, d, f2,
1434 				    ch == 'c' ? '!' : '+', 0, flags);
1435 			}
1436 			lowc = d + 1;
1437 			cvp++;
1438 		}
1439 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1440 	}
1441 	context_vec_ptr = context_vec_start - 1;
1442 }
1443 
1444 /* dump accumulated "unified" diff changes */
1445 static void
1446 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1447 {
1448 	struct context_vec *cvp = context_vec_start;
1449 	int lowa, upb, lowc, upd;
1450 	int a, b, c, d;
1451 	char ch, *f;
1452 
1453 	if (context_vec_start > context_vec_ptr)
1454 		return;
1455 
1456 	b = d = 0;		/* gcc */
1457 	lowa = MAX(1, cvp->a - diff_context);
1458 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1459 	lowc = MAX(1, cvp->c - diff_context);
1460 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1461 
1462 	diff_output("@@ -");
1463 	uni_range(lowa, upb);
1464 	diff_output(" +");
1465 	uni_range(lowc, upd);
1466 	diff_output(" @@");
1467 	if ((flags & D_PROTOTYPE)) {
1468 		f = match_function(ixold, lowa-1, f1);
1469 		if (f != NULL)
1470 			diff_output(" %s", f);
1471 	}
1472 	diff_output("\n");
1473 
1474 	/*
1475 	 * Output changes in "unified" diff format--the old and new lines
1476 	 * are printed together.
1477 	 */
1478 	for (; cvp <= context_vec_ptr; cvp++) {
1479 		a = cvp->a;
1480 		b = cvp->b;
1481 		c = cvp->c;
1482 		d = cvp->d;
1483 
1484 		/*
1485 		 * c: both new and old changes
1486 		 * d: only changes in the old file
1487 		 * a: only changes in the new file
1488 		 */
1489 		if (a <= b && c <= d)
1490 			ch = 'c';
1491 		else
1492 			ch = (a <= b) ? 'd' : 'a';
1493 
1494 		switch (ch) {
1495 		case 'c':
1496 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1497 			fetch(ixold, a, b, f1, '-', 0, flags);
1498 			fetch(ixnew, c, d, f2, '+', 0, flags);
1499 			break;
1500 		case 'd':
1501 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1502 			fetch(ixold, a, b, f1, '-', 0, flags);
1503 			break;
1504 		case 'a':
1505 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1506 			fetch(ixnew, c, d, f2, '+', 0, flags);
1507 			break;
1508 		}
1509 		lowa = b + 1;
1510 		lowc = d + 1;
1511 	}
1512 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1513 
1514 	context_vec_ptr = context_vec_start - 1;
1515 }
1516 
1517 static void
1518 print_header(const char *file1, const char *file2)
1519 {
1520 	if (label[0] != NULL)
1521 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1522 		    label[0]);
1523 	else
1524 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1525 		    file1, ctime(&stb1.st_mtime));
1526 	if (label[1] != NULL)
1527 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1528 		    label[1]);
1529 	else
1530 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1531 		    file2, ctime(&stb2.st_mtime));
1532 }
1533