xref: /openbsd-src/usr.bin/diff/diffreg.c (revision b39c515898423c8d899e35282f4b395f7cad3298)
1 /*	$OpenBSD: diffreg.c,v 1.79 2010/07/16 21:47:02 ray Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 #define	diff_output	printf
176 static FILE	*opentemp(const char *);
177 static void	 output(char *, FILE *, char *, FILE *, int);
178 static void	 check(FILE *, FILE *, int);
179 static void	 range(int, int, char *);
180 static void	 uni_range(int, int);
181 static void	 dump_context_vec(FILE *, FILE *, int);
182 static void	 dump_unified_vec(FILE *, FILE *, int);
183 static void	 prepare(int, FILE *, off_t, int);
184 static void	 prune(void);
185 static void	 equiv(struct line *, int, struct line *, int, int *);
186 static void	 unravel(int);
187 static void	 unsort(struct line *, int, int *);
188 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
189 static void	 sort(struct line *, int);
190 static void	 print_header(const char *, const char *);
191 static int	 ignoreline(char *);
192 static int	 asciifile(FILE *);
193 static int	 fetch(long *, int, int, FILE *, int, int, int);
194 static int	 newcand(int, int, int);
195 static int	 search(int *, int, int);
196 static int	 skipline(FILE *);
197 static int	 isqrt(int);
198 static int	 stone(int *, int, int *, int *, int);
199 static int	 readhash(FILE *, int);
200 static int	 files_differ(FILE *, FILE *, int);
201 static char	*match_function(const long *, int, FILE *);
202 static char	*preadline(int, size_t, off_t);
203 
204 static int  *J;			/* will be overlaid on class */
205 static int  *class;		/* will be overlaid on file[0] */
206 static int  *klist;		/* will be overlaid on file[0] after class */
207 static int  *member;		/* will be overlaid on file[1] */
208 static int   clen;
209 static int   inifdef;		/* whether or not we are in a #ifdef block */
210 static int   len[2];
211 static int   pref, suff;	/* length of prefix and suffix */
212 static int   slen[2];
213 static int   anychange;
214 static long *ixnew;		/* will be overlaid on file[1] */
215 static long *ixold;		/* will be overlaid on klist */
216 static struct cand *clist;	/* merely a free storage pot for candidates */
217 static int   clistlen;		/* the length of clist */
218 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
219 static u_char *chrtran;		/* translation table for case-folding */
220 static struct context_vec *context_vec_start;
221 static struct context_vec *context_vec_end;
222 static struct context_vec *context_vec_ptr;
223 
224 #define FUNCTION_CONTEXT_SIZE	55
225 static char lastbuf[FUNCTION_CONTEXT_SIZE];
226 static int lastline;
227 static int lastmatchline;
228 
229 
230 /*
231  * chrtran points to one of 2 translation tables: cup2low if folding upper to
232  * lower case clow2low if not folding case
233  */
234 u_char clow2low[256] = {
235 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
236 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
237 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
238 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
239 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
240 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
241 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
242 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
243 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
244 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
245 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
246 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
247 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
248 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
249 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
250 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
251 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
252 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
253 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
254 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
255 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
256 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
257 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 	0xfd, 0xfe, 0xff
259 };
260 
261 u_char cup2low[256] = {
262 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
263 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
264 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
265 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
266 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
267 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
268 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
269 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
270 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
271 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
272 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
273 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
274 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
275 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
276 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
277 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
278 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
279 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
280 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
281 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
282 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
283 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
284 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
285 	0xfd, 0xfe, 0xff
286 };
287 
288 int
289 diffreg(char *file1, char *file2, int flags)
290 {
291 	FILE *f1, *f2;
292 	int i, rval, ostdout = -1;
293 	pid_t pid = -1;
294 
295 	f1 = f2 = NULL;
296 	rval = D_SAME;
297 	anychange = 0;
298 	lastline = 0;
299 	lastmatchline = 0;
300 	context_vec_ptr = context_vec_start - 1;
301 	if (flags & D_IGNORECASE)
302 		chrtran = cup2low;
303 	else
304 		chrtran = clow2low;
305 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
306 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
308 		goto closem;
309 
310 	if (flags & D_EMPTY1)
311 		f1 = fopen(_PATH_DEVNULL, "r");
312 	else {
313 		if (!S_ISREG(stb1.st_mode)) {
314 			if ((f1 = opentemp(file1)) == NULL ||
315 			    fstat(fileno(f1), &stb1) < 0) {
316 				warn("%s", file1);
317 				status |= 2;
318 				goto closem;
319 			}
320 		} else if (strcmp(file1, "-") == 0)
321 			f1 = stdin;
322 		else
323 			f1 = fopen(file1, "r");
324 	}
325 	if (f1 == NULL) {
326 		warn("%s", file1);
327 		status |= 2;
328 		goto closem;
329 	}
330 
331 	if (flags & D_EMPTY2)
332 		f2 = fopen(_PATH_DEVNULL, "r");
333 	else {
334 		if (!S_ISREG(stb2.st_mode)) {
335 			if ((f2 = opentemp(file2)) == NULL ||
336 			    fstat(fileno(f2), &stb2) < 0) {
337 				warn("%s", file2);
338 				status |= 2;
339 				goto closem;
340 			}
341 		} else if (strcmp(file2, "-") == 0)
342 			f2 = stdin;
343 		else
344 			f2 = fopen(file2, "r");
345 	}
346 	if (f2 == NULL) {
347 		warn("%s", file2);
348 		status |= 2;
349 		goto closem;
350 	}
351 
352 	switch (files_differ(f1, f2, flags)) {
353 	case 0:
354 		goto closem;
355 	case 1:
356 		break;
357 	default:
358 		/* error */
359 		status |= 2;
360 		goto closem;
361 	}
362 
363 	if ((flags & D_FORCEASCII) == 0 &&
364 	    (!asciifile(f1) || !asciifile(f2))) {
365 		rval = D_BINARY;
366 		status |= 1;
367 		goto closem;
368 	}
369 	if (lflag) {
370 		/* redirect stdout to pr */
371 		int pfd[2];
372 		char *header;
373 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
374 
375 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
376 		prargv[2] = header;
377 		fflush(stdout);
378 		rewind(stdout);
379 		pipe(pfd);
380 		switch ((pid = fork())) {
381 		case -1:
382 			warnx("No more processes");
383 			status |= 2;
384 			xfree(header);
385 			rval = D_ERROR;
386 			goto closem;
387 		case 0:
388 			/* child */
389 			if (pfd[0] != STDIN_FILENO) {
390 				dup2(pfd[0], STDIN_FILENO);
391 				close(pfd[0]);
392 			}
393 			close(pfd[1]);
394 			execv(_PATH_PR, prargv);
395 			_exit(127);
396 		default:
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			xfree(header);
406 		}
407 	}
408 	prepare(0, f1, stb1.st_size, flags);
409 	prepare(1, f2, stb2.st_size, flags);
410 
411 	prune();
412 	sort(sfile[0], slen[0]);
413 	sort(sfile[1], slen[1]);
414 
415 	member = (int *)file[1];
416 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
417 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
418 
419 	class = (int *)file[0];
420 	unsort(sfile[0], slen[0], class);
421 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
422 
423 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
424 	clen = 0;
425 	clistlen = 100;
426 	clist = xcalloc(clistlen, sizeof(*clist));
427 	i = stone(class, slen[0], member, klist, flags);
428 	xfree(member);
429 	xfree(class);
430 
431 	J = xrealloc(J, len[0] + 2, sizeof(*J));
432 	unravel(klist[i]);
433 	xfree(clist);
434 	xfree(klist);
435 
436 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
437 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
438 	check(f1, f2, flags);
439 	output(file1, f1, file2, f2, flags);
440 	if (ostdout != -1) {
441 		int wstatus;
442 
443 		/* close the pipe to pr and restore stdout */
444 		fflush(stdout);
445 		rewind(stdout);
446 		if (ostdout != STDOUT_FILENO) {
447 			close(STDOUT_FILENO);
448 			dup2(ostdout, STDOUT_FILENO);
449 			close(ostdout);
450 		}
451 		waitpid(pid, &wstatus, 0);
452 	}
453 closem:
454 	if (anychange) {
455 		status |= 1;
456 		if (rval == D_SAME)
457 			rval = D_DIFFER;
458 	}
459 	if (f1 != NULL)
460 		fclose(f1);
461 	if (f2 != NULL)
462 		fclose(f2);
463 
464 	return (rval);
465 }
466 
467 /*
468  * Check to see if the given files differ.
469  * Returns 0 if they are the same, 1 if different, and -1 on error.
470  * XXX - could use code from cmp(1) [faster]
471  */
472 static int
473 files_differ(FILE *f1, FILE *f2, int flags)
474 {
475 	char buf1[BUFSIZ], buf2[BUFSIZ];
476 	size_t i, j;
477 
478 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
479 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
480 		return (1);
481 	for (;;) {
482 		i = fread(buf1, 1, sizeof(buf1), f1);
483 		j = fread(buf2, 1, sizeof(buf2), f2);
484 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
485 			return (-1);
486 		if (i != j)
487 			return (1);
488 		if (i == 0)
489 			return (0);
490 		if (memcmp(buf1, buf2, i) != 0)
491 			return (1);
492 	}
493 }
494 
495 static FILE *
496 opentemp(const char *file)
497 {
498 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
499 	ssize_t nread;
500 	int ifd, ofd;
501 
502 	if (strcmp(file, "-") == 0)
503 		ifd = STDIN_FILENO;
504 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
505 		return (NULL);
506 
507 	if ((tempdir = getenv("TMPDIR")) == NULL)
508 		tempdir = _PATH_TMP;
509 
510 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
511 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
512 	    sizeof(tempfile)) {
513 		close(ifd);
514 		errno = ENAMETOOLONG;
515 		return (NULL);
516 	}
517 
518 	if ((ofd = mkstemp(tempfile)) < 0) {
519 		close(ifd);
520 		return (NULL);
521 	}
522 	unlink(tempfile);
523 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
524 		if (write(ofd, buf, nread) != nread) {
525 			close(ifd);
526 			close(ofd);
527 			return (NULL);
528 		}
529 	}
530 	close(ifd);
531 	lseek(ofd, (off_t)0, SEEK_SET);
532 	return (fdopen(ofd, "r"));
533 }
534 
535 char *
536 splice(char *dir, char *file)
537 {
538 	char *tail, *buf;
539 
540 	if ((tail = strrchr(file, '/')) == NULL)
541 		tail = file;
542 	else
543 		tail++;
544 	xasprintf(&buf, "%s/%s", dir, tail);
545 	return (buf);
546 }
547 
548 static void
549 prepare(int i, FILE *fd, off_t filesize, int flags)
550 {
551 	struct line *p;
552 	int j, h;
553 	size_t sz;
554 
555 	rewind(fd);
556 
557 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
558 	if (sz < 100)
559 		sz = 100;
560 
561 	p = xcalloc(sz + 3, sizeof(*p));
562 	for (j = 0; (h = readhash(fd, flags));) {
563 		if (j == sz) {
564 			sz = sz * 3 / 2;
565 			p = xrealloc(p, sz + 3, sizeof(*p));
566 		}
567 		p[++j].value = h;
568 	}
569 	len[i] = j;
570 	file[i] = p;
571 }
572 
573 static void
574 prune(void)
575 {
576 	int i, j;
577 
578 	for (pref = 0; pref < len[0] && pref < len[1] &&
579 	    file[0][pref + 1].value == file[1][pref + 1].value;
580 	    pref++)
581 		;
582 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
583 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
584 	    suff++)
585 		;
586 	for (j = 0; j < 2; j++) {
587 		sfile[j] = file[j] + pref;
588 		slen[j] = len[j] - pref - suff;
589 		for (i = 0; i <= slen[j]; i++)
590 			sfile[j][i].serial = i;
591 	}
592 }
593 
594 static void
595 equiv(struct line *a, int n, struct line *b, int m, int *c)
596 {
597 	int i, j;
598 
599 	i = j = 1;
600 	while (i <= n && j <= m) {
601 		if (a[i].value < b[j].value)
602 			a[i++].value = 0;
603 		else if (a[i].value == b[j].value)
604 			a[i++].value = j;
605 		else
606 			j++;
607 	}
608 	while (i <= n)
609 		a[i++].value = 0;
610 	b[m + 1].value = 0;
611 	j = 0;
612 	while (++j <= m) {
613 		c[j] = -b[j].serial;
614 		while (b[j + 1].value == b[j].value) {
615 			j++;
616 			c[j] = b[j].serial;
617 		}
618 	}
619 	c[j] = -1;
620 }
621 
622 /* Code taken from ping.c */
623 static int
624 isqrt(int n)
625 {
626 	int y, x = 1;
627 
628 	if (n == 0)
629 		return (0);
630 
631 	do { /* newton was a stinker */
632 		y = x;
633 		x = n / x;
634 		x += y;
635 		x /= 2;
636 	} while ((x - y) > 1 || (x - y) < -1);
637 
638 	return (x);
639 }
640 
641 static int
642 stone(int *a, int n, int *b, int *c, int flags)
643 {
644 	int i, k, y, j, l;
645 	int oldc, tc, oldl;
646 	u_int numtries;
647 
648 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
649 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
650 	    MAX(256, isqrt(n));
651 
652 	k = 0;
653 	c[0] = newcand(0, 0, 0);
654 	for (i = 1; i <= n; i++) {
655 		j = a[i];
656 		if (j == 0)
657 			continue;
658 		y = -b[j];
659 		oldl = 0;
660 		oldc = c[0];
661 		numtries = 0;
662 		do {
663 			if (y <= clist[oldc].y)
664 				continue;
665 			l = search(c, k, y);
666 			if (l != oldl + 1)
667 				oldc = c[l - 1];
668 			if (l <= k) {
669 				if (clist[c[l]].y <= y)
670 					continue;
671 				tc = c[l];
672 				c[l] = newcand(i, y, oldc);
673 				oldc = tc;
674 				oldl = l;
675 				numtries++;
676 			} else {
677 				c[l] = newcand(i, y, oldc);
678 				k++;
679 				break;
680 			}
681 		} while ((y = b[++j]) > 0 && numtries < bound);
682 	}
683 	return (k);
684 }
685 
686 static int
687 newcand(int x, int y, int pred)
688 {
689 	struct cand *q;
690 
691 	if (clen == clistlen) {
692 		clistlen = clistlen * 11 / 10;
693 		clist = xrealloc(clist, clistlen, sizeof(*clist));
694 	}
695 	q = clist + clen;
696 	q->x = x;
697 	q->y = y;
698 	q->pred = pred;
699 	return (clen++);
700 }
701 
702 static int
703 search(int *c, int k, int y)
704 {
705 	int i, j, l, t;
706 
707 	if (clist[c[k]].y < y)	/* quick look for typical case */
708 		return (k + 1);
709 	i = 0;
710 	j = k + 1;
711 	for (;;) {
712 		l = (i + j) / 2;
713 		if (l <= i)
714 			break;
715 		t = clist[c[l]].y;
716 		if (t > y)
717 			j = l;
718 		else if (t < y)
719 			i = l;
720 		else
721 			return (l);
722 	}
723 	return (l + 1);
724 }
725 
726 static void
727 unravel(int p)
728 {
729 	struct cand *q;
730 	int i;
731 
732 	for (i = 0; i <= len[0]; i++)
733 		J[i] = i <= pref ? i :
734 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
735 	for (q = clist + p; q->y != 0; q = clist + q->pred)
736 		J[q->x + pref] = q->y + pref;
737 }
738 
739 /*
740  * Check does double duty:
741  *  1.	ferret out any fortuitous correspondences due
742  *	to confounding by hashing (which result in "jackpot")
743  *  2.  collect random access indexes to the two files
744  */
745 static void
746 check(FILE *f1, FILE *f2, int flags)
747 {
748 	int i, j, jackpot, c, d;
749 	long ctold, ctnew;
750 
751 	rewind(f1);
752 	rewind(f2);
753 	j = 1;
754 	ixold[0] = ixnew[0] = 0;
755 	jackpot = 0;
756 	ctold = ctnew = 0;
757 	for (i = 1; i <= len[0]; i++) {
758 		if (J[i] == 0) {
759 			ixold[i] = ctold += skipline(f1);
760 			continue;
761 		}
762 		while (j < J[i]) {
763 			ixnew[j] = ctnew += skipline(f2);
764 			j++;
765 		}
766 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
767 			for (;;) {
768 				c = getc(f1);
769 				d = getc(f2);
770 				/*
771 				 * GNU diff ignores a missing newline
772 				 * in one file for -b or -w.
773 				 */
774 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
775 				    ((c == EOF && d == '\n') ||
776 				    (c == '\n' && d == EOF))) {
777 					break;
778 				}
779 				ctold++;
780 				ctnew++;
781 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
782 				    isspace(d)) {
783 					do {
784 						if (c == '\n')
785 							break;
786 						ctold++;
787 					} while (isspace(c = getc(f1)));
788 					do {
789 						if (d == '\n')
790 							break;
791 						ctnew++;
792 					} while (isspace(d = getc(f2)));
793 				} else if ((flags & D_IGNOREBLANKS)) {
794 					while (isspace(c) && c != '\n') {
795 						c = getc(f1);
796 						ctold++;
797 					}
798 					while (isspace(d) && d != '\n') {
799 						d = getc(f2);
800 						ctnew++;
801 					}
802 				}
803 				if (chrtran[c] != chrtran[d]) {
804 					jackpot++;
805 					J[i] = 0;
806 					if (c != '\n' && c != EOF)
807 						ctold += skipline(f1);
808 					if (d != '\n' && c != EOF)
809 						ctnew += skipline(f2);
810 					break;
811 				}
812 				if (c == '\n' || c == EOF)
813 					break;
814 			}
815 		} else {
816 			for (;;) {
817 				ctold++;
818 				ctnew++;
819 				if ((c = getc(f1)) != (d = getc(f2))) {
820 					/* jackpot++; */
821 					J[i] = 0;
822 					if (c != '\n' && c != EOF)
823 						ctold += skipline(f1);
824 					if (d != '\n' && c != EOF)
825 						ctnew += skipline(f2);
826 					break;
827 				}
828 				if (c == '\n' || c == EOF)
829 					break;
830 			}
831 		}
832 		ixold[i] = ctold;
833 		ixnew[j] = ctnew;
834 		j++;
835 	}
836 	for (; j <= len[1]; j++)
837 		ixnew[j] = ctnew += skipline(f2);
838 	/*
839 	 * if (jackpot)
840 	 *	fprintf(stderr, "jackpot\n");
841 	 */
842 }
843 
844 /* shellsort CACM #201 */
845 static void
846 sort(struct line *a, int n)
847 {
848 	struct line *ai, *aim, w;
849 	int j, m = 0, k;
850 
851 	if (n == 0)
852 		return;
853 	for (j = 1; j <= n; j *= 2)
854 		m = 2 * j - 1;
855 	for (m /= 2; m != 0; m /= 2) {
856 		k = n - m;
857 		for (j = 1; j <= k; j++) {
858 			for (ai = &a[j]; ai > a; ai -= m) {
859 				aim = &ai[m];
860 				if (aim < ai)
861 					break;	/* wraparound */
862 				if (aim->value > ai[0].value ||
863 				    (aim->value == ai[0].value &&
864 					aim->serial > ai[0].serial))
865 					break;
866 				w.value = ai[0].value;
867 				ai[0].value = aim->value;
868 				aim->value = w.value;
869 				w.serial = ai[0].serial;
870 				ai[0].serial = aim->serial;
871 				aim->serial = w.serial;
872 			}
873 		}
874 	}
875 }
876 
877 static void
878 unsort(struct line *f, int l, int *b)
879 {
880 	int *a, i;
881 
882 	a = xcalloc(l + 1, sizeof(*a));
883 	for (i = 1; i <= l; i++)
884 		a[f[i].serial] = f[i].value;
885 	for (i = 1; i <= l; i++)
886 		b[i] = a[i];
887 	xfree(a);
888 }
889 
890 static int
891 skipline(FILE *f)
892 {
893 	int i, c;
894 
895 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
896 		continue;
897 	return (i);
898 }
899 
900 static void
901 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
902 {
903 	int m, i0, i1, j0, j1;
904 
905 	rewind(f1);
906 	rewind(f2);
907 	m = len[0];
908 	J[0] = 0;
909 	J[m + 1] = len[1] + 1;
910 	if (diff_format != D_EDIT) {
911 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
912 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
913 				i0++;
914 			j0 = J[i0 - 1] + 1;
915 			i1 = i0 - 1;
916 			while (i1 < m && J[i1 + 1] == 0)
917 				i1++;
918 			j1 = J[i1 + 1] - 1;
919 			J[i1] = j1;
920 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
921 		}
922 	} else {
923 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
924 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
925 				i0--;
926 			j0 = J[i0 + 1] - 1;
927 			i1 = i0 + 1;
928 			while (i1 > 1 && J[i1 - 1] == 0)
929 				i1--;
930 			j1 = J[i1 - 1] + 1;
931 			J[i1] = j1;
932 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
933 		}
934 	}
935 	if (m == 0)
936 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
937 	if (diff_format == D_IFDEF) {
938 		for (;;) {
939 #define	c i0
940 			if ((c = getc(f1)) == EOF)
941 				return;
942 			diff_output("%c", c);
943 		}
944 #undef c
945 	}
946 	if (anychange != 0) {
947 		if (diff_format == D_CONTEXT)
948 			dump_context_vec(f1, f2, flags);
949 		else if (diff_format == D_UNIFIED)
950 			dump_unified_vec(f1, f2, flags);
951 	}
952 }
953 
954 static void
955 range(int a, int b, char *separator)
956 {
957 	diff_output("%d", a > b ? b : a);
958 	if (a < b)
959 		diff_output("%s%d", separator, b);
960 }
961 
962 static void
963 uni_range(int a, int b)
964 {
965 	if (a < b)
966 		diff_output("%d,%d", a, b - a + 1);
967 	else if (a == b)
968 		diff_output("%d", b);
969 	else
970 		diff_output("%d,0", b);
971 }
972 
973 static char *
974 preadline(int fd, size_t rlen, off_t off)
975 {
976 	char *line;
977 	ssize_t nr;
978 
979 	line = xmalloc(rlen + 1);
980 	if ((nr = pread(fd, line, rlen, off)) < 0)
981 		err(2, "preadline");
982 	if (nr > 0 && line[nr-1] == '\n')
983 		nr--;
984 	line[nr] = '\0';
985 	return (line);
986 }
987 
988 static int
989 ignoreline(char *line)
990 {
991 	int ret;
992 
993 	ret = regexec(&ignore_re, line, 0, NULL, 0);
994 	xfree(line);
995 	return (ret == 0);	/* if it matched, it should be ignored. */
996 }
997 
998 /*
999  * Indicate that there is a difference between lines a and b of the from file
1000  * to get to lines c to d of the to file.  If a is greater then b then there
1001  * are no lines in the from file involved and this means that there were
1002  * lines appended (beginning at b).  If c is greater than d then there are
1003  * lines missing from the to file.
1004  */
1005 static void
1006 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1007     int *pflags)
1008 {
1009 	static size_t max_context = 64;
1010 	int i;
1011 
1012 restart:
1013 	if (diff_format != D_IFDEF && a > b && c > d)
1014 		return;
1015 	if (ignore_pats != NULL) {
1016 		char *line;
1017 		/*
1018 		 * All lines in the change, insert, or delete must
1019 		 * match an ignore pattern for the change to be
1020 		 * ignored.
1021 		 */
1022 		if (a <= b) {		/* Changes and deletes. */
1023 			for (i = a; i <= b; i++) {
1024 				line = preadline(fileno(f1),
1025 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1026 				if (!ignoreline(line))
1027 					goto proceed;
1028 			}
1029 		}
1030 		if (a > b || c <= d) {	/* Changes and inserts. */
1031 			for (i = c; i <= d; i++) {
1032 				line = preadline(fileno(f2),
1033 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1034 				if (!ignoreline(line))
1035 					goto proceed;
1036 			}
1037 		}
1038 		return;
1039 	}
1040 proceed:
1041 	if (*pflags & D_HEADER) {
1042 		diff_output("%s %s %s\n", diffargs, file1, file2);
1043 		*pflags &= ~D_HEADER;
1044 	}
1045 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1046 		/*
1047 		 * Allocate change records as needed.
1048 		 */
1049 		if (context_vec_ptr == context_vec_end - 1) {
1050 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1051 			max_context <<= 1;
1052 			context_vec_start = xrealloc(context_vec_start,
1053 			    max_context, sizeof(*context_vec_start));
1054 			context_vec_end = context_vec_start + max_context;
1055 			context_vec_ptr = context_vec_start + offset;
1056 		}
1057 		if (anychange == 0) {
1058 			/*
1059 			 * Print the context/unidiff header first time through.
1060 			 */
1061 			print_header(file1, file2);
1062 			anychange = 1;
1063 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1064 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1065 			/*
1066 			 * If this change is more than 'diff_context' lines from the
1067 			 * previous change, dump the record and reset it.
1068 			 */
1069 			if (diff_format == D_CONTEXT)
1070 				dump_context_vec(f1, f2, *pflags);
1071 			else
1072 				dump_unified_vec(f1, f2, *pflags);
1073 		}
1074 		context_vec_ptr++;
1075 		context_vec_ptr->a = a;
1076 		context_vec_ptr->b = b;
1077 		context_vec_ptr->c = c;
1078 		context_vec_ptr->d = d;
1079 		return;
1080 	}
1081 	if (anychange == 0)
1082 		anychange = 1;
1083 	switch (diff_format) {
1084 	case D_BRIEF:
1085 		return;
1086 	case D_NORMAL:
1087 	case D_EDIT:
1088 		range(a, b, ",");
1089 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1090 		if (diff_format == D_NORMAL)
1091 			range(c, d, ",");
1092 		diff_output("\n");
1093 		break;
1094 	case D_REVERSE:
1095 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1096 		range(a, b, " ");
1097 		diff_output("\n");
1098 		break;
1099 	case D_NREVERSE:
1100 		if (a > b)
1101 			diff_output("a%d %d\n", b, d - c + 1);
1102 		else {
1103 			diff_output("d%d %d\n", a, b - a + 1);
1104 			if (!(c > d))
1105 				/* add changed lines */
1106 				diff_output("a%d %d\n", b, d - c + 1);
1107 		}
1108 		break;
1109 	}
1110 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1111 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1112 		if (a <= b && c <= d && diff_format == D_NORMAL)
1113 			diff_output("---\n");
1114 	}
1115 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1116 	if (i != 0 && diff_format == D_EDIT) {
1117 		/*
1118 		 * A non-zero return value for D_EDIT indicates that the
1119 		 * last line printed was a bare dot (".") that has been
1120 		 * escaped as ".." to prevent ed(1) from misinterpreting
1121 		 * it.  We have to add a substitute command to change this
1122 		 * back and restart where we left off.
1123 		 */
1124 		diff_output(".\n");
1125 		diff_output("%ds/^\\.\\././\n", a);
1126 		a += i;
1127 		c += i;
1128 		goto restart;
1129 	}
1130 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1131 		diff_output(".\n");
1132 	if (inifdef) {
1133 		diff_output("#endif /* %s */\n", ifdefname);
1134 		inifdef = 0;
1135 	}
1136 }
1137 
1138 static int
1139 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1140 {
1141 	int i, j, c, lastc, col, nc;
1142 
1143 	/*
1144 	 * When doing #ifdef's, copy down to current line
1145 	 * if this is the first file, so that stuff makes it to output.
1146 	 */
1147 	if (diff_format == D_IFDEF && oldfile) {
1148 		long curpos = ftell(lb);
1149 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1150 		nc = f[a > b ? b : a - 1] - curpos;
1151 		for (i = 0; i < nc; i++)
1152 			diff_output("%c", getc(lb));
1153 	}
1154 	if (a > b)
1155 		return (0);
1156 	if (diff_format == D_IFDEF) {
1157 		if (inifdef) {
1158 			diff_output("#else /* %s%s */\n",
1159 			    oldfile == 1 ? "!" : "", ifdefname);
1160 		} else {
1161 			if (oldfile)
1162 				diff_output("#ifndef %s\n", ifdefname);
1163 			else
1164 				diff_output("#ifdef %s\n", ifdefname);
1165 		}
1166 		inifdef = 1 + oldfile;
1167 	}
1168 	for (i = a; i <= b; i++) {
1169 		fseek(lb, f[i - 1], SEEK_SET);
1170 		nc = f[i] - f[i - 1];
1171 		if (diff_format != D_IFDEF && ch != '\0') {
1172 			diff_output("%c", ch);
1173 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1174 			    || diff_format == D_UNIFIED))
1175 				diff_output("\t");
1176 			else if (diff_format != D_UNIFIED)
1177 				diff_output(" ");
1178 		}
1179 		col = 0;
1180 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1181 			if ((c = getc(lb)) == EOF) {
1182 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1183 				    diff_format == D_NREVERSE)
1184 					warnx("No newline at end of file");
1185 				else
1186 					diff_output("\n\\ No newline at end of "
1187 					    "file\n");
1188 				return (0);
1189 			}
1190 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1191 				do {
1192 					diff_output(" ");
1193 				} while (++col & 7);
1194 			} else {
1195 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1196 				    && lastc == '.') {
1197 					/*
1198 					 * Don't print a bare "." line
1199 					 * since that will confuse ed(1).
1200 					 * Print ".." instead and return,
1201 					 * giving the caller an offset
1202 					 * from which to restart.
1203 					 */
1204 					diff_output(".\n");
1205 					return (i - a + 1);
1206 				}
1207 				diff_output("%c", c);
1208 				col++;
1209 			}
1210 		}
1211 	}
1212 	return (0);
1213 }
1214 
1215 /*
1216  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1217  */
1218 static int
1219 readhash(FILE *f, int flags)
1220 {
1221 	int i, t, space;
1222 	int sum;
1223 
1224 	sum = 1;
1225 	space = 0;
1226 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1227 		if (flags & D_IGNORECASE)
1228 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1229 				if (t == EOF) {
1230 					if (i == 0)
1231 						return (0);
1232 					break;
1233 				}
1234 				sum = sum * 127 + chrtran[t];
1235 			}
1236 		else
1237 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1238 				if (t == EOF) {
1239 					if (i == 0)
1240 						return (0);
1241 					break;
1242 				}
1243 				sum = sum * 127 + t;
1244 			}
1245 	} else {
1246 		for (i = 0;;) {
1247 			switch (t = getc(f)) {
1248 			case '\t':
1249 			case '\r':
1250 			case '\v':
1251 			case '\f':
1252 			case ' ':
1253 				space++;
1254 				continue;
1255 			default:
1256 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1257 					i++;
1258 					space = 0;
1259 				}
1260 				sum = sum * 127 + chrtran[t];
1261 				i++;
1262 				continue;
1263 			case EOF:
1264 				if (i == 0)
1265 					return (0);
1266 				/* FALLTHROUGH */
1267 			case '\n':
1268 				break;
1269 			}
1270 			break;
1271 		}
1272 	}
1273 	/*
1274 	 * There is a remote possibility that we end up with a zero sum.
1275 	 * Zero is used as an EOF marker, so return 1 instead.
1276 	 */
1277 	return (sum == 0 ? 1 : sum);
1278 }
1279 
1280 static int
1281 asciifile(FILE *f)
1282 {
1283 	unsigned char buf[BUFSIZ];
1284 	size_t i, cnt;
1285 
1286 	if (f == NULL)
1287 		return (1);
1288 
1289 	rewind(f);
1290 	cnt = fread(buf, 1, sizeof(buf), f);
1291 	for (i = 0; i < cnt; i++)
1292 		if (!isprint(buf[i]) && !isspace(buf[i]))
1293 			return (0);
1294 	return (1);
1295 }
1296 
1297 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1298 
1299 static char *
1300 match_function(const long *f, int pos, FILE *fp)
1301 {
1302 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1303 	size_t nc;
1304 	int last = lastline;
1305 	char *state = NULL;
1306 
1307 	lastline = pos;
1308 	while (pos > last) {
1309 		fseek(fp, f[pos - 1], SEEK_SET);
1310 		nc = f[pos] - f[pos - 1];
1311 		if (nc >= sizeof(buf))
1312 			nc = sizeof(buf) - 1;
1313 		nc = fread(buf, 1, nc, fp);
1314 		if (nc > 0) {
1315 			buf[nc] = '\0';
1316 			buf[strcspn(buf, "\n")] = '\0';
1317 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1318 				if (begins_with(buf, "private:")) {
1319 					if (!state)
1320 						state = " (private)";
1321 				} else if (begins_with(buf, "protected:")) {
1322 					if (!state)
1323 						state = " (protected)";
1324 				} else if (begins_with(buf, "public:")) {
1325 					if (!state)
1326 						state = " (public)";
1327 				} else {
1328 					strlcpy(lastbuf, buf, sizeof lastbuf);
1329 					if (state)
1330 						strlcat(lastbuf, state,
1331 						    sizeof lastbuf);
1332 					lastmatchline = pos;
1333 					return lastbuf;
1334 				}
1335 			}
1336 		}
1337 		pos--;
1338 	}
1339 	return lastmatchline > 0 ? lastbuf : NULL;
1340 }
1341 
1342 /* dump accumulated "context" diff changes */
1343 static void
1344 dump_context_vec(FILE *f1, FILE *f2, int flags)
1345 {
1346 	struct context_vec *cvp = context_vec_start;
1347 	int lowa, upb, lowc, upd, do_output;
1348 	int a, b, c, d;
1349 	char ch, *f;
1350 
1351 	if (context_vec_start > context_vec_ptr)
1352 		return;
1353 
1354 	b = d = 0;		/* gcc */
1355 	lowa = MAX(1, cvp->a - diff_context);
1356 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1357 	lowc = MAX(1, cvp->c - diff_context);
1358 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1359 
1360 	diff_output("***************");
1361 	if ((flags & D_PROTOTYPE)) {
1362 		f = match_function(ixold, lowa-1, f1);
1363 		if (f != NULL)
1364 			diff_output(" %s", f);
1365 	}
1366 	diff_output("\n*** ");
1367 	range(lowa, upb, ",");
1368 	diff_output(" ****\n");
1369 
1370 	/*
1371 	 * Output changes to the "old" file.  The first loop suppresses
1372 	 * output if there were no changes to the "old" file (we'll see
1373 	 * the "old" lines as context in the "new" list).
1374 	 */
1375 	do_output = 0;
1376 	for (; cvp <= context_vec_ptr; cvp++)
1377 		if (cvp->a <= cvp->b) {
1378 			cvp = context_vec_start;
1379 			do_output++;
1380 			break;
1381 		}
1382 	if (do_output) {
1383 		while (cvp <= context_vec_ptr) {
1384 			a = cvp->a;
1385 			b = cvp->b;
1386 			c = cvp->c;
1387 			d = cvp->d;
1388 
1389 			if (a <= b && c <= d)
1390 				ch = 'c';
1391 			else
1392 				ch = (a <= b) ? 'd' : 'a';
1393 
1394 			if (ch == 'a')
1395 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1396 			else {
1397 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1398 				fetch(ixold, a, b, f1,
1399 				    ch == 'c' ? '!' : '-', 0, flags);
1400 			}
1401 			lowa = b + 1;
1402 			cvp++;
1403 		}
1404 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1405 	}
1406 	/* output changes to the "new" file */
1407 	diff_output("--- ");
1408 	range(lowc, upd, ",");
1409 	diff_output(" ----\n");
1410 
1411 	do_output = 0;
1412 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1413 		if (cvp->c <= cvp->d) {
1414 			cvp = context_vec_start;
1415 			do_output++;
1416 			break;
1417 		}
1418 	if (do_output) {
1419 		while (cvp <= context_vec_ptr) {
1420 			a = cvp->a;
1421 			b = cvp->b;
1422 			c = cvp->c;
1423 			d = cvp->d;
1424 
1425 			if (a <= b && c <= d)
1426 				ch = 'c';
1427 			else
1428 				ch = (a <= b) ? 'd' : 'a';
1429 
1430 			if (ch == 'd')
1431 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1432 			else {
1433 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1434 				fetch(ixnew, c, d, f2,
1435 				    ch == 'c' ? '!' : '+', 0, flags);
1436 			}
1437 			lowc = d + 1;
1438 			cvp++;
1439 		}
1440 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1441 	}
1442 	context_vec_ptr = context_vec_start - 1;
1443 }
1444 
1445 /* dump accumulated "unified" diff changes */
1446 static void
1447 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1448 {
1449 	struct context_vec *cvp = context_vec_start;
1450 	int lowa, upb, lowc, upd;
1451 	int a, b, c, d;
1452 	char ch, *f;
1453 
1454 	if (context_vec_start > context_vec_ptr)
1455 		return;
1456 
1457 	b = d = 0;		/* gcc */
1458 	lowa = MAX(1, cvp->a - diff_context);
1459 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1460 	lowc = MAX(1, cvp->c - diff_context);
1461 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1462 
1463 	diff_output("@@ -");
1464 	uni_range(lowa, upb);
1465 	diff_output(" +");
1466 	uni_range(lowc, upd);
1467 	diff_output(" @@");
1468 	if ((flags & D_PROTOTYPE)) {
1469 		f = match_function(ixold, lowa-1, f1);
1470 		if (f != NULL)
1471 			diff_output(" %s", f);
1472 	}
1473 	diff_output("\n");
1474 
1475 	/*
1476 	 * Output changes in "unified" diff format--the old and new lines
1477 	 * are printed together.
1478 	 */
1479 	for (; cvp <= context_vec_ptr; cvp++) {
1480 		a = cvp->a;
1481 		b = cvp->b;
1482 		c = cvp->c;
1483 		d = cvp->d;
1484 
1485 		/*
1486 		 * c: both new and old changes
1487 		 * d: only changes in the old file
1488 		 * a: only changes in the new file
1489 		 */
1490 		if (a <= b && c <= d)
1491 			ch = 'c';
1492 		else
1493 			ch = (a <= b) ? 'd' : 'a';
1494 
1495 		switch (ch) {
1496 		case 'c':
1497 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1498 			fetch(ixold, a, b, f1, '-', 0, flags);
1499 			fetch(ixnew, c, d, f2, '+', 0, flags);
1500 			break;
1501 		case 'd':
1502 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1503 			fetch(ixold, a, b, f1, '-', 0, flags);
1504 			break;
1505 		case 'a':
1506 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1507 			fetch(ixnew, c, d, f2, '+', 0, flags);
1508 			break;
1509 		}
1510 		lowa = b + 1;
1511 		lowc = d + 1;
1512 	}
1513 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1514 
1515 	context_vec_ptr = context_vec_start - 1;
1516 }
1517 
1518 static void
1519 print_header(const char *file1, const char *file2)
1520 {
1521 	if (label[0] != NULL)
1522 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1523 		    label[0]);
1524 	else
1525 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1526 		    file1, ctime(&stb1.st_mtime));
1527 	if (label[1] != NULL)
1528 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1529 		    label[1]);
1530 	else
1531 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1532 		    file2, ctime(&stb2.st_mtime));
1533 }
1534