xref: /openbsd-src/usr.bin/diff/diffreg.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: diffreg.c,v 1.80 2011/04/01 17:25:26 nicm Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 #define	diff_output	printf
176 static FILE	*opentemp(const char *);
177 static void	 output(char *, FILE *, char *, FILE *, int);
178 static void	 check(FILE *, FILE *, int);
179 static void	 range(int, int, char *);
180 static void	 uni_range(int, int);
181 static void	 dump_context_vec(FILE *, FILE *, int);
182 static void	 dump_unified_vec(FILE *, FILE *, int);
183 static void	 prepare(int, FILE *, off_t, int);
184 static void	 prune(void);
185 static void	 equiv(struct line *, int, struct line *, int, int *);
186 static void	 unravel(int);
187 static void	 unsort(struct line *, int, int *);
188 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
189 static void	 sort(struct line *, int);
190 static void	 print_header(const char *, const char *);
191 static int	 ignoreline(char *);
192 static int	 asciifile(FILE *);
193 static int	 fetch(long *, int, int, FILE *, int, int, int);
194 static int	 newcand(int, int, int);
195 static int	 search(int *, int, int);
196 static int	 skipline(FILE *);
197 static int	 isqrt(int);
198 static int	 stone(int *, int, int *, int *, int);
199 static int	 readhash(FILE *, int);
200 static int	 files_differ(FILE *, FILE *, int);
201 static char	*match_function(const long *, int, FILE *);
202 static char	*preadline(int, size_t, off_t);
203 
204 static int  *J;			/* will be overlaid on class */
205 static int  *class;		/* will be overlaid on file[0] */
206 static int  *klist;		/* will be overlaid on file[0] after class */
207 static int  *member;		/* will be overlaid on file[1] */
208 static int   clen;
209 static int   inifdef;		/* whether or not we are in a #ifdef block */
210 static int   len[2];
211 static int   pref, suff;	/* length of prefix and suffix */
212 static int   slen[2];
213 static int   anychange;
214 static long *ixnew;		/* will be overlaid on file[1] */
215 static long *ixold;		/* will be overlaid on klist */
216 static struct cand *clist;	/* merely a free storage pot for candidates */
217 static int   clistlen;		/* the length of clist */
218 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
219 static u_char *chrtran;		/* translation table for case-folding */
220 static struct context_vec *context_vec_start;
221 static struct context_vec *context_vec_end;
222 static struct context_vec *context_vec_ptr;
223 
224 #define FUNCTION_CONTEXT_SIZE	55
225 static char lastbuf[FUNCTION_CONTEXT_SIZE];
226 static int lastline;
227 static int lastmatchline;
228 
229 
230 /*
231  * chrtran points to one of 2 translation tables: cup2low if folding upper to
232  * lower case clow2low if not folding case
233  */
234 u_char clow2low[256] = {
235 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
236 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
237 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
238 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
239 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
240 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
241 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
242 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
243 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
244 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
245 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
246 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
247 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
248 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
249 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
250 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
251 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
252 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
253 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
254 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
255 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
256 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
257 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 	0xfd, 0xfe, 0xff
259 };
260 
261 u_char cup2low[256] = {
262 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
263 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
264 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
265 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
266 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
267 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
268 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
269 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
270 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
271 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
272 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
273 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
274 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
275 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
276 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
277 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
278 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
279 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
280 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
281 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
282 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
283 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
284 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
285 	0xfd, 0xfe, 0xff
286 };
287 
288 int
289 diffreg(char *file1, char *file2, int flags)
290 {
291 	FILE *f1, *f2;
292 	int i, rval, ostdout = -1;
293 	pid_t pid = -1;
294 
295 	f1 = f2 = NULL;
296 	rval = D_SAME;
297 	anychange = 0;
298 	lastline = 0;
299 	lastmatchline = 0;
300 	context_vec_ptr = context_vec_start - 1;
301 	if (flags & D_IGNORECASE)
302 		chrtran = cup2low;
303 	else
304 		chrtran = clow2low;
305 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
306 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
308 		goto closem;
309 
310 	if (flags & D_EMPTY1)
311 		f1 = fopen(_PATH_DEVNULL, "r");
312 	else {
313 		if (!S_ISREG(stb1.st_mode)) {
314 			if ((f1 = opentemp(file1)) == NULL ||
315 			    fstat(fileno(f1), &stb1) < 0) {
316 				warn("%s", file1);
317 				status |= 2;
318 				goto closem;
319 			}
320 		} else if (strcmp(file1, "-") == 0)
321 			f1 = stdin;
322 		else
323 			f1 = fopen(file1, "r");
324 	}
325 	if (f1 == NULL) {
326 		warn("%s", file1);
327 		status |= 2;
328 		goto closem;
329 	}
330 
331 	if (flags & D_EMPTY2)
332 		f2 = fopen(_PATH_DEVNULL, "r");
333 	else {
334 		if (!S_ISREG(stb2.st_mode)) {
335 			if ((f2 = opentemp(file2)) == NULL ||
336 			    fstat(fileno(f2), &stb2) < 0) {
337 				warn("%s", file2);
338 				status |= 2;
339 				goto closem;
340 			}
341 		} else if (strcmp(file2, "-") == 0)
342 			f2 = stdin;
343 		else
344 			f2 = fopen(file2, "r");
345 	}
346 	if (f2 == NULL) {
347 		warn("%s", file2);
348 		status |= 2;
349 		goto closem;
350 	}
351 
352 	switch (files_differ(f1, f2, flags)) {
353 	case 0:
354 		goto closem;
355 	case 1:
356 		break;
357 	default:
358 		/* error */
359 		status |= 2;
360 		goto closem;
361 	}
362 
363 	if ((flags & D_FORCEASCII) == 0 &&
364 	    (!asciifile(f1) || !asciifile(f2))) {
365 		rval = D_BINARY;
366 		status |= 1;
367 		goto closem;
368 	}
369 	if (lflag) {
370 		/* redirect stdout to pr */
371 		int pfd[2];
372 		char *header;
373 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
374 
375 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
376 		prargv[2] = header;
377 		fflush(stdout);
378 		rewind(stdout);
379 		pipe(pfd);
380 		switch ((pid = fork())) {
381 		case -1:
382 			warnx("No more processes");
383 			status |= 2;
384 			xfree(header);
385 			rval = D_ERROR;
386 			goto closem;
387 		case 0:
388 			/* child */
389 			if (pfd[0] != STDIN_FILENO) {
390 				dup2(pfd[0], STDIN_FILENO);
391 				close(pfd[0]);
392 			}
393 			close(pfd[1]);
394 			execv(_PATH_PR, prargv);
395 			_exit(127);
396 		default:
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			xfree(header);
406 		}
407 	}
408 	prepare(0, f1, stb1.st_size, flags);
409 	prepare(1, f2, stb2.st_size, flags);
410 
411 	prune();
412 	sort(sfile[0], slen[0]);
413 	sort(sfile[1], slen[1]);
414 
415 	member = (int *)file[1];
416 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
417 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
418 
419 	class = (int *)file[0];
420 	unsort(sfile[0], slen[0], class);
421 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
422 
423 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
424 	clen = 0;
425 	clistlen = 100;
426 	clist = xcalloc(clistlen, sizeof(*clist));
427 	i = stone(class, slen[0], member, klist, flags);
428 	xfree(member);
429 	xfree(class);
430 
431 	J = xrealloc(J, len[0] + 2, sizeof(*J));
432 	unravel(klist[i]);
433 	xfree(clist);
434 	xfree(klist);
435 
436 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
437 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
438 	check(f1, f2, flags);
439 	output(file1, f1, file2, f2, flags);
440 	if (ostdout != -1) {
441 		int wstatus;
442 
443 		/* close the pipe to pr and restore stdout */
444 		fflush(stdout);
445 		rewind(stdout);
446 		if (ostdout != STDOUT_FILENO) {
447 			close(STDOUT_FILENO);
448 			dup2(ostdout, STDOUT_FILENO);
449 			close(ostdout);
450 		}
451 		waitpid(pid, &wstatus, 0);
452 	}
453 closem:
454 	if (anychange) {
455 		status |= 1;
456 		if (rval == D_SAME)
457 			rval = D_DIFFER;
458 	}
459 	if (f1 != NULL)
460 		fclose(f1);
461 	if (f2 != NULL)
462 		fclose(f2);
463 
464 	return (rval);
465 }
466 
467 /*
468  * Check to see if the given files differ.
469  * Returns 0 if they are the same, 1 if different, and -1 on error.
470  * XXX - could use code from cmp(1) [faster]
471  */
472 static int
473 files_differ(FILE *f1, FILE *f2, int flags)
474 {
475 	char buf1[BUFSIZ], buf2[BUFSIZ];
476 	size_t i, j;
477 
478 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
479 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
480 		return (1);
481 	for (;;) {
482 		i = fread(buf1, 1, sizeof(buf1), f1);
483 		j = fread(buf2, 1, sizeof(buf2), f2);
484 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
485 			return (-1);
486 		if (i != j)
487 			return (1);
488 		if (i == 0)
489 			return (0);
490 		if (memcmp(buf1, buf2, i) != 0)
491 			return (1);
492 	}
493 }
494 
495 static FILE *
496 opentemp(const char *file)
497 {
498 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
499 	ssize_t nread;
500 	int ifd, ofd;
501 
502 	if (strcmp(file, "-") == 0)
503 		ifd = STDIN_FILENO;
504 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
505 		return (NULL);
506 
507 	if ((tempdir = getenv("TMPDIR")) == NULL)
508 		tempdir = _PATH_TMP;
509 
510 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
511 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
512 	    sizeof(tempfile)) {
513 		close(ifd);
514 		errno = ENAMETOOLONG;
515 		return (NULL);
516 	}
517 
518 	if ((ofd = mkstemp(tempfile)) < 0) {
519 		close(ifd);
520 		return (NULL);
521 	}
522 	unlink(tempfile);
523 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
524 		if (write(ofd, buf, nread) != nread) {
525 			close(ifd);
526 			close(ofd);
527 			return (NULL);
528 		}
529 	}
530 	close(ifd);
531 	lseek(ofd, (off_t)0, SEEK_SET);
532 	return (fdopen(ofd, "r"));
533 }
534 
535 char *
536 splice(char *dir, char *file)
537 {
538 	char *tail, *buf;
539 
540 	if ((tail = strrchr(file, '/')) == NULL)
541 		tail = file;
542 	else
543 		tail++;
544 	xasprintf(&buf, "%s/%s", dir, tail);
545 	return (buf);
546 }
547 
548 static void
549 prepare(int i, FILE *fd, off_t filesize, int flags)
550 {
551 	struct line *p;
552 	int j, h;
553 	size_t sz;
554 
555 	rewind(fd);
556 
557 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
558 	if (sz < 100)
559 		sz = 100;
560 
561 	p = xcalloc(sz + 3, sizeof(*p));
562 	for (j = 0; (h = readhash(fd, flags));) {
563 		if (j == sz) {
564 			sz = sz * 3 / 2;
565 			p = xrealloc(p, sz + 3, sizeof(*p));
566 		}
567 		p[++j].value = h;
568 	}
569 	len[i] = j;
570 	file[i] = p;
571 }
572 
573 static void
574 prune(void)
575 {
576 	int i, j;
577 
578 	for (pref = 0; pref < len[0] && pref < len[1] &&
579 	    file[0][pref + 1].value == file[1][pref + 1].value;
580 	    pref++)
581 		;
582 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
583 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
584 	    suff++)
585 		;
586 	for (j = 0; j < 2; j++) {
587 		sfile[j] = file[j] + pref;
588 		slen[j] = len[j] - pref - suff;
589 		for (i = 0; i <= slen[j]; i++)
590 			sfile[j][i].serial = i;
591 	}
592 }
593 
594 static void
595 equiv(struct line *a, int n, struct line *b, int m, int *c)
596 {
597 	int i, j;
598 
599 	i = j = 1;
600 	while (i <= n && j <= m) {
601 		if (a[i].value < b[j].value)
602 			a[i++].value = 0;
603 		else if (a[i].value == b[j].value)
604 			a[i++].value = j;
605 		else
606 			j++;
607 	}
608 	while (i <= n)
609 		a[i++].value = 0;
610 	b[m + 1].value = 0;
611 	j = 0;
612 	while (++j <= m) {
613 		c[j] = -b[j].serial;
614 		while (b[j + 1].value == b[j].value) {
615 			j++;
616 			c[j] = b[j].serial;
617 		}
618 	}
619 	c[j] = -1;
620 }
621 
622 /* Code taken from ping.c */
623 static int
624 isqrt(int n)
625 {
626 	int y, x = 1;
627 
628 	if (n == 0)
629 		return (0);
630 
631 	do { /* newton was a stinker */
632 		y = x;
633 		x = n / x;
634 		x += y;
635 		x /= 2;
636 	} while ((x - y) > 1 || (x - y) < -1);
637 
638 	return (x);
639 }
640 
641 static int
642 stone(int *a, int n, int *b, int *c, int flags)
643 {
644 	int i, k, y, j, l;
645 	int oldc, tc, oldl, sq;
646 	u_int numtries, bound;
647 
648 	if (flags & D_MINIMAL)
649 		bound = UINT_MAX;
650 	else {
651 		sq = isqrt(n);
652 		bound = MAX(256, sq);
653 	}
654 
655 	k = 0;
656 	c[0] = newcand(0, 0, 0);
657 	for (i = 1; i <= n; i++) {
658 		j = a[i];
659 		if (j == 0)
660 			continue;
661 		y = -b[j];
662 		oldl = 0;
663 		oldc = c[0];
664 		numtries = 0;
665 		do {
666 			if (y <= clist[oldc].y)
667 				continue;
668 			l = search(c, k, y);
669 			if (l != oldl + 1)
670 				oldc = c[l - 1];
671 			if (l <= k) {
672 				if (clist[c[l]].y <= y)
673 					continue;
674 				tc = c[l];
675 				c[l] = newcand(i, y, oldc);
676 				oldc = tc;
677 				oldl = l;
678 				numtries++;
679 			} else {
680 				c[l] = newcand(i, y, oldc);
681 				k++;
682 				break;
683 			}
684 		} while ((y = b[++j]) > 0 && numtries < bound);
685 	}
686 	return (k);
687 }
688 
689 static int
690 newcand(int x, int y, int pred)
691 {
692 	struct cand *q;
693 
694 	if (clen == clistlen) {
695 		clistlen = clistlen * 11 / 10;
696 		clist = xrealloc(clist, clistlen, sizeof(*clist));
697 	}
698 	q = clist + clen;
699 	q->x = x;
700 	q->y = y;
701 	q->pred = pred;
702 	return (clen++);
703 }
704 
705 static int
706 search(int *c, int k, int y)
707 {
708 	int i, j, l, t;
709 
710 	if (clist[c[k]].y < y)	/* quick look for typical case */
711 		return (k + 1);
712 	i = 0;
713 	j = k + 1;
714 	for (;;) {
715 		l = (i + j) / 2;
716 		if (l <= i)
717 			break;
718 		t = clist[c[l]].y;
719 		if (t > y)
720 			j = l;
721 		else if (t < y)
722 			i = l;
723 		else
724 			return (l);
725 	}
726 	return (l + 1);
727 }
728 
729 static void
730 unravel(int p)
731 {
732 	struct cand *q;
733 	int i;
734 
735 	for (i = 0; i <= len[0]; i++)
736 		J[i] = i <= pref ? i :
737 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
738 	for (q = clist + p; q->y != 0; q = clist + q->pred)
739 		J[q->x + pref] = q->y + pref;
740 }
741 
742 /*
743  * Check does double duty:
744  *  1.	ferret out any fortuitous correspondences due
745  *	to confounding by hashing (which result in "jackpot")
746  *  2.  collect random access indexes to the two files
747  */
748 static void
749 check(FILE *f1, FILE *f2, int flags)
750 {
751 	int i, j, jackpot, c, d;
752 	long ctold, ctnew;
753 
754 	rewind(f1);
755 	rewind(f2);
756 	j = 1;
757 	ixold[0] = ixnew[0] = 0;
758 	jackpot = 0;
759 	ctold = ctnew = 0;
760 	for (i = 1; i <= len[0]; i++) {
761 		if (J[i] == 0) {
762 			ixold[i] = ctold += skipline(f1);
763 			continue;
764 		}
765 		while (j < J[i]) {
766 			ixnew[j] = ctnew += skipline(f2);
767 			j++;
768 		}
769 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
770 			for (;;) {
771 				c = getc(f1);
772 				d = getc(f2);
773 				/*
774 				 * GNU diff ignores a missing newline
775 				 * in one file for -b or -w.
776 				 */
777 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
778 				    ((c == EOF && d == '\n') ||
779 				    (c == '\n' && d == EOF))) {
780 					break;
781 				}
782 				ctold++;
783 				ctnew++;
784 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
785 				    isspace(d)) {
786 					do {
787 						if (c == '\n')
788 							break;
789 						ctold++;
790 					} while (isspace(c = getc(f1)));
791 					do {
792 						if (d == '\n')
793 							break;
794 						ctnew++;
795 					} while (isspace(d = getc(f2)));
796 				} else if ((flags & D_IGNOREBLANKS)) {
797 					while (isspace(c) && c != '\n') {
798 						c = getc(f1);
799 						ctold++;
800 					}
801 					while (isspace(d) && d != '\n') {
802 						d = getc(f2);
803 						ctnew++;
804 					}
805 				}
806 				if (chrtran[c] != chrtran[d]) {
807 					jackpot++;
808 					J[i] = 0;
809 					if (c != '\n' && c != EOF)
810 						ctold += skipline(f1);
811 					if (d != '\n' && c != EOF)
812 						ctnew += skipline(f2);
813 					break;
814 				}
815 				if (c == '\n' || c == EOF)
816 					break;
817 			}
818 		} else {
819 			for (;;) {
820 				ctold++;
821 				ctnew++;
822 				if ((c = getc(f1)) != (d = getc(f2))) {
823 					/* jackpot++; */
824 					J[i] = 0;
825 					if (c != '\n' && c != EOF)
826 						ctold += skipline(f1);
827 					if (d != '\n' && c != EOF)
828 						ctnew += skipline(f2);
829 					break;
830 				}
831 				if (c == '\n' || c == EOF)
832 					break;
833 			}
834 		}
835 		ixold[i] = ctold;
836 		ixnew[j] = ctnew;
837 		j++;
838 	}
839 	for (; j <= len[1]; j++)
840 		ixnew[j] = ctnew += skipline(f2);
841 	/*
842 	 * if (jackpot)
843 	 *	fprintf(stderr, "jackpot\n");
844 	 */
845 }
846 
847 /* shellsort CACM #201 */
848 static void
849 sort(struct line *a, int n)
850 {
851 	struct line *ai, *aim, w;
852 	int j, m = 0, k;
853 
854 	if (n == 0)
855 		return;
856 	for (j = 1; j <= n; j *= 2)
857 		m = 2 * j - 1;
858 	for (m /= 2; m != 0; m /= 2) {
859 		k = n - m;
860 		for (j = 1; j <= k; j++) {
861 			for (ai = &a[j]; ai > a; ai -= m) {
862 				aim = &ai[m];
863 				if (aim < ai)
864 					break;	/* wraparound */
865 				if (aim->value > ai[0].value ||
866 				    (aim->value == ai[0].value &&
867 					aim->serial > ai[0].serial))
868 					break;
869 				w.value = ai[0].value;
870 				ai[0].value = aim->value;
871 				aim->value = w.value;
872 				w.serial = ai[0].serial;
873 				ai[0].serial = aim->serial;
874 				aim->serial = w.serial;
875 			}
876 		}
877 	}
878 }
879 
880 static void
881 unsort(struct line *f, int l, int *b)
882 {
883 	int *a, i;
884 
885 	a = xcalloc(l + 1, sizeof(*a));
886 	for (i = 1; i <= l; i++)
887 		a[f[i].serial] = f[i].value;
888 	for (i = 1; i <= l; i++)
889 		b[i] = a[i];
890 	xfree(a);
891 }
892 
893 static int
894 skipline(FILE *f)
895 {
896 	int i, c;
897 
898 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
899 		continue;
900 	return (i);
901 }
902 
903 static void
904 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
905 {
906 	int m, i0, i1, j0, j1;
907 
908 	rewind(f1);
909 	rewind(f2);
910 	m = len[0];
911 	J[0] = 0;
912 	J[m + 1] = len[1] + 1;
913 	if (diff_format != D_EDIT) {
914 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
915 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
916 				i0++;
917 			j0 = J[i0 - 1] + 1;
918 			i1 = i0 - 1;
919 			while (i1 < m && J[i1 + 1] == 0)
920 				i1++;
921 			j1 = J[i1 + 1] - 1;
922 			J[i1] = j1;
923 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
924 		}
925 	} else {
926 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
927 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
928 				i0--;
929 			j0 = J[i0 + 1] - 1;
930 			i1 = i0 + 1;
931 			while (i1 > 1 && J[i1 - 1] == 0)
932 				i1--;
933 			j1 = J[i1 - 1] + 1;
934 			J[i1] = j1;
935 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
936 		}
937 	}
938 	if (m == 0)
939 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
940 	if (diff_format == D_IFDEF) {
941 		for (;;) {
942 #define	c i0
943 			if ((c = getc(f1)) == EOF)
944 				return;
945 			diff_output("%c", c);
946 		}
947 #undef c
948 	}
949 	if (anychange != 0) {
950 		if (diff_format == D_CONTEXT)
951 			dump_context_vec(f1, f2, flags);
952 		else if (diff_format == D_UNIFIED)
953 			dump_unified_vec(f1, f2, flags);
954 	}
955 }
956 
957 static void
958 range(int a, int b, char *separator)
959 {
960 	diff_output("%d", a > b ? b : a);
961 	if (a < b)
962 		diff_output("%s%d", separator, b);
963 }
964 
965 static void
966 uni_range(int a, int b)
967 {
968 	if (a < b)
969 		diff_output("%d,%d", a, b - a + 1);
970 	else if (a == b)
971 		diff_output("%d", b);
972 	else
973 		diff_output("%d,0", b);
974 }
975 
976 static char *
977 preadline(int fd, size_t rlen, off_t off)
978 {
979 	char *line;
980 	ssize_t nr;
981 
982 	line = xmalloc(rlen + 1);
983 	if ((nr = pread(fd, line, rlen, off)) < 0)
984 		err(2, "preadline");
985 	if (nr > 0 && line[nr-1] == '\n')
986 		nr--;
987 	line[nr] = '\0';
988 	return (line);
989 }
990 
991 static int
992 ignoreline(char *line)
993 {
994 	int ret;
995 
996 	ret = regexec(&ignore_re, line, 0, NULL, 0);
997 	xfree(line);
998 	return (ret == 0);	/* if it matched, it should be ignored. */
999 }
1000 
1001 /*
1002  * Indicate that there is a difference between lines a and b of the from file
1003  * to get to lines c to d of the to file.  If a is greater then b then there
1004  * are no lines in the from file involved and this means that there were
1005  * lines appended (beginning at b).  If c is greater than d then there are
1006  * lines missing from the to file.
1007  */
1008 static void
1009 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1010     int *pflags)
1011 {
1012 	static size_t max_context = 64;
1013 	int i;
1014 
1015 restart:
1016 	if (diff_format != D_IFDEF && a > b && c > d)
1017 		return;
1018 	if (ignore_pats != NULL) {
1019 		char *line;
1020 		/*
1021 		 * All lines in the change, insert, or delete must
1022 		 * match an ignore pattern for the change to be
1023 		 * ignored.
1024 		 */
1025 		if (a <= b) {		/* Changes and deletes. */
1026 			for (i = a; i <= b; i++) {
1027 				line = preadline(fileno(f1),
1028 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1029 				if (!ignoreline(line))
1030 					goto proceed;
1031 			}
1032 		}
1033 		if (a > b || c <= d) {	/* Changes and inserts. */
1034 			for (i = c; i <= d; i++) {
1035 				line = preadline(fileno(f2),
1036 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1037 				if (!ignoreline(line))
1038 					goto proceed;
1039 			}
1040 		}
1041 		return;
1042 	}
1043 proceed:
1044 	if (*pflags & D_HEADER) {
1045 		diff_output("%s %s %s\n", diffargs, file1, file2);
1046 		*pflags &= ~D_HEADER;
1047 	}
1048 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1049 		/*
1050 		 * Allocate change records as needed.
1051 		 */
1052 		if (context_vec_ptr == context_vec_end - 1) {
1053 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1054 			max_context <<= 1;
1055 			context_vec_start = xrealloc(context_vec_start,
1056 			    max_context, sizeof(*context_vec_start));
1057 			context_vec_end = context_vec_start + max_context;
1058 			context_vec_ptr = context_vec_start + offset;
1059 		}
1060 		if (anychange == 0) {
1061 			/*
1062 			 * Print the context/unidiff header first time through.
1063 			 */
1064 			print_header(file1, file2);
1065 			anychange = 1;
1066 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1067 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1068 			/*
1069 			 * If this change is more than 'diff_context' lines from the
1070 			 * previous change, dump the record and reset it.
1071 			 */
1072 			if (diff_format == D_CONTEXT)
1073 				dump_context_vec(f1, f2, *pflags);
1074 			else
1075 				dump_unified_vec(f1, f2, *pflags);
1076 		}
1077 		context_vec_ptr++;
1078 		context_vec_ptr->a = a;
1079 		context_vec_ptr->b = b;
1080 		context_vec_ptr->c = c;
1081 		context_vec_ptr->d = d;
1082 		return;
1083 	}
1084 	if (anychange == 0)
1085 		anychange = 1;
1086 	switch (diff_format) {
1087 	case D_BRIEF:
1088 		return;
1089 	case D_NORMAL:
1090 	case D_EDIT:
1091 		range(a, b, ",");
1092 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1093 		if (diff_format == D_NORMAL)
1094 			range(c, d, ",");
1095 		diff_output("\n");
1096 		break;
1097 	case D_REVERSE:
1098 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1099 		range(a, b, " ");
1100 		diff_output("\n");
1101 		break;
1102 	case D_NREVERSE:
1103 		if (a > b)
1104 			diff_output("a%d %d\n", b, d - c + 1);
1105 		else {
1106 			diff_output("d%d %d\n", a, b - a + 1);
1107 			if (!(c > d))
1108 				/* add changed lines */
1109 				diff_output("a%d %d\n", b, d - c + 1);
1110 		}
1111 		break;
1112 	}
1113 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1114 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1115 		if (a <= b && c <= d && diff_format == D_NORMAL)
1116 			diff_output("---\n");
1117 	}
1118 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1119 	if (i != 0 && diff_format == D_EDIT) {
1120 		/*
1121 		 * A non-zero return value for D_EDIT indicates that the
1122 		 * last line printed was a bare dot (".") that has been
1123 		 * escaped as ".." to prevent ed(1) from misinterpreting
1124 		 * it.  We have to add a substitute command to change this
1125 		 * back and restart where we left off.
1126 		 */
1127 		diff_output(".\n");
1128 		diff_output("%ds/^\\.\\././\n", a);
1129 		a += i;
1130 		c += i;
1131 		goto restart;
1132 	}
1133 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1134 		diff_output(".\n");
1135 	if (inifdef) {
1136 		diff_output("#endif /* %s */\n", ifdefname);
1137 		inifdef = 0;
1138 	}
1139 }
1140 
1141 static int
1142 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1143 {
1144 	int i, j, c, lastc, col, nc;
1145 
1146 	/*
1147 	 * When doing #ifdef's, copy down to current line
1148 	 * if this is the first file, so that stuff makes it to output.
1149 	 */
1150 	if (diff_format == D_IFDEF && oldfile) {
1151 		long curpos = ftell(lb);
1152 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1153 		nc = f[a > b ? b : a - 1] - curpos;
1154 		for (i = 0; i < nc; i++)
1155 			diff_output("%c", getc(lb));
1156 	}
1157 	if (a > b)
1158 		return (0);
1159 	if (diff_format == D_IFDEF) {
1160 		if (inifdef) {
1161 			diff_output("#else /* %s%s */\n",
1162 			    oldfile == 1 ? "!" : "", ifdefname);
1163 		} else {
1164 			if (oldfile)
1165 				diff_output("#ifndef %s\n", ifdefname);
1166 			else
1167 				diff_output("#ifdef %s\n", ifdefname);
1168 		}
1169 		inifdef = 1 + oldfile;
1170 	}
1171 	for (i = a; i <= b; i++) {
1172 		fseek(lb, f[i - 1], SEEK_SET);
1173 		nc = f[i] - f[i - 1];
1174 		if (diff_format != D_IFDEF && ch != '\0') {
1175 			diff_output("%c", ch);
1176 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1177 			    || diff_format == D_UNIFIED))
1178 				diff_output("\t");
1179 			else if (diff_format != D_UNIFIED)
1180 				diff_output(" ");
1181 		}
1182 		col = 0;
1183 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1184 			if ((c = getc(lb)) == EOF) {
1185 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1186 				    diff_format == D_NREVERSE)
1187 					warnx("No newline at end of file");
1188 				else
1189 					diff_output("\n\\ No newline at end of "
1190 					    "file\n");
1191 				return (0);
1192 			}
1193 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1194 				do {
1195 					diff_output(" ");
1196 				} while (++col & 7);
1197 			} else {
1198 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1199 				    && lastc == '.') {
1200 					/*
1201 					 * Don't print a bare "." line
1202 					 * since that will confuse ed(1).
1203 					 * Print ".." instead and return,
1204 					 * giving the caller an offset
1205 					 * from which to restart.
1206 					 */
1207 					diff_output(".\n");
1208 					return (i - a + 1);
1209 				}
1210 				diff_output("%c", c);
1211 				col++;
1212 			}
1213 		}
1214 	}
1215 	return (0);
1216 }
1217 
1218 /*
1219  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1220  */
1221 static int
1222 readhash(FILE *f, int flags)
1223 {
1224 	int i, t, space;
1225 	int sum;
1226 
1227 	sum = 1;
1228 	space = 0;
1229 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1230 		if (flags & D_IGNORECASE)
1231 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1232 				if (t == EOF) {
1233 					if (i == 0)
1234 						return (0);
1235 					break;
1236 				}
1237 				sum = sum * 127 + chrtran[t];
1238 			}
1239 		else
1240 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1241 				if (t == EOF) {
1242 					if (i == 0)
1243 						return (0);
1244 					break;
1245 				}
1246 				sum = sum * 127 + t;
1247 			}
1248 	} else {
1249 		for (i = 0;;) {
1250 			switch (t = getc(f)) {
1251 			case '\t':
1252 			case '\r':
1253 			case '\v':
1254 			case '\f':
1255 			case ' ':
1256 				space++;
1257 				continue;
1258 			default:
1259 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1260 					i++;
1261 					space = 0;
1262 				}
1263 				sum = sum * 127 + chrtran[t];
1264 				i++;
1265 				continue;
1266 			case EOF:
1267 				if (i == 0)
1268 					return (0);
1269 				/* FALLTHROUGH */
1270 			case '\n':
1271 				break;
1272 			}
1273 			break;
1274 		}
1275 	}
1276 	/*
1277 	 * There is a remote possibility that we end up with a zero sum.
1278 	 * Zero is used as an EOF marker, so return 1 instead.
1279 	 */
1280 	return (sum == 0 ? 1 : sum);
1281 }
1282 
1283 static int
1284 asciifile(FILE *f)
1285 {
1286 	unsigned char buf[BUFSIZ];
1287 	size_t i, cnt;
1288 
1289 	if (f == NULL)
1290 		return (1);
1291 
1292 	rewind(f);
1293 	cnt = fread(buf, 1, sizeof(buf), f);
1294 	for (i = 0; i < cnt; i++)
1295 		if (!isprint(buf[i]) && !isspace(buf[i]))
1296 			return (0);
1297 	return (1);
1298 }
1299 
1300 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1301 
1302 static char *
1303 match_function(const long *f, int pos, FILE *fp)
1304 {
1305 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1306 	size_t nc;
1307 	int last = lastline;
1308 	char *state = NULL;
1309 
1310 	lastline = pos;
1311 	while (pos > last) {
1312 		fseek(fp, f[pos - 1], SEEK_SET);
1313 		nc = f[pos] - f[pos - 1];
1314 		if (nc >= sizeof(buf))
1315 			nc = sizeof(buf) - 1;
1316 		nc = fread(buf, 1, nc, fp);
1317 		if (nc > 0) {
1318 			buf[nc] = '\0';
1319 			buf[strcspn(buf, "\n")] = '\0';
1320 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1321 				if (begins_with(buf, "private:")) {
1322 					if (!state)
1323 						state = " (private)";
1324 				} else if (begins_with(buf, "protected:")) {
1325 					if (!state)
1326 						state = " (protected)";
1327 				} else if (begins_with(buf, "public:")) {
1328 					if (!state)
1329 						state = " (public)";
1330 				} else {
1331 					strlcpy(lastbuf, buf, sizeof lastbuf);
1332 					if (state)
1333 						strlcat(lastbuf, state,
1334 						    sizeof lastbuf);
1335 					lastmatchline = pos;
1336 					return lastbuf;
1337 				}
1338 			}
1339 		}
1340 		pos--;
1341 	}
1342 	return lastmatchline > 0 ? lastbuf : NULL;
1343 }
1344 
1345 /* dump accumulated "context" diff changes */
1346 static void
1347 dump_context_vec(FILE *f1, FILE *f2, int flags)
1348 {
1349 	struct context_vec *cvp = context_vec_start;
1350 	int lowa, upb, lowc, upd, do_output;
1351 	int a, b, c, d;
1352 	char ch, *f;
1353 
1354 	if (context_vec_start > context_vec_ptr)
1355 		return;
1356 
1357 	b = d = 0;		/* gcc */
1358 	lowa = MAX(1, cvp->a - diff_context);
1359 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1360 	lowc = MAX(1, cvp->c - diff_context);
1361 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1362 
1363 	diff_output("***************");
1364 	if ((flags & D_PROTOTYPE)) {
1365 		f = match_function(ixold, lowa-1, f1);
1366 		if (f != NULL)
1367 			diff_output(" %s", f);
1368 	}
1369 	diff_output("\n*** ");
1370 	range(lowa, upb, ",");
1371 	diff_output(" ****\n");
1372 
1373 	/*
1374 	 * Output changes to the "old" file.  The first loop suppresses
1375 	 * output if there were no changes to the "old" file (we'll see
1376 	 * the "old" lines as context in the "new" list).
1377 	 */
1378 	do_output = 0;
1379 	for (; cvp <= context_vec_ptr; cvp++)
1380 		if (cvp->a <= cvp->b) {
1381 			cvp = context_vec_start;
1382 			do_output++;
1383 			break;
1384 		}
1385 	if (do_output) {
1386 		while (cvp <= context_vec_ptr) {
1387 			a = cvp->a;
1388 			b = cvp->b;
1389 			c = cvp->c;
1390 			d = cvp->d;
1391 
1392 			if (a <= b && c <= d)
1393 				ch = 'c';
1394 			else
1395 				ch = (a <= b) ? 'd' : 'a';
1396 
1397 			if (ch == 'a')
1398 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1399 			else {
1400 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1401 				fetch(ixold, a, b, f1,
1402 				    ch == 'c' ? '!' : '-', 0, flags);
1403 			}
1404 			lowa = b + 1;
1405 			cvp++;
1406 		}
1407 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1408 	}
1409 	/* output changes to the "new" file */
1410 	diff_output("--- ");
1411 	range(lowc, upd, ",");
1412 	diff_output(" ----\n");
1413 
1414 	do_output = 0;
1415 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1416 		if (cvp->c <= cvp->d) {
1417 			cvp = context_vec_start;
1418 			do_output++;
1419 			break;
1420 		}
1421 	if (do_output) {
1422 		while (cvp <= context_vec_ptr) {
1423 			a = cvp->a;
1424 			b = cvp->b;
1425 			c = cvp->c;
1426 			d = cvp->d;
1427 
1428 			if (a <= b && c <= d)
1429 				ch = 'c';
1430 			else
1431 				ch = (a <= b) ? 'd' : 'a';
1432 
1433 			if (ch == 'd')
1434 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1435 			else {
1436 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1437 				fetch(ixnew, c, d, f2,
1438 				    ch == 'c' ? '!' : '+', 0, flags);
1439 			}
1440 			lowc = d + 1;
1441 			cvp++;
1442 		}
1443 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1444 	}
1445 	context_vec_ptr = context_vec_start - 1;
1446 }
1447 
1448 /* dump accumulated "unified" diff changes */
1449 static void
1450 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1451 {
1452 	struct context_vec *cvp = context_vec_start;
1453 	int lowa, upb, lowc, upd;
1454 	int a, b, c, d;
1455 	char ch, *f;
1456 
1457 	if (context_vec_start > context_vec_ptr)
1458 		return;
1459 
1460 	b = d = 0;		/* gcc */
1461 	lowa = MAX(1, cvp->a - diff_context);
1462 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1463 	lowc = MAX(1, cvp->c - diff_context);
1464 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1465 
1466 	diff_output("@@ -");
1467 	uni_range(lowa, upb);
1468 	diff_output(" +");
1469 	uni_range(lowc, upd);
1470 	diff_output(" @@");
1471 	if ((flags & D_PROTOTYPE)) {
1472 		f = match_function(ixold, lowa-1, f1);
1473 		if (f != NULL)
1474 			diff_output(" %s", f);
1475 	}
1476 	diff_output("\n");
1477 
1478 	/*
1479 	 * Output changes in "unified" diff format--the old and new lines
1480 	 * are printed together.
1481 	 */
1482 	for (; cvp <= context_vec_ptr; cvp++) {
1483 		a = cvp->a;
1484 		b = cvp->b;
1485 		c = cvp->c;
1486 		d = cvp->d;
1487 
1488 		/*
1489 		 * c: both new and old changes
1490 		 * d: only changes in the old file
1491 		 * a: only changes in the new file
1492 		 */
1493 		if (a <= b && c <= d)
1494 			ch = 'c';
1495 		else
1496 			ch = (a <= b) ? 'd' : 'a';
1497 
1498 		switch (ch) {
1499 		case 'c':
1500 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1501 			fetch(ixold, a, b, f1, '-', 0, flags);
1502 			fetch(ixnew, c, d, f2, '+', 0, flags);
1503 			break;
1504 		case 'd':
1505 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1506 			fetch(ixold, a, b, f1, '-', 0, flags);
1507 			break;
1508 		case 'a':
1509 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1510 			fetch(ixnew, c, d, f2, '+', 0, flags);
1511 			break;
1512 		}
1513 		lowa = b + 1;
1514 		lowc = d + 1;
1515 	}
1516 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1517 
1518 	context_vec_ptr = context_vec_start - 1;
1519 }
1520 
1521 static void
1522 print_header(const char *file1, const char *file2)
1523 {
1524 	if (label[0] != NULL)
1525 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1526 		    label[0]);
1527 	else
1528 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1529 		    file1, ctime(&stb1.st_mtime));
1530 	if (label[1] != NULL)
1531 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1532 		    label[1]);
1533 	else
1534 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1535 		    file2, ctime(&stb2.st_mtime));
1536 }
1537