xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 4c413bf691cbfbac2577c705da0adf5b52e344aa)
1 /*	$OpenBSD: diffreg.c,v 1.90 2015/10/26 12:52:27 tedu Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 
70 #include <ctype.h>
71 #include <err.h>
72 #include <errno.h>
73 #include <fcntl.h>
74 #include <paths.h>
75 #include <stddef.h>
76 #include <stdint.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81 #include <limits.h>
82 
83 #include "diff.h"
84 #include "xmalloc.h"
85 
86 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 #define	diff_output	printf
180 static FILE	*opentemp(const char *);
181 static void	 output(char *, FILE *, char *, FILE *, int);
182 static void	 check(FILE *, FILE *, int);
183 static void	 range(int, int, char *);
184 static void	 uni_range(int, int);
185 static void	 dump_context_vec(FILE *, FILE *, int);
186 static void	 dump_unified_vec(FILE *, FILE *, int);
187 static void	 prepare(int, FILE *, off_t, int);
188 static void	 prune(void);
189 static void	 equiv(struct line *, int, struct line *, int, int *);
190 static void	 unravel(int);
191 static void	 unsort(struct line *, int, int *);
192 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
193 static void	 sort(struct line *, int);
194 static void	 print_header(const char *, const char *);
195 static int	 ignoreline(char *);
196 static int	 asciifile(FILE *);
197 static int	 fetch(long *, int, int, FILE *, int, int, int);
198 static int	 newcand(int, int, int);
199 static int	 search(int *, int, int);
200 static int	 skipline(FILE *);
201 static int	 isqrt(int);
202 static int	 stone(int *, int, int *, int *, int);
203 static int	 readhash(FILE *, int);
204 static int	 files_differ(FILE *, FILE *, int);
205 static char	*match_function(const long *, int, FILE *);
206 static char	*preadline(int, size_t, off_t);
207 
208 static int  *J;			/* will be overlaid on class */
209 static int  *class;		/* will be overlaid on file[0] */
210 static int  *klist;		/* will be overlaid on file[0] after class */
211 static int  *member;		/* will be overlaid on file[1] */
212 static int   clen;
213 static int   inifdef;		/* whether or not we are in a #ifdef block */
214 static int   len[2];
215 static int   pref, suff;	/* length of prefix and suffix */
216 static int   slen[2];
217 static int   anychange;
218 static long *ixnew;		/* will be overlaid on file[1] */
219 static long *ixold;		/* will be overlaid on klist */
220 static struct cand *clist;	/* merely a free storage pot for candidates */
221 static int   clistlen;		/* the length of clist */
222 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223 static u_char *chrtran;		/* translation table for case-folding */
224 static struct context_vec *context_vec_start;
225 static struct context_vec *context_vec_end;
226 static struct context_vec *context_vec_ptr;
227 
228 #define FUNCTION_CONTEXT_SIZE	55
229 static char lastbuf[FUNCTION_CONTEXT_SIZE];
230 static int lastline;
231 static int lastmatchline;
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
293 diffreg(char *file1, char *file2, int flags)
294 {
295 	FILE *f1, *f2;
296 	int i, rval;
297 
298 	f1 = f2 = NULL;
299 	rval = D_SAME;
300 	anychange = 0;
301 	lastline = 0;
302 	lastmatchline = 0;
303 	context_vec_ptr = context_vec_start - 1;
304 	if (flags & D_IGNORECASE)
305 		chrtran = cup2low;
306 	else
307 		chrtran = clow2low;
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if ((flags & D_FORCEASCII) == 0 &&
367 	    (!asciifile(f1) || !asciifile(f2))) {
368 		rval = D_BINARY;
369 		status |= 1;
370 		goto closem;
371 	}
372 	prepare(0, f1, stb1.st_size, flags);
373 	prepare(1, f2, stb2.st_size, flags);
374 
375 	prune();
376 	sort(sfile[0], slen[0]);
377 	sort(sfile[1], slen[1]);
378 
379 	member = (int *)file[1];
380 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382 
383 	class = (int *)file[0];
384 	unsort(sfile[0], slen[0], class);
385 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386 
387 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
388 	clen = 0;
389 	clistlen = 100;
390 	clist = xcalloc(clistlen, sizeof(*clist));
391 	i = stone(class, slen[0], member, klist, flags);
392 	free(member);
393 	free(class);
394 
395 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
396 	unravel(klist[i]);
397 	free(clist);
398 	free(klist);
399 
400 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
402 	check(f1, f2, flags);
403 	output(file1, f1, file2, f2, flags);
404 closem:
405 	if (anychange) {
406 		status |= 1;
407 		if (rval == D_SAME)
408 			rval = D_DIFFER;
409 	}
410 	if (f1 != NULL)
411 		fclose(f1);
412 	if (f2 != NULL)
413 		fclose(f2);
414 
415 	return (rval);
416 }
417 
418 /*
419  * Check to see if the given files differ.
420  * Returns 0 if they are the same, 1 if different, and -1 on error.
421  * XXX - could use code from cmp(1) [faster]
422  */
423 static int
424 files_differ(FILE *f1, FILE *f2, int flags)
425 {
426 	char buf1[BUFSIZ], buf2[BUFSIZ];
427 	size_t i, j;
428 
429 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
430 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
431 		return (1);
432 	for (;;) {
433 		i = fread(buf1, 1, sizeof(buf1), f1);
434 		j = fread(buf2, 1, sizeof(buf2), f2);
435 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
436 			return (-1);
437 		if (i != j)
438 			return (1);
439 		if (i == 0)
440 			return (0);
441 		if (memcmp(buf1, buf2, i) != 0)
442 			return (1);
443 	}
444 }
445 
446 static FILE *
447 opentemp(const char *file)
448 {
449 	char buf[BUFSIZ], tempfile[PATH_MAX];
450 	ssize_t nread;
451 	int ifd, ofd;
452 
453 	if (strcmp(file, "-") == 0)
454 		ifd = STDIN_FILENO;
455 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
456 		return (NULL);
457 
458 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
459 
460 	if ((ofd = mkstemp(tempfile)) < 0) {
461 		close(ifd);
462 		return (NULL);
463 	}
464 	unlink(tempfile);
465 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
466 		if (write(ofd, buf, nread) != nread) {
467 			close(ifd);
468 			close(ofd);
469 			return (NULL);
470 		}
471 	}
472 	close(ifd);
473 	lseek(ofd, (off_t)0, SEEK_SET);
474 	return (fdopen(ofd, "r"));
475 }
476 
477 char *
478 splice(char *dir, char *file)
479 {
480 	char *tail, *buf;
481 	size_t dirlen;
482 
483 	dirlen = strlen(dir);
484 	while (dirlen != 0 && dir[dirlen - 1] == '/')
485 	    dirlen--;
486 	if ((tail = strrchr(file, '/')) == NULL)
487 		tail = file;
488 	else
489 		tail++;
490 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
491 	return (buf);
492 }
493 
494 static void
495 prepare(int i, FILE *fd, off_t filesize, int flags)
496 {
497 	struct line *p;
498 	int j, h;
499 	size_t sz;
500 
501 	rewind(fd);
502 
503 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
504 	if (sz < 100)
505 		sz = 100;
506 
507 	p = xcalloc(sz + 3, sizeof(*p));
508 	for (j = 0; (h = readhash(fd, flags));) {
509 		if (j == sz) {
510 			sz = sz * 3 / 2;
511 			p = xreallocarray(p, sz + 3, sizeof(*p));
512 		}
513 		p[++j].value = h;
514 	}
515 	len[i] = j;
516 	file[i] = p;
517 }
518 
519 static void
520 prune(void)
521 {
522 	int i, j;
523 
524 	for (pref = 0; pref < len[0] && pref < len[1] &&
525 	    file[0][pref + 1].value == file[1][pref + 1].value;
526 	    pref++)
527 		;
528 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
529 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
530 	    suff++)
531 		;
532 	for (j = 0; j < 2; j++) {
533 		sfile[j] = file[j] + pref;
534 		slen[j] = len[j] - pref - suff;
535 		for (i = 0; i <= slen[j]; i++)
536 			sfile[j][i].serial = i;
537 	}
538 }
539 
540 static void
541 equiv(struct line *a, int n, struct line *b, int m, int *c)
542 {
543 	int i, j;
544 
545 	i = j = 1;
546 	while (i <= n && j <= m) {
547 		if (a[i].value < b[j].value)
548 			a[i++].value = 0;
549 		else if (a[i].value == b[j].value)
550 			a[i++].value = j;
551 		else
552 			j++;
553 	}
554 	while (i <= n)
555 		a[i++].value = 0;
556 	b[m + 1].value = 0;
557 	j = 0;
558 	while (++j <= m) {
559 		c[j] = -b[j].serial;
560 		while (b[j + 1].value == b[j].value) {
561 			j++;
562 			c[j] = b[j].serial;
563 		}
564 	}
565 	c[j] = -1;
566 }
567 
568 /* Code taken from ping.c */
569 static int
570 isqrt(int n)
571 {
572 	int y, x = 1;
573 
574 	if (n == 0)
575 		return (0);
576 
577 	do { /* newton was a stinker */
578 		y = x;
579 		x = n / x;
580 		x += y;
581 		x /= 2;
582 	} while ((x - y) > 1 || (x - y) < -1);
583 
584 	return (x);
585 }
586 
587 static int
588 stone(int *a, int n, int *b, int *c, int flags)
589 {
590 	int i, k, y, j, l;
591 	int oldc, tc, oldl, sq;
592 	u_int numtries, bound;
593 
594 	if (flags & D_MINIMAL)
595 		bound = UINT_MAX;
596 	else {
597 		sq = isqrt(n);
598 		bound = MAXIMUM(256, sq);
599 	}
600 
601 	k = 0;
602 	c[0] = newcand(0, 0, 0);
603 	for (i = 1; i <= n; i++) {
604 		j = a[i];
605 		if (j == 0)
606 			continue;
607 		y = -b[j];
608 		oldl = 0;
609 		oldc = c[0];
610 		numtries = 0;
611 		do {
612 			if (y <= clist[oldc].y)
613 				continue;
614 			l = search(c, k, y);
615 			if (l != oldl + 1)
616 				oldc = c[l - 1];
617 			if (l <= k) {
618 				if (clist[c[l]].y <= y)
619 					continue;
620 				tc = c[l];
621 				c[l] = newcand(i, y, oldc);
622 				oldc = tc;
623 				oldl = l;
624 				numtries++;
625 			} else {
626 				c[l] = newcand(i, y, oldc);
627 				k++;
628 				break;
629 			}
630 		} while ((y = b[++j]) > 0 && numtries < bound);
631 	}
632 	return (k);
633 }
634 
635 static int
636 newcand(int x, int y, int pred)
637 {
638 	struct cand *q;
639 
640 	if (clen == clistlen) {
641 		clistlen = clistlen * 11 / 10;
642 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
643 	}
644 	q = clist + clen;
645 	q->x = x;
646 	q->y = y;
647 	q->pred = pred;
648 	return (clen++);
649 }
650 
651 static int
652 search(int *c, int k, int y)
653 {
654 	int i, j, l, t;
655 
656 	if (clist[c[k]].y < y)	/* quick look for typical case */
657 		return (k + 1);
658 	i = 0;
659 	j = k + 1;
660 	for (;;) {
661 		l = (i + j) / 2;
662 		if (l <= i)
663 			break;
664 		t = clist[c[l]].y;
665 		if (t > y)
666 			j = l;
667 		else if (t < y)
668 			i = l;
669 		else
670 			return (l);
671 	}
672 	return (l + 1);
673 }
674 
675 static void
676 unravel(int p)
677 {
678 	struct cand *q;
679 	int i;
680 
681 	for (i = 0; i <= len[0]; i++)
682 		J[i] = i <= pref ? i :
683 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
684 	for (q = clist + p; q->y != 0; q = clist + q->pred)
685 		J[q->x + pref] = q->y + pref;
686 }
687 
688 /*
689  * Check does double duty:
690  *  1.	ferret out any fortuitous correspondences due
691  *	to confounding by hashing (which result in "jackpot")
692  *  2.  collect random access indexes to the two files
693  */
694 static void
695 check(FILE *f1, FILE *f2, int flags)
696 {
697 	int i, j, jackpot, c, d;
698 	long ctold, ctnew;
699 
700 	rewind(f1);
701 	rewind(f2);
702 	j = 1;
703 	ixold[0] = ixnew[0] = 0;
704 	jackpot = 0;
705 	ctold = ctnew = 0;
706 	for (i = 1; i <= len[0]; i++) {
707 		if (J[i] == 0) {
708 			ixold[i] = ctold += skipline(f1);
709 			continue;
710 		}
711 		while (j < J[i]) {
712 			ixnew[j] = ctnew += skipline(f2);
713 			j++;
714 		}
715 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
716 			for (;;) {
717 				c = getc(f1);
718 				d = getc(f2);
719 				/*
720 				 * GNU diff ignores a missing newline
721 				 * in one file for -b or -w.
722 				 */
723 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
724 					if (c == EOF && d == '\n') {
725 						ctnew++;
726 						break;
727 					} else if (c == '\n' && d == EOF) {
728 						ctold++;
729 						break;
730 					}
731 				}
732 				ctold++;
733 				ctnew++;
734 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
735 				    isspace(d)) {
736 					do {
737 						if (c == '\n')
738 							break;
739 						ctold++;
740 					} while (isspace(c = getc(f1)));
741 					do {
742 						if (d == '\n')
743 							break;
744 						ctnew++;
745 					} while (isspace(d = getc(f2)));
746 				} else if ((flags & D_IGNOREBLANKS)) {
747 					while (isspace(c) && c != '\n') {
748 						c = getc(f1);
749 						ctold++;
750 					}
751 					while (isspace(d) && d != '\n') {
752 						d = getc(f2);
753 						ctnew++;
754 					}
755 				}
756 				if (chrtran[c] != chrtran[d]) {
757 					jackpot++;
758 					J[i] = 0;
759 					if (c != '\n' && c != EOF)
760 						ctold += skipline(f1);
761 					if (d != '\n' && c != EOF)
762 						ctnew += skipline(f2);
763 					break;
764 				}
765 				if (c == '\n' || c == EOF)
766 					break;
767 			}
768 		} else {
769 			for (;;) {
770 				ctold++;
771 				ctnew++;
772 				if ((c = getc(f1)) != (d = getc(f2))) {
773 					/* jackpot++; */
774 					J[i] = 0;
775 					if (c != '\n' && c != EOF)
776 						ctold += skipline(f1);
777 					if (d != '\n' && c != EOF)
778 						ctnew += skipline(f2);
779 					break;
780 				}
781 				if (c == '\n' || c == EOF)
782 					break;
783 			}
784 		}
785 		ixold[i] = ctold;
786 		ixnew[j] = ctnew;
787 		j++;
788 	}
789 	for (; j <= len[1]; j++)
790 		ixnew[j] = ctnew += skipline(f2);
791 	/*
792 	 * if (jackpot)
793 	 *	fprintf(stderr, "jackpot\n");
794 	 */
795 }
796 
797 /* shellsort CACM #201 */
798 static void
799 sort(struct line *a, int n)
800 {
801 	struct line *ai, *aim, w;
802 	int j, m = 0, k;
803 
804 	if (n == 0)
805 		return;
806 	for (j = 1; j <= n; j *= 2)
807 		m = 2 * j - 1;
808 	for (m /= 2; m != 0; m /= 2) {
809 		k = n - m;
810 		for (j = 1; j <= k; j++) {
811 			for (ai = &a[j]; ai > a; ai -= m) {
812 				aim = &ai[m];
813 				if (aim < ai)
814 					break;	/* wraparound */
815 				if (aim->value > ai[0].value ||
816 				    (aim->value == ai[0].value &&
817 					aim->serial > ai[0].serial))
818 					break;
819 				w.value = ai[0].value;
820 				ai[0].value = aim->value;
821 				aim->value = w.value;
822 				w.serial = ai[0].serial;
823 				ai[0].serial = aim->serial;
824 				aim->serial = w.serial;
825 			}
826 		}
827 	}
828 }
829 
830 static void
831 unsort(struct line *f, int l, int *b)
832 {
833 	int *a, i;
834 
835 	a = xcalloc(l + 1, sizeof(*a));
836 	for (i = 1; i <= l; i++)
837 		a[f[i].serial] = f[i].value;
838 	for (i = 1; i <= l; i++)
839 		b[i] = a[i];
840 	free(a);
841 }
842 
843 static int
844 skipline(FILE *f)
845 {
846 	int i, c;
847 
848 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
849 		continue;
850 	return (i);
851 }
852 
853 static void
854 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
855 {
856 	int m, i0, i1, j0, j1;
857 
858 	rewind(f1);
859 	rewind(f2);
860 	m = len[0];
861 	J[0] = 0;
862 	J[m + 1] = len[1] + 1;
863 	if (diff_format != D_EDIT) {
864 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
865 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
866 				i0++;
867 			j0 = J[i0 - 1] + 1;
868 			i1 = i0 - 1;
869 			while (i1 < m && J[i1 + 1] == 0)
870 				i1++;
871 			j1 = J[i1 + 1] - 1;
872 			J[i1] = j1;
873 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
874 		}
875 	} else {
876 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
877 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
878 				i0--;
879 			j0 = J[i0 + 1] - 1;
880 			i1 = i0 + 1;
881 			while (i1 > 1 && J[i1 - 1] == 0)
882 				i1--;
883 			j1 = J[i1 - 1] + 1;
884 			J[i1] = j1;
885 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
886 		}
887 	}
888 	if (m == 0)
889 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
890 	if (diff_format == D_IFDEF) {
891 		for (;;) {
892 #define	c i0
893 			if ((c = getc(f1)) == EOF)
894 				return;
895 			diff_output("%c", c);
896 		}
897 #undef c
898 	}
899 	if (anychange != 0) {
900 		if (diff_format == D_CONTEXT)
901 			dump_context_vec(f1, f2, flags);
902 		else if (diff_format == D_UNIFIED)
903 			dump_unified_vec(f1, f2, flags);
904 	}
905 }
906 
907 static void
908 range(int a, int b, char *separator)
909 {
910 	diff_output("%d", a > b ? b : a);
911 	if (a < b)
912 		diff_output("%s%d", separator, b);
913 }
914 
915 static void
916 uni_range(int a, int b)
917 {
918 	if (a < b)
919 		diff_output("%d,%d", a, b - a + 1);
920 	else if (a == b)
921 		diff_output("%d", b);
922 	else
923 		diff_output("%d,0", b);
924 }
925 
926 static char *
927 preadline(int fd, size_t rlen, off_t off)
928 {
929 	char *line;
930 	ssize_t nr;
931 
932 	line = xmalloc(rlen + 1);
933 	if ((nr = pread(fd, line, rlen, off)) < 0)
934 		err(2, "preadline");
935 	if (nr > 0 && line[nr-1] == '\n')
936 		nr--;
937 	line[nr] = '\0';
938 	return (line);
939 }
940 
941 static int
942 ignoreline(char *line)
943 {
944 	int ret;
945 
946 	ret = regexec(&ignore_re, line, 0, NULL, 0);
947 	free(line);
948 	return (ret == 0);	/* if it matched, it should be ignored. */
949 }
950 
951 /*
952  * Indicate that there is a difference between lines a and b of the from file
953  * to get to lines c to d of the to file.  If a is greater then b then there
954  * are no lines in the from file involved and this means that there were
955  * lines appended (beginning at b).  If c is greater than d then there are
956  * lines missing from the to file.
957  */
958 static void
959 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
960     int *pflags)
961 {
962 	static size_t max_context = 64;
963 	int i;
964 
965 restart:
966 	if (diff_format != D_IFDEF && a > b && c > d)
967 		return;
968 	if (ignore_pats != NULL) {
969 		char *line;
970 		/*
971 		 * All lines in the change, insert, or delete must
972 		 * match an ignore pattern for the change to be
973 		 * ignored.
974 		 */
975 		if (a <= b) {		/* Changes and deletes. */
976 			for (i = a; i <= b; i++) {
977 				line = preadline(fileno(f1),
978 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
979 				if (!ignoreline(line))
980 					goto proceed;
981 			}
982 		}
983 		if (a > b || c <= d) {	/* Changes and inserts. */
984 			for (i = c; i <= d; i++) {
985 				line = preadline(fileno(f2),
986 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
987 				if (!ignoreline(line))
988 					goto proceed;
989 			}
990 		}
991 		return;
992 	}
993 proceed:
994 	if (*pflags & D_HEADER) {
995 		diff_output("%s %s %s\n", diffargs, file1, file2);
996 		*pflags &= ~D_HEADER;
997 	}
998 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
999 		/*
1000 		 * Allocate change records as needed.
1001 		 */
1002 		if (context_vec_ptr == context_vec_end - 1) {
1003 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1004 			max_context <<= 1;
1005 			context_vec_start = xreallocarray(context_vec_start,
1006 			    max_context, sizeof(*context_vec_start));
1007 			context_vec_end = context_vec_start + max_context;
1008 			context_vec_ptr = context_vec_start + offset;
1009 		}
1010 		if (anychange == 0) {
1011 			/*
1012 			 * Print the context/unidiff header first time through.
1013 			 */
1014 			print_header(file1, file2);
1015 			anychange = 1;
1016 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1017 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1018 			/*
1019 			 * If this change is more than 'diff_context' lines from the
1020 			 * previous change, dump the record and reset it.
1021 			 */
1022 			if (diff_format == D_CONTEXT)
1023 				dump_context_vec(f1, f2, *pflags);
1024 			else
1025 				dump_unified_vec(f1, f2, *pflags);
1026 		}
1027 		context_vec_ptr++;
1028 		context_vec_ptr->a = a;
1029 		context_vec_ptr->b = b;
1030 		context_vec_ptr->c = c;
1031 		context_vec_ptr->d = d;
1032 		return;
1033 	}
1034 	if (anychange == 0)
1035 		anychange = 1;
1036 	switch (diff_format) {
1037 	case D_BRIEF:
1038 		return;
1039 	case D_NORMAL:
1040 	case D_EDIT:
1041 		range(a, b, ",");
1042 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1043 		if (diff_format == D_NORMAL)
1044 			range(c, d, ",");
1045 		diff_output("\n");
1046 		break;
1047 	case D_REVERSE:
1048 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1049 		range(a, b, " ");
1050 		diff_output("\n");
1051 		break;
1052 	case D_NREVERSE:
1053 		if (a > b)
1054 			diff_output("a%d %d\n", b, d - c + 1);
1055 		else {
1056 			diff_output("d%d %d\n", a, b - a + 1);
1057 			if (!(c > d))
1058 				/* add changed lines */
1059 				diff_output("a%d %d\n", b, d - c + 1);
1060 		}
1061 		break;
1062 	}
1063 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1064 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1065 		if (a <= b && c <= d && diff_format == D_NORMAL)
1066 			diff_output("---\n");
1067 	}
1068 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1069 	if (i != 0 && diff_format == D_EDIT) {
1070 		/*
1071 		 * A non-zero return value for D_EDIT indicates that the
1072 		 * last line printed was a bare dot (".") that has been
1073 		 * escaped as ".." to prevent ed(1) from misinterpreting
1074 		 * it.  We have to add a substitute command to change this
1075 		 * back and restart where we left off.
1076 		 */
1077 		diff_output(".\n");
1078 		diff_output("%ds/.//\n", a);
1079 		a += i;
1080 		c += i;
1081 		goto restart;
1082 	}
1083 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1084 		diff_output(".\n");
1085 	if (inifdef) {
1086 		diff_output("#endif /* %s */\n", ifdefname);
1087 		inifdef = 0;
1088 	}
1089 }
1090 
1091 static int
1092 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1093 {
1094 	int i, j, c, lastc, col, nc;
1095 
1096 	/*
1097 	 * When doing #ifdef's, copy down to current line
1098 	 * if this is the first file, so that stuff makes it to output.
1099 	 */
1100 	if (diff_format == D_IFDEF && oldfile) {
1101 		long curpos = ftell(lb);
1102 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1103 		nc = f[a > b ? b : a - 1] - curpos;
1104 		for (i = 0; i < nc; i++)
1105 			diff_output("%c", getc(lb));
1106 	}
1107 	if (a > b)
1108 		return (0);
1109 	if (diff_format == D_IFDEF) {
1110 		if (inifdef) {
1111 			diff_output("#else /* %s%s */\n",
1112 			    oldfile == 1 ? "!" : "", ifdefname);
1113 		} else {
1114 			if (oldfile)
1115 				diff_output("#ifndef %s\n", ifdefname);
1116 			else
1117 				diff_output("#ifdef %s\n", ifdefname);
1118 		}
1119 		inifdef = 1 + oldfile;
1120 	}
1121 	for (i = a; i <= b; i++) {
1122 		fseek(lb, f[i - 1], SEEK_SET);
1123 		nc = f[i] - f[i - 1];
1124 		if (diff_format != D_IFDEF && ch != '\0') {
1125 			diff_output("%c", ch);
1126 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1127 			    || diff_format == D_UNIFIED))
1128 				diff_output("\t");
1129 			else if (diff_format != D_UNIFIED)
1130 				diff_output(" ");
1131 		}
1132 		col = 0;
1133 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1134 			if ((c = getc(lb)) == EOF) {
1135 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1136 				    diff_format == D_NREVERSE)
1137 					warnx("No newline at end of file");
1138 				else
1139 					diff_output("\n\\ No newline at end of "
1140 					    "file\n");
1141 				return (0);
1142 			}
1143 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1144 				do {
1145 					diff_output(" ");
1146 				} while (++col & 7);
1147 			} else {
1148 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1149 				    && lastc == '.') {
1150 					/*
1151 					 * Don't print a bare "." line
1152 					 * since that will confuse ed(1).
1153 					 * Print ".." instead and return,
1154 					 * giving the caller an offset
1155 					 * from which to restart.
1156 					 */
1157 					diff_output(".\n");
1158 					return (i - a + 1);
1159 				}
1160 				diff_output("%c", c);
1161 				col++;
1162 			}
1163 		}
1164 	}
1165 	return (0);
1166 }
1167 
1168 /*
1169  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1170  */
1171 static int
1172 readhash(FILE *f, int flags)
1173 {
1174 	int i, t, space;
1175 	int sum;
1176 
1177 	sum = 1;
1178 	space = 0;
1179 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1180 		if (flags & D_IGNORECASE)
1181 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1182 				if (t == EOF) {
1183 					if (i == 0)
1184 						return (0);
1185 					break;
1186 				}
1187 				sum = sum * 127 + chrtran[t];
1188 			}
1189 		else
1190 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1191 				if (t == EOF) {
1192 					if (i == 0)
1193 						return (0);
1194 					break;
1195 				}
1196 				sum = sum * 127 + t;
1197 			}
1198 	} else {
1199 		for (i = 0;;) {
1200 			switch (t = getc(f)) {
1201 			case '\t':
1202 			case '\r':
1203 			case '\v':
1204 			case '\f':
1205 			case ' ':
1206 				space++;
1207 				continue;
1208 			default:
1209 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1210 					i++;
1211 					space = 0;
1212 				}
1213 				sum = sum * 127 + chrtran[t];
1214 				i++;
1215 				continue;
1216 			case EOF:
1217 				if (i == 0)
1218 					return (0);
1219 				/* FALLTHROUGH */
1220 			case '\n':
1221 				break;
1222 			}
1223 			break;
1224 		}
1225 	}
1226 	/*
1227 	 * There is a remote possibility that we end up with a zero sum.
1228 	 * Zero is used as an EOF marker, so return 1 instead.
1229 	 */
1230 	return (sum == 0 ? 1 : sum);
1231 }
1232 
1233 static int
1234 asciifile(FILE *f)
1235 {
1236 	unsigned char buf[BUFSIZ];
1237 	size_t cnt;
1238 
1239 	if (f == NULL)
1240 		return (1);
1241 
1242 	rewind(f);
1243 	cnt = fread(buf, 1, sizeof(buf), f);
1244 	return (memchr(buf, '\0', cnt) == NULL);
1245 }
1246 
1247 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1248 
1249 static char *
1250 match_function(const long *f, int pos, FILE *fp)
1251 {
1252 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1253 	size_t nc;
1254 	int last = lastline;
1255 	char *state = NULL;
1256 
1257 	lastline = pos;
1258 	while (pos > last) {
1259 		fseek(fp, f[pos - 1], SEEK_SET);
1260 		nc = f[pos] - f[pos - 1];
1261 		if (nc >= sizeof(buf))
1262 			nc = sizeof(buf) - 1;
1263 		nc = fread(buf, 1, nc, fp);
1264 		if (nc > 0) {
1265 			buf[nc] = '\0';
1266 			buf[strcspn(buf, "\n")] = '\0';
1267 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1268 				if (begins_with(buf, "private:")) {
1269 					if (!state)
1270 						state = " (private)";
1271 				} else if (begins_with(buf, "protected:")) {
1272 					if (!state)
1273 						state = " (protected)";
1274 				} else if (begins_with(buf, "public:")) {
1275 					if (!state)
1276 						state = " (public)";
1277 				} else {
1278 					strlcpy(lastbuf, buf, sizeof lastbuf);
1279 					if (state)
1280 						strlcat(lastbuf, state,
1281 						    sizeof lastbuf);
1282 					lastmatchline = pos;
1283 					return lastbuf;
1284 				}
1285 			}
1286 		}
1287 		pos--;
1288 	}
1289 	return lastmatchline > 0 ? lastbuf : NULL;
1290 }
1291 
1292 /* dump accumulated "context" diff changes */
1293 static void
1294 dump_context_vec(FILE *f1, FILE *f2, int flags)
1295 {
1296 	struct context_vec *cvp = context_vec_start;
1297 	int lowa, upb, lowc, upd, do_output;
1298 	int a, b, c, d;
1299 	char ch, *f;
1300 
1301 	if (context_vec_start > context_vec_ptr)
1302 		return;
1303 
1304 	b = d = 0;		/* gcc */
1305 	lowa = MAXIMUM(1, cvp->a - diff_context);
1306 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1307 	lowc = MAXIMUM(1, cvp->c - diff_context);
1308 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1309 
1310 	diff_output("***************");
1311 	if ((flags & D_PROTOTYPE)) {
1312 		f = match_function(ixold, lowa-1, f1);
1313 		if (f != NULL)
1314 			diff_output(" %s", f);
1315 	}
1316 	diff_output("\n*** ");
1317 	range(lowa, upb, ",");
1318 	diff_output(" ****\n");
1319 
1320 	/*
1321 	 * Output changes to the "old" file.  The first loop suppresses
1322 	 * output if there were no changes to the "old" file (we'll see
1323 	 * the "old" lines as context in the "new" list).
1324 	 */
1325 	do_output = 0;
1326 	for (; cvp <= context_vec_ptr; cvp++)
1327 		if (cvp->a <= cvp->b) {
1328 			cvp = context_vec_start;
1329 			do_output++;
1330 			break;
1331 		}
1332 	if (do_output) {
1333 		while (cvp <= context_vec_ptr) {
1334 			a = cvp->a;
1335 			b = cvp->b;
1336 			c = cvp->c;
1337 			d = cvp->d;
1338 
1339 			if (a <= b && c <= d)
1340 				ch = 'c';
1341 			else
1342 				ch = (a <= b) ? 'd' : 'a';
1343 
1344 			if (ch == 'a')
1345 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1346 			else {
1347 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1348 				fetch(ixold, a, b, f1,
1349 				    ch == 'c' ? '!' : '-', 0, flags);
1350 			}
1351 			lowa = b + 1;
1352 			cvp++;
1353 		}
1354 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1355 	}
1356 	/* output changes to the "new" file */
1357 	diff_output("--- ");
1358 	range(lowc, upd, ",");
1359 	diff_output(" ----\n");
1360 
1361 	do_output = 0;
1362 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1363 		if (cvp->c <= cvp->d) {
1364 			cvp = context_vec_start;
1365 			do_output++;
1366 			break;
1367 		}
1368 	if (do_output) {
1369 		while (cvp <= context_vec_ptr) {
1370 			a = cvp->a;
1371 			b = cvp->b;
1372 			c = cvp->c;
1373 			d = cvp->d;
1374 
1375 			if (a <= b && c <= d)
1376 				ch = 'c';
1377 			else
1378 				ch = (a <= b) ? 'd' : 'a';
1379 
1380 			if (ch == 'd')
1381 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1382 			else {
1383 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1384 				fetch(ixnew, c, d, f2,
1385 				    ch == 'c' ? '!' : '+', 0, flags);
1386 			}
1387 			lowc = d + 1;
1388 			cvp++;
1389 		}
1390 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1391 	}
1392 	context_vec_ptr = context_vec_start - 1;
1393 }
1394 
1395 /* dump accumulated "unified" diff changes */
1396 static void
1397 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1398 {
1399 	struct context_vec *cvp = context_vec_start;
1400 	int lowa, upb, lowc, upd;
1401 	int a, b, c, d;
1402 	char ch, *f;
1403 
1404 	if (context_vec_start > context_vec_ptr)
1405 		return;
1406 
1407 	b = d = 0;		/* gcc */
1408 	lowa = MAXIMUM(1, cvp->a - diff_context);
1409 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1410 	lowc = MAXIMUM(1, cvp->c - diff_context);
1411 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1412 
1413 	diff_output("@@ -");
1414 	uni_range(lowa, upb);
1415 	diff_output(" +");
1416 	uni_range(lowc, upd);
1417 	diff_output(" @@");
1418 	if ((flags & D_PROTOTYPE)) {
1419 		f = match_function(ixold, lowa-1, f1);
1420 		if (f != NULL)
1421 			diff_output(" %s", f);
1422 	}
1423 	diff_output("\n");
1424 
1425 	/*
1426 	 * Output changes in "unified" diff format--the old and new lines
1427 	 * are printed together.
1428 	 */
1429 	for (; cvp <= context_vec_ptr; cvp++) {
1430 		a = cvp->a;
1431 		b = cvp->b;
1432 		c = cvp->c;
1433 		d = cvp->d;
1434 
1435 		/*
1436 		 * c: both new and old changes
1437 		 * d: only changes in the old file
1438 		 * a: only changes in the new file
1439 		 */
1440 		if (a <= b && c <= d)
1441 			ch = 'c';
1442 		else
1443 			ch = (a <= b) ? 'd' : 'a';
1444 
1445 		switch (ch) {
1446 		case 'c':
1447 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1448 			fetch(ixold, a, b, f1, '-', 0, flags);
1449 			fetch(ixnew, c, d, f2, '+', 0, flags);
1450 			break;
1451 		case 'd':
1452 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1453 			fetch(ixold, a, b, f1, '-', 0, flags);
1454 			break;
1455 		case 'a':
1456 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1457 			fetch(ixnew, c, d, f2, '+', 0, flags);
1458 			break;
1459 		}
1460 		lowa = b + 1;
1461 		lowc = d + 1;
1462 	}
1463 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1464 
1465 	context_vec_ptr = context_vec_start - 1;
1466 }
1467 
1468 static void
1469 print_header(const char *file1, const char *file2)
1470 {
1471 	if (label[0] != NULL)
1472 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1473 		    label[0]);
1474 	else
1475 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1476 		    file1, ctime(&stb1.st_mtime));
1477 	if (label[1] != NULL)
1478 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1479 		    label[1]);
1480 	else
1481 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1482 		    file2, ctime(&stb2.st_mtime));
1483 }
1484