xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 40e7295b4ba441dfe69421af3a469a5dfbf9a3e8)
1 /*	$OpenBSD: diffreg.c,v 1.88 2015/10/05 20:15:00 millert Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 
70 #include <ctype.h>
71 #include <err.h>
72 #include <errno.h>
73 #include <fcntl.h>
74 #include <paths.h>
75 #include <stddef.h>
76 #include <stdint.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81 #include <limits.h>
82 
83 #include "diff.h"
84 #include "xmalloc.h"
85 
86 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 #define	diff_output	printf
180 static FILE	*opentemp(const char *);
181 static void	 output(char *, FILE *, char *, FILE *, int);
182 static void	 check(FILE *, FILE *, int);
183 static void	 range(int, int, char *);
184 static void	 uni_range(int, int);
185 static void	 dump_context_vec(FILE *, FILE *, int);
186 static void	 dump_unified_vec(FILE *, FILE *, int);
187 static void	 prepare(int, FILE *, off_t, int);
188 static void	 prune(void);
189 static void	 equiv(struct line *, int, struct line *, int, int *);
190 static void	 unravel(int);
191 static void	 unsort(struct line *, int, int *);
192 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
193 static void	 sort(struct line *, int);
194 static void	 print_header(const char *, const char *);
195 static int	 ignoreline(char *);
196 static int	 asciifile(FILE *);
197 static int	 fetch(long *, int, int, FILE *, int, int, int);
198 static int	 newcand(int, int, int);
199 static int	 search(int *, int, int);
200 static int	 skipline(FILE *);
201 static int	 isqrt(int);
202 static int	 stone(int *, int, int *, int *, int);
203 static int	 readhash(FILE *, int);
204 static int	 files_differ(FILE *, FILE *, int);
205 static char	*match_function(const long *, int, FILE *);
206 static char	*preadline(int, size_t, off_t);
207 
208 static int  *J;			/* will be overlaid on class */
209 static int  *class;		/* will be overlaid on file[0] */
210 static int  *klist;		/* will be overlaid on file[0] after class */
211 static int  *member;		/* will be overlaid on file[1] */
212 static int   clen;
213 static int   inifdef;		/* whether or not we are in a #ifdef block */
214 static int   len[2];
215 static int   pref, suff;	/* length of prefix and suffix */
216 static int   slen[2];
217 static int   anychange;
218 static long *ixnew;		/* will be overlaid on file[1] */
219 static long *ixold;		/* will be overlaid on klist */
220 static struct cand *clist;	/* merely a free storage pot for candidates */
221 static int   clistlen;		/* the length of clist */
222 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223 static u_char *chrtran;		/* translation table for case-folding */
224 static struct context_vec *context_vec_start;
225 static struct context_vec *context_vec_end;
226 static struct context_vec *context_vec_ptr;
227 
228 #define FUNCTION_CONTEXT_SIZE	55
229 static char lastbuf[FUNCTION_CONTEXT_SIZE];
230 static int lastline;
231 static int lastmatchline;
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
293 diffreg(char *file1, char *file2, int flags)
294 {
295 	FILE *f1, *f2;
296 	int i, rval;
297 
298 	f1 = f2 = NULL;
299 	rval = D_SAME;
300 	anychange = 0;
301 	lastline = 0;
302 	lastmatchline = 0;
303 	context_vec_ptr = context_vec_start - 1;
304 	if (flags & D_IGNORECASE)
305 		chrtran = cup2low;
306 	else
307 		chrtran = clow2low;
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if ((flags & D_FORCEASCII) == 0 &&
367 	    (!asciifile(f1) || !asciifile(f2))) {
368 		rval = D_BINARY;
369 		status |= 1;
370 		goto closem;
371 	}
372 	prepare(0, f1, stb1.st_size, flags);
373 	prepare(1, f2, stb2.st_size, flags);
374 
375 	prune();
376 	sort(sfile[0], slen[0]);
377 	sort(sfile[1], slen[1]);
378 
379 	member = (int *)file[1];
380 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382 
383 	class = (int *)file[0];
384 	unsort(sfile[0], slen[0], class);
385 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386 
387 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
388 	clen = 0;
389 	clistlen = 100;
390 	clist = xcalloc(clistlen, sizeof(*clist));
391 	i = stone(class, slen[0], member, klist, flags);
392 	free(member);
393 	free(class);
394 
395 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
396 	unravel(klist[i]);
397 	free(clist);
398 	free(klist);
399 
400 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
402 	check(f1, f2, flags);
403 	output(file1, f1, file2, f2, flags);
404 closem:
405 	if (anychange) {
406 		status |= 1;
407 		if (rval == D_SAME)
408 			rval = D_DIFFER;
409 	}
410 	if (f1 != NULL)
411 		fclose(f1);
412 	if (f2 != NULL)
413 		fclose(f2);
414 
415 	return (rval);
416 }
417 
418 /*
419  * Check to see if the given files differ.
420  * Returns 0 if they are the same, 1 if different, and -1 on error.
421  * XXX - could use code from cmp(1) [faster]
422  */
423 static int
424 files_differ(FILE *f1, FILE *f2, int flags)
425 {
426 	char buf1[BUFSIZ], buf2[BUFSIZ];
427 	size_t i, j;
428 
429 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
430 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
431 		return (1);
432 	for (;;) {
433 		i = fread(buf1, 1, sizeof(buf1), f1);
434 		j = fread(buf2, 1, sizeof(buf2), f2);
435 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
436 			return (-1);
437 		if (i != j)
438 			return (1);
439 		if (i == 0)
440 			return (0);
441 		if (memcmp(buf1, buf2, i) != 0)
442 			return (1);
443 	}
444 }
445 
446 static FILE *
447 opentemp(const char *file)
448 {
449 	char buf[BUFSIZ], *tempdir, tempfile[PATH_MAX];
450 	ssize_t nread;
451 	int ifd, ofd;
452 
453 	if (strcmp(file, "-") == 0)
454 		ifd = STDIN_FILENO;
455 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
456 		return (NULL);
457 
458 	if ((tempdir = getenv("TMPDIR")) == NULL)
459 		tempdir = _PATH_TMP;
460 
461 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
462 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
463 	    sizeof(tempfile)) {
464 		close(ifd);
465 		errno = ENAMETOOLONG;
466 		return (NULL);
467 	}
468 
469 	if ((ofd = mkstemp(tempfile)) < 0) {
470 		close(ifd);
471 		return (NULL);
472 	}
473 	unlink(tempfile);
474 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
475 		if (write(ofd, buf, nread) != nread) {
476 			close(ifd);
477 			close(ofd);
478 			return (NULL);
479 		}
480 	}
481 	close(ifd);
482 	lseek(ofd, (off_t)0, SEEK_SET);
483 	return (fdopen(ofd, "r"));
484 }
485 
486 char *
487 splice(char *dir, char *file)
488 {
489 	char *tail, *buf;
490 	size_t dirlen;
491 
492 	dirlen = strlen(dir);
493 	while (dirlen != 0 && dir[dirlen - 1] == '/')
494 	    dirlen--;
495 	if ((tail = strrchr(file, '/')) == NULL)
496 		tail = file;
497 	else
498 		tail++;
499 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
500 	return (buf);
501 }
502 
503 static void
504 prepare(int i, FILE *fd, off_t filesize, int flags)
505 {
506 	struct line *p;
507 	int j, h;
508 	size_t sz;
509 
510 	rewind(fd);
511 
512 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
513 	if (sz < 100)
514 		sz = 100;
515 
516 	p = xcalloc(sz + 3, sizeof(*p));
517 	for (j = 0; (h = readhash(fd, flags));) {
518 		if (j == sz) {
519 			sz = sz * 3 / 2;
520 			p = xreallocarray(p, sz + 3, sizeof(*p));
521 		}
522 		p[++j].value = h;
523 	}
524 	len[i] = j;
525 	file[i] = p;
526 }
527 
528 static void
529 prune(void)
530 {
531 	int i, j;
532 
533 	for (pref = 0; pref < len[0] && pref < len[1] &&
534 	    file[0][pref + 1].value == file[1][pref + 1].value;
535 	    pref++)
536 		;
537 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
538 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
539 	    suff++)
540 		;
541 	for (j = 0; j < 2; j++) {
542 		sfile[j] = file[j] + pref;
543 		slen[j] = len[j] - pref - suff;
544 		for (i = 0; i <= slen[j]; i++)
545 			sfile[j][i].serial = i;
546 	}
547 }
548 
549 static void
550 equiv(struct line *a, int n, struct line *b, int m, int *c)
551 {
552 	int i, j;
553 
554 	i = j = 1;
555 	while (i <= n && j <= m) {
556 		if (a[i].value < b[j].value)
557 			a[i++].value = 0;
558 		else if (a[i].value == b[j].value)
559 			a[i++].value = j;
560 		else
561 			j++;
562 	}
563 	while (i <= n)
564 		a[i++].value = 0;
565 	b[m + 1].value = 0;
566 	j = 0;
567 	while (++j <= m) {
568 		c[j] = -b[j].serial;
569 		while (b[j + 1].value == b[j].value) {
570 			j++;
571 			c[j] = b[j].serial;
572 		}
573 	}
574 	c[j] = -1;
575 }
576 
577 /* Code taken from ping.c */
578 static int
579 isqrt(int n)
580 {
581 	int y, x = 1;
582 
583 	if (n == 0)
584 		return (0);
585 
586 	do { /* newton was a stinker */
587 		y = x;
588 		x = n / x;
589 		x += y;
590 		x /= 2;
591 	} while ((x - y) > 1 || (x - y) < -1);
592 
593 	return (x);
594 }
595 
596 static int
597 stone(int *a, int n, int *b, int *c, int flags)
598 {
599 	int i, k, y, j, l;
600 	int oldc, tc, oldl, sq;
601 	u_int numtries, bound;
602 
603 	if (flags & D_MINIMAL)
604 		bound = UINT_MAX;
605 	else {
606 		sq = isqrt(n);
607 		bound = MAXIMUM(256, sq);
608 	}
609 
610 	k = 0;
611 	c[0] = newcand(0, 0, 0);
612 	for (i = 1; i <= n; i++) {
613 		j = a[i];
614 		if (j == 0)
615 			continue;
616 		y = -b[j];
617 		oldl = 0;
618 		oldc = c[0];
619 		numtries = 0;
620 		do {
621 			if (y <= clist[oldc].y)
622 				continue;
623 			l = search(c, k, y);
624 			if (l != oldl + 1)
625 				oldc = c[l - 1];
626 			if (l <= k) {
627 				if (clist[c[l]].y <= y)
628 					continue;
629 				tc = c[l];
630 				c[l] = newcand(i, y, oldc);
631 				oldc = tc;
632 				oldl = l;
633 				numtries++;
634 			} else {
635 				c[l] = newcand(i, y, oldc);
636 				k++;
637 				break;
638 			}
639 		} while ((y = b[++j]) > 0 && numtries < bound);
640 	}
641 	return (k);
642 }
643 
644 static int
645 newcand(int x, int y, int pred)
646 {
647 	struct cand *q;
648 
649 	if (clen == clistlen) {
650 		clistlen = clistlen * 11 / 10;
651 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
652 	}
653 	q = clist + clen;
654 	q->x = x;
655 	q->y = y;
656 	q->pred = pred;
657 	return (clen++);
658 }
659 
660 static int
661 search(int *c, int k, int y)
662 {
663 	int i, j, l, t;
664 
665 	if (clist[c[k]].y < y)	/* quick look for typical case */
666 		return (k + 1);
667 	i = 0;
668 	j = k + 1;
669 	for (;;) {
670 		l = (i + j) / 2;
671 		if (l <= i)
672 			break;
673 		t = clist[c[l]].y;
674 		if (t > y)
675 			j = l;
676 		else if (t < y)
677 			i = l;
678 		else
679 			return (l);
680 	}
681 	return (l + 1);
682 }
683 
684 static void
685 unravel(int p)
686 {
687 	struct cand *q;
688 	int i;
689 
690 	for (i = 0; i <= len[0]; i++)
691 		J[i] = i <= pref ? i :
692 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
693 	for (q = clist + p; q->y != 0; q = clist + q->pred)
694 		J[q->x + pref] = q->y + pref;
695 }
696 
697 /*
698  * Check does double duty:
699  *  1.	ferret out any fortuitous correspondences due
700  *	to confounding by hashing (which result in "jackpot")
701  *  2.  collect random access indexes to the two files
702  */
703 static void
704 check(FILE *f1, FILE *f2, int flags)
705 {
706 	int i, j, jackpot, c, d;
707 	long ctold, ctnew;
708 
709 	rewind(f1);
710 	rewind(f2);
711 	j = 1;
712 	ixold[0] = ixnew[0] = 0;
713 	jackpot = 0;
714 	ctold = ctnew = 0;
715 	for (i = 1; i <= len[0]; i++) {
716 		if (J[i] == 0) {
717 			ixold[i] = ctold += skipline(f1);
718 			continue;
719 		}
720 		while (j < J[i]) {
721 			ixnew[j] = ctnew += skipline(f2);
722 			j++;
723 		}
724 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
725 			for (;;) {
726 				c = getc(f1);
727 				d = getc(f2);
728 				/*
729 				 * GNU diff ignores a missing newline
730 				 * in one file for -b or -w.
731 				 */
732 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
733 					if (c == EOF && d == '\n') {
734 						ctnew++;
735 						break;
736 					} else if (c == '\n' && d == EOF) {
737 						ctold++;
738 						break;
739 					}
740 				}
741 				ctold++;
742 				ctnew++;
743 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
744 				    isspace(d)) {
745 					do {
746 						if (c == '\n')
747 							break;
748 						ctold++;
749 					} while (isspace(c = getc(f1)));
750 					do {
751 						if (d == '\n')
752 							break;
753 						ctnew++;
754 					} while (isspace(d = getc(f2)));
755 				} else if ((flags & D_IGNOREBLANKS)) {
756 					while (isspace(c) && c != '\n') {
757 						c = getc(f1);
758 						ctold++;
759 					}
760 					while (isspace(d) && d != '\n') {
761 						d = getc(f2);
762 						ctnew++;
763 					}
764 				}
765 				if (chrtran[c] != chrtran[d]) {
766 					jackpot++;
767 					J[i] = 0;
768 					if (c != '\n' && c != EOF)
769 						ctold += skipline(f1);
770 					if (d != '\n' && c != EOF)
771 						ctnew += skipline(f2);
772 					break;
773 				}
774 				if (c == '\n' || c == EOF)
775 					break;
776 			}
777 		} else {
778 			for (;;) {
779 				ctold++;
780 				ctnew++;
781 				if ((c = getc(f1)) != (d = getc(f2))) {
782 					/* jackpot++; */
783 					J[i] = 0;
784 					if (c != '\n' && c != EOF)
785 						ctold += skipline(f1);
786 					if (d != '\n' && c != EOF)
787 						ctnew += skipline(f2);
788 					break;
789 				}
790 				if (c == '\n' || c == EOF)
791 					break;
792 			}
793 		}
794 		ixold[i] = ctold;
795 		ixnew[j] = ctnew;
796 		j++;
797 	}
798 	for (; j <= len[1]; j++)
799 		ixnew[j] = ctnew += skipline(f2);
800 	/*
801 	 * if (jackpot)
802 	 *	fprintf(stderr, "jackpot\n");
803 	 */
804 }
805 
806 /* shellsort CACM #201 */
807 static void
808 sort(struct line *a, int n)
809 {
810 	struct line *ai, *aim, w;
811 	int j, m = 0, k;
812 
813 	if (n == 0)
814 		return;
815 	for (j = 1; j <= n; j *= 2)
816 		m = 2 * j - 1;
817 	for (m /= 2; m != 0; m /= 2) {
818 		k = n - m;
819 		for (j = 1; j <= k; j++) {
820 			for (ai = &a[j]; ai > a; ai -= m) {
821 				aim = &ai[m];
822 				if (aim < ai)
823 					break;	/* wraparound */
824 				if (aim->value > ai[0].value ||
825 				    (aim->value == ai[0].value &&
826 					aim->serial > ai[0].serial))
827 					break;
828 				w.value = ai[0].value;
829 				ai[0].value = aim->value;
830 				aim->value = w.value;
831 				w.serial = ai[0].serial;
832 				ai[0].serial = aim->serial;
833 				aim->serial = w.serial;
834 			}
835 		}
836 	}
837 }
838 
839 static void
840 unsort(struct line *f, int l, int *b)
841 {
842 	int *a, i;
843 
844 	a = xcalloc(l + 1, sizeof(*a));
845 	for (i = 1; i <= l; i++)
846 		a[f[i].serial] = f[i].value;
847 	for (i = 1; i <= l; i++)
848 		b[i] = a[i];
849 	free(a);
850 }
851 
852 static int
853 skipline(FILE *f)
854 {
855 	int i, c;
856 
857 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
858 		continue;
859 	return (i);
860 }
861 
862 static void
863 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
864 {
865 	int m, i0, i1, j0, j1;
866 
867 	rewind(f1);
868 	rewind(f2);
869 	m = len[0];
870 	J[0] = 0;
871 	J[m + 1] = len[1] + 1;
872 	if (diff_format != D_EDIT) {
873 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
874 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
875 				i0++;
876 			j0 = J[i0 - 1] + 1;
877 			i1 = i0 - 1;
878 			while (i1 < m && J[i1 + 1] == 0)
879 				i1++;
880 			j1 = J[i1 + 1] - 1;
881 			J[i1] = j1;
882 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
883 		}
884 	} else {
885 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
886 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
887 				i0--;
888 			j0 = J[i0 + 1] - 1;
889 			i1 = i0 + 1;
890 			while (i1 > 1 && J[i1 - 1] == 0)
891 				i1--;
892 			j1 = J[i1 - 1] + 1;
893 			J[i1] = j1;
894 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
895 		}
896 	}
897 	if (m == 0)
898 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
899 	if (diff_format == D_IFDEF) {
900 		for (;;) {
901 #define	c i0
902 			if ((c = getc(f1)) == EOF)
903 				return;
904 			diff_output("%c", c);
905 		}
906 #undef c
907 	}
908 	if (anychange != 0) {
909 		if (diff_format == D_CONTEXT)
910 			dump_context_vec(f1, f2, flags);
911 		else if (diff_format == D_UNIFIED)
912 			dump_unified_vec(f1, f2, flags);
913 	}
914 }
915 
916 static void
917 range(int a, int b, char *separator)
918 {
919 	diff_output("%d", a > b ? b : a);
920 	if (a < b)
921 		diff_output("%s%d", separator, b);
922 }
923 
924 static void
925 uni_range(int a, int b)
926 {
927 	if (a < b)
928 		diff_output("%d,%d", a, b - a + 1);
929 	else if (a == b)
930 		diff_output("%d", b);
931 	else
932 		diff_output("%d,0", b);
933 }
934 
935 static char *
936 preadline(int fd, size_t rlen, off_t off)
937 {
938 	char *line;
939 	ssize_t nr;
940 
941 	line = xmalloc(rlen + 1);
942 	if ((nr = pread(fd, line, rlen, off)) < 0)
943 		err(2, "preadline");
944 	if (nr > 0 && line[nr-1] == '\n')
945 		nr--;
946 	line[nr] = '\0';
947 	return (line);
948 }
949 
950 static int
951 ignoreline(char *line)
952 {
953 	int ret;
954 
955 	ret = regexec(&ignore_re, line, 0, NULL, 0);
956 	free(line);
957 	return (ret == 0);	/* if it matched, it should be ignored. */
958 }
959 
960 /*
961  * Indicate that there is a difference between lines a and b of the from file
962  * to get to lines c to d of the to file.  If a is greater then b then there
963  * are no lines in the from file involved and this means that there were
964  * lines appended (beginning at b).  If c is greater than d then there are
965  * lines missing from the to file.
966  */
967 static void
968 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
969     int *pflags)
970 {
971 	static size_t max_context = 64;
972 	int i;
973 
974 restart:
975 	if (diff_format != D_IFDEF && a > b && c > d)
976 		return;
977 	if (ignore_pats != NULL) {
978 		char *line;
979 		/*
980 		 * All lines in the change, insert, or delete must
981 		 * match an ignore pattern for the change to be
982 		 * ignored.
983 		 */
984 		if (a <= b) {		/* Changes and deletes. */
985 			for (i = a; i <= b; i++) {
986 				line = preadline(fileno(f1),
987 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
988 				if (!ignoreline(line))
989 					goto proceed;
990 			}
991 		}
992 		if (a > b || c <= d) {	/* Changes and inserts. */
993 			for (i = c; i <= d; i++) {
994 				line = preadline(fileno(f2),
995 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
996 				if (!ignoreline(line))
997 					goto proceed;
998 			}
999 		}
1000 		return;
1001 	}
1002 proceed:
1003 	if (*pflags & D_HEADER) {
1004 		diff_output("%s %s %s\n", diffargs, file1, file2);
1005 		*pflags &= ~D_HEADER;
1006 	}
1007 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1008 		/*
1009 		 * Allocate change records as needed.
1010 		 */
1011 		if (context_vec_ptr == context_vec_end - 1) {
1012 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1013 			max_context <<= 1;
1014 			context_vec_start = xreallocarray(context_vec_start,
1015 			    max_context, sizeof(*context_vec_start));
1016 			context_vec_end = context_vec_start + max_context;
1017 			context_vec_ptr = context_vec_start + offset;
1018 		}
1019 		if (anychange == 0) {
1020 			/*
1021 			 * Print the context/unidiff header first time through.
1022 			 */
1023 			print_header(file1, file2);
1024 			anychange = 1;
1025 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1026 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1027 			/*
1028 			 * If this change is more than 'diff_context' lines from the
1029 			 * previous change, dump the record and reset it.
1030 			 */
1031 			if (diff_format == D_CONTEXT)
1032 				dump_context_vec(f1, f2, *pflags);
1033 			else
1034 				dump_unified_vec(f1, f2, *pflags);
1035 		}
1036 		context_vec_ptr++;
1037 		context_vec_ptr->a = a;
1038 		context_vec_ptr->b = b;
1039 		context_vec_ptr->c = c;
1040 		context_vec_ptr->d = d;
1041 		return;
1042 	}
1043 	if (anychange == 0)
1044 		anychange = 1;
1045 	switch (diff_format) {
1046 	case D_BRIEF:
1047 		return;
1048 	case D_NORMAL:
1049 	case D_EDIT:
1050 		range(a, b, ",");
1051 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1052 		if (diff_format == D_NORMAL)
1053 			range(c, d, ",");
1054 		diff_output("\n");
1055 		break;
1056 	case D_REVERSE:
1057 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1058 		range(a, b, " ");
1059 		diff_output("\n");
1060 		break;
1061 	case D_NREVERSE:
1062 		if (a > b)
1063 			diff_output("a%d %d\n", b, d - c + 1);
1064 		else {
1065 			diff_output("d%d %d\n", a, b - a + 1);
1066 			if (!(c > d))
1067 				/* add changed lines */
1068 				diff_output("a%d %d\n", b, d - c + 1);
1069 		}
1070 		break;
1071 	}
1072 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1073 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1074 		if (a <= b && c <= d && diff_format == D_NORMAL)
1075 			diff_output("---\n");
1076 	}
1077 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1078 	if (i != 0 && diff_format == D_EDIT) {
1079 		/*
1080 		 * A non-zero return value for D_EDIT indicates that the
1081 		 * last line printed was a bare dot (".") that has been
1082 		 * escaped as ".." to prevent ed(1) from misinterpreting
1083 		 * it.  We have to add a substitute command to change this
1084 		 * back and restart where we left off.
1085 		 */
1086 		diff_output(".\n");
1087 		diff_output("%ds/^\\.\\././\n", a);
1088 		a += i;
1089 		c += i;
1090 		goto restart;
1091 	}
1092 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1093 		diff_output(".\n");
1094 	if (inifdef) {
1095 		diff_output("#endif /* %s */\n", ifdefname);
1096 		inifdef = 0;
1097 	}
1098 }
1099 
1100 static int
1101 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1102 {
1103 	int i, j, c, lastc, col, nc;
1104 
1105 	/*
1106 	 * When doing #ifdef's, copy down to current line
1107 	 * if this is the first file, so that stuff makes it to output.
1108 	 */
1109 	if (diff_format == D_IFDEF && oldfile) {
1110 		long curpos = ftell(lb);
1111 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1112 		nc = f[a > b ? b : a - 1] - curpos;
1113 		for (i = 0; i < nc; i++)
1114 			diff_output("%c", getc(lb));
1115 	}
1116 	if (a > b)
1117 		return (0);
1118 	if (diff_format == D_IFDEF) {
1119 		if (inifdef) {
1120 			diff_output("#else /* %s%s */\n",
1121 			    oldfile == 1 ? "!" : "", ifdefname);
1122 		} else {
1123 			if (oldfile)
1124 				diff_output("#ifndef %s\n", ifdefname);
1125 			else
1126 				diff_output("#ifdef %s\n", ifdefname);
1127 		}
1128 		inifdef = 1 + oldfile;
1129 	}
1130 	for (i = a; i <= b; i++) {
1131 		fseek(lb, f[i - 1], SEEK_SET);
1132 		nc = f[i] - f[i - 1];
1133 		if (diff_format != D_IFDEF && ch != '\0') {
1134 			diff_output("%c", ch);
1135 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1136 			    || diff_format == D_UNIFIED))
1137 				diff_output("\t");
1138 			else if (diff_format != D_UNIFIED)
1139 				diff_output(" ");
1140 		}
1141 		col = 0;
1142 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1143 			if ((c = getc(lb)) == EOF) {
1144 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1145 				    diff_format == D_NREVERSE)
1146 					warnx("No newline at end of file");
1147 				else
1148 					diff_output("\n\\ No newline at end of "
1149 					    "file\n");
1150 				return (0);
1151 			}
1152 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1153 				do {
1154 					diff_output(" ");
1155 				} while (++col & 7);
1156 			} else {
1157 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1158 				    && lastc == '.') {
1159 					/*
1160 					 * Don't print a bare "." line
1161 					 * since that will confuse ed(1).
1162 					 * Print ".." instead and return,
1163 					 * giving the caller an offset
1164 					 * from which to restart.
1165 					 */
1166 					diff_output(".\n");
1167 					return (i - a + 1);
1168 				}
1169 				diff_output("%c", c);
1170 				col++;
1171 			}
1172 		}
1173 	}
1174 	return (0);
1175 }
1176 
1177 /*
1178  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1179  */
1180 static int
1181 readhash(FILE *f, int flags)
1182 {
1183 	int i, t, space;
1184 	int sum;
1185 
1186 	sum = 1;
1187 	space = 0;
1188 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1189 		if (flags & D_IGNORECASE)
1190 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1191 				if (t == EOF) {
1192 					if (i == 0)
1193 						return (0);
1194 					break;
1195 				}
1196 				sum = sum * 127 + chrtran[t];
1197 			}
1198 		else
1199 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1200 				if (t == EOF) {
1201 					if (i == 0)
1202 						return (0);
1203 					break;
1204 				}
1205 				sum = sum * 127 + t;
1206 			}
1207 	} else {
1208 		for (i = 0;;) {
1209 			switch (t = getc(f)) {
1210 			case '\t':
1211 			case '\r':
1212 			case '\v':
1213 			case '\f':
1214 			case ' ':
1215 				space++;
1216 				continue;
1217 			default:
1218 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1219 					i++;
1220 					space = 0;
1221 				}
1222 				sum = sum * 127 + chrtran[t];
1223 				i++;
1224 				continue;
1225 			case EOF:
1226 				if (i == 0)
1227 					return (0);
1228 				/* FALLTHROUGH */
1229 			case '\n':
1230 				break;
1231 			}
1232 			break;
1233 		}
1234 	}
1235 	/*
1236 	 * There is a remote possibility that we end up with a zero sum.
1237 	 * Zero is used as an EOF marker, so return 1 instead.
1238 	 */
1239 	return (sum == 0 ? 1 : sum);
1240 }
1241 
1242 static int
1243 asciifile(FILE *f)
1244 {
1245 	unsigned char buf[BUFSIZ];
1246 	size_t cnt;
1247 
1248 	if (f == NULL)
1249 		return (1);
1250 
1251 	rewind(f);
1252 	cnt = fread(buf, 1, sizeof(buf), f);
1253 	return (memchr(buf, '\0', cnt) == NULL);
1254 }
1255 
1256 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1257 
1258 static char *
1259 match_function(const long *f, int pos, FILE *fp)
1260 {
1261 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1262 	size_t nc;
1263 	int last = lastline;
1264 	char *state = NULL;
1265 
1266 	lastline = pos;
1267 	while (pos > last) {
1268 		fseek(fp, f[pos - 1], SEEK_SET);
1269 		nc = f[pos] - f[pos - 1];
1270 		if (nc >= sizeof(buf))
1271 			nc = sizeof(buf) - 1;
1272 		nc = fread(buf, 1, nc, fp);
1273 		if (nc > 0) {
1274 			buf[nc] = '\0';
1275 			buf[strcspn(buf, "\n")] = '\0';
1276 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1277 				if (begins_with(buf, "private:")) {
1278 					if (!state)
1279 						state = " (private)";
1280 				} else if (begins_with(buf, "protected:")) {
1281 					if (!state)
1282 						state = " (protected)";
1283 				} else if (begins_with(buf, "public:")) {
1284 					if (!state)
1285 						state = " (public)";
1286 				} else {
1287 					strlcpy(lastbuf, buf, sizeof lastbuf);
1288 					if (state)
1289 						strlcat(lastbuf, state,
1290 						    sizeof lastbuf);
1291 					lastmatchline = pos;
1292 					return lastbuf;
1293 				}
1294 			}
1295 		}
1296 		pos--;
1297 	}
1298 	return lastmatchline > 0 ? lastbuf : NULL;
1299 }
1300 
1301 /* dump accumulated "context" diff changes */
1302 static void
1303 dump_context_vec(FILE *f1, FILE *f2, int flags)
1304 {
1305 	struct context_vec *cvp = context_vec_start;
1306 	int lowa, upb, lowc, upd, do_output;
1307 	int a, b, c, d;
1308 	char ch, *f;
1309 
1310 	if (context_vec_start > context_vec_ptr)
1311 		return;
1312 
1313 	b = d = 0;		/* gcc */
1314 	lowa = MAXIMUM(1, cvp->a - diff_context);
1315 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1316 	lowc = MAXIMUM(1, cvp->c - diff_context);
1317 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1318 
1319 	diff_output("***************");
1320 	if ((flags & D_PROTOTYPE)) {
1321 		f = match_function(ixold, lowa-1, f1);
1322 		if (f != NULL)
1323 			diff_output(" %s", f);
1324 	}
1325 	diff_output("\n*** ");
1326 	range(lowa, upb, ",");
1327 	diff_output(" ****\n");
1328 
1329 	/*
1330 	 * Output changes to the "old" file.  The first loop suppresses
1331 	 * output if there were no changes to the "old" file (we'll see
1332 	 * the "old" lines as context in the "new" list).
1333 	 */
1334 	do_output = 0;
1335 	for (; cvp <= context_vec_ptr; cvp++)
1336 		if (cvp->a <= cvp->b) {
1337 			cvp = context_vec_start;
1338 			do_output++;
1339 			break;
1340 		}
1341 	if (do_output) {
1342 		while (cvp <= context_vec_ptr) {
1343 			a = cvp->a;
1344 			b = cvp->b;
1345 			c = cvp->c;
1346 			d = cvp->d;
1347 
1348 			if (a <= b && c <= d)
1349 				ch = 'c';
1350 			else
1351 				ch = (a <= b) ? 'd' : 'a';
1352 
1353 			if (ch == 'a')
1354 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1355 			else {
1356 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1357 				fetch(ixold, a, b, f1,
1358 				    ch == 'c' ? '!' : '-', 0, flags);
1359 			}
1360 			lowa = b + 1;
1361 			cvp++;
1362 		}
1363 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1364 	}
1365 	/* output changes to the "new" file */
1366 	diff_output("--- ");
1367 	range(lowc, upd, ",");
1368 	diff_output(" ----\n");
1369 
1370 	do_output = 0;
1371 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1372 		if (cvp->c <= cvp->d) {
1373 			cvp = context_vec_start;
1374 			do_output++;
1375 			break;
1376 		}
1377 	if (do_output) {
1378 		while (cvp <= context_vec_ptr) {
1379 			a = cvp->a;
1380 			b = cvp->b;
1381 			c = cvp->c;
1382 			d = cvp->d;
1383 
1384 			if (a <= b && c <= d)
1385 				ch = 'c';
1386 			else
1387 				ch = (a <= b) ? 'd' : 'a';
1388 
1389 			if (ch == 'd')
1390 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1391 			else {
1392 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1393 				fetch(ixnew, c, d, f2,
1394 				    ch == 'c' ? '!' : '+', 0, flags);
1395 			}
1396 			lowc = d + 1;
1397 			cvp++;
1398 		}
1399 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1400 	}
1401 	context_vec_ptr = context_vec_start - 1;
1402 }
1403 
1404 /* dump accumulated "unified" diff changes */
1405 static void
1406 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1407 {
1408 	struct context_vec *cvp = context_vec_start;
1409 	int lowa, upb, lowc, upd;
1410 	int a, b, c, d;
1411 	char ch, *f;
1412 
1413 	if (context_vec_start > context_vec_ptr)
1414 		return;
1415 
1416 	b = d = 0;		/* gcc */
1417 	lowa = MAXIMUM(1, cvp->a - diff_context);
1418 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1419 	lowc = MAXIMUM(1, cvp->c - diff_context);
1420 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1421 
1422 	diff_output("@@ -");
1423 	uni_range(lowa, upb);
1424 	diff_output(" +");
1425 	uni_range(lowc, upd);
1426 	diff_output(" @@");
1427 	if ((flags & D_PROTOTYPE)) {
1428 		f = match_function(ixold, lowa-1, f1);
1429 		if (f != NULL)
1430 			diff_output(" %s", f);
1431 	}
1432 	diff_output("\n");
1433 
1434 	/*
1435 	 * Output changes in "unified" diff format--the old and new lines
1436 	 * are printed together.
1437 	 */
1438 	for (; cvp <= context_vec_ptr; cvp++) {
1439 		a = cvp->a;
1440 		b = cvp->b;
1441 		c = cvp->c;
1442 		d = cvp->d;
1443 
1444 		/*
1445 		 * c: both new and old changes
1446 		 * d: only changes in the old file
1447 		 * a: only changes in the new file
1448 		 */
1449 		if (a <= b && c <= d)
1450 			ch = 'c';
1451 		else
1452 			ch = (a <= b) ? 'd' : 'a';
1453 
1454 		switch (ch) {
1455 		case 'c':
1456 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1457 			fetch(ixold, a, b, f1, '-', 0, flags);
1458 			fetch(ixnew, c, d, f2, '+', 0, flags);
1459 			break;
1460 		case 'd':
1461 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1462 			fetch(ixold, a, b, f1, '-', 0, flags);
1463 			break;
1464 		case 'a':
1465 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1466 			fetch(ixnew, c, d, f2, '+', 0, flags);
1467 			break;
1468 		}
1469 		lowa = b + 1;
1470 		lowc = d + 1;
1471 	}
1472 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1473 
1474 	context_vec_ptr = context_vec_start - 1;
1475 }
1476 
1477 static void
1478 print_header(const char *file1, const char *file2)
1479 {
1480 	if (label[0] != NULL)
1481 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1482 		    label[0]);
1483 	else
1484 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1485 		    file1, ctime(&stb1.st_mtime));
1486 	if (label[1] != NULL)
1487 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1488 		    label[1]);
1489 	else
1490 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1491 		    file2, ctime(&stb2.st_mtime));
1492 }
1493