xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 2821fdb080fdc801ba67bdf2837f5e164f3a2926)
1 /*	$OpenBSD: diffreg.c,v 1.73 2009/10/27 23:59:37 deraadt Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 static FILE	*opentemp(const char *);
176 static void	 output(char *, FILE *, char *, FILE *, int);
177 static void	 check(char *, FILE *, char *, FILE *, int);
178 static void	 range(int, int, char *);
179 static void	 uni_range(int, int);
180 static void	 dump_context_vec(FILE *, FILE *, int);
181 static void	 dump_unified_vec(FILE *, FILE *, int);
182 static void	 prepare(int, FILE *, off_t, int);
183 static void	 prune(void);
184 static void	 equiv(struct line *, int, struct line *, int, int *);
185 static void	 unravel(int);
186 static void	 unsort(struct line *, int, int *);
187 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
188 static void	 sort(struct line *, int);
189 static void	 print_header(const char *, const char *);
190 static int	 ignoreline(char *);
191 static int	 asciifile(FILE *);
192 static int	 fetch(long *, int, int, FILE *, int, int, int);
193 static int	 newcand(int, int, int);
194 static int	 search(int *, int, int);
195 static int	 skipline(FILE *);
196 static int	 isqrt(int);
197 static int	 stone(int *, int, int *, int *, int);
198 static int	 readhash(FILE *, int);
199 static int	 files_differ(FILE *, FILE *, int);
200 static char	*match_function(const long *, int, FILE *);
201 static char	*preadline(int, size_t, off_t);
202 
203 static int  *J;			/* will be overlaid on class */
204 static int  *class;		/* will be overlaid on file[0] */
205 static int  *klist;		/* will be overlaid on file[0] after class */
206 static int  *member;		/* will be overlaid on file[1] */
207 static int   clen;
208 static int   inifdef;		/* whether or not we are in a #ifdef block */
209 static int   len[2];
210 static int   pref, suff;	/* length of prefix and suffix */
211 static int   slen[2];
212 static int   anychange;
213 static long *ixnew;		/* will be overlaid on file[1] */
214 static long *ixold;		/* will be overlaid on klist */
215 static struct cand *clist;	/* merely a free storage pot for candidates */
216 static int   clistlen;		/* the length of clist */
217 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
218 static u_char *chrtran;		/* translation table for case-folding */
219 static struct context_vec *context_vec_start;
220 static struct context_vec *context_vec_end;
221 static struct context_vec *context_vec_ptr;
222 
223 #define FUNCTION_CONTEXT_SIZE	55
224 static char lastbuf[FUNCTION_CONTEXT_SIZE];
225 static int lastline;
226 static int lastmatchline;
227 
228 
229 /*
230  * chrtran points to one of 2 translation tables: cup2low if folding upper to
231  * lower case clow2low if not folding case
232  */
233 u_char clow2low[256] = {
234 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
235 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
236 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
237 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
238 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
239 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
240 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
241 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
242 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
243 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
244 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
245 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
246 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
247 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
248 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
249 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
250 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
251 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
252 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
253 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
254 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
255 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
256 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
257 	0xfd, 0xfe, 0xff
258 };
259 
260 u_char cup2low[256] = {
261 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
262 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
263 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
264 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
265 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
266 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
267 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
268 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
269 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
270 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
271 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
272 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
273 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
274 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
275 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
276 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
277 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
278 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
279 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
280 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
281 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
282 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
283 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
284 	0xfd, 0xfe, 0xff
285 };
286 
287 int
288 diffreg(char *file1, char *file2, int flags)
289 {
290 	FILE *f1, *f2;
291 	int i, rval, ostdout = -1;
292 	pid_t pid = -1;
293 
294 	f1 = f2 = NULL;
295 	rval = D_SAME;
296 	anychange = 0;
297 	lastline = 0;
298 	lastmatchline = 0;
299 	context_vec_ptr = context_vec_start - 1;
300 	if (flags & D_IGNORECASE)
301 		chrtran = cup2low;
302 	else
303 		chrtran = clow2low;
304 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
305 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
306 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
307 		goto closem;
308 
309 	if (flags & D_EMPTY1)
310 		f1 = fopen(_PATH_DEVNULL, "r");
311 	else {
312 		if (!S_ISREG(stb1.st_mode)) {
313 			if ((f1 = opentemp(file1)) == NULL ||
314 			    fstat(fileno(f1), &stb1) < 0) {
315 				warn("%s", file1);
316 				status |= 2;
317 				goto closem;
318 			}
319 		} else if (strcmp(file1, "-") == 0)
320 			f1 = stdin;
321 		else
322 			f1 = fopen(file1, "r");
323 	}
324 	if (f1 == NULL) {
325 		warn("%s", file1);
326 		status |= 2;
327 		goto closem;
328 	}
329 
330 	if (flags & D_EMPTY2)
331 		f2 = fopen(_PATH_DEVNULL, "r");
332 	else {
333 		if (!S_ISREG(stb2.st_mode)) {
334 			if ((f2 = opentemp(file2)) == NULL ||
335 			    fstat(fileno(f2), &stb2) < 0) {
336 				warn("%s", file2);
337 				status |= 2;
338 				goto closem;
339 			}
340 		} else if (strcmp(file2, "-") == 0)
341 			f2 = stdin;
342 		else
343 			f2 = fopen(file2, "r");
344 	}
345 	if (f2 == NULL) {
346 		warn("%s", file2);
347 		status |= 2;
348 		goto closem;
349 	}
350 
351 	switch (files_differ(f1, f2, flags)) {
352 	case 0:
353 		goto closem;
354 	case 1:
355 		break;
356 	default:
357 		/* error */
358 		status |= 2;
359 		goto closem;
360 	}
361 
362 	if ((flags & D_FORCEASCII) == 0 &&
363 	    (!asciifile(f1) || !asciifile(f2))) {
364 		rval = D_BINARY;
365 		status |= 1;
366 		goto closem;
367 	}
368 	if (lflag) {
369 		/* redirect stdout to pr */
370 		int pfd[2];
371 		char *header;
372 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
373 
374 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
375 		prargv[2] = header;
376 		fflush(stdout);
377 		rewind(stdout);
378 		pipe(pfd);
379 		switch ((pid = fork())) {
380 		case -1:
381 			warnx("No more processes");
382 			status |= 2;
383 			xfree(header);
384 			return (D_ERROR);
385 		case 0:
386 			/* child */
387 			if (pfd[0] != STDIN_FILENO) {
388 				dup2(pfd[0], STDIN_FILENO);
389 				close(pfd[0]);
390 			}
391 			close(pfd[1]);
392 			execv(_PATH_PR, prargv);
393 			_exit(127);
394 		default:
395 			/* parent */
396 			if (pfd[1] != STDOUT_FILENO) {
397 				ostdout = dup(STDOUT_FILENO);
398 				dup2(pfd[1], STDOUT_FILENO);
399 				close(pfd[1]);
400 			}
401 			close(pfd[0]);
402 			rewind(stdout);
403 			xfree(header);
404 		}
405 	}
406 	prepare(0, f1, stb1.st_size, flags);
407 	prepare(1, f2, stb2.st_size, flags);
408 
409 	prune();
410 	sort(sfile[0], slen[0]);
411 	sort(sfile[1], slen[1]);
412 
413 	member = (int *)file[1];
414 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
415 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
416 
417 	class = (int *)file[0];
418 	unsort(sfile[0], slen[0], class);
419 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
420 
421 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
422 	clen = 0;
423 	clistlen = 100;
424 	clist = xcalloc(clistlen, sizeof(*clist));
425 	i = stone(class, slen[0], member, klist, flags);
426 	xfree(member);
427 	xfree(class);
428 
429 	J = xrealloc(J, len[0] + 2, sizeof(*J));
430 	unravel(klist[i]);
431 	xfree(clist);
432 	xfree(klist);
433 
434 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
435 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
436 	check(file1, f1, file2, f2, flags);
437 	output(file1, f1, file2, f2, flags);
438 	if (ostdout != -1) {
439 		int wstatus;
440 
441 		/* close the pipe to pr and restore stdout */
442 		fflush(stdout);
443 		rewind(stdout);
444 		if (ostdout != STDOUT_FILENO) {
445 			close(STDOUT_FILENO);
446 			dup2(ostdout, STDOUT_FILENO);
447 			close(ostdout);
448 		}
449 		waitpid(pid, &wstatus, 0);
450 	}
451 closem:
452 	if (anychange) {
453 		status |= 1;
454 		if (rval == D_SAME)
455 			rval = D_DIFFER;
456 	}
457 	if (f1 != NULL)
458 		fclose(f1);
459 	if (f2 != NULL)
460 		fclose(f2);
461 
462 	return (rval);
463 }
464 
465 /*
466  * Check to see if the given files differ.
467  * Returns 0 if they are the same, 1 if different, and -1 on error.
468  * XXX - could use code from cmp(1) [faster]
469  */
470 static int
471 files_differ(FILE *f1, FILE *f2, int flags)
472 {
473 	char buf1[BUFSIZ], buf2[BUFSIZ];
474 	size_t i, j;
475 
476 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
477 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
478 		return (1);
479 	for (;;) {
480 		i = fread(buf1, 1, sizeof(buf1), f1);
481 		j = fread(buf2, 1, sizeof(buf2), f2);
482 		if (i != j)
483 			return (1);
484 		if (i == 0 && j == 0) {
485 			if (ferror(f1) || ferror(f2))
486 				return (1);
487 			return (0);
488 		}
489 		if (memcmp(buf1, buf2, i) != 0)
490 			return (1);
491 	}
492 }
493 
494 static FILE *
495 opentemp(const char *file)
496 {
497 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
498 	ssize_t nread;
499 	int ifd, ofd;
500 
501 	if (strcmp(file, "-") == 0)
502 		ifd = STDIN_FILENO;
503 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
504 		return (NULL);
505 
506 	if ((tempdir = getenv("TMPDIR")) == NULL)
507 		tempdir = _PATH_TMP;
508 
509 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
510 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
511 	    sizeof(tempfile)) {
512 		close(ifd);
513 		errno = ENAMETOOLONG;
514 		return (NULL);
515 	}
516 
517 	if ((ofd = mkstemp(tempfile)) < 0)
518 		return (NULL);
519 	unlink(tempfile);
520 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
521 		if (write(ofd, buf, nread) != nread) {
522 			close(ifd);
523 			close(ofd);
524 			return (NULL);
525 		}
526 	}
527 	close(ifd);
528 	lseek(ofd, (off_t)0, SEEK_SET);
529 	return (fdopen(ofd, "r"));
530 }
531 
532 char *
533 splice(char *dir, char *file)
534 {
535 	char *tail, *buf;
536 
537 	if ((tail = strrchr(file, '/')) == NULL)
538 		tail = file;
539 	else
540 		tail++;
541 	xasprintf(&buf, "%s/%s", dir, tail);
542 	return (buf);
543 }
544 
545 static void
546 prepare(int i, FILE *fd, off_t filesize, int flags)
547 {
548 	struct line *p;
549 	int j, h;
550 	size_t sz;
551 
552 	rewind(fd);
553 
554 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
555 	if (sz < 100)
556 		sz = 100;
557 
558 	p = xcalloc(sz + 3, sizeof(*p));
559 	for (j = 0; (h = readhash(fd, flags));) {
560 		if (j == sz) {
561 			sz = sz * 3 / 2;
562 			p = xrealloc(p, sz + 3, sizeof(*p));
563 		}
564 		p[++j].value = h;
565 	}
566 	len[i] = j;
567 	file[i] = p;
568 }
569 
570 static void
571 prune(void)
572 {
573 	int i, j;
574 
575 	for (pref = 0; pref < len[0] && pref < len[1] &&
576 	    file[0][pref + 1].value == file[1][pref + 1].value;
577 	    pref++)
578 		;
579 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
580 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
581 	    suff++)
582 		;
583 	for (j = 0; j < 2; j++) {
584 		sfile[j] = file[j] + pref;
585 		slen[j] = len[j] - pref - suff;
586 		for (i = 0; i <= slen[j]; i++)
587 			sfile[j][i].serial = i;
588 	}
589 }
590 
591 static void
592 equiv(struct line *a, int n, struct line *b, int m, int *c)
593 {
594 	int i, j;
595 
596 	i = j = 1;
597 	while (i <= n && j <= m) {
598 		if (a[i].value < b[j].value)
599 			a[i++].value = 0;
600 		else if (a[i].value == b[j].value)
601 			a[i++].value = j;
602 		else
603 			j++;
604 	}
605 	while (i <= n)
606 		a[i++].value = 0;
607 	b[m + 1].value = 0;
608 	j = 0;
609 	while (++j <= m) {
610 		c[j] = -b[j].serial;
611 		while (b[j + 1].value == b[j].value) {
612 			j++;
613 			c[j] = b[j].serial;
614 		}
615 	}
616 	c[j] = -1;
617 }
618 
619 /* Code taken from ping.c */
620 static int
621 isqrt(int n)
622 {
623 	int y, x = 1;
624 
625 	if (n == 0)
626 		return (0);
627 
628 	do { /* newton was a stinker */
629 		y = x;
630 		x = n / x;
631 		x += y;
632 		x /= 2;
633 	} while ((x - y) > 1 || (x - y) < -1);
634 
635 	return (x);
636 }
637 
638 static int
639 stone(int *a, int n, int *b, int *c, int flags)
640 {
641 	int i, k, y, j, l;
642 	int oldc, tc, oldl;
643 	u_int numtries;
644 
645 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
646 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
647 	    MAX(256, isqrt(n));
648 
649 	k = 0;
650 	c[0] = newcand(0, 0, 0);
651 	for (i = 1; i <= n; i++) {
652 		j = a[i];
653 		if (j == 0)
654 			continue;
655 		y = -b[j];
656 		oldl = 0;
657 		oldc = c[0];
658 		numtries = 0;
659 		do {
660 			if (y <= clist[oldc].y)
661 				continue;
662 			l = search(c, k, y);
663 			if (l != oldl + 1)
664 				oldc = c[l - 1];
665 			if (l <= k) {
666 				if (clist[c[l]].y <= y)
667 					continue;
668 				tc = c[l];
669 				c[l] = newcand(i, y, oldc);
670 				oldc = tc;
671 				oldl = l;
672 				numtries++;
673 			} else {
674 				c[l] = newcand(i, y, oldc);
675 				k++;
676 				break;
677 			}
678 		} while ((y = b[++j]) > 0 && numtries < bound);
679 	}
680 	return (k);
681 }
682 
683 static int
684 newcand(int x, int y, int pred)
685 {
686 	struct cand *q;
687 
688 	if (clen == clistlen) {
689 		clistlen = clistlen * 11 / 10;
690 		clist = xrealloc(clist, clistlen, sizeof(*clist));
691 	}
692 	q = clist + clen;
693 	q->x = x;
694 	q->y = y;
695 	q->pred = pred;
696 	return (clen++);
697 }
698 
699 static int
700 search(int *c, int k, int y)
701 {
702 	int i, j, l, t;
703 
704 	if (clist[c[k]].y < y)	/* quick look for typical case */
705 		return (k + 1);
706 	i = 0;
707 	j = k + 1;
708 	for (;;) {
709 		l = (i + j) / 2;
710 		if (l <= i)
711 			break;
712 		t = clist[c[l]].y;
713 		if (t > y)
714 			j = l;
715 		else if (t < y)
716 			i = l;
717 		else
718 			return (l);
719 	}
720 	return (l + 1);
721 }
722 
723 static void
724 unravel(int p)
725 {
726 	struct cand *q;
727 	int i;
728 
729 	for (i = 0; i <= len[0]; i++)
730 		J[i] = i <= pref ? i :
731 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
732 	for (q = clist + p; q->y != 0; q = clist + q->pred)
733 		J[q->x + pref] = q->y + pref;
734 }
735 
736 /*
737  * Check does double duty:
738  *  1.	ferret out any fortuitous correspondences due
739  *	to confounding by hashing (which result in "jackpot")
740  *  2.  collect random access indexes to the two files
741  */
742 static void
743 check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
744 {
745 	int i, j, jackpot, c, d;
746 	long ctold, ctnew;
747 
748 	rewind(f1);
749 	rewind(f2);
750 	j = 1;
751 	ixold[0] = ixnew[0] = 0;
752 	jackpot = 0;
753 	ctold = ctnew = 0;
754 	for (i = 1; i <= len[0]; i++) {
755 		if (J[i] == 0) {
756 			ixold[i] = ctold += skipline(f1);
757 			continue;
758 		}
759 		while (j < J[i]) {
760 			ixnew[j] = ctnew += skipline(f2);
761 			j++;
762 		}
763 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
764 			for (;;) {
765 				c = getc(f1);
766 				d = getc(f2);
767 				/*
768 				 * GNU diff ignores a missing newline
769 				 * in one file for -b or -w.
770 				 */
771 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
772 				    ((c == EOF && d == '\n') ||
773 				    (c == '\n' && d == EOF))) {
774 					break;
775 				}
776 				ctold++;
777 				ctnew++;
778 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
779 				    isspace(d)) {
780 					do {
781 						if (c == '\n')
782 							break;
783 						ctold++;
784 					} while (isspace(c = getc(f1)));
785 					do {
786 						if (d == '\n')
787 							break;
788 						ctnew++;
789 					} while (isspace(d = getc(f2)));
790 				} else if ((flags & D_IGNOREBLANKS)) {
791 					while (isspace(c) && c != '\n') {
792 						c = getc(f1);
793 						ctold++;
794 					}
795 					while (isspace(d) && d != '\n') {
796 						d = getc(f2);
797 						ctnew++;
798 					}
799 				}
800 				if (chrtran[c] != chrtran[d]) {
801 					jackpot++;
802 					J[i] = 0;
803 					if (c != '\n' && c != EOF)
804 						ctold += skipline(f1);
805 					if (d != '\n' && c != EOF)
806 						ctnew += skipline(f2);
807 					break;
808 				}
809 				if (c == '\n' || c == EOF)
810 					break;
811 			}
812 		} else {
813 			for (;;) {
814 				ctold++;
815 				ctnew++;
816 				if ((c = getc(f1)) != (d = getc(f2))) {
817 					/* jackpot++; */
818 					J[i] = 0;
819 					if (c != '\n' && c != EOF)
820 						ctold += skipline(f1);
821 					if (d != '\n' && c != EOF)
822 						ctnew += skipline(f2);
823 					break;
824 				}
825 				if (c == '\n' || c == EOF)
826 					break;
827 			}
828 		}
829 		ixold[i] = ctold;
830 		ixnew[j] = ctnew;
831 		j++;
832 	}
833 	for (; j <= len[1]; j++)
834 		ixnew[j] = ctnew += skipline(f2);
835 	/*
836 	 * if (jackpot)
837 	 *	fprintf(stderr, "jackpot\n");
838 	 */
839 }
840 
841 /* shellsort CACM #201 */
842 static void
843 sort(struct line *a, int n)
844 {
845 	struct line *ai, *aim, w;
846 	int j, m = 0, k;
847 
848 	if (n == 0)
849 		return;
850 	for (j = 1; j <= n; j *= 2)
851 		m = 2 * j - 1;
852 	for (m /= 2; m != 0; m /= 2) {
853 		k = n - m;
854 		for (j = 1; j <= k; j++) {
855 			for (ai = &a[j]; ai > a; ai -= m) {
856 				aim = &ai[m];
857 				if (aim < ai)
858 					break;	/* wraparound */
859 				if (aim->value > ai[0].value ||
860 				    (aim->value == ai[0].value &&
861 					aim->serial > ai[0].serial))
862 					break;
863 				w.value = ai[0].value;
864 				ai[0].value = aim->value;
865 				aim->value = w.value;
866 				w.serial = ai[0].serial;
867 				ai[0].serial = aim->serial;
868 				aim->serial = w.serial;
869 			}
870 		}
871 	}
872 }
873 
874 static void
875 unsort(struct line *f, int l, int *b)
876 {
877 	int *a, i;
878 
879 	a = xcalloc(l + 1, sizeof(*a));
880 	for (i = 1; i <= l; i++)
881 		a[f[i].serial] = f[i].value;
882 	for (i = 1; i <= l; i++)
883 		b[i] = a[i];
884 	xfree(a);
885 }
886 
887 static int
888 skipline(FILE *f)
889 {
890 	int i, c;
891 
892 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
893 		continue;
894 	return (i);
895 }
896 
897 static void
898 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
899 {
900 	int m, i0, i1, j0, j1;
901 
902 	rewind(f1);
903 	rewind(f2);
904 	m = len[0];
905 	J[0] = 0;
906 	J[m + 1] = len[1] + 1;
907 	if (diff_format != D_EDIT) {
908 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
909 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
910 				i0++;
911 			j0 = J[i0 - 1] + 1;
912 			i1 = i0 - 1;
913 			while (i1 < m && J[i1 + 1] == 0)
914 				i1++;
915 			j1 = J[i1 + 1] - 1;
916 			J[i1] = j1;
917 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
918 		}
919 	} else {
920 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
921 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
922 				i0--;
923 			j0 = J[i0 + 1] - 1;
924 			i1 = i0 + 1;
925 			while (i1 > 1 && J[i1 - 1] == 0)
926 				i1--;
927 			j1 = J[i1 - 1] + 1;
928 			J[i1] = j1;
929 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
930 		}
931 	}
932 	if (m == 0)
933 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
934 	if (diff_format == D_IFDEF) {
935 		for (;;) {
936 #define	c i0
937 			if ((c = getc(f1)) == EOF)
938 				return;
939 			putchar(c);
940 		}
941 #undef c
942 	}
943 	if (anychange != 0) {
944 		if (diff_format == D_CONTEXT)
945 			dump_context_vec(f1, f2, flags);
946 		else if (diff_format == D_UNIFIED)
947 			dump_unified_vec(f1, f2, flags);
948 	}
949 }
950 
951 static void
952 range(int a, int b, char *separator)
953 {
954 	printf("%d", a > b ? b : a);
955 	if (a < b)
956 		printf("%s%d", separator, b);
957 }
958 
959 static void
960 uni_range(int a, int b)
961 {
962 	if (a < b)
963 		printf("%d,%d", a, b - a + 1);
964 	else if (a == b)
965 		printf("%d", b);
966 	else
967 		printf("%d,0", b);
968 }
969 
970 static char *
971 preadline(int fd, size_t rlen, off_t off)
972 {
973 	char *line;
974 	ssize_t nr;
975 
976 	line = xmalloc(rlen + 1);
977 	if ((nr = pread(fd, line, rlen, off)) < 0)
978 		err(1, "preadline");
979 	if (nr > 0 && line[nr-1] == '\n')
980 		nr--;
981 	line[nr] = '\0';
982 	return (line);
983 }
984 
985 static int
986 ignoreline(char *line)
987 {
988 	int ret;
989 
990 	ret = regexec(&ignore_re, line, 0, NULL, 0);
991 	xfree(line);
992 	return (ret == 0);	/* if it matched, it should be ignored. */
993 }
994 
995 /*
996  * Indicate that there is a difference between lines a and b of the from file
997  * to get to lines c to d of the to file.  If a is greater then b then there
998  * are no lines in the from file involved and this means that there were
999  * lines appended (beginning at b).  If c is greater than d then there are
1000  * lines missing from the to file.
1001  */
1002 static void
1003 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1004     int *pflags)
1005 {
1006 	static size_t max_context = 64;
1007 	int i;
1008 
1009 restart:
1010 	if (diff_format != D_IFDEF && a > b && c > d)
1011 		return;
1012 	if (ignore_pats != NULL) {
1013 		char *line;
1014 		/*
1015 		 * All lines in the change, insert, or delete must
1016 		 * match an ignore pattern for the change to be
1017 		 * ignored.
1018 		 */
1019 		if (a <= b) {		/* Changes and deletes. */
1020 			for (i = a; i <= b; i++) {
1021 				line = preadline(fileno(f1),
1022 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1023 				if (!ignoreline(line))
1024 					goto proceed;
1025 			}
1026 		}
1027 		if (a > b || c <= d) {	/* Changes and inserts. */
1028 			for (i = c; i <= d; i++) {
1029 				line = preadline(fileno(f2),
1030 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1031 				if (!ignoreline(line))
1032 					goto proceed;
1033 			}
1034 		}
1035 		return;
1036 	}
1037 proceed:
1038 	if (*pflags & D_HEADER) {
1039 		printf("%s %s %s\n", diffargs, file1, file2);
1040 		*pflags &= ~D_HEADER;
1041 	}
1042 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1043 		/*
1044 		 * Allocate change records as needed.
1045 		 */
1046 		if (context_vec_ptr == context_vec_end - 1) {
1047 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1048 			max_context <<= 1;
1049 			context_vec_start = xrealloc(context_vec_start,
1050 			    max_context, sizeof(*context_vec_start));
1051 			context_vec_end = context_vec_start + max_context;
1052 			context_vec_ptr = context_vec_start + offset;
1053 		}
1054 		if (anychange == 0) {
1055 			/*
1056 			 * Print the context/unidiff header first time through.
1057 			 */
1058 			print_header(file1, file2);
1059 			anychange = 1;
1060 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1061 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1062 			/*
1063 			 * If this change is more than 'diff_context' lines from the
1064 			 * previous change, dump the record and reset it.
1065 			 */
1066 			if (diff_format == D_CONTEXT)
1067 				dump_context_vec(f1, f2, *pflags);
1068 			else
1069 				dump_unified_vec(f1, f2, *pflags);
1070 		}
1071 		context_vec_ptr++;
1072 		context_vec_ptr->a = a;
1073 		context_vec_ptr->b = b;
1074 		context_vec_ptr->c = c;
1075 		context_vec_ptr->d = d;
1076 		return;
1077 	}
1078 	if (anychange == 0)
1079 		anychange = 1;
1080 	switch (diff_format) {
1081 	case D_BRIEF:
1082 		return;
1083 	case D_NORMAL:
1084 	case D_EDIT:
1085 		range(a, b, ",");
1086 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1087 		if (diff_format == D_NORMAL)
1088 			range(c, d, ",");
1089 		putchar('\n');
1090 		break;
1091 	case D_REVERSE:
1092 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1093 		range(a, b, " ");
1094 		putchar('\n');
1095 		break;
1096 	case D_NREVERSE:
1097 		if (a > b)
1098 			printf("a%d %d\n", b, d - c + 1);
1099 		else {
1100 			printf("d%d %d\n", a, b - a + 1);
1101 			if (!(c > d))
1102 				/* add changed lines */
1103 				printf("a%d %d\n", b, d - c + 1);
1104 		}
1105 		break;
1106 	}
1107 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1108 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1109 		if (a <= b && c <= d && diff_format == D_NORMAL)
1110 			puts("---");
1111 	}
1112 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1113 	if (i != 0 && diff_format == D_EDIT) {
1114 		/*
1115 		 * A non-zero return value for D_EDIT indicates that the
1116 		 * last line printed was a bare dot (".") that has been
1117 		 * escaped as ".." to prevent ed(1) from misinterpreting
1118 		 * it.  We have to add a substitute command to change this
1119 		 * back and restart where we left off.
1120 		 */
1121 		puts(".");
1122 		printf("%ds/^\\.\\././\n", a);
1123 		a += i;
1124 		c += i;
1125 		goto restart;
1126 	}
1127 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1128 		puts(".");
1129 	if (inifdef) {
1130 		printf("#endif /* %s */\n", ifdefname);
1131 		inifdef = 0;
1132 	}
1133 }
1134 
1135 static int
1136 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1137 {
1138 	int i, j, c, lastc, col, nc;
1139 
1140 	/*
1141 	 * When doing #ifdef's, copy down to current line
1142 	 * if this is the first file, so that stuff makes it to output.
1143 	 */
1144 	if (diff_format == D_IFDEF && oldfile) {
1145 		long curpos = ftell(lb);
1146 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1147 		nc = f[a > b ? b : a - 1] - curpos;
1148 		for (i = 0; i < nc; i++)
1149 			putchar(getc(lb));
1150 	}
1151 	if (a > b)
1152 		return (0);
1153 	if (diff_format == D_IFDEF) {
1154 		if (inifdef) {
1155 			printf("#else /* %s%s */\n",
1156 			    oldfile == 1 ? "!" : "", ifdefname);
1157 		} else {
1158 			if (oldfile)
1159 				printf("#ifndef %s\n", ifdefname);
1160 			else
1161 				printf("#ifdef %s\n", ifdefname);
1162 		}
1163 		inifdef = 1 + oldfile;
1164 	}
1165 	for (i = a; i <= b; i++) {
1166 		fseek(lb, f[i - 1], SEEK_SET);
1167 		nc = f[i] - f[i - 1];
1168 		if (diff_format != D_IFDEF && ch != '\0') {
1169 			putchar(ch);
1170 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1171 			    || diff_format == D_UNIFIED))
1172 				putchar('\t');
1173 			else if (diff_format != D_UNIFIED)
1174 				putchar(' ');
1175 		}
1176 		col = 0;
1177 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1178 			if ((c = getc(lb)) == EOF) {
1179 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1180 				    diff_format == D_NREVERSE)
1181 					warnx("No newline at end of file");
1182 				else
1183 					puts("\n\\ No newline at end of file");
1184 				return (0);
1185 			}
1186 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1187 				do {
1188 					putchar(' ');
1189 				} while (++col & 7);
1190 			} else {
1191 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1192 				    && lastc == '.') {
1193 					/*
1194 					 * Don't print a bare "." line
1195 					 * since that will confuse ed(1).
1196 					 * Print ".." instead and return,
1197 					 * giving the caller an offset
1198 					 * from which to restart.
1199 					 */
1200 					puts(".");
1201 					return (i - a + 1);
1202 				}
1203 				putchar(c);
1204 				col++;
1205 			}
1206 		}
1207 	}
1208 	return (0);
1209 }
1210 
1211 /*
1212  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1213  */
1214 static int
1215 readhash(FILE *f, int flags)
1216 {
1217 	int i, t, space;
1218 	int sum;
1219 
1220 	sum = 1;
1221 	space = 0;
1222 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1223 		if (flags & D_IGNORECASE)
1224 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1225 				if (t == EOF) {
1226 					if (i == 0)
1227 						return (0);
1228 					break;
1229 				}
1230 				sum = sum * 127 + chrtran[t];
1231 			}
1232 		else
1233 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1234 				if (t == EOF) {
1235 					if (i == 0)
1236 						return (0);
1237 					break;
1238 				}
1239 				sum = sum * 127 + t;
1240 			}
1241 	} else {
1242 		for (i = 0;;) {
1243 			switch (t = getc(f)) {
1244 			case '\t':
1245 			case '\r':
1246 			case '\v':
1247 			case '\f':
1248 			case ' ':
1249 				space++;
1250 				continue;
1251 			default:
1252 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1253 					i++;
1254 					space = 0;
1255 				}
1256 				sum = sum * 127 + chrtran[t];
1257 				i++;
1258 				continue;
1259 			case EOF:
1260 				if (i == 0)
1261 					return (0);
1262 				/* FALLTHROUGH */
1263 			case '\n':
1264 				break;
1265 			}
1266 			break;
1267 		}
1268 	}
1269 	/*
1270 	 * There is a remote possibility that we end up with a zero sum.
1271 	 * Zero is used as an EOF marker, so return 1 instead.
1272 	 */
1273 	return (sum == 0 ? 1 : sum);
1274 }
1275 
1276 static int
1277 asciifile(FILE *f)
1278 {
1279 	unsigned char buf[BUFSIZ];
1280 	size_t i, cnt;
1281 
1282 	if (f == NULL)
1283 		return (1);
1284 
1285 	rewind(f);
1286 	cnt = fread(buf, 1, sizeof(buf), f);
1287 	for (i = 0; i < cnt; i++)
1288 		if (!isprint(buf[i]) && !isspace(buf[i]))
1289 			return (0);
1290 	return (1);
1291 }
1292 
1293 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1294 
1295 static char *
1296 match_function(const long *f, int pos, FILE *fp)
1297 {
1298 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1299 	size_t nc;
1300 	int last = lastline;
1301 	char *state = NULL;
1302 
1303 	lastline = pos;
1304 	while (pos > last) {
1305 		fseek(fp, f[pos - 1], SEEK_SET);
1306 		nc = f[pos] - f[pos - 1];
1307 		if (nc >= sizeof(buf))
1308 			nc = sizeof(buf) - 1;
1309 		nc = fread(buf, 1, nc, fp);
1310 		if (nc > 0) {
1311 			buf[nc] = '\0';
1312 			buf[strcspn(buf, "\n")] = '\0';
1313 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1314 				if (begins_with(buf, "private:")) {
1315 					if (!state)
1316 						state = " (private)";
1317 				} else if (begins_with(buf, "protected:")) {
1318 					if (!state)
1319 						state = " (protected)";
1320 				} else if (begins_with(buf, "public:")) {
1321 					if (!state)
1322 						state = " (public)";
1323 				} else {
1324 					strlcpy(lastbuf, buf, sizeof lastbuf);
1325 					if (state)
1326 						strlcat(lastbuf, state,
1327 						    sizeof lastbuf);
1328 					lastmatchline = pos;
1329 					return lastbuf;
1330 				}
1331 			}
1332 		}
1333 		pos--;
1334 	}
1335 	return lastmatchline > 0 ? lastbuf : NULL;
1336 }
1337 
1338 /* dump accumulated "context" diff changes */
1339 static void
1340 dump_context_vec(FILE *f1, FILE *f2, int flags)
1341 {
1342 	struct context_vec *cvp = context_vec_start;
1343 	int lowa, upb, lowc, upd, do_output;
1344 	int a, b, c, d;
1345 	char ch, *f;
1346 
1347 	if (context_vec_start > context_vec_ptr)
1348 		return;
1349 
1350 	b = d = 0;		/* gcc */
1351 	lowa = MAX(1, cvp->a - diff_context);
1352 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1353 	lowc = MAX(1, cvp->c - diff_context);
1354 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1355 
1356 	printf("***************");
1357 	if ((flags & D_PROTOTYPE)) {
1358 		f = match_function(ixold, lowa-1, f1);
1359 		if (f != NULL) {
1360 			putchar(' ');
1361 			fputs(f, stdout);
1362 		}
1363 	}
1364 	printf("\n*** ");
1365 	range(lowa, upb, ",");
1366 	printf(" ****\n");
1367 
1368 	/*
1369 	 * Output changes to the "old" file.  The first loop suppresses
1370 	 * output if there were no changes to the "old" file (we'll see
1371 	 * the "old" lines as context in the "new" list).
1372 	 */
1373 	do_output = 0;
1374 	for (; cvp <= context_vec_ptr; cvp++)
1375 		if (cvp->a <= cvp->b) {
1376 			cvp = context_vec_start;
1377 			do_output++;
1378 			break;
1379 		}
1380 	if (do_output) {
1381 		while (cvp <= context_vec_ptr) {
1382 			a = cvp->a;
1383 			b = cvp->b;
1384 			c = cvp->c;
1385 			d = cvp->d;
1386 
1387 			if (a <= b && c <= d)
1388 				ch = 'c';
1389 			else
1390 				ch = (a <= b) ? 'd' : 'a';
1391 
1392 			if (ch == 'a')
1393 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1394 			else {
1395 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1396 				fetch(ixold, a, b, f1,
1397 				    ch == 'c' ? '!' : '-', 0, flags);
1398 			}
1399 			lowa = b + 1;
1400 			cvp++;
1401 		}
1402 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1403 	}
1404 	/* output changes to the "new" file */
1405 	printf("--- ");
1406 	range(lowc, upd, ",");
1407 	printf(" ----\n");
1408 
1409 	do_output = 0;
1410 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1411 		if (cvp->c <= cvp->d) {
1412 			cvp = context_vec_start;
1413 			do_output++;
1414 			break;
1415 		}
1416 	if (do_output) {
1417 		while (cvp <= context_vec_ptr) {
1418 			a = cvp->a;
1419 			b = cvp->b;
1420 			c = cvp->c;
1421 			d = cvp->d;
1422 
1423 			if (a <= b && c <= d)
1424 				ch = 'c';
1425 			else
1426 				ch = (a <= b) ? 'd' : 'a';
1427 
1428 			if (ch == 'd')
1429 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1430 			else {
1431 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1432 				fetch(ixnew, c, d, f2,
1433 				    ch == 'c' ? '!' : '+', 0, flags);
1434 			}
1435 			lowc = d + 1;
1436 			cvp++;
1437 		}
1438 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1439 	}
1440 	context_vec_ptr = context_vec_start - 1;
1441 }
1442 
1443 /* dump accumulated "unified" diff changes */
1444 static void
1445 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1446 {
1447 	struct context_vec *cvp = context_vec_start;
1448 	int lowa, upb, lowc, upd;
1449 	int a, b, c, d;
1450 	char ch, *f;
1451 
1452 	if (context_vec_start > context_vec_ptr)
1453 		return;
1454 
1455 	b = d = 0;		/* gcc */
1456 	lowa = MAX(1, cvp->a - diff_context);
1457 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1458 	lowc = MAX(1, cvp->c - diff_context);
1459 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1460 
1461 	fputs("@@ -", stdout);
1462 	uni_range(lowa, upb);
1463 	fputs(" +", stdout);
1464 	uni_range(lowc, upd);
1465 	fputs(" @@", stdout);
1466 	if ((flags & D_PROTOTYPE)) {
1467 		f = match_function(ixold, lowa-1, f1);
1468 		if (f != NULL) {
1469 			putchar(' ');
1470 			fputs(f, stdout);
1471 		}
1472 	}
1473 	putchar('\n');
1474 
1475 	/*
1476 	 * Output changes in "unified" diff format--the old and new lines
1477 	 * are printed together.
1478 	 */
1479 	for (; cvp <= context_vec_ptr; cvp++) {
1480 		a = cvp->a;
1481 		b = cvp->b;
1482 		c = cvp->c;
1483 		d = cvp->d;
1484 
1485 		/*
1486 		 * c: both new and old changes
1487 		 * d: only changes in the old file
1488 		 * a: only changes in the new file
1489 		 */
1490 		if (a <= b && c <= d)
1491 			ch = 'c';
1492 		else
1493 			ch = (a <= b) ? 'd' : 'a';
1494 
1495 		switch (ch) {
1496 		case 'c':
1497 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1498 			fetch(ixold, a, b, f1, '-', 0, flags);
1499 			fetch(ixnew, c, d, f2, '+', 0, flags);
1500 			break;
1501 		case 'd':
1502 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1503 			fetch(ixold, a, b, f1, '-', 0, flags);
1504 			break;
1505 		case 'a':
1506 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1507 			fetch(ixnew, c, d, f2, '+', 0, flags);
1508 			break;
1509 		}
1510 		lowa = b + 1;
1511 		lowc = d + 1;
1512 	}
1513 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1514 
1515 	context_vec_ptr = context_vec_start - 1;
1516 }
1517 
1518 static void
1519 print_header(const char *file1, const char *file2)
1520 {
1521 	if (label[0] != NULL)
1522 		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1523 		    label[0]);
1524 	else
1525 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1526 		    file1, ctime(&stb1.st_mtime));
1527 	if (label[1] != NULL)
1528 		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1529 		    label[1]);
1530 	else
1531 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1532 		    file2, ctime(&stb2.st_mtime));
1533 }
1534