xref: /openbsd-src/usr.bin/diff/diffreg.c (revision bdcce04dfec768beba2c9a1232bb7f98efdeb0f6)
1 /*	$OpenBSD: diffreg.c,v 1.75 2010/07/14 22:15:57 ray Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 static FILE	*opentemp(const char *);
176 static void	 output(char *, FILE *, char *, FILE *, int);
177 static void	 check(char *, FILE *, char *, FILE *, int);
178 static void	 range(int, int, char *);
179 static void	 uni_range(int, int);
180 static void	 dump_context_vec(FILE *, FILE *, int);
181 static void	 dump_unified_vec(FILE *, FILE *, int);
182 static void	 prepare(int, FILE *, off_t, int);
183 static void	 prune(void);
184 static void	 equiv(struct line *, int, struct line *, int, int *);
185 static void	 unravel(int);
186 static void	 unsort(struct line *, int, int *);
187 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
188 static void	 sort(struct line *, int);
189 static void	 print_header(const char *, const char *);
190 static int	 ignoreline(char *);
191 static int	 asciifile(FILE *);
192 static int	 fetch(long *, int, int, FILE *, int, int, int);
193 static int	 newcand(int, int, int);
194 static int	 search(int *, int, int);
195 static int	 skipline(FILE *);
196 static int	 isqrt(int);
197 static int	 stone(int *, int, int *, int *, int);
198 static int	 readhash(FILE *, int);
199 static int	 files_differ(FILE *, FILE *, int);
200 static char	*match_function(const long *, int, FILE *);
201 static char	*preadline(int, size_t, off_t);
202 
203 static int  *J;			/* will be overlaid on class */
204 static int  *class;		/* will be overlaid on file[0] */
205 static int  *klist;		/* will be overlaid on file[0] after class */
206 static int  *member;		/* will be overlaid on file[1] */
207 static int   clen;
208 static int   inifdef;		/* whether or not we are in a #ifdef block */
209 static int   len[2];
210 static int   pref, suff;	/* length of prefix and suffix */
211 static int   slen[2];
212 static int   anychange;
213 static long *ixnew;		/* will be overlaid on file[1] */
214 static long *ixold;		/* will be overlaid on klist */
215 static struct cand *clist;	/* merely a free storage pot for candidates */
216 static int   clistlen;		/* the length of clist */
217 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
218 static u_char *chrtran;		/* translation table for case-folding */
219 static struct context_vec *context_vec_start;
220 static struct context_vec *context_vec_end;
221 static struct context_vec *context_vec_ptr;
222 
223 #define FUNCTION_CONTEXT_SIZE	55
224 static char lastbuf[FUNCTION_CONTEXT_SIZE];
225 static int lastline;
226 static int lastmatchline;
227 
228 
229 /*
230  * chrtran points to one of 2 translation tables: cup2low if folding upper to
231  * lower case clow2low if not folding case
232  */
233 u_char clow2low[256] = {
234 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
235 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
236 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
237 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
238 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
239 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
240 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
241 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
242 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
243 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
244 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
245 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
246 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
247 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
248 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
249 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
250 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
251 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
252 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
253 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
254 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
255 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
256 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
257 	0xfd, 0xfe, 0xff
258 };
259 
260 u_char cup2low[256] = {
261 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
262 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
263 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
264 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
265 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
266 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
267 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
268 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
269 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
270 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
271 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
272 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
273 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
274 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
275 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
276 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
277 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
278 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
279 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
280 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
281 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
282 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
283 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
284 	0xfd, 0xfe, 0xff
285 };
286 
287 int
288 diffreg(char *file1, char *file2, int flags)
289 {
290 	FILE *f1, *f2;
291 	int i, rval, ostdout = -1;
292 	pid_t pid = -1;
293 
294 	f1 = f2 = NULL;
295 	rval = D_SAME;
296 	anychange = 0;
297 	lastline = 0;
298 	lastmatchline = 0;
299 	context_vec_ptr = context_vec_start - 1;
300 	if (flags & D_IGNORECASE)
301 		chrtran = cup2low;
302 	else
303 		chrtran = clow2low;
304 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
305 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
306 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
307 		goto closem;
308 
309 	if (flags & D_EMPTY1)
310 		f1 = fopen(_PATH_DEVNULL, "r");
311 	else {
312 		if (!S_ISREG(stb1.st_mode)) {
313 			if ((f1 = opentemp(file1)) == NULL ||
314 			    fstat(fileno(f1), &stb1) < 0) {
315 				warn("%s", file1);
316 				status |= 2;
317 				goto closem;
318 			}
319 		} else if (strcmp(file1, "-") == 0)
320 			f1 = stdin;
321 		else
322 			f1 = fopen(file1, "r");
323 	}
324 	if (f1 == NULL) {
325 		warn("%s", file1);
326 		status |= 2;
327 		goto closem;
328 	}
329 
330 	if (flags & D_EMPTY2)
331 		f2 = fopen(_PATH_DEVNULL, "r");
332 	else {
333 		if (!S_ISREG(stb2.st_mode)) {
334 			if ((f2 = opentemp(file2)) == NULL ||
335 			    fstat(fileno(f2), &stb2) < 0) {
336 				warn("%s", file2);
337 				status |= 2;
338 				goto closem;
339 			}
340 		} else if (strcmp(file2, "-") == 0)
341 			f2 = stdin;
342 		else
343 			f2 = fopen(file2, "r");
344 	}
345 	if (f2 == NULL) {
346 		warn("%s", file2);
347 		status |= 2;
348 		goto closem;
349 	}
350 
351 	switch (files_differ(f1, f2, flags)) {
352 	case 0:
353 		goto closem;
354 	case 1:
355 		break;
356 	default:
357 		/* error */
358 		status |= 2;
359 		goto closem;
360 	}
361 
362 	if ((flags & D_FORCEASCII) == 0 &&
363 	    (!asciifile(f1) || !asciifile(f2))) {
364 		rval = D_BINARY;
365 		status |= 1;
366 		goto closem;
367 	}
368 	if (lflag) {
369 		/* redirect stdout to pr */
370 		int pfd[2];
371 		char *header;
372 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
373 
374 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
375 		prargv[2] = header;
376 		fflush(stdout);
377 		rewind(stdout);
378 		pipe(pfd);
379 		switch ((pid = fork())) {
380 		case -1:
381 			warnx("No more processes");
382 			status |= 2;
383 			xfree(header);
384 			return (D_ERROR);
385 		case 0:
386 			/* child */
387 			if (pfd[0] != STDIN_FILENO) {
388 				dup2(pfd[0], STDIN_FILENO);
389 				close(pfd[0]);
390 			}
391 			close(pfd[1]);
392 			execv(_PATH_PR, prargv);
393 			_exit(127);
394 		default:
395 			/* parent */
396 			if (pfd[1] != STDOUT_FILENO) {
397 				ostdout = dup(STDOUT_FILENO);
398 				dup2(pfd[1], STDOUT_FILENO);
399 				close(pfd[1]);
400 			}
401 			close(pfd[0]);
402 			rewind(stdout);
403 			xfree(header);
404 		}
405 	}
406 	prepare(0, f1, stb1.st_size, flags);
407 	prepare(1, f2, stb2.st_size, flags);
408 
409 	prune();
410 	sort(sfile[0], slen[0]);
411 	sort(sfile[1], slen[1]);
412 
413 	member = (int *)file[1];
414 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
415 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
416 
417 	class = (int *)file[0];
418 	unsort(sfile[0], slen[0], class);
419 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
420 
421 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
422 	clen = 0;
423 	clistlen = 100;
424 	clist = xcalloc(clistlen, sizeof(*clist));
425 	i = stone(class, slen[0], member, klist, flags);
426 	xfree(member);
427 	xfree(class);
428 
429 	J = xrealloc(J, len[0] + 2, sizeof(*J));
430 	unravel(klist[i]);
431 	xfree(clist);
432 	xfree(klist);
433 
434 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
435 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
436 	check(file1, f1, file2, f2, flags);
437 	output(file1, f1, file2, f2, flags);
438 	if (ostdout != -1) {
439 		int wstatus;
440 
441 		/* close the pipe to pr and restore stdout */
442 		fflush(stdout);
443 		rewind(stdout);
444 		if (ostdout != STDOUT_FILENO) {
445 			close(STDOUT_FILENO);
446 			dup2(ostdout, STDOUT_FILENO);
447 			close(ostdout);
448 		}
449 		waitpid(pid, &wstatus, 0);
450 	}
451 closem:
452 	if (anychange) {
453 		status |= 1;
454 		if (rval == D_SAME)
455 			rval = D_DIFFER;
456 	}
457 	if (f1 != NULL)
458 		fclose(f1);
459 	if (f2 != NULL)
460 		fclose(f2);
461 
462 	return (rval);
463 }
464 
465 /*
466  * Check to see if the given files differ.
467  * Returns 0 if they are the same, 1 if different, and -1 on error.
468  * XXX - could use code from cmp(1) [faster]
469  */
470 static int
471 files_differ(FILE *f1, FILE *f2, int flags)
472 {
473 	char buf1[BUFSIZ], buf2[BUFSIZ];
474 	size_t i, j;
475 
476 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
477 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
478 		return (1);
479 	for (;;) {
480 		i = fread(buf1, 1, sizeof(buf1), f1);
481 		j = fread(buf2, 1, sizeof(buf2), f2);
482 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
483 			return (-1);
484 		if (i != j)
485 			return (1);
486 		if (i == 0)
487 			return (0);
488 		if (memcmp(buf1, buf2, i) != 0)
489 			return (1);
490 	}
491 }
492 
493 static FILE *
494 opentemp(const char *file)
495 {
496 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
497 	ssize_t nread;
498 	int ifd, ofd;
499 
500 	if (strcmp(file, "-") == 0)
501 		ifd = STDIN_FILENO;
502 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
503 		return (NULL);
504 
505 	if ((tempdir = getenv("TMPDIR")) == NULL)
506 		tempdir = _PATH_TMP;
507 
508 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
509 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
510 	    sizeof(tempfile)) {
511 		close(ifd);
512 		errno = ENAMETOOLONG;
513 		return (NULL);
514 	}
515 
516 	if ((ofd = mkstemp(tempfile)) < 0) {
517 		close(ifd);
518 		return (NULL);
519 	}
520 	unlink(tempfile);
521 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
522 		if (write(ofd, buf, nread) != nread) {
523 			close(ifd);
524 			close(ofd);
525 			return (NULL);
526 		}
527 	}
528 	close(ifd);
529 	lseek(ofd, (off_t)0, SEEK_SET);
530 	return (fdopen(ofd, "r"));
531 }
532 
533 char *
534 splice(char *dir, char *file)
535 {
536 	char *tail, *buf;
537 
538 	if ((tail = strrchr(file, '/')) == NULL)
539 		tail = file;
540 	else
541 		tail++;
542 	xasprintf(&buf, "%s/%s", dir, tail);
543 	return (buf);
544 }
545 
546 static void
547 prepare(int i, FILE *fd, off_t filesize, int flags)
548 {
549 	struct line *p;
550 	int j, h;
551 	size_t sz;
552 
553 	rewind(fd);
554 
555 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
556 	if (sz < 100)
557 		sz = 100;
558 
559 	p = xcalloc(sz + 3, sizeof(*p));
560 	for (j = 0; (h = readhash(fd, flags));) {
561 		if (j == sz) {
562 			sz = sz * 3 / 2;
563 			p = xrealloc(p, sz + 3, sizeof(*p));
564 		}
565 		p[++j].value = h;
566 	}
567 	len[i] = j;
568 	file[i] = p;
569 }
570 
571 static void
572 prune(void)
573 {
574 	int i, j;
575 
576 	for (pref = 0; pref < len[0] && pref < len[1] &&
577 	    file[0][pref + 1].value == file[1][pref + 1].value;
578 	    pref++)
579 		;
580 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
581 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
582 	    suff++)
583 		;
584 	for (j = 0; j < 2; j++) {
585 		sfile[j] = file[j] + pref;
586 		slen[j] = len[j] - pref - suff;
587 		for (i = 0; i <= slen[j]; i++)
588 			sfile[j][i].serial = i;
589 	}
590 }
591 
592 static void
593 equiv(struct line *a, int n, struct line *b, int m, int *c)
594 {
595 	int i, j;
596 
597 	i = j = 1;
598 	while (i <= n && j <= m) {
599 		if (a[i].value < b[j].value)
600 			a[i++].value = 0;
601 		else if (a[i].value == b[j].value)
602 			a[i++].value = j;
603 		else
604 			j++;
605 	}
606 	while (i <= n)
607 		a[i++].value = 0;
608 	b[m + 1].value = 0;
609 	j = 0;
610 	while (++j <= m) {
611 		c[j] = -b[j].serial;
612 		while (b[j + 1].value == b[j].value) {
613 			j++;
614 			c[j] = b[j].serial;
615 		}
616 	}
617 	c[j] = -1;
618 }
619 
620 /* Code taken from ping.c */
621 static int
622 isqrt(int n)
623 {
624 	int y, x = 1;
625 
626 	if (n == 0)
627 		return (0);
628 
629 	do { /* newton was a stinker */
630 		y = x;
631 		x = n / x;
632 		x += y;
633 		x /= 2;
634 	} while ((x - y) > 1 || (x - y) < -1);
635 
636 	return (x);
637 }
638 
639 static int
640 stone(int *a, int n, int *b, int *c, int flags)
641 {
642 	int i, k, y, j, l;
643 	int oldc, tc, oldl;
644 	u_int numtries;
645 
646 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
647 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
648 	    MAX(256, isqrt(n));
649 
650 	k = 0;
651 	c[0] = newcand(0, 0, 0);
652 	for (i = 1; i <= n; i++) {
653 		j = a[i];
654 		if (j == 0)
655 			continue;
656 		y = -b[j];
657 		oldl = 0;
658 		oldc = c[0];
659 		numtries = 0;
660 		do {
661 			if (y <= clist[oldc].y)
662 				continue;
663 			l = search(c, k, y);
664 			if (l != oldl + 1)
665 				oldc = c[l - 1];
666 			if (l <= k) {
667 				if (clist[c[l]].y <= y)
668 					continue;
669 				tc = c[l];
670 				c[l] = newcand(i, y, oldc);
671 				oldc = tc;
672 				oldl = l;
673 				numtries++;
674 			} else {
675 				c[l] = newcand(i, y, oldc);
676 				k++;
677 				break;
678 			}
679 		} while ((y = b[++j]) > 0 && numtries < bound);
680 	}
681 	return (k);
682 }
683 
684 static int
685 newcand(int x, int y, int pred)
686 {
687 	struct cand *q;
688 
689 	if (clen == clistlen) {
690 		clistlen = clistlen * 11 / 10;
691 		clist = xrealloc(clist, clistlen, sizeof(*clist));
692 	}
693 	q = clist + clen;
694 	q->x = x;
695 	q->y = y;
696 	q->pred = pred;
697 	return (clen++);
698 }
699 
700 static int
701 search(int *c, int k, int y)
702 {
703 	int i, j, l, t;
704 
705 	if (clist[c[k]].y < y)	/* quick look for typical case */
706 		return (k + 1);
707 	i = 0;
708 	j = k + 1;
709 	for (;;) {
710 		l = (i + j) / 2;
711 		if (l <= i)
712 			break;
713 		t = clist[c[l]].y;
714 		if (t > y)
715 			j = l;
716 		else if (t < y)
717 			i = l;
718 		else
719 			return (l);
720 	}
721 	return (l + 1);
722 }
723 
724 static void
725 unravel(int p)
726 {
727 	struct cand *q;
728 	int i;
729 
730 	for (i = 0; i <= len[0]; i++)
731 		J[i] = i <= pref ? i :
732 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
733 	for (q = clist + p; q->y != 0; q = clist + q->pred)
734 		J[q->x + pref] = q->y + pref;
735 }
736 
737 /*
738  * Check does double duty:
739  *  1.	ferret out any fortuitous correspondences due
740  *	to confounding by hashing (which result in "jackpot")
741  *  2.  collect random access indexes to the two files
742  */
743 static void
744 check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
745 {
746 	int i, j, jackpot, c, d;
747 	long ctold, ctnew;
748 
749 	rewind(f1);
750 	rewind(f2);
751 	j = 1;
752 	ixold[0] = ixnew[0] = 0;
753 	jackpot = 0;
754 	ctold = ctnew = 0;
755 	for (i = 1; i <= len[0]; i++) {
756 		if (J[i] == 0) {
757 			ixold[i] = ctold += skipline(f1);
758 			continue;
759 		}
760 		while (j < J[i]) {
761 			ixnew[j] = ctnew += skipline(f2);
762 			j++;
763 		}
764 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
765 			for (;;) {
766 				c = getc(f1);
767 				d = getc(f2);
768 				/*
769 				 * GNU diff ignores a missing newline
770 				 * in one file for -b or -w.
771 				 */
772 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
773 				    ((c == EOF && d == '\n') ||
774 				    (c == '\n' && d == EOF))) {
775 					break;
776 				}
777 				ctold++;
778 				ctnew++;
779 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
780 				    isspace(d)) {
781 					do {
782 						if (c == '\n')
783 							break;
784 						ctold++;
785 					} while (isspace(c = getc(f1)));
786 					do {
787 						if (d == '\n')
788 							break;
789 						ctnew++;
790 					} while (isspace(d = getc(f2)));
791 				} else if ((flags & D_IGNOREBLANKS)) {
792 					while (isspace(c) && c != '\n') {
793 						c = getc(f1);
794 						ctold++;
795 					}
796 					while (isspace(d) && d != '\n') {
797 						d = getc(f2);
798 						ctnew++;
799 					}
800 				}
801 				if (chrtran[c] != chrtran[d]) {
802 					jackpot++;
803 					J[i] = 0;
804 					if (c != '\n' && c != EOF)
805 						ctold += skipline(f1);
806 					if (d != '\n' && c != EOF)
807 						ctnew += skipline(f2);
808 					break;
809 				}
810 				if (c == '\n' || c == EOF)
811 					break;
812 			}
813 		} else {
814 			for (;;) {
815 				ctold++;
816 				ctnew++;
817 				if ((c = getc(f1)) != (d = getc(f2))) {
818 					/* jackpot++; */
819 					J[i] = 0;
820 					if (c != '\n' && c != EOF)
821 						ctold += skipline(f1);
822 					if (d != '\n' && c != EOF)
823 						ctnew += skipline(f2);
824 					break;
825 				}
826 				if (c == '\n' || c == EOF)
827 					break;
828 			}
829 		}
830 		ixold[i] = ctold;
831 		ixnew[j] = ctnew;
832 		j++;
833 	}
834 	for (; j <= len[1]; j++)
835 		ixnew[j] = ctnew += skipline(f2);
836 	/*
837 	 * if (jackpot)
838 	 *	fprintf(stderr, "jackpot\n");
839 	 */
840 }
841 
842 /* shellsort CACM #201 */
843 static void
844 sort(struct line *a, int n)
845 {
846 	struct line *ai, *aim, w;
847 	int j, m = 0, k;
848 
849 	if (n == 0)
850 		return;
851 	for (j = 1; j <= n; j *= 2)
852 		m = 2 * j - 1;
853 	for (m /= 2; m != 0; m /= 2) {
854 		k = n - m;
855 		for (j = 1; j <= k; j++) {
856 			for (ai = &a[j]; ai > a; ai -= m) {
857 				aim = &ai[m];
858 				if (aim < ai)
859 					break;	/* wraparound */
860 				if (aim->value > ai[0].value ||
861 				    (aim->value == ai[0].value &&
862 					aim->serial > ai[0].serial))
863 					break;
864 				w.value = ai[0].value;
865 				ai[0].value = aim->value;
866 				aim->value = w.value;
867 				w.serial = ai[0].serial;
868 				ai[0].serial = aim->serial;
869 				aim->serial = w.serial;
870 			}
871 		}
872 	}
873 }
874 
875 static void
876 unsort(struct line *f, int l, int *b)
877 {
878 	int *a, i;
879 
880 	a = xcalloc(l + 1, sizeof(*a));
881 	for (i = 1; i <= l; i++)
882 		a[f[i].serial] = f[i].value;
883 	for (i = 1; i <= l; i++)
884 		b[i] = a[i];
885 	xfree(a);
886 }
887 
888 static int
889 skipline(FILE *f)
890 {
891 	int i, c;
892 
893 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
894 		continue;
895 	return (i);
896 }
897 
898 static void
899 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
900 {
901 	int m, i0, i1, j0, j1;
902 
903 	rewind(f1);
904 	rewind(f2);
905 	m = len[0];
906 	J[0] = 0;
907 	J[m + 1] = len[1] + 1;
908 	if (diff_format != D_EDIT) {
909 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
910 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
911 				i0++;
912 			j0 = J[i0 - 1] + 1;
913 			i1 = i0 - 1;
914 			while (i1 < m && J[i1 + 1] == 0)
915 				i1++;
916 			j1 = J[i1 + 1] - 1;
917 			J[i1] = j1;
918 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
919 		}
920 	} else {
921 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
922 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
923 				i0--;
924 			j0 = J[i0 + 1] - 1;
925 			i1 = i0 + 1;
926 			while (i1 > 1 && J[i1 - 1] == 0)
927 				i1--;
928 			j1 = J[i1 - 1] + 1;
929 			J[i1] = j1;
930 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
931 		}
932 	}
933 	if (m == 0)
934 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
935 	if (diff_format == D_IFDEF) {
936 		for (;;) {
937 #define	c i0
938 			if ((c = getc(f1)) == EOF)
939 				return;
940 			putchar(c);
941 		}
942 #undef c
943 	}
944 	if (anychange != 0) {
945 		if (diff_format == D_CONTEXT)
946 			dump_context_vec(f1, f2, flags);
947 		else if (diff_format == D_UNIFIED)
948 			dump_unified_vec(f1, f2, flags);
949 	}
950 }
951 
952 static void
953 range(int a, int b, char *separator)
954 {
955 	printf("%d", a > b ? b : a);
956 	if (a < b)
957 		printf("%s%d", separator, b);
958 }
959 
960 static void
961 uni_range(int a, int b)
962 {
963 	if (a < b)
964 		printf("%d,%d", a, b - a + 1);
965 	else if (a == b)
966 		printf("%d", b);
967 	else
968 		printf("%d,0", b);
969 }
970 
971 static char *
972 preadline(int fd, size_t rlen, off_t off)
973 {
974 	char *line;
975 	ssize_t nr;
976 
977 	line = xmalloc(rlen + 1);
978 	if ((nr = pread(fd, line, rlen, off)) < 0)
979 		err(1, "preadline");
980 	if (nr > 0 && line[nr-1] == '\n')
981 		nr--;
982 	line[nr] = '\0';
983 	return (line);
984 }
985 
986 static int
987 ignoreline(char *line)
988 {
989 	int ret;
990 
991 	ret = regexec(&ignore_re, line, 0, NULL, 0);
992 	xfree(line);
993 	return (ret == 0);	/* if it matched, it should be ignored. */
994 }
995 
996 /*
997  * Indicate that there is a difference between lines a and b of the from file
998  * to get to lines c to d of the to file.  If a is greater then b then there
999  * are no lines in the from file involved and this means that there were
1000  * lines appended (beginning at b).  If c is greater than d then there are
1001  * lines missing from the to file.
1002  */
1003 static void
1004 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1005     int *pflags)
1006 {
1007 	static size_t max_context = 64;
1008 	int i;
1009 
1010 restart:
1011 	if (diff_format != D_IFDEF && a > b && c > d)
1012 		return;
1013 	if (ignore_pats != NULL) {
1014 		char *line;
1015 		/*
1016 		 * All lines in the change, insert, or delete must
1017 		 * match an ignore pattern for the change to be
1018 		 * ignored.
1019 		 */
1020 		if (a <= b) {		/* Changes and deletes. */
1021 			for (i = a; i <= b; i++) {
1022 				line = preadline(fileno(f1),
1023 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1024 				if (!ignoreline(line))
1025 					goto proceed;
1026 			}
1027 		}
1028 		if (a > b || c <= d) {	/* Changes and inserts. */
1029 			for (i = c; i <= d; i++) {
1030 				line = preadline(fileno(f2),
1031 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1032 				if (!ignoreline(line))
1033 					goto proceed;
1034 			}
1035 		}
1036 		return;
1037 	}
1038 proceed:
1039 	if (*pflags & D_HEADER) {
1040 		printf("%s %s %s\n", diffargs, file1, file2);
1041 		*pflags &= ~D_HEADER;
1042 	}
1043 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1044 		/*
1045 		 * Allocate change records as needed.
1046 		 */
1047 		if (context_vec_ptr == context_vec_end - 1) {
1048 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1049 			max_context <<= 1;
1050 			context_vec_start = xrealloc(context_vec_start,
1051 			    max_context, sizeof(*context_vec_start));
1052 			context_vec_end = context_vec_start + max_context;
1053 			context_vec_ptr = context_vec_start + offset;
1054 		}
1055 		if (anychange == 0) {
1056 			/*
1057 			 * Print the context/unidiff header first time through.
1058 			 */
1059 			print_header(file1, file2);
1060 			anychange = 1;
1061 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1062 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1063 			/*
1064 			 * If this change is more than 'diff_context' lines from the
1065 			 * previous change, dump the record and reset it.
1066 			 */
1067 			if (diff_format == D_CONTEXT)
1068 				dump_context_vec(f1, f2, *pflags);
1069 			else
1070 				dump_unified_vec(f1, f2, *pflags);
1071 		}
1072 		context_vec_ptr++;
1073 		context_vec_ptr->a = a;
1074 		context_vec_ptr->b = b;
1075 		context_vec_ptr->c = c;
1076 		context_vec_ptr->d = d;
1077 		return;
1078 	}
1079 	if (anychange == 0)
1080 		anychange = 1;
1081 	switch (diff_format) {
1082 	case D_BRIEF:
1083 		return;
1084 	case D_NORMAL:
1085 	case D_EDIT:
1086 		range(a, b, ",");
1087 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1088 		if (diff_format == D_NORMAL)
1089 			range(c, d, ",");
1090 		putchar('\n');
1091 		break;
1092 	case D_REVERSE:
1093 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1094 		range(a, b, " ");
1095 		putchar('\n');
1096 		break;
1097 	case D_NREVERSE:
1098 		if (a > b)
1099 			printf("a%d %d\n", b, d - c + 1);
1100 		else {
1101 			printf("d%d %d\n", a, b - a + 1);
1102 			if (!(c > d))
1103 				/* add changed lines */
1104 				printf("a%d %d\n", b, d - c + 1);
1105 		}
1106 		break;
1107 	}
1108 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1109 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1110 		if (a <= b && c <= d && diff_format == D_NORMAL)
1111 			puts("---");
1112 	}
1113 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1114 	if (i != 0 && diff_format == D_EDIT) {
1115 		/*
1116 		 * A non-zero return value for D_EDIT indicates that the
1117 		 * last line printed was a bare dot (".") that has been
1118 		 * escaped as ".." to prevent ed(1) from misinterpreting
1119 		 * it.  We have to add a substitute command to change this
1120 		 * back and restart where we left off.
1121 		 */
1122 		puts(".");
1123 		printf("%ds/^\\.\\././\n", a);
1124 		a += i;
1125 		c += i;
1126 		goto restart;
1127 	}
1128 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1129 		puts(".");
1130 	if (inifdef) {
1131 		printf("#endif /* %s */\n", ifdefname);
1132 		inifdef = 0;
1133 	}
1134 }
1135 
1136 static int
1137 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1138 {
1139 	int i, j, c, lastc, col, nc;
1140 
1141 	/*
1142 	 * When doing #ifdef's, copy down to current line
1143 	 * if this is the first file, so that stuff makes it to output.
1144 	 */
1145 	if (diff_format == D_IFDEF && oldfile) {
1146 		long curpos = ftell(lb);
1147 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1148 		nc = f[a > b ? b : a - 1] - curpos;
1149 		for (i = 0; i < nc; i++)
1150 			putchar(getc(lb));
1151 	}
1152 	if (a > b)
1153 		return (0);
1154 	if (diff_format == D_IFDEF) {
1155 		if (inifdef) {
1156 			printf("#else /* %s%s */\n",
1157 			    oldfile == 1 ? "!" : "", ifdefname);
1158 		} else {
1159 			if (oldfile)
1160 				printf("#ifndef %s\n", ifdefname);
1161 			else
1162 				printf("#ifdef %s\n", ifdefname);
1163 		}
1164 		inifdef = 1 + oldfile;
1165 	}
1166 	for (i = a; i <= b; i++) {
1167 		fseek(lb, f[i - 1], SEEK_SET);
1168 		nc = f[i] - f[i - 1];
1169 		if (diff_format != D_IFDEF && ch != '\0') {
1170 			putchar(ch);
1171 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1172 			    || diff_format == D_UNIFIED))
1173 				putchar('\t');
1174 			else if (diff_format != D_UNIFIED)
1175 				putchar(' ');
1176 		}
1177 		col = 0;
1178 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1179 			if ((c = getc(lb)) == EOF) {
1180 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1181 				    diff_format == D_NREVERSE)
1182 					warnx("No newline at end of file");
1183 				else
1184 					puts("\n\\ No newline at end of file");
1185 				return (0);
1186 			}
1187 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1188 				do {
1189 					putchar(' ');
1190 				} while (++col & 7);
1191 			} else {
1192 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1193 				    && lastc == '.') {
1194 					/*
1195 					 * Don't print a bare "." line
1196 					 * since that will confuse ed(1).
1197 					 * Print ".." instead and return,
1198 					 * giving the caller an offset
1199 					 * from which to restart.
1200 					 */
1201 					puts(".");
1202 					return (i - a + 1);
1203 				}
1204 				putchar(c);
1205 				col++;
1206 			}
1207 		}
1208 	}
1209 	return (0);
1210 }
1211 
1212 /*
1213  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1214  */
1215 static int
1216 readhash(FILE *f, int flags)
1217 {
1218 	int i, t, space;
1219 	int sum;
1220 
1221 	sum = 1;
1222 	space = 0;
1223 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1224 		if (flags & D_IGNORECASE)
1225 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1226 				if (t == EOF) {
1227 					if (i == 0)
1228 						return (0);
1229 					break;
1230 				}
1231 				sum = sum * 127 + chrtran[t];
1232 			}
1233 		else
1234 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1235 				if (t == EOF) {
1236 					if (i == 0)
1237 						return (0);
1238 					break;
1239 				}
1240 				sum = sum * 127 + t;
1241 			}
1242 	} else {
1243 		for (i = 0;;) {
1244 			switch (t = getc(f)) {
1245 			case '\t':
1246 			case '\r':
1247 			case '\v':
1248 			case '\f':
1249 			case ' ':
1250 				space++;
1251 				continue;
1252 			default:
1253 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1254 					i++;
1255 					space = 0;
1256 				}
1257 				sum = sum * 127 + chrtran[t];
1258 				i++;
1259 				continue;
1260 			case EOF:
1261 				if (i == 0)
1262 					return (0);
1263 				/* FALLTHROUGH */
1264 			case '\n':
1265 				break;
1266 			}
1267 			break;
1268 		}
1269 	}
1270 	/*
1271 	 * There is a remote possibility that we end up with a zero sum.
1272 	 * Zero is used as an EOF marker, so return 1 instead.
1273 	 */
1274 	return (sum == 0 ? 1 : sum);
1275 }
1276 
1277 static int
1278 asciifile(FILE *f)
1279 {
1280 	unsigned char buf[BUFSIZ];
1281 	size_t i, cnt;
1282 
1283 	if (f == NULL)
1284 		return (1);
1285 
1286 	rewind(f);
1287 	cnt = fread(buf, 1, sizeof(buf), f);
1288 	for (i = 0; i < cnt; i++)
1289 		if (!isprint(buf[i]) && !isspace(buf[i]))
1290 			return (0);
1291 	return (1);
1292 }
1293 
1294 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1295 
1296 static char *
1297 match_function(const long *f, int pos, FILE *fp)
1298 {
1299 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1300 	size_t nc;
1301 	int last = lastline;
1302 	char *state = NULL;
1303 
1304 	lastline = pos;
1305 	while (pos > last) {
1306 		fseek(fp, f[pos - 1], SEEK_SET);
1307 		nc = f[pos] - f[pos - 1];
1308 		if (nc >= sizeof(buf))
1309 			nc = sizeof(buf) - 1;
1310 		nc = fread(buf, 1, nc, fp);
1311 		if (nc > 0) {
1312 			buf[nc] = '\0';
1313 			buf[strcspn(buf, "\n")] = '\0';
1314 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1315 				if (begins_with(buf, "private:")) {
1316 					if (!state)
1317 						state = " (private)";
1318 				} else if (begins_with(buf, "protected:")) {
1319 					if (!state)
1320 						state = " (protected)";
1321 				} else if (begins_with(buf, "public:")) {
1322 					if (!state)
1323 						state = " (public)";
1324 				} else {
1325 					strlcpy(lastbuf, buf, sizeof lastbuf);
1326 					if (state)
1327 						strlcat(lastbuf, state,
1328 						    sizeof lastbuf);
1329 					lastmatchline = pos;
1330 					return lastbuf;
1331 				}
1332 			}
1333 		}
1334 		pos--;
1335 	}
1336 	return lastmatchline > 0 ? lastbuf : NULL;
1337 }
1338 
1339 /* dump accumulated "context" diff changes */
1340 static void
1341 dump_context_vec(FILE *f1, FILE *f2, int flags)
1342 {
1343 	struct context_vec *cvp = context_vec_start;
1344 	int lowa, upb, lowc, upd, do_output;
1345 	int a, b, c, d;
1346 	char ch, *f;
1347 
1348 	if (context_vec_start > context_vec_ptr)
1349 		return;
1350 
1351 	b = d = 0;		/* gcc */
1352 	lowa = MAX(1, cvp->a - diff_context);
1353 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1354 	lowc = MAX(1, cvp->c - diff_context);
1355 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1356 
1357 	printf("***************");
1358 	if ((flags & D_PROTOTYPE)) {
1359 		f = match_function(ixold, lowa-1, f1);
1360 		if (f != NULL) {
1361 			putchar(' ');
1362 			fputs(f, stdout);
1363 		}
1364 	}
1365 	printf("\n*** ");
1366 	range(lowa, upb, ",");
1367 	printf(" ****\n");
1368 
1369 	/*
1370 	 * Output changes to the "old" file.  The first loop suppresses
1371 	 * output if there were no changes to the "old" file (we'll see
1372 	 * the "old" lines as context in the "new" list).
1373 	 */
1374 	do_output = 0;
1375 	for (; cvp <= context_vec_ptr; cvp++)
1376 		if (cvp->a <= cvp->b) {
1377 			cvp = context_vec_start;
1378 			do_output++;
1379 			break;
1380 		}
1381 	if (do_output) {
1382 		while (cvp <= context_vec_ptr) {
1383 			a = cvp->a;
1384 			b = cvp->b;
1385 			c = cvp->c;
1386 			d = cvp->d;
1387 
1388 			if (a <= b && c <= d)
1389 				ch = 'c';
1390 			else
1391 				ch = (a <= b) ? 'd' : 'a';
1392 
1393 			if (ch == 'a')
1394 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1395 			else {
1396 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1397 				fetch(ixold, a, b, f1,
1398 				    ch == 'c' ? '!' : '-', 0, flags);
1399 			}
1400 			lowa = b + 1;
1401 			cvp++;
1402 		}
1403 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1404 	}
1405 	/* output changes to the "new" file */
1406 	printf("--- ");
1407 	range(lowc, upd, ",");
1408 	printf(" ----\n");
1409 
1410 	do_output = 0;
1411 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1412 		if (cvp->c <= cvp->d) {
1413 			cvp = context_vec_start;
1414 			do_output++;
1415 			break;
1416 		}
1417 	if (do_output) {
1418 		while (cvp <= context_vec_ptr) {
1419 			a = cvp->a;
1420 			b = cvp->b;
1421 			c = cvp->c;
1422 			d = cvp->d;
1423 
1424 			if (a <= b && c <= d)
1425 				ch = 'c';
1426 			else
1427 				ch = (a <= b) ? 'd' : 'a';
1428 
1429 			if (ch == 'd')
1430 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1431 			else {
1432 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1433 				fetch(ixnew, c, d, f2,
1434 				    ch == 'c' ? '!' : '+', 0, flags);
1435 			}
1436 			lowc = d + 1;
1437 			cvp++;
1438 		}
1439 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1440 	}
1441 	context_vec_ptr = context_vec_start - 1;
1442 }
1443 
1444 /* dump accumulated "unified" diff changes */
1445 static void
1446 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1447 {
1448 	struct context_vec *cvp = context_vec_start;
1449 	int lowa, upb, lowc, upd;
1450 	int a, b, c, d;
1451 	char ch, *f;
1452 
1453 	if (context_vec_start > context_vec_ptr)
1454 		return;
1455 
1456 	b = d = 0;		/* gcc */
1457 	lowa = MAX(1, cvp->a - diff_context);
1458 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1459 	lowc = MAX(1, cvp->c - diff_context);
1460 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1461 
1462 	fputs("@@ -", stdout);
1463 	uni_range(lowa, upb);
1464 	fputs(" +", stdout);
1465 	uni_range(lowc, upd);
1466 	fputs(" @@", stdout);
1467 	if ((flags & D_PROTOTYPE)) {
1468 		f = match_function(ixold, lowa-1, f1);
1469 		if (f != NULL) {
1470 			putchar(' ');
1471 			fputs(f, stdout);
1472 		}
1473 	}
1474 	putchar('\n');
1475 
1476 	/*
1477 	 * Output changes in "unified" diff format--the old and new lines
1478 	 * are printed together.
1479 	 */
1480 	for (; cvp <= context_vec_ptr; cvp++) {
1481 		a = cvp->a;
1482 		b = cvp->b;
1483 		c = cvp->c;
1484 		d = cvp->d;
1485 
1486 		/*
1487 		 * c: both new and old changes
1488 		 * d: only changes in the old file
1489 		 * a: only changes in the new file
1490 		 */
1491 		if (a <= b && c <= d)
1492 			ch = 'c';
1493 		else
1494 			ch = (a <= b) ? 'd' : 'a';
1495 
1496 		switch (ch) {
1497 		case 'c':
1498 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1499 			fetch(ixold, a, b, f1, '-', 0, flags);
1500 			fetch(ixnew, c, d, f2, '+', 0, flags);
1501 			break;
1502 		case 'd':
1503 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1504 			fetch(ixold, a, b, f1, '-', 0, flags);
1505 			break;
1506 		case 'a':
1507 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1508 			fetch(ixnew, c, d, f2, '+', 0, flags);
1509 			break;
1510 		}
1511 		lowa = b + 1;
1512 		lowc = d + 1;
1513 	}
1514 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1515 
1516 	context_vec_ptr = context_vec_start - 1;
1517 }
1518 
1519 static void
1520 print_header(const char *file1, const char *file2)
1521 {
1522 	if (label[0] != NULL)
1523 		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1524 		    label[0]);
1525 	else
1526 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1527 		    file1, ctime(&stb1.st_mtime));
1528 	if (label[1] != NULL)
1529 		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1530 		    label[1]);
1531 	else
1532 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1533 		    file2, ctime(&stb2.st_mtime));
1534 }
1535