xref: /openbsd-src/usr.bin/diff/diffreg.c (revision cd1eb269cafb12c415be1749cd4a4b5422710415)
1 /*	$OpenBSD: diffreg.c,v 1.74 2010/03/22 19:33:19 schwarze Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 static FILE	*opentemp(const char *);
176 static void	 output(char *, FILE *, char *, FILE *, int);
177 static void	 check(char *, FILE *, char *, FILE *, int);
178 static void	 range(int, int, char *);
179 static void	 uni_range(int, int);
180 static void	 dump_context_vec(FILE *, FILE *, int);
181 static void	 dump_unified_vec(FILE *, FILE *, int);
182 static void	 prepare(int, FILE *, off_t, int);
183 static void	 prune(void);
184 static void	 equiv(struct line *, int, struct line *, int, int *);
185 static void	 unravel(int);
186 static void	 unsort(struct line *, int, int *);
187 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
188 static void	 sort(struct line *, int);
189 static void	 print_header(const char *, const char *);
190 static int	 ignoreline(char *);
191 static int	 asciifile(FILE *);
192 static int	 fetch(long *, int, int, FILE *, int, int, int);
193 static int	 newcand(int, int, int);
194 static int	 search(int *, int, int);
195 static int	 skipline(FILE *);
196 static int	 isqrt(int);
197 static int	 stone(int *, int, int *, int *, int);
198 static int	 readhash(FILE *, int);
199 static int	 files_differ(FILE *, FILE *, int);
200 static char	*match_function(const long *, int, FILE *);
201 static char	*preadline(int, size_t, off_t);
202 
203 static int  *J;			/* will be overlaid on class */
204 static int  *class;		/* will be overlaid on file[0] */
205 static int  *klist;		/* will be overlaid on file[0] after class */
206 static int  *member;		/* will be overlaid on file[1] */
207 static int   clen;
208 static int   inifdef;		/* whether or not we are in a #ifdef block */
209 static int   len[2];
210 static int   pref, suff;	/* length of prefix and suffix */
211 static int   slen[2];
212 static int   anychange;
213 static long *ixnew;		/* will be overlaid on file[1] */
214 static long *ixold;		/* will be overlaid on klist */
215 static struct cand *clist;	/* merely a free storage pot for candidates */
216 static int   clistlen;		/* the length of clist */
217 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
218 static u_char *chrtran;		/* translation table for case-folding */
219 static struct context_vec *context_vec_start;
220 static struct context_vec *context_vec_end;
221 static struct context_vec *context_vec_ptr;
222 
223 #define FUNCTION_CONTEXT_SIZE	55
224 static char lastbuf[FUNCTION_CONTEXT_SIZE];
225 static int lastline;
226 static int lastmatchline;
227 
228 
229 /*
230  * chrtran points to one of 2 translation tables: cup2low if folding upper to
231  * lower case clow2low if not folding case
232  */
233 u_char clow2low[256] = {
234 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
235 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
236 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
237 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
238 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
239 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
240 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
241 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
242 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
243 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
244 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
245 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
246 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
247 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
248 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
249 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
250 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
251 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
252 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
253 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
254 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
255 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
256 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
257 	0xfd, 0xfe, 0xff
258 };
259 
260 u_char cup2low[256] = {
261 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
262 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
263 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
264 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
265 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
266 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
267 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
268 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
269 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
270 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
271 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
272 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
273 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
274 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
275 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
276 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
277 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
278 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
279 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
280 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
281 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
282 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
283 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
284 	0xfd, 0xfe, 0xff
285 };
286 
287 int
288 diffreg(char *file1, char *file2, int flags)
289 {
290 	FILE *f1, *f2;
291 	int i, rval, ostdout = -1;
292 	pid_t pid = -1;
293 
294 	f1 = f2 = NULL;
295 	rval = D_SAME;
296 	anychange = 0;
297 	lastline = 0;
298 	lastmatchline = 0;
299 	context_vec_ptr = context_vec_start - 1;
300 	if (flags & D_IGNORECASE)
301 		chrtran = cup2low;
302 	else
303 		chrtran = clow2low;
304 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
305 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
306 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
307 		goto closem;
308 
309 	if (flags & D_EMPTY1)
310 		f1 = fopen(_PATH_DEVNULL, "r");
311 	else {
312 		if (!S_ISREG(stb1.st_mode)) {
313 			if ((f1 = opentemp(file1)) == NULL ||
314 			    fstat(fileno(f1), &stb1) < 0) {
315 				warn("%s", file1);
316 				status |= 2;
317 				goto closem;
318 			}
319 		} else if (strcmp(file1, "-") == 0)
320 			f1 = stdin;
321 		else
322 			f1 = fopen(file1, "r");
323 	}
324 	if (f1 == NULL) {
325 		warn("%s", file1);
326 		status |= 2;
327 		goto closem;
328 	}
329 
330 	if (flags & D_EMPTY2)
331 		f2 = fopen(_PATH_DEVNULL, "r");
332 	else {
333 		if (!S_ISREG(stb2.st_mode)) {
334 			if ((f2 = opentemp(file2)) == NULL ||
335 			    fstat(fileno(f2), &stb2) < 0) {
336 				warn("%s", file2);
337 				status |= 2;
338 				goto closem;
339 			}
340 		} else if (strcmp(file2, "-") == 0)
341 			f2 = stdin;
342 		else
343 			f2 = fopen(file2, "r");
344 	}
345 	if (f2 == NULL) {
346 		warn("%s", file2);
347 		status |= 2;
348 		goto closem;
349 	}
350 
351 	switch (files_differ(f1, f2, flags)) {
352 	case 0:
353 		goto closem;
354 	case 1:
355 		break;
356 	default:
357 		/* error */
358 		status |= 2;
359 		goto closem;
360 	}
361 
362 	if ((flags & D_FORCEASCII) == 0 &&
363 	    (!asciifile(f1) || !asciifile(f2))) {
364 		rval = D_BINARY;
365 		status |= 1;
366 		goto closem;
367 	}
368 	if (lflag) {
369 		/* redirect stdout to pr */
370 		int pfd[2];
371 		char *header;
372 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
373 
374 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
375 		prargv[2] = header;
376 		fflush(stdout);
377 		rewind(stdout);
378 		pipe(pfd);
379 		switch ((pid = fork())) {
380 		case -1:
381 			warnx("No more processes");
382 			status |= 2;
383 			xfree(header);
384 			return (D_ERROR);
385 		case 0:
386 			/* child */
387 			if (pfd[0] != STDIN_FILENO) {
388 				dup2(pfd[0], STDIN_FILENO);
389 				close(pfd[0]);
390 			}
391 			close(pfd[1]);
392 			execv(_PATH_PR, prargv);
393 			_exit(127);
394 		default:
395 			/* parent */
396 			if (pfd[1] != STDOUT_FILENO) {
397 				ostdout = dup(STDOUT_FILENO);
398 				dup2(pfd[1], STDOUT_FILENO);
399 				close(pfd[1]);
400 			}
401 			close(pfd[0]);
402 			rewind(stdout);
403 			xfree(header);
404 		}
405 	}
406 	prepare(0, f1, stb1.st_size, flags);
407 	prepare(1, f2, stb2.st_size, flags);
408 
409 	prune();
410 	sort(sfile[0], slen[0]);
411 	sort(sfile[1], slen[1]);
412 
413 	member = (int *)file[1];
414 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
415 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
416 
417 	class = (int *)file[0];
418 	unsort(sfile[0], slen[0], class);
419 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
420 
421 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
422 	clen = 0;
423 	clistlen = 100;
424 	clist = xcalloc(clistlen, sizeof(*clist));
425 	i = stone(class, slen[0], member, klist, flags);
426 	xfree(member);
427 	xfree(class);
428 
429 	J = xrealloc(J, len[0] + 2, sizeof(*J));
430 	unravel(klist[i]);
431 	xfree(clist);
432 	xfree(klist);
433 
434 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
435 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
436 	check(file1, f1, file2, f2, flags);
437 	output(file1, f1, file2, f2, flags);
438 	if (ostdout != -1) {
439 		int wstatus;
440 
441 		/* close the pipe to pr and restore stdout */
442 		fflush(stdout);
443 		rewind(stdout);
444 		if (ostdout != STDOUT_FILENO) {
445 			close(STDOUT_FILENO);
446 			dup2(ostdout, STDOUT_FILENO);
447 			close(ostdout);
448 		}
449 		waitpid(pid, &wstatus, 0);
450 	}
451 closem:
452 	if (anychange) {
453 		status |= 1;
454 		if (rval == D_SAME)
455 			rval = D_DIFFER;
456 	}
457 	if (f1 != NULL)
458 		fclose(f1);
459 	if (f2 != NULL)
460 		fclose(f2);
461 
462 	return (rval);
463 }
464 
465 /*
466  * Check to see if the given files differ.
467  * Returns 0 if they are the same, 1 if different, and -1 on error.
468  * XXX - could use code from cmp(1) [faster]
469  */
470 static int
471 files_differ(FILE *f1, FILE *f2, int flags)
472 {
473 	char buf1[BUFSIZ], buf2[BUFSIZ];
474 	size_t i, j;
475 
476 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
477 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
478 		return (1);
479 	for (;;) {
480 		i = fread(buf1, 1, sizeof(buf1), f1);
481 		j = fread(buf2, 1, sizeof(buf2), f2);
482 		if (i != j)
483 			return (1);
484 		if (i == 0 && j == 0) {
485 			if (ferror(f1) || ferror(f2))
486 				return (1);
487 			return (0);
488 		}
489 		if (memcmp(buf1, buf2, i) != 0)
490 			return (1);
491 	}
492 }
493 
494 static FILE *
495 opentemp(const char *file)
496 {
497 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
498 	ssize_t nread;
499 	int ifd, ofd;
500 
501 	if (strcmp(file, "-") == 0)
502 		ifd = STDIN_FILENO;
503 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
504 		return (NULL);
505 
506 	if ((tempdir = getenv("TMPDIR")) == NULL)
507 		tempdir = _PATH_TMP;
508 
509 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
510 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
511 	    sizeof(tempfile)) {
512 		close(ifd);
513 		errno = ENAMETOOLONG;
514 		return (NULL);
515 	}
516 
517 	if ((ofd = mkstemp(tempfile)) < 0) {
518 		close(ifd);
519 		return (NULL);
520 	}
521 	unlink(tempfile);
522 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
523 		if (write(ofd, buf, nread) != nread) {
524 			close(ifd);
525 			close(ofd);
526 			return (NULL);
527 		}
528 	}
529 	close(ifd);
530 	lseek(ofd, (off_t)0, SEEK_SET);
531 	return (fdopen(ofd, "r"));
532 }
533 
534 char *
535 splice(char *dir, char *file)
536 {
537 	char *tail, *buf;
538 
539 	if ((tail = strrchr(file, '/')) == NULL)
540 		tail = file;
541 	else
542 		tail++;
543 	xasprintf(&buf, "%s/%s", dir, tail);
544 	return (buf);
545 }
546 
547 static void
548 prepare(int i, FILE *fd, off_t filesize, int flags)
549 {
550 	struct line *p;
551 	int j, h;
552 	size_t sz;
553 
554 	rewind(fd);
555 
556 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
557 	if (sz < 100)
558 		sz = 100;
559 
560 	p = xcalloc(sz + 3, sizeof(*p));
561 	for (j = 0; (h = readhash(fd, flags));) {
562 		if (j == sz) {
563 			sz = sz * 3 / 2;
564 			p = xrealloc(p, sz + 3, sizeof(*p));
565 		}
566 		p[++j].value = h;
567 	}
568 	len[i] = j;
569 	file[i] = p;
570 }
571 
572 static void
573 prune(void)
574 {
575 	int i, j;
576 
577 	for (pref = 0; pref < len[0] && pref < len[1] &&
578 	    file[0][pref + 1].value == file[1][pref + 1].value;
579 	    pref++)
580 		;
581 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
582 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
583 	    suff++)
584 		;
585 	for (j = 0; j < 2; j++) {
586 		sfile[j] = file[j] + pref;
587 		slen[j] = len[j] - pref - suff;
588 		for (i = 0; i <= slen[j]; i++)
589 			sfile[j][i].serial = i;
590 	}
591 }
592 
593 static void
594 equiv(struct line *a, int n, struct line *b, int m, int *c)
595 {
596 	int i, j;
597 
598 	i = j = 1;
599 	while (i <= n && j <= m) {
600 		if (a[i].value < b[j].value)
601 			a[i++].value = 0;
602 		else if (a[i].value == b[j].value)
603 			a[i++].value = j;
604 		else
605 			j++;
606 	}
607 	while (i <= n)
608 		a[i++].value = 0;
609 	b[m + 1].value = 0;
610 	j = 0;
611 	while (++j <= m) {
612 		c[j] = -b[j].serial;
613 		while (b[j + 1].value == b[j].value) {
614 			j++;
615 			c[j] = b[j].serial;
616 		}
617 	}
618 	c[j] = -1;
619 }
620 
621 /* Code taken from ping.c */
622 static int
623 isqrt(int n)
624 {
625 	int y, x = 1;
626 
627 	if (n == 0)
628 		return (0);
629 
630 	do { /* newton was a stinker */
631 		y = x;
632 		x = n / x;
633 		x += y;
634 		x /= 2;
635 	} while ((x - y) > 1 || (x - y) < -1);
636 
637 	return (x);
638 }
639 
640 static int
641 stone(int *a, int n, int *b, int *c, int flags)
642 {
643 	int i, k, y, j, l;
644 	int oldc, tc, oldl;
645 	u_int numtries;
646 
647 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
648 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
649 	    MAX(256, isqrt(n));
650 
651 	k = 0;
652 	c[0] = newcand(0, 0, 0);
653 	for (i = 1; i <= n; i++) {
654 		j = a[i];
655 		if (j == 0)
656 			continue;
657 		y = -b[j];
658 		oldl = 0;
659 		oldc = c[0];
660 		numtries = 0;
661 		do {
662 			if (y <= clist[oldc].y)
663 				continue;
664 			l = search(c, k, y);
665 			if (l != oldl + 1)
666 				oldc = c[l - 1];
667 			if (l <= k) {
668 				if (clist[c[l]].y <= y)
669 					continue;
670 				tc = c[l];
671 				c[l] = newcand(i, y, oldc);
672 				oldc = tc;
673 				oldl = l;
674 				numtries++;
675 			} else {
676 				c[l] = newcand(i, y, oldc);
677 				k++;
678 				break;
679 			}
680 		} while ((y = b[++j]) > 0 && numtries < bound);
681 	}
682 	return (k);
683 }
684 
685 static int
686 newcand(int x, int y, int pred)
687 {
688 	struct cand *q;
689 
690 	if (clen == clistlen) {
691 		clistlen = clistlen * 11 / 10;
692 		clist = xrealloc(clist, clistlen, sizeof(*clist));
693 	}
694 	q = clist + clen;
695 	q->x = x;
696 	q->y = y;
697 	q->pred = pred;
698 	return (clen++);
699 }
700 
701 static int
702 search(int *c, int k, int y)
703 {
704 	int i, j, l, t;
705 
706 	if (clist[c[k]].y < y)	/* quick look for typical case */
707 		return (k + 1);
708 	i = 0;
709 	j = k + 1;
710 	for (;;) {
711 		l = (i + j) / 2;
712 		if (l <= i)
713 			break;
714 		t = clist[c[l]].y;
715 		if (t > y)
716 			j = l;
717 		else if (t < y)
718 			i = l;
719 		else
720 			return (l);
721 	}
722 	return (l + 1);
723 }
724 
725 static void
726 unravel(int p)
727 {
728 	struct cand *q;
729 	int i;
730 
731 	for (i = 0; i <= len[0]; i++)
732 		J[i] = i <= pref ? i :
733 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
734 	for (q = clist + p; q->y != 0; q = clist + q->pred)
735 		J[q->x + pref] = q->y + pref;
736 }
737 
738 /*
739  * Check does double duty:
740  *  1.	ferret out any fortuitous correspondences due
741  *	to confounding by hashing (which result in "jackpot")
742  *  2.  collect random access indexes to the two files
743  */
744 static void
745 check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
746 {
747 	int i, j, jackpot, c, d;
748 	long ctold, ctnew;
749 
750 	rewind(f1);
751 	rewind(f2);
752 	j = 1;
753 	ixold[0] = ixnew[0] = 0;
754 	jackpot = 0;
755 	ctold = ctnew = 0;
756 	for (i = 1; i <= len[0]; i++) {
757 		if (J[i] == 0) {
758 			ixold[i] = ctold += skipline(f1);
759 			continue;
760 		}
761 		while (j < J[i]) {
762 			ixnew[j] = ctnew += skipline(f2);
763 			j++;
764 		}
765 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
766 			for (;;) {
767 				c = getc(f1);
768 				d = getc(f2);
769 				/*
770 				 * GNU diff ignores a missing newline
771 				 * in one file for -b or -w.
772 				 */
773 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
774 				    ((c == EOF && d == '\n') ||
775 				    (c == '\n' && d == EOF))) {
776 					break;
777 				}
778 				ctold++;
779 				ctnew++;
780 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
781 				    isspace(d)) {
782 					do {
783 						if (c == '\n')
784 							break;
785 						ctold++;
786 					} while (isspace(c = getc(f1)));
787 					do {
788 						if (d == '\n')
789 							break;
790 						ctnew++;
791 					} while (isspace(d = getc(f2)));
792 				} else if ((flags & D_IGNOREBLANKS)) {
793 					while (isspace(c) && c != '\n') {
794 						c = getc(f1);
795 						ctold++;
796 					}
797 					while (isspace(d) && d != '\n') {
798 						d = getc(f2);
799 						ctnew++;
800 					}
801 				}
802 				if (chrtran[c] != chrtran[d]) {
803 					jackpot++;
804 					J[i] = 0;
805 					if (c != '\n' && c != EOF)
806 						ctold += skipline(f1);
807 					if (d != '\n' && c != EOF)
808 						ctnew += skipline(f2);
809 					break;
810 				}
811 				if (c == '\n' || c == EOF)
812 					break;
813 			}
814 		} else {
815 			for (;;) {
816 				ctold++;
817 				ctnew++;
818 				if ((c = getc(f1)) != (d = getc(f2))) {
819 					/* jackpot++; */
820 					J[i] = 0;
821 					if (c != '\n' && c != EOF)
822 						ctold += skipline(f1);
823 					if (d != '\n' && c != EOF)
824 						ctnew += skipline(f2);
825 					break;
826 				}
827 				if (c == '\n' || c == EOF)
828 					break;
829 			}
830 		}
831 		ixold[i] = ctold;
832 		ixnew[j] = ctnew;
833 		j++;
834 	}
835 	for (; j <= len[1]; j++)
836 		ixnew[j] = ctnew += skipline(f2);
837 	/*
838 	 * if (jackpot)
839 	 *	fprintf(stderr, "jackpot\n");
840 	 */
841 }
842 
843 /* shellsort CACM #201 */
844 static void
845 sort(struct line *a, int n)
846 {
847 	struct line *ai, *aim, w;
848 	int j, m = 0, k;
849 
850 	if (n == 0)
851 		return;
852 	for (j = 1; j <= n; j *= 2)
853 		m = 2 * j - 1;
854 	for (m /= 2; m != 0; m /= 2) {
855 		k = n - m;
856 		for (j = 1; j <= k; j++) {
857 			for (ai = &a[j]; ai > a; ai -= m) {
858 				aim = &ai[m];
859 				if (aim < ai)
860 					break;	/* wraparound */
861 				if (aim->value > ai[0].value ||
862 				    (aim->value == ai[0].value &&
863 					aim->serial > ai[0].serial))
864 					break;
865 				w.value = ai[0].value;
866 				ai[0].value = aim->value;
867 				aim->value = w.value;
868 				w.serial = ai[0].serial;
869 				ai[0].serial = aim->serial;
870 				aim->serial = w.serial;
871 			}
872 		}
873 	}
874 }
875 
876 static void
877 unsort(struct line *f, int l, int *b)
878 {
879 	int *a, i;
880 
881 	a = xcalloc(l + 1, sizeof(*a));
882 	for (i = 1; i <= l; i++)
883 		a[f[i].serial] = f[i].value;
884 	for (i = 1; i <= l; i++)
885 		b[i] = a[i];
886 	xfree(a);
887 }
888 
889 static int
890 skipline(FILE *f)
891 {
892 	int i, c;
893 
894 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
895 		continue;
896 	return (i);
897 }
898 
899 static void
900 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
901 {
902 	int m, i0, i1, j0, j1;
903 
904 	rewind(f1);
905 	rewind(f2);
906 	m = len[0];
907 	J[0] = 0;
908 	J[m + 1] = len[1] + 1;
909 	if (diff_format != D_EDIT) {
910 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
911 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
912 				i0++;
913 			j0 = J[i0 - 1] + 1;
914 			i1 = i0 - 1;
915 			while (i1 < m && J[i1 + 1] == 0)
916 				i1++;
917 			j1 = J[i1 + 1] - 1;
918 			J[i1] = j1;
919 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
920 		}
921 	} else {
922 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
923 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
924 				i0--;
925 			j0 = J[i0 + 1] - 1;
926 			i1 = i0 + 1;
927 			while (i1 > 1 && J[i1 - 1] == 0)
928 				i1--;
929 			j1 = J[i1 - 1] + 1;
930 			J[i1] = j1;
931 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
932 		}
933 	}
934 	if (m == 0)
935 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
936 	if (diff_format == D_IFDEF) {
937 		for (;;) {
938 #define	c i0
939 			if ((c = getc(f1)) == EOF)
940 				return;
941 			putchar(c);
942 		}
943 #undef c
944 	}
945 	if (anychange != 0) {
946 		if (diff_format == D_CONTEXT)
947 			dump_context_vec(f1, f2, flags);
948 		else if (diff_format == D_UNIFIED)
949 			dump_unified_vec(f1, f2, flags);
950 	}
951 }
952 
953 static void
954 range(int a, int b, char *separator)
955 {
956 	printf("%d", a > b ? b : a);
957 	if (a < b)
958 		printf("%s%d", separator, b);
959 }
960 
961 static void
962 uni_range(int a, int b)
963 {
964 	if (a < b)
965 		printf("%d,%d", a, b - a + 1);
966 	else if (a == b)
967 		printf("%d", b);
968 	else
969 		printf("%d,0", b);
970 }
971 
972 static char *
973 preadline(int fd, size_t rlen, off_t off)
974 {
975 	char *line;
976 	ssize_t nr;
977 
978 	line = xmalloc(rlen + 1);
979 	if ((nr = pread(fd, line, rlen, off)) < 0)
980 		err(1, "preadline");
981 	if (nr > 0 && line[nr-1] == '\n')
982 		nr--;
983 	line[nr] = '\0';
984 	return (line);
985 }
986 
987 static int
988 ignoreline(char *line)
989 {
990 	int ret;
991 
992 	ret = regexec(&ignore_re, line, 0, NULL, 0);
993 	xfree(line);
994 	return (ret == 0);	/* if it matched, it should be ignored. */
995 }
996 
997 /*
998  * Indicate that there is a difference between lines a and b of the from file
999  * to get to lines c to d of the to file.  If a is greater then b then there
1000  * are no lines in the from file involved and this means that there were
1001  * lines appended (beginning at b).  If c is greater than d then there are
1002  * lines missing from the to file.
1003  */
1004 static void
1005 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1006     int *pflags)
1007 {
1008 	static size_t max_context = 64;
1009 	int i;
1010 
1011 restart:
1012 	if (diff_format != D_IFDEF && a > b && c > d)
1013 		return;
1014 	if (ignore_pats != NULL) {
1015 		char *line;
1016 		/*
1017 		 * All lines in the change, insert, or delete must
1018 		 * match an ignore pattern for the change to be
1019 		 * ignored.
1020 		 */
1021 		if (a <= b) {		/* Changes and deletes. */
1022 			for (i = a; i <= b; i++) {
1023 				line = preadline(fileno(f1),
1024 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1025 				if (!ignoreline(line))
1026 					goto proceed;
1027 			}
1028 		}
1029 		if (a > b || c <= d) {	/* Changes and inserts. */
1030 			for (i = c; i <= d; i++) {
1031 				line = preadline(fileno(f2),
1032 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1033 				if (!ignoreline(line))
1034 					goto proceed;
1035 			}
1036 		}
1037 		return;
1038 	}
1039 proceed:
1040 	if (*pflags & D_HEADER) {
1041 		printf("%s %s %s\n", diffargs, file1, file2);
1042 		*pflags &= ~D_HEADER;
1043 	}
1044 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1045 		/*
1046 		 * Allocate change records as needed.
1047 		 */
1048 		if (context_vec_ptr == context_vec_end - 1) {
1049 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1050 			max_context <<= 1;
1051 			context_vec_start = xrealloc(context_vec_start,
1052 			    max_context, sizeof(*context_vec_start));
1053 			context_vec_end = context_vec_start + max_context;
1054 			context_vec_ptr = context_vec_start + offset;
1055 		}
1056 		if (anychange == 0) {
1057 			/*
1058 			 * Print the context/unidiff header first time through.
1059 			 */
1060 			print_header(file1, file2);
1061 			anychange = 1;
1062 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1063 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1064 			/*
1065 			 * If this change is more than 'diff_context' lines from the
1066 			 * previous change, dump the record and reset it.
1067 			 */
1068 			if (diff_format == D_CONTEXT)
1069 				dump_context_vec(f1, f2, *pflags);
1070 			else
1071 				dump_unified_vec(f1, f2, *pflags);
1072 		}
1073 		context_vec_ptr++;
1074 		context_vec_ptr->a = a;
1075 		context_vec_ptr->b = b;
1076 		context_vec_ptr->c = c;
1077 		context_vec_ptr->d = d;
1078 		return;
1079 	}
1080 	if (anychange == 0)
1081 		anychange = 1;
1082 	switch (diff_format) {
1083 	case D_BRIEF:
1084 		return;
1085 	case D_NORMAL:
1086 	case D_EDIT:
1087 		range(a, b, ",");
1088 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1089 		if (diff_format == D_NORMAL)
1090 			range(c, d, ",");
1091 		putchar('\n');
1092 		break;
1093 	case D_REVERSE:
1094 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1095 		range(a, b, " ");
1096 		putchar('\n');
1097 		break;
1098 	case D_NREVERSE:
1099 		if (a > b)
1100 			printf("a%d %d\n", b, d - c + 1);
1101 		else {
1102 			printf("d%d %d\n", a, b - a + 1);
1103 			if (!(c > d))
1104 				/* add changed lines */
1105 				printf("a%d %d\n", b, d - c + 1);
1106 		}
1107 		break;
1108 	}
1109 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1110 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1111 		if (a <= b && c <= d && diff_format == D_NORMAL)
1112 			puts("---");
1113 	}
1114 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1115 	if (i != 0 && diff_format == D_EDIT) {
1116 		/*
1117 		 * A non-zero return value for D_EDIT indicates that the
1118 		 * last line printed was a bare dot (".") that has been
1119 		 * escaped as ".." to prevent ed(1) from misinterpreting
1120 		 * it.  We have to add a substitute command to change this
1121 		 * back and restart where we left off.
1122 		 */
1123 		puts(".");
1124 		printf("%ds/^\\.\\././\n", a);
1125 		a += i;
1126 		c += i;
1127 		goto restart;
1128 	}
1129 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1130 		puts(".");
1131 	if (inifdef) {
1132 		printf("#endif /* %s */\n", ifdefname);
1133 		inifdef = 0;
1134 	}
1135 }
1136 
1137 static int
1138 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1139 {
1140 	int i, j, c, lastc, col, nc;
1141 
1142 	/*
1143 	 * When doing #ifdef's, copy down to current line
1144 	 * if this is the first file, so that stuff makes it to output.
1145 	 */
1146 	if (diff_format == D_IFDEF && oldfile) {
1147 		long curpos = ftell(lb);
1148 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1149 		nc = f[a > b ? b : a - 1] - curpos;
1150 		for (i = 0; i < nc; i++)
1151 			putchar(getc(lb));
1152 	}
1153 	if (a > b)
1154 		return (0);
1155 	if (diff_format == D_IFDEF) {
1156 		if (inifdef) {
1157 			printf("#else /* %s%s */\n",
1158 			    oldfile == 1 ? "!" : "", ifdefname);
1159 		} else {
1160 			if (oldfile)
1161 				printf("#ifndef %s\n", ifdefname);
1162 			else
1163 				printf("#ifdef %s\n", ifdefname);
1164 		}
1165 		inifdef = 1 + oldfile;
1166 	}
1167 	for (i = a; i <= b; i++) {
1168 		fseek(lb, f[i - 1], SEEK_SET);
1169 		nc = f[i] - f[i - 1];
1170 		if (diff_format != D_IFDEF && ch != '\0') {
1171 			putchar(ch);
1172 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1173 			    || diff_format == D_UNIFIED))
1174 				putchar('\t');
1175 			else if (diff_format != D_UNIFIED)
1176 				putchar(' ');
1177 		}
1178 		col = 0;
1179 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1180 			if ((c = getc(lb)) == EOF) {
1181 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1182 				    diff_format == D_NREVERSE)
1183 					warnx("No newline at end of file");
1184 				else
1185 					puts("\n\\ No newline at end of file");
1186 				return (0);
1187 			}
1188 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1189 				do {
1190 					putchar(' ');
1191 				} while (++col & 7);
1192 			} else {
1193 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1194 				    && lastc == '.') {
1195 					/*
1196 					 * Don't print a bare "." line
1197 					 * since that will confuse ed(1).
1198 					 * Print ".." instead and return,
1199 					 * giving the caller an offset
1200 					 * from which to restart.
1201 					 */
1202 					puts(".");
1203 					return (i - a + 1);
1204 				}
1205 				putchar(c);
1206 				col++;
1207 			}
1208 		}
1209 	}
1210 	return (0);
1211 }
1212 
1213 /*
1214  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1215  */
1216 static int
1217 readhash(FILE *f, int flags)
1218 {
1219 	int i, t, space;
1220 	int sum;
1221 
1222 	sum = 1;
1223 	space = 0;
1224 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1225 		if (flags & D_IGNORECASE)
1226 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1227 				if (t == EOF) {
1228 					if (i == 0)
1229 						return (0);
1230 					break;
1231 				}
1232 				sum = sum * 127 + chrtran[t];
1233 			}
1234 		else
1235 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1236 				if (t == EOF) {
1237 					if (i == 0)
1238 						return (0);
1239 					break;
1240 				}
1241 				sum = sum * 127 + t;
1242 			}
1243 	} else {
1244 		for (i = 0;;) {
1245 			switch (t = getc(f)) {
1246 			case '\t':
1247 			case '\r':
1248 			case '\v':
1249 			case '\f':
1250 			case ' ':
1251 				space++;
1252 				continue;
1253 			default:
1254 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1255 					i++;
1256 					space = 0;
1257 				}
1258 				sum = sum * 127 + chrtran[t];
1259 				i++;
1260 				continue;
1261 			case EOF:
1262 				if (i == 0)
1263 					return (0);
1264 				/* FALLTHROUGH */
1265 			case '\n':
1266 				break;
1267 			}
1268 			break;
1269 		}
1270 	}
1271 	/*
1272 	 * There is a remote possibility that we end up with a zero sum.
1273 	 * Zero is used as an EOF marker, so return 1 instead.
1274 	 */
1275 	return (sum == 0 ? 1 : sum);
1276 }
1277 
1278 static int
1279 asciifile(FILE *f)
1280 {
1281 	unsigned char buf[BUFSIZ];
1282 	size_t i, cnt;
1283 
1284 	if (f == NULL)
1285 		return (1);
1286 
1287 	rewind(f);
1288 	cnt = fread(buf, 1, sizeof(buf), f);
1289 	for (i = 0; i < cnt; i++)
1290 		if (!isprint(buf[i]) && !isspace(buf[i]))
1291 			return (0);
1292 	return (1);
1293 }
1294 
1295 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1296 
1297 static char *
1298 match_function(const long *f, int pos, FILE *fp)
1299 {
1300 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1301 	size_t nc;
1302 	int last = lastline;
1303 	char *state = NULL;
1304 
1305 	lastline = pos;
1306 	while (pos > last) {
1307 		fseek(fp, f[pos - 1], SEEK_SET);
1308 		nc = f[pos] - f[pos - 1];
1309 		if (nc >= sizeof(buf))
1310 			nc = sizeof(buf) - 1;
1311 		nc = fread(buf, 1, nc, fp);
1312 		if (nc > 0) {
1313 			buf[nc] = '\0';
1314 			buf[strcspn(buf, "\n")] = '\0';
1315 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1316 				if (begins_with(buf, "private:")) {
1317 					if (!state)
1318 						state = " (private)";
1319 				} else if (begins_with(buf, "protected:")) {
1320 					if (!state)
1321 						state = " (protected)";
1322 				} else if (begins_with(buf, "public:")) {
1323 					if (!state)
1324 						state = " (public)";
1325 				} else {
1326 					strlcpy(lastbuf, buf, sizeof lastbuf);
1327 					if (state)
1328 						strlcat(lastbuf, state,
1329 						    sizeof lastbuf);
1330 					lastmatchline = pos;
1331 					return lastbuf;
1332 				}
1333 			}
1334 		}
1335 		pos--;
1336 	}
1337 	return lastmatchline > 0 ? lastbuf : NULL;
1338 }
1339 
1340 /* dump accumulated "context" diff changes */
1341 static void
1342 dump_context_vec(FILE *f1, FILE *f2, int flags)
1343 {
1344 	struct context_vec *cvp = context_vec_start;
1345 	int lowa, upb, lowc, upd, do_output;
1346 	int a, b, c, d;
1347 	char ch, *f;
1348 
1349 	if (context_vec_start > context_vec_ptr)
1350 		return;
1351 
1352 	b = d = 0;		/* gcc */
1353 	lowa = MAX(1, cvp->a - diff_context);
1354 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1355 	lowc = MAX(1, cvp->c - diff_context);
1356 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1357 
1358 	printf("***************");
1359 	if ((flags & D_PROTOTYPE)) {
1360 		f = match_function(ixold, lowa-1, f1);
1361 		if (f != NULL) {
1362 			putchar(' ');
1363 			fputs(f, stdout);
1364 		}
1365 	}
1366 	printf("\n*** ");
1367 	range(lowa, upb, ",");
1368 	printf(" ****\n");
1369 
1370 	/*
1371 	 * Output changes to the "old" file.  The first loop suppresses
1372 	 * output if there were no changes to the "old" file (we'll see
1373 	 * the "old" lines as context in the "new" list).
1374 	 */
1375 	do_output = 0;
1376 	for (; cvp <= context_vec_ptr; cvp++)
1377 		if (cvp->a <= cvp->b) {
1378 			cvp = context_vec_start;
1379 			do_output++;
1380 			break;
1381 		}
1382 	if (do_output) {
1383 		while (cvp <= context_vec_ptr) {
1384 			a = cvp->a;
1385 			b = cvp->b;
1386 			c = cvp->c;
1387 			d = cvp->d;
1388 
1389 			if (a <= b && c <= d)
1390 				ch = 'c';
1391 			else
1392 				ch = (a <= b) ? 'd' : 'a';
1393 
1394 			if (ch == 'a')
1395 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1396 			else {
1397 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1398 				fetch(ixold, a, b, f1,
1399 				    ch == 'c' ? '!' : '-', 0, flags);
1400 			}
1401 			lowa = b + 1;
1402 			cvp++;
1403 		}
1404 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1405 	}
1406 	/* output changes to the "new" file */
1407 	printf("--- ");
1408 	range(lowc, upd, ",");
1409 	printf(" ----\n");
1410 
1411 	do_output = 0;
1412 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1413 		if (cvp->c <= cvp->d) {
1414 			cvp = context_vec_start;
1415 			do_output++;
1416 			break;
1417 		}
1418 	if (do_output) {
1419 		while (cvp <= context_vec_ptr) {
1420 			a = cvp->a;
1421 			b = cvp->b;
1422 			c = cvp->c;
1423 			d = cvp->d;
1424 
1425 			if (a <= b && c <= d)
1426 				ch = 'c';
1427 			else
1428 				ch = (a <= b) ? 'd' : 'a';
1429 
1430 			if (ch == 'd')
1431 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1432 			else {
1433 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1434 				fetch(ixnew, c, d, f2,
1435 				    ch == 'c' ? '!' : '+', 0, flags);
1436 			}
1437 			lowc = d + 1;
1438 			cvp++;
1439 		}
1440 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1441 	}
1442 	context_vec_ptr = context_vec_start - 1;
1443 }
1444 
1445 /* dump accumulated "unified" diff changes */
1446 static void
1447 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1448 {
1449 	struct context_vec *cvp = context_vec_start;
1450 	int lowa, upb, lowc, upd;
1451 	int a, b, c, d;
1452 	char ch, *f;
1453 
1454 	if (context_vec_start > context_vec_ptr)
1455 		return;
1456 
1457 	b = d = 0;		/* gcc */
1458 	lowa = MAX(1, cvp->a - diff_context);
1459 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1460 	lowc = MAX(1, cvp->c - diff_context);
1461 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1462 
1463 	fputs("@@ -", stdout);
1464 	uni_range(lowa, upb);
1465 	fputs(" +", stdout);
1466 	uni_range(lowc, upd);
1467 	fputs(" @@", stdout);
1468 	if ((flags & D_PROTOTYPE)) {
1469 		f = match_function(ixold, lowa-1, f1);
1470 		if (f != NULL) {
1471 			putchar(' ');
1472 			fputs(f, stdout);
1473 		}
1474 	}
1475 	putchar('\n');
1476 
1477 	/*
1478 	 * Output changes in "unified" diff format--the old and new lines
1479 	 * are printed together.
1480 	 */
1481 	for (; cvp <= context_vec_ptr; cvp++) {
1482 		a = cvp->a;
1483 		b = cvp->b;
1484 		c = cvp->c;
1485 		d = cvp->d;
1486 
1487 		/*
1488 		 * c: both new and old changes
1489 		 * d: only changes in the old file
1490 		 * a: only changes in the new file
1491 		 */
1492 		if (a <= b && c <= d)
1493 			ch = 'c';
1494 		else
1495 			ch = (a <= b) ? 'd' : 'a';
1496 
1497 		switch (ch) {
1498 		case 'c':
1499 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1500 			fetch(ixold, a, b, f1, '-', 0, flags);
1501 			fetch(ixnew, c, d, f2, '+', 0, flags);
1502 			break;
1503 		case 'd':
1504 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1505 			fetch(ixold, a, b, f1, '-', 0, flags);
1506 			break;
1507 		case 'a':
1508 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1509 			fetch(ixnew, c, d, f2, '+', 0, flags);
1510 			break;
1511 		}
1512 		lowa = b + 1;
1513 		lowc = d + 1;
1514 	}
1515 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1516 
1517 	context_vec_ptr = context_vec_start - 1;
1518 }
1519 
1520 static void
1521 print_header(const char *file1, const char *file2)
1522 {
1523 	if (label[0] != NULL)
1524 		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1525 		    label[0]);
1526 	else
1527 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1528 		    file1, ctime(&stb1.st_mtime));
1529 	if (label[1] != NULL)
1530 		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1531 		    label[1]);
1532 	else
1533 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1534 		    file2, ctime(&stb2.st_mtime));
1535 }
1536