xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 782453306cbb5c6a2b9a69d6f07600709ab921ee)
1 /*	$OpenBSD: diffreg.c,v 1.81 2012/05/22 12:30:24 millert Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 #define	diff_output	printf
176 static FILE	*opentemp(const char *);
177 static void	 output(char *, FILE *, char *, FILE *, int);
178 static void	 check(FILE *, FILE *, int);
179 static void	 range(int, int, char *);
180 static void	 uni_range(int, int);
181 static void	 dump_context_vec(FILE *, FILE *, int);
182 static void	 dump_unified_vec(FILE *, FILE *, int);
183 static void	 prepare(int, FILE *, off_t, int);
184 static void	 prune(void);
185 static void	 equiv(struct line *, int, struct line *, int, int *);
186 static void	 unravel(int);
187 static void	 unsort(struct line *, int, int *);
188 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
189 static void	 sort(struct line *, int);
190 static void	 print_header(const char *, const char *);
191 static int	 ignoreline(char *);
192 static int	 asciifile(FILE *);
193 static int	 fetch(long *, int, int, FILE *, int, int, int);
194 static int	 newcand(int, int, int);
195 static int	 search(int *, int, int);
196 static int	 skipline(FILE *);
197 static int	 isqrt(int);
198 static int	 stone(int *, int, int *, int *, int);
199 static int	 readhash(FILE *, int);
200 static int	 files_differ(FILE *, FILE *, int);
201 static char	*match_function(const long *, int, FILE *);
202 static char	*preadline(int, size_t, off_t);
203 
204 static int  *J;			/* will be overlaid on class */
205 static int  *class;		/* will be overlaid on file[0] */
206 static int  *klist;		/* will be overlaid on file[0] after class */
207 static int  *member;		/* will be overlaid on file[1] */
208 static int   clen;
209 static int   inifdef;		/* whether or not we are in a #ifdef block */
210 static int   len[2];
211 static int   pref, suff;	/* length of prefix and suffix */
212 static int   slen[2];
213 static int   anychange;
214 static long *ixnew;		/* will be overlaid on file[1] */
215 static long *ixold;		/* will be overlaid on klist */
216 static struct cand *clist;	/* merely a free storage pot for candidates */
217 static int   clistlen;		/* the length of clist */
218 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
219 static u_char *chrtran;		/* translation table for case-folding */
220 static struct context_vec *context_vec_start;
221 static struct context_vec *context_vec_end;
222 static struct context_vec *context_vec_ptr;
223 
224 #define FUNCTION_CONTEXT_SIZE	55
225 static char lastbuf[FUNCTION_CONTEXT_SIZE];
226 static int lastline;
227 static int lastmatchline;
228 
229 
230 /*
231  * chrtran points to one of 2 translation tables: cup2low if folding upper to
232  * lower case clow2low if not folding case
233  */
234 u_char clow2low[256] = {
235 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
236 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
237 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
238 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
239 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
240 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
241 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
242 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
243 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
244 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
245 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
246 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
247 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
248 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
249 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
250 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
251 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
252 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
253 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
254 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
255 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
256 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
257 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 	0xfd, 0xfe, 0xff
259 };
260 
261 u_char cup2low[256] = {
262 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
263 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
264 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
265 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
266 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
267 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
268 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
269 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
270 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
271 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
272 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
273 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
274 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
275 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
276 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
277 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
278 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
279 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
280 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
281 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
282 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
283 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
284 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
285 	0xfd, 0xfe, 0xff
286 };
287 
288 int
289 diffreg(char *file1, char *file2, int flags)
290 {
291 	FILE *f1, *f2;
292 	int i, rval, ostdout = -1;
293 	pid_t pid = -1;
294 
295 	f1 = f2 = NULL;
296 	rval = D_SAME;
297 	anychange = 0;
298 	lastline = 0;
299 	lastmatchline = 0;
300 	context_vec_ptr = context_vec_start - 1;
301 	if (flags & D_IGNORECASE)
302 		chrtran = cup2low;
303 	else
304 		chrtran = clow2low;
305 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
306 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
308 		goto closem;
309 
310 	if (flags & D_EMPTY1)
311 		f1 = fopen(_PATH_DEVNULL, "r");
312 	else {
313 		if (!S_ISREG(stb1.st_mode)) {
314 			if ((f1 = opentemp(file1)) == NULL ||
315 			    fstat(fileno(f1), &stb1) < 0) {
316 				warn("%s", file1);
317 				status |= 2;
318 				goto closem;
319 			}
320 		} else if (strcmp(file1, "-") == 0)
321 			f1 = stdin;
322 		else
323 			f1 = fopen(file1, "r");
324 	}
325 	if (f1 == NULL) {
326 		warn("%s", file1);
327 		status |= 2;
328 		goto closem;
329 	}
330 
331 	if (flags & D_EMPTY2)
332 		f2 = fopen(_PATH_DEVNULL, "r");
333 	else {
334 		if (!S_ISREG(stb2.st_mode)) {
335 			if ((f2 = opentemp(file2)) == NULL ||
336 			    fstat(fileno(f2), &stb2) < 0) {
337 				warn("%s", file2);
338 				status |= 2;
339 				goto closem;
340 			}
341 		} else if (strcmp(file2, "-") == 0)
342 			f2 = stdin;
343 		else
344 			f2 = fopen(file2, "r");
345 	}
346 	if (f2 == NULL) {
347 		warn("%s", file2);
348 		status |= 2;
349 		goto closem;
350 	}
351 
352 	switch (files_differ(f1, f2, flags)) {
353 	case 0:
354 		goto closem;
355 	case 1:
356 		break;
357 	default:
358 		/* error */
359 		status |= 2;
360 		goto closem;
361 	}
362 
363 	if ((flags & D_FORCEASCII) == 0 &&
364 	    (!asciifile(f1) || !asciifile(f2))) {
365 		rval = D_BINARY;
366 		status |= 1;
367 		goto closem;
368 	}
369 	if (lflag) {
370 		/* redirect stdout to pr */
371 		int pfd[2];
372 		char *header;
373 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
374 
375 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
376 		prargv[2] = header;
377 		fflush(stdout);
378 		rewind(stdout);
379 		pipe(pfd);
380 		switch ((pid = fork())) {
381 		case -1:
382 			warnx("No more processes");
383 			status |= 2;
384 			xfree(header);
385 			rval = D_ERROR;
386 			goto closem;
387 		case 0:
388 			/* child */
389 			if (pfd[0] != STDIN_FILENO) {
390 				dup2(pfd[0], STDIN_FILENO);
391 				close(pfd[0]);
392 			}
393 			close(pfd[1]);
394 			execv(_PATH_PR, prargv);
395 			_exit(127);
396 		default:
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			xfree(header);
406 		}
407 	}
408 	prepare(0, f1, stb1.st_size, flags);
409 	prepare(1, f2, stb2.st_size, flags);
410 
411 	prune();
412 	sort(sfile[0], slen[0]);
413 	sort(sfile[1], slen[1]);
414 
415 	member = (int *)file[1];
416 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
417 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
418 
419 	class = (int *)file[0];
420 	unsort(sfile[0], slen[0], class);
421 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
422 
423 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
424 	clen = 0;
425 	clistlen = 100;
426 	clist = xcalloc(clistlen, sizeof(*clist));
427 	i = stone(class, slen[0], member, klist, flags);
428 	xfree(member);
429 	xfree(class);
430 
431 	J = xrealloc(J, len[0] + 2, sizeof(*J));
432 	unravel(klist[i]);
433 	xfree(clist);
434 	xfree(klist);
435 
436 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
437 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
438 	check(f1, f2, flags);
439 	output(file1, f1, file2, f2, flags);
440 	if (ostdout != -1) {
441 		int wstatus;
442 
443 		/* close the pipe to pr and restore stdout */
444 		fflush(stdout);
445 		rewind(stdout);
446 		if (ostdout != STDOUT_FILENO) {
447 			close(STDOUT_FILENO);
448 			dup2(ostdout, STDOUT_FILENO);
449 			close(ostdout);
450 		}
451 		waitpid(pid, &wstatus, 0);
452 	}
453 closem:
454 	if (anychange) {
455 		status |= 1;
456 		if (rval == D_SAME)
457 			rval = D_DIFFER;
458 	}
459 	if (f1 != NULL)
460 		fclose(f1);
461 	if (f2 != NULL)
462 		fclose(f2);
463 
464 	return (rval);
465 }
466 
467 /*
468  * Check to see if the given files differ.
469  * Returns 0 if they are the same, 1 if different, and -1 on error.
470  * XXX - could use code from cmp(1) [faster]
471  */
472 static int
473 files_differ(FILE *f1, FILE *f2, int flags)
474 {
475 	char buf1[BUFSIZ], buf2[BUFSIZ];
476 	size_t i, j;
477 
478 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
479 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
480 		return (1);
481 	for (;;) {
482 		i = fread(buf1, 1, sizeof(buf1), f1);
483 		j = fread(buf2, 1, sizeof(buf2), f2);
484 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
485 			return (-1);
486 		if (i != j)
487 			return (1);
488 		if (i == 0)
489 			return (0);
490 		if (memcmp(buf1, buf2, i) != 0)
491 			return (1);
492 	}
493 }
494 
495 static FILE *
496 opentemp(const char *file)
497 {
498 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
499 	ssize_t nread;
500 	int ifd, ofd;
501 
502 	if (strcmp(file, "-") == 0)
503 		ifd = STDIN_FILENO;
504 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
505 		return (NULL);
506 
507 	if ((tempdir = getenv("TMPDIR")) == NULL)
508 		tempdir = _PATH_TMP;
509 
510 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
511 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
512 	    sizeof(tempfile)) {
513 		close(ifd);
514 		errno = ENAMETOOLONG;
515 		return (NULL);
516 	}
517 
518 	if ((ofd = mkstemp(tempfile)) < 0) {
519 		close(ifd);
520 		return (NULL);
521 	}
522 	unlink(tempfile);
523 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
524 		if (write(ofd, buf, nread) != nread) {
525 			close(ifd);
526 			close(ofd);
527 			return (NULL);
528 		}
529 	}
530 	close(ifd);
531 	lseek(ofd, (off_t)0, SEEK_SET);
532 	return (fdopen(ofd, "r"));
533 }
534 
535 char *
536 splice(char *dir, char *file)
537 {
538 	char *tail, *buf;
539 	size_t dirlen;
540 
541 	dirlen = strlen(dir);
542 	while (dirlen != 0 && dir[dirlen - 1] == '/')
543 	    dirlen--;
544 	if ((tail = strrchr(file, '/')) == NULL)
545 		tail = file;
546 	else
547 		tail++;
548 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
549 	return (buf);
550 }
551 
552 static void
553 prepare(int i, FILE *fd, off_t filesize, int flags)
554 {
555 	struct line *p;
556 	int j, h;
557 	size_t sz;
558 
559 	rewind(fd);
560 
561 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
562 	if (sz < 100)
563 		sz = 100;
564 
565 	p = xcalloc(sz + 3, sizeof(*p));
566 	for (j = 0; (h = readhash(fd, flags));) {
567 		if (j == sz) {
568 			sz = sz * 3 / 2;
569 			p = xrealloc(p, sz + 3, sizeof(*p));
570 		}
571 		p[++j].value = h;
572 	}
573 	len[i] = j;
574 	file[i] = p;
575 }
576 
577 static void
578 prune(void)
579 {
580 	int i, j;
581 
582 	for (pref = 0; pref < len[0] && pref < len[1] &&
583 	    file[0][pref + 1].value == file[1][pref + 1].value;
584 	    pref++)
585 		;
586 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
587 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
588 	    suff++)
589 		;
590 	for (j = 0; j < 2; j++) {
591 		sfile[j] = file[j] + pref;
592 		slen[j] = len[j] - pref - suff;
593 		for (i = 0; i <= slen[j]; i++)
594 			sfile[j][i].serial = i;
595 	}
596 }
597 
598 static void
599 equiv(struct line *a, int n, struct line *b, int m, int *c)
600 {
601 	int i, j;
602 
603 	i = j = 1;
604 	while (i <= n && j <= m) {
605 		if (a[i].value < b[j].value)
606 			a[i++].value = 0;
607 		else if (a[i].value == b[j].value)
608 			a[i++].value = j;
609 		else
610 			j++;
611 	}
612 	while (i <= n)
613 		a[i++].value = 0;
614 	b[m + 1].value = 0;
615 	j = 0;
616 	while (++j <= m) {
617 		c[j] = -b[j].serial;
618 		while (b[j + 1].value == b[j].value) {
619 			j++;
620 			c[j] = b[j].serial;
621 		}
622 	}
623 	c[j] = -1;
624 }
625 
626 /* Code taken from ping.c */
627 static int
628 isqrt(int n)
629 {
630 	int y, x = 1;
631 
632 	if (n == 0)
633 		return (0);
634 
635 	do { /* newton was a stinker */
636 		y = x;
637 		x = n / x;
638 		x += y;
639 		x /= 2;
640 	} while ((x - y) > 1 || (x - y) < -1);
641 
642 	return (x);
643 }
644 
645 static int
646 stone(int *a, int n, int *b, int *c, int flags)
647 {
648 	int i, k, y, j, l;
649 	int oldc, tc, oldl, sq;
650 	u_int numtries, bound;
651 
652 	if (flags & D_MINIMAL)
653 		bound = UINT_MAX;
654 	else {
655 		sq = isqrt(n);
656 		bound = MAX(256, sq);
657 	}
658 
659 	k = 0;
660 	c[0] = newcand(0, 0, 0);
661 	for (i = 1; i <= n; i++) {
662 		j = a[i];
663 		if (j == 0)
664 			continue;
665 		y = -b[j];
666 		oldl = 0;
667 		oldc = c[0];
668 		numtries = 0;
669 		do {
670 			if (y <= clist[oldc].y)
671 				continue;
672 			l = search(c, k, y);
673 			if (l != oldl + 1)
674 				oldc = c[l - 1];
675 			if (l <= k) {
676 				if (clist[c[l]].y <= y)
677 					continue;
678 				tc = c[l];
679 				c[l] = newcand(i, y, oldc);
680 				oldc = tc;
681 				oldl = l;
682 				numtries++;
683 			} else {
684 				c[l] = newcand(i, y, oldc);
685 				k++;
686 				break;
687 			}
688 		} while ((y = b[++j]) > 0 && numtries < bound);
689 	}
690 	return (k);
691 }
692 
693 static int
694 newcand(int x, int y, int pred)
695 {
696 	struct cand *q;
697 
698 	if (clen == clistlen) {
699 		clistlen = clistlen * 11 / 10;
700 		clist = xrealloc(clist, clistlen, sizeof(*clist));
701 	}
702 	q = clist + clen;
703 	q->x = x;
704 	q->y = y;
705 	q->pred = pred;
706 	return (clen++);
707 }
708 
709 static int
710 search(int *c, int k, int y)
711 {
712 	int i, j, l, t;
713 
714 	if (clist[c[k]].y < y)	/* quick look for typical case */
715 		return (k + 1);
716 	i = 0;
717 	j = k + 1;
718 	for (;;) {
719 		l = (i + j) / 2;
720 		if (l <= i)
721 			break;
722 		t = clist[c[l]].y;
723 		if (t > y)
724 			j = l;
725 		else if (t < y)
726 			i = l;
727 		else
728 			return (l);
729 	}
730 	return (l + 1);
731 }
732 
733 static void
734 unravel(int p)
735 {
736 	struct cand *q;
737 	int i;
738 
739 	for (i = 0; i <= len[0]; i++)
740 		J[i] = i <= pref ? i :
741 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
742 	for (q = clist + p; q->y != 0; q = clist + q->pred)
743 		J[q->x + pref] = q->y + pref;
744 }
745 
746 /*
747  * Check does double duty:
748  *  1.	ferret out any fortuitous correspondences due
749  *	to confounding by hashing (which result in "jackpot")
750  *  2.  collect random access indexes to the two files
751  */
752 static void
753 check(FILE *f1, FILE *f2, int flags)
754 {
755 	int i, j, jackpot, c, d;
756 	long ctold, ctnew;
757 
758 	rewind(f1);
759 	rewind(f2);
760 	j = 1;
761 	ixold[0] = ixnew[0] = 0;
762 	jackpot = 0;
763 	ctold = ctnew = 0;
764 	for (i = 1; i <= len[0]; i++) {
765 		if (J[i] == 0) {
766 			ixold[i] = ctold += skipline(f1);
767 			continue;
768 		}
769 		while (j < J[i]) {
770 			ixnew[j] = ctnew += skipline(f2);
771 			j++;
772 		}
773 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
774 			for (;;) {
775 				c = getc(f1);
776 				d = getc(f2);
777 				/*
778 				 * GNU diff ignores a missing newline
779 				 * in one file for -b or -w.
780 				 */
781 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
782 				    ((c == EOF && d == '\n') ||
783 				    (c == '\n' && d == EOF))) {
784 					break;
785 				}
786 				ctold++;
787 				ctnew++;
788 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
789 				    isspace(d)) {
790 					do {
791 						if (c == '\n')
792 							break;
793 						ctold++;
794 					} while (isspace(c = getc(f1)));
795 					do {
796 						if (d == '\n')
797 							break;
798 						ctnew++;
799 					} while (isspace(d = getc(f2)));
800 				} else if ((flags & D_IGNOREBLANKS)) {
801 					while (isspace(c) && c != '\n') {
802 						c = getc(f1);
803 						ctold++;
804 					}
805 					while (isspace(d) && d != '\n') {
806 						d = getc(f2);
807 						ctnew++;
808 					}
809 				}
810 				if (chrtran[c] != chrtran[d]) {
811 					jackpot++;
812 					J[i] = 0;
813 					if (c != '\n' && c != EOF)
814 						ctold += skipline(f1);
815 					if (d != '\n' && c != EOF)
816 						ctnew += skipline(f2);
817 					break;
818 				}
819 				if (c == '\n' || c == EOF)
820 					break;
821 			}
822 		} else {
823 			for (;;) {
824 				ctold++;
825 				ctnew++;
826 				if ((c = getc(f1)) != (d = getc(f2))) {
827 					/* jackpot++; */
828 					J[i] = 0;
829 					if (c != '\n' && c != EOF)
830 						ctold += skipline(f1);
831 					if (d != '\n' && c != EOF)
832 						ctnew += skipline(f2);
833 					break;
834 				}
835 				if (c == '\n' || c == EOF)
836 					break;
837 			}
838 		}
839 		ixold[i] = ctold;
840 		ixnew[j] = ctnew;
841 		j++;
842 	}
843 	for (; j <= len[1]; j++)
844 		ixnew[j] = ctnew += skipline(f2);
845 	/*
846 	 * if (jackpot)
847 	 *	fprintf(stderr, "jackpot\n");
848 	 */
849 }
850 
851 /* shellsort CACM #201 */
852 static void
853 sort(struct line *a, int n)
854 {
855 	struct line *ai, *aim, w;
856 	int j, m = 0, k;
857 
858 	if (n == 0)
859 		return;
860 	for (j = 1; j <= n; j *= 2)
861 		m = 2 * j - 1;
862 	for (m /= 2; m != 0; m /= 2) {
863 		k = n - m;
864 		for (j = 1; j <= k; j++) {
865 			for (ai = &a[j]; ai > a; ai -= m) {
866 				aim = &ai[m];
867 				if (aim < ai)
868 					break;	/* wraparound */
869 				if (aim->value > ai[0].value ||
870 				    (aim->value == ai[0].value &&
871 					aim->serial > ai[0].serial))
872 					break;
873 				w.value = ai[0].value;
874 				ai[0].value = aim->value;
875 				aim->value = w.value;
876 				w.serial = ai[0].serial;
877 				ai[0].serial = aim->serial;
878 				aim->serial = w.serial;
879 			}
880 		}
881 	}
882 }
883 
884 static void
885 unsort(struct line *f, int l, int *b)
886 {
887 	int *a, i;
888 
889 	a = xcalloc(l + 1, sizeof(*a));
890 	for (i = 1; i <= l; i++)
891 		a[f[i].serial] = f[i].value;
892 	for (i = 1; i <= l; i++)
893 		b[i] = a[i];
894 	xfree(a);
895 }
896 
897 static int
898 skipline(FILE *f)
899 {
900 	int i, c;
901 
902 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
903 		continue;
904 	return (i);
905 }
906 
907 static void
908 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
909 {
910 	int m, i0, i1, j0, j1;
911 
912 	rewind(f1);
913 	rewind(f2);
914 	m = len[0];
915 	J[0] = 0;
916 	J[m + 1] = len[1] + 1;
917 	if (diff_format != D_EDIT) {
918 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
919 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
920 				i0++;
921 			j0 = J[i0 - 1] + 1;
922 			i1 = i0 - 1;
923 			while (i1 < m && J[i1 + 1] == 0)
924 				i1++;
925 			j1 = J[i1 + 1] - 1;
926 			J[i1] = j1;
927 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
928 		}
929 	} else {
930 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
931 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
932 				i0--;
933 			j0 = J[i0 + 1] - 1;
934 			i1 = i0 + 1;
935 			while (i1 > 1 && J[i1 - 1] == 0)
936 				i1--;
937 			j1 = J[i1 - 1] + 1;
938 			J[i1] = j1;
939 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
940 		}
941 	}
942 	if (m == 0)
943 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
944 	if (diff_format == D_IFDEF) {
945 		for (;;) {
946 #define	c i0
947 			if ((c = getc(f1)) == EOF)
948 				return;
949 			diff_output("%c", c);
950 		}
951 #undef c
952 	}
953 	if (anychange != 0) {
954 		if (diff_format == D_CONTEXT)
955 			dump_context_vec(f1, f2, flags);
956 		else if (diff_format == D_UNIFIED)
957 			dump_unified_vec(f1, f2, flags);
958 	}
959 }
960 
961 static void
962 range(int a, int b, char *separator)
963 {
964 	diff_output("%d", a > b ? b : a);
965 	if (a < b)
966 		diff_output("%s%d", separator, b);
967 }
968 
969 static void
970 uni_range(int a, int b)
971 {
972 	if (a < b)
973 		diff_output("%d,%d", a, b - a + 1);
974 	else if (a == b)
975 		diff_output("%d", b);
976 	else
977 		diff_output("%d,0", b);
978 }
979 
980 static char *
981 preadline(int fd, size_t rlen, off_t off)
982 {
983 	char *line;
984 	ssize_t nr;
985 
986 	line = xmalloc(rlen + 1);
987 	if ((nr = pread(fd, line, rlen, off)) < 0)
988 		err(2, "preadline");
989 	if (nr > 0 && line[nr-1] == '\n')
990 		nr--;
991 	line[nr] = '\0';
992 	return (line);
993 }
994 
995 static int
996 ignoreline(char *line)
997 {
998 	int ret;
999 
1000 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1001 	xfree(line);
1002 	return (ret == 0);	/* if it matched, it should be ignored. */
1003 }
1004 
1005 /*
1006  * Indicate that there is a difference between lines a and b of the from file
1007  * to get to lines c to d of the to file.  If a is greater then b then there
1008  * are no lines in the from file involved and this means that there were
1009  * lines appended (beginning at b).  If c is greater than d then there are
1010  * lines missing from the to file.
1011  */
1012 static void
1013 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1014     int *pflags)
1015 {
1016 	static size_t max_context = 64;
1017 	int i;
1018 
1019 restart:
1020 	if (diff_format != D_IFDEF && a > b && c > d)
1021 		return;
1022 	if (ignore_pats != NULL) {
1023 		char *line;
1024 		/*
1025 		 * All lines in the change, insert, or delete must
1026 		 * match an ignore pattern for the change to be
1027 		 * ignored.
1028 		 */
1029 		if (a <= b) {		/* Changes and deletes. */
1030 			for (i = a; i <= b; i++) {
1031 				line = preadline(fileno(f1),
1032 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1033 				if (!ignoreline(line))
1034 					goto proceed;
1035 			}
1036 		}
1037 		if (a > b || c <= d) {	/* Changes and inserts. */
1038 			for (i = c; i <= d; i++) {
1039 				line = preadline(fileno(f2),
1040 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1041 				if (!ignoreline(line))
1042 					goto proceed;
1043 			}
1044 		}
1045 		return;
1046 	}
1047 proceed:
1048 	if (*pflags & D_HEADER) {
1049 		diff_output("%s %s %s\n", diffargs, file1, file2);
1050 		*pflags &= ~D_HEADER;
1051 	}
1052 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1053 		/*
1054 		 * Allocate change records as needed.
1055 		 */
1056 		if (context_vec_ptr == context_vec_end - 1) {
1057 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1058 			max_context <<= 1;
1059 			context_vec_start = xrealloc(context_vec_start,
1060 			    max_context, sizeof(*context_vec_start));
1061 			context_vec_end = context_vec_start + max_context;
1062 			context_vec_ptr = context_vec_start + offset;
1063 		}
1064 		if (anychange == 0) {
1065 			/*
1066 			 * Print the context/unidiff header first time through.
1067 			 */
1068 			print_header(file1, file2);
1069 			anychange = 1;
1070 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1071 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1072 			/*
1073 			 * If this change is more than 'diff_context' lines from the
1074 			 * previous change, dump the record and reset it.
1075 			 */
1076 			if (diff_format == D_CONTEXT)
1077 				dump_context_vec(f1, f2, *pflags);
1078 			else
1079 				dump_unified_vec(f1, f2, *pflags);
1080 		}
1081 		context_vec_ptr++;
1082 		context_vec_ptr->a = a;
1083 		context_vec_ptr->b = b;
1084 		context_vec_ptr->c = c;
1085 		context_vec_ptr->d = d;
1086 		return;
1087 	}
1088 	if (anychange == 0)
1089 		anychange = 1;
1090 	switch (diff_format) {
1091 	case D_BRIEF:
1092 		return;
1093 	case D_NORMAL:
1094 	case D_EDIT:
1095 		range(a, b, ",");
1096 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1097 		if (diff_format == D_NORMAL)
1098 			range(c, d, ",");
1099 		diff_output("\n");
1100 		break;
1101 	case D_REVERSE:
1102 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1103 		range(a, b, " ");
1104 		diff_output("\n");
1105 		break;
1106 	case D_NREVERSE:
1107 		if (a > b)
1108 			diff_output("a%d %d\n", b, d - c + 1);
1109 		else {
1110 			diff_output("d%d %d\n", a, b - a + 1);
1111 			if (!(c > d))
1112 				/* add changed lines */
1113 				diff_output("a%d %d\n", b, d - c + 1);
1114 		}
1115 		break;
1116 	}
1117 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1118 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1119 		if (a <= b && c <= d && diff_format == D_NORMAL)
1120 			diff_output("---\n");
1121 	}
1122 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1123 	if (i != 0 && diff_format == D_EDIT) {
1124 		/*
1125 		 * A non-zero return value for D_EDIT indicates that the
1126 		 * last line printed was a bare dot (".") that has been
1127 		 * escaped as ".." to prevent ed(1) from misinterpreting
1128 		 * it.  We have to add a substitute command to change this
1129 		 * back and restart where we left off.
1130 		 */
1131 		diff_output(".\n");
1132 		diff_output("%ds/^\\.\\././\n", a);
1133 		a += i;
1134 		c += i;
1135 		goto restart;
1136 	}
1137 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1138 		diff_output(".\n");
1139 	if (inifdef) {
1140 		diff_output("#endif /* %s */\n", ifdefname);
1141 		inifdef = 0;
1142 	}
1143 }
1144 
1145 static int
1146 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1147 {
1148 	int i, j, c, lastc, col, nc;
1149 
1150 	/*
1151 	 * When doing #ifdef's, copy down to current line
1152 	 * if this is the first file, so that stuff makes it to output.
1153 	 */
1154 	if (diff_format == D_IFDEF && oldfile) {
1155 		long curpos = ftell(lb);
1156 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1157 		nc = f[a > b ? b : a - 1] - curpos;
1158 		for (i = 0; i < nc; i++)
1159 			diff_output("%c", getc(lb));
1160 	}
1161 	if (a > b)
1162 		return (0);
1163 	if (diff_format == D_IFDEF) {
1164 		if (inifdef) {
1165 			diff_output("#else /* %s%s */\n",
1166 			    oldfile == 1 ? "!" : "", ifdefname);
1167 		} else {
1168 			if (oldfile)
1169 				diff_output("#ifndef %s\n", ifdefname);
1170 			else
1171 				diff_output("#ifdef %s\n", ifdefname);
1172 		}
1173 		inifdef = 1 + oldfile;
1174 	}
1175 	for (i = a; i <= b; i++) {
1176 		fseek(lb, f[i - 1], SEEK_SET);
1177 		nc = f[i] - f[i - 1];
1178 		if (diff_format != D_IFDEF && ch != '\0') {
1179 			diff_output("%c", ch);
1180 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1181 			    || diff_format == D_UNIFIED))
1182 				diff_output("\t");
1183 			else if (diff_format != D_UNIFIED)
1184 				diff_output(" ");
1185 		}
1186 		col = 0;
1187 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1188 			if ((c = getc(lb)) == EOF) {
1189 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1190 				    diff_format == D_NREVERSE)
1191 					warnx("No newline at end of file");
1192 				else
1193 					diff_output("\n\\ No newline at end of "
1194 					    "file\n");
1195 				return (0);
1196 			}
1197 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1198 				do {
1199 					diff_output(" ");
1200 				} while (++col & 7);
1201 			} else {
1202 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1203 				    && lastc == '.') {
1204 					/*
1205 					 * Don't print a bare "." line
1206 					 * since that will confuse ed(1).
1207 					 * Print ".." instead and return,
1208 					 * giving the caller an offset
1209 					 * from which to restart.
1210 					 */
1211 					diff_output(".\n");
1212 					return (i - a + 1);
1213 				}
1214 				diff_output("%c", c);
1215 				col++;
1216 			}
1217 		}
1218 	}
1219 	return (0);
1220 }
1221 
1222 /*
1223  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1224  */
1225 static int
1226 readhash(FILE *f, int flags)
1227 {
1228 	int i, t, space;
1229 	int sum;
1230 
1231 	sum = 1;
1232 	space = 0;
1233 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1234 		if (flags & D_IGNORECASE)
1235 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1236 				if (t == EOF) {
1237 					if (i == 0)
1238 						return (0);
1239 					break;
1240 				}
1241 				sum = sum * 127 + chrtran[t];
1242 			}
1243 		else
1244 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1245 				if (t == EOF) {
1246 					if (i == 0)
1247 						return (0);
1248 					break;
1249 				}
1250 				sum = sum * 127 + t;
1251 			}
1252 	} else {
1253 		for (i = 0;;) {
1254 			switch (t = getc(f)) {
1255 			case '\t':
1256 			case '\r':
1257 			case '\v':
1258 			case '\f':
1259 			case ' ':
1260 				space++;
1261 				continue;
1262 			default:
1263 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1264 					i++;
1265 					space = 0;
1266 				}
1267 				sum = sum * 127 + chrtran[t];
1268 				i++;
1269 				continue;
1270 			case EOF:
1271 				if (i == 0)
1272 					return (0);
1273 				/* FALLTHROUGH */
1274 			case '\n':
1275 				break;
1276 			}
1277 			break;
1278 		}
1279 	}
1280 	/*
1281 	 * There is a remote possibility that we end up with a zero sum.
1282 	 * Zero is used as an EOF marker, so return 1 instead.
1283 	 */
1284 	return (sum == 0 ? 1 : sum);
1285 }
1286 
1287 static int
1288 asciifile(FILE *f)
1289 {
1290 	unsigned char buf[BUFSIZ];
1291 	size_t i, cnt;
1292 
1293 	if (f == NULL)
1294 		return (1);
1295 
1296 	rewind(f);
1297 	cnt = fread(buf, 1, sizeof(buf), f);
1298 	for (i = 0; i < cnt; i++)
1299 		if (!isprint(buf[i]) && !isspace(buf[i]))
1300 			return (0);
1301 	return (1);
1302 }
1303 
1304 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1305 
1306 static char *
1307 match_function(const long *f, int pos, FILE *fp)
1308 {
1309 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1310 	size_t nc;
1311 	int last = lastline;
1312 	char *state = NULL;
1313 
1314 	lastline = pos;
1315 	while (pos > last) {
1316 		fseek(fp, f[pos - 1], SEEK_SET);
1317 		nc = f[pos] - f[pos - 1];
1318 		if (nc >= sizeof(buf))
1319 			nc = sizeof(buf) - 1;
1320 		nc = fread(buf, 1, nc, fp);
1321 		if (nc > 0) {
1322 			buf[nc] = '\0';
1323 			buf[strcspn(buf, "\n")] = '\0';
1324 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1325 				if (begins_with(buf, "private:")) {
1326 					if (!state)
1327 						state = " (private)";
1328 				} else if (begins_with(buf, "protected:")) {
1329 					if (!state)
1330 						state = " (protected)";
1331 				} else if (begins_with(buf, "public:")) {
1332 					if (!state)
1333 						state = " (public)";
1334 				} else {
1335 					strlcpy(lastbuf, buf, sizeof lastbuf);
1336 					if (state)
1337 						strlcat(lastbuf, state,
1338 						    sizeof lastbuf);
1339 					lastmatchline = pos;
1340 					return lastbuf;
1341 				}
1342 			}
1343 		}
1344 		pos--;
1345 	}
1346 	return lastmatchline > 0 ? lastbuf : NULL;
1347 }
1348 
1349 /* dump accumulated "context" diff changes */
1350 static void
1351 dump_context_vec(FILE *f1, FILE *f2, int flags)
1352 {
1353 	struct context_vec *cvp = context_vec_start;
1354 	int lowa, upb, lowc, upd, do_output;
1355 	int a, b, c, d;
1356 	char ch, *f;
1357 
1358 	if (context_vec_start > context_vec_ptr)
1359 		return;
1360 
1361 	b = d = 0;		/* gcc */
1362 	lowa = MAX(1, cvp->a - diff_context);
1363 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1364 	lowc = MAX(1, cvp->c - diff_context);
1365 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1366 
1367 	diff_output("***************");
1368 	if ((flags & D_PROTOTYPE)) {
1369 		f = match_function(ixold, lowa-1, f1);
1370 		if (f != NULL)
1371 			diff_output(" %s", f);
1372 	}
1373 	diff_output("\n*** ");
1374 	range(lowa, upb, ",");
1375 	diff_output(" ****\n");
1376 
1377 	/*
1378 	 * Output changes to the "old" file.  The first loop suppresses
1379 	 * output if there were no changes to the "old" file (we'll see
1380 	 * the "old" lines as context in the "new" list).
1381 	 */
1382 	do_output = 0;
1383 	for (; cvp <= context_vec_ptr; cvp++)
1384 		if (cvp->a <= cvp->b) {
1385 			cvp = context_vec_start;
1386 			do_output++;
1387 			break;
1388 		}
1389 	if (do_output) {
1390 		while (cvp <= context_vec_ptr) {
1391 			a = cvp->a;
1392 			b = cvp->b;
1393 			c = cvp->c;
1394 			d = cvp->d;
1395 
1396 			if (a <= b && c <= d)
1397 				ch = 'c';
1398 			else
1399 				ch = (a <= b) ? 'd' : 'a';
1400 
1401 			if (ch == 'a')
1402 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1403 			else {
1404 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1405 				fetch(ixold, a, b, f1,
1406 				    ch == 'c' ? '!' : '-', 0, flags);
1407 			}
1408 			lowa = b + 1;
1409 			cvp++;
1410 		}
1411 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1412 	}
1413 	/* output changes to the "new" file */
1414 	diff_output("--- ");
1415 	range(lowc, upd, ",");
1416 	diff_output(" ----\n");
1417 
1418 	do_output = 0;
1419 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1420 		if (cvp->c <= cvp->d) {
1421 			cvp = context_vec_start;
1422 			do_output++;
1423 			break;
1424 		}
1425 	if (do_output) {
1426 		while (cvp <= context_vec_ptr) {
1427 			a = cvp->a;
1428 			b = cvp->b;
1429 			c = cvp->c;
1430 			d = cvp->d;
1431 
1432 			if (a <= b && c <= d)
1433 				ch = 'c';
1434 			else
1435 				ch = (a <= b) ? 'd' : 'a';
1436 
1437 			if (ch == 'd')
1438 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1439 			else {
1440 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1441 				fetch(ixnew, c, d, f2,
1442 				    ch == 'c' ? '!' : '+', 0, flags);
1443 			}
1444 			lowc = d + 1;
1445 			cvp++;
1446 		}
1447 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1448 	}
1449 	context_vec_ptr = context_vec_start - 1;
1450 }
1451 
1452 /* dump accumulated "unified" diff changes */
1453 static void
1454 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1455 {
1456 	struct context_vec *cvp = context_vec_start;
1457 	int lowa, upb, lowc, upd;
1458 	int a, b, c, d;
1459 	char ch, *f;
1460 
1461 	if (context_vec_start > context_vec_ptr)
1462 		return;
1463 
1464 	b = d = 0;		/* gcc */
1465 	lowa = MAX(1, cvp->a - diff_context);
1466 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1467 	lowc = MAX(1, cvp->c - diff_context);
1468 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1469 
1470 	diff_output("@@ -");
1471 	uni_range(lowa, upb);
1472 	diff_output(" +");
1473 	uni_range(lowc, upd);
1474 	diff_output(" @@");
1475 	if ((flags & D_PROTOTYPE)) {
1476 		f = match_function(ixold, lowa-1, f1);
1477 		if (f != NULL)
1478 			diff_output(" %s", f);
1479 	}
1480 	diff_output("\n");
1481 
1482 	/*
1483 	 * Output changes in "unified" diff format--the old and new lines
1484 	 * are printed together.
1485 	 */
1486 	for (; cvp <= context_vec_ptr; cvp++) {
1487 		a = cvp->a;
1488 		b = cvp->b;
1489 		c = cvp->c;
1490 		d = cvp->d;
1491 
1492 		/*
1493 		 * c: both new and old changes
1494 		 * d: only changes in the old file
1495 		 * a: only changes in the new file
1496 		 */
1497 		if (a <= b && c <= d)
1498 			ch = 'c';
1499 		else
1500 			ch = (a <= b) ? 'd' : 'a';
1501 
1502 		switch (ch) {
1503 		case 'c':
1504 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1505 			fetch(ixold, a, b, f1, '-', 0, flags);
1506 			fetch(ixnew, c, d, f2, '+', 0, flags);
1507 			break;
1508 		case 'd':
1509 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1510 			fetch(ixold, a, b, f1, '-', 0, flags);
1511 			break;
1512 		case 'a':
1513 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1514 			fetch(ixnew, c, d, f2, '+', 0, flags);
1515 			break;
1516 		}
1517 		lowa = b + 1;
1518 		lowc = d + 1;
1519 	}
1520 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1521 
1522 	context_vec_ptr = context_vec_start - 1;
1523 }
1524 
1525 static void
1526 print_header(const char *file1, const char *file2)
1527 {
1528 	if (label[0] != NULL)
1529 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1530 		    label[0]);
1531 	else
1532 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1533 		    file1, ctime(&stb1.st_mtime));
1534 	if (label[1] != NULL)
1535 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1536 		    label[1]);
1537 	else
1538 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1539 		    file2, ctime(&stb2.st_mtime));
1540 }
1541