xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1 /*	$OpenBSD: diffreg.c,v 1.82 2012/07/08 15:48:56 stsp Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 /*
86  * diff - compare two files.
87  */
88 
89 /*
90  *	Uses an algorithm due to Harold Stone, which finds
91  *	a pair of longest identical subsequences in the two
92  *	files.
93  *
94  *	The major goal is to generate the match vector J.
95  *	J[i] is the index of the line in file1 corresponding
96  *	to line i file0. J[i] = 0 if there is no
97  *	such line in file1.
98  *
99  *	Lines are hashed so as to work in core. All potential
100  *	matches are located by sorting the lines of each file
101  *	on the hash (called ``value''). In particular, this
102  *	collects the equivalence classes in file1 together.
103  *	Subroutine equiv replaces the value of each line in
104  *	file0 by the index of the first element of its
105  *	matching equivalence in (the reordered) file1.
106  *	To save space equiv squeezes file1 into a single
107  *	array member in which the equivalence classes
108  *	are simply concatenated, except that their first
109  *	members are flagged by changing sign.
110  *
111  *	Next the indices that point into member are unsorted into
112  *	array class according to the original order of file0.
113  *
114  *	The cleverness lies in routine stone. This marches
115  *	through the lines of file0, developing a vector klist
116  *	of "k-candidates". At step i a k-candidate is a matched
117  *	pair of lines x,y (x in file0 y in file1) such that
118  *	there is a common subsequence of length k
119  *	between the first i lines of file0 and the first y
120  *	lines of file1, but there is no such subsequence for
121  *	any smaller y. x is the earliest possible mate to y
122  *	that occurs in such a subsequence.
123  *
124  *	Whenever any of the members of the equivalence class of
125  *	lines in file1 matable to a line in file0 has serial number
126  *	less than the y of some k-candidate, that k-candidate
127  *	with the smallest such y is replaced. The new
128  *	k-candidate is chained (via pred) to the current
129  *	k-1 candidate so that the actual subsequence can
130  *	be recovered. When a member has serial number greater
131  *	that the y of all k-candidates, the klist is extended.
132  *	At the end, the longest subsequence is pulled out
133  *	and placed in the array J by unravel
134  *
135  *	With J in hand, the matches there recorded are
136  *	check'ed against reality to assure that no spurious
137  *	matches have crept in due to hashing. If they have,
138  *	they are broken, and "jackpot" is recorded--a harmless
139  *	matter except that a true match for a spuriously
140  *	mated line may now be unnecessarily reported as a change.
141  *
142  *	Much of the complexity of the program comes simply
143  *	from trying to minimize core utilization and
144  *	maximize the range of doable problems by dynamically
145  *	allocating what is needed and reusing what is not.
146  *	The core requirements for problems larger than somewhat
147  *	are (in words) 2*length(file0) + length(file1) +
148  *	3*(number of k-candidates installed),  typically about
149  *	6n words for files of length n.
150  */
151 
152 struct cand {
153 	int	x;
154 	int	y;
155 	int	pred;
156 };
157 
158 struct line {
159 	int	serial;
160 	int	value;
161 } *file[2];
162 
163 /*
164  * The following struct is used to record change information when
165  * doing a "context" or "unified" diff.  (see routine "change" to
166  * understand the highly mnemonic field names)
167  */
168 struct context_vec {
169 	int	a;		/* start line in old file */
170 	int	b;		/* end line in old file */
171 	int	c;		/* start line in new file */
172 	int	d;		/* end line in new file */
173 };
174 
175 #define	diff_output	printf
176 static FILE	*opentemp(const char *);
177 static void	 output(char *, FILE *, char *, FILE *, int);
178 static void	 check(FILE *, FILE *, int);
179 static void	 range(int, int, char *);
180 static void	 uni_range(int, int);
181 static void	 dump_context_vec(FILE *, FILE *, int);
182 static void	 dump_unified_vec(FILE *, FILE *, int);
183 static void	 prepare(int, FILE *, off_t, int);
184 static void	 prune(void);
185 static void	 equiv(struct line *, int, struct line *, int, int *);
186 static void	 unravel(int);
187 static void	 unsort(struct line *, int, int *);
188 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
189 static void	 sort(struct line *, int);
190 static void	 print_header(const char *, const char *);
191 static int	 ignoreline(char *);
192 static int	 asciifile(FILE *);
193 static int	 fetch(long *, int, int, FILE *, int, int, int);
194 static int	 newcand(int, int, int);
195 static int	 search(int *, int, int);
196 static int	 skipline(FILE *);
197 static int	 isqrt(int);
198 static int	 stone(int *, int, int *, int *, int);
199 static int	 readhash(FILE *, int);
200 static int	 files_differ(FILE *, FILE *, int);
201 static char	*match_function(const long *, int, FILE *);
202 static char	*preadline(int, size_t, off_t);
203 
204 static int  *J;			/* will be overlaid on class */
205 static int  *class;		/* will be overlaid on file[0] */
206 static int  *klist;		/* will be overlaid on file[0] after class */
207 static int  *member;		/* will be overlaid on file[1] */
208 static int   clen;
209 static int   inifdef;		/* whether or not we are in a #ifdef block */
210 static int   len[2];
211 static int   pref, suff;	/* length of prefix and suffix */
212 static int   slen[2];
213 static int   anychange;
214 static long *ixnew;		/* will be overlaid on file[1] */
215 static long *ixold;		/* will be overlaid on klist */
216 static struct cand *clist;	/* merely a free storage pot for candidates */
217 static int   clistlen;		/* the length of clist */
218 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
219 static u_char *chrtran;		/* translation table for case-folding */
220 static struct context_vec *context_vec_start;
221 static struct context_vec *context_vec_end;
222 static struct context_vec *context_vec_ptr;
223 
224 #define FUNCTION_CONTEXT_SIZE	55
225 static char lastbuf[FUNCTION_CONTEXT_SIZE];
226 static int lastline;
227 static int lastmatchline;
228 
229 
230 /*
231  * chrtran points to one of 2 translation tables: cup2low if folding upper to
232  * lower case clow2low if not folding case
233  */
234 u_char clow2low[256] = {
235 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
236 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
237 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
238 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
239 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
240 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
241 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
242 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
243 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
244 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
245 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
246 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
247 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
248 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
249 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
250 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
251 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
252 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
253 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
254 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
255 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
256 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
257 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
258 	0xfd, 0xfe, 0xff
259 };
260 
261 u_char cup2low[256] = {
262 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
263 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
264 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
265 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
266 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
267 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
268 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
269 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
270 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
271 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
272 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
273 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
274 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
275 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
276 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
277 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
278 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
279 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
280 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
281 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
282 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
283 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
284 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
285 	0xfd, 0xfe, 0xff
286 };
287 
288 int
289 diffreg(char *file1, char *file2, int flags)
290 {
291 	FILE *f1, *f2;
292 	int i, rval, ostdout = -1;
293 	pid_t pid = -1;
294 
295 	f1 = f2 = NULL;
296 	rval = D_SAME;
297 	anychange = 0;
298 	lastline = 0;
299 	lastmatchline = 0;
300 	context_vec_ptr = context_vec_start - 1;
301 	if (flags & D_IGNORECASE)
302 		chrtran = cup2low;
303 	else
304 		chrtran = clow2low;
305 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
306 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
307 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
308 		goto closem;
309 
310 	if (flags & D_EMPTY1)
311 		f1 = fopen(_PATH_DEVNULL, "r");
312 	else {
313 		if (!S_ISREG(stb1.st_mode)) {
314 			if ((f1 = opentemp(file1)) == NULL ||
315 			    fstat(fileno(f1), &stb1) < 0) {
316 				warn("%s", file1);
317 				status |= 2;
318 				goto closem;
319 			}
320 		} else if (strcmp(file1, "-") == 0)
321 			f1 = stdin;
322 		else
323 			f1 = fopen(file1, "r");
324 	}
325 	if (f1 == NULL) {
326 		warn("%s", file1);
327 		status |= 2;
328 		goto closem;
329 	}
330 
331 	if (flags & D_EMPTY2)
332 		f2 = fopen(_PATH_DEVNULL, "r");
333 	else {
334 		if (!S_ISREG(stb2.st_mode)) {
335 			if ((f2 = opentemp(file2)) == NULL ||
336 			    fstat(fileno(f2), &stb2) < 0) {
337 				warn("%s", file2);
338 				status |= 2;
339 				goto closem;
340 			}
341 		} else if (strcmp(file2, "-") == 0)
342 			f2 = stdin;
343 		else
344 			f2 = fopen(file2, "r");
345 	}
346 	if (f2 == NULL) {
347 		warn("%s", file2);
348 		status |= 2;
349 		goto closem;
350 	}
351 
352 	switch (files_differ(f1, f2, flags)) {
353 	case 0:
354 		goto closem;
355 	case 1:
356 		break;
357 	default:
358 		/* error */
359 		status |= 2;
360 		goto closem;
361 	}
362 
363 	if ((flags & D_FORCEASCII) == 0 &&
364 	    (!asciifile(f1) || !asciifile(f2))) {
365 		rval = D_BINARY;
366 		status |= 1;
367 		goto closem;
368 	}
369 	if (lflag) {
370 		/* redirect stdout to pr */
371 		int pfd[2];
372 		char *header;
373 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
374 
375 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
376 		prargv[2] = header;
377 		fflush(stdout);
378 		rewind(stdout);
379 		pipe(pfd);
380 		switch ((pid = fork())) {
381 		case -1:
382 			warnx("No more processes");
383 			status |= 2;
384 			xfree(header);
385 			rval = D_ERROR;
386 			goto closem;
387 		case 0:
388 			/* child */
389 			if (pfd[0] != STDIN_FILENO) {
390 				dup2(pfd[0], STDIN_FILENO);
391 				close(pfd[0]);
392 			}
393 			close(pfd[1]);
394 			execv(_PATH_PR, prargv);
395 			_exit(127);
396 		default:
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			xfree(header);
406 		}
407 	}
408 	prepare(0, f1, stb1.st_size, flags);
409 	prepare(1, f2, stb2.st_size, flags);
410 
411 	prune();
412 	sort(sfile[0], slen[0]);
413 	sort(sfile[1], slen[1]);
414 
415 	member = (int *)file[1];
416 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
417 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
418 
419 	class = (int *)file[0];
420 	unsort(sfile[0], slen[0], class);
421 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
422 
423 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
424 	clen = 0;
425 	clistlen = 100;
426 	clist = xcalloc(clistlen, sizeof(*clist));
427 	i = stone(class, slen[0], member, klist, flags);
428 	xfree(member);
429 	xfree(class);
430 
431 	J = xrealloc(J, len[0] + 2, sizeof(*J));
432 	unravel(klist[i]);
433 	xfree(clist);
434 	xfree(klist);
435 
436 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
437 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
438 	check(f1, f2, flags);
439 	output(file1, f1, file2, f2, flags);
440 	if (ostdout != -1) {
441 		int wstatus;
442 
443 		/* close the pipe to pr and restore stdout */
444 		fflush(stdout);
445 		rewind(stdout);
446 		if (ostdout != STDOUT_FILENO) {
447 			close(STDOUT_FILENO);
448 			dup2(ostdout, STDOUT_FILENO);
449 			close(ostdout);
450 		}
451 		waitpid(pid, &wstatus, 0);
452 	}
453 closem:
454 	if (anychange) {
455 		status |= 1;
456 		if (rval == D_SAME)
457 			rval = D_DIFFER;
458 	}
459 	if (f1 != NULL)
460 		fclose(f1);
461 	if (f2 != NULL)
462 		fclose(f2);
463 
464 	return (rval);
465 }
466 
467 /*
468  * Check to see if the given files differ.
469  * Returns 0 if they are the same, 1 if different, and -1 on error.
470  * XXX - could use code from cmp(1) [faster]
471  */
472 static int
473 files_differ(FILE *f1, FILE *f2, int flags)
474 {
475 	char buf1[BUFSIZ], buf2[BUFSIZ];
476 	size_t i, j;
477 
478 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
479 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
480 		return (1);
481 	for (;;) {
482 		i = fread(buf1, 1, sizeof(buf1), f1);
483 		j = fread(buf2, 1, sizeof(buf2), f2);
484 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
485 			return (-1);
486 		if (i != j)
487 			return (1);
488 		if (i == 0)
489 			return (0);
490 		if (memcmp(buf1, buf2, i) != 0)
491 			return (1);
492 	}
493 }
494 
495 static FILE *
496 opentemp(const char *file)
497 {
498 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
499 	ssize_t nread;
500 	int ifd, ofd;
501 
502 	if (strcmp(file, "-") == 0)
503 		ifd = STDIN_FILENO;
504 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
505 		return (NULL);
506 
507 	if ((tempdir = getenv("TMPDIR")) == NULL)
508 		tempdir = _PATH_TMP;
509 
510 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
511 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
512 	    sizeof(tempfile)) {
513 		close(ifd);
514 		errno = ENAMETOOLONG;
515 		return (NULL);
516 	}
517 
518 	if ((ofd = mkstemp(tempfile)) < 0) {
519 		close(ifd);
520 		return (NULL);
521 	}
522 	unlink(tempfile);
523 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
524 		if (write(ofd, buf, nread) != nread) {
525 			close(ifd);
526 			close(ofd);
527 			return (NULL);
528 		}
529 	}
530 	close(ifd);
531 	lseek(ofd, (off_t)0, SEEK_SET);
532 	return (fdopen(ofd, "r"));
533 }
534 
535 char *
536 splice(char *dir, char *file)
537 {
538 	char *tail, *buf;
539 	size_t dirlen;
540 
541 	dirlen = strlen(dir);
542 	while (dirlen != 0 && dir[dirlen - 1] == '/')
543 	    dirlen--;
544 	if ((tail = strrchr(file, '/')) == NULL)
545 		tail = file;
546 	else
547 		tail++;
548 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
549 	return (buf);
550 }
551 
552 static void
553 prepare(int i, FILE *fd, off_t filesize, int flags)
554 {
555 	struct line *p;
556 	int j, h;
557 	size_t sz;
558 
559 	rewind(fd);
560 
561 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
562 	if (sz < 100)
563 		sz = 100;
564 
565 	p = xcalloc(sz + 3, sizeof(*p));
566 	for (j = 0; (h = readhash(fd, flags));) {
567 		if (j == sz) {
568 			sz = sz * 3 / 2;
569 			p = xrealloc(p, sz + 3, sizeof(*p));
570 		}
571 		p[++j].value = h;
572 	}
573 	len[i] = j;
574 	file[i] = p;
575 }
576 
577 static void
578 prune(void)
579 {
580 	int i, j;
581 
582 	for (pref = 0; pref < len[0] && pref < len[1] &&
583 	    file[0][pref + 1].value == file[1][pref + 1].value;
584 	    pref++)
585 		;
586 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
587 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
588 	    suff++)
589 		;
590 	for (j = 0; j < 2; j++) {
591 		sfile[j] = file[j] + pref;
592 		slen[j] = len[j] - pref - suff;
593 		for (i = 0; i <= slen[j]; i++)
594 			sfile[j][i].serial = i;
595 	}
596 }
597 
598 static void
599 equiv(struct line *a, int n, struct line *b, int m, int *c)
600 {
601 	int i, j;
602 
603 	i = j = 1;
604 	while (i <= n && j <= m) {
605 		if (a[i].value < b[j].value)
606 			a[i++].value = 0;
607 		else if (a[i].value == b[j].value)
608 			a[i++].value = j;
609 		else
610 			j++;
611 	}
612 	while (i <= n)
613 		a[i++].value = 0;
614 	b[m + 1].value = 0;
615 	j = 0;
616 	while (++j <= m) {
617 		c[j] = -b[j].serial;
618 		while (b[j + 1].value == b[j].value) {
619 			j++;
620 			c[j] = b[j].serial;
621 		}
622 	}
623 	c[j] = -1;
624 }
625 
626 /* Code taken from ping.c */
627 static int
628 isqrt(int n)
629 {
630 	int y, x = 1;
631 
632 	if (n == 0)
633 		return (0);
634 
635 	do { /* newton was a stinker */
636 		y = x;
637 		x = n / x;
638 		x += y;
639 		x /= 2;
640 	} while ((x - y) > 1 || (x - y) < -1);
641 
642 	return (x);
643 }
644 
645 static int
646 stone(int *a, int n, int *b, int *c, int flags)
647 {
648 	int i, k, y, j, l;
649 	int oldc, tc, oldl, sq;
650 	u_int numtries, bound;
651 
652 	if (flags & D_MINIMAL)
653 		bound = UINT_MAX;
654 	else {
655 		sq = isqrt(n);
656 		bound = MAX(256, sq);
657 	}
658 
659 	k = 0;
660 	c[0] = newcand(0, 0, 0);
661 	for (i = 1; i <= n; i++) {
662 		j = a[i];
663 		if (j == 0)
664 			continue;
665 		y = -b[j];
666 		oldl = 0;
667 		oldc = c[0];
668 		numtries = 0;
669 		do {
670 			if (y <= clist[oldc].y)
671 				continue;
672 			l = search(c, k, y);
673 			if (l != oldl + 1)
674 				oldc = c[l - 1];
675 			if (l <= k) {
676 				if (clist[c[l]].y <= y)
677 					continue;
678 				tc = c[l];
679 				c[l] = newcand(i, y, oldc);
680 				oldc = tc;
681 				oldl = l;
682 				numtries++;
683 			} else {
684 				c[l] = newcand(i, y, oldc);
685 				k++;
686 				break;
687 			}
688 		} while ((y = b[++j]) > 0 && numtries < bound);
689 	}
690 	return (k);
691 }
692 
693 static int
694 newcand(int x, int y, int pred)
695 {
696 	struct cand *q;
697 
698 	if (clen == clistlen) {
699 		clistlen = clistlen * 11 / 10;
700 		clist = xrealloc(clist, clistlen, sizeof(*clist));
701 	}
702 	q = clist + clen;
703 	q->x = x;
704 	q->y = y;
705 	q->pred = pred;
706 	return (clen++);
707 }
708 
709 static int
710 search(int *c, int k, int y)
711 {
712 	int i, j, l, t;
713 
714 	if (clist[c[k]].y < y)	/* quick look for typical case */
715 		return (k + 1);
716 	i = 0;
717 	j = k + 1;
718 	for (;;) {
719 		l = (i + j) / 2;
720 		if (l <= i)
721 			break;
722 		t = clist[c[l]].y;
723 		if (t > y)
724 			j = l;
725 		else if (t < y)
726 			i = l;
727 		else
728 			return (l);
729 	}
730 	return (l + 1);
731 }
732 
733 static void
734 unravel(int p)
735 {
736 	struct cand *q;
737 	int i;
738 
739 	for (i = 0; i <= len[0]; i++)
740 		J[i] = i <= pref ? i :
741 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
742 	for (q = clist + p; q->y != 0; q = clist + q->pred)
743 		J[q->x + pref] = q->y + pref;
744 }
745 
746 /*
747  * Check does double duty:
748  *  1.	ferret out any fortuitous correspondences due
749  *	to confounding by hashing (which result in "jackpot")
750  *  2.  collect random access indexes to the two files
751  */
752 static void
753 check(FILE *f1, FILE *f2, int flags)
754 {
755 	int i, j, jackpot, c, d;
756 	long ctold, ctnew;
757 
758 	rewind(f1);
759 	rewind(f2);
760 	j = 1;
761 	ixold[0] = ixnew[0] = 0;
762 	jackpot = 0;
763 	ctold = ctnew = 0;
764 	for (i = 1; i <= len[0]; i++) {
765 		if (J[i] == 0) {
766 			ixold[i] = ctold += skipline(f1);
767 			continue;
768 		}
769 		while (j < J[i]) {
770 			ixnew[j] = ctnew += skipline(f2);
771 			j++;
772 		}
773 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
774 			for (;;) {
775 				c = getc(f1);
776 				d = getc(f2);
777 				/*
778 				 * GNU diff ignores a missing newline
779 				 * in one file for -b or -w.
780 				 */
781 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
782 				    ((c == EOF && d == '\n') ||
783 				    (c == '\n' && d == EOF))) {
784 					break;
785 				}
786 				ctold++;
787 				ctnew++;
788 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
789 				    isspace(d)) {
790 					do {
791 						if (c == '\n')
792 							break;
793 						ctold++;
794 					} while (isspace(c = getc(f1)));
795 					do {
796 						if (d == '\n')
797 							break;
798 						ctnew++;
799 					} while (isspace(d = getc(f2)));
800 				} else if ((flags & D_IGNOREBLANKS)) {
801 					while (isspace(c) && c != '\n') {
802 						c = getc(f1);
803 						ctold++;
804 					}
805 					while (isspace(d) && d != '\n') {
806 						d = getc(f2);
807 						ctnew++;
808 					}
809 				}
810 				if (chrtran[c] != chrtran[d]) {
811 					jackpot++;
812 					J[i] = 0;
813 					if (c != '\n' && c != EOF)
814 						ctold += skipline(f1);
815 					if (d != '\n' && c != EOF)
816 						ctnew += skipline(f2);
817 					break;
818 				}
819 				if (c == '\n' || c == EOF)
820 					break;
821 			}
822 		} else {
823 			for (;;) {
824 				ctold++;
825 				ctnew++;
826 				if ((c = getc(f1)) != (d = getc(f2))) {
827 					/* jackpot++; */
828 					J[i] = 0;
829 					if (c != '\n' && c != EOF)
830 						ctold += skipline(f1);
831 					if (d != '\n' && c != EOF)
832 						ctnew += skipline(f2);
833 					break;
834 				}
835 				if (c == '\n' || c == EOF)
836 					break;
837 			}
838 		}
839 		ixold[i] = ctold;
840 		ixnew[j] = ctnew;
841 		j++;
842 	}
843 	for (; j <= len[1]; j++)
844 		ixnew[j] = ctnew += skipline(f2);
845 	/*
846 	 * if (jackpot)
847 	 *	fprintf(stderr, "jackpot\n");
848 	 */
849 }
850 
851 /* shellsort CACM #201 */
852 static void
853 sort(struct line *a, int n)
854 {
855 	struct line *ai, *aim, w;
856 	int j, m = 0, k;
857 
858 	if (n == 0)
859 		return;
860 	for (j = 1; j <= n; j *= 2)
861 		m = 2 * j - 1;
862 	for (m /= 2; m != 0; m /= 2) {
863 		k = n - m;
864 		for (j = 1; j <= k; j++) {
865 			for (ai = &a[j]; ai > a; ai -= m) {
866 				aim = &ai[m];
867 				if (aim < ai)
868 					break;	/* wraparound */
869 				if (aim->value > ai[0].value ||
870 				    (aim->value == ai[0].value &&
871 					aim->serial > ai[0].serial))
872 					break;
873 				w.value = ai[0].value;
874 				ai[0].value = aim->value;
875 				aim->value = w.value;
876 				w.serial = ai[0].serial;
877 				ai[0].serial = aim->serial;
878 				aim->serial = w.serial;
879 			}
880 		}
881 	}
882 }
883 
884 static void
885 unsort(struct line *f, int l, int *b)
886 {
887 	int *a, i;
888 
889 	a = xcalloc(l + 1, sizeof(*a));
890 	for (i = 1; i <= l; i++)
891 		a[f[i].serial] = f[i].value;
892 	for (i = 1; i <= l; i++)
893 		b[i] = a[i];
894 	xfree(a);
895 }
896 
897 static int
898 skipline(FILE *f)
899 {
900 	int i, c;
901 
902 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
903 		continue;
904 	return (i);
905 }
906 
907 static void
908 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
909 {
910 	int m, i0, i1, j0, j1;
911 
912 	rewind(f1);
913 	rewind(f2);
914 	m = len[0];
915 	J[0] = 0;
916 	J[m + 1] = len[1] + 1;
917 	if (diff_format != D_EDIT) {
918 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
919 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
920 				i0++;
921 			j0 = J[i0 - 1] + 1;
922 			i1 = i0 - 1;
923 			while (i1 < m && J[i1 + 1] == 0)
924 				i1++;
925 			j1 = J[i1 + 1] - 1;
926 			J[i1] = j1;
927 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
928 		}
929 	} else {
930 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
931 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
932 				i0--;
933 			j0 = J[i0 + 1] - 1;
934 			i1 = i0 + 1;
935 			while (i1 > 1 && J[i1 - 1] == 0)
936 				i1--;
937 			j1 = J[i1 - 1] + 1;
938 			J[i1] = j1;
939 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
940 		}
941 	}
942 	if (m == 0)
943 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
944 	if (diff_format == D_IFDEF) {
945 		for (;;) {
946 #define	c i0
947 			if ((c = getc(f1)) == EOF)
948 				return;
949 			diff_output("%c", c);
950 		}
951 #undef c
952 	}
953 	if (anychange != 0) {
954 		if (diff_format == D_CONTEXT)
955 			dump_context_vec(f1, f2, flags);
956 		else if (diff_format == D_UNIFIED)
957 			dump_unified_vec(f1, f2, flags);
958 	}
959 }
960 
961 static void
962 range(int a, int b, char *separator)
963 {
964 	diff_output("%d", a > b ? b : a);
965 	if (a < b)
966 		diff_output("%s%d", separator, b);
967 }
968 
969 static void
970 uni_range(int a, int b)
971 {
972 	if (a < b)
973 		diff_output("%d,%d", a, b - a + 1);
974 	else if (a == b)
975 		diff_output("%d", b);
976 	else
977 		diff_output("%d,0", b);
978 }
979 
980 static char *
981 preadline(int fd, size_t rlen, off_t off)
982 {
983 	char *line;
984 	ssize_t nr;
985 
986 	line = xmalloc(rlen + 1);
987 	if ((nr = pread(fd, line, rlen, off)) < 0)
988 		err(2, "preadline");
989 	if (nr > 0 && line[nr-1] == '\n')
990 		nr--;
991 	line[nr] = '\0';
992 	return (line);
993 }
994 
995 static int
996 ignoreline(char *line)
997 {
998 	int ret;
999 
1000 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1001 	xfree(line);
1002 	return (ret == 0);	/* if it matched, it should be ignored. */
1003 }
1004 
1005 /*
1006  * Indicate that there is a difference between lines a and b of the from file
1007  * to get to lines c to d of the to file.  If a is greater then b then there
1008  * are no lines in the from file involved and this means that there were
1009  * lines appended (beginning at b).  If c is greater than d then there are
1010  * lines missing from the to file.
1011  */
1012 static void
1013 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1014     int *pflags)
1015 {
1016 	static size_t max_context = 64;
1017 	int i;
1018 
1019 restart:
1020 	if (diff_format != D_IFDEF && a > b && c > d)
1021 		return;
1022 	if (ignore_pats != NULL) {
1023 		char *line;
1024 		/*
1025 		 * All lines in the change, insert, or delete must
1026 		 * match an ignore pattern for the change to be
1027 		 * ignored.
1028 		 */
1029 		if (a <= b) {		/* Changes and deletes. */
1030 			for (i = a; i <= b; i++) {
1031 				line = preadline(fileno(f1),
1032 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1033 				if (!ignoreline(line))
1034 					goto proceed;
1035 			}
1036 		}
1037 		if (a > b || c <= d) {	/* Changes and inserts. */
1038 			for (i = c; i <= d; i++) {
1039 				line = preadline(fileno(f2),
1040 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1041 				if (!ignoreline(line))
1042 					goto proceed;
1043 			}
1044 		}
1045 		return;
1046 	}
1047 proceed:
1048 	if (*pflags & D_HEADER) {
1049 		diff_output("%s %s %s\n", diffargs, file1, file2);
1050 		*pflags &= ~D_HEADER;
1051 	}
1052 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1053 		/*
1054 		 * Allocate change records as needed.
1055 		 */
1056 		if (context_vec_ptr == context_vec_end - 1) {
1057 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1058 			max_context <<= 1;
1059 			context_vec_start = xrealloc(context_vec_start,
1060 			    max_context, sizeof(*context_vec_start));
1061 			context_vec_end = context_vec_start + max_context;
1062 			context_vec_ptr = context_vec_start + offset;
1063 		}
1064 		if (anychange == 0) {
1065 			/*
1066 			 * Print the context/unidiff header first time through.
1067 			 */
1068 			print_header(file1, file2);
1069 			anychange = 1;
1070 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1071 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1072 			/*
1073 			 * If this change is more than 'diff_context' lines from the
1074 			 * previous change, dump the record and reset it.
1075 			 */
1076 			if (diff_format == D_CONTEXT)
1077 				dump_context_vec(f1, f2, *pflags);
1078 			else
1079 				dump_unified_vec(f1, f2, *pflags);
1080 		}
1081 		context_vec_ptr++;
1082 		context_vec_ptr->a = a;
1083 		context_vec_ptr->b = b;
1084 		context_vec_ptr->c = c;
1085 		context_vec_ptr->d = d;
1086 		return;
1087 	}
1088 	if (anychange == 0)
1089 		anychange = 1;
1090 	switch (diff_format) {
1091 	case D_BRIEF:
1092 		return;
1093 	case D_NORMAL:
1094 	case D_EDIT:
1095 		range(a, b, ",");
1096 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1097 		if (diff_format == D_NORMAL)
1098 			range(c, d, ",");
1099 		diff_output("\n");
1100 		break;
1101 	case D_REVERSE:
1102 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1103 		range(a, b, " ");
1104 		diff_output("\n");
1105 		break;
1106 	case D_NREVERSE:
1107 		if (a > b)
1108 			diff_output("a%d %d\n", b, d - c + 1);
1109 		else {
1110 			diff_output("d%d %d\n", a, b - a + 1);
1111 			if (!(c > d))
1112 				/* add changed lines */
1113 				diff_output("a%d %d\n", b, d - c + 1);
1114 		}
1115 		break;
1116 	}
1117 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1118 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1119 		if (a <= b && c <= d && diff_format == D_NORMAL)
1120 			diff_output("---\n");
1121 	}
1122 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1123 	if (i != 0 && diff_format == D_EDIT) {
1124 		/*
1125 		 * A non-zero return value for D_EDIT indicates that the
1126 		 * last line printed was a bare dot (".") that has been
1127 		 * escaped as ".." to prevent ed(1) from misinterpreting
1128 		 * it.  We have to add a substitute command to change this
1129 		 * back and restart where we left off.
1130 		 */
1131 		diff_output(".\n");
1132 		diff_output("%ds/^\\.\\././\n", a);
1133 		a += i;
1134 		c += i;
1135 		goto restart;
1136 	}
1137 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1138 		diff_output(".\n");
1139 	if (inifdef) {
1140 		diff_output("#endif /* %s */\n", ifdefname);
1141 		inifdef = 0;
1142 	}
1143 }
1144 
1145 static int
1146 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1147 {
1148 	int i, j, c, lastc, col, nc;
1149 
1150 	/*
1151 	 * When doing #ifdef's, copy down to current line
1152 	 * if this is the first file, so that stuff makes it to output.
1153 	 */
1154 	if (diff_format == D_IFDEF && oldfile) {
1155 		long curpos = ftell(lb);
1156 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1157 		nc = f[a > b ? b : a - 1] - curpos;
1158 		for (i = 0; i < nc; i++)
1159 			diff_output("%c", getc(lb));
1160 	}
1161 	if (a > b)
1162 		return (0);
1163 	if (diff_format == D_IFDEF) {
1164 		if (inifdef) {
1165 			diff_output("#else /* %s%s */\n",
1166 			    oldfile == 1 ? "!" : "", ifdefname);
1167 		} else {
1168 			if (oldfile)
1169 				diff_output("#ifndef %s\n", ifdefname);
1170 			else
1171 				diff_output("#ifdef %s\n", ifdefname);
1172 		}
1173 		inifdef = 1 + oldfile;
1174 	}
1175 	for (i = a; i <= b; i++) {
1176 		fseek(lb, f[i - 1], SEEK_SET);
1177 		nc = f[i] - f[i - 1];
1178 		if (diff_format != D_IFDEF && ch != '\0') {
1179 			diff_output("%c", ch);
1180 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1181 			    || diff_format == D_UNIFIED))
1182 				diff_output("\t");
1183 			else if (diff_format != D_UNIFIED)
1184 				diff_output(" ");
1185 		}
1186 		col = 0;
1187 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1188 			if ((c = getc(lb)) == EOF) {
1189 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1190 				    diff_format == D_NREVERSE)
1191 					warnx("No newline at end of file");
1192 				else
1193 					diff_output("\n\\ No newline at end of "
1194 					    "file\n");
1195 				return (0);
1196 			}
1197 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1198 				do {
1199 					diff_output(" ");
1200 				} while (++col & 7);
1201 			} else {
1202 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1203 				    && lastc == '.') {
1204 					/*
1205 					 * Don't print a bare "." line
1206 					 * since that will confuse ed(1).
1207 					 * Print ".." instead and return,
1208 					 * giving the caller an offset
1209 					 * from which to restart.
1210 					 */
1211 					diff_output(".\n");
1212 					return (i - a + 1);
1213 				}
1214 				diff_output("%c", c);
1215 				col++;
1216 			}
1217 		}
1218 	}
1219 	return (0);
1220 }
1221 
1222 /*
1223  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1224  */
1225 static int
1226 readhash(FILE *f, int flags)
1227 {
1228 	int i, t, space;
1229 	int sum;
1230 
1231 	sum = 1;
1232 	space = 0;
1233 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1234 		if (flags & D_IGNORECASE)
1235 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1236 				if (t == EOF) {
1237 					if (i == 0)
1238 						return (0);
1239 					break;
1240 				}
1241 				sum = sum * 127 + chrtran[t];
1242 			}
1243 		else
1244 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1245 				if (t == EOF) {
1246 					if (i == 0)
1247 						return (0);
1248 					break;
1249 				}
1250 				sum = sum * 127 + t;
1251 			}
1252 	} else {
1253 		for (i = 0;;) {
1254 			switch (t = getc(f)) {
1255 			case '\t':
1256 			case '\r':
1257 			case '\v':
1258 			case '\f':
1259 			case ' ':
1260 				space++;
1261 				continue;
1262 			default:
1263 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1264 					i++;
1265 					space = 0;
1266 				}
1267 				sum = sum * 127 + chrtran[t];
1268 				i++;
1269 				continue;
1270 			case EOF:
1271 				if (i == 0)
1272 					return (0);
1273 				/* FALLTHROUGH */
1274 			case '\n':
1275 				break;
1276 			}
1277 			break;
1278 		}
1279 	}
1280 	/*
1281 	 * There is a remote possibility that we end up with a zero sum.
1282 	 * Zero is used as an EOF marker, so return 1 instead.
1283 	 */
1284 	return (sum == 0 ? 1 : sum);
1285 }
1286 
1287 static int
1288 asciifile(FILE *f)
1289 {
1290 	unsigned char buf[BUFSIZ];
1291 	size_t cnt;
1292 
1293 	if (f == NULL)
1294 		return (1);
1295 
1296 	rewind(f);
1297 	cnt = fread(buf, 1, sizeof(buf), f);
1298 	return (memchr(buf, '\0', cnt) == NULL);
1299 }
1300 
1301 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1302 
1303 static char *
1304 match_function(const long *f, int pos, FILE *fp)
1305 {
1306 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1307 	size_t nc;
1308 	int last = lastline;
1309 	char *state = NULL;
1310 
1311 	lastline = pos;
1312 	while (pos > last) {
1313 		fseek(fp, f[pos - 1], SEEK_SET);
1314 		nc = f[pos] - f[pos - 1];
1315 		if (nc >= sizeof(buf))
1316 			nc = sizeof(buf) - 1;
1317 		nc = fread(buf, 1, nc, fp);
1318 		if (nc > 0) {
1319 			buf[nc] = '\0';
1320 			buf[strcspn(buf, "\n")] = '\0';
1321 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1322 				if (begins_with(buf, "private:")) {
1323 					if (!state)
1324 						state = " (private)";
1325 				} else if (begins_with(buf, "protected:")) {
1326 					if (!state)
1327 						state = " (protected)";
1328 				} else if (begins_with(buf, "public:")) {
1329 					if (!state)
1330 						state = " (public)";
1331 				} else {
1332 					strlcpy(lastbuf, buf, sizeof lastbuf);
1333 					if (state)
1334 						strlcat(lastbuf, state,
1335 						    sizeof lastbuf);
1336 					lastmatchline = pos;
1337 					return lastbuf;
1338 				}
1339 			}
1340 		}
1341 		pos--;
1342 	}
1343 	return lastmatchline > 0 ? lastbuf : NULL;
1344 }
1345 
1346 /* dump accumulated "context" diff changes */
1347 static void
1348 dump_context_vec(FILE *f1, FILE *f2, int flags)
1349 {
1350 	struct context_vec *cvp = context_vec_start;
1351 	int lowa, upb, lowc, upd, do_output;
1352 	int a, b, c, d;
1353 	char ch, *f;
1354 
1355 	if (context_vec_start > context_vec_ptr)
1356 		return;
1357 
1358 	b = d = 0;		/* gcc */
1359 	lowa = MAX(1, cvp->a - diff_context);
1360 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1361 	lowc = MAX(1, cvp->c - diff_context);
1362 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1363 
1364 	diff_output("***************");
1365 	if ((flags & D_PROTOTYPE)) {
1366 		f = match_function(ixold, lowa-1, f1);
1367 		if (f != NULL)
1368 			diff_output(" %s", f);
1369 	}
1370 	diff_output("\n*** ");
1371 	range(lowa, upb, ",");
1372 	diff_output(" ****\n");
1373 
1374 	/*
1375 	 * Output changes to the "old" file.  The first loop suppresses
1376 	 * output if there were no changes to the "old" file (we'll see
1377 	 * the "old" lines as context in the "new" list).
1378 	 */
1379 	do_output = 0;
1380 	for (; cvp <= context_vec_ptr; cvp++)
1381 		if (cvp->a <= cvp->b) {
1382 			cvp = context_vec_start;
1383 			do_output++;
1384 			break;
1385 		}
1386 	if (do_output) {
1387 		while (cvp <= context_vec_ptr) {
1388 			a = cvp->a;
1389 			b = cvp->b;
1390 			c = cvp->c;
1391 			d = cvp->d;
1392 
1393 			if (a <= b && c <= d)
1394 				ch = 'c';
1395 			else
1396 				ch = (a <= b) ? 'd' : 'a';
1397 
1398 			if (ch == 'a')
1399 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1400 			else {
1401 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1402 				fetch(ixold, a, b, f1,
1403 				    ch == 'c' ? '!' : '-', 0, flags);
1404 			}
1405 			lowa = b + 1;
1406 			cvp++;
1407 		}
1408 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1409 	}
1410 	/* output changes to the "new" file */
1411 	diff_output("--- ");
1412 	range(lowc, upd, ",");
1413 	diff_output(" ----\n");
1414 
1415 	do_output = 0;
1416 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1417 		if (cvp->c <= cvp->d) {
1418 			cvp = context_vec_start;
1419 			do_output++;
1420 			break;
1421 		}
1422 	if (do_output) {
1423 		while (cvp <= context_vec_ptr) {
1424 			a = cvp->a;
1425 			b = cvp->b;
1426 			c = cvp->c;
1427 			d = cvp->d;
1428 
1429 			if (a <= b && c <= d)
1430 				ch = 'c';
1431 			else
1432 				ch = (a <= b) ? 'd' : 'a';
1433 
1434 			if (ch == 'd')
1435 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1436 			else {
1437 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1438 				fetch(ixnew, c, d, f2,
1439 				    ch == 'c' ? '!' : '+', 0, flags);
1440 			}
1441 			lowc = d + 1;
1442 			cvp++;
1443 		}
1444 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1445 	}
1446 	context_vec_ptr = context_vec_start - 1;
1447 }
1448 
1449 /* dump accumulated "unified" diff changes */
1450 static void
1451 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1452 {
1453 	struct context_vec *cvp = context_vec_start;
1454 	int lowa, upb, lowc, upd;
1455 	int a, b, c, d;
1456 	char ch, *f;
1457 
1458 	if (context_vec_start > context_vec_ptr)
1459 		return;
1460 
1461 	b = d = 0;		/* gcc */
1462 	lowa = MAX(1, cvp->a - diff_context);
1463 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1464 	lowc = MAX(1, cvp->c - diff_context);
1465 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1466 
1467 	diff_output("@@ -");
1468 	uni_range(lowa, upb);
1469 	diff_output(" +");
1470 	uni_range(lowc, upd);
1471 	diff_output(" @@");
1472 	if ((flags & D_PROTOTYPE)) {
1473 		f = match_function(ixold, lowa-1, f1);
1474 		if (f != NULL)
1475 			diff_output(" %s", f);
1476 	}
1477 	diff_output("\n");
1478 
1479 	/*
1480 	 * Output changes in "unified" diff format--the old and new lines
1481 	 * are printed together.
1482 	 */
1483 	for (; cvp <= context_vec_ptr; cvp++) {
1484 		a = cvp->a;
1485 		b = cvp->b;
1486 		c = cvp->c;
1487 		d = cvp->d;
1488 
1489 		/*
1490 		 * c: both new and old changes
1491 		 * d: only changes in the old file
1492 		 * a: only changes in the new file
1493 		 */
1494 		if (a <= b && c <= d)
1495 			ch = 'c';
1496 		else
1497 			ch = (a <= b) ? 'd' : 'a';
1498 
1499 		switch (ch) {
1500 		case 'c':
1501 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1502 			fetch(ixold, a, b, f1, '-', 0, flags);
1503 			fetch(ixnew, c, d, f2, '+', 0, flags);
1504 			break;
1505 		case 'd':
1506 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1507 			fetch(ixold, a, b, f1, '-', 0, flags);
1508 			break;
1509 		case 'a':
1510 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1511 			fetch(ixnew, c, d, f2, '+', 0, flags);
1512 			break;
1513 		}
1514 		lowa = b + 1;
1515 		lowc = d + 1;
1516 	}
1517 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1518 
1519 	context_vec_ptr = context_vec_start - 1;
1520 }
1521 
1522 static void
1523 print_header(const char *file1, const char *file2)
1524 {
1525 	if (label[0] != NULL)
1526 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1527 		    label[0]);
1528 	else
1529 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1530 		    file1, ctime(&stb1.st_mtime));
1531 	if (label[1] != NULL)
1532 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1533 		    label[1]);
1534 	else
1535 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1536 		    file2, ctime(&stb2.st_mtime));
1537 }
1538