xref: /openbsd-src/usr.bin/diff/diffreg.c (revision e9260a2109384575f8cd9fa929a31d4eb3290b65)
1 /*	$OpenBSD: diffreg.c,v 1.85 2015/02/05 12:59:57 millert Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 
70 #include <ctype.h>
71 #include <err.h>
72 #include <errno.h>
73 #include <fcntl.h>
74 #include <stddef.h>
75 #include <stdint.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 #include <limits.h>
81 
82 #include "diff.h"
83 #include "pathnames.h"
84 #include "xmalloc.h"
85 
86 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 #define	diff_output	printf
180 static FILE	*opentemp(const char *);
181 static void	 output(char *, FILE *, char *, FILE *, int);
182 static void	 check(FILE *, FILE *, int);
183 static void	 range(int, int, char *);
184 static void	 uni_range(int, int);
185 static void	 dump_context_vec(FILE *, FILE *, int);
186 static void	 dump_unified_vec(FILE *, FILE *, int);
187 static void	 prepare(int, FILE *, off_t, int);
188 static void	 prune(void);
189 static void	 equiv(struct line *, int, struct line *, int, int *);
190 static void	 unravel(int);
191 static void	 unsort(struct line *, int, int *);
192 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
193 static void	 sort(struct line *, int);
194 static void	 print_header(const char *, const char *);
195 static int	 ignoreline(char *);
196 static int	 asciifile(FILE *);
197 static int	 fetch(long *, int, int, FILE *, int, int, int);
198 static int	 newcand(int, int, int);
199 static int	 search(int *, int, int);
200 static int	 skipline(FILE *);
201 static int	 isqrt(int);
202 static int	 stone(int *, int, int *, int *, int);
203 static int	 readhash(FILE *, int);
204 static int	 files_differ(FILE *, FILE *, int);
205 static char	*match_function(const long *, int, FILE *);
206 static char	*preadline(int, size_t, off_t);
207 
208 static int  *J;			/* will be overlaid on class */
209 static int  *class;		/* will be overlaid on file[0] */
210 static int  *klist;		/* will be overlaid on file[0] after class */
211 static int  *member;		/* will be overlaid on file[1] */
212 static int   clen;
213 static int   inifdef;		/* whether or not we are in a #ifdef block */
214 static int   len[2];
215 static int   pref, suff;	/* length of prefix and suffix */
216 static int   slen[2];
217 static int   anychange;
218 static long *ixnew;		/* will be overlaid on file[1] */
219 static long *ixold;		/* will be overlaid on klist */
220 static struct cand *clist;	/* merely a free storage pot for candidates */
221 static int   clistlen;		/* the length of clist */
222 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223 static u_char *chrtran;		/* translation table for case-folding */
224 static struct context_vec *context_vec_start;
225 static struct context_vec *context_vec_end;
226 static struct context_vec *context_vec_ptr;
227 
228 #define FUNCTION_CONTEXT_SIZE	55
229 static char lastbuf[FUNCTION_CONTEXT_SIZE];
230 static int lastline;
231 static int lastmatchline;
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
293 diffreg(char *file1, char *file2, int flags)
294 {
295 	FILE *f1, *f2;
296 	int i, rval, ostdout = -1;
297 	pid_t pid = -1;
298 
299 	f1 = f2 = NULL;
300 	rval = D_SAME;
301 	anychange = 0;
302 	lastline = 0;
303 	lastmatchline = 0;
304 	context_vec_ptr = context_vec_start - 1;
305 	if (flags & D_IGNORECASE)
306 		chrtran = cup2low;
307 	else
308 		chrtran = clow2low;
309 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
310 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
311 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
312 		goto closem;
313 
314 	if (flags & D_EMPTY1)
315 		f1 = fopen(_PATH_DEVNULL, "r");
316 	else {
317 		if (!S_ISREG(stb1.st_mode)) {
318 			if ((f1 = opentemp(file1)) == NULL ||
319 			    fstat(fileno(f1), &stb1) < 0) {
320 				warn("%s", file1);
321 				status |= 2;
322 				goto closem;
323 			}
324 		} else if (strcmp(file1, "-") == 0)
325 			f1 = stdin;
326 		else
327 			f1 = fopen(file1, "r");
328 	}
329 	if (f1 == NULL) {
330 		warn("%s", file1);
331 		status |= 2;
332 		goto closem;
333 	}
334 
335 	if (flags & D_EMPTY2)
336 		f2 = fopen(_PATH_DEVNULL, "r");
337 	else {
338 		if (!S_ISREG(stb2.st_mode)) {
339 			if ((f2 = opentemp(file2)) == NULL ||
340 			    fstat(fileno(f2), &stb2) < 0) {
341 				warn("%s", file2);
342 				status |= 2;
343 				goto closem;
344 			}
345 		} else if (strcmp(file2, "-") == 0)
346 			f2 = stdin;
347 		else
348 			f2 = fopen(file2, "r");
349 	}
350 	if (f2 == NULL) {
351 		warn("%s", file2);
352 		status |= 2;
353 		goto closem;
354 	}
355 
356 	switch (files_differ(f1, f2, flags)) {
357 	case 0:
358 		goto closem;
359 	case 1:
360 		break;
361 	default:
362 		/* error */
363 		status |= 2;
364 		goto closem;
365 	}
366 
367 	if ((flags & D_FORCEASCII) == 0 &&
368 	    (!asciifile(f1) || !asciifile(f2))) {
369 		rval = D_BINARY;
370 		status |= 1;
371 		goto closem;
372 	}
373 	if (lflag) {
374 		/* redirect stdout to pr */
375 		int pfd[2];
376 		char *header;
377 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
378 
379 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
380 		prargv[2] = header;
381 		fflush(stdout);
382 		rewind(stdout);
383 		pipe(pfd);
384 		switch ((pid = fork())) {
385 		case -1:
386 			warnx("No more processes");
387 			status |= 2;
388 			xfree(header);
389 			rval = D_ERROR;
390 			goto closem;
391 		case 0:
392 			/* child */
393 			if (pfd[0] != STDIN_FILENO) {
394 				dup2(pfd[0], STDIN_FILENO);
395 				close(pfd[0]);
396 			}
397 			close(pfd[1]);
398 			execv(_PATH_PR, prargv);
399 			_exit(127);
400 		default:
401 			/* parent */
402 			if (pfd[1] != STDOUT_FILENO) {
403 				ostdout = dup(STDOUT_FILENO);
404 				dup2(pfd[1], STDOUT_FILENO);
405 				close(pfd[1]);
406 			}
407 			close(pfd[0]);
408 			rewind(stdout);
409 			xfree(header);
410 		}
411 	}
412 	prepare(0, f1, stb1.st_size, flags);
413 	prepare(1, f2, stb2.st_size, flags);
414 
415 	prune();
416 	sort(sfile[0], slen[0]);
417 	sort(sfile[1], slen[1]);
418 
419 	member = (int *)file[1];
420 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
421 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
422 
423 	class = (int *)file[0];
424 	unsort(sfile[0], slen[0], class);
425 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
426 
427 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
428 	clen = 0;
429 	clistlen = 100;
430 	clist = xcalloc(clistlen, sizeof(*clist));
431 	i = stone(class, slen[0], member, klist, flags);
432 	xfree(member);
433 	xfree(class);
434 
435 	J = xrealloc(J, len[0] + 2, sizeof(*J));
436 	unravel(klist[i]);
437 	xfree(clist);
438 	xfree(klist);
439 
440 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
441 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
442 	check(f1, f2, flags);
443 	output(file1, f1, file2, f2, flags);
444 	if (ostdout != -1) {
445 		int wstatus;
446 
447 		/* close the pipe to pr and restore stdout */
448 		fflush(stdout);
449 		rewind(stdout);
450 		if (ostdout != STDOUT_FILENO) {
451 			close(STDOUT_FILENO);
452 			dup2(ostdout, STDOUT_FILENO);
453 			close(ostdout);
454 		}
455 		waitpid(pid, &wstatus, 0);
456 	}
457 closem:
458 	if (anychange) {
459 		status |= 1;
460 		if (rval == D_SAME)
461 			rval = D_DIFFER;
462 	}
463 	if (f1 != NULL)
464 		fclose(f1);
465 	if (f2 != NULL)
466 		fclose(f2);
467 
468 	return (rval);
469 }
470 
471 /*
472  * Check to see if the given files differ.
473  * Returns 0 if they are the same, 1 if different, and -1 on error.
474  * XXX - could use code from cmp(1) [faster]
475  */
476 static int
477 files_differ(FILE *f1, FILE *f2, int flags)
478 {
479 	char buf1[BUFSIZ], buf2[BUFSIZ];
480 	size_t i, j;
481 
482 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
483 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
484 		return (1);
485 	for (;;) {
486 		i = fread(buf1, 1, sizeof(buf1), f1);
487 		j = fread(buf2, 1, sizeof(buf2), f2);
488 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
489 			return (-1);
490 		if (i != j)
491 			return (1);
492 		if (i == 0)
493 			return (0);
494 		if (memcmp(buf1, buf2, i) != 0)
495 			return (1);
496 	}
497 }
498 
499 static FILE *
500 opentemp(const char *file)
501 {
502 	char buf[BUFSIZ], *tempdir, tempfile[PATH_MAX];
503 	ssize_t nread;
504 	int ifd, ofd;
505 
506 	if (strcmp(file, "-") == 0)
507 		ifd = STDIN_FILENO;
508 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
509 		return (NULL);
510 
511 	if ((tempdir = getenv("TMPDIR")) == NULL)
512 		tempdir = _PATH_TMP;
513 
514 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
515 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
516 	    sizeof(tempfile)) {
517 		close(ifd);
518 		errno = ENAMETOOLONG;
519 		return (NULL);
520 	}
521 
522 	if ((ofd = mkstemp(tempfile)) < 0) {
523 		close(ifd);
524 		return (NULL);
525 	}
526 	unlink(tempfile);
527 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
528 		if (write(ofd, buf, nread) != nread) {
529 			close(ifd);
530 			close(ofd);
531 			return (NULL);
532 		}
533 	}
534 	close(ifd);
535 	lseek(ofd, (off_t)0, SEEK_SET);
536 	return (fdopen(ofd, "r"));
537 }
538 
539 char *
540 splice(char *dir, char *file)
541 {
542 	char *tail, *buf;
543 	size_t dirlen;
544 
545 	dirlen = strlen(dir);
546 	while (dirlen != 0 && dir[dirlen - 1] == '/')
547 	    dirlen--;
548 	if ((tail = strrchr(file, '/')) == NULL)
549 		tail = file;
550 	else
551 		tail++;
552 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
553 	return (buf);
554 }
555 
556 static void
557 prepare(int i, FILE *fd, off_t filesize, int flags)
558 {
559 	struct line *p;
560 	int j, h;
561 	size_t sz;
562 
563 	rewind(fd);
564 
565 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
566 	if (sz < 100)
567 		sz = 100;
568 
569 	p = xcalloc(sz + 3, sizeof(*p));
570 	for (j = 0; (h = readhash(fd, flags));) {
571 		if (j == sz) {
572 			sz = sz * 3 / 2;
573 			p = xrealloc(p, sz + 3, sizeof(*p));
574 		}
575 		p[++j].value = h;
576 	}
577 	len[i] = j;
578 	file[i] = p;
579 }
580 
581 static void
582 prune(void)
583 {
584 	int i, j;
585 
586 	for (pref = 0; pref < len[0] && pref < len[1] &&
587 	    file[0][pref + 1].value == file[1][pref + 1].value;
588 	    pref++)
589 		;
590 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
591 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
592 	    suff++)
593 		;
594 	for (j = 0; j < 2; j++) {
595 		sfile[j] = file[j] + pref;
596 		slen[j] = len[j] - pref - suff;
597 		for (i = 0; i <= slen[j]; i++)
598 			sfile[j][i].serial = i;
599 	}
600 }
601 
602 static void
603 equiv(struct line *a, int n, struct line *b, int m, int *c)
604 {
605 	int i, j;
606 
607 	i = j = 1;
608 	while (i <= n && j <= m) {
609 		if (a[i].value < b[j].value)
610 			a[i++].value = 0;
611 		else if (a[i].value == b[j].value)
612 			a[i++].value = j;
613 		else
614 			j++;
615 	}
616 	while (i <= n)
617 		a[i++].value = 0;
618 	b[m + 1].value = 0;
619 	j = 0;
620 	while (++j <= m) {
621 		c[j] = -b[j].serial;
622 		while (b[j + 1].value == b[j].value) {
623 			j++;
624 			c[j] = b[j].serial;
625 		}
626 	}
627 	c[j] = -1;
628 }
629 
630 /* Code taken from ping.c */
631 static int
632 isqrt(int n)
633 {
634 	int y, x = 1;
635 
636 	if (n == 0)
637 		return (0);
638 
639 	do { /* newton was a stinker */
640 		y = x;
641 		x = n / x;
642 		x += y;
643 		x /= 2;
644 	} while ((x - y) > 1 || (x - y) < -1);
645 
646 	return (x);
647 }
648 
649 static int
650 stone(int *a, int n, int *b, int *c, int flags)
651 {
652 	int i, k, y, j, l;
653 	int oldc, tc, oldl, sq;
654 	u_int numtries, bound;
655 
656 	if (flags & D_MINIMAL)
657 		bound = UINT_MAX;
658 	else {
659 		sq = isqrt(n);
660 		bound = MAXIMUM(256, sq);
661 	}
662 
663 	k = 0;
664 	c[0] = newcand(0, 0, 0);
665 	for (i = 1; i <= n; i++) {
666 		j = a[i];
667 		if (j == 0)
668 			continue;
669 		y = -b[j];
670 		oldl = 0;
671 		oldc = c[0];
672 		numtries = 0;
673 		do {
674 			if (y <= clist[oldc].y)
675 				continue;
676 			l = search(c, k, y);
677 			if (l != oldl + 1)
678 				oldc = c[l - 1];
679 			if (l <= k) {
680 				if (clist[c[l]].y <= y)
681 					continue;
682 				tc = c[l];
683 				c[l] = newcand(i, y, oldc);
684 				oldc = tc;
685 				oldl = l;
686 				numtries++;
687 			} else {
688 				c[l] = newcand(i, y, oldc);
689 				k++;
690 				break;
691 			}
692 		} while ((y = b[++j]) > 0 && numtries < bound);
693 	}
694 	return (k);
695 }
696 
697 static int
698 newcand(int x, int y, int pred)
699 {
700 	struct cand *q;
701 
702 	if (clen == clistlen) {
703 		clistlen = clistlen * 11 / 10;
704 		clist = xrealloc(clist, clistlen, sizeof(*clist));
705 	}
706 	q = clist + clen;
707 	q->x = x;
708 	q->y = y;
709 	q->pred = pred;
710 	return (clen++);
711 }
712 
713 static int
714 search(int *c, int k, int y)
715 {
716 	int i, j, l, t;
717 
718 	if (clist[c[k]].y < y)	/* quick look for typical case */
719 		return (k + 1);
720 	i = 0;
721 	j = k + 1;
722 	for (;;) {
723 		l = (i + j) / 2;
724 		if (l <= i)
725 			break;
726 		t = clist[c[l]].y;
727 		if (t > y)
728 			j = l;
729 		else if (t < y)
730 			i = l;
731 		else
732 			return (l);
733 	}
734 	return (l + 1);
735 }
736 
737 static void
738 unravel(int p)
739 {
740 	struct cand *q;
741 	int i;
742 
743 	for (i = 0; i <= len[0]; i++)
744 		J[i] = i <= pref ? i :
745 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
746 	for (q = clist + p; q->y != 0; q = clist + q->pred)
747 		J[q->x + pref] = q->y + pref;
748 }
749 
750 /*
751  * Check does double duty:
752  *  1.	ferret out any fortuitous correspondences due
753  *	to confounding by hashing (which result in "jackpot")
754  *  2.  collect random access indexes to the two files
755  */
756 static void
757 check(FILE *f1, FILE *f2, int flags)
758 {
759 	int i, j, jackpot, c, d;
760 	long ctold, ctnew;
761 
762 	rewind(f1);
763 	rewind(f2);
764 	j = 1;
765 	ixold[0] = ixnew[0] = 0;
766 	jackpot = 0;
767 	ctold = ctnew = 0;
768 	for (i = 1; i <= len[0]; i++) {
769 		if (J[i] == 0) {
770 			ixold[i] = ctold += skipline(f1);
771 			continue;
772 		}
773 		while (j < J[i]) {
774 			ixnew[j] = ctnew += skipline(f2);
775 			j++;
776 		}
777 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
778 			for (;;) {
779 				c = getc(f1);
780 				d = getc(f2);
781 				/*
782 				 * GNU diff ignores a missing newline
783 				 * in one file for -b or -w.
784 				 */
785 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
786 					if (c == EOF && d == '\n') {
787 						ctnew++;
788 						break;
789 					} else if (c == '\n' && d == EOF) {
790 						ctold++;
791 						break;
792 					}
793 				}
794 				ctold++;
795 				ctnew++;
796 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
797 				    isspace(d)) {
798 					do {
799 						if (c == '\n')
800 							break;
801 						ctold++;
802 					} while (isspace(c = getc(f1)));
803 					do {
804 						if (d == '\n')
805 							break;
806 						ctnew++;
807 					} while (isspace(d = getc(f2)));
808 				} else if ((flags & D_IGNOREBLANKS)) {
809 					while (isspace(c) && c != '\n') {
810 						c = getc(f1);
811 						ctold++;
812 					}
813 					while (isspace(d) && d != '\n') {
814 						d = getc(f2);
815 						ctnew++;
816 					}
817 				}
818 				if (chrtran[c] != chrtran[d]) {
819 					jackpot++;
820 					J[i] = 0;
821 					if (c != '\n' && c != EOF)
822 						ctold += skipline(f1);
823 					if (d != '\n' && c != EOF)
824 						ctnew += skipline(f2);
825 					break;
826 				}
827 				if (c == '\n' || c == EOF)
828 					break;
829 			}
830 		} else {
831 			for (;;) {
832 				ctold++;
833 				ctnew++;
834 				if ((c = getc(f1)) != (d = getc(f2))) {
835 					/* jackpot++; */
836 					J[i] = 0;
837 					if (c != '\n' && c != EOF)
838 						ctold += skipline(f1);
839 					if (d != '\n' && c != EOF)
840 						ctnew += skipline(f2);
841 					break;
842 				}
843 				if (c == '\n' || c == EOF)
844 					break;
845 			}
846 		}
847 		ixold[i] = ctold;
848 		ixnew[j] = ctnew;
849 		j++;
850 	}
851 	for (; j <= len[1]; j++)
852 		ixnew[j] = ctnew += skipline(f2);
853 	/*
854 	 * if (jackpot)
855 	 *	fprintf(stderr, "jackpot\n");
856 	 */
857 }
858 
859 /* shellsort CACM #201 */
860 static void
861 sort(struct line *a, int n)
862 {
863 	struct line *ai, *aim, w;
864 	int j, m = 0, k;
865 
866 	if (n == 0)
867 		return;
868 	for (j = 1; j <= n; j *= 2)
869 		m = 2 * j - 1;
870 	for (m /= 2; m != 0; m /= 2) {
871 		k = n - m;
872 		for (j = 1; j <= k; j++) {
873 			for (ai = &a[j]; ai > a; ai -= m) {
874 				aim = &ai[m];
875 				if (aim < ai)
876 					break;	/* wraparound */
877 				if (aim->value > ai[0].value ||
878 				    (aim->value == ai[0].value &&
879 					aim->serial > ai[0].serial))
880 					break;
881 				w.value = ai[0].value;
882 				ai[0].value = aim->value;
883 				aim->value = w.value;
884 				w.serial = ai[0].serial;
885 				ai[0].serial = aim->serial;
886 				aim->serial = w.serial;
887 			}
888 		}
889 	}
890 }
891 
892 static void
893 unsort(struct line *f, int l, int *b)
894 {
895 	int *a, i;
896 
897 	a = xcalloc(l + 1, sizeof(*a));
898 	for (i = 1; i <= l; i++)
899 		a[f[i].serial] = f[i].value;
900 	for (i = 1; i <= l; i++)
901 		b[i] = a[i];
902 	xfree(a);
903 }
904 
905 static int
906 skipline(FILE *f)
907 {
908 	int i, c;
909 
910 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
911 		continue;
912 	return (i);
913 }
914 
915 static void
916 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
917 {
918 	int m, i0, i1, j0, j1;
919 
920 	rewind(f1);
921 	rewind(f2);
922 	m = len[0];
923 	J[0] = 0;
924 	J[m + 1] = len[1] + 1;
925 	if (diff_format != D_EDIT) {
926 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
927 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
928 				i0++;
929 			j0 = J[i0 - 1] + 1;
930 			i1 = i0 - 1;
931 			while (i1 < m && J[i1 + 1] == 0)
932 				i1++;
933 			j1 = J[i1 + 1] - 1;
934 			J[i1] = j1;
935 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
936 		}
937 	} else {
938 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
939 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
940 				i0--;
941 			j0 = J[i0 + 1] - 1;
942 			i1 = i0 + 1;
943 			while (i1 > 1 && J[i1 - 1] == 0)
944 				i1--;
945 			j1 = J[i1 - 1] + 1;
946 			J[i1] = j1;
947 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
948 		}
949 	}
950 	if (m == 0)
951 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
952 	if (diff_format == D_IFDEF) {
953 		for (;;) {
954 #define	c i0
955 			if ((c = getc(f1)) == EOF)
956 				return;
957 			diff_output("%c", c);
958 		}
959 #undef c
960 	}
961 	if (anychange != 0) {
962 		if (diff_format == D_CONTEXT)
963 			dump_context_vec(f1, f2, flags);
964 		else if (diff_format == D_UNIFIED)
965 			dump_unified_vec(f1, f2, flags);
966 	}
967 }
968 
969 static void
970 range(int a, int b, char *separator)
971 {
972 	diff_output("%d", a > b ? b : a);
973 	if (a < b)
974 		diff_output("%s%d", separator, b);
975 }
976 
977 static void
978 uni_range(int a, int b)
979 {
980 	if (a < b)
981 		diff_output("%d,%d", a, b - a + 1);
982 	else if (a == b)
983 		diff_output("%d", b);
984 	else
985 		diff_output("%d,0", b);
986 }
987 
988 static char *
989 preadline(int fd, size_t rlen, off_t off)
990 {
991 	char *line;
992 	ssize_t nr;
993 
994 	line = xmalloc(rlen + 1);
995 	if ((nr = pread(fd, line, rlen, off)) < 0)
996 		err(2, "preadline");
997 	if (nr > 0 && line[nr-1] == '\n')
998 		nr--;
999 	line[nr] = '\0';
1000 	return (line);
1001 }
1002 
1003 static int
1004 ignoreline(char *line)
1005 {
1006 	int ret;
1007 
1008 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1009 	xfree(line);
1010 	return (ret == 0);	/* if it matched, it should be ignored. */
1011 }
1012 
1013 /*
1014  * Indicate that there is a difference between lines a and b of the from file
1015  * to get to lines c to d of the to file.  If a is greater then b then there
1016  * are no lines in the from file involved and this means that there were
1017  * lines appended (beginning at b).  If c is greater than d then there are
1018  * lines missing from the to file.
1019  */
1020 static void
1021 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1022     int *pflags)
1023 {
1024 	static size_t max_context = 64;
1025 	int i;
1026 
1027 restart:
1028 	if (diff_format != D_IFDEF && a > b && c > d)
1029 		return;
1030 	if (ignore_pats != NULL) {
1031 		char *line;
1032 		/*
1033 		 * All lines in the change, insert, or delete must
1034 		 * match an ignore pattern for the change to be
1035 		 * ignored.
1036 		 */
1037 		if (a <= b) {		/* Changes and deletes. */
1038 			for (i = a; i <= b; i++) {
1039 				line = preadline(fileno(f1),
1040 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1041 				if (!ignoreline(line))
1042 					goto proceed;
1043 			}
1044 		}
1045 		if (a > b || c <= d) {	/* Changes and inserts. */
1046 			for (i = c; i <= d; i++) {
1047 				line = preadline(fileno(f2),
1048 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1049 				if (!ignoreline(line))
1050 					goto proceed;
1051 			}
1052 		}
1053 		return;
1054 	}
1055 proceed:
1056 	if (*pflags & D_HEADER) {
1057 		diff_output("%s %s %s\n", diffargs, file1, file2);
1058 		*pflags &= ~D_HEADER;
1059 	}
1060 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1061 		/*
1062 		 * Allocate change records as needed.
1063 		 */
1064 		if (context_vec_ptr == context_vec_end - 1) {
1065 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1066 			max_context <<= 1;
1067 			context_vec_start = xrealloc(context_vec_start,
1068 			    max_context, sizeof(*context_vec_start));
1069 			context_vec_end = context_vec_start + max_context;
1070 			context_vec_ptr = context_vec_start + offset;
1071 		}
1072 		if (anychange == 0) {
1073 			/*
1074 			 * Print the context/unidiff header first time through.
1075 			 */
1076 			print_header(file1, file2);
1077 			anychange = 1;
1078 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1079 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1080 			/*
1081 			 * If this change is more than 'diff_context' lines from the
1082 			 * previous change, dump the record and reset it.
1083 			 */
1084 			if (diff_format == D_CONTEXT)
1085 				dump_context_vec(f1, f2, *pflags);
1086 			else
1087 				dump_unified_vec(f1, f2, *pflags);
1088 		}
1089 		context_vec_ptr++;
1090 		context_vec_ptr->a = a;
1091 		context_vec_ptr->b = b;
1092 		context_vec_ptr->c = c;
1093 		context_vec_ptr->d = d;
1094 		return;
1095 	}
1096 	if (anychange == 0)
1097 		anychange = 1;
1098 	switch (diff_format) {
1099 	case D_BRIEF:
1100 		return;
1101 	case D_NORMAL:
1102 	case D_EDIT:
1103 		range(a, b, ",");
1104 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1105 		if (diff_format == D_NORMAL)
1106 			range(c, d, ",");
1107 		diff_output("\n");
1108 		break;
1109 	case D_REVERSE:
1110 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1111 		range(a, b, " ");
1112 		diff_output("\n");
1113 		break;
1114 	case D_NREVERSE:
1115 		if (a > b)
1116 			diff_output("a%d %d\n", b, d - c + 1);
1117 		else {
1118 			diff_output("d%d %d\n", a, b - a + 1);
1119 			if (!(c > d))
1120 				/* add changed lines */
1121 				diff_output("a%d %d\n", b, d - c + 1);
1122 		}
1123 		break;
1124 	}
1125 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1126 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1127 		if (a <= b && c <= d && diff_format == D_NORMAL)
1128 			diff_output("---\n");
1129 	}
1130 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1131 	if (i != 0 && diff_format == D_EDIT) {
1132 		/*
1133 		 * A non-zero return value for D_EDIT indicates that the
1134 		 * last line printed was a bare dot (".") that has been
1135 		 * escaped as ".." to prevent ed(1) from misinterpreting
1136 		 * it.  We have to add a substitute command to change this
1137 		 * back and restart where we left off.
1138 		 */
1139 		diff_output(".\n");
1140 		diff_output("%ds/^\\.\\././\n", a);
1141 		a += i;
1142 		c += i;
1143 		goto restart;
1144 	}
1145 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1146 		diff_output(".\n");
1147 	if (inifdef) {
1148 		diff_output("#endif /* %s */\n", ifdefname);
1149 		inifdef = 0;
1150 	}
1151 }
1152 
1153 static int
1154 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1155 {
1156 	int i, j, c, lastc, col, nc;
1157 
1158 	/*
1159 	 * When doing #ifdef's, copy down to current line
1160 	 * if this is the first file, so that stuff makes it to output.
1161 	 */
1162 	if (diff_format == D_IFDEF && oldfile) {
1163 		long curpos = ftell(lb);
1164 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1165 		nc = f[a > b ? b : a - 1] - curpos;
1166 		for (i = 0; i < nc; i++)
1167 			diff_output("%c", getc(lb));
1168 	}
1169 	if (a > b)
1170 		return (0);
1171 	if (diff_format == D_IFDEF) {
1172 		if (inifdef) {
1173 			diff_output("#else /* %s%s */\n",
1174 			    oldfile == 1 ? "!" : "", ifdefname);
1175 		} else {
1176 			if (oldfile)
1177 				diff_output("#ifndef %s\n", ifdefname);
1178 			else
1179 				diff_output("#ifdef %s\n", ifdefname);
1180 		}
1181 		inifdef = 1 + oldfile;
1182 	}
1183 	for (i = a; i <= b; i++) {
1184 		fseek(lb, f[i - 1], SEEK_SET);
1185 		nc = f[i] - f[i - 1];
1186 		if (diff_format != D_IFDEF && ch != '\0') {
1187 			diff_output("%c", ch);
1188 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1189 			    || diff_format == D_UNIFIED))
1190 				diff_output("\t");
1191 			else if (diff_format != D_UNIFIED)
1192 				diff_output(" ");
1193 		}
1194 		col = 0;
1195 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1196 			if ((c = getc(lb)) == EOF) {
1197 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1198 				    diff_format == D_NREVERSE)
1199 					warnx("No newline at end of file");
1200 				else
1201 					diff_output("\n\\ No newline at end of "
1202 					    "file\n");
1203 				return (0);
1204 			}
1205 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1206 				do {
1207 					diff_output(" ");
1208 				} while (++col & 7);
1209 			} else {
1210 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1211 				    && lastc == '.') {
1212 					/*
1213 					 * Don't print a bare "." line
1214 					 * since that will confuse ed(1).
1215 					 * Print ".." instead and return,
1216 					 * giving the caller an offset
1217 					 * from which to restart.
1218 					 */
1219 					diff_output(".\n");
1220 					return (i - a + 1);
1221 				}
1222 				diff_output("%c", c);
1223 				col++;
1224 			}
1225 		}
1226 	}
1227 	return (0);
1228 }
1229 
1230 /*
1231  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1232  */
1233 static int
1234 readhash(FILE *f, int flags)
1235 {
1236 	int i, t, space;
1237 	int sum;
1238 
1239 	sum = 1;
1240 	space = 0;
1241 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1242 		if (flags & D_IGNORECASE)
1243 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1244 				if (t == EOF) {
1245 					if (i == 0)
1246 						return (0);
1247 					break;
1248 				}
1249 				sum = sum * 127 + chrtran[t];
1250 			}
1251 		else
1252 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1253 				if (t == EOF) {
1254 					if (i == 0)
1255 						return (0);
1256 					break;
1257 				}
1258 				sum = sum * 127 + t;
1259 			}
1260 	} else {
1261 		for (i = 0;;) {
1262 			switch (t = getc(f)) {
1263 			case '\t':
1264 			case '\r':
1265 			case '\v':
1266 			case '\f':
1267 			case ' ':
1268 				space++;
1269 				continue;
1270 			default:
1271 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1272 					i++;
1273 					space = 0;
1274 				}
1275 				sum = sum * 127 + chrtran[t];
1276 				i++;
1277 				continue;
1278 			case EOF:
1279 				if (i == 0)
1280 					return (0);
1281 				/* FALLTHROUGH */
1282 			case '\n':
1283 				break;
1284 			}
1285 			break;
1286 		}
1287 	}
1288 	/*
1289 	 * There is a remote possibility that we end up with a zero sum.
1290 	 * Zero is used as an EOF marker, so return 1 instead.
1291 	 */
1292 	return (sum == 0 ? 1 : sum);
1293 }
1294 
1295 static int
1296 asciifile(FILE *f)
1297 {
1298 	unsigned char buf[BUFSIZ];
1299 	size_t cnt;
1300 
1301 	if (f == NULL)
1302 		return (1);
1303 
1304 	rewind(f);
1305 	cnt = fread(buf, 1, sizeof(buf), f);
1306 	return (memchr(buf, '\0', cnt) == NULL);
1307 }
1308 
1309 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1310 
1311 static char *
1312 match_function(const long *f, int pos, FILE *fp)
1313 {
1314 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1315 	size_t nc;
1316 	int last = lastline;
1317 	char *state = NULL;
1318 
1319 	lastline = pos;
1320 	while (pos > last) {
1321 		fseek(fp, f[pos - 1], SEEK_SET);
1322 		nc = f[pos] - f[pos - 1];
1323 		if (nc >= sizeof(buf))
1324 			nc = sizeof(buf) - 1;
1325 		nc = fread(buf, 1, nc, fp);
1326 		if (nc > 0) {
1327 			buf[nc] = '\0';
1328 			buf[strcspn(buf, "\n")] = '\0';
1329 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1330 				if (begins_with(buf, "private:")) {
1331 					if (!state)
1332 						state = " (private)";
1333 				} else if (begins_with(buf, "protected:")) {
1334 					if (!state)
1335 						state = " (protected)";
1336 				} else if (begins_with(buf, "public:")) {
1337 					if (!state)
1338 						state = " (public)";
1339 				} else {
1340 					strlcpy(lastbuf, buf, sizeof lastbuf);
1341 					if (state)
1342 						strlcat(lastbuf, state,
1343 						    sizeof lastbuf);
1344 					lastmatchline = pos;
1345 					return lastbuf;
1346 				}
1347 			}
1348 		}
1349 		pos--;
1350 	}
1351 	return lastmatchline > 0 ? lastbuf : NULL;
1352 }
1353 
1354 /* dump accumulated "context" diff changes */
1355 static void
1356 dump_context_vec(FILE *f1, FILE *f2, int flags)
1357 {
1358 	struct context_vec *cvp = context_vec_start;
1359 	int lowa, upb, lowc, upd, do_output;
1360 	int a, b, c, d;
1361 	char ch, *f;
1362 
1363 	if (context_vec_start > context_vec_ptr)
1364 		return;
1365 
1366 	b = d = 0;		/* gcc */
1367 	lowa = MAXIMUM(1, cvp->a - diff_context);
1368 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1369 	lowc = MAXIMUM(1, cvp->c - diff_context);
1370 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1371 
1372 	diff_output("***************");
1373 	if ((flags & D_PROTOTYPE)) {
1374 		f = match_function(ixold, lowa-1, f1);
1375 		if (f != NULL)
1376 			diff_output(" %s", f);
1377 	}
1378 	diff_output("\n*** ");
1379 	range(lowa, upb, ",");
1380 	diff_output(" ****\n");
1381 
1382 	/*
1383 	 * Output changes to the "old" file.  The first loop suppresses
1384 	 * output if there were no changes to the "old" file (we'll see
1385 	 * the "old" lines as context in the "new" list).
1386 	 */
1387 	do_output = 0;
1388 	for (; cvp <= context_vec_ptr; cvp++)
1389 		if (cvp->a <= cvp->b) {
1390 			cvp = context_vec_start;
1391 			do_output++;
1392 			break;
1393 		}
1394 	if (do_output) {
1395 		while (cvp <= context_vec_ptr) {
1396 			a = cvp->a;
1397 			b = cvp->b;
1398 			c = cvp->c;
1399 			d = cvp->d;
1400 
1401 			if (a <= b && c <= d)
1402 				ch = 'c';
1403 			else
1404 				ch = (a <= b) ? 'd' : 'a';
1405 
1406 			if (ch == 'a')
1407 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1408 			else {
1409 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1410 				fetch(ixold, a, b, f1,
1411 				    ch == 'c' ? '!' : '-', 0, flags);
1412 			}
1413 			lowa = b + 1;
1414 			cvp++;
1415 		}
1416 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1417 	}
1418 	/* output changes to the "new" file */
1419 	diff_output("--- ");
1420 	range(lowc, upd, ",");
1421 	diff_output(" ----\n");
1422 
1423 	do_output = 0;
1424 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1425 		if (cvp->c <= cvp->d) {
1426 			cvp = context_vec_start;
1427 			do_output++;
1428 			break;
1429 		}
1430 	if (do_output) {
1431 		while (cvp <= context_vec_ptr) {
1432 			a = cvp->a;
1433 			b = cvp->b;
1434 			c = cvp->c;
1435 			d = cvp->d;
1436 
1437 			if (a <= b && c <= d)
1438 				ch = 'c';
1439 			else
1440 				ch = (a <= b) ? 'd' : 'a';
1441 
1442 			if (ch == 'd')
1443 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1444 			else {
1445 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1446 				fetch(ixnew, c, d, f2,
1447 				    ch == 'c' ? '!' : '+', 0, flags);
1448 			}
1449 			lowc = d + 1;
1450 			cvp++;
1451 		}
1452 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1453 	}
1454 	context_vec_ptr = context_vec_start - 1;
1455 }
1456 
1457 /* dump accumulated "unified" diff changes */
1458 static void
1459 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1460 {
1461 	struct context_vec *cvp = context_vec_start;
1462 	int lowa, upb, lowc, upd;
1463 	int a, b, c, d;
1464 	char ch, *f;
1465 
1466 	if (context_vec_start > context_vec_ptr)
1467 		return;
1468 
1469 	b = d = 0;		/* gcc */
1470 	lowa = MAXIMUM(1, cvp->a - diff_context);
1471 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1472 	lowc = MAXIMUM(1, cvp->c - diff_context);
1473 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1474 
1475 	diff_output("@@ -");
1476 	uni_range(lowa, upb);
1477 	diff_output(" +");
1478 	uni_range(lowc, upd);
1479 	diff_output(" @@");
1480 	if ((flags & D_PROTOTYPE)) {
1481 		f = match_function(ixold, lowa-1, f1);
1482 		if (f != NULL)
1483 			diff_output(" %s", f);
1484 	}
1485 	diff_output("\n");
1486 
1487 	/*
1488 	 * Output changes in "unified" diff format--the old and new lines
1489 	 * are printed together.
1490 	 */
1491 	for (; cvp <= context_vec_ptr; cvp++) {
1492 		a = cvp->a;
1493 		b = cvp->b;
1494 		c = cvp->c;
1495 		d = cvp->d;
1496 
1497 		/*
1498 		 * c: both new and old changes
1499 		 * d: only changes in the old file
1500 		 * a: only changes in the new file
1501 		 */
1502 		if (a <= b && c <= d)
1503 			ch = 'c';
1504 		else
1505 			ch = (a <= b) ? 'd' : 'a';
1506 
1507 		switch (ch) {
1508 		case 'c':
1509 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1510 			fetch(ixold, a, b, f1, '-', 0, flags);
1511 			fetch(ixnew, c, d, f2, '+', 0, flags);
1512 			break;
1513 		case 'd':
1514 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515 			fetch(ixold, a, b, f1, '-', 0, flags);
1516 			break;
1517 		case 'a':
1518 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1519 			fetch(ixnew, c, d, f2, '+', 0, flags);
1520 			break;
1521 		}
1522 		lowa = b + 1;
1523 		lowc = d + 1;
1524 	}
1525 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1526 
1527 	context_vec_ptr = context_vec_start - 1;
1528 }
1529 
1530 static void
1531 print_header(const char *file1, const char *file2)
1532 {
1533 	if (label[0] != NULL)
1534 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1535 		    label[0]);
1536 	else
1537 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1538 		    file1, ctime(&stb1.st_mtime));
1539 	if (label[1] != NULL)
1540 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1541 		    label[1]);
1542 	else
1543 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1544 		    file2, ctime(&stb2.st_mtime));
1545 }
1546