xref: /openbsd-src/usr.bin/diff/diffreg.c (revision b9fc9a728fce9c4289b7e9a992665e28d5629a54)
1 /*	$OpenBSD: diffreg.c,v 1.84 2015/01/16 06:40:07 deraadt Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 
70 #include <ctype.h>
71 #include <err.h>
72 #include <errno.h>
73 #include <fcntl.h>
74 #include <stddef.h>
75 #include <stdio.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <unistd.h>
79 #include <limits.h>
80 
81 #include "diff.h"
82 #include "pathnames.h"
83 #include "xmalloc.h"
84 
85 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
86 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
87 
88 /*
89  * diff - compare two files.
90  */
91 
92 /*
93  *	Uses an algorithm due to Harold Stone, which finds
94  *	a pair of longest identical subsequences in the two
95  *	files.
96  *
97  *	The major goal is to generate the match vector J.
98  *	J[i] is the index of the line in file1 corresponding
99  *	to line i file0. J[i] = 0 if there is no
100  *	such line in file1.
101  *
102  *	Lines are hashed so as to work in core. All potential
103  *	matches are located by sorting the lines of each file
104  *	on the hash (called ``value''). In particular, this
105  *	collects the equivalence classes in file1 together.
106  *	Subroutine equiv replaces the value of each line in
107  *	file0 by the index of the first element of its
108  *	matching equivalence in (the reordered) file1.
109  *	To save space equiv squeezes file1 into a single
110  *	array member in which the equivalence classes
111  *	are simply concatenated, except that their first
112  *	members are flagged by changing sign.
113  *
114  *	Next the indices that point into member are unsorted into
115  *	array class according to the original order of file0.
116  *
117  *	The cleverness lies in routine stone. This marches
118  *	through the lines of file0, developing a vector klist
119  *	of "k-candidates". At step i a k-candidate is a matched
120  *	pair of lines x,y (x in file0 y in file1) such that
121  *	there is a common subsequence of length k
122  *	between the first i lines of file0 and the first y
123  *	lines of file1, but there is no such subsequence for
124  *	any smaller y. x is the earliest possible mate to y
125  *	that occurs in such a subsequence.
126  *
127  *	Whenever any of the members of the equivalence class of
128  *	lines in file1 matable to a line in file0 has serial number
129  *	less than the y of some k-candidate, that k-candidate
130  *	with the smallest such y is replaced. The new
131  *	k-candidate is chained (via pred) to the current
132  *	k-1 candidate so that the actual subsequence can
133  *	be recovered. When a member has serial number greater
134  *	that the y of all k-candidates, the klist is extended.
135  *	At the end, the longest subsequence is pulled out
136  *	and placed in the array J by unravel
137  *
138  *	With J in hand, the matches there recorded are
139  *	check'ed against reality to assure that no spurious
140  *	matches have crept in due to hashing. If they have,
141  *	they are broken, and "jackpot" is recorded--a harmless
142  *	matter except that a true match for a spuriously
143  *	mated line may now be unnecessarily reported as a change.
144  *
145  *	Much of the complexity of the program comes simply
146  *	from trying to minimize core utilization and
147  *	maximize the range of doable problems by dynamically
148  *	allocating what is needed and reusing what is not.
149  *	The core requirements for problems larger than somewhat
150  *	are (in words) 2*length(file0) + length(file1) +
151  *	3*(number of k-candidates installed),  typically about
152  *	6n words for files of length n.
153  */
154 
155 struct cand {
156 	int	x;
157 	int	y;
158 	int	pred;
159 };
160 
161 struct line {
162 	int	serial;
163 	int	value;
164 } *file[2];
165 
166 /*
167  * The following struct is used to record change information when
168  * doing a "context" or "unified" diff.  (see routine "change" to
169  * understand the highly mnemonic field names)
170  */
171 struct context_vec {
172 	int	a;		/* start line in old file */
173 	int	b;		/* end line in old file */
174 	int	c;		/* start line in new file */
175 	int	d;		/* end line in new file */
176 };
177 
178 #define	diff_output	printf
179 static FILE	*opentemp(const char *);
180 static void	 output(char *, FILE *, char *, FILE *, int);
181 static void	 check(FILE *, FILE *, int);
182 static void	 range(int, int, char *);
183 static void	 uni_range(int, int);
184 static void	 dump_context_vec(FILE *, FILE *, int);
185 static void	 dump_unified_vec(FILE *, FILE *, int);
186 static void	 prepare(int, FILE *, off_t, int);
187 static void	 prune(void);
188 static void	 equiv(struct line *, int, struct line *, int, int *);
189 static void	 unravel(int);
190 static void	 unsort(struct line *, int, int *);
191 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
192 static void	 sort(struct line *, int);
193 static void	 print_header(const char *, const char *);
194 static int	 ignoreline(char *);
195 static int	 asciifile(FILE *);
196 static int	 fetch(long *, int, int, FILE *, int, int, int);
197 static int	 newcand(int, int, int);
198 static int	 search(int *, int, int);
199 static int	 skipline(FILE *);
200 static int	 isqrt(int);
201 static int	 stone(int *, int, int *, int *, int);
202 static int	 readhash(FILE *, int);
203 static int	 files_differ(FILE *, FILE *, int);
204 static char	*match_function(const long *, int, FILE *);
205 static char	*preadline(int, size_t, off_t);
206 
207 static int  *J;			/* will be overlaid on class */
208 static int  *class;		/* will be overlaid on file[0] */
209 static int  *klist;		/* will be overlaid on file[0] after class */
210 static int  *member;		/* will be overlaid on file[1] */
211 static int   clen;
212 static int   inifdef;		/* whether or not we are in a #ifdef block */
213 static int   len[2];
214 static int   pref, suff;	/* length of prefix and suffix */
215 static int   slen[2];
216 static int   anychange;
217 static long *ixnew;		/* will be overlaid on file[1] */
218 static long *ixold;		/* will be overlaid on klist */
219 static struct cand *clist;	/* merely a free storage pot for candidates */
220 static int   clistlen;		/* the length of clist */
221 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
222 static u_char *chrtran;		/* translation table for case-folding */
223 static struct context_vec *context_vec_start;
224 static struct context_vec *context_vec_end;
225 static struct context_vec *context_vec_ptr;
226 
227 #define FUNCTION_CONTEXT_SIZE	55
228 static char lastbuf[FUNCTION_CONTEXT_SIZE];
229 static int lastline;
230 static int lastmatchline;
231 
232 
233 /*
234  * chrtran points to one of 2 translation tables: cup2low if folding upper to
235  * lower case clow2low if not folding case
236  */
237 u_char clow2low[256] = {
238 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
239 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
240 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
241 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
242 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
243 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
244 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
245 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
246 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
247 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
248 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
249 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
250 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
251 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
252 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
253 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
254 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
255 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
256 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
257 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
258 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
259 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
260 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
261 	0xfd, 0xfe, 0xff
262 };
263 
264 u_char cup2low[256] = {
265 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
266 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
267 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
268 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
269 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
270 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
271 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
272 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
274 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
275 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
276 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
277 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
278 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
279 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
280 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
281 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
282 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
283 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
284 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
285 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
286 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
287 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
288 	0xfd, 0xfe, 0xff
289 };
290 
291 int
292 diffreg(char *file1, char *file2, int flags)
293 {
294 	FILE *f1, *f2;
295 	int i, rval, ostdout = -1;
296 	pid_t pid = -1;
297 
298 	f1 = f2 = NULL;
299 	rval = D_SAME;
300 	anychange = 0;
301 	lastline = 0;
302 	lastmatchline = 0;
303 	context_vec_ptr = context_vec_start - 1;
304 	if (flags & D_IGNORECASE)
305 		chrtran = cup2low;
306 	else
307 		chrtran = clow2low;
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if ((flags & D_FORCEASCII) == 0 &&
367 	    (!asciifile(f1) || !asciifile(f2))) {
368 		rval = D_BINARY;
369 		status |= 1;
370 		goto closem;
371 	}
372 	if (lflag) {
373 		/* redirect stdout to pr */
374 		int pfd[2];
375 		char *header;
376 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
377 
378 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
379 		prargv[2] = header;
380 		fflush(stdout);
381 		rewind(stdout);
382 		pipe(pfd);
383 		switch ((pid = fork())) {
384 		case -1:
385 			warnx("No more processes");
386 			status |= 2;
387 			xfree(header);
388 			rval = D_ERROR;
389 			goto closem;
390 		case 0:
391 			/* child */
392 			if (pfd[0] != STDIN_FILENO) {
393 				dup2(pfd[0], STDIN_FILENO);
394 				close(pfd[0]);
395 			}
396 			close(pfd[1]);
397 			execv(_PATH_PR, prargv);
398 			_exit(127);
399 		default:
400 			/* parent */
401 			if (pfd[1] != STDOUT_FILENO) {
402 				ostdout = dup(STDOUT_FILENO);
403 				dup2(pfd[1], STDOUT_FILENO);
404 				close(pfd[1]);
405 			}
406 			close(pfd[0]);
407 			rewind(stdout);
408 			xfree(header);
409 		}
410 	}
411 	prepare(0, f1, stb1.st_size, flags);
412 	prepare(1, f2, stb2.st_size, flags);
413 
414 	prune();
415 	sort(sfile[0], slen[0]);
416 	sort(sfile[1], slen[1]);
417 
418 	member = (int *)file[1];
419 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
420 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
421 
422 	class = (int *)file[0];
423 	unsort(sfile[0], slen[0], class);
424 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
425 
426 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
427 	clen = 0;
428 	clistlen = 100;
429 	clist = xcalloc(clistlen, sizeof(*clist));
430 	i = stone(class, slen[0], member, klist, flags);
431 	xfree(member);
432 	xfree(class);
433 
434 	J = xrealloc(J, len[0] + 2, sizeof(*J));
435 	unravel(klist[i]);
436 	xfree(clist);
437 	xfree(klist);
438 
439 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
440 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
441 	check(f1, f2, flags);
442 	output(file1, f1, file2, f2, flags);
443 	if (ostdout != -1) {
444 		int wstatus;
445 
446 		/* close the pipe to pr and restore stdout */
447 		fflush(stdout);
448 		rewind(stdout);
449 		if (ostdout != STDOUT_FILENO) {
450 			close(STDOUT_FILENO);
451 			dup2(ostdout, STDOUT_FILENO);
452 			close(ostdout);
453 		}
454 		waitpid(pid, &wstatus, 0);
455 	}
456 closem:
457 	if (anychange) {
458 		status |= 1;
459 		if (rval == D_SAME)
460 			rval = D_DIFFER;
461 	}
462 	if (f1 != NULL)
463 		fclose(f1);
464 	if (f2 != NULL)
465 		fclose(f2);
466 
467 	return (rval);
468 }
469 
470 /*
471  * Check to see if the given files differ.
472  * Returns 0 if they are the same, 1 if different, and -1 on error.
473  * XXX - could use code from cmp(1) [faster]
474  */
475 static int
476 files_differ(FILE *f1, FILE *f2, int flags)
477 {
478 	char buf1[BUFSIZ], buf2[BUFSIZ];
479 	size_t i, j;
480 
481 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
482 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
483 		return (1);
484 	for (;;) {
485 		i = fread(buf1, 1, sizeof(buf1), f1);
486 		j = fread(buf2, 1, sizeof(buf2), f2);
487 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
488 			return (-1);
489 		if (i != j)
490 			return (1);
491 		if (i == 0)
492 			return (0);
493 		if (memcmp(buf1, buf2, i) != 0)
494 			return (1);
495 	}
496 }
497 
498 static FILE *
499 opentemp(const char *file)
500 {
501 	char buf[BUFSIZ], *tempdir, tempfile[PATH_MAX];
502 	ssize_t nread;
503 	int ifd, ofd;
504 
505 	if (strcmp(file, "-") == 0)
506 		ifd = STDIN_FILENO;
507 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
508 		return (NULL);
509 
510 	if ((tempdir = getenv("TMPDIR")) == NULL)
511 		tempdir = _PATH_TMP;
512 
513 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
514 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
515 	    sizeof(tempfile)) {
516 		close(ifd);
517 		errno = ENAMETOOLONG;
518 		return (NULL);
519 	}
520 
521 	if ((ofd = mkstemp(tempfile)) < 0) {
522 		close(ifd);
523 		return (NULL);
524 	}
525 	unlink(tempfile);
526 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
527 		if (write(ofd, buf, nread) != nread) {
528 			close(ifd);
529 			close(ofd);
530 			return (NULL);
531 		}
532 	}
533 	close(ifd);
534 	lseek(ofd, (off_t)0, SEEK_SET);
535 	return (fdopen(ofd, "r"));
536 }
537 
538 char *
539 splice(char *dir, char *file)
540 {
541 	char *tail, *buf;
542 	size_t dirlen;
543 
544 	dirlen = strlen(dir);
545 	while (dirlen != 0 && dir[dirlen - 1] == '/')
546 	    dirlen--;
547 	if ((tail = strrchr(file, '/')) == NULL)
548 		tail = file;
549 	else
550 		tail++;
551 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
552 	return (buf);
553 }
554 
555 static void
556 prepare(int i, FILE *fd, off_t filesize, int flags)
557 {
558 	struct line *p;
559 	int j, h;
560 	size_t sz;
561 
562 	rewind(fd);
563 
564 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
565 	if (sz < 100)
566 		sz = 100;
567 
568 	p = xcalloc(sz + 3, sizeof(*p));
569 	for (j = 0; (h = readhash(fd, flags));) {
570 		if (j == sz) {
571 			sz = sz * 3 / 2;
572 			p = xrealloc(p, sz + 3, sizeof(*p));
573 		}
574 		p[++j].value = h;
575 	}
576 	len[i] = j;
577 	file[i] = p;
578 }
579 
580 static void
581 prune(void)
582 {
583 	int i, j;
584 
585 	for (pref = 0; pref < len[0] && pref < len[1] &&
586 	    file[0][pref + 1].value == file[1][pref + 1].value;
587 	    pref++)
588 		;
589 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
590 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
591 	    suff++)
592 		;
593 	for (j = 0; j < 2; j++) {
594 		sfile[j] = file[j] + pref;
595 		slen[j] = len[j] - pref - suff;
596 		for (i = 0; i <= slen[j]; i++)
597 			sfile[j][i].serial = i;
598 	}
599 }
600 
601 static void
602 equiv(struct line *a, int n, struct line *b, int m, int *c)
603 {
604 	int i, j;
605 
606 	i = j = 1;
607 	while (i <= n && j <= m) {
608 		if (a[i].value < b[j].value)
609 			a[i++].value = 0;
610 		else if (a[i].value == b[j].value)
611 			a[i++].value = j;
612 		else
613 			j++;
614 	}
615 	while (i <= n)
616 		a[i++].value = 0;
617 	b[m + 1].value = 0;
618 	j = 0;
619 	while (++j <= m) {
620 		c[j] = -b[j].serial;
621 		while (b[j + 1].value == b[j].value) {
622 			j++;
623 			c[j] = b[j].serial;
624 		}
625 	}
626 	c[j] = -1;
627 }
628 
629 /* Code taken from ping.c */
630 static int
631 isqrt(int n)
632 {
633 	int y, x = 1;
634 
635 	if (n == 0)
636 		return (0);
637 
638 	do { /* newton was a stinker */
639 		y = x;
640 		x = n / x;
641 		x += y;
642 		x /= 2;
643 	} while ((x - y) > 1 || (x - y) < -1);
644 
645 	return (x);
646 }
647 
648 static int
649 stone(int *a, int n, int *b, int *c, int flags)
650 {
651 	int i, k, y, j, l;
652 	int oldc, tc, oldl, sq;
653 	u_int numtries, bound;
654 
655 	if (flags & D_MINIMAL)
656 		bound = UINT_MAX;
657 	else {
658 		sq = isqrt(n);
659 		bound = MAXIMUM(256, sq);
660 	}
661 
662 	k = 0;
663 	c[0] = newcand(0, 0, 0);
664 	for (i = 1; i <= n; i++) {
665 		j = a[i];
666 		if (j == 0)
667 			continue;
668 		y = -b[j];
669 		oldl = 0;
670 		oldc = c[0];
671 		numtries = 0;
672 		do {
673 			if (y <= clist[oldc].y)
674 				continue;
675 			l = search(c, k, y);
676 			if (l != oldl + 1)
677 				oldc = c[l - 1];
678 			if (l <= k) {
679 				if (clist[c[l]].y <= y)
680 					continue;
681 				tc = c[l];
682 				c[l] = newcand(i, y, oldc);
683 				oldc = tc;
684 				oldl = l;
685 				numtries++;
686 			} else {
687 				c[l] = newcand(i, y, oldc);
688 				k++;
689 				break;
690 			}
691 		} while ((y = b[++j]) > 0 && numtries < bound);
692 	}
693 	return (k);
694 }
695 
696 static int
697 newcand(int x, int y, int pred)
698 {
699 	struct cand *q;
700 
701 	if (clen == clistlen) {
702 		clistlen = clistlen * 11 / 10;
703 		clist = xrealloc(clist, clistlen, sizeof(*clist));
704 	}
705 	q = clist + clen;
706 	q->x = x;
707 	q->y = y;
708 	q->pred = pred;
709 	return (clen++);
710 }
711 
712 static int
713 search(int *c, int k, int y)
714 {
715 	int i, j, l, t;
716 
717 	if (clist[c[k]].y < y)	/* quick look for typical case */
718 		return (k + 1);
719 	i = 0;
720 	j = k + 1;
721 	for (;;) {
722 		l = (i + j) / 2;
723 		if (l <= i)
724 			break;
725 		t = clist[c[l]].y;
726 		if (t > y)
727 			j = l;
728 		else if (t < y)
729 			i = l;
730 		else
731 			return (l);
732 	}
733 	return (l + 1);
734 }
735 
736 static void
737 unravel(int p)
738 {
739 	struct cand *q;
740 	int i;
741 
742 	for (i = 0; i <= len[0]; i++)
743 		J[i] = i <= pref ? i :
744 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
745 	for (q = clist + p; q->y != 0; q = clist + q->pred)
746 		J[q->x + pref] = q->y + pref;
747 }
748 
749 /*
750  * Check does double duty:
751  *  1.	ferret out any fortuitous correspondences due
752  *	to confounding by hashing (which result in "jackpot")
753  *  2.  collect random access indexes to the two files
754  */
755 static void
756 check(FILE *f1, FILE *f2, int flags)
757 {
758 	int i, j, jackpot, c, d;
759 	long ctold, ctnew;
760 
761 	rewind(f1);
762 	rewind(f2);
763 	j = 1;
764 	ixold[0] = ixnew[0] = 0;
765 	jackpot = 0;
766 	ctold = ctnew = 0;
767 	for (i = 1; i <= len[0]; i++) {
768 		if (J[i] == 0) {
769 			ixold[i] = ctold += skipline(f1);
770 			continue;
771 		}
772 		while (j < J[i]) {
773 			ixnew[j] = ctnew += skipline(f2);
774 			j++;
775 		}
776 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
777 			for (;;) {
778 				c = getc(f1);
779 				d = getc(f2);
780 				/*
781 				 * GNU diff ignores a missing newline
782 				 * in one file for -b or -w.
783 				 */
784 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
785 					if (c == EOF && d == '\n') {
786 						ctnew++;
787 						break;
788 					} else if (c == '\n' && d == EOF) {
789 						ctold++;
790 						break;
791 					}
792 				}
793 				ctold++;
794 				ctnew++;
795 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
796 				    isspace(d)) {
797 					do {
798 						if (c == '\n')
799 							break;
800 						ctold++;
801 					} while (isspace(c = getc(f1)));
802 					do {
803 						if (d == '\n')
804 							break;
805 						ctnew++;
806 					} while (isspace(d = getc(f2)));
807 				} else if ((flags & D_IGNOREBLANKS)) {
808 					while (isspace(c) && c != '\n') {
809 						c = getc(f1);
810 						ctold++;
811 					}
812 					while (isspace(d) && d != '\n') {
813 						d = getc(f2);
814 						ctnew++;
815 					}
816 				}
817 				if (chrtran[c] != chrtran[d]) {
818 					jackpot++;
819 					J[i] = 0;
820 					if (c != '\n' && c != EOF)
821 						ctold += skipline(f1);
822 					if (d != '\n' && c != EOF)
823 						ctnew += skipline(f2);
824 					break;
825 				}
826 				if (c == '\n' || c == EOF)
827 					break;
828 			}
829 		} else {
830 			for (;;) {
831 				ctold++;
832 				ctnew++;
833 				if ((c = getc(f1)) != (d = getc(f2))) {
834 					/* jackpot++; */
835 					J[i] = 0;
836 					if (c != '\n' && c != EOF)
837 						ctold += skipline(f1);
838 					if (d != '\n' && c != EOF)
839 						ctnew += skipline(f2);
840 					break;
841 				}
842 				if (c == '\n' || c == EOF)
843 					break;
844 			}
845 		}
846 		ixold[i] = ctold;
847 		ixnew[j] = ctnew;
848 		j++;
849 	}
850 	for (; j <= len[1]; j++)
851 		ixnew[j] = ctnew += skipline(f2);
852 	/*
853 	 * if (jackpot)
854 	 *	fprintf(stderr, "jackpot\n");
855 	 */
856 }
857 
858 /* shellsort CACM #201 */
859 static void
860 sort(struct line *a, int n)
861 {
862 	struct line *ai, *aim, w;
863 	int j, m = 0, k;
864 
865 	if (n == 0)
866 		return;
867 	for (j = 1; j <= n; j *= 2)
868 		m = 2 * j - 1;
869 	for (m /= 2; m != 0; m /= 2) {
870 		k = n - m;
871 		for (j = 1; j <= k; j++) {
872 			for (ai = &a[j]; ai > a; ai -= m) {
873 				aim = &ai[m];
874 				if (aim < ai)
875 					break;	/* wraparound */
876 				if (aim->value > ai[0].value ||
877 				    (aim->value == ai[0].value &&
878 					aim->serial > ai[0].serial))
879 					break;
880 				w.value = ai[0].value;
881 				ai[0].value = aim->value;
882 				aim->value = w.value;
883 				w.serial = ai[0].serial;
884 				ai[0].serial = aim->serial;
885 				aim->serial = w.serial;
886 			}
887 		}
888 	}
889 }
890 
891 static void
892 unsort(struct line *f, int l, int *b)
893 {
894 	int *a, i;
895 
896 	a = xcalloc(l + 1, sizeof(*a));
897 	for (i = 1; i <= l; i++)
898 		a[f[i].serial] = f[i].value;
899 	for (i = 1; i <= l; i++)
900 		b[i] = a[i];
901 	xfree(a);
902 }
903 
904 static int
905 skipline(FILE *f)
906 {
907 	int i, c;
908 
909 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
910 		continue;
911 	return (i);
912 }
913 
914 static void
915 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
916 {
917 	int m, i0, i1, j0, j1;
918 
919 	rewind(f1);
920 	rewind(f2);
921 	m = len[0];
922 	J[0] = 0;
923 	J[m + 1] = len[1] + 1;
924 	if (diff_format != D_EDIT) {
925 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
926 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
927 				i0++;
928 			j0 = J[i0 - 1] + 1;
929 			i1 = i0 - 1;
930 			while (i1 < m && J[i1 + 1] == 0)
931 				i1++;
932 			j1 = J[i1 + 1] - 1;
933 			J[i1] = j1;
934 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
935 		}
936 	} else {
937 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
938 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
939 				i0--;
940 			j0 = J[i0 + 1] - 1;
941 			i1 = i0 + 1;
942 			while (i1 > 1 && J[i1 - 1] == 0)
943 				i1--;
944 			j1 = J[i1 - 1] + 1;
945 			J[i1] = j1;
946 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
947 		}
948 	}
949 	if (m == 0)
950 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
951 	if (diff_format == D_IFDEF) {
952 		for (;;) {
953 #define	c i0
954 			if ((c = getc(f1)) == EOF)
955 				return;
956 			diff_output("%c", c);
957 		}
958 #undef c
959 	}
960 	if (anychange != 0) {
961 		if (diff_format == D_CONTEXT)
962 			dump_context_vec(f1, f2, flags);
963 		else if (diff_format == D_UNIFIED)
964 			dump_unified_vec(f1, f2, flags);
965 	}
966 }
967 
968 static void
969 range(int a, int b, char *separator)
970 {
971 	diff_output("%d", a > b ? b : a);
972 	if (a < b)
973 		diff_output("%s%d", separator, b);
974 }
975 
976 static void
977 uni_range(int a, int b)
978 {
979 	if (a < b)
980 		diff_output("%d,%d", a, b - a + 1);
981 	else if (a == b)
982 		diff_output("%d", b);
983 	else
984 		diff_output("%d,0", b);
985 }
986 
987 static char *
988 preadline(int fd, size_t rlen, off_t off)
989 {
990 	char *line;
991 	ssize_t nr;
992 
993 	line = xmalloc(rlen + 1);
994 	if ((nr = pread(fd, line, rlen, off)) < 0)
995 		err(2, "preadline");
996 	if (nr > 0 && line[nr-1] == '\n')
997 		nr--;
998 	line[nr] = '\0';
999 	return (line);
1000 }
1001 
1002 static int
1003 ignoreline(char *line)
1004 {
1005 	int ret;
1006 
1007 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1008 	xfree(line);
1009 	return (ret == 0);	/* if it matched, it should be ignored. */
1010 }
1011 
1012 /*
1013  * Indicate that there is a difference between lines a and b of the from file
1014  * to get to lines c to d of the to file.  If a is greater then b then there
1015  * are no lines in the from file involved and this means that there were
1016  * lines appended (beginning at b).  If c is greater than d then there are
1017  * lines missing from the to file.
1018  */
1019 static void
1020 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1021     int *pflags)
1022 {
1023 	static size_t max_context = 64;
1024 	int i;
1025 
1026 restart:
1027 	if (diff_format != D_IFDEF && a > b && c > d)
1028 		return;
1029 	if (ignore_pats != NULL) {
1030 		char *line;
1031 		/*
1032 		 * All lines in the change, insert, or delete must
1033 		 * match an ignore pattern for the change to be
1034 		 * ignored.
1035 		 */
1036 		if (a <= b) {		/* Changes and deletes. */
1037 			for (i = a; i <= b; i++) {
1038 				line = preadline(fileno(f1),
1039 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1040 				if (!ignoreline(line))
1041 					goto proceed;
1042 			}
1043 		}
1044 		if (a > b || c <= d) {	/* Changes and inserts. */
1045 			for (i = c; i <= d; i++) {
1046 				line = preadline(fileno(f2),
1047 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1048 				if (!ignoreline(line))
1049 					goto proceed;
1050 			}
1051 		}
1052 		return;
1053 	}
1054 proceed:
1055 	if (*pflags & D_HEADER) {
1056 		diff_output("%s %s %s\n", diffargs, file1, file2);
1057 		*pflags &= ~D_HEADER;
1058 	}
1059 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1060 		/*
1061 		 * Allocate change records as needed.
1062 		 */
1063 		if (context_vec_ptr == context_vec_end - 1) {
1064 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1065 			max_context <<= 1;
1066 			context_vec_start = xrealloc(context_vec_start,
1067 			    max_context, sizeof(*context_vec_start));
1068 			context_vec_end = context_vec_start + max_context;
1069 			context_vec_ptr = context_vec_start + offset;
1070 		}
1071 		if (anychange == 0) {
1072 			/*
1073 			 * Print the context/unidiff header first time through.
1074 			 */
1075 			print_header(file1, file2);
1076 			anychange = 1;
1077 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1078 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1079 			/*
1080 			 * If this change is more than 'diff_context' lines from the
1081 			 * previous change, dump the record and reset it.
1082 			 */
1083 			if (diff_format == D_CONTEXT)
1084 				dump_context_vec(f1, f2, *pflags);
1085 			else
1086 				dump_unified_vec(f1, f2, *pflags);
1087 		}
1088 		context_vec_ptr++;
1089 		context_vec_ptr->a = a;
1090 		context_vec_ptr->b = b;
1091 		context_vec_ptr->c = c;
1092 		context_vec_ptr->d = d;
1093 		return;
1094 	}
1095 	if (anychange == 0)
1096 		anychange = 1;
1097 	switch (diff_format) {
1098 	case D_BRIEF:
1099 		return;
1100 	case D_NORMAL:
1101 	case D_EDIT:
1102 		range(a, b, ",");
1103 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1104 		if (diff_format == D_NORMAL)
1105 			range(c, d, ",");
1106 		diff_output("\n");
1107 		break;
1108 	case D_REVERSE:
1109 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1110 		range(a, b, " ");
1111 		diff_output("\n");
1112 		break;
1113 	case D_NREVERSE:
1114 		if (a > b)
1115 			diff_output("a%d %d\n", b, d - c + 1);
1116 		else {
1117 			diff_output("d%d %d\n", a, b - a + 1);
1118 			if (!(c > d))
1119 				/* add changed lines */
1120 				diff_output("a%d %d\n", b, d - c + 1);
1121 		}
1122 		break;
1123 	}
1124 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1125 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1126 		if (a <= b && c <= d && diff_format == D_NORMAL)
1127 			diff_output("---\n");
1128 	}
1129 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1130 	if (i != 0 && diff_format == D_EDIT) {
1131 		/*
1132 		 * A non-zero return value for D_EDIT indicates that the
1133 		 * last line printed was a bare dot (".") that has been
1134 		 * escaped as ".." to prevent ed(1) from misinterpreting
1135 		 * it.  We have to add a substitute command to change this
1136 		 * back and restart where we left off.
1137 		 */
1138 		diff_output(".\n");
1139 		diff_output("%ds/^\\.\\././\n", a);
1140 		a += i;
1141 		c += i;
1142 		goto restart;
1143 	}
1144 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1145 		diff_output(".\n");
1146 	if (inifdef) {
1147 		diff_output("#endif /* %s */\n", ifdefname);
1148 		inifdef = 0;
1149 	}
1150 }
1151 
1152 static int
1153 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1154 {
1155 	int i, j, c, lastc, col, nc;
1156 
1157 	/*
1158 	 * When doing #ifdef's, copy down to current line
1159 	 * if this is the first file, so that stuff makes it to output.
1160 	 */
1161 	if (diff_format == D_IFDEF && oldfile) {
1162 		long curpos = ftell(lb);
1163 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1164 		nc = f[a > b ? b : a - 1] - curpos;
1165 		for (i = 0; i < nc; i++)
1166 			diff_output("%c", getc(lb));
1167 	}
1168 	if (a > b)
1169 		return (0);
1170 	if (diff_format == D_IFDEF) {
1171 		if (inifdef) {
1172 			diff_output("#else /* %s%s */\n",
1173 			    oldfile == 1 ? "!" : "", ifdefname);
1174 		} else {
1175 			if (oldfile)
1176 				diff_output("#ifndef %s\n", ifdefname);
1177 			else
1178 				diff_output("#ifdef %s\n", ifdefname);
1179 		}
1180 		inifdef = 1 + oldfile;
1181 	}
1182 	for (i = a; i <= b; i++) {
1183 		fseek(lb, f[i - 1], SEEK_SET);
1184 		nc = f[i] - f[i - 1];
1185 		if (diff_format != D_IFDEF && ch != '\0') {
1186 			diff_output("%c", ch);
1187 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1188 			    || diff_format == D_UNIFIED))
1189 				diff_output("\t");
1190 			else if (diff_format != D_UNIFIED)
1191 				diff_output(" ");
1192 		}
1193 		col = 0;
1194 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1195 			if ((c = getc(lb)) == EOF) {
1196 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1197 				    diff_format == D_NREVERSE)
1198 					warnx("No newline at end of file");
1199 				else
1200 					diff_output("\n\\ No newline at end of "
1201 					    "file\n");
1202 				return (0);
1203 			}
1204 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1205 				do {
1206 					diff_output(" ");
1207 				} while (++col & 7);
1208 			} else {
1209 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1210 				    && lastc == '.') {
1211 					/*
1212 					 * Don't print a bare "." line
1213 					 * since that will confuse ed(1).
1214 					 * Print ".." instead and return,
1215 					 * giving the caller an offset
1216 					 * from which to restart.
1217 					 */
1218 					diff_output(".\n");
1219 					return (i - a + 1);
1220 				}
1221 				diff_output("%c", c);
1222 				col++;
1223 			}
1224 		}
1225 	}
1226 	return (0);
1227 }
1228 
1229 /*
1230  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1231  */
1232 static int
1233 readhash(FILE *f, int flags)
1234 {
1235 	int i, t, space;
1236 	int sum;
1237 
1238 	sum = 1;
1239 	space = 0;
1240 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1241 		if (flags & D_IGNORECASE)
1242 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1243 				if (t == EOF) {
1244 					if (i == 0)
1245 						return (0);
1246 					break;
1247 				}
1248 				sum = sum * 127 + chrtran[t];
1249 			}
1250 		else
1251 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1252 				if (t == EOF) {
1253 					if (i == 0)
1254 						return (0);
1255 					break;
1256 				}
1257 				sum = sum * 127 + t;
1258 			}
1259 	} else {
1260 		for (i = 0;;) {
1261 			switch (t = getc(f)) {
1262 			case '\t':
1263 			case '\r':
1264 			case '\v':
1265 			case '\f':
1266 			case ' ':
1267 				space++;
1268 				continue;
1269 			default:
1270 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1271 					i++;
1272 					space = 0;
1273 				}
1274 				sum = sum * 127 + chrtran[t];
1275 				i++;
1276 				continue;
1277 			case EOF:
1278 				if (i == 0)
1279 					return (0);
1280 				/* FALLTHROUGH */
1281 			case '\n':
1282 				break;
1283 			}
1284 			break;
1285 		}
1286 	}
1287 	/*
1288 	 * There is a remote possibility that we end up with a zero sum.
1289 	 * Zero is used as an EOF marker, so return 1 instead.
1290 	 */
1291 	return (sum == 0 ? 1 : sum);
1292 }
1293 
1294 static int
1295 asciifile(FILE *f)
1296 {
1297 	unsigned char buf[BUFSIZ];
1298 	size_t cnt;
1299 
1300 	if (f == NULL)
1301 		return (1);
1302 
1303 	rewind(f);
1304 	cnt = fread(buf, 1, sizeof(buf), f);
1305 	return (memchr(buf, '\0', cnt) == NULL);
1306 }
1307 
1308 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1309 
1310 static char *
1311 match_function(const long *f, int pos, FILE *fp)
1312 {
1313 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1314 	size_t nc;
1315 	int last = lastline;
1316 	char *state = NULL;
1317 
1318 	lastline = pos;
1319 	while (pos > last) {
1320 		fseek(fp, f[pos - 1], SEEK_SET);
1321 		nc = f[pos] - f[pos - 1];
1322 		if (nc >= sizeof(buf))
1323 			nc = sizeof(buf) - 1;
1324 		nc = fread(buf, 1, nc, fp);
1325 		if (nc > 0) {
1326 			buf[nc] = '\0';
1327 			buf[strcspn(buf, "\n")] = '\0';
1328 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1329 				if (begins_with(buf, "private:")) {
1330 					if (!state)
1331 						state = " (private)";
1332 				} else if (begins_with(buf, "protected:")) {
1333 					if (!state)
1334 						state = " (protected)";
1335 				} else if (begins_with(buf, "public:")) {
1336 					if (!state)
1337 						state = " (public)";
1338 				} else {
1339 					strlcpy(lastbuf, buf, sizeof lastbuf);
1340 					if (state)
1341 						strlcat(lastbuf, state,
1342 						    sizeof lastbuf);
1343 					lastmatchline = pos;
1344 					return lastbuf;
1345 				}
1346 			}
1347 		}
1348 		pos--;
1349 	}
1350 	return lastmatchline > 0 ? lastbuf : NULL;
1351 }
1352 
1353 /* dump accumulated "context" diff changes */
1354 static void
1355 dump_context_vec(FILE *f1, FILE *f2, int flags)
1356 {
1357 	struct context_vec *cvp = context_vec_start;
1358 	int lowa, upb, lowc, upd, do_output;
1359 	int a, b, c, d;
1360 	char ch, *f;
1361 
1362 	if (context_vec_start > context_vec_ptr)
1363 		return;
1364 
1365 	b = d = 0;		/* gcc */
1366 	lowa = MAXIMUM(1, cvp->a - diff_context);
1367 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1368 	lowc = MAXIMUM(1, cvp->c - diff_context);
1369 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1370 
1371 	diff_output("***************");
1372 	if ((flags & D_PROTOTYPE)) {
1373 		f = match_function(ixold, lowa-1, f1);
1374 		if (f != NULL)
1375 			diff_output(" %s", f);
1376 	}
1377 	diff_output("\n*** ");
1378 	range(lowa, upb, ",");
1379 	diff_output(" ****\n");
1380 
1381 	/*
1382 	 * Output changes to the "old" file.  The first loop suppresses
1383 	 * output if there were no changes to the "old" file (we'll see
1384 	 * the "old" lines as context in the "new" list).
1385 	 */
1386 	do_output = 0;
1387 	for (; cvp <= context_vec_ptr; cvp++)
1388 		if (cvp->a <= cvp->b) {
1389 			cvp = context_vec_start;
1390 			do_output++;
1391 			break;
1392 		}
1393 	if (do_output) {
1394 		while (cvp <= context_vec_ptr) {
1395 			a = cvp->a;
1396 			b = cvp->b;
1397 			c = cvp->c;
1398 			d = cvp->d;
1399 
1400 			if (a <= b && c <= d)
1401 				ch = 'c';
1402 			else
1403 				ch = (a <= b) ? 'd' : 'a';
1404 
1405 			if (ch == 'a')
1406 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1407 			else {
1408 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1409 				fetch(ixold, a, b, f1,
1410 				    ch == 'c' ? '!' : '-', 0, flags);
1411 			}
1412 			lowa = b + 1;
1413 			cvp++;
1414 		}
1415 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1416 	}
1417 	/* output changes to the "new" file */
1418 	diff_output("--- ");
1419 	range(lowc, upd, ",");
1420 	diff_output(" ----\n");
1421 
1422 	do_output = 0;
1423 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1424 		if (cvp->c <= cvp->d) {
1425 			cvp = context_vec_start;
1426 			do_output++;
1427 			break;
1428 		}
1429 	if (do_output) {
1430 		while (cvp <= context_vec_ptr) {
1431 			a = cvp->a;
1432 			b = cvp->b;
1433 			c = cvp->c;
1434 			d = cvp->d;
1435 
1436 			if (a <= b && c <= d)
1437 				ch = 'c';
1438 			else
1439 				ch = (a <= b) ? 'd' : 'a';
1440 
1441 			if (ch == 'd')
1442 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1443 			else {
1444 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1445 				fetch(ixnew, c, d, f2,
1446 				    ch == 'c' ? '!' : '+', 0, flags);
1447 			}
1448 			lowc = d + 1;
1449 			cvp++;
1450 		}
1451 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1452 	}
1453 	context_vec_ptr = context_vec_start - 1;
1454 }
1455 
1456 /* dump accumulated "unified" diff changes */
1457 static void
1458 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1459 {
1460 	struct context_vec *cvp = context_vec_start;
1461 	int lowa, upb, lowc, upd;
1462 	int a, b, c, d;
1463 	char ch, *f;
1464 
1465 	if (context_vec_start > context_vec_ptr)
1466 		return;
1467 
1468 	b = d = 0;		/* gcc */
1469 	lowa = MAXIMUM(1, cvp->a - diff_context);
1470 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1471 	lowc = MAXIMUM(1, cvp->c - diff_context);
1472 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1473 
1474 	diff_output("@@ -");
1475 	uni_range(lowa, upb);
1476 	diff_output(" +");
1477 	uni_range(lowc, upd);
1478 	diff_output(" @@");
1479 	if ((flags & D_PROTOTYPE)) {
1480 		f = match_function(ixold, lowa-1, f1);
1481 		if (f != NULL)
1482 			diff_output(" %s", f);
1483 	}
1484 	diff_output("\n");
1485 
1486 	/*
1487 	 * Output changes in "unified" diff format--the old and new lines
1488 	 * are printed together.
1489 	 */
1490 	for (; cvp <= context_vec_ptr; cvp++) {
1491 		a = cvp->a;
1492 		b = cvp->b;
1493 		c = cvp->c;
1494 		d = cvp->d;
1495 
1496 		/*
1497 		 * c: both new and old changes
1498 		 * d: only changes in the old file
1499 		 * a: only changes in the new file
1500 		 */
1501 		if (a <= b && c <= d)
1502 			ch = 'c';
1503 		else
1504 			ch = (a <= b) ? 'd' : 'a';
1505 
1506 		switch (ch) {
1507 		case 'c':
1508 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1509 			fetch(ixold, a, b, f1, '-', 0, flags);
1510 			fetch(ixnew, c, d, f2, '+', 0, flags);
1511 			break;
1512 		case 'd':
1513 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1514 			fetch(ixold, a, b, f1, '-', 0, flags);
1515 			break;
1516 		case 'a':
1517 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1518 			fetch(ixnew, c, d, f2, '+', 0, flags);
1519 			break;
1520 		}
1521 		lowa = b + 1;
1522 		lowc = d + 1;
1523 	}
1524 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1525 
1526 	context_vec_ptr = context_vec_start - 1;
1527 }
1528 
1529 static void
1530 print_header(const char *file1, const char *file2)
1531 {
1532 	if (label[0] != NULL)
1533 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1534 		    label[0]);
1535 	else
1536 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1537 		    file1, ctime(&stb1.st_mtime));
1538 	if (label[1] != NULL)
1539 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1540 		    label[1]);
1541 	else
1542 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1543 		    file2, ctime(&stb2.st_mtime));
1544 }
1545