xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 4fd6ed324346681d94337dd681df9264591cbde1)
1 /*	$OpenBSD: diffreg.c,v 1.70 2007/09/11 15:47:17 gilles Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.70 2007/09/11 15:47:17 gilles Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 #include "xmalloc.h"
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 static FILE	*opentemp(const char *);
180 static void	 output(char *, FILE *, char *, FILE *, int);
181 static void	 check(char *, FILE *, char *, FILE *);
182 static void	 range(int, int, char *);
183 static void	 uni_range(int, int);
184 static void	 dump_context_vec(FILE *, FILE *);
185 static void	 dump_unified_vec(FILE *, FILE *);
186 static void	 prepare(int, FILE *, off_t);
187 static void	 prune(void);
188 static void	 equiv(struct line *, int, struct line *, int, int *);
189 static void	 unravel(int);
190 static void	 unsort(struct line *, int, int *);
191 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
192 static void	 sort(struct line *, int);
193 static void	 print_header(const char *, const char *);
194 static int	 ignoreline(char *);
195 static int	 asciifile(FILE *);
196 static int	 fetch(long *, int, int, FILE *, int, int);
197 static int	 newcand(int, int, int);
198 static int	 search(int *, int, int);
199 static int	 skipline(FILE *);
200 static int	 isqrt(int);
201 static int	 stone(int *, int, int *, int *);
202 static int	 readhash(FILE *);
203 static int	 files_differ(FILE *, FILE *, int);
204 static char	*match_function(const long *, int, FILE *);
205 static char	*preadline(int, size_t, off_t);
206 
207 static int  *J;			/* will be overlaid on class */
208 static int  *class;		/* will be overlaid on file[0] */
209 static int  *klist;		/* will be overlaid on file[0] after class */
210 static int  *member;		/* will be overlaid on file[1] */
211 static int   clen;
212 static int   inifdef;		/* whether or not we are in a #ifdef block */
213 static int   len[2];
214 static int   pref, suff;	/* length of prefix and suffix */
215 static int   slen[2];
216 static int   anychange;
217 static long *ixnew;		/* will be overlaid on file[1] */
218 static long *ixold;		/* will be overlaid on klist */
219 static struct cand *clist;	/* merely a free storage pot for candidates */
220 static int   clistlen;		/* the length of clist */
221 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
222 static u_char *chrtran;		/* translation table for case-folding */
223 static struct context_vec *context_vec_start;
224 static struct context_vec *context_vec_end;
225 static struct context_vec *context_vec_ptr;
226 
227 #define FUNCTION_CONTEXT_SIZE	55
228 static char lastbuf[FUNCTION_CONTEXT_SIZE];
229 static int lastline;
230 static int lastmatchline;
231 
232 
233 /*
234  * chrtran points to one of 2 translation tables: cup2low if folding upper to
235  * lower case clow2low if not folding case
236  */
237 u_char clow2low[256] = {
238 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
239 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
240 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
241 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
242 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
243 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
244 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
245 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
246 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
247 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
248 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
249 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
250 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
251 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
252 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
253 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
254 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
255 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
256 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
257 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
258 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
259 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
260 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
261 	0xfd, 0xfe, 0xff
262 };
263 
264 u_char cup2low[256] = {
265 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
266 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
267 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
268 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
269 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
270 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
271 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
272 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
274 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
275 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
276 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
277 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
278 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
279 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
280 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
281 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
282 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
283 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
284 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
285 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
286 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
287 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
288 	0xfd, 0xfe, 0xff
289 };
290 
291 int
292 diffreg(char *ofile1, char *ofile2, int flags)
293 {
294 	char *file1 = ofile1;
295 	char *file2 = ofile2;
296 	FILE *f1 = NULL;
297 	FILE *f2 = NULL;
298 	int rval = D_SAME;
299 	int i, ostdout = -1;
300 	pid_t pid = -1;
301 
302 	anychange = 0;
303 	lastline = 0;
304 	lastmatchline = 0;
305 	context_vec_ptr = context_vec_start - 1;
306 	chrtran = (iflag ? cup2low : clow2low);
307 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
308 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
309 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
310 		goto closem;
311 
312 	if (flags & D_EMPTY1)
313 		f1 = fopen(_PATH_DEVNULL, "r");
314 	else {
315 		if (!S_ISREG(stb1.st_mode)) {
316 			if ((f1 = opentemp(file1)) == NULL ||
317 			    fstat(fileno(f1), &stb1) < 0) {
318 				warn("%s", file1);
319 				status |= 2;
320 				goto closem;
321 			}
322 		} else if (strcmp(file1, "-") == 0)
323 			f1 = stdin;
324 		else
325 			f1 = fopen(file1, "r");
326 	}
327 	if (f1 == NULL) {
328 		warn("%s", file1);
329 		status |= 2;
330 		goto closem;
331 	}
332 
333 	if (flags & D_EMPTY2)
334 		f2 = fopen(_PATH_DEVNULL, "r");
335 	else {
336 		if (!S_ISREG(stb2.st_mode)) {
337 			if ((f2 = opentemp(file2)) == NULL ||
338 			    fstat(fileno(f2), &stb2) < 0) {
339 				warn("%s", file2);
340 				status |= 2;
341 				goto closem;
342 			}
343 		} else if (strcmp(file2, "-") == 0)
344 			f2 = stdin;
345 		else
346 			f2 = fopen(file2, "r");
347 	}
348 	if (f2 == NULL) {
349 		warn("%s", file2);
350 		status |= 2;
351 		goto closem;
352 	}
353 
354 	switch (files_differ(f1, f2, flags)) {
355 	case 0:
356 		goto closem;
357 	case 1:
358 		break;
359 	default:
360 		/* error */
361 		status |= 2;
362 		goto closem;
363 	}
364 
365 	if (!asciifile(f1) || !asciifile(f2)) {
366 		rval = D_BINARY;
367 		status |= 1;
368 		goto closem;
369 	}
370 	if (lflag) {
371 		/* redirect stdout to pr */
372 		int pfd[2];
373 		char *header;
374 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
375 
376 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
377 		prargv[2] = header;
378 		fflush(stdout);
379 		rewind(stdout);
380 		pipe(pfd);
381 		switch ((pid = fork())) {
382 		case -1:
383 			warnx("No more processes");
384 			status |= 2;
385 			xfree(header);
386 			return (D_ERROR);
387 		case 0:
388 			/* child */
389 			if (pfd[0] != STDIN_FILENO) {
390 				dup2(pfd[0], STDIN_FILENO);
391 				close(pfd[0]);
392 			}
393 			close(pfd[1]);
394 			execv(_PATH_PR, prargv);
395 			_exit(127);
396 		default:
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			xfree(header);
406 		}
407 	}
408 	prepare(0, f1, stb1.st_size);
409 	prepare(1, f2, stb2.st_size);
410 	prune();
411 	sort(sfile[0], slen[0]);
412 	sort(sfile[1], slen[1]);
413 
414 	member = (int *)file[1];
415 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
416 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
417 
418 	class = (int *)file[0];
419 	unsort(sfile[0], slen[0], class);
420 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
421 
422 	klist = xmalloc((slen[0] + 2) * sizeof(*klist));
423 	clen = 0;
424 	clistlen = 100;
425 	clist = xmalloc(clistlen * sizeof(*clist));
426 	i = stone(class, slen[0], member, klist);
427 	xfree(member);
428 	xfree(class);
429 
430 	J = xrealloc(J, len[0] + 2, sizeof(*J));
431 	unravel(klist[i]);
432 	xfree(clist);
433 	xfree(klist);
434 
435 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
436 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
437 	check(file1, f1, file2, f2);
438 	output(file1, f1, file2, f2, (flags & D_HEADER));
439 	if (ostdout != -1) {
440 		int wstatus;
441 
442 		/* close the pipe to pr and restore stdout */
443 		fflush(stdout);
444 		rewind(stdout);
445 		if (ostdout != STDOUT_FILENO) {
446 			close(STDOUT_FILENO);
447 			dup2(ostdout, STDOUT_FILENO);
448 			close(ostdout);
449 		}
450 		waitpid(pid, &wstatus, 0);
451 	}
452 closem:
453 	if (anychange) {
454 		status |= 1;
455 		if (rval == D_SAME)
456 			rval = D_DIFFER;
457 	}
458 	if (f1 != NULL)
459 		fclose(f1);
460 	if (f2 != NULL)
461 		fclose(f2);
462 	if (file1 != ofile1)
463 		xfree(file1);
464 	if (file2 != ofile2)
465 		xfree(file2);
466 	return (rval);
467 }
468 
469 /*
470  * Check to see if the given files differ.
471  * Returns 0 if they are the same, 1 if different, and -1 on error.
472  * XXX - could use code from cmp(1) [faster]
473  */
474 static int
475 files_differ(FILE *f1, FILE *f2, int flags)
476 {
477 	char buf1[BUFSIZ], buf2[BUFSIZ];
478 	size_t i, j;
479 
480 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
481 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
482 		return (1);
483 	for (;;) {
484 		i = fread(buf1, 1, sizeof(buf1), f1);
485 		j = fread(buf2, 1, sizeof(buf2), f2);
486 		if (i != j)
487 			return (1);
488 		if (i == 0 && j == 0) {
489 			if (ferror(f1) || ferror(f2))
490 				return (1);
491 			return (0);
492 		}
493 		if (memcmp(buf1, buf2, i) != 0)
494 			return (1);
495 	}
496 }
497 
498 static FILE *
499 opentemp(const char *file)
500 {
501 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
502 	ssize_t nread;
503 	int ifd, ofd;
504 
505 	if (strcmp(file, "-") == 0)
506 		ifd = STDIN_FILENO;
507 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
508 		return (NULL);
509 
510 	if ((tempdir = getenv("TMPDIR")) == NULL)
511 		tempdir = _PATH_TMP;
512 
513 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
514 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
515 	    sizeof(tempfile)) {
516 		close(ifd);
517 		errno = ENAMETOOLONG;
518 		return (NULL);
519 	}
520 
521 	if ((ofd = mkstemp(tempfile)) < 0)
522 		return (NULL);
523 	unlink(tempfile);
524 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
525 		if (write(ofd, buf, nread) != nread) {
526 			close(ifd);
527 			close(ofd);
528 			return (NULL);
529 		}
530 	}
531 	close(ifd);
532 	lseek(ofd, (off_t)0, SEEK_SET);
533 	return (fdopen(ofd, "r"));
534 }
535 
536 char *
537 splice(char *dir, char *file)
538 {
539 	char *tail, *buf;
540 
541 	if ((tail = strrchr(file, '/')) == NULL)
542 		tail = file;
543 	else
544 		tail++;
545 	xasprintf(&buf, "%s/%s", dir, tail);
546 	return (buf);
547 }
548 
549 static void
550 prepare(int i, FILE *fd, off_t filesize)
551 {
552 	struct line *p;
553 	int j, h;
554 	size_t sz;
555 
556 	rewind(fd);
557 
558 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
559 	if (sz < 100)
560 		sz = 100;
561 
562 	p = xmalloc((sz + 3) * sizeof(*p));
563 	for (j = 0; (h = readhash(fd));) {
564 		if (j == sz) {
565 			sz = sz * 3 / 2;
566 			p = xrealloc(p, sz + 3, sizeof(*p));
567 		}
568 		p[++j].value = h;
569 	}
570 	len[i] = j;
571 	file[i] = p;
572 }
573 
574 static void
575 prune(void)
576 {
577 	int i, j;
578 
579 	for (pref = 0; pref < len[0] && pref < len[1] &&
580 	    file[0][pref + 1].value == file[1][pref + 1].value;
581 	    pref++)
582 		;
583 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
584 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
585 	    suff++)
586 		;
587 	for (j = 0; j < 2; j++) {
588 		sfile[j] = file[j] + pref;
589 		slen[j] = len[j] - pref - suff;
590 		for (i = 0; i <= slen[j]; i++)
591 			sfile[j][i].serial = i;
592 	}
593 }
594 
595 static void
596 equiv(struct line *a, int n, struct line *b, int m, int *c)
597 {
598 	int i, j;
599 
600 	i = j = 1;
601 	while (i <= n && j <= m) {
602 		if (a[i].value < b[j].value)
603 			a[i++].value = 0;
604 		else if (a[i].value == b[j].value)
605 			a[i++].value = j;
606 		else
607 			j++;
608 	}
609 	while (i <= n)
610 		a[i++].value = 0;
611 	b[m + 1].value = 0;
612 	j = 0;
613 	while (++j <= m) {
614 		c[j] = -b[j].serial;
615 		while (b[j + 1].value == b[j].value) {
616 			j++;
617 			c[j] = b[j].serial;
618 		}
619 	}
620 	c[j] = -1;
621 }
622 
623 /* Code taken from ping.c */
624 static int
625 isqrt(int n)
626 {
627 	int y, x = 1;
628 
629 	if (n == 0)
630 		return (0);
631 
632 	do { /* newton was a stinker */
633 		y = x;
634 		x = n / x;
635 		x += y;
636 		x /= 2;
637 	} while ((x - y) > 1 || (x - y) < -1);
638 
639 	return (x);
640 }
641 
642 static int
643 stone(int *a, int n, int *b, int *c)
644 {
645 	int i, k, y, j, l;
646 	int oldc, tc, oldl;
647 	u_int numtries;
648 
649 	const u_int bound = dflag ? UINT_MAX : MAX(256, isqrt(n));
650 
651 	k = 0;
652 	c[0] = newcand(0, 0, 0);
653 	for (i = 1; i <= n; i++) {
654 		j = a[i];
655 		if (j == 0)
656 			continue;
657 		y = -b[j];
658 		oldl = 0;
659 		oldc = c[0];
660 		numtries = 0;
661 		do {
662 			if (y <= clist[oldc].y)
663 				continue;
664 			l = search(c, k, y);
665 			if (l != oldl + 1)
666 				oldc = c[l - 1];
667 			if (l <= k) {
668 				if (clist[c[l]].y <= y)
669 					continue;
670 				tc = c[l];
671 				c[l] = newcand(i, y, oldc);
672 				oldc = tc;
673 				oldl = l;
674 				numtries++;
675 			} else {
676 				c[l] = newcand(i, y, oldc);
677 				k++;
678 				break;
679 			}
680 		} while ((y = b[++j]) > 0 && numtries < bound);
681 	}
682 	return (k);
683 }
684 
685 static int
686 newcand(int x, int y, int pred)
687 {
688 	struct cand *q;
689 
690 	if (clen == clistlen) {
691 		clistlen = clistlen * 11 / 10;
692 		clist = xrealloc(clist, clistlen, sizeof(*clist));
693 	}
694 	q = clist + clen;
695 	q->x = x;
696 	q->y = y;
697 	q->pred = pred;
698 	return (clen++);
699 }
700 
701 static int
702 search(int *c, int k, int y)
703 {
704 	int i, j, l, t;
705 
706 	if (clist[c[k]].y < y)	/* quick look for typical case */
707 		return (k + 1);
708 	i = 0;
709 	j = k + 1;
710 	while (1) {
711 		l = i + j;
712 		if ((l >>= 1) <= i)
713 			break;
714 		t = clist[c[l]].y;
715 		if (t > y)
716 			j = l;
717 		else if (t < y)
718 			i = l;
719 		else
720 			return (l);
721 	}
722 	return (l + 1);
723 }
724 
725 static void
726 unravel(int p)
727 {
728 	struct cand *q;
729 	int i;
730 
731 	for (i = 0; i <= len[0]; i++)
732 		J[i] = i <= pref ? i :
733 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
734 	for (q = clist + p; q->y != 0; q = clist + q->pred)
735 		J[q->x + pref] = q->y + pref;
736 }
737 
738 /*
739  * Check does double duty:
740  *  1.	ferret out any fortuitous correspondences due
741  *	to confounding by hashing (which result in "jackpot")
742  *  2.  collect random access indexes to the two files
743  */
744 static void
745 check(char *file1, FILE *f1, char *file2, FILE *f2)
746 {
747 	int i, j, jackpot, c, d;
748 	long ctold, ctnew;
749 
750 	rewind(f1);
751 	rewind(f2);
752 	j = 1;
753 	ixold[0] = ixnew[0] = 0;
754 	jackpot = 0;
755 	ctold = ctnew = 0;
756 	for (i = 1; i <= len[0]; i++) {
757 		if (J[i] == 0) {
758 			ixold[i] = ctold += skipline(f1);
759 			continue;
760 		}
761 		while (j < J[i]) {
762 			ixnew[j] = ctnew += skipline(f2);
763 			j++;
764 		}
765 		if (bflag || wflag || iflag) {
766 			for (;;) {
767 				c = getc(f1);
768 				d = getc(f2);
769 				/*
770 				 * GNU diff ignores a missing newline
771 				 * in one file if bflag || wflag.
772 				 */
773 				if ((bflag || wflag) &&
774 				    ((c == EOF && d == '\n') ||
775 				    (c == '\n' && d == EOF))) {
776 					break;
777 				}
778 				ctold++;
779 				ctnew++;
780 				if (bflag && isspace(c) && isspace(d)) {
781 					do {
782 						if (c == '\n')
783 							break;
784 						ctold++;
785 					} while (isspace(c = getc(f1)));
786 					do {
787 						if (d == '\n')
788 							break;
789 						ctnew++;
790 					} while (isspace(d = getc(f2)));
791 				} else if (wflag) {
792 					while (isspace(c) && c != '\n') {
793 						c = getc(f1);
794 						ctold++;
795 					}
796 					while (isspace(d) && d != '\n') {
797 						d = getc(f2);
798 						ctnew++;
799 					}
800 				}
801 				if (chrtran[c] != chrtran[d]) {
802 					jackpot++;
803 					J[i] = 0;
804 					if (c != '\n' && c != EOF)
805 						ctold += skipline(f1);
806 					if (d != '\n' && c != EOF)
807 						ctnew += skipline(f2);
808 					break;
809 				}
810 				if (c == '\n' || c == EOF)
811 					break;
812 			}
813 		} else {
814 			for (;;) {
815 				ctold++;
816 				ctnew++;
817 				if ((c = getc(f1)) != (d = getc(f2))) {
818 					/* jackpot++; */
819 					J[i] = 0;
820 					if (c != '\n' && c != EOF)
821 						ctold += skipline(f1);
822 					if (d != '\n' && c != EOF)
823 						ctnew += skipline(f2);
824 					break;
825 				}
826 				if (c == '\n' || c == EOF)
827 					break;
828 			}
829 		}
830 		ixold[i] = ctold;
831 		ixnew[j] = ctnew;
832 		j++;
833 	}
834 	for (; j <= len[1]; j++)
835 		ixnew[j] = ctnew += skipline(f2);
836 	/*
837 	 * if (jackpot)
838 	 *	fprintf(stderr, "jackpot\n");
839 	 */
840 }
841 
842 /* shellsort CACM #201 */
843 static void
844 sort(struct line *a, int n)
845 {
846 	struct line *ai, *aim, w;
847 	int j, m = 0, k;
848 
849 	if (n == 0)
850 		return;
851 	for (j = 1; j <= n; j *= 2)
852 		m = 2 * j - 1;
853 	for (m /= 2; m != 0; m /= 2) {
854 		k = n - m;
855 		for (j = 1; j <= k; j++) {
856 			for (ai = &a[j]; ai > a; ai -= m) {
857 				aim = &ai[m];
858 				if (aim < ai)
859 					break;	/* wraparound */
860 				if (aim->value > ai[0].value ||
861 				    (aim->value == ai[0].value &&
862 					aim->serial > ai[0].serial))
863 					break;
864 				w.value = ai[0].value;
865 				ai[0].value = aim->value;
866 				aim->value = w.value;
867 				w.serial = ai[0].serial;
868 				ai[0].serial = aim->serial;
869 				aim->serial = w.serial;
870 			}
871 		}
872 	}
873 }
874 
875 static void
876 unsort(struct line *f, int l, int *b)
877 {
878 	int *a, i;
879 
880 	a = xmalloc((l + 1) * sizeof(*a));
881 	for (i = 1; i <= l; i++)
882 		a[f[i].serial] = f[i].value;
883 	for (i = 1; i <= l; i++)
884 		b[i] = a[i];
885 	xfree(a);
886 }
887 
888 static int
889 skipline(FILE *f)
890 {
891 	int i, c;
892 
893 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
894 		continue;
895 	return (i);
896 }
897 
898 static void
899 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
900 {
901 	int m, i0, i1, j0, j1;
902 
903 	rewind(f1);
904 	rewind(f2);
905 	m = len[0];
906 	J[0] = 0;
907 	J[m + 1] = len[1] + 1;
908 	if (format != D_EDIT) {
909 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
910 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
911 				i0++;
912 			j0 = J[i0 - 1] + 1;
913 			i1 = i0 - 1;
914 			while (i1 < m && J[i1 + 1] == 0)
915 				i1++;
916 			j1 = J[i1 + 1] - 1;
917 			J[i1] = j1;
918 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
919 		}
920 	} else {
921 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
922 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
923 				i0--;
924 			j0 = J[i0 + 1] - 1;
925 			i1 = i0 + 1;
926 			while (i1 > 1 && J[i1 - 1] == 0)
927 				i1--;
928 			j1 = J[i1 - 1] + 1;
929 			J[i1] = j1;
930 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
931 		}
932 	}
933 	if (m == 0)
934 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
935 	if (format == D_IFDEF) {
936 		for (;;) {
937 #define	c i0
938 			if ((c = getc(f1)) == EOF)
939 				return;
940 			putchar(c);
941 		}
942 #undef c
943 	}
944 	if (anychange != 0) {
945 		if (format == D_CONTEXT)
946 			dump_context_vec(f1, f2);
947 		else if (format == D_UNIFIED)
948 			dump_unified_vec(f1, f2);
949 	}
950 }
951 
952 static void
953 range(int a, int b, char *separator)
954 {
955 	printf("%d", a > b ? b : a);
956 	if (a < b)
957 		printf("%s%d", separator, b);
958 }
959 
960 static void
961 uni_range(int a, int b)
962 {
963 	if (a < b)
964 		printf("%d,%d", a, b - a + 1);
965 	else if (a == b)
966 		printf("%d", b);
967 	else
968 		printf("%d,0", b);
969 }
970 
971 static char *
972 preadline(int fd, size_t len, off_t off)
973 {
974 	char *line;
975 	ssize_t nr;
976 
977 	line = xmalloc(len + 1);
978 	if ((nr = pread(fd, line, len, off)) < 0)
979 		err(1, "preadline");
980 	if (nr > 0 && line[nr-1] == '\n')
981 		nr--;
982 	line[nr] = '\0';
983 	return (line);
984 }
985 
986 static int
987 ignoreline(char *line)
988 {
989 	int ret;
990 
991 	ret = regexec(&ignore_re, line, 0, NULL, 0);
992 	xfree(line);
993 	return (ret == 0);	/* if it matched, it should be ignored. */
994 }
995 
996 /*
997  * Indicate that there is a difference between lines a and b of the from file
998  * to get to lines c to d of the to file.  If a is greater then b then there
999  * are no lines in the from file involved and this means that there were
1000  * lines appended (beginning at b).  If c is greater than d then there are
1001  * lines missing from the to file.
1002  */
1003 static void
1004 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1005     int *pflags)
1006 {
1007 	static size_t max_context = 64;
1008 	int i;
1009 
1010 restart:
1011 	if (format != D_IFDEF && a > b && c > d)
1012 		return;
1013 	if (ignore_pats != NULL) {
1014 		char *line;
1015 		/*
1016 		 * All lines in the change, insert, or delete must
1017 		 * match an ignore pattern for the change to be
1018 		 * ignored.
1019 		 */
1020 		if (a <= b) {		/* Changes and deletes. */
1021 			for (i = a; i <= b; i++) {
1022 				line = preadline(fileno(f1),
1023 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1024 				if (!ignoreline(line))
1025 					goto proceed;
1026 			}
1027 		}
1028 		if (a > b || c <= d) {	/* Changes and inserts. */
1029 			for (i = c; i <= d; i++) {
1030 				line = preadline(fileno(f2),
1031 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1032 				if (!ignoreline(line))
1033 					goto proceed;
1034 			}
1035 		}
1036 		return;
1037 	}
1038 proceed:
1039 	if (*pflags & D_HEADER) {
1040 		printf("%s %s %s\n", diffargs, file1, file2);
1041 		*pflags &= ~D_HEADER;
1042 	}
1043 	if (format == D_CONTEXT || format == D_UNIFIED) {
1044 		/*
1045 		 * Allocate change records as needed.
1046 		 */
1047 		if (context_vec_ptr == context_vec_end - 1) {
1048 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1049 			max_context <<= 1;
1050 			context_vec_start = xrealloc(context_vec_start,
1051 			    max_context, sizeof(*context_vec_start));
1052 			context_vec_end = context_vec_start + max_context;
1053 			context_vec_ptr = context_vec_start + offset;
1054 		}
1055 		if (anychange == 0) {
1056 			/*
1057 			 * Print the context/unidiff header first time through.
1058 			 */
1059 			print_header(file1, file2);
1060 			anychange = 1;
1061 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1062 		    c > context_vec_ptr->d + (2 * context) + 1) {
1063 			/*
1064 			 * If this change is more than 'context' lines from the
1065 			 * previous change, dump the record and reset it.
1066 			 */
1067 			if (format == D_CONTEXT)
1068 				dump_context_vec(f1, f2);
1069 			else
1070 				dump_unified_vec(f1, f2);
1071 		}
1072 		context_vec_ptr++;
1073 		context_vec_ptr->a = a;
1074 		context_vec_ptr->b = b;
1075 		context_vec_ptr->c = c;
1076 		context_vec_ptr->d = d;
1077 		return;
1078 	}
1079 	if (anychange == 0)
1080 		anychange = 1;
1081 	switch (format) {
1082 
1083 	case D_BRIEF:
1084 		return;
1085 	case D_NORMAL:
1086 	case D_EDIT:
1087 		range(a, b, ",");
1088 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1089 		if (format == D_NORMAL)
1090 			range(c, d, ",");
1091 		putchar('\n');
1092 		break;
1093 	case D_REVERSE:
1094 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1095 		range(a, b, " ");
1096 		putchar('\n');
1097 		break;
1098 	case D_NREVERSE:
1099 		if (a > b)
1100 			printf("a%d %d\n", b, d - c + 1);
1101 		else {
1102 			printf("d%d %d\n", a, b - a + 1);
1103 			if (!(c > d))
1104 				/* add changed lines */
1105 				printf("a%d %d\n", b, d - c + 1);
1106 		}
1107 		break;
1108 	}
1109 	if (format == D_NORMAL || format == D_IFDEF) {
1110 		fetch(ixold, a, b, f1, '<', 1);
1111 		if (a <= b && c <= d && format == D_NORMAL)
1112 			puts("---");
1113 	}
1114 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1115 	if (i != 0 && format == D_EDIT) {
1116 		/*
1117 		 * A non-zero return value for D_EDIT indicates that the
1118 		 * last line printed was a bare dot (".") that has been
1119 		 * escaped as ".." to prevent ed(1) from misinterpreting
1120 		 * it.  We have to add a substitute command to change this
1121 		 * back and restart where we left off.
1122 		 */
1123 		puts(".");
1124 		printf("%ds/^\\.\\././\n", a);
1125 		a += i;
1126 		c += i;
1127 		goto restart;
1128 	}
1129 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1130 		puts(".");
1131 	if (inifdef) {
1132 		printf("#endif /* %s */\n", ifdefname);
1133 		inifdef = 0;
1134 	}
1135 }
1136 
1137 static int
1138 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1139 {
1140 	int i, j, c, lastc, col, nc;
1141 
1142 	/*
1143 	 * When doing #ifdef's, copy down to current line
1144 	 * if this is the first file, so that stuff makes it to output.
1145 	 */
1146 	if (format == D_IFDEF && oldfile) {
1147 		long curpos = ftell(lb);
1148 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1149 		nc = f[a > b ? b : a - 1] - curpos;
1150 		for (i = 0; i < nc; i++)
1151 			putchar(getc(lb));
1152 	}
1153 	if (a > b)
1154 		return (0);
1155 	if (format == D_IFDEF) {
1156 		if (inifdef) {
1157 			printf("#else /* %s%s */\n",
1158 			    oldfile == 1 ? "!" : "", ifdefname);
1159 		} else {
1160 			if (oldfile)
1161 				printf("#ifndef %s\n", ifdefname);
1162 			else
1163 				printf("#ifdef %s\n", ifdefname);
1164 		}
1165 		inifdef = 1 + oldfile;
1166 	}
1167 	for (i = a; i <= b; i++) {
1168 		fseek(lb, f[i - 1], SEEK_SET);
1169 		nc = f[i] - f[i - 1];
1170 		if (format != D_IFDEF && ch != '\0') {
1171 			putchar(ch);
1172 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1173 			    || format == D_UNIFIED))
1174 				putchar('\t');
1175 			else if (format != D_UNIFIED)
1176 				putchar(' ');
1177 		}
1178 		col = 0;
1179 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1180 			if ((c = getc(lb)) == EOF) {
1181 				if (format == D_EDIT || format == D_REVERSE ||
1182 				    format == D_NREVERSE)
1183 					warnx("No newline at end of file");
1184 				else
1185 					puts("\n\\ No newline at end of file");
1186 				return (0);
1187 			}
1188 			if (c == '\t' && tflag) {
1189 				do {
1190 					putchar(' ');
1191 				} while (++col & 7);
1192 			} else {
1193 				if (format == D_EDIT && j == 1 && c == '\n'
1194 				    && lastc == '.') {
1195 					/*
1196 					 * Don't print a bare "." line
1197 					 * since that will confuse ed(1).
1198 					 * Print ".." instead and return,
1199 					 * giving the caller an offset
1200 					 * from which to restart.
1201 					 */
1202 					puts(".");
1203 					return (i - a + 1);
1204 				}
1205 				putchar(c);
1206 				col++;
1207 			}
1208 		}
1209 	}
1210 	return (0);
1211 }
1212 
1213 /*
1214  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1215  */
1216 static int
1217 readhash(FILE *f)
1218 {
1219 	int i, t, space;
1220 	int sum;
1221 
1222 	sum = 1;
1223 	space = 0;
1224 	if (!bflag && !wflag) {
1225 		if (iflag)
1226 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1227 				if (t == EOF) {
1228 					if (i == 0)
1229 						return (0);
1230 					break;
1231 				}
1232 				sum = sum * 127 + chrtran[t];
1233 			}
1234 		else
1235 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1236 				if (t == EOF) {
1237 					if (i == 0)
1238 						return (0);
1239 					break;
1240 				}
1241 				sum = sum * 127 + t;
1242 			}
1243 	} else {
1244 		for (i = 0;;) {
1245 			switch (t = getc(f)) {
1246 			case '\t':
1247 			case '\r':
1248 			case '\v':
1249 			case '\f':
1250 			case ' ':
1251 				space++;
1252 				continue;
1253 			default:
1254 				if (space && !wflag) {
1255 					i++;
1256 					space = 0;
1257 				}
1258 				sum = sum * 127 + chrtran[t];
1259 				i++;
1260 				continue;
1261 			case EOF:
1262 				if (i == 0)
1263 					return (0);
1264 				/* FALLTHROUGH */
1265 			case '\n':
1266 				break;
1267 			}
1268 			break;
1269 		}
1270 	}
1271 	/*
1272 	 * There is a remote possibility that we end up with a zero sum.
1273 	 * Zero is used as an EOF marker, so return 1 instead.
1274 	 */
1275 	return (sum == 0 ? 1 : sum);
1276 }
1277 
1278 static int
1279 asciifile(FILE *f)
1280 {
1281 	unsigned char buf[BUFSIZ];
1282 	int i, cnt;
1283 
1284 	if (aflag || f == NULL)
1285 		return (1);
1286 
1287 	rewind(f);
1288 	cnt = fread(buf, 1, sizeof(buf), f);
1289 	for (i = 0; i < cnt; i++)
1290 		if (!isprint(buf[i]) && !isspace(buf[i]))
1291 			return (0);
1292 	return (1);
1293 }
1294 
1295 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1296 
1297 static char *
1298 match_function(const long *f, int pos, FILE *file)
1299 {
1300 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1301 	size_t nc;
1302 	int last = lastline;
1303 	char *state = NULL;
1304 
1305 	lastline = pos;
1306 	while (pos > last) {
1307 		fseek(file, f[pos - 1], SEEK_SET);
1308 		nc = f[pos] - f[pos - 1];
1309 		if (nc >= sizeof(buf))
1310 			nc = sizeof(buf) - 1;
1311 		nc = fread(buf, 1, nc, file);
1312 		if (nc > 0) {
1313 			buf[nc] = '\0';
1314 			buf[strcspn(buf, "\n")] = '\0';
1315 
1316 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1317 				if (begins_with(buf, "private:")) {
1318 					if (!state)
1319 						state = " (private)";
1320 				} else if (begins_with(buf, "protected:")) {
1321 					if (!state)
1322 						state = " (protected)";
1323 				} else if (begins_with(buf, "public:")) {
1324 					if (!state)
1325 						state = " (public)";
1326 				} else {
1327 					strlcpy(lastbuf, buf, sizeof lastbuf);
1328 					if (state)
1329 						strlcat(lastbuf, state,
1330 						    sizeof lastbuf);
1331 					lastmatchline = pos;
1332 					return lastbuf;
1333 				}
1334 			}
1335 		}
1336 		pos--;
1337 	}
1338 	return lastmatchline > 0 ? lastbuf : NULL;
1339 }
1340 
1341 /* dump accumulated "context" diff changes */
1342 static void
1343 dump_context_vec(FILE *f1, FILE *f2)
1344 {
1345 	struct context_vec *cvp = context_vec_start;
1346 	int lowa, upb, lowc, upd, do_output;
1347 	int a, b, c, d;
1348 	char ch, *f;
1349 
1350 	if (context_vec_start > context_vec_ptr)
1351 		return;
1352 
1353 	b = d = 0;		/* gcc */
1354 	lowa = MAX(1, cvp->a - context);
1355 	upb = MIN(len[0], context_vec_ptr->b + context);
1356 	lowc = MAX(1, cvp->c - context);
1357 	upd = MIN(len[1], context_vec_ptr->d + context);
1358 
1359 	printf("***************");
1360 	if (pflag) {
1361 		f = match_function(ixold, lowa-1, f1);
1362 		if (f != NULL) {
1363 			putchar(' ');
1364 			fputs(f, stdout);
1365 		}
1366 	}
1367 	printf("\n*** ");
1368 	range(lowa, upb, ",");
1369 	printf(" ****\n");
1370 
1371 	/*
1372 	 * Output changes to the "old" file.  The first loop suppresses
1373 	 * output if there were no changes to the "old" file (we'll see
1374 	 * the "old" lines as context in the "new" list).
1375 	 */
1376 	do_output = 0;
1377 	for (; cvp <= context_vec_ptr; cvp++)
1378 		if (cvp->a <= cvp->b) {
1379 			cvp = context_vec_start;
1380 			do_output++;
1381 			break;
1382 		}
1383 	if (do_output) {
1384 		while (cvp <= context_vec_ptr) {
1385 			a = cvp->a;
1386 			b = cvp->b;
1387 			c = cvp->c;
1388 			d = cvp->d;
1389 
1390 			if (a <= b && c <= d)
1391 				ch = 'c';
1392 			else
1393 				ch = (a <= b) ? 'd' : 'a';
1394 
1395 			if (ch == 'a')
1396 				fetch(ixold, lowa, b, f1, ' ', 0);
1397 			else {
1398 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1399 				fetch(ixold, a, b, f1,
1400 				    ch == 'c' ? '!' : '-', 0);
1401 			}
1402 			lowa = b + 1;
1403 			cvp++;
1404 		}
1405 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1406 	}
1407 	/* output changes to the "new" file */
1408 	printf("--- ");
1409 	range(lowc, upd, ",");
1410 	printf(" ----\n");
1411 
1412 	do_output = 0;
1413 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1414 		if (cvp->c <= cvp->d) {
1415 			cvp = context_vec_start;
1416 			do_output++;
1417 			break;
1418 		}
1419 	if (do_output) {
1420 		while (cvp <= context_vec_ptr) {
1421 			a = cvp->a;
1422 			b = cvp->b;
1423 			c = cvp->c;
1424 			d = cvp->d;
1425 
1426 			if (a <= b && c <= d)
1427 				ch = 'c';
1428 			else
1429 				ch = (a <= b) ? 'd' : 'a';
1430 
1431 			if (ch == 'd')
1432 				fetch(ixnew, lowc, d, f2, ' ', 0);
1433 			else {
1434 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1435 				fetch(ixnew, c, d, f2,
1436 				    ch == 'c' ? '!' : '+', 0);
1437 			}
1438 			lowc = d + 1;
1439 			cvp++;
1440 		}
1441 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1442 	}
1443 	context_vec_ptr = context_vec_start - 1;
1444 }
1445 
1446 /* dump accumulated "unified" diff changes */
1447 static void
1448 dump_unified_vec(FILE *f1, FILE *f2)
1449 {
1450 	struct context_vec *cvp = context_vec_start;
1451 	int lowa, upb, lowc, upd;
1452 	int a, b, c, d;
1453 	char ch, *f;
1454 
1455 	if (context_vec_start > context_vec_ptr)
1456 		return;
1457 
1458 	b = d = 0;		/* gcc */
1459 	lowa = MAX(1, cvp->a - context);
1460 	upb = MIN(len[0], context_vec_ptr->b + context);
1461 	lowc = MAX(1, cvp->c - context);
1462 	upd = MIN(len[1], context_vec_ptr->d + context);
1463 
1464 	fputs("@@ -", stdout);
1465 	uni_range(lowa, upb);
1466 	fputs(" +", stdout);
1467 	uni_range(lowc, upd);
1468 	fputs(" @@", stdout);
1469 	if (pflag) {
1470 		f = match_function(ixold, lowa-1, f1);
1471 		if (f != NULL) {
1472 			putchar(' ');
1473 			fputs(f, stdout);
1474 		}
1475 	}
1476 	putchar('\n');
1477 
1478 	/*
1479 	 * Output changes in "unified" diff format--the old and new lines
1480 	 * are printed together.
1481 	 */
1482 	for (; cvp <= context_vec_ptr; cvp++) {
1483 		a = cvp->a;
1484 		b = cvp->b;
1485 		c = cvp->c;
1486 		d = cvp->d;
1487 
1488 		/*
1489 		 * c: both new and old changes
1490 		 * d: only changes in the old file
1491 		 * a: only changes in the new file
1492 		 */
1493 		if (a <= b && c <= d)
1494 			ch = 'c';
1495 		else
1496 			ch = (a <= b) ? 'd' : 'a';
1497 
1498 		switch (ch) {
1499 		case 'c':
1500 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1501 			fetch(ixold, a, b, f1, '-', 0);
1502 			fetch(ixnew, c, d, f2, '+', 0);
1503 			break;
1504 		case 'd':
1505 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1506 			fetch(ixold, a, b, f1, '-', 0);
1507 			break;
1508 		case 'a':
1509 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1510 			fetch(ixnew, c, d, f2, '+', 0);
1511 			break;
1512 		}
1513 		lowa = b + 1;
1514 		lowc = d + 1;
1515 	}
1516 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1517 
1518 	context_vec_ptr = context_vec_start - 1;
1519 }
1520 
1521 static void
1522 print_header(const char *file1, const char *file2)
1523 {
1524 	if (label[0] != NULL)
1525 		printf("%s %s\n", format == D_CONTEXT ? "***" : "---",
1526 		    label[0]);
1527 	else
1528 		printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---",
1529 		    file1, ctime(&stb1.st_mtime));
1530 	if (label[1] != NULL)
1531 		printf("%s %s\n", format == D_CONTEXT ? "---" : "+++",
1532 		    label[1]);
1533 	else
1534 		printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++",
1535 		    file2, ctime(&stb2.st_mtime));
1536 }
1537