xref: /openbsd-src/usr.bin/diff/diffreg.c (revision aa215433f75a08be5b26380d5ae69bb662cf630a)
1 /*	$OpenBSD: diffreg.c,v 1.69 2007/06/09 05:16:21 ray Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.69 2007/06/09 05:16:21 ray Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 #include "xmalloc.h"
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 static FILE	*opentemp(const char *);
180 static void	 output(char *, FILE *, char *, FILE *, int);
181 static void	 check(char *, FILE *, char *, FILE *);
182 static void	 range(int, int, char *);
183 static void	 uni_range(int, int);
184 static void	 dump_context_vec(FILE *, FILE *);
185 static void	 dump_unified_vec(FILE *, FILE *);
186 static void	 prepare(int, FILE *, off_t);
187 static void	 prune(void);
188 static void	 equiv(struct line *, int, struct line *, int, int *);
189 static void	 unravel(int);
190 static void	 unsort(struct line *, int, int *);
191 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
192 static void	 sort(struct line *, int);
193 static void	 print_header(const char *, const char *);
194 static int	 ignoreline(char *);
195 static int	 asciifile(FILE *);
196 static int	 fetch(long *, int, int, FILE *, int, int);
197 static int	 newcand(int, int, int);
198 static int	 search(int *, int, int);
199 static int	 skipline(FILE *);
200 static int	 isqrt(int);
201 static int	 stone(int *, int, int *, int *);
202 static int	 readhash(FILE *);
203 static int	 files_differ(FILE *, FILE *, int);
204 static char	*match_function(const long *, int, FILE *);
205 static char	*preadline(int, size_t, off_t);
206 
207 static int  *J;			/* will be overlaid on class */
208 static int  *class;		/* will be overlaid on file[0] */
209 static int  *klist;		/* will be overlaid on file[0] after class */
210 static int  *member;		/* will be overlaid on file[1] */
211 static int   clen;
212 static int   inifdef;		/* whether or not we are in a #ifdef block */
213 static int   len[2];
214 static int   pref, suff;	/* length of prefix and suffix */
215 static int   slen[2];
216 static int   anychange;
217 static long *ixnew;		/* will be overlaid on file[1] */
218 static long *ixold;		/* will be overlaid on klist */
219 static struct cand *clist;	/* merely a free storage pot for candidates */
220 static int   clistlen;		/* the length of clist */
221 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
222 static u_char *chrtran;		/* translation table for case-folding */
223 static struct context_vec *context_vec_start;
224 static struct context_vec *context_vec_end;
225 static struct context_vec *context_vec_ptr;
226 
227 #define FUNCTION_CONTEXT_SIZE	55
228 static char lastbuf[FUNCTION_CONTEXT_SIZE];
229 static int lastline;
230 static int lastmatchline;
231 
232 
233 /*
234  * chrtran points to one of 2 translation tables: cup2low if folding upper to
235  * lower case clow2low if not folding case
236  */
237 u_char clow2low[256] = {
238 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
239 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
240 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
241 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
242 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
243 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
244 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
245 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
246 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
247 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
248 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
249 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
250 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
251 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
252 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
253 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
254 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
255 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
256 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
257 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
258 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
259 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
260 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
261 	0xfd, 0xfe, 0xff
262 };
263 
264 u_char cup2low[256] = {
265 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
266 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
267 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
268 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
269 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
270 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
271 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
272 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
274 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
275 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
276 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
277 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
278 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
279 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
280 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
281 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
282 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
283 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
284 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
285 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
286 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
287 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
288 	0xfd, 0xfe, 0xff
289 };
290 
291 int
292 diffreg(char *ofile1, char *ofile2, int flags)
293 {
294 	char *file1 = ofile1;
295 	char *file2 = ofile2;
296 	FILE *f1 = NULL;
297 	FILE *f2 = NULL;
298 	int rval = D_SAME;
299 	int i, ostdout = -1;
300 	pid_t pid = -1;
301 
302 	anychange = 0;
303 	lastline = 0;
304 	lastmatchline = 0;
305 	context_vec_ptr = context_vec_start - 1;
306 	chrtran = (iflag ? cup2low : clow2low);
307 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
308 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
309 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
310 		goto closem;
311 
312 	if (flags & D_EMPTY1)
313 		f1 = fopen(_PATH_DEVNULL, "r");
314 	else {
315 		if (!S_ISREG(stb1.st_mode)) {
316 			if ((f1 = opentemp(file1)) == NULL ||
317 			    fstat(fileno(f1), &stb1) < 0) {
318 				warn("%s", file1);
319 				status |= 2;
320 				goto closem;
321 			}
322 		} else if (strcmp(file1, "-") == 0)
323 			f1 = stdin;
324 		else
325 			f1 = fopen(file1, "r");
326 	}
327 	if (f1 == NULL) {
328 		warn("%s", file1);
329 		status |= 2;
330 		goto closem;
331 	}
332 
333 	if (flags & D_EMPTY2)
334 		f2 = fopen(_PATH_DEVNULL, "r");
335 	else {
336 		if (!S_ISREG(stb2.st_mode)) {
337 			if ((f2 = opentemp(file2)) == NULL ||
338 			    fstat(fileno(f2), &stb2) < 0) {
339 				warn("%s", file2);
340 				status |= 2;
341 				goto closem;
342 			}
343 		} else if (strcmp(file2, "-") == 0)
344 			f2 = stdin;
345 		else
346 			f2 = fopen(file2, "r");
347 	}
348 	if (f2 == NULL) {
349 		warn("%s", file2);
350 		status |= 2;
351 		goto closem;
352 	}
353 
354 	switch (files_differ(f1, f2, flags)) {
355 	case 0:
356 		goto closem;
357 	case 1:
358 		break;
359 	default:
360 		/* error */
361 		status |= 2;
362 		goto closem;
363 	}
364 
365 	if (!asciifile(f1) || !asciifile(f2)) {
366 		rval = D_BINARY;
367 		status |= 1;
368 		goto closem;
369 	}
370 	if (lflag) {
371 		/* redirect stdout to pr */
372 		int pfd[2];
373 		char *header;
374 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
375 
376 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
377 		prargv[2] = header;
378 		fflush(stdout);
379 		rewind(stdout);
380 		pipe(pfd);
381 		switch ((pid = fork())) {
382 		case -1:
383 			warnx("No more processes");
384 			status |= 2;
385 			xfree(header);
386 			return (D_ERROR);
387 		case 0:
388 			/* child */
389 			if (pfd[0] != STDIN_FILENO) {
390 				dup2(pfd[0], STDIN_FILENO);
391 				close(pfd[0]);
392 			}
393 			close(pfd[1]);
394 			execv(_PATH_PR, prargv);
395 			_exit(127);
396 		default:
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			xfree(header);
406 		}
407 	}
408 	prepare(0, f1, stb1.st_size);
409 	prepare(1, f2, stb2.st_size);
410 	prune();
411 	sort(sfile[0], slen[0]);
412 	sort(sfile[1], slen[1]);
413 
414 	member = (int *)file[1];
415 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
416 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
417 
418 	class = (int *)file[0];
419 	unsort(sfile[0], slen[0], class);
420 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
421 
422 	klist = xmalloc((slen[0] + 2) * sizeof(*klist));
423 	clen = 0;
424 	clistlen = 100;
425 	clist = xmalloc(clistlen * sizeof(*clist));
426 	i = stone(class, slen[0], member, klist);
427 	xfree(member);
428 	xfree(class);
429 
430 	J = xrealloc(J, len[0] + 2, sizeof(*J));
431 	unravel(klist[i]);
432 	xfree(clist);
433 	xfree(klist);
434 
435 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
436 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
437 	check(file1, f1, file2, f2);
438 	output(file1, f1, file2, f2, (flags & D_HEADER));
439 	if (ostdout != -1) {
440 		int wstatus;
441 
442 		/* close the pipe to pr and restore stdout */
443 		fflush(stdout);
444 		rewind(stdout);
445 		if (ostdout != STDOUT_FILENO) {
446 			close(STDOUT_FILENO);
447 			dup2(ostdout, STDOUT_FILENO);
448 			close(ostdout);
449 		}
450 		waitpid(pid, &wstatus, 0);
451 	}
452 closem:
453 	if (anychange) {
454 		status |= 1;
455 		if (rval == D_SAME)
456 			rval = D_DIFFER;
457 	}
458 	if (f1 != NULL)
459 		fclose(f1);
460 	if (f2 != NULL)
461 		fclose(f2);
462 	if (file1 != ofile1)
463 		xfree(file1);
464 	if (file2 != ofile2)
465 		xfree(file2);
466 	return (rval);
467 }
468 
469 /*
470  * Check to see if the given files differ.
471  * Returns 0 if they are the same, 1 if different, and -1 on error.
472  * XXX - could use code from cmp(1) [faster]
473  */
474 static int
475 files_differ(FILE *f1, FILE *f2, int flags)
476 {
477 	char buf1[BUFSIZ], buf2[BUFSIZ];
478 	size_t i, j;
479 
480 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
481 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
482 		return (1);
483 	for (;;) {
484 		i = fread(buf1, 1, sizeof(buf1), f1);
485 		j = fread(buf2, 1, sizeof(buf2), f2);
486 		if (i != j)
487 			return (1);
488 		if (i == 0 && j == 0) {
489 			if (ferror(f1) || ferror(f2))
490 				return (1);
491 			return (0);
492 		}
493 		if (memcmp(buf1, buf2, i) != 0)
494 			return (1);
495 	}
496 }
497 
498 static FILE *
499 opentemp(const char *file)
500 {
501 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
502 	ssize_t nread;
503 	int ifd, ofd;
504 
505 	if (strcmp(file, "-") == 0)
506 		ifd = STDIN_FILENO;
507 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
508 		return (NULL);
509 
510 	if ((tempdir = getenv("TMPDIR")) == NULL)
511 		tempdir = _PATH_TMP;
512 
513 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
514 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
515 	    sizeof(tempfile)) {
516 		close(ifd);
517 		errno = ENAMETOOLONG;
518 		return (NULL);
519 	}
520 
521 	if ((ofd = mkstemp(tempfile)) < 0)
522 		return (NULL);
523 	unlink(tempfile);
524 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
525 		if (write(ofd, buf, nread) != nread) {
526 			close(ifd);
527 			close(ofd);
528 			return (NULL);
529 		}
530 	}
531 	close(ifd);
532 	lseek(ofd, (off_t)0, SEEK_SET);
533 	return (fdopen(ofd, "r"));
534 }
535 
536 char *
537 splice(char *dir, char *file)
538 {
539 	char *tail, *buf;
540 
541 	if ((tail = strrchr(file, '/')) == NULL)
542 		tail = file;
543 	else
544 		tail++;
545 	xasprintf(&buf, "%s/%s", dir, tail);
546 	return (buf);
547 }
548 
549 static void
550 prepare(int i, FILE *fd, off_t filesize)
551 {
552 	struct line *p;
553 	int j, h;
554 	size_t sz;
555 
556 	rewind(fd);
557 
558 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
559 	if (sz < 100)
560 		sz = 100;
561 
562 	p = xmalloc((sz + 3) * sizeof(*p));
563 	for (j = 0; (h = readhash(fd));) {
564 		if (j == sz) {
565 			sz = sz * 3 / 2;
566 			p = xrealloc(p, sz + 3, sizeof(*p));
567 		}
568 		p[++j].value = h;
569 	}
570 	len[i] = j;
571 	file[i] = p;
572 }
573 
574 static void
575 prune(void)
576 {
577 	int i, j;
578 
579 	for (pref = 0; pref < len[0] && pref < len[1] &&
580 	    file[0][pref + 1].value == file[1][pref + 1].value;
581 	    pref++)
582 		;
583 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
584 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
585 	    suff++)
586 		;
587 	for (j = 0; j < 2; j++) {
588 		sfile[j] = file[j] + pref;
589 		slen[j] = len[j] - pref - suff;
590 		for (i = 0; i <= slen[j]; i++)
591 			sfile[j][i].serial = i;
592 	}
593 }
594 
595 static void
596 equiv(struct line *a, int n, struct line *b, int m, int *c)
597 {
598 	int i, j;
599 
600 	i = j = 1;
601 	while (i <= n && j <= m) {
602 		if (a[i].value < b[j].value)
603 			a[i++].value = 0;
604 		else if (a[i].value == b[j].value)
605 			a[i++].value = j;
606 		else
607 			j++;
608 	}
609 	while (i <= n)
610 		a[i++].value = 0;
611 	b[m + 1].value = 0;
612 	j = 0;
613 	while (++j <= m) {
614 		c[j] = -b[j].serial;
615 		while (b[j + 1].value == b[j].value) {
616 			j++;
617 			c[j] = b[j].serial;
618 		}
619 	}
620 	c[j] = -1;
621 }
622 
623 /* Code taken from ping.c */
624 static int
625 isqrt(int n)
626 {
627 	int y, x = 1;
628 
629 	if (n == 0)
630 		return (0);
631 
632 	do { /* newton was a stinker */
633 		y = x;
634 		x = n / x;
635 		x += y;
636 		x /= 2;
637 	} while ((x - y) > 1 || (x - y) < -1);
638 
639 	return (x);
640 }
641 
642 static int
643 stone(int *a, int n, int *b, int *c)
644 {
645 	int i, k, y, j, l;
646 	int oldc, tc, oldl;
647 	u_int numtries;
648 
649 	const u_int bound = dflag ? UINT_MAX : MAX(256, isqrt(n));
650 
651 	k = 0;
652 	c[0] = newcand(0, 0, 0);
653 	for (i = 1; i <= n; i++) {
654 		j = a[i];
655 		if (j == 0)
656 			continue;
657 		y = -b[j];
658 		oldl = 0;
659 		oldc = c[0];
660 		numtries = 0;
661 		do {
662 			if (y <= clist[oldc].y)
663 				continue;
664 			l = search(c, k, y);
665 			if (l != oldl + 1)
666 				oldc = c[l - 1];
667 			if (l <= k) {
668 				if (clist[c[l]].y <= y)
669 					continue;
670 				tc = c[l];
671 				c[l] = newcand(i, y, oldc);
672 				oldc = tc;
673 				oldl = l;
674 				numtries++;
675 			} else {
676 				c[l] = newcand(i, y, oldc);
677 				k++;
678 				break;
679 			}
680 		} while ((y = b[++j]) > 0 && numtries < bound);
681 	}
682 	return (k);
683 }
684 
685 static int
686 newcand(int x, int y, int pred)
687 {
688 	struct cand *q;
689 
690 	if (clen == clistlen) {
691 		clistlen = clistlen * 11 / 10;
692 		clist = xrealloc(clist, clistlen, sizeof(*clist));
693 	}
694 	q = clist + clen;
695 	q->x = x;
696 	q->y = y;
697 	q->pred = pred;
698 	return (clen++);
699 }
700 
701 static int
702 search(int *c, int k, int y)
703 {
704 	int i, j, l, t;
705 
706 	if (clist[c[k]].y < y)	/* quick look for typical case */
707 		return (k + 1);
708 	i = 0;
709 	j = k + 1;
710 	while (1) {
711 		l = i + j;
712 		if ((l >>= 1) <= i)
713 			break;
714 		t = clist[c[l]].y;
715 		if (t > y)
716 			j = l;
717 		else if (t < y)
718 			i = l;
719 		else
720 			return (l);
721 	}
722 	return (l + 1);
723 }
724 
725 static void
726 unravel(int p)
727 {
728 	struct cand *q;
729 	int i;
730 
731 	for (i = 0; i <= len[0]; i++)
732 		J[i] = i <= pref ? i :
733 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
734 	for (q = clist + p; q->y != 0; q = clist + q->pred)
735 		J[q->x + pref] = q->y + pref;
736 }
737 
738 /*
739  * Check does double duty:
740  *  1.	ferret out any fortuitous correspondences due
741  *	to confounding by hashing (which result in "jackpot")
742  *  2.  collect random access indexes to the two files
743  */
744 static void
745 check(char *file1, FILE *f1, char *file2, FILE *f2)
746 {
747 	int i, j, jackpot, c, d;
748 	long ctold, ctnew;
749 
750 	rewind(f1);
751 	rewind(f2);
752 	j = 1;
753 	ixold[0] = ixnew[0] = 0;
754 	jackpot = 0;
755 	ctold = ctnew = 0;
756 	for (i = 1; i <= len[0]; i++) {
757 		if (J[i] == 0) {
758 			ixold[i] = ctold += skipline(f1);
759 			continue;
760 		}
761 		while (j < J[i]) {
762 			ixnew[j] = ctnew += skipline(f2);
763 			j++;
764 		}
765 		if (bflag || wflag || iflag) {
766 			for (;;) {
767 				c = getc(f1);
768 				d = getc(f2);
769 				/*
770 				 * GNU diff ignores a missing newline
771 				 * in one file if bflag || wflag.
772 				 */
773 				if ((bflag || wflag) &&
774 				    ((c == EOF && d == '\n') ||
775 				    (c == '\n' && d == EOF))) {
776 					break;
777 				}
778 				ctold++;
779 				ctnew++;
780 				if (bflag && isspace(c) && isspace(d)) {
781 					do {
782 						if (c == '\n')
783 							break;
784 						ctold++;
785 					} while (isspace(c = getc(f1)));
786 					do {
787 						if (d == '\n')
788 							break;
789 						ctnew++;
790 					} while (isspace(d = getc(f2)));
791 				} else if (wflag) {
792 					while (isspace(c) && c != '\n') {
793 						c = getc(f1);
794 						ctold++;
795 					}
796 					while (isspace(d) && d != '\n') {
797 						d = getc(f2);
798 						ctnew++;
799 					}
800 				}
801 				if (chrtran[c] != chrtran[d]) {
802 					jackpot++;
803 					J[i] = 0;
804 					if (c != '\n' && c != EOF)
805 						ctold += skipline(f1);
806 					if (d != '\n' && c != EOF)
807 						ctnew += skipline(f2);
808 					break;
809 				}
810 				if (c == '\n' || c == EOF)
811 					break;
812 			}
813 		} else {
814 			for (;;) {
815 				ctold++;
816 				ctnew++;
817 				if ((c = getc(f1)) != (d = getc(f2))) {
818 					/* jackpot++; */
819 					J[i] = 0;
820 					if (c != '\n' && c != EOF)
821 						ctold += skipline(f1);
822 					if (d != '\n' && c != EOF)
823 						ctnew += skipline(f2);
824 					break;
825 				}
826 				if (c == '\n' || c == EOF)
827 					break;
828 			}
829 		}
830 		ixold[i] = ctold;
831 		ixnew[j] = ctnew;
832 		j++;
833 	}
834 	for (; j <= len[1]; j++)
835 		ixnew[j] = ctnew += skipline(f2);
836 	/*
837 	 * if (jackpot)
838 	 *	fprintf(stderr, "jackpot\n");
839 	 */
840 }
841 
842 /* shellsort CACM #201 */
843 static void
844 sort(struct line *a, int n)
845 {
846 	struct line *ai, *aim, w;
847 	int j, m = 0, k;
848 
849 	if (n == 0)
850 		return;
851 	for (j = 1; j <= n; j *= 2)
852 		m = 2 * j - 1;
853 	for (m /= 2; m != 0; m /= 2) {
854 		k = n - m;
855 		for (j = 1; j <= k; j++) {
856 			for (ai = &a[j]; ai > a; ai -= m) {
857 				aim = &ai[m];
858 				if (aim < ai)
859 					break;	/* wraparound */
860 				if (aim->value > ai[0].value ||
861 				    (aim->value == ai[0].value &&
862 					aim->serial > ai[0].serial))
863 					break;
864 				w.value = ai[0].value;
865 				ai[0].value = aim->value;
866 				aim->value = w.value;
867 				w.serial = ai[0].serial;
868 				ai[0].serial = aim->serial;
869 				aim->serial = w.serial;
870 			}
871 		}
872 	}
873 }
874 
875 static void
876 unsort(struct line *f, int l, int *b)
877 {
878 	int *a, i;
879 
880 	a = xmalloc((l + 1) * sizeof(*a));
881 	for (i = 1; i <= l; i++)
882 		a[f[i].serial] = f[i].value;
883 	for (i = 1; i <= l; i++)
884 		b[i] = a[i];
885 	xfree(a);
886 }
887 
888 static int
889 skipline(FILE *f)
890 {
891 	int i, c;
892 
893 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
894 		continue;
895 	return (i);
896 }
897 
898 static void
899 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
900 {
901 	int m, i0, i1, j0, j1;
902 
903 	rewind(f1);
904 	rewind(f2);
905 	m = len[0];
906 	J[0] = 0;
907 	J[m + 1] = len[1] + 1;
908 	if (format != D_EDIT) {
909 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
910 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
911 				i0++;
912 			j0 = J[i0 - 1] + 1;
913 			i1 = i0 - 1;
914 			while (i1 < m && J[i1 + 1] == 0)
915 				i1++;
916 			j1 = J[i1 + 1] - 1;
917 			J[i1] = j1;
918 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
919 		}
920 	} else {
921 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
922 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
923 				i0--;
924 			j0 = J[i0 + 1] - 1;
925 			i1 = i0 + 1;
926 			while (i1 > 1 && J[i1 - 1] == 0)
927 				i1--;
928 			j1 = J[i1 - 1] + 1;
929 			J[i1] = j1;
930 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
931 		}
932 	}
933 	if (m == 0)
934 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
935 	if (format == D_IFDEF) {
936 		for (;;) {
937 #define	c i0
938 			if ((c = getc(f1)) == EOF)
939 				return;
940 			putchar(c);
941 		}
942 #undef c
943 	}
944 	if (anychange != 0) {
945 		if (format == D_CONTEXT)
946 			dump_context_vec(f1, f2);
947 		else if (format == D_UNIFIED)
948 			dump_unified_vec(f1, f2);
949 	}
950 }
951 
952 static void
953 range(int a, int b, char *separator)
954 {
955 	printf("%d", a > b ? b : a);
956 	if (a < b)
957 		printf("%s%d", separator, b);
958 }
959 
960 static void
961 uni_range(int a, int b)
962 {
963 	if (a < b)
964 		printf("%d,%d", a, b - a + 1);
965 	else if (a == b)
966 		printf("%d", b);
967 	else
968 		printf("%d,0", b);
969 }
970 
971 static char *
972 preadline(int fd, size_t len, off_t off)
973 {
974 	char *line;
975 	ssize_t nr;
976 
977 	line = xmalloc(len + 1);
978 	if ((nr = pread(fd, line, len, off)) < 0)
979 		err(1, "preadline");
980 	if (nr > 0 && line[nr-1] == '\n')
981 		nr--;
982 	line[nr] = '\0';
983 	return (line);
984 }
985 
986 static int
987 ignoreline(char *line)
988 {
989 	int ret;
990 
991 	ret = regexec(&ignore_re, line, 0, NULL, 0);
992 	xfree(line);
993 	return (ret == 0);	/* if it matched, it should be ignored. */
994 }
995 
996 /*
997  * Indicate that there is a difference between lines a and b of the from file
998  * to get to lines c to d of the to file.  If a is greater then b then there
999  * are no lines in the from file involved and this means that there were
1000  * lines appended (beginning at b).  If c is greater than d then there are
1001  * lines missing from the to file.
1002  */
1003 static void
1004 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1005     int *pflags)
1006 {
1007 	static size_t max_context = 64;
1008 	int i;
1009 
1010 restart:
1011 	if (format != D_IFDEF && a > b && c > d)
1012 		return;
1013 	if (ignore_pats != NULL) {
1014 		char *line;
1015 		/*
1016 		 * All lines in the change, insert, or delete must
1017 		 * match an ignore pattern for the change to be
1018 		 * ignored.
1019 		 */
1020 		if (a <= b) {		/* Changes and deletes. */
1021 			for (i = a; i <= b; i++) {
1022 				line = preadline(fileno(f1),
1023 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1024 				if (!ignoreline(line))
1025 					goto proceed;
1026 			}
1027 		}
1028 		if (a > b || c <= d) {	/* Changes and inserts. */
1029 			for (i = c; i <= d; i++) {
1030 				line = preadline(fileno(f2),
1031 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1032 				if (!ignoreline(line))
1033 					goto proceed;
1034 			}
1035 		}
1036 		return;
1037 	}
1038 proceed:
1039 	if (*pflags & D_HEADER) {
1040 		printf("%s %s %s\n", diffargs, file1, file2);
1041 		*pflags &= ~D_HEADER;
1042 	}
1043 	if (format == D_CONTEXT || format == D_UNIFIED) {
1044 		/*
1045 		 * Allocate change records as needed.
1046 		 */
1047 		if (context_vec_ptr == context_vec_end - 1) {
1048 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1049 			max_context <<= 1;
1050 			context_vec_start = xrealloc(context_vec_start,
1051 			    max_context, sizeof(*context_vec_start));
1052 			context_vec_end = context_vec_start + max_context;
1053 			context_vec_ptr = context_vec_start + offset;
1054 		}
1055 		if (anychange == 0) {
1056 			/*
1057 			 * Print the context/unidiff header first time through.
1058 			 */
1059 			print_header(file1, file2);
1060 			anychange = 1;
1061 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1062 		    c > context_vec_ptr->d + (2 * context) + 1) {
1063 			/*
1064 			 * If this change is more than 'context' lines from the
1065 			 * previous change, dump the record and reset it.
1066 			 */
1067 			if (format == D_CONTEXT)
1068 				dump_context_vec(f1, f2);
1069 			else
1070 				dump_unified_vec(f1, f2);
1071 		}
1072 		context_vec_ptr++;
1073 		context_vec_ptr->a = a;
1074 		context_vec_ptr->b = b;
1075 		context_vec_ptr->c = c;
1076 		context_vec_ptr->d = d;
1077 		return;
1078 	}
1079 	if (anychange == 0)
1080 		anychange = 1;
1081 	switch (format) {
1082 
1083 	case D_BRIEF:
1084 		return;
1085 	case D_NORMAL:
1086 	case D_EDIT:
1087 		range(a, b, ",");
1088 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1089 		if (format == D_NORMAL)
1090 			range(c, d, ",");
1091 		putchar('\n');
1092 		break;
1093 	case D_REVERSE:
1094 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1095 		range(a, b, " ");
1096 		putchar('\n');
1097 		break;
1098 	case D_NREVERSE:
1099 		if (a > b)
1100 			printf("a%d %d\n", b, d - c + 1);
1101 		else {
1102 			printf("d%d %d\n", a, b - a + 1);
1103 			if (!(c > d))
1104 				/* add changed lines */
1105 				printf("a%d %d\n", b, d - c + 1);
1106 		}
1107 		break;
1108 	}
1109 	if (format == D_NORMAL || format == D_IFDEF) {
1110 		fetch(ixold, a, b, f1, '<', 1);
1111 		if (a <= b && c <= d && format == D_NORMAL)
1112 			puts("---");
1113 	}
1114 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1115 	if (i != 0 && format == D_EDIT) {
1116 		/*
1117 		 * A non-zero return value for D_EDIT indicates that the
1118 		 * last line printed was a bare dot (".") that has been
1119 		 * escaped as ".." to prevent ed(1) from misinterpreting
1120 		 * it.  We have to add a substitute command to change this
1121 		 * back and restart where we left off.
1122 		 */
1123 		puts(".");
1124 		printf("%ds/^\\.\\././\n", a);
1125 		a += i;
1126 		c += i;
1127 		goto restart;
1128 	}
1129 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1130 		puts(".");
1131 	if (inifdef) {
1132 		printf("#endif /* %s */\n", ifdefname);
1133 		inifdef = 0;
1134 	}
1135 }
1136 
1137 static int
1138 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1139 {
1140 	int i, j, c, lastc, col, nc;
1141 
1142 	/*
1143 	 * When doing #ifdef's, copy down to current line
1144 	 * if this is the first file, so that stuff makes it to output.
1145 	 */
1146 	if (format == D_IFDEF && oldfile) {
1147 		long curpos = ftell(lb);
1148 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1149 		nc = f[a > b ? b : a - 1] - curpos;
1150 		for (i = 0; i < nc; i++)
1151 			putchar(getc(lb));
1152 	}
1153 	if (a > b)
1154 		return (0);
1155 	if (format == D_IFDEF) {
1156 		if (inifdef) {
1157 			printf("#else /* %s%s */\n",
1158 			    oldfile == 1 ? "!" : "", ifdefname);
1159 		} else {
1160 			if (oldfile)
1161 				printf("#ifndef %s\n", ifdefname);
1162 			else
1163 				printf("#ifdef %s\n", ifdefname);
1164 		}
1165 		inifdef = 1 + oldfile;
1166 	}
1167 	for (i = a; i <= b; i++) {
1168 		fseek(lb, f[i - 1], SEEK_SET);
1169 		nc = f[i] - f[i - 1];
1170 		if (format != D_IFDEF && ch != '\0') {
1171 			putchar(ch);
1172 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1173 			    || format == D_UNIFIED))
1174 				putchar('\t');
1175 			else if (format != D_UNIFIED)
1176 				putchar(' ');
1177 		}
1178 		col = 0;
1179 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1180 			if ((c = getc(lb)) == EOF) {
1181 				if (format == D_EDIT || format == D_REVERSE ||
1182 				    format == D_NREVERSE)
1183 					warnx("No newline at end of file");
1184 				else
1185 					puts("\n\\ No newline at end of file");
1186 				return (0);
1187 			}
1188 			if (c == '\t' && tflag) {
1189 				do {
1190 					putchar(' ');
1191 				} while (++col & 7);
1192 			} else {
1193 				if (format == D_EDIT && j == 1 && c == '\n'
1194 				    && lastc == '.') {
1195 					/*
1196 					 * Don't print a bare "." line
1197 					 * since that will confuse ed(1).
1198 					 * Print ".." instead and return,
1199 					 * giving the caller an offset
1200 					 * from which to restart.
1201 					 */
1202 					puts(".");
1203 					return (i - a + 1);
1204 				}
1205 				putchar(c);
1206 				col++;
1207 			}
1208 		}
1209 	}
1210 	return (0);
1211 }
1212 
1213 /*
1214  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1215  */
1216 static int
1217 readhash(FILE *f)
1218 {
1219 	int i, t, space;
1220 	int sum;
1221 
1222 	sum = 1;
1223 	space = 0;
1224 	if (!bflag && !wflag) {
1225 		if (iflag)
1226 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1227 				if (t == EOF) {
1228 					if (i == 0)
1229 						return (0);
1230 					break;
1231 				}
1232 				sum = sum * 127 + chrtran[t];
1233 			}
1234 		else
1235 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1236 				if (t == EOF) {
1237 					if (i == 0)
1238 						return (0);
1239 					break;
1240 				}
1241 				sum = sum * 127 + t;
1242 			}
1243 	} else {
1244 		for (i = 0;;) {
1245 			switch (t = getc(f)) {
1246 			case '\t':
1247 			case '\r':
1248 			case '\v':
1249 			case '\f':
1250 			case ' ':
1251 				space++;
1252 				continue;
1253 			default:
1254 				if (space && !wflag) {
1255 					i++;
1256 					space = 0;
1257 				}
1258 				sum = sum * 127 + chrtran[t];
1259 				i++;
1260 				continue;
1261 			case EOF:
1262 				if (i == 0)
1263 					return (0);
1264 				/* FALLTHROUGH */
1265 			case '\n':
1266 				break;
1267 			}
1268 			break;
1269 		}
1270 	}
1271 	/*
1272 	 * There is a remote possibility that we end up with a zero sum.
1273 	 * Zero is used as an EOF marker, so return 1 instead.
1274 	 */
1275 	return (sum == 0 ? 1 : sum);
1276 }
1277 
1278 static int
1279 asciifile(FILE *f)
1280 {
1281 	unsigned char buf[BUFSIZ];
1282 	int i, cnt;
1283 
1284 	if (aflag || f == NULL)
1285 		return (1);
1286 
1287 	rewind(f);
1288 	cnt = fread(buf, 1, sizeof(buf), f);
1289 	for (i = 0; i < cnt; i++)
1290 		if (!isprint(buf[i]) && !isspace(buf[i]))
1291 			return (0);
1292 	return (1);
1293 }
1294 
1295 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1296 
1297 static char *
1298 match_function(const long *f, int pos, FILE *file)
1299 {
1300 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1301 	size_t nc;
1302 	int last = lastline;
1303 	char *p;
1304 	char *state = NULL;
1305 
1306 	lastline = pos;
1307 	while (pos > last) {
1308 		fseek(file, f[pos - 1], SEEK_SET);
1309 		nc = f[pos] - f[pos - 1];
1310 		if (nc >= sizeof(buf))
1311 			nc = sizeof(buf) - 1;
1312 		nc = fread(buf, 1, nc, file);
1313 		if (nc > 0) {
1314 			buf[nc] = '\0';
1315 			p = strchr(buf, '\n');
1316 			if (p != NULL)
1317 				*p = '\0';
1318 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1319 				if (begins_with(buf, "private:")) {
1320 					if (!state)
1321 						state = " (private)";
1322 				} else if (begins_with(buf, "protected:")) {
1323 					if (!state)
1324 						state = " (protected)";
1325 				} else if (begins_with(buf, "public:")) {
1326 					if (!state)
1327 						state = " (public)";
1328 				} else {
1329 					strlcpy(lastbuf, buf, sizeof lastbuf);
1330 					if (state)
1331 						strlcat(lastbuf, state,
1332 						    sizeof lastbuf);
1333 					lastmatchline = pos;
1334 					return lastbuf;
1335 				}
1336 			}
1337 		}
1338 		pos--;
1339 	}
1340 	return lastmatchline > 0 ? lastbuf : NULL;
1341 }
1342 
1343 /* dump accumulated "context" diff changes */
1344 static void
1345 dump_context_vec(FILE *f1, FILE *f2)
1346 {
1347 	struct context_vec *cvp = context_vec_start;
1348 	int lowa, upb, lowc, upd, do_output;
1349 	int a, b, c, d;
1350 	char ch, *f;
1351 
1352 	if (context_vec_start > context_vec_ptr)
1353 		return;
1354 
1355 	b = d = 0;		/* gcc */
1356 	lowa = MAX(1, cvp->a - context);
1357 	upb = MIN(len[0], context_vec_ptr->b + context);
1358 	lowc = MAX(1, cvp->c - context);
1359 	upd = MIN(len[1], context_vec_ptr->d + context);
1360 
1361 	printf("***************");
1362 	if (pflag) {
1363 		f = match_function(ixold, lowa-1, f1);
1364 		if (f != NULL) {
1365 			putchar(' ');
1366 			fputs(f, stdout);
1367 		}
1368 	}
1369 	printf("\n*** ");
1370 	range(lowa, upb, ",");
1371 	printf(" ****\n");
1372 
1373 	/*
1374 	 * Output changes to the "old" file.  The first loop suppresses
1375 	 * output if there were no changes to the "old" file (we'll see
1376 	 * the "old" lines as context in the "new" list).
1377 	 */
1378 	do_output = 0;
1379 	for (; cvp <= context_vec_ptr; cvp++)
1380 		if (cvp->a <= cvp->b) {
1381 			cvp = context_vec_start;
1382 			do_output++;
1383 			break;
1384 		}
1385 	if (do_output) {
1386 		while (cvp <= context_vec_ptr) {
1387 			a = cvp->a;
1388 			b = cvp->b;
1389 			c = cvp->c;
1390 			d = cvp->d;
1391 
1392 			if (a <= b && c <= d)
1393 				ch = 'c';
1394 			else
1395 				ch = (a <= b) ? 'd' : 'a';
1396 
1397 			if (ch == 'a')
1398 				fetch(ixold, lowa, b, f1, ' ', 0);
1399 			else {
1400 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1401 				fetch(ixold, a, b, f1,
1402 				    ch == 'c' ? '!' : '-', 0);
1403 			}
1404 			lowa = b + 1;
1405 			cvp++;
1406 		}
1407 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1408 	}
1409 	/* output changes to the "new" file */
1410 	printf("--- ");
1411 	range(lowc, upd, ",");
1412 	printf(" ----\n");
1413 
1414 	do_output = 0;
1415 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1416 		if (cvp->c <= cvp->d) {
1417 			cvp = context_vec_start;
1418 			do_output++;
1419 			break;
1420 		}
1421 	if (do_output) {
1422 		while (cvp <= context_vec_ptr) {
1423 			a = cvp->a;
1424 			b = cvp->b;
1425 			c = cvp->c;
1426 			d = cvp->d;
1427 
1428 			if (a <= b && c <= d)
1429 				ch = 'c';
1430 			else
1431 				ch = (a <= b) ? 'd' : 'a';
1432 
1433 			if (ch == 'd')
1434 				fetch(ixnew, lowc, d, f2, ' ', 0);
1435 			else {
1436 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1437 				fetch(ixnew, c, d, f2,
1438 				    ch == 'c' ? '!' : '+', 0);
1439 			}
1440 			lowc = d + 1;
1441 			cvp++;
1442 		}
1443 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1444 	}
1445 	context_vec_ptr = context_vec_start - 1;
1446 }
1447 
1448 /* dump accumulated "unified" diff changes */
1449 static void
1450 dump_unified_vec(FILE *f1, FILE *f2)
1451 {
1452 	struct context_vec *cvp = context_vec_start;
1453 	int lowa, upb, lowc, upd;
1454 	int a, b, c, d;
1455 	char ch, *f;
1456 
1457 	if (context_vec_start > context_vec_ptr)
1458 		return;
1459 
1460 	b = d = 0;		/* gcc */
1461 	lowa = MAX(1, cvp->a - context);
1462 	upb = MIN(len[0], context_vec_ptr->b + context);
1463 	lowc = MAX(1, cvp->c - context);
1464 	upd = MIN(len[1], context_vec_ptr->d + context);
1465 
1466 	fputs("@@ -", stdout);
1467 	uni_range(lowa, upb);
1468 	fputs(" +", stdout);
1469 	uni_range(lowc, upd);
1470 	fputs(" @@", stdout);
1471 	if (pflag) {
1472 		f = match_function(ixold, lowa-1, f1);
1473 		if (f != NULL) {
1474 			putchar(' ');
1475 			fputs(f, stdout);
1476 		}
1477 	}
1478 	putchar('\n');
1479 
1480 	/*
1481 	 * Output changes in "unified" diff format--the old and new lines
1482 	 * are printed together.
1483 	 */
1484 	for (; cvp <= context_vec_ptr; cvp++) {
1485 		a = cvp->a;
1486 		b = cvp->b;
1487 		c = cvp->c;
1488 		d = cvp->d;
1489 
1490 		/*
1491 		 * c: both new and old changes
1492 		 * d: only changes in the old file
1493 		 * a: only changes in the new file
1494 		 */
1495 		if (a <= b && c <= d)
1496 			ch = 'c';
1497 		else
1498 			ch = (a <= b) ? 'd' : 'a';
1499 
1500 		switch (ch) {
1501 		case 'c':
1502 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1503 			fetch(ixold, a, b, f1, '-', 0);
1504 			fetch(ixnew, c, d, f2, '+', 0);
1505 			break;
1506 		case 'd':
1507 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1508 			fetch(ixold, a, b, f1, '-', 0);
1509 			break;
1510 		case 'a':
1511 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1512 			fetch(ixnew, c, d, f2, '+', 0);
1513 			break;
1514 		}
1515 		lowa = b + 1;
1516 		lowc = d + 1;
1517 	}
1518 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1519 
1520 	context_vec_ptr = context_vec_start - 1;
1521 }
1522 
1523 static void
1524 print_header(const char *file1, const char *file2)
1525 {
1526 	if (label[0] != NULL)
1527 		printf("%s %s\n", format == D_CONTEXT ? "***" : "---",
1528 		    label[0]);
1529 	else
1530 		printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---",
1531 		    file1, ctime(&stb1.st_mtime));
1532 	if (label[1] != NULL)
1533 		printf("%s %s\n", format == D_CONTEXT ? "---" : "+++",
1534 		    label[1]);
1535 	else
1536 		printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++",
1537 		    file2, ctime(&stb2.st_mtime));
1538 }
1539