xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 94fd4554194a14f126fba33b837cc68a1df42468)
1 /*	$OpenBSD: diffreg.c,v 1.67 2007/03/18 21:12:27 espie Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.67 2007/03/18 21:12:27 espie Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 
88 /*
89  * diff - compare two files.
90  */
91 
92 /*
93  *	Uses an algorithm due to Harold Stone, which finds
94  *	a pair of longest identical subsequences in the two
95  *	files.
96  *
97  *	The major goal is to generate the match vector J.
98  *	J[i] is the index of the line in file1 corresponding
99  *	to line i file0. J[i] = 0 if there is no
100  *	such line in file1.
101  *
102  *	Lines are hashed so as to work in core. All potential
103  *	matches are located by sorting the lines of each file
104  *	on the hash (called ``value''). In particular, this
105  *	collects the equivalence classes in file1 together.
106  *	Subroutine equiv replaces the value of each line in
107  *	file0 by the index of the first element of its
108  *	matching equivalence in (the reordered) file1.
109  *	To save space equiv squeezes file1 into a single
110  *	array member in which the equivalence classes
111  *	are simply concatenated, except that their first
112  *	members are flagged by changing sign.
113  *
114  *	Next the indices that point into member are unsorted into
115  *	array class according to the original order of file0.
116  *
117  *	The cleverness lies in routine stone. This marches
118  *	through the lines of file0, developing a vector klist
119  *	of "k-candidates". At step i a k-candidate is a matched
120  *	pair of lines x,y (x in file0 y in file1) such that
121  *	there is a common subsequence of length k
122  *	between the first i lines of file0 and the first y
123  *	lines of file1, but there is no such subsequence for
124  *	any smaller y. x is the earliest possible mate to y
125  *	that occurs in such a subsequence.
126  *
127  *	Whenever any of the members of the equivalence class of
128  *	lines in file1 matable to a line in file0 has serial number
129  *	less than the y of some k-candidate, that k-candidate
130  *	with the smallest such y is replaced. The new
131  *	k-candidate is chained (via pred) to the current
132  *	k-1 candidate so that the actual subsequence can
133  *	be recovered. When a member has serial number greater
134  *	that the y of all k-candidates, the klist is extended.
135  *	At the end, the longest subsequence is pulled out
136  *	and placed in the array J by unravel
137  *
138  *	With J in hand, the matches there recorded are
139  *	check'ed against reality to assure that no spurious
140  *	matches have crept in due to hashing. If they have,
141  *	they are broken, and "jackpot" is recorded--a harmless
142  *	matter except that a true match for a spuriously
143  *	mated line may now be unnecessarily reported as a change.
144  *
145  *	Much of the complexity of the program comes simply
146  *	from trying to minimize core utilization and
147  *	maximize the range of doable problems by dynamically
148  *	allocating what is needed and reusing what is not.
149  *	The core requirements for problems larger than somewhat
150  *	are (in words) 2*length(file0) + length(file1) +
151  *	3*(number of k-candidates installed),  typically about
152  *	6n words for files of length n.
153  */
154 
155 struct cand {
156 	int x;
157 	int y;
158 	int pred;
159 };
160 
161 struct line {
162 	int serial;
163 	int value;
164 } *file[2];
165 
166 /*
167  * The following struct is used to record change information when
168  * doing a "context" or "unified" diff.  (see routine "change" to
169  * understand the highly mnemonic field names)
170  */
171 struct context_vec {
172 	int a;			/* start line in old file */
173 	int b;			/* end line in old file */
174 	int c;			/* start line in new file */
175 	int d;			/* end line in new file */
176 };
177 
178 static int  *J;			/* will be overlaid on class */
179 static int  *class;		/* will be overlaid on file[0] */
180 static int  *klist;		/* will be overlaid on file[0] after class */
181 static int  *member;		/* will be overlaid on file[1] */
182 static int   clen;
183 static int   inifdef;		/* whether or not we are in a #ifdef block */
184 static int   len[2];
185 static int   pref, suff;	/* length of prefix and suffix */
186 static int   slen[2];
187 static int   anychange;
188 static long *ixnew;		/* will be overlaid on file[1] */
189 static long *ixold;		/* will be overlaid on klist */
190 static struct cand *clist;	/* merely a free storage pot for candidates */
191 static int   clistlen;		/* the length of clist */
192 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
193 static u_char *chrtran;		/* translation table for case-folding */
194 static struct context_vec *context_vec_start;
195 static struct context_vec *context_vec_end;
196 static struct context_vec *context_vec_ptr;
197 
198 #define FUNCTION_CONTEXT_SIZE	55
199 static char lastbuf[FUNCTION_CONTEXT_SIZE];
200 static int lastline;
201 static int lastmatchline;
202 
203 static FILE *opentemp(const char *);
204 static void output(char *, FILE *, char *, FILE *, int);
205 static void check(char *, FILE *, char *, FILE *);
206 static void range(int, int, char *);
207 static void uni_range(int, int);
208 static void dump_context_vec(FILE *, FILE *);
209 static void dump_unified_vec(FILE *, FILE *);
210 static void prepare(int, FILE *, off_t);
211 static void prune(void);
212 static void equiv(struct line *, int, struct line *, int, int *);
213 static void unravel(int);
214 static void unsort(struct line *, int, int *);
215 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
216 static void sort(struct line *, int);
217 static void print_header(const char *, const char *);
218 static int  ignoreline(char *);
219 static int  asciifile(FILE *);
220 static int  fetch(long *, int, int, FILE *, int, int);
221 static int  newcand(int, int, int);
222 static int  search(int *, int, int);
223 static int  skipline(FILE *);
224 static int  isqrt(int);
225 static int  stone(int *, int, int *, int *);
226 static int  readhash(FILE *);
227 static int  files_differ(FILE *, FILE *, int);
228 static __inline int min(int, int);
229 static __inline int max(int, int);
230 static char *match_function(const long *, int, FILE *);
231 static char *preadline(int, size_t, off_t);
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
293 diffreg(char *ofile1, char *ofile2, int flags)
294 {
295 	char *file1 = ofile1;
296 	char *file2 = ofile2;
297 	FILE *f1 = NULL;
298 	FILE *f2 = NULL;
299 	int rval = D_SAME;
300 	int i, ostdout = -1;
301 	pid_t pid = -1;
302 
303 	anychange = 0;
304 	lastline = 0;
305 	lastmatchline = 0;
306 	context_vec_ptr = context_vec_start - 1;
307 	chrtran = (iflag ? cup2low : clow2low);
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if (!asciifile(f1) || !asciifile(f2)) {
367 		rval = D_BINARY;
368 		status |= 1;
369 		goto closem;
370 	}
371 	if (lflag) {
372 		/* redirect stdout to pr */
373 		int pfd[2];
374 		char *header;
375 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
376 
377 		easprintf(&header, "%s %s %s", diffargs, file1, file2);
378 		prargv[2] = header;
379 		fflush(stdout);
380 		rewind(stdout);
381 		pipe(pfd);
382 		switch ((pid = fork())) {
383 		case -1:
384 			warnx("No more processes");
385 			status |= 2;
386 			free(header);
387 			return (D_ERROR);
388 		case 0:
389 			/* child */
390 			if (pfd[0] != STDIN_FILENO) {
391 				dup2(pfd[0], STDIN_FILENO);
392 				close(pfd[0]);
393 			}
394 			close(pfd[1]);
395 			execv(_PATH_PR, prargv);
396 			_exit(127);
397 		default:
398 			/* parent */
399 			if (pfd[1] != STDOUT_FILENO) {
400 				ostdout = dup(STDOUT_FILENO);
401 				dup2(pfd[1], STDOUT_FILENO);
402 				close(pfd[1]);
403 			}
404 			close(pfd[0]);
405 			rewind(stdout);
406 			free(header);
407 		}
408 	}
409 	prepare(0, f1, stb1.st_size);
410 	prepare(1, f2, stb2.st_size);
411 	prune();
412 	sort(sfile[0], slen[0]);
413 	sort(sfile[1], slen[1]);
414 
415 	member = (int *)file[1];
416 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
417 	member = erealloc(member, (slen[1] + 2) * sizeof(int));
418 
419 	class = (int *)file[0];
420 	unsort(sfile[0], slen[0], class);
421 	class = erealloc(class, (slen[0] + 2) * sizeof(int));
422 
423 	klist = emalloc((slen[0] + 2) * sizeof(int));
424 	clen = 0;
425 	clistlen = 100;
426 	clist = emalloc(clistlen * sizeof(struct cand));
427 	i = stone(class, slen[0], member, klist);
428 	free(member);
429 	free(class);
430 
431 	J = erealloc(J, (len[0] + 2) * sizeof(int));
432 	unravel(klist[i]);
433 	free(clist);
434 	free(klist);
435 
436 	ixold = erealloc(ixold, (len[0] + 2) * sizeof(long));
437 	ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long));
438 	check(file1, f1, file2, f2);
439 	output(file1, f1, file2, f2, (flags & D_HEADER));
440 	if (ostdout != -1) {
441 		int wstatus;
442 
443 		/* close the pipe to pr and restore stdout */
444 		fflush(stdout);
445 		rewind(stdout);
446 		if (ostdout != STDOUT_FILENO) {
447 			close(STDOUT_FILENO);
448 			dup2(ostdout, STDOUT_FILENO);
449 			close(ostdout);
450 		}
451 		waitpid(pid, &wstatus, 0);
452 	}
453 closem:
454 	if (anychange) {
455 		status |= 1;
456 		if (rval == D_SAME)
457 			rval = D_DIFFER;
458 	}
459 	if (f1 != NULL)
460 		fclose(f1);
461 	if (f2 != NULL)
462 		fclose(f2);
463 	if (file1 != ofile1)
464 		free(file1);
465 	if (file2 != ofile2)
466 		free(file2);
467 	return (rval);
468 }
469 
470 /*
471  * Check to see if the given files differ.
472  * Returns 0 if they are the same, 1 if different, and -1 on error.
473  * XXX - could use code from cmp(1) [faster]
474  */
475 static int
476 files_differ(FILE *f1, FILE *f2, int flags)
477 {
478 	char buf1[BUFSIZ], buf2[BUFSIZ];
479 	size_t i, j;
480 
481 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
482 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
483 		return (1);
484 	for (;;) {
485 		i = fread(buf1, 1, sizeof(buf1), f1);
486 		j = fread(buf2, 1, sizeof(buf2), f2);
487 		if (i != j)
488 			return (1);
489 		if (i == 0 && j == 0) {
490 			if (ferror(f1) || ferror(f2))
491 				return (1);
492 			return (0);
493 		}
494 		if (memcmp(buf1, buf2, i) != 0)
495 			return (1);
496 	}
497 }
498 
499 static FILE *
500 opentemp(const char *file)
501 {
502 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
503 	ssize_t nread;
504 	int ifd, ofd;
505 
506 	if (strcmp(file, "-") == 0)
507 		ifd = STDIN_FILENO;
508 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
509 		return (NULL);
510 
511 	if ((tempdir = getenv("TMPDIR")) == NULL)
512 		tempdir = _PATH_TMP;
513 
514 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
515 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
516 	    sizeof(tempfile)) {
517 		close(ifd);
518 		errno = ENAMETOOLONG;
519 		return (NULL);
520 	}
521 
522 	if ((ofd = mkstemp(tempfile)) < 0)
523 		return (NULL);
524 	unlink(tempfile);
525 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
526 		if (write(ofd, buf, nread) != nread) {
527 			close(ifd);
528 			close(ofd);
529 			return (NULL);
530 		}
531 	}
532 	close(ifd);
533 	lseek(ofd, (off_t)0, SEEK_SET);
534 	return (fdopen(ofd, "r"));
535 }
536 
537 char *
538 splice(char *dir, char *file)
539 {
540 	char *tail, *buf;
541 
542 	if ((tail = strrchr(file, '/')) == NULL)
543 		tail = file;
544 	else
545 		tail++;
546 	easprintf(&buf, "%s/%s", dir, tail);
547 	return (buf);
548 }
549 
550 static void
551 prepare(int i, FILE *fd, off_t filesize)
552 {
553 	struct line *p;
554 	int j, h;
555 	size_t sz;
556 
557 	rewind(fd);
558 
559 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
560 	if (sz < 100)
561 		sz = 100;
562 
563 	p = emalloc((sz + 3) * sizeof(struct line));
564 	for (j = 0; (h = readhash(fd));) {
565 		if (j == sz) {
566 			sz = sz * 3 / 2;
567 			p = erealloc(p, (sz + 3) * sizeof(struct line));
568 		}
569 		p[++j].value = h;
570 	}
571 	len[i] = j;
572 	file[i] = p;
573 }
574 
575 static void
576 prune(void)
577 {
578 	int i, j;
579 
580 	for (pref = 0; pref < len[0] && pref < len[1] &&
581 	    file[0][pref + 1].value == file[1][pref + 1].value;
582 	    pref++)
583 		;
584 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
585 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
586 	    suff++)
587 		;
588 	for (j = 0; j < 2; j++) {
589 		sfile[j] = file[j] + pref;
590 		slen[j] = len[j] - pref - suff;
591 		for (i = 0; i <= slen[j]; i++)
592 			sfile[j][i].serial = i;
593 	}
594 }
595 
596 static void
597 equiv(struct line *a, int n, struct line *b, int m, int *c)
598 {
599 	int i, j;
600 
601 	i = j = 1;
602 	while (i <= n && j <= m) {
603 		if (a[i].value < b[j].value)
604 			a[i++].value = 0;
605 		else if (a[i].value == b[j].value)
606 			a[i++].value = j;
607 		else
608 			j++;
609 	}
610 	while (i <= n)
611 		a[i++].value = 0;
612 	b[m + 1].value = 0;
613 	j = 0;
614 	while (++j <= m) {
615 		c[j] = -b[j].serial;
616 		while (b[j + 1].value == b[j].value) {
617 			j++;
618 			c[j] = b[j].serial;
619 		}
620 	}
621 	c[j] = -1;
622 }
623 
624 /* Code taken from ping.c */
625 static int
626 isqrt(int n)
627 {
628 	int y, x = 1;
629 
630 	if (n == 0)
631 		return(0);
632 
633 	do { /* newton was a stinker */
634 		y = x;
635 		x = n / x;
636 		x += y;
637 		x /= 2;
638 	} while ((x - y) > 1 || (x - y) < -1);
639 
640 	return (x);
641 }
642 
643 static int
644 stone(int *a, int n, int *b, int *c)
645 {
646 	int i, k, y, j, l;
647 	int oldc, tc, oldl;
648 	u_int numtries;
649 
650 	const u_int bound = dflag ? UINT_MAX : max(256, isqrt(n));
651 
652 	k = 0;
653 	c[0] = newcand(0, 0, 0);
654 	for (i = 1; i <= n; i++) {
655 		j = a[i];
656 		if (j == 0)
657 			continue;
658 		y = -b[j];
659 		oldl = 0;
660 		oldc = c[0];
661 		numtries = 0;
662 		do {
663 			if (y <= clist[oldc].y)
664 				continue;
665 			l = search(c, k, y);
666 			if (l != oldl + 1)
667 				oldc = c[l - 1];
668 			if (l <= k) {
669 				if (clist[c[l]].y <= y)
670 					continue;
671 				tc = c[l];
672 				c[l] = newcand(i, y, oldc);
673 				oldc = tc;
674 				oldl = l;
675 				numtries++;
676 			} else {
677 				c[l] = newcand(i, y, oldc);
678 				k++;
679 				break;
680 			}
681 		} while ((y = b[++j]) > 0 && numtries < bound);
682 	}
683 	return (k);
684 }
685 
686 static int
687 newcand(int x, int y, int pred)
688 {
689 	struct cand *q;
690 
691 	if (clen == clistlen) {
692 		clistlen = clistlen * 11 / 10;
693 		clist = erealloc(clist, clistlen * sizeof(struct cand));
694 	}
695 	q = clist + clen;
696 	q->x = x;
697 	q->y = y;
698 	q->pred = pred;
699 	return (clen++);
700 }
701 
702 static int
703 search(int *c, int k, int y)
704 {
705 	int i, j, l, t;
706 
707 	if (clist[c[k]].y < y)	/* quick look for typical case */
708 		return (k + 1);
709 	i = 0;
710 	j = k + 1;
711 	while (1) {
712 		l = i + j;
713 		if ((l >>= 1) <= i)
714 			break;
715 		t = clist[c[l]].y;
716 		if (t > y)
717 			j = l;
718 		else if (t < y)
719 			i = l;
720 		else
721 			return (l);
722 	}
723 	return (l + 1);
724 }
725 
726 static void
727 unravel(int p)
728 {
729 	struct cand *q;
730 	int i;
731 
732 	for (i = 0; i <= len[0]; i++)
733 		J[i] = i <= pref ? i :
734 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
735 	for (q = clist + p; q->y != 0; q = clist + q->pred)
736 		J[q->x + pref] = q->y + pref;
737 }
738 
739 /*
740  * Check does double duty:
741  *  1.	ferret out any fortuitous correspondences due
742  *	to confounding by hashing (which result in "jackpot")
743  *  2.  collect random access indexes to the two files
744  */
745 static void
746 check(char *file1, FILE *f1, char *file2, FILE *f2)
747 {
748 	int i, j, jackpot, c, d;
749 	long ctold, ctnew;
750 
751 	rewind(f1);
752 	rewind(f2);
753 	j = 1;
754 	ixold[0] = ixnew[0] = 0;
755 	jackpot = 0;
756 	ctold = ctnew = 0;
757 	for (i = 1; i <= len[0]; i++) {
758 		if (J[i] == 0) {
759 			ixold[i] = ctold += skipline(f1);
760 			continue;
761 		}
762 		while (j < J[i]) {
763 			ixnew[j] = ctnew += skipline(f2);
764 			j++;
765 		}
766 		if (bflag || wflag || iflag) {
767 			for (;;) {
768 				c = getc(f1);
769 				d = getc(f2);
770 				/*
771 				 * GNU diff ignores a missing newline
772 				 * in one file if bflag || wflag.
773 				 */
774 				if ((bflag || wflag) &&
775 				    ((c == EOF && d == '\n') ||
776 				    (c == '\n' && d == EOF))) {
777 					break;
778 				}
779 				ctold++;
780 				ctnew++;
781 				if (bflag && isspace(c) && isspace(d)) {
782 					do {
783 						if (c == '\n')
784 							break;
785 						ctold++;
786 					} while (isspace(c = getc(f1)));
787 					do {
788 						if (d == '\n')
789 							break;
790 						ctnew++;
791 					} while (isspace(d = getc(f2)));
792 				} else if (wflag) {
793 					while (isspace(c) && c != '\n') {
794 						c = getc(f1);
795 						ctold++;
796 					}
797 					while (isspace(d) && d != '\n') {
798 						d = getc(f2);
799 						ctnew++;
800 					}
801 				}
802 				if (chrtran[c] != chrtran[d]) {
803 					jackpot++;
804 					J[i] = 0;
805 					if (c != '\n' && c != EOF)
806 						ctold += skipline(f1);
807 					if (d != '\n' && c != EOF)
808 						ctnew += skipline(f2);
809 					break;
810 				}
811 				if (c == '\n' || c == EOF)
812 					break;
813 			}
814 		} else {
815 			for (;;) {
816 				ctold++;
817 				ctnew++;
818 				if ((c = getc(f1)) != (d = getc(f2))) {
819 					/* jackpot++; */
820 					J[i] = 0;
821 					if (c != '\n' && c != EOF)
822 						ctold += skipline(f1);
823 					if (d != '\n' && c != EOF)
824 						ctnew += skipline(f2);
825 					break;
826 				}
827 				if (c == '\n' || c == EOF)
828 					break;
829 			}
830 		}
831 		ixold[i] = ctold;
832 		ixnew[j] = ctnew;
833 		j++;
834 	}
835 	for (; j <= len[1]; j++)
836 		ixnew[j] = ctnew += skipline(f2);
837 	/*
838 	 * if (jackpot)
839 	 *	fprintf(stderr, "jackpot\n");
840 	 */
841 }
842 
843 /* shellsort CACM #201 */
844 static void
845 sort(struct line *a, int n)
846 {
847 	struct line *ai, *aim, w;
848 	int j, m = 0, k;
849 
850 	if (n == 0)
851 		return;
852 	for (j = 1; j <= n; j *= 2)
853 		m = 2 * j - 1;
854 	for (m /= 2; m != 0; m /= 2) {
855 		k = n - m;
856 		for (j = 1; j <= k; j++) {
857 			for (ai = &a[j]; ai > a; ai -= m) {
858 				aim = &ai[m];
859 				if (aim < ai)
860 					break;	/* wraparound */
861 				if (aim->value > ai[0].value ||
862 				    (aim->value == ai[0].value &&
863 					aim->serial > ai[0].serial))
864 					break;
865 				w.value = ai[0].value;
866 				ai[0].value = aim->value;
867 				aim->value = w.value;
868 				w.serial = ai[0].serial;
869 				ai[0].serial = aim->serial;
870 				aim->serial = w.serial;
871 			}
872 		}
873 	}
874 }
875 
876 static void
877 unsort(struct line *f, int l, int *b)
878 {
879 	int *a, i;
880 
881 	a = emalloc((l + 1) * sizeof(int));
882 	for (i = 1; i <= l; i++)
883 		a[f[i].serial] = f[i].value;
884 	for (i = 1; i <= l; i++)
885 		b[i] = a[i];
886 	free(a);
887 }
888 
889 static int
890 skipline(FILE *f)
891 {
892 	int i, c;
893 
894 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
895 		continue;
896 	return (i);
897 }
898 
899 static void
900 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
901 {
902 	int m, i0, i1, j0, j1;
903 
904 	rewind(f1);
905 	rewind(f2);
906 	m = len[0];
907 	J[0] = 0;
908 	J[m + 1] = len[1] + 1;
909 	if (format != D_EDIT) {
910 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
911 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
912 				i0++;
913 			j0 = J[i0 - 1] + 1;
914 			i1 = i0 - 1;
915 			while (i1 < m && J[i1 + 1] == 0)
916 				i1++;
917 			j1 = J[i1 + 1] - 1;
918 			J[i1] = j1;
919 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
920 		}
921 	} else {
922 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
923 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
924 				i0--;
925 			j0 = J[i0 + 1] - 1;
926 			i1 = i0 + 1;
927 			while (i1 > 1 && J[i1 - 1] == 0)
928 				i1--;
929 			j1 = J[i1 - 1] + 1;
930 			J[i1] = j1;
931 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
932 		}
933 	}
934 	if (m == 0)
935 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
936 	if (format == D_IFDEF) {
937 		for (;;) {
938 #define	c i0
939 			if ((c = getc(f1)) == EOF)
940 				return;
941 			putchar(c);
942 		}
943 #undef c
944 	}
945 	if (anychange != 0) {
946 		if (format == D_CONTEXT)
947 			dump_context_vec(f1, f2);
948 		else if (format == D_UNIFIED)
949 			dump_unified_vec(f1, f2);
950 	}
951 }
952 
953 static __inline void
954 range(int a, int b, char *separator)
955 {
956 	printf("%d", a > b ? b : a);
957 	if (a < b)
958 		printf("%s%d", separator, b);
959 }
960 
961 static __inline void
962 uni_range(int a, int b)
963 {
964 	if (a < b)
965 		printf("%d,%d", a, b - a + 1);
966 	else if (a == b)
967 		printf("%d", b);
968 	else
969 		printf("%d,0", b);
970 }
971 
972 static char *
973 preadline(int fd, size_t len, off_t off)
974 {
975 	char *line;
976 	ssize_t nr;
977 
978 	line = emalloc(len + 1);
979 	if ((nr = pread(fd, line, len, off)) < 0)
980 		err(1, "preadline");
981 	if (nr > 0 && line[nr-1] == '\n')
982 		nr--;
983 	line[nr] = '\0';
984 	return (line);
985 }
986 
987 static int
988 ignoreline(char *line)
989 {
990 	int ret;
991 
992 	ret = regexec(&ignore_re, line, 0, NULL, 0);
993 	free(line);
994 	return (ret == 0);	/* if it matched, it should be ignored. */
995 }
996 
997 /*
998  * Indicate that there is a difference between lines a and b of the from file
999  * to get to lines c to d of the to file.  If a is greater then b then there
1000  * are no lines in the from file involved and this means that there were
1001  * lines appended (beginning at b).  If c is greater than d then there are
1002  * lines missing from the to file.
1003  */
1004 static void
1005 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1006     int *pflags)
1007 {
1008 	static size_t max_context = 64;
1009 	int i;
1010 
1011 restart:
1012 	if (format != D_IFDEF && a > b && c > d)
1013 		return;
1014 	if (ignore_pats != NULL) {
1015 		char *line;
1016 		/*
1017 		 * All lines in the change, insert, or delete must
1018 		 * match an ignore pattern for the change to be
1019 		 * ignored.
1020 		 */
1021 		if (a <= b) {		/* Changes and deletes. */
1022 			for (i = a; i <= b; i++) {
1023 				line = preadline(fileno(f1),
1024 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1025 				if (!ignoreline(line))
1026 					goto proceed;
1027 			}
1028 		}
1029 		if (a > b || c <= d) {	/* Changes and inserts. */
1030 			for (i = c; i <= d; i++) {
1031 				line = preadline(fileno(f2),
1032 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1033 				if (!ignoreline(line))
1034 					goto proceed;
1035 			}
1036 		}
1037 		return;
1038 	}
1039 proceed:
1040 	if (*pflags & D_HEADER) {
1041 		printf("%s %s %s\n", diffargs, file1, file2);
1042 		*pflags &= ~D_HEADER;
1043 	}
1044 	if (format == D_CONTEXT || format == D_UNIFIED) {
1045 		/*
1046 		 * Allocate change records as needed.
1047 		 */
1048 		if (context_vec_ptr == context_vec_end - 1) {
1049 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1050 			max_context <<= 1;
1051 			context_vec_start = erealloc(context_vec_start,
1052 			    max_context * sizeof(struct context_vec));
1053 			context_vec_end = context_vec_start + max_context;
1054 			context_vec_ptr = context_vec_start + offset;
1055 		}
1056 		if (anychange == 0) {
1057 			/*
1058 			 * Print the context/unidiff header first time through.
1059 			 */
1060 			print_header(file1, file2);
1061 			anychange = 1;
1062 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1063 		    c > context_vec_ptr->d + (2 * context) + 1) {
1064 			/*
1065 			 * If this change is more than 'context' lines from the
1066 			 * previous change, dump the record and reset it.
1067 			 */
1068 			if (format == D_CONTEXT)
1069 				dump_context_vec(f1, f2);
1070 			else
1071 				dump_unified_vec(f1, f2);
1072 		}
1073 		context_vec_ptr++;
1074 		context_vec_ptr->a = a;
1075 		context_vec_ptr->b = b;
1076 		context_vec_ptr->c = c;
1077 		context_vec_ptr->d = d;
1078 		return;
1079 	}
1080 	if (anychange == 0)
1081 		anychange = 1;
1082 	switch (format) {
1083 
1084 	case D_BRIEF:
1085 		return;
1086 	case D_NORMAL:
1087 	case D_EDIT:
1088 		range(a, b, ",");
1089 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1090 		if (format == D_NORMAL)
1091 			range(c, d, ",");
1092 		putchar('\n');
1093 		break;
1094 	case D_REVERSE:
1095 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1096 		range(a, b, " ");
1097 		putchar('\n');
1098 		break;
1099 	case D_NREVERSE:
1100 		if (a > b)
1101 			printf("a%d %d\n", b, d - c + 1);
1102 		else {
1103 			printf("d%d %d\n", a, b - a + 1);
1104 			if (!(c > d))
1105 				/* add changed lines */
1106 				printf("a%d %d\n", b, d - c + 1);
1107 		}
1108 		break;
1109 	}
1110 	if (format == D_NORMAL || format == D_IFDEF) {
1111 		fetch(ixold, a, b, f1, '<', 1);
1112 		if (a <= b && c <= d && format == D_NORMAL)
1113 			puts("---");
1114 	}
1115 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1116 	if (i != 0 && format == D_EDIT) {
1117 		/*
1118 		 * A non-zero return value for D_EDIT indicates that the
1119 		 * last line printed was a bare dot (".") that has been
1120 		 * escaped as ".." to prevent ed(1) from misinterpreting
1121 		 * it.  We have to add a substitute command to change this
1122 		 * back and restart where we left off.
1123 		 */
1124 		puts(".");
1125 		printf("%ds/^\\.\\././\n", a);
1126 		a += i;
1127 		c += i;
1128 		goto restart;
1129 	}
1130 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1131 		puts(".");
1132 	if (inifdef) {
1133 		printf("#endif /* %s */\n", ifdefname);
1134 		inifdef = 0;
1135 	}
1136 }
1137 
1138 static int
1139 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1140 {
1141 	int i, j, c, lastc, col, nc;
1142 
1143 	/*
1144 	 * When doing #ifdef's, copy down to current line
1145 	 * if this is the first file, so that stuff makes it to output.
1146 	 */
1147 	if (format == D_IFDEF && oldfile) {
1148 		long curpos = ftell(lb);
1149 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1150 		nc = f[a > b ? b : a - 1] - curpos;
1151 		for (i = 0; i < nc; i++)
1152 			putchar(getc(lb));
1153 	}
1154 	if (a > b)
1155 		return (0);
1156 	if (format == D_IFDEF) {
1157 		if (inifdef) {
1158 			printf("#else /* %s%s */\n",
1159 			    oldfile == 1 ? "!" : "", ifdefname);
1160 		} else {
1161 			if (oldfile)
1162 				printf("#ifndef %s\n", ifdefname);
1163 			else
1164 				printf("#ifdef %s\n", ifdefname);
1165 		}
1166 		inifdef = 1 + oldfile;
1167 	}
1168 	for (i = a; i <= b; i++) {
1169 		fseek(lb, f[i - 1], SEEK_SET);
1170 		nc = f[i] - f[i - 1];
1171 		if (format != D_IFDEF && ch != '\0') {
1172 			putchar(ch);
1173 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1174 			    || format == D_UNIFIED))
1175 				putchar('\t');
1176 			else if (format != D_UNIFIED)
1177 				putchar(' ');
1178 		}
1179 		col = 0;
1180 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1181 			if ((c = getc(lb)) == EOF) {
1182 				if (format == D_EDIT || format == D_REVERSE ||
1183 				    format == D_NREVERSE)
1184 					warnx("No newline at end of file");
1185 				else
1186 					puts("\n\\ No newline at end of file");
1187 				return (0);
1188 			}
1189 			if (c == '\t' && tflag) {
1190 				do {
1191 					putchar(' ');
1192 				} while (++col & 7);
1193 			} else {
1194 				if (format == D_EDIT && j == 1 && c == '\n'
1195 				    && lastc == '.') {
1196 					/*
1197 					 * Don't print a bare "." line
1198 					 * since that will confuse ed(1).
1199 					 * Print ".." instead and return,
1200 					 * giving the caller an offset
1201 					 * from which to restart.
1202 					 */
1203 					puts(".");
1204 					return (i - a + 1);
1205 				}
1206 				putchar(c);
1207 				col++;
1208 			}
1209 		}
1210 	}
1211 	return (0);
1212 }
1213 
1214 /*
1215  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1216  */
1217 static int
1218 readhash(FILE *f)
1219 {
1220 	int i, t, space;
1221 	int sum;
1222 
1223 	sum = 1;
1224 	space = 0;
1225 	if (!bflag && !wflag) {
1226 		if (iflag)
1227 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1228 				if (t == EOF) {
1229 					if (i == 0)
1230 						return (0);
1231 					break;
1232 				}
1233 				sum = sum * 127 + chrtran[t];
1234 			}
1235 		else
1236 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1237 				if (t == EOF) {
1238 					if (i == 0)
1239 						return (0);
1240 					break;
1241 				}
1242 				sum = sum * 127 + t;
1243 			}
1244 	} else {
1245 		for (i = 0;;) {
1246 			switch (t = getc(f)) {
1247 			case '\t':
1248 			case '\r':
1249 			case '\v':
1250 			case '\f':
1251 			case ' ':
1252 				space++;
1253 				continue;
1254 			default:
1255 				if (space && !wflag) {
1256 					i++;
1257 					space = 0;
1258 				}
1259 				sum = sum * 127 + chrtran[t];
1260 				i++;
1261 				continue;
1262 			case EOF:
1263 				if (i == 0)
1264 					return (0);
1265 				/* FALLTHROUGH */
1266 			case '\n':
1267 				break;
1268 			}
1269 			break;
1270 		}
1271 	}
1272 	/*
1273 	 * There is a remote possibility that we end up with a zero sum.
1274 	 * Zero is used as an EOF marker, so return 1 instead.
1275 	 */
1276 	return (sum == 0 ? 1 : sum);
1277 }
1278 
1279 static int
1280 asciifile(FILE *f)
1281 {
1282 	unsigned char buf[BUFSIZ];
1283 	int i, cnt;
1284 
1285 	if (aflag || f == NULL)
1286 		return (1);
1287 
1288 	rewind(f);
1289 	cnt = fread(buf, 1, sizeof(buf), f);
1290 	for (i = 0; i < cnt; i++)
1291 		if (!isprint(buf[i]) && !isspace(buf[i]))
1292 			return (0);
1293 	return (1);
1294 }
1295 
1296 static __inline int min(int a, int b)
1297 {
1298 	return (a < b ? a : b);
1299 }
1300 
1301 static __inline int max(int a, int b)
1302 {
1303 	return (a > b ? a : b);
1304 }
1305 
1306 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1307 
1308 static char *
1309 match_function(const long *f, int pos, FILE *file)
1310 {
1311 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1312 	size_t nc;
1313 	int last = lastline;
1314 	char *p;
1315 	char *state = NULL;
1316 
1317 	lastline = pos;
1318 	while (pos > last) {
1319 		fseek(file, f[pos - 1], SEEK_SET);
1320 		nc = f[pos] - f[pos - 1];
1321 		if (nc >= sizeof(buf))
1322 			nc = sizeof(buf) - 1;
1323 		nc = fread(buf, 1, nc, file);
1324 		if (nc > 0) {
1325 			buf[nc] = '\0';
1326 			p = strchr(buf, '\n');
1327 			if (p != NULL)
1328 				*p = '\0';
1329 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1330 				if (begins_with(buf, "private:")) {
1331 					if (!state)
1332 						state = " (private)";
1333 				} else if (begins_with(buf, "protected:")) {
1334 					if (!state)
1335 						state = " (protected)";
1336 				} else if (begins_with(buf, "public:")) {
1337 					if (!state)
1338 						state = " (public)";
1339 				} else {
1340 					strlcpy(lastbuf, buf, sizeof lastbuf);
1341 					if (state)
1342 						strlcat(lastbuf, state,
1343 						    sizeof lastbuf);
1344 					lastmatchline = pos;
1345 					return lastbuf;
1346 				}
1347 			}
1348 		}
1349 		pos--;
1350 	}
1351 	return lastmatchline > 0 ? lastbuf : NULL;
1352 }
1353 
1354 /* dump accumulated "context" diff changes */
1355 static void
1356 dump_context_vec(FILE *f1, FILE *f2)
1357 {
1358 	struct context_vec *cvp = context_vec_start;
1359 	int lowa, upb, lowc, upd, do_output;
1360 	int a, b, c, d;
1361 	char ch, *f;
1362 
1363 	if (context_vec_start > context_vec_ptr)
1364 		return;
1365 
1366 	b = d = 0;		/* gcc */
1367 	lowa = max(1, cvp->a - context);
1368 	upb = min(len[0], context_vec_ptr->b + context);
1369 	lowc = max(1, cvp->c - context);
1370 	upd = min(len[1], context_vec_ptr->d + context);
1371 
1372 	printf("***************");
1373 	if (pflag) {
1374 		f = match_function(ixold, lowa-1, f1);
1375 		if (f != NULL) {
1376 			putchar(' ');
1377 			fputs(f, stdout);
1378 		}
1379 	}
1380 	printf("\n*** ");
1381 	range(lowa, upb, ",");
1382 	printf(" ****\n");
1383 
1384 	/*
1385 	 * Output changes to the "old" file.  The first loop suppresses
1386 	 * output if there were no changes to the "old" file (we'll see
1387 	 * the "old" lines as context in the "new" list).
1388 	 */
1389 	do_output = 0;
1390 	for (; cvp <= context_vec_ptr; cvp++)
1391 		if (cvp->a <= cvp->b) {
1392 			cvp = context_vec_start;
1393 			do_output++;
1394 			break;
1395 		}
1396 	if (do_output) {
1397 		while (cvp <= context_vec_ptr) {
1398 			a = cvp->a;
1399 			b = cvp->b;
1400 			c = cvp->c;
1401 			d = cvp->d;
1402 
1403 			if (a <= b && c <= d)
1404 				ch = 'c';
1405 			else
1406 				ch = (a <= b) ? 'd' : 'a';
1407 
1408 			if (ch == 'a')
1409 				fetch(ixold, lowa, b, f1, ' ', 0);
1410 			else {
1411 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1412 				fetch(ixold, a, b, f1,
1413 				    ch == 'c' ? '!' : '-', 0);
1414 			}
1415 			lowa = b + 1;
1416 			cvp++;
1417 		}
1418 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1419 	}
1420 	/* output changes to the "new" file */
1421 	printf("--- ");
1422 	range(lowc, upd, ",");
1423 	printf(" ----\n");
1424 
1425 	do_output = 0;
1426 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1427 		if (cvp->c <= cvp->d) {
1428 			cvp = context_vec_start;
1429 			do_output++;
1430 			break;
1431 		}
1432 	if (do_output) {
1433 		while (cvp <= context_vec_ptr) {
1434 			a = cvp->a;
1435 			b = cvp->b;
1436 			c = cvp->c;
1437 			d = cvp->d;
1438 
1439 			if (a <= b && c <= d)
1440 				ch = 'c';
1441 			else
1442 				ch = (a <= b) ? 'd' : 'a';
1443 
1444 			if (ch == 'd')
1445 				fetch(ixnew, lowc, d, f2, ' ', 0);
1446 			else {
1447 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1448 				fetch(ixnew, c, d, f2,
1449 				    ch == 'c' ? '!' : '+', 0);
1450 			}
1451 			lowc = d + 1;
1452 			cvp++;
1453 		}
1454 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1455 	}
1456 	context_vec_ptr = context_vec_start - 1;
1457 }
1458 
1459 /* dump accumulated "unified" diff changes */
1460 static void
1461 dump_unified_vec(FILE *f1, FILE *f2)
1462 {
1463 	struct context_vec *cvp = context_vec_start;
1464 	int lowa, upb, lowc, upd;
1465 	int a, b, c, d;
1466 	char ch, *f;
1467 
1468 	if (context_vec_start > context_vec_ptr)
1469 		return;
1470 
1471 	b = d = 0;		/* gcc */
1472 	lowa = max(1, cvp->a - context);
1473 	upb = min(len[0], context_vec_ptr->b + context);
1474 	lowc = max(1, cvp->c - context);
1475 	upd = min(len[1], context_vec_ptr->d + context);
1476 
1477 	fputs("@@ -", stdout);
1478 	uni_range(lowa, upb);
1479 	fputs(" +", stdout);
1480 	uni_range(lowc, upd);
1481 	fputs(" @@", stdout);
1482 	if (pflag) {
1483 		f = match_function(ixold, lowa-1, f1);
1484 		if (f != NULL) {
1485 			putchar(' ');
1486 			fputs(f, stdout);
1487 		}
1488 	}
1489 	putchar('\n');
1490 
1491 	/*
1492 	 * Output changes in "unified" diff format--the old and new lines
1493 	 * are printed together.
1494 	 */
1495 	for (; cvp <= context_vec_ptr; cvp++) {
1496 		a = cvp->a;
1497 		b = cvp->b;
1498 		c = cvp->c;
1499 		d = cvp->d;
1500 
1501 		/*
1502 		 * c: both new and old changes
1503 		 * d: only changes in the old file
1504 		 * a: only changes in the new file
1505 		 */
1506 		if (a <= b && c <= d)
1507 			ch = 'c';
1508 		else
1509 			ch = (a <= b) ? 'd' : 'a';
1510 
1511 		switch (ch) {
1512 		case 'c':
1513 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1514 			fetch(ixold, a, b, f1, '-', 0);
1515 			fetch(ixnew, c, d, f2, '+', 0);
1516 			break;
1517 		case 'd':
1518 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1519 			fetch(ixold, a, b, f1, '-', 0);
1520 			break;
1521 		case 'a':
1522 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1523 			fetch(ixnew, c, d, f2, '+', 0);
1524 			break;
1525 		}
1526 		lowa = b + 1;
1527 		lowc = d + 1;
1528 	}
1529 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1530 
1531 	context_vec_ptr = context_vec_start - 1;
1532 }
1533 
1534 static void
1535 print_header(const char *file1, const char *file2)
1536 {
1537 	if (label[0] != NULL)
1538 		printf("%s %s\n", format == D_CONTEXT ? "***" : "---",
1539 		    label[0]);
1540 	else
1541 		printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---",
1542 		    file1, ctime(&stb1.st_mtime));
1543 	if (label[1] != NULL)
1544 		printf("%s %s\n", format == D_CONTEXT ? "---" : "+++",
1545 		    label[1]);
1546 	else
1547 		printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++",
1548 		    file2, ctime(&stb2.st_mtime));
1549 }
1550