xref: /openbsd-src/usr.bin/diff/diffreg.c (revision cf2525843d483a385de106a1361b2b9c18d96583)
1 /*	$OpenBSD: diffreg.c,v 1.64 2006/02/22 07:26:08 otto Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.64 2006/02/22 07:26:08 otto Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 
88 /*
89  * diff - compare two files.
90  */
91 
92 /*
93  *	Uses an algorithm due to Harold Stone, which finds
94  *	a pair of longest identical subsequences in the two
95  *	files.
96  *
97  *	The major goal is to generate the match vector J.
98  *	J[i] is the index of the line in file1 corresponding
99  *	to line i file0. J[i] = 0 if there is no
100  *	such line in file1.
101  *
102  *	Lines are hashed so as to work in core. All potential
103  *	matches are located by sorting the lines of each file
104  *	on the hash (called ``value''). In particular, this
105  *	collects the equivalence classes in file1 together.
106  *	Subroutine equiv replaces the value of each line in
107  *	file0 by the index of the first element of its
108  *	matching equivalence in (the reordered) file1.
109  *	To save space equiv squeezes file1 into a single
110  *	array member in which the equivalence classes
111  *	are simply concatenated, except that their first
112  *	members are flagged by changing sign.
113  *
114  *	Next the indices that point into member are unsorted into
115  *	array class according to the original order of file0.
116  *
117  *	The cleverness lies in routine stone. This marches
118  *	through the lines of file0, developing a vector klist
119  *	of "k-candidates". At step i a k-candidate is a matched
120  *	pair of lines x,y (x in file0 y in file1) such that
121  *	there is a common subsequence of length k
122  *	between the first i lines of file0 and the first y
123  *	lines of file1, but there is no such subsequence for
124  *	any smaller y. x is the earliest possible mate to y
125  *	that occurs in such a subsequence.
126  *
127  *	Whenever any of the members of the equivalence class of
128  *	lines in file1 matable to a line in file0 has serial number
129  *	less than the y of some k-candidate, that k-candidate
130  *	with the smallest such y is replaced. The new
131  *	k-candidate is chained (via pred) to the current
132  *	k-1 candidate so that the actual subsequence can
133  *	be recovered. When a member has serial number greater
134  *	that the y of all k-candidates, the klist is extended.
135  *	At the end, the longest subsequence is pulled out
136  *	and placed in the array J by unravel
137  *
138  *	With J in hand, the matches there recorded are
139  *	check'ed against reality to assure that no spurious
140  *	matches have crept in due to hashing. If they have,
141  *	they are broken, and "jackpot" is recorded--a harmless
142  *	matter except that a true match for a spuriously
143  *	mated line may now be unnecessarily reported as a change.
144  *
145  *	Much of the complexity of the program comes simply
146  *	from trying to minimize core utilization and
147  *	maximize the range of doable problems by dynamically
148  *	allocating what is needed and reusing what is not.
149  *	The core requirements for problems larger than somewhat
150  *	are (in words) 2*length(file0) + length(file1) +
151  *	3*(number of k-candidates installed),  typically about
152  *	6n words for files of length n.
153  */
154 
155 struct cand {
156 	int x;
157 	int y;
158 	int pred;
159 };
160 
161 struct line {
162 	int serial;
163 	int value;
164 } *file[2];
165 
166 /*
167  * The following struct is used to record change information when
168  * doing a "context" or "unified" diff.  (see routine "change" to
169  * understand the highly mnemonic field names)
170  */
171 struct context_vec {
172 	int a;			/* start line in old file */
173 	int b;			/* end line in old file */
174 	int c;			/* start line in new file */
175 	int d;			/* end line in new file */
176 };
177 
178 static int  *J;			/* will be overlaid on class */
179 static int  *class;		/* will be overlaid on file[0] */
180 static int  *klist;		/* will be overlaid on file[0] after class */
181 static int  *member;		/* will be overlaid on file[1] */
182 static int   clen;
183 static int   inifdef;		/* whether or not we are in a #ifdef block */
184 static int   len[2];
185 static int   pref, suff;	/* length of prefix and suffix */
186 static int   slen[2];
187 static int   anychange;
188 static long *ixnew;		/* will be overlaid on file[1] */
189 static long *ixold;		/* will be overlaid on klist */
190 static struct cand *clist;	/* merely a free storage pot for candidates */
191 static int   clistlen;		/* the length of clist */
192 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
193 static u_char *chrtran;		/* translation table for case-folding */
194 static struct context_vec *context_vec_start;
195 static struct context_vec *context_vec_end;
196 static struct context_vec *context_vec_ptr;
197 
198 #define FUNCTION_CONTEXT_SIZE	41
199 static char lastbuf[FUNCTION_CONTEXT_SIZE];
200 static int lastline;
201 static int lastmatchline;
202 
203 static FILE *opentemp(const char *);
204 static void output(char *, FILE *, char *, FILE *);
205 static void check(char *, FILE *, char *, FILE *);
206 static void range(int, int, char *);
207 static void uni_range(int, int);
208 static void dump_context_vec(FILE *, FILE *);
209 static void dump_unified_vec(FILE *, FILE *);
210 static void prepare(int, FILE *, off_t);
211 static void prune(void);
212 static void equiv(struct line *, int, struct line *, int, int *);
213 static void unravel(int);
214 static void unsort(struct line *, int, int *);
215 static void change(char *, FILE *, char *, FILE *, int, int, int, int);
216 static void sort(struct line *, int);
217 static void print_header(const char *, const char *);
218 static int  ignoreline(char *);
219 static int  asciifile(FILE *);
220 static int  fetch(long *, int, int, FILE *, int, int);
221 static int  newcand(int, int, int);
222 static int  search(int *, int, int);
223 static int  skipline(FILE *);
224 static int  isqrt(int);
225 static int  stone(int *, int, int *, int *);
226 static int  readhash(FILE *);
227 static int  files_differ(FILE *, FILE *, int);
228 static __inline int min(int, int);
229 static __inline int max(int, int);
230 static char *match_function(const long *, int, FILE *);
231 static char *preadline(int, size_t, off_t);
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
293 diffreg(char *ofile1, char *ofile2, int flags)
294 {
295 	char *file1 = ofile1;
296 	char *file2 = ofile2;
297 	FILE *f1 = NULL;
298 	FILE *f2 = NULL;
299 	int rval = D_SAME;
300 	int i, ostdout = -1;
301 	pid_t pid = -1;
302 
303 	anychange = 0;
304 	lastline = 0;
305 	lastmatchline = 0;
306 	context_vec_ptr = context_vec_start - 1;
307 	chrtran = (iflag ? cup2low : clow2low);
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) < 0) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) < 0) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if (!asciifile(f1) || !asciifile(f2)) {
367 		rval = D_BINARY;
368 		status |= 1;
369 		goto closem;
370 	}
371 	if (lflag) {
372 		/* redirect stdout to pr */
373 		int pfd[2];
374 		char *header;
375 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
376 
377 		easprintf(&header, "%s %s %s", diffargs, file1, file2);
378 		prargv[2] = header;
379 		fflush(stdout);
380 		rewind(stdout);
381 		pipe(pfd);
382 		switch ((pid = fork())) {
383 		case -1:
384 			warnx("No more processes");
385 			status |= 2;
386 			free(header);
387 			return (D_ERROR);
388 		case 0:
389 			/* child */
390 			if (pfd[0] != STDIN_FILENO) {
391 				dup2(pfd[0], STDIN_FILENO);
392 				close(pfd[0]);
393 			}
394 			close(pfd[1]);
395 			execv(_PATH_PR, prargv);
396 			_exit(127);
397 		default:
398 			/* parent */
399 			if (pfd[1] != STDOUT_FILENO) {
400 				ostdout = dup(STDOUT_FILENO);
401 				dup2(pfd[1], STDOUT_FILENO);
402 				close(pfd[1]);
403 			}
404 			close(pfd[0]);
405 			rewind(stdout);
406 			free(header);
407 		}
408 	} else if (flags & D_HEADER)
409 		printf("%s %s %s\n", diffargs, file1, file2);
410 	prepare(0, f1, stb1.st_size);
411 	prepare(1, f2, stb2.st_size);
412 	prune();
413 	sort(sfile[0], slen[0]);
414 	sort(sfile[1], slen[1]);
415 
416 	member = (int *)file[1];
417 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
418 	member = erealloc(member, (slen[1] + 2) * sizeof(int));
419 
420 	class = (int *)file[0];
421 	unsort(sfile[0], slen[0], class);
422 	class = erealloc(class, (slen[0] + 2) * sizeof(int));
423 
424 	klist = emalloc((slen[0] + 2) * sizeof(int));
425 	clen = 0;
426 	clistlen = 100;
427 	clist = emalloc(clistlen * sizeof(struct cand));
428 	i = stone(class, slen[0], member, klist);
429 	free(member);
430 	free(class);
431 
432 	J = erealloc(J, (len[0] + 2) * sizeof(int));
433 	unravel(klist[i]);
434 	free(clist);
435 	free(klist);
436 
437 	ixold = erealloc(ixold, (len[0] + 2) * sizeof(long));
438 	ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long));
439 	check(file1, f1, file2, f2);
440 	output(file1, f1, file2, f2);
441 	if (ostdout != -1) {
442 		int wstatus;
443 
444 		/* close the pipe to pr and restore stdout */
445 		fflush(stdout);
446 		rewind(stdout);
447 		if (ostdout != STDOUT_FILENO) {
448 			close(STDOUT_FILENO);
449 			dup2(ostdout, STDOUT_FILENO);
450 			close(ostdout);
451 		}
452 		waitpid(pid, &wstatus, 0);
453 	}
454 closem:
455 	if (anychange) {
456 		status |= 1;
457 		if (rval == D_SAME)
458 			rval = D_DIFFER;
459 	}
460 	if (f1 != NULL)
461 		fclose(f1);
462 	if (f2 != NULL)
463 		fclose(f2);
464 	if (file1 != ofile1)
465 		free(file1);
466 	if (file2 != ofile2)
467 		free(file2);
468 	return (rval);
469 }
470 
471 /*
472  * Check to see if the given files differ.
473  * Returns 0 if they are the same, 1 if different, and -1 on error.
474  * XXX - could use code from cmp(1) [faster]
475  */
476 static int
477 files_differ(FILE *f1, FILE *f2, int flags)
478 {
479 	char buf1[BUFSIZ], buf2[BUFSIZ];
480 	size_t i, j;
481 
482 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
483 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
484 		return (1);
485 	for (;;) {
486 		i = fread(buf1, 1, sizeof(buf1), f1);
487 		j = fread(buf2, 1, sizeof(buf2), f2);
488 		if (i != j)
489 			return (1);
490 		if (i == 0 && j == 0) {
491 			if (ferror(f1) || ferror(f2))
492 				return (1);
493 			return (0);
494 		}
495 		if (memcmp(buf1, buf2, i) != 0)
496 			return (1);
497 	}
498 }
499 
500 static FILE *
501 opentemp(const char *file)
502 {
503 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
504 	ssize_t nread;
505 	int ifd, ofd;
506 
507 	if (strcmp(file, "-") == 0)
508 		ifd = STDIN_FILENO;
509 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
510 		return (NULL);
511 
512 	if ((tempdir = getenv("TMPDIR")) == NULL)
513 		tempdir = _PATH_TMP;
514 
515 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
516 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
517 	    sizeof(tempfile)) {
518 		close(ifd);
519 		errno = ENAMETOOLONG;
520 		return (NULL);
521 	}
522 
523 	if ((ofd = mkstemp(tempfile)) < 0)
524 		return (NULL);
525 	unlink(tempfile);
526 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
527 		if (write(ofd, buf, nread) != nread) {
528 			close(ifd);
529 			close(ofd);
530 			return (NULL);
531 		}
532 	}
533 	close(ifd);
534 	lseek(ofd, (off_t)0, SEEK_SET);
535 	return (fdopen(ofd, "r"));
536 }
537 
538 char *
539 splice(char *dir, char *file)
540 {
541 	char *tail, *buf;
542 
543 	if ((tail = strrchr(file, '/')) == NULL)
544 		tail = file;
545 	else
546 		tail++;
547 	easprintf(&buf, "%s/%s", dir, tail);
548 	return (buf);
549 }
550 
551 static void
552 prepare(int i, FILE *fd, off_t filesize)
553 {
554 	struct line *p;
555 	int j, h;
556 	size_t sz;
557 
558 	rewind(fd);
559 
560 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
561 	if (sz < 100)
562 		sz = 100;
563 
564 	p = emalloc((sz + 3) * sizeof(struct line));
565 	for (j = 0; (h = readhash(fd));) {
566 		if (j == sz) {
567 			sz = sz * 3 / 2;
568 			p = erealloc(p, (sz + 3) * sizeof(struct line));
569 		}
570 		p[++j].value = h;
571 	}
572 	len[i] = j;
573 	file[i] = p;
574 }
575 
576 static void
577 prune(void)
578 {
579 	int i, j;
580 
581 	for (pref = 0; pref < len[0] && pref < len[1] &&
582 	    file[0][pref + 1].value == file[1][pref + 1].value;
583 	    pref++)
584 		;
585 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
586 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
587 	    suff++)
588 		;
589 	for (j = 0; j < 2; j++) {
590 		sfile[j] = file[j] + pref;
591 		slen[j] = len[j] - pref - suff;
592 		for (i = 0; i <= slen[j]; i++)
593 			sfile[j][i].serial = i;
594 	}
595 }
596 
597 static void
598 equiv(struct line *a, int n, struct line *b, int m, int *c)
599 {
600 	int i, j;
601 
602 	i = j = 1;
603 	while (i <= n && j <= m) {
604 		if (a[i].value < b[j].value)
605 			a[i++].value = 0;
606 		else if (a[i].value == b[j].value)
607 			a[i++].value = j;
608 		else
609 			j++;
610 	}
611 	while (i <= n)
612 		a[i++].value = 0;
613 	b[m + 1].value = 0;
614 	j = 0;
615 	while (++j <= m) {
616 		c[j] = -b[j].serial;
617 		while (b[j + 1].value == b[j].value) {
618 			j++;
619 			c[j] = b[j].serial;
620 		}
621 	}
622 	c[j] = -1;
623 }
624 
625 /* Code taken from ping.c */
626 static int
627 isqrt(int n)
628 {
629 	int y, x = 1;
630 
631 	if (n == 0)
632 		return(0);
633 
634 	do { /* newton was a stinker */
635 		y = x;
636 		x = n / x;
637 		x += y;
638 		x /= 2;
639 	} while ((x - y) > 1 || (x - y) < -1);
640 
641 	return (x);
642 }
643 
644 static int
645 stone(int *a, int n, int *b, int *c)
646 {
647 	int i, k, y, j, l;
648 	int oldc, tc, oldl;
649 	u_int numtries;
650 
651 	const u_int bound = dflag ? UINT_MAX : max(256, isqrt(n));
652 
653 	k = 0;
654 	c[0] = newcand(0, 0, 0);
655 	for (i = 1; i <= n; i++) {
656 		j = a[i];
657 		if (j == 0)
658 			continue;
659 		y = -b[j];
660 		oldl = 0;
661 		oldc = c[0];
662 		numtries = 0;
663 		do {
664 			if (y <= clist[oldc].y)
665 				continue;
666 			l = search(c, k, y);
667 			if (l != oldl + 1)
668 				oldc = c[l - 1];
669 			if (l <= k) {
670 				if (clist[c[l]].y <= y)
671 					continue;
672 				tc = c[l];
673 				c[l] = newcand(i, y, oldc);
674 				oldc = tc;
675 				oldl = l;
676 				numtries++;
677 			} else {
678 				c[l] = newcand(i, y, oldc);
679 				k++;
680 				break;
681 			}
682 		} while ((y = b[++j]) > 0 && numtries < bound);
683 	}
684 	return (k);
685 }
686 
687 static int
688 newcand(int x, int y, int pred)
689 {
690 	struct cand *q;
691 
692 	if (clen == clistlen) {
693 		clistlen = clistlen * 11 / 10;
694 		clist = erealloc(clist, clistlen * sizeof(struct cand));
695 	}
696 	q = clist + clen;
697 	q->x = x;
698 	q->y = y;
699 	q->pred = pred;
700 	return (clen++);
701 }
702 
703 static int
704 search(int *c, int k, int y)
705 {
706 	int i, j, l, t;
707 
708 	if (clist[c[k]].y < y)	/* quick look for typical case */
709 		return (k + 1);
710 	i = 0;
711 	j = k + 1;
712 	while (1) {
713 		l = i + j;
714 		if ((l >>= 1) <= i)
715 			break;
716 		t = clist[c[l]].y;
717 		if (t > y)
718 			j = l;
719 		else if (t < y)
720 			i = l;
721 		else
722 			return (l);
723 	}
724 	return (l + 1);
725 }
726 
727 static void
728 unravel(int p)
729 {
730 	struct cand *q;
731 	int i;
732 
733 	for (i = 0; i <= len[0]; i++)
734 		J[i] = i <= pref ? i :
735 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
736 	for (q = clist + p; q->y != 0; q = clist + q->pred)
737 		J[q->x + pref] = q->y + pref;
738 }
739 
740 /*
741  * Check does double duty:
742  *  1.	ferret out any fortuitous correspondences due
743  *	to confounding by hashing (which result in "jackpot")
744  *  2.  collect random access indexes to the two files
745  */
746 static void
747 check(char *file1, FILE *f1, char *file2, FILE *f2)
748 {
749 	int i, j, jackpot, c, d;
750 	long ctold, ctnew;
751 
752 	rewind(f1);
753 	rewind(f2);
754 	j = 1;
755 	ixold[0] = ixnew[0] = 0;
756 	jackpot = 0;
757 	ctold = ctnew = 0;
758 	for (i = 1; i <= len[0]; i++) {
759 		if (J[i] == 0) {
760 			ixold[i] = ctold += skipline(f1);
761 			continue;
762 		}
763 		while (j < J[i]) {
764 			ixnew[j] = ctnew += skipline(f2);
765 			j++;
766 		}
767 		if (bflag || wflag || iflag) {
768 			for (;;) {
769 				c = getc(f1);
770 				d = getc(f2);
771 				/*
772 				 * GNU diff ignores a missing newline
773 				 * in one file if bflag || wflag.
774 				 */
775 				if ((bflag || wflag) &&
776 				    ((c == EOF && d == '\n') ||
777 				    (c == '\n' && d == EOF))) {
778 					break;
779 				}
780 				ctold++;
781 				ctnew++;
782 				if (bflag && isspace(c) && isspace(d)) {
783 					do {
784 						if (c == '\n')
785 							break;
786 						ctold++;
787 					} while (isspace(c = getc(f1)));
788 					do {
789 						if (d == '\n')
790 							break;
791 						ctnew++;
792 					} while (isspace(d = getc(f2)));
793 				} else if (wflag) {
794 					while (isspace(c) && c != '\n') {
795 						c = getc(f1);
796 						ctold++;
797 					}
798 					while (isspace(d) && d != '\n') {
799 						d = getc(f2);
800 						ctnew++;
801 					}
802 				}
803 				if (chrtran[c] != chrtran[d]) {
804 					jackpot++;
805 					J[i] = 0;
806 					if (c != '\n' && c != EOF)
807 						ctold += skipline(f1);
808 					if (d != '\n' && c != EOF)
809 						ctnew += skipline(f2);
810 					break;
811 				}
812 				if (c == '\n' || c == EOF)
813 					break;
814 			}
815 		} else {
816 			for (;;) {
817 				ctold++;
818 				ctnew++;
819 				if ((c = getc(f1)) != (d = getc(f2))) {
820 					/* jackpot++; */
821 					J[i] = 0;
822 					if (c != '\n' && c != EOF)
823 						ctold += skipline(f1);
824 					if (d != '\n' && c != EOF)
825 						ctnew += skipline(f2);
826 					break;
827 				}
828 				if (c == '\n' || c == EOF)
829 					break;
830 			}
831 		}
832 		ixold[i] = ctold;
833 		ixnew[j] = ctnew;
834 		j++;
835 	}
836 	for (; j <= len[1]; j++)
837 		ixnew[j] = ctnew += skipline(f2);
838 	/*
839 	 * if (jackpot)
840 	 *	fprintf(stderr, "jackpot\n");
841 	 */
842 }
843 
844 /* shellsort CACM #201 */
845 static void
846 sort(struct line *a, int n)
847 {
848 	struct line *ai, *aim, w;
849 	int j, m = 0, k;
850 
851 	if (n == 0)
852 		return;
853 	for (j = 1; j <= n; j *= 2)
854 		m = 2 * j - 1;
855 	for (m /= 2; m != 0; m /= 2) {
856 		k = n - m;
857 		for (j = 1; j <= k; j++) {
858 			for (ai = &a[j]; ai > a; ai -= m) {
859 				aim = &ai[m];
860 				if (aim < ai)
861 					break;	/* wraparound */
862 				if (aim->value > ai[0].value ||
863 				    (aim->value == ai[0].value &&
864 					aim->serial > ai[0].serial))
865 					break;
866 				w.value = ai[0].value;
867 				ai[0].value = aim->value;
868 				aim->value = w.value;
869 				w.serial = ai[0].serial;
870 				ai[0].serial = aim->serial;
871 				aim->serial = w.serial;
872 			}
873 		}
874 	}
875 }
876 
877 static void
878 unsort(struct line *f, int l, int *b)
879 {
880 	int *a, i;
881 
882 	a = emalloc((l + 1) * sizeof(int));
883 	for (i = 1; i <= l; i++)
884 		a[f[i].serial] = f[i].value;
885 	for (i = 1; i <= l; i++)
886 		b[i] = a[i];
887 	free(a);
888 }
889 
890 static int
891 skipline(FILE *f)
892 {
893 	int i, c;
894 
895 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
896 		continue;
897 	return (i);
898 }
899 
900 static void
901 output(char *file1, FILE *f1, char *file2, FILE *f2)
902 {
903 	int m, i0, i1, j0, j1;
904 
905 	rewind(f1);
906 	rewind(f2);
907 	m = len[0];
908 	J[0] = 0;
909 	J[m + 1] = len[1] + 1;
910 	if (format != D_EDIT) {
911 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
912 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
913 				i0++;
914 			j0 = J[i0 - 1] + 1;
915 			i1 = i0 - 1;
916 			while (i1 < m && J[i1 + 1] == 0)
917 				i1++;
918 			j1 = J[i1 + 1] - 1;
919 			J[i1] = j1;
920 			change(file1, f1, file2, f2, i0, i1, j0, j1);
921 		}
922 	} else {
923 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
924 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
925 				i0--;
926 			j0 = J[i0 + 1] - 1;
927 			i1 = i0 + 1;
928 			while (i1 > 1 && J[i1 - 1] == 0)
929 				i1--;
930 			j1 = J[i1 - 1] + 1;
931 			J[i1] = j1;
932 			change(file1, f1, file2, f2, i1, i0, j1, j0);
933 		}
934 	}
935 	if (m == 0)
936 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
937 	if (format == D_IFDEF) {
938 		for (;;) {
939 #define	c i0
940 			if ((c = getc(f1)) == EOF)
941 				return;
942 			putchar(c);
943 		}
944 #undef c
945 	}
946 	if (anychange != 0) {
947 		if (format == D_CONTEXT)
948 			dump_context_vec(f1, f2);
949 		else if (format == D_UNIFIED)
950 			dump_unified_vec(f1, f2);
951 	}
952 }
953 
954 static __inline void
955 range(int a, int b, char *separator)
956 {
957 	printf("%d", a > b ? b : a);
958 	if (a < b)
959 		printf("%s%d", separator, b);
960 }
961 
962 static __inline void
963 uni_range(int a, int b)
964 {
965 	if (a < b)
966 		printf("%d,%d", a, b - a + 1);
967 	else if (a == b)
968 		printf("%d", b);
969 	else
970 		printf("%d,0", b);
971 }
972 
973 static char *
974 preadline(int fd, size_t len, off_t off)
975 {
976 	char *line;
977 	ssize_t nr;
978 
979 	line = emalloc(len + 1);
980 	if ((nr = pread(fd, line, len, off)) < 0)
981 		err(1, "preadline");
982 	if (nr > 0 && line[nr-1] == '\n')
983 		nr--;
984 	line[nr] = '\0';
985 	return (line);
986 }
987 
988 static int
989 ignoreline(char *line)
990 {
991 	int ret;
992 
993 	ret = regexec(&ignore_re, line, 0, NULL, 0);
994 	free(line);
995 	return (ret == 0);	/* if it matched, it should be ignored. */
996 }
997 
998 /*
999  * Indicate that there is a difference between lines a and b of the from file
1000  * to get to lines c to d of the to file.  If a is greater then b then there
1001  * are no lines in the from file involved and this means that there were
1002  * lines appended (beginning at b).  If c is greater than d then there are
1003  * lines missing from the to file.
1004  */
1005 static void
1006 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
1007 {
1008 	static size_t max_context = 64;
1009 	int i;
1010 
1011 restart:
1012 	if (format != D_IFDEF && a > b && c > d)
1013 		return;
1014 	if (ignore_pats != NULL) {
1015 		char *line;
1016 		/*
1017 		 * All lines in the change, insert, or delete must
1018 		 * match an ignore pattern for the change to be
1019 		 * ignored.
1020 		 */
1021 		if (a <= b) {		/* Changes and deletes. */
1022 			for (i = a; i <= b; i++) {
1023 				line = preadline(fileno(f1),
1024 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1025 				if (!ignoreline(line))
1026 					goto proceed;
1027 			}
1028 		}
1029 		if (a > b || c <= d) {	/* Changes and inserts. */
1030 			for (i = c; i <= d; i++) {
1031 				line = preadline(fileno(f2),
1032 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1033 				if (!ignoreline(line))
1034 					goto proceed;
1035 			}
1036 		}
1037 		return;
1038 	}
1039 proceed:
1040 	if (format == D_CONTEXT || format == D_UNIFIED) {
1041 		/*
1042 		 * Allocate change records as needed.
1043 		 */
1044 		if (context_vec_ptr == context_vec_end - 1) {
1045 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1046 			max_context <<= 1;
1047 			context_vec_start = erealloc(context_vec_start,
1048 			    max_context * sizeof(struct context_vec));
1049 			context_vec_end = context_vec_start + max_context;
1050 			context_vec_ptr = context_vec_start + offset;
1051 		}
1052 		if (anychange == 0) {
1053 			/*
1054 			 * Print the context/unidiff header first time through.
1055 			 */
1056 			print_header(file1, file2);
1057 			anychange = 1;
1058 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1059 		    c > context_vec_ptr->d + (2 * context) + 1) {
1060 			/*
1061 			 * If this change is more than 'context' lines from the
1062 			 * previous change, dump the record and reset it.
1063 			 */
1064 			if (format == D_CONTEXT)
1065 				dump_context_vec(f1, f2);
1066 			else
1067 				dump_unified_vec(f1, f2);
1068 		}
1069 		context_vec_ptr++;
1070 		context_vec_ptr->a = a;
1071 		context_vec_ptr->b = b;
1072 		context_vec_ptr->c = c;
1073 		context_vec_ptr->d = d;
1074 		return;
1075 	}
1076 	if (anychange == 0)
1077 		anychange = 1;
1078 	switch (format) {
1079 
1080 	case D_BRIEF:
1081 		return;
1082 	case D_NORMAL:
1083 	case D_EDIT:
1084 		range(a, b, ",");
1085 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1086 		if (format == D_NORMAL)
1087 			range(c, d, ",");
1088 		putchar('\n');
1089 		break;
1090 	case D_REVERSE:
1091 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1092 		range(a, b, " ");
1093 		putchar('\n');
1094 		break;
1095 	case D_NREVERSE:
1096 		if (a > b)
1097 			printf("a%d %d\n", b, d - c + 1);
1098 		else {
1099 			printf("d%d %d\n", a, b - a + 1);
1100 			if (!(c > d))
1101 				/* add changed lines */
1102 				printf("a%d %d\n", b, d - c + 1);
1103 		}
1104 		break;
1105 	}
1106 	if (format == D_NORMAL || format == D_IFDEF) {
1107 		fetch(ixold, a, b, f1, '<', 1);
1108 		if (a <= b && c <= d && format == D_NORMAL)
1109 			puts("---");
1110 	}
1111 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1112 	if (i != 0 && format == D_EDIT) {
1113 		/*
1114 		 * A non-zero return value for D_EDIT indicates that the
1115 		 * last line printed was a bare dot (".") that has been
1116 		 * escaped as ".." to prevent ed(1) from misinterpreting
1117 		 * it.  We have to add a substitute command to change this
1118 		 * back and restart where we left off.
1119 		 */
1120 		puts(".");
1121 		printf("%ds/^\\.\\././\n", a);
1122 		a += i;
1123 		c += i;
1124 		goto restart;
1125 	}
1126 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1127 		puts(".");
1128 	if (inifdef) {
1129 		printf("#endif /* %s */\n", ifdefname);
1130 		inifdef = 0;
1131 	}
1132 }
1133 
1134 static int
1135 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1136 {
1137 	int i, j, c, lastc, col, nc;
1138 
1139 	/*
1140 	 * When doing #ifdef's, copy down to current line
1141 	 * if this is the first file, so that stuff makes it to output.
1142 	 */
1143 	if (format == D_IFDEF && oldfile) {
1144 		long curpos = ftell(lb);
1145 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1146 		nc = f[a > b ? b : a - 1] - curpos;
1147 		for (i = 0; i < nc; i++)
1148 			putchar(getc(lb));
1149 	}
1150 	if (a > b)
1151 		return (0);
1152 	if (format == D_IFDEF) {
1153 		if (inifdef) {
1154 			printf("#else /* %s%s */\n",
1155 			    oldfile == 1 ? "!" : "", ifdefname);
1156 		} else {
1157 			if (oldfile)
1158 				printf("#ifndef %s\n", ifdefname);
1159 			else
1160 				printf("#ifdef %s\n", ifdefname);
1161 		}
1162 		inifdef = 1 + oldfile;
1163 	}
1164 	for (i = a; i <= b; i++) {
1165 		fseek(lb, f[i - 1], SEEK_SET);
1166 		nc = f[i] - f[i - 1];
1167 		if (format != D_IFDEF && ch != '\0') {
1168 			putchar(ch);
1169 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1170 			    || format == D_UNIFIED))
1171 				putchar('\t');
1172 			else if (format != D_UNIFIED)
1173 				putchar(' ');
1174 		}
1175 		col = 0;
1176 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1177 			if ((c = getc(lb)) == EOF) {
1178 				if (format == D_EDIT || format == D_REVERSE ||
1179 				    format == D_NREVERSE)
1180 					warnx("No newline at end of file");
1181 				else
1182 					puts("\n\\ No newline at end of file");
1183 				return (0);
1184 			}
1185 			if (c == '\t' && tflag) {
1186 				do {
1187 					putchar(' ');
1188 				} while (++col & 7);
1189 			} else {
1190 				if (format == D_EDIT && j == 1 && c == '\n'
1191 				    && lastc == '.') {
1192 					/*
1193 					 * Don't print a bare "." line
1194 					 * since that will confuse ed(1).
1195 					 * Print ".." instead and return,
1196 					 * giving the caller an offset
1197 					 * from which to restart.
1198 					 */
1199 					puts(".");
1200 					return (i - a + 1);
1201 				}
1202 				putchar(c);
1203 				col++;
1204 			}
1205 		}
1206 	}
1207 	return (0);
1208 }
1209 
1210 /*
1211  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1212  */
1213 static int
1214 readhash(FILE *f)
1215 {
1216 	int i, t, space;
1217 	int sum;
1218 
1219 	sum = 1;
1220 	space = 0;
1221 	if (!bflag && !wflag) {
1222 		if (iflag)
1223 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1224 				if (t == EOF) {
1225 					if (i == 0)
1226 						return (0);
1227 					break;
1228 				}
1229 				sum = sum * 127 + chrtran[t];
1230 			}
1231 		else
1232 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1233 				if (t == EOF) {
1234 					if (i == 0)
1235 						return (0);
1236 					break;
1237 				}
1238 				sum = sum * 127 + t;
1239 			}
1240 	} else {
1241 		for (i = 0;;) {
1242 			switch (t = getc(f)) {
1243 			case '\t':
1244 			case '\r':
1245 			case '\v':
1246 			case '\f':
1247 			case ' ':
1248 				space++;
1249 				continue;
1250 			default:
1251 				if (space && !wflag) {
1252 					i++;
1253 					space = 0;
1254 				}
1255 				sum = sum * 127 + chrtran[t];
1256 				i++;
1257 				continue;
1258 			case EOF:
1259 				if (i == 0)
1260 					return (0);
1261 				/* FALLTHROUGH */
1262 			case '\n':
1263 				break;
1264 			}
1265 			break;
1266 		}
1267 	}
1268 	/*
1269 	 * There is a remote possibility that we end up with a zero sum.
1270 	 * Zero is used as an EOF marker, so return 1 instead.
1271 	 */
1272 	return (sum == 0 ? 1 : sum);
1273 }
1274 
1275 static int
1276 asciifile(FILE *f)
1277 {
1278 	unsigned char buf[BUFSIZ];
1279 	int i, cnt;
1280 
1281 	if (aflag || f == NULL)
1282 		return (1);
1283 
1284 	rewind(f);
1285 	cnt = fread(buf, 1, sizeof(buf), f);
1286 	for (i = 0; i < cnt; i++)
1287 		if (!isprint(buf[i]) && !isspace(buf[i]))
1288 			return (0);
1289 	return (1);
1290 }
1291 
1292 static __inline int min(int a, int b)
1293 {
1294 	return (a < b ? a : b);
1295 }
1296 
1297 static __inline int max(int a, int b)
1298 {
1299 	return (a > b ? a : b);
1300 }
1301 
1302 static char *
1303 match_function(const long *f, int pos, FILE *file)
1304 {
1305 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1306 	size_t nc;
1307 	int last = lastline;
1308 	char *p;
1309 
1310 	lastline = pos;
1311 	while (pos > last) {
1312 		fseek(file, f[pos - 1], SEEK_SET);
1313 		nc = f[pos] - f[pos - 1];
1314 		if (nc >= sizeof(buf))
1315 			nc = sizeof(buf) - 1;
1316 		nc = fread(buf, 1, nc, file);
1317 		if (nc > 0) {
1318 			buf[nc] = '\0';
1319 			p = strchr(buf, '\n');
1320 			if (p != NULL)
1321 				*p = '\0';
1322 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1323 				strlcpy(lastbuf, buf, sizeof lastbuf);
1324 				lastmatchline = pos;
1325 				return lastbuf;
1326 			}
1327 		}
1328 		pos--;
1329 	}
1330 	return lastmatchline > 0 ? lastbuf : NULL;
1331 }
1332 
1333 /* dump accumulated "context" diff changes */
1334 static void
1335 dump_context_vec(FILE *f1, FILE *f2)
1336 {
1337 	struct context_vec *cvp = context_vec_start;
1338 	int lowa, upb, lowc, upd, do_output;
1339 	int a, b, c, d;
1340 	char ch, *f;
1341 
1342 	if (context_vec_start > context_vec_ptr)
1343 		return;
1344 
1345 	b = d = 0;		/* gcc */
1346 	lowa = max(1, cvp->a - context);
1347 	upb = min(len[0], context_vec_ptr->b + context);
1348 	lowc = max(1, cvp->c - context);
1349 	upd = min(len[1], context_vec_ptr->d + context);
1350 
1351 	printf("***************");
1352 	if (pflag) {
1353 		f = match_function(ixold, lowa-1, f1);
1354 		if (f != NULL) {
1355 			putchar(' ');
1356 			fputs(f, stdout);
1357 		}
1358 	}
1359 	printf("\n*** ");
1360 	range(lowa, upb, ",");
1361 	printf(" ****\n");
1362 
1363 	/*
1364 	 * Output changes to the "old" file.  The first loop suppresses
1365 	 * output if there were no changes to the "old" file (we'll see
1366 	 * the "old" lines as context in the "new" list).
1367 	 */
1368 	do_output = 0;
1369 	for (; cvp <= context_vec_ptr; cvp++)
1370 		if (cvp->a <= cvp->b) {
1371 			cvp = context_vec_start;
1372 			do_output++;
1373 			break;
1374 		}
1375 	if (do_output) {
1376 		while (cvp <= context_vec_ptr) {
1377 			a = cvp->a;
1378 			b = cvp->b;
1379 			c = cvp->c;
1380 			d = cvp->d;
1381 
1382 			if (a <= b && c <= d)
1383 				ch = 'c';
1384 			else
1385 				ch = (a <= b) ? 'd' : 'a';
1386 
1387 			if (ch == 'a')
1388 				fetch(ixold, lowa, b, f1, ' ', 0);
1389 			else {
1390 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1391 				fetch(ixold, a, b, f1,
1392 				    ch == 'c' ? '!' : '-', 0);
1393 			}
1394 			lowa = b + 1;
1395 			cvp++;
1396 		}
1397 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1398 	}
1399 	/* output changes to the "new" file */
1400 	printf("--- ");
1401 	range(lowc, upd, ",");
1402 	printf(" ----\n");
1403 
1404 	do_output = 0;
1405 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1406 		if (cvp->c <= cvp->d) {
1407 			cvp = context_vec_start;
1408 			do_output++;
1409 			break;
1410 		}
1411 	if (do_output) {
1412 		while (cvp <= context_vec_ptr) {
1413 			a = cvp->a;
1414 			b = cvp->b;
1415 			c = cvp->c;
1416 			d = cvp->d;
1417 
1418 			if (a <= b && c <= d)
1419 				ch = 'c';
1420 			else
1421 				ch = (a <= b) ? 'd' : 'a';
1422 
1423 			if (ch == 'd')
1424 				fetch(ixnew, lowc, d, f2, ' ', 0);
1425 			else {
1426 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1427 				fetch(ixnew, c, d, f2,
1428 				    ch == 'c' ? '!' : '+', 0);
1429 			}
1430 			lowc = d + 1;
1431 			cvp++;
1432 		}
1433 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1434 	}
1435 	context_vec_ptr = context_vec_start - 1;
1436 }
1437 
1438 /* dump accumulated "unified" diff changes */
1439 static void
1440 dump_unified_vec(FILE *f1, FILE *f2)
1441 {
1442 	struct context_vec *cvp = context_vec_start;
1443 	int lowa, upb, lowc, upd;
1444 	int a, b, c, d;
1445 	char ch, *f;
1446 
1447 	if (context_vec_start > context_vec_ptr)
1448 		return;
1449 
1450 	b = d = 0;		/* gcc */
1451 	lowa = max(1, cvp->a - context);
1452 	upb = min(len[0], context_vec_ptr->b + context);
1453 	lowc = max(1, cvp->c - context);
1454 	upd = min(len[1], context_vec_ptr->d + context);
1455 
1456 	fputs("@@ -", stdout);
1457 	uni_range(lowa, upb);
1458 	fputs(" +", stdout);
1459 	uni_range(lowc, upd);
1460 	fputs(" @@", stdout);
1461 	if (pflag) {
1462 		f = match_function(ixold, lowa-1, f1);
1463 		if (f != NULL) {
1464 			putchar(' ');
1465 			fputs(f, stdout);
1466 		}
1467 	}
1468 	putchar('\n');
1469 
1470 	/*
1471 	 * Output changes in "unified" diff format--the old and new lines
1472 	 * are printed together.
1473 	 */
1474 	for (; cvp <= context_vec_ptr; cvp++) {
1475 		a = cvp->a;
1476 		b = cvp->b;
1477 		c = cvp->c;
1478 		d = cvp->d;
1479 
1480 		/*
1481 		 * c: both new and old changes
1482 		 * d: only changes in the old file
1483 		 * a: only changes in the new file
1484 		 */
1485 		if (a <= b && c <= d)
1486 			ch = 'c';
1487 		else
1488 			ch = (a <= b) ? 'd' : 'a';
1489 
1490 		switch (ch) {
1491 		case 'c':
1492 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1493 			fetch(ixold, a, b, f1, '-', 0);
1494 			fetch(ixnew, c, d, f2, '+', 0);
1495 			break;
1496 		case 'd':
1497 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1498 			fetch(ixold, a, b, f1, '-', 0);
1499 			break;
1500 		case 'a':
1501 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1502 			fetch(ixnew, c, d, f2, '+', 0);
1503 			break;
1504 		}
1505 		lowa = b + 1;
1506 		lowc = d + 1;
1507 	}
1508 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1509 
1510 	context_vec_ptr = context_vec_start - 1;
1511 }
1512 
1513 static void
1514 print_header(const char *file1, const char *file2)
1515 {
1516 	if (label[0] != NULL)
1517 		printf("%s %s\n", format == D_CONTEXT ? "***" : "---",
1518 		    label[0]);
1519 	else
1520 		printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---",
1521 		    file1, ctime(&stb1.st_mtime));
1522 	if (label[1] != NULL)
1523 		printf("%s %s\n", format == D_CONTEXT ? "---" : "+++",
1524 		    label[1]);
1525 	else
1526 		printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++",
1527 		    file2, ctime(&stb2.st_mtime));
1528 }
1529