xref: /openbsd-src/usr.bin/diff/diffreg.c (revision dba1d6ea0e7adc7407ba8b634ca533c869704ccd)
1 /*	$OpenBSD: diffreg.c,v 1.71 2009/06/06 15:00:27 ray Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.71 2009/06/06 15:00:27 ray Exp $";
69 #endif /* not lint */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 #include <sys/wait.h>
74 
75 #include <ctype.h>
76 #include <err.h>
77 #include <errno.h>
78 #include <fcntl.h>
79 #include <stddef.h>
80 #include <stdio.h>
81 #include <stdlib.h>
82 #include <string.h>
83 #include <unistd.h>
84 
85 #include "diff.h"
86 #include "pathnames.h"
87 #include "xmalloc.h"
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 static FILE	*opentemp(const char *);
180 static void	 output(char *, FILE *, char *, FILE *, int);
181 static void	 check(char *, FILE *, char *, FILE *, int);
182 static void	 range(int, int, char *);
183 static void	 uni_range(int, int);
184 static void	 dump_context_vec(FILE *, FILE *, int);
185 static void	 dump_unified_vec(FILE *, FILE *, int);
186 static void	 prepare(int, FILE *, off_t, int);
187 static void	 prune(void);
188 static void	 equiv(struct line *, int, struct line *, int, int *);
189 static void	 unravel(int);
190 static void	 unsort(struct line *, int, int *);
191 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
192 static void	 sort(struct line *, int);
193 static void	 print_header(const char *, const char *);
194 static int	 ignoreline(char *);
195 static int	 asciifile(FILE *);
196 static int	 fetch(long *, int, int, FILE *, int, int, int);
197 static int	 newcand(int, int, int);
198 static int	 search(int *, int, int);
199 static int	 skipline(FILE *);
200 static int	 isqrt(int);
201 static int	 stone(int *, int, int *, int *, int);
202 static int	 readhash(FILE *, int);
203 static int	 files_differ(FILE *, FILE *, int);
204 static char	*match_function(const long *, int, FILE *);
205 static char	*preadline(int, size_t, off_t);
206 
207 static int  *J;			/* will be overlaid on class */
208 static int  *class;		/* will be overlaid on file[0] */
209 static int  *klist;		/* will be overlaid on file[0] after class */
210 static int  *member;		/* will be overlaid on file[1] */
211 static int   clen;
212 static int   inifdef;		/* whether or not we are in a #ifdef block */
213 static int   len[2];
214 static int   pref, suff;	/* length of prefix and suffix */
215 static int   slen[2];
216 static int   anychange;
217 static long *ixnew;		/* will be overlaid on file[1] */
218 static long *ixold;		/* will be overlaid on klist */
219 static struct cand *clist;	/* merely a free storage pot for candidates */
220 static int   clistlen;		/* the length of clist */
221 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
222 static u_char *chrtran;		/* translation table for case-folding */
223 static struct context_vec *context_vec_start;
224 static struct context_vec *context_vec_end;
225 static struct context_vec *context_vec_ptr;
226 
227 #define FUNCTION_CONTEXT_SIZE	55
228 static char lastbuf[FUNCTION_CONTEXT_SIZE];
229 static int lastline;
230 static int lastmatchline;
231 
232 
233 /*
234  * chrtran points to one of 2 translation tables: cup2low if folding upper to
235  * lower case clow2low if not folding case
236  */
237 u_char clow2low[256] = {
238 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
239 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
240 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
241 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
242 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
243 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
244 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
245 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
246 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
247 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
248 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
249 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
250 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
251 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
252 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
253 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
254 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
255 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
256 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
257 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
258 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
259 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
260 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
261 	0xfd, 0xfe, 0xff
262 };
263 
264 u_char cup2low[256] = {
265 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
266 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
267 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
268 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
269 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
270 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
271 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
272 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
274 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
275 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
276 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
277 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
278 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
279 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
280 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
281 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
282 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
283 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
284 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
285 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
286 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
287 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
288 	0xfd, 0xfe, 0xff
289 };
290 
291 int
292 diffreg(char *ofile1, char *ofile2, int flags)
293 {
294 	char *file1 = ofile1;
295 	char *file2 = ofile2;
296 	FILE *f1, *f2;
297 	int i, rval, ostdout = -1;
298 	pid_t pid = -1;
299 
300 	f1 = f2 = NULL;
301 	rval = D_SAME;
302 	anychange = 0;
303 	lastline = 0;
304 	lastmatchline = 0;
305 	context_vec_ptr = context_vec_start - 1;
306 	if (flags & D_IGNORECASE)
307 		chrtran = cup2low;
308 	else
309 		chrtran = clow2low;
310 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
311 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
312 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
313 		goto closem;
314 
315 	if (flags & D_EMPTY1)
316 		f1 = fopen(_PATH_DEVNULL, "r");
317 	else {
318 		if (!S_ISREG(stb1.st_mode)) {
319 			if ((f1 = opentemp(file1)) == NULL ||
320 			    fstat(fileno(f1), &stb1) < 0) {
321 				warn("%s", file1);
322 				status |= 2;
323 				goto closem;
324 			}
325 		} else if (strcmp(file1, "-") == 0)
326 			f1 = stdin;
327 		else
328 			f1 = fopen(file1, "r");
329 	}
330 	if (f1 == NULL) {
331 		warn("%s", file1);
332 		status |= 2;
333 		goto closem;
334 	}
335 
336 	if (flags & D_EMPTY2)
337 		f2 = fopen(_PATH_DEVNULL, "r");
338 	else {
339 		if (!S_ISREG(stb2.st_mode)) {
340 			if ((f2 = opentemp(file2)) == NULL ||
341 			    fstat(fileno(f2), &stb2) < 0) {
342 				warn("%s", file2);
343 				status |= 2;
344 				goto closem;
345 			}
346 		} else if (strcmp(file2, "-") == 0)
347 			f2 = stdin;
348 		else
349 			f2 = fopen(file2, "r");
350 	}
351 	if (f2 == NULL) {
352 		warn("%s", file2);
353 		status |= 2;
354 		goto closem;
355 	}
356 
357 	switch (files_differ(f1, f2, flags)) {
358 	case 0:
359 		goto closem;
360 	case 1:
361 		break;
362 	default:
363 		/* error */
364 		status |= 2;
365 		goto closem;
366 	}
367 
368 	if ((flags & D_FORCEASCII) == 0 &&
369 	    (!asciifile(f1) || !asciifile(f2))) {
370 		rval = D_BINARY;
371 		status |= 1;
372 		goto closem;
373 	}
374 	if (lflag) {
375 		/* redirect stdout to pr */
376 		int pfd[2];
377 		char *header;
378 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
379 
380 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
381 		prargv[2] = header;
382 		fflush(stdout);
383 		rewind(stdout);
384 		pipe(pfd);
385 		switch ((pid = fork())) {
386 		case -1:
387 			warnx("No more processes");
388 			status |= 2;
389 			xfree(header);
390 			return (D_ERROR);
391 		case 0:
392 			/* child */
393 			if (pfd[0] != STDIN_FILENO) {
394 				dup2(pfd[0], STDIN_FILENO);
395 				close(pfd[0]);
396 			}
397 			close(pfd[1]);
398 			execv(_PATH_PR, prargv);
399 			_exit(127);
400 		default:
401 			/* parent */
402 			if (pfd[1] != STDOUT_FILENO) {
403 				ostdout = dup(STDOUT_FILENO);
404 				dup2(pfd[1], STDOUT_FILENO);
405 				close(pfd[1]);
406 			}
407 			close(pfd[0]);
408 			rewind(stdout);
409 			xfree(header);
410 		}
411 	}
412 	prepare(0, f1, stb1.st_size, flags);
413 	prepare(1, f2, stb2.st_size, flags);
414 	prune();
415 	sort(sfile[0], slen[0]);
416 	sort(sfile[1], slen[1]);
417 
418 	member = (int *)file[1];
419 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
420 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
421 
422 	class = (int *)file[0];
423 	unsort(sfile[0], slen[0], class);
424 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
425 
426 	klist = xmalloc((slen[0] + 2) * sizeof(*klist));
427 	clen = 0;
428 	clistlen = 100;
429 	clist = xmalloc(clistlen * sizeof(*clist));
430 	i = stone(class, slen[0], member, klist, flags);
431 	xfree(member);
432 	xfree(class);
433 
434 	J = xrealloc(J, len[0] + 2, sizeof(*J));
435 	unravel(klist[i]);
436 	xfree(clist);
437 	xfree(klist);
438 
439 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
440 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
441 	check(file1, f1, file2, f2, flags);
442 	output(file1, f1, file2, f2, flags);
443 	if (ostdout != -1) {
444 		int wstatus;
445 
446 		/* close the pipe to pr and restore stdout */
447 		fflush(stdout);
448 		rewind(stdout);
449 		if (ostdout != STDOUT_FILENO) {
450 			close(STDOUT_FILENO);
451 			dup2(ostdout, STDOUT_FILENO);
452 			close(ostdout);
453 		}
454 		waitpid(pid, &wstatus, 0);
455 	}
456 closem:
457 	if (anychange) {
458 		status |= 1;
459 		if (rval == D_SAME)
460 			rval = D_DIFFER;
461 	}
462 	if (f1 != NULL)
463 		fclose(f1);
464 	if (f2 != NULL)
465 		fclose(f2);
466 	if (file1 != ofile1)
467 		xfree(file1);
468 	if (file2 != ofile2)
469 		xfree(file2);
470 
471 	return (rval);
472 }
473 
474 /*
475  * Check to see if the given files differ.
476  * Returns 0 if they are the same, 1 if different, and -1 on error.
477  * XXX - could use code from cmp(1) [faster]
478  */
479 static int
480 files_differ(FILE *f1, FILE *f2, int flags)
481 {
482 	char buf1[BUFSIZ], buf2[BUFSIZ];
483 	size_t i, j;
484 
485 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
486 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
487 		return (1);
488 	for (;;) {
489 		i = fread(buf1, 1, sizeof(buf1), f1);
490 		j = fread(buf2, 1, sizeof(buf2), f2);
491 		if (i != j)
492 			return (1);
493 		if (i == 0 && j == 0) {
494 			if (ferror(f1) || ferror(f2))
495 				return (1);
496 			return (0);
497 		}
498 		if (memcmp(buf1, buf2, i) != 0)
499 			return (1);
500 	}
501 }
502 
503 static FILE *
504 opentemp(const char *file)
505 {
506 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
507 	ssize_t nread;
508 	int ifd, ofd;
509 
510 	if (strcmp(file, "-") == 0)
511 		ifd = STDIN_FILENO;
512 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
513 		return (NULL);
514 
515 	if ((tempdir = getenv("TMPDIR")) == NULL)
516 		tempdir = _PATH_TMP;
517 
518 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
519 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
520 	    sizeof(tempfile)) {
521 		close(ifd);
522 		errno = ENAMETOOLONG;
523 		return (NULL);
524 	}
525 
526 	if ((ofd = mkstemp(tempfile)) < 0)
527 		return (NULL);
528 	unlink(tempfile);
529 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
530 		if (write(ofd, buf, nread) != nread) {
531 			close(ifd);
532 			close(ofd);
533 			return (NULL);
534 		}
535 	}
536 	close(ifd);
537 	lseek(ofd, (off_t)0, SEEK_SET);
538 	return (fdopen(ofd, "r"));
539 }
540 
541 char *
542 splice(char *dir, char *file)
543 {
544 	char *tail, *buf;
545 
546 	if ((tail = strrchr(file, '/')) == NULL)
547 		tail = file;
548 	else
549 		tail++;
550 	xasprintf(&buf, "%s/%s", dir, tail);
551 	return (buf);
552 }
553 
554 static void
555 prepare(int i, FILE *fd, off_t filesize, int flags)
556 {
557 	struct line *p;
558 	int j, h;
559 	size_t sz;
560 
561 	rewind(fd);
562 
563 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
564 	if (sz < 100)
565 		sz = 100;
566 
567 	p = xmalloc((sz + 3) * sizeof(*p));
568 	for (j = 0; (h = readhash(fd, flags));) {
569 		if (j == sz) {
570 			sz = sz * 3 / 2;
571 			p = xrealloc(p, sz + 3, sizeof(*p));
572 		}
573 		p[++j].value = h;
574 	}
575 	len[i] = j;
576 	file[i] = p;
577 }
578 
579 static void
580 prune(void)
581 {
582 	int i, j;
583 
584 	for (pref = 0; pref < len[0] && pref < len[1] &&
585 	    file[0][pref + 1].value == file[1][pref + 1].value;
586 	    pref++)
587 		;
588 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
589 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
590 	    suff++)
591 		;
592 	for (j = 0; j < 2; j++) {
593 		sfile[j] = file[j] + pref;
594 		slen[j] = len[j] - pref - suff;
595 		for (i = 0; i <= slen[j]; i++)
596 			sfile[j][i].serial = i;
597 	}
598 }
599 
600 static void
601 equiv(struct line *a, int n, struct line *b, int m, int *c)
602 {
603 	int i, j;
604 
605 	i = j = 1;
606 	while (i <= n && j <= m) {
607 		if (a[i].value < b[j].value)
608 			a[i++].value = 0;
609 		else if (a[i].value == b[j].value)
610 			a[i++].value = j;
611 		else
612 			j++;
613 	}
614 	while (i <= n)
615 		a[i++].value = 0;
616 	b[m + 1].value = 0;
617 	j = 0;
618 	while (++j <= m) {
619 		c[j] = -b[j].serial;
620 		while (b[j + 1].value == b[j].value) {
621 			j++;
622 			c[j] = b[j].serial;
623 		}
624 	}
625 	c[j] = -1;
626 }
627 
628 /* Code taken from ping.c */
629 static int
630 isqrt(int n)
631 {
632 	int y, x = 1;
633 
634 	if (n == 0)
635 		return (0);
636 
637 	do { /* newton was a stinker */
638 		y = x;
639 		x = n / x;
640 		x += y;
641 		x /= 2;
642 	} while ((x - y) > 1 || (x - y) < -1);
643 
644 	return (x);
645 }
646 
647 static int
648 stone(int *a, int n, int *b, int *c, int flags)
649 {
650 	int i, k, y, j, l;
651 	int oldc, tc, oldl;
652 	u_int numtries;
653 
654 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
655 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
656 	    MAX(256, isqrt(n));
657 
658 	k = 0;
659 	c[0] = newcand(0, 0, 0);
660 	for (i = 1; i <= n; i++) {
661 		j = a[i];
662 		if (j == 0)
663 			continue;
664 		y = -b[j];
665 		oldl = 0;
666 		oldc = c[0];
667 		numtries = 0;
668 		do {
669 			if (y <= clist[oldc].y)
670 				continue;
671 			l = search(c, k, y);
672 			if (l != oldl + 1)
673 				oldc = c[l - 1];
674 			if (l <= k) {
675 				if (clist[c[l]].y <= y)
676 					continue;
677 				tc = c[l];
678 				c[l] = newcand(i, y, oldc);
679 				oldc = tc;
680 				oldl = l;
681 				numtries++;
682 			} else {
683 				c[l] = newcand(i, y, oldc);
684 				k++;
685 				break;
686 			}
687 		} while ((y = b[++j]) > 0 && numtries < bound);
688 	}
689 	return (k);
690 }
691 
692 static int
693 newcand(int x, int y, int pred)
694 {
695 	struct cand *q;
696 
697 	if (clen == clistlen) {
698 		clistlen = clistlen * 11 / 10;
699 		clist = xrealloc(clist, clistlen, sizeof(*clist));
700 	}
701 	q = clist + clen;
702 	q->x = x;
703 	q->y = y;
704 	q->pred = pred;
705 	return (clen++);
706 }
707 
708 static int
709 search(int *c, int k, int y)
710 {
711 	int i, j, l, t;
712 
713 	if (clist[c[k]].y < y)	/* quick look for typical case */
714 		return (k + 1);
715 	i = 0;
716 	j = k + 1;
717 	for (;;) {
718 		l = (i + j) / 2;
719 		if (l <= i)
720 			break;
721 		t = clist[c[l]].y;
722 		if (t > y)
723 			j = l;
724 		else if (t < y)
725 			i = l;
726 		else
727 			return (l);
728 	}
729 	return (l + 1);
730 }
731 
732 static void
733 unravel(int p)
734 {
735 	struct cand *q;
736 	int i;
737 
738 	for (i = 0; i <= len[0]; i++)
739 		J[i] = i <= pref ? i :
740 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
741 	for (q = clist + p; q->y != 0; q = clist + q->pred)
742 		J[q->x + pref] = q->y + pref;
743 }
744 
745 /*
746  * Check does double duty:
747  *  1.	ferret out any fortuitous correspondences due
748  *	to confounding by hashing (which result in "jackpot")
749  *  2.  collect random access indexes to the two files
750  */
751 static void
752 check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
753 {
754 	int i, j, jackpot, c, d;
755 	long ctold, ctnew;
756 
757 	rewind(f1);
758 	rewind(f2);
759 	j = 1;
760 	ixold[0] = ixnew[0] = 0;
761 	jackpot = 0;
762 	ctold = ctnew = 0;
763 	for (i = 1; i <= len[0]; i++) {
764 		if (J[i] == 0) {
765 			ixold[i] = ctold += skipline(f1);
766 			continue;
767 		}
768 		while (j < J[i]) {
769 			ixnew[j] = ctnew += skipline(f2);
770 			j++;
771 		}
772 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
773 			for (;;) {
774 				c = getc(f1);
775 				d = getc(f2);
776 				/*
777 				 * GNU diff ignores a missing newline
778 				 * in one file for -b or -w.
779 				 */
780 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
781 				    ((c == EOF && d == '\n') ||
782 				    (c == '\n' && d == EOF))) {
783 					break;
784 				}
785 				ctold++;
786 				ctnew++;
787 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
788 				    isspace(d)) {
789 					do {
790 						if (c == '\n')
791 							break;
792 						ctold++;
793 					} while (isspace(c = getc(f1)));
794 					do {
795 						if (d == '\n')
796 							break;
797 						ctnew++;
798 					} while (isspace(d = getc(f2)));
799 				} else if ((flags & D_IGNOREBLANKS)) {
800 					while (isspace(c) && c != '\n') {
801 						c = getc(f1);
802 						ctold++;
803 					}
804 					while (isspace(d) && d != '\n') {
805 						d = getc(f2);
806 						ctnew++;
807 					}
808 				}
809 				if (chrtran[c] != chrtran[d]) {
810 					jackpot++;
811 					J[i] = 0;
812 					if (c != '\n' && c != EOF)
813 						ctold += skipline(f1);
814 					if (d != '\n' && c != EOF)
815 						ctnew += skipline(f2);
816 					break;
817 				}
818 				if (c == '\n' || c == EOF)
819 					break;
820 			}
821 		} else {
822 			for (;;) {
823 				ctold++;
824 				ctnew++;
825 				if ((c = getc(f1)) != (d = getc(f2))) {
826 					/* jackpot++; */
827 					J[i] = 0;
828 					if (c != '\n' && c != EOF)
829 						ctold += skipline(f1);
830 					if (d != '\n' && c != EOF)
831 						ctnew += skipline(f2);
832 					break;
833 				}
834 				if (c == '\n' || c == EOF)
835 					break;
836 			}
837 		}
838 		ixold[i] = ctold;
839 		ixnew[j] = ctnew;
840 		j++;
841 	}
842 	for (; j <= len[1]; j++)
843 		ixnew[j] = ctnew += skipline(f2);
844 	/*
845 	 * if (jackpot)
846 	 *	fprintf(stderr, "jackpot\n");
847 	 */
848 }
849 
850 /* shellsort CACM #201 */
851 static void
852 sort(struct line *a, int n)
853 {
854 	struct line *ai, *aim, w;
855 	int j, m = 0, k;
856 
857 	if (n == 0)
858 		return;
859 	for (j = 1; j <= n; j *= 2)
860 		m = 2 * j - 1;
861 	for (m /= 2; m != 0; m /= 2) {
862 		k = n - m;
863 		for (j = 1; j <= k; j++) {
864 			for (ai = &a[j]; ai > a; ai -= m) {
865 				aim = &ai[m];
866 				if (aim < ai)
867 					break;	/* wraparound */
868 				if (aim->value > ai[0].value ||
869 				    (aim->value == ai[0].value &&
870 					aim->serial > ai[0].serial))
871 					break;
872 				w.value = ai[0].value;
873 				ai[0].value = aim->value;
874 				aim->value = w.value;
875 				w.serial = ai[0].serial;
876 				ai[0].serial = aim->serial;
877 				aim->serial = w.serial;
878 			}
879 		}
880 	}
881 }
882 
883 static void
884 unsort(struct line *f, int l, int *b)
885 {
886 	int *a, i;
887 
888 	a = xmalloc((l + 1) * sizeof(*a));
889 	for (i = 1; i <= l; i++)
890 		a[f[i].serial] = f[i].value;
891 	for (i = 1; i <= l; i++)
892 		b[i] = a[i];
893 	xfree(a);
894 }
895 
896 static int
897 skipline(FILE *f)
898 {
899 	int i, c;
900 
901 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
902 		continue;
903 	return (i);
904 }
905 
906 static void
907 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
908 {
909 	int m, i0, i1, j0, j1;
910 
911 	rewind(f1);
912 	rewind(f2);
913 	m = len[0];
914 	J[0] = 0;
915 	J[m + 1] = len[1] + 1;
916 	if (format != D_EDIT) {
917 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
918 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
919 				i0++;
920 			j0 = J[i0 - 1] + 1;
921 			i1 = i0 - 1;
922 			while (i1 < m && J[i1 + 1] == 0)
923 				i1++;
924 			j1 = J[i1 + 1] - 1;
925 			J[i1] = j1;
926 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
927 		}
928 	} else {
929 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
930 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
931 				i0--;
932 			j0 = J[i0 + 1] - 1;
933 			i1 = i0 + 1;
934 			while (i1 > 1 && J[i1 - 1] == 0)
935 				i1--;
936 			j1 = J[i1 - 1] + 1;
937 			J[i1] = j1;
938 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
939 		}
940 	}
941 	if (m == 0)
942 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
943 	if (format == D_IFDEF) {
944 		for (;;) {
945 #define	c i0
946 			if ((c = getc(f1)) == EOF)
947 				return;
948 			putchar(c);
949 		}
950 #undef c
951 	}
952 	if (anychange != 0) {
953 		if (format == D_CONTEXT)
954 			dump_context_vec(f1, f2, flags);
955 		else if (format == D_UNIFIED)
956 			dump_unified_vec(f1, f2, flags);
957 	}
958 }
959 
960 static void
961 range(int a, int b, char *separator)
962 {
963 	printf("%d", a > b ? b : a);
964 	if (a < b)
965 		printf("%s%d", separator, b);
966 }
967 
968 static void
969 uni_range(int a, int b)
970 {
971 	if (a < b)
972 		printf("%d,%d", a, b - a + 1);
973 	else if (a == b)
974 		printf("%d", b);
975 	else
976 		printf("%d,0", b);
977 }
978 
979 static char *
980 preadline(int fd, size_t rlen, off_t off)
981 {
982 	char *line;
983 	ssize_t nr;
984 
985 	line = xmalloc(rlen + 1);
986 	if ((nr = pread(fd, line, rlen, off)) < 0)
987 		err(1, "preadline");
988 	if (nr > 0 && line[nr-1] == '\n')
989 		nr--;
990 	line[nr] = '\0';
991 	return (line);
992 }
993 
994 static int
995 ignoreline(char *line)
996 {
997 	int ret;
998 
999 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1000 	xfree(line);
1001 	return (ret == 0);	/* if it matched, it should be ignored. */
1002 }
1003 
1004 /*
1005  * Indicate that there is a difference between lines a and b of the from file
1006  * to get to lines c to d of the to file.  If a is greater then b then there
1007  * are no lines in the from file involved and this means that there were
1008  * lines appended (beginning at b).  If c is greater than d then there are
1009  * lines missing from the to file.
1010  */
1011 static void
1012 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1013     int *pflags)
1014 {
1015 	static size_t max_context = 64;
1016 	int i;
1017 
1018 restart:
1019 	if (format != D_IFDEF && a > b && c > d)
1020 		return;
1021 	if (ignore_pats != NULL) {
1022 		char *line;
1023 		/*
1024 		 * All lines in the change, insert, or delete must
1025 		 * match an ignore pattern for the change to be
1026 		 * ignored.
1027 		 */
1028 		if (a <= b) {		/* Changes and deletes. */
1029 			for (i = a; i <= b; i++) {
1030 				line = preadline(fileno(f1),
1031 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1032 				if (!ignoreline(line))
1033 					goto proceed;
1034 			}
1035 		}
1036 		if (a > b || c <= d) {	/* Changes and inserts. */
1037 			for (i = c; i <= d; i++) {
1038 				line = preadline(fileno(f2),
1039 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1040 				if (!ignoreline(line))
1041 					goto proceed;
1042 			}
1043 		}
1044 		return;
1045 	}
1046 proceed:
1047 	if (*pflags & D_HEADER) {
1048 		printf("%s %s %s\n", diffargs, file1, file2);
1049 		*pflags &= ~D_HEADER;
1050 	}
1051 	if (format == D_CONTEXT || format == D_UNIFIED) {
1052 		/*
1053 		 * Allocate change records as needed.
1054 		 */
1055 		if (context_vec_ptr == context_vec_end - 1) {
1056 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1057 			max_context <<= 1;
1058 			context_vec_start = xrealloc(context_vec_start,
1059 			    max_context, sizeof(*context_vec_start));
1060 			context_vec_end = context_vec_start + max_context;
1061 			context_vec_ptr = context_vec_start + offset;
1062 		}
1063 		if (anychange == 0) {
1064 			/*
1065 			 * Print the context/unidiff header first time through.
1066 			 */
1067 			print_header(file1, file2);
1068 			anychange = 1;
1069 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1070 		    c > context_vec_ptr->d + (2 * context) + 1) {
1071 			/*
1072 			 * If this change is more than 'context' lines from the
1073 			 * previous change, dump the record and reset it.
1074 			 */
1075 			if (format == D_CONTEXT)
1076 				dump_context_vec(f1, f2, *pflags);
1077 			else
1078 				dump_unified_vec(f1, f2, *pflags);
1079 		}
1080 		context_vec_ptr++;
1081 		context_vec_ptr->a = a;
1082 		context_vec_ptr->b = b;
1083 		context_vec_ptr->c = c;
1084 		context_vec_ptr->d = d;
1085 		return;
1086 	}
1087 	if (anychange == 0)
1088 		anychange = 1;
1089 	switch (format) {
1090 
1091 	case D_BRIEF:
1092 		return;
1093 	case D_NORMAL:
1094 	case D_EDIT:
1095 		range(a, b, ",");
1096 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1097 		if (format == D_NORMAL)
1098 			range(c, d, ",");
1099 		putchar('\n');
1100 		break;
1101 	case D_REVERSE:
1102 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1103 		range(a, b, " ");
1104 		putchar('\n');
1105 		break;
1106 	case D_NREVERSE:
1107 		if (a > b)
1108 			printf("a%d %d\n", b, d - c + 1);
1109 		else {
1110 			printf("d%d %d\n", a, b - a + 1);
1111 			if (!(c > d))
1112 				/* add changed lines */
1113 				printf("a%d %d\n", b, d - c + 1);
1114 		}
1115 		break;
1116 	}
1117 	if (format == D_NORMAL || format == D_IFDEF) {
1118 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1119 		if (a <= b && c <= d && format == D_NORMAL)
1120 			puts("---");
1121 	}
1122 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0, *pflags);
1123 	if (i != 0 && format == D_EDIT) {
1124 		/*
1125 		 * A non-zero return value for D_EDIT indicates that the
1126 		 * last line printed was a bare dot (".") that has been
1127 		 * escaped as ".." to prevent ed(1) from misinterpreting
1128 		 * it.  We have to add a substitute command to change this
1129 		 * back and restart where we left off.
1130 		 */
1131 		puts(".");
1132 		printf("%ds/^\\.\\././\n", a);
1133 		a += i;
1134 		c += i;
1135 		goto restart;
1136 	}
1137 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
1138 		puts(".");
1139 	if (inifdef) {
1140 		printf("#endif /* %s */\n", ifdefname);
1141 		inifdef = 0;
1142 	}
1143 }
1144 
1145 static int
1146 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1147 {
1148 	int i, j, c, lastc, col, nc;
1149 
1150 	/*
1151 	 * When doing #ifdef's, copy down to current line
1152 	 * if this is the first file, so that stuff makes it to output.
1153 	 */
1154 	if (format == D_IFDEF && oldfile) {
1155 		long curpos = ftell(lb);
1156 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1157 		nc = f[a > b ? b : a - 1] - curpos;
1158 		for (i = 0; i < nc; i++)
1159 			putchar(getc(lb));
1160 	}
1161 	if (a > b)
1162 		return (0);
1163 	if (format == D_IFDEF) {
1164 		if (inifdef) {
1165 			printf("#else /* %s%s */\n",
1166 			    oldfile == 1 ? "!" : "", ifdefname);
1167 		} else {
1168 			if (oldfile)
1169 				printf("#ifndef %s\n", ifdefname);
1170 			else
1171 				printf("#ifdef %s\n", ifdefname);
1172 		}
1173 		inifdef = 1 + oldfile;
1174 	}
1175 	for (i = a; i <= b; i++) {
1176 		fseek(lb, f[i - 1], SEEK_SET);
1177 		nc = f[i] - f[i - 1];
1178 		if (format != D_IFDEF && ch != '\0') {
1179 			putchar(ch);
1180 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1181 			    || format == D_UNIFIED))
1182 				putchar('\t');
1183 			else if (format != D_UNIFIED)
1184 				putchar(' ');
1185 		}
1186 		col = 0;
1187 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1188 			if ((c = getc(lb)) == EOF) {
1189 				if (format == D_EDIT || format == D_REVERSE ||
1190 				    format == D_NREVERSE)
1191 					warnx("No newline at end of file");
1192 				else
1193 					puts("\n\\ No newline at end of file");
1194 				return (0);
1195 			}
1196 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1197 				do {
1198 					putchar(' ');
1199 				} while (++col & 7);
1200 			} else {
1201 				if (format == D_EDIT && j == 1 && c == '\n'
1202 				    && lastc == '.') {
1203 					/*
1204 					 * Don't print a bare "." line
1205 					 * since that will confuse ed(1).
1206 					 * Print ".." instead and return,
1207 					 * giving the caller an offset
1208 					 * from which to restart.
1209 					 */
1210 					puts(".");
1211 					return (i - a + 1);
1212 				}
1213 				putchar(c);
1214 				col++;
1215 			}
1216 		}
1217 	}
1218 	return (0);
1219 }
1220 
1221 /*
1222  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1223  */
1224 static int
1225 readhash(FILE *f, int flags)
1226 {
1227 	int i, t, space;
1228 	int sum;
1229 
1230 	sum = 1;
1231 	space = 0;
1232 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1233 		if (flags & D_IGNORECASE)
1234 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1235 				if (t == EOF) {
1236 					if (i == 0)
1237 						return (0);
1238 					break;
1239 				}
1240 				sum = sum * 127 + chrtran[t];
1241 			}
1242 		else
1243 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1244 				if (t == EOF) {
1245 					if (i == 0)
1246 						return (0);
1247 					break;
1248 				}
1249 				sum = sum * 127 + t;
1250 			}
1251 	} else {
1252 		for (i = 0;;) {
1253 			switch (t = getc(f)) {
1254 			case '\t':
1255 			case '\r':
1256 			case '\v':
1257 			case '\f':
1258 			case ' ':
1259 				space++;
1260 				continue;
1261 			default:
1262 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1263 					i++;
1264 					space = 0;
1265 				}
1266 				sum = sum * 127 + chrtran[t];
1267 				i++;
1268 				continue;
1269 			case EOF:
1270 				if (i == 0)
1271 					return (0);
1272 				/* FALLTHROUGH */
1273 			case '\n':
1274 				break;
1275 			}
1276 			break;
1277 		}
1278 	}
1279 	/*
1280 	 * There is a remote possibility that we end up with a zero sum.
1281 	 * Zero is used as an EOF marker, so return 1 instead.
1282 	 */
1283 	return (sum == 0 ? 1 : sum);
1284 }
1285 
1286 static int
1287 asciifile(FILE *f)
1288 {
1289 	unsigned char buf[BUFSIZ];
1290 	size_t i, cnt;
1291 
1292 	if (f == NULL)
1293 		return (1);
1294 
1295 	rewind(f);
1296 	cnt = fread(buf, 1, sizeof(buf), f);
1297 	for (i = 0; i < cnt; i++)
1298 		if (!isprint(buf[i]) && !isspace(buf[i]))
1299 			return (0);
1300 	return (1);
1301 }
1302 
1303 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1304 
1305 static char *
1306 match_function(const long *f, int pos, FILE *fp)
1307 {
1308 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1309 	size_t nc;
1310 	int last = lastline;
1311 	char *state = NULL;
1312 
1313 	lastline = pos;
1314 	while (pos > last) {
1315 		fseek(fp, f[pos - 1], SEEK_SET);
1316 		nc = f[pos] - f[pos - 1];
1317 		if (nc >= sizeof(buf))
1318 			nc = sizeof(buf) - 1;
1319 		nc = fread(buf, 1, nc, fp);
1320 		if (nc > 0) {
1321 			buf[nc] = '\0';
1322 			buf[strcspn(buf, "\n")] = '\0';
1323 
1324 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1325 				if (begins_with(buf, "private:")) {
1326 					if (!state)
1327 						state = " (private)";
1328 				} else if (begins_with(buf, "protected:")) {
1329 					if (!state)
1330 						state = " (protected)";
1331 				} else if (begins_with(buf, "public:")) {
1332 					if (!state)
1333 						state = " (public)";
1334 				} else {
1335 					strlcpy(lastbuf, buf, sizeof lastbuf);
1336 					if (state)
1337 						strlcat(lastbuf, state,
1338 						    sizeof lastbuf);
1339 					lastmatchline = pos;
1340 					return lastbuf;
1341 				}
1342 			}
1343 		}
1344 		pos--;
1345 	}
1346 	return lastmatchline > 0 ? lastbuf : NULL;
1347 }
1348 
1349 /* dump accumulated "context" diff changes */
1350 static void
1351 dump_context_vec(FILE *f1, FILE *f2, int flags)
1352 {
1353 	struct context_vec *cvp = context_vec_start;
1354 	int lowa, upb, lowc, upd, do_output;
1355 	int a, b, c, d;
1356 	char ch, *f;
1357 
1358 	if (context_vec_start > context_vec_ptr)
1359 		return;
1360 
1361 	b = d = 0;		/* gcc */
1362 	lowa = MAX(1, cvp->a - context);
1363 	upb = MIN(len[0], context_vec_ptr->b + context);
1364 	lowc = MAX(1, cvp->c - context);
1365 	upd = MIN(len[1], context_vec_ptr->d + context);
1366 
1367 	printf("***************");
1368 	if ((flags & D_PROTOTYPE)) {
1369 		f = match_function(ixold, lowa-1, f1);
1370 		if (f != NULL) {
1371 			putchar(' ');
1372 			fputs(f, stdout);
1373 		}
1374 	}
1375 	printf("\n*** ");
1376 	range(lowa, upb, ",");
1377 	printf(" ****\n");
1378 
1379 	/*
1380 	 * Output changes to the "old" file.  The first loop suppresses
1381 	 * output if there were no changes to the "old" file (we'll see
1382 	 * the "old" lines as context in the "new" list).
1383 	 */
1384 	do_output = 0;
1385 	for (; cvp <= context_vec_ptr; cvp++)
1386 		if (cvp->a <= cvp->b) {
1387 			cvp = context_vec_start;
1388 			do_output++;
1389 			break;
1390 		}
1391 	if (do_output) {
1392 		while (cvp <= context_vec_ptr) {
1393 			a = cvp->a;
1394 			b = cvp->b;
1395 			c = cvp->c;
1396 			d = cvp->d;
1397 
1398 			if (a <= b && c <= d)
1399 				ch = 'c';
1400 			else
1401 				ch = (a <= b) ? 'd' : 'a';
1402 
1403 			if (ch == 'a')
1404 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1405 			else {
1406 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1407 				fetch(ixold, a, b, f1,
1408 				    ch == 'c' ? '!' : '-', 0, flags);
1409 			}
1410 			lowa = b + 1;
1411 			cvp++;
1412 		}
1413 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1414 	}
1415 	/* output changes to the "new" file */
1416 	printf("--- ");
1417 	range(lowc, upd, ",");
1418 	printf(" ----\n");
1419 
1420 	do_output = 0;
1421 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1422 		if (cvp->c <= cvp->d) {
1423 			cvp = context_vec_start;
1424 			do_output++;
1425 			break;
1426 		}
1427 	if (do_output) {
1428 		while (cvp <= context_vec_ptr) {
1429 			a = cvp->a;
1430 			b = cvp->b;
1431 			c = cvp->c;
1432 			d = cvp->d;
1433 
1434 			if (a <= b && c <= d)
1435 				ch = 'c';
1436 			else
1437 				ch = (a <= b) ? 'd' : 'a';
1438 
1439 			if (ch == 'd')
1440 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1441 			else {
1442 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1443 				fetch(ixnew, c, d, f2,
1444 				    ch == 'c' ? '!' : '+', 0, flags);
1445 			}
1446 			lowc = d + 1;
1447 			cvp++;
1448 		}
1449 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1450 	}
1451 	context_vec_ptr = context_vec_start - 1;
1452 }
1453 
1454 /* dump accumulated "unified" diff changes */
1455 static void
1456 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1457 {
1458 	struct context_vec *cvp = context_vec_start;
1459 	int lowa, upb, lowc, upd;
1460 	int a, b, c, d;
1461 	char ch, *f;
1462 
1463 	if (context_vec_start > context_vec_ptr)
1464 		return;
1465 
1466 	b = d = 0;		/* gcc */
1467 	lowa = MAX(1, cvp->a - context);
1468 	upb = MIN(len[0], context_vec_ptr->b + context);
1469 	lowc = MAX(1, cvp->c - context);
1470 	upd = MIN(len[1], context_vec_ptr->d + context);
1471 
1472 	fputs("@@ -", stdout);
1473 	uni_range(lowa, upb);
1474 	fputs(" +", stdout);
1475 	uni_range(lowc, upd);
1476 	fputs(" @@", stdout);
1477 	if ((flags & D_PROTOTYPE)) {
1478 		f = match_function(ixold, lowa-1, f1);
1479 		if (f != NULL) {
1480 			putchar(' ');
1481 			fputs(f, stdout);
1482 		}
1483 	}
1484 	putchar('\n');
1485 
1486 	/*
1487 	 * Output changes in "unified" diff format--the old and new lines
1488 	 * are printed together.
1489 	 */
1490 	for (; cvp <= context_vec_ptr; cvp++) {
1491 		a = cvp->a;
1492 		b = cvp->b;
1493 		c = cvp->c;
1494 		d = cvp->d;
1495 
1496 		/*
1497 		 * c: both new and old changes
1498 		 * d: only changes in the old file
1499 		 * a: only changes in the new file
1500 		 */
1501 		if (a <= b && c <= d)
1502 			ch = 'c';
1503 		else
1504 			ch = (a <= b) ? 'd' : 'a';
1505 
1506 		switch (ch) {
1507 		case 'c':
1508 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1509 			fetch(ixold, a, b, f1, '-', 0, flags);
1510 			fetch(ixnew, c, d, f2, '+', 0, flags);
1511 			break;
1512 		case 'd':
1513 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1514 			fetch(ixold, a, b, f1, '-', 0, flags);
1515 			break;
1516 		case 'a':
1517 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1518 			fetch(ixnew, c, d, f2, '+', 0, flags);
1519 			break;
1520 		}
1521 		lowa = b + 1;
1522 		lowc = d + 1;
1523 	}
1524 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1525 
1526 	context_vec_ptr = context_vec_start - 1;
1527 }
1528 
1529 static void
1530 print_header(const char *file1, const char *file2)
1531 {
1532 	if (label[0] != NULL)
1533 		printf("%s %s\n", format == D_CONTEXT ? "***" : "---",
1534 		    label[0]);
1535 	else
1536 		printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---",
1537 		    file1, ctime(&stb1.st_mtime));
1538 	if (label[1] != NULL)
1539 		printf("%s %s\n", format == D_CONTEXT ? "---" : "+++",
1540 		    label[1]);
1541 	else
1542 		printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++",
1543 		    file2, ctime(&stb2.st_mtime));
1544 }
1545