xref: /minix3/minix/usr.bin/diff/diffreg.c (revision dc2c582f364d955cff3a86092716702b099d2db2)
1 /*	$OpenBSD: diffreg.c,v 1.74 2010/03/22 19:33:19 schwarze Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70 
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 #include <string.h>
81 #include <time.h>
82 
83 #include "diff.h"
84 #include "pathnames.h"
85 #include "xmalloc.h"
86 
87 /*
88  * diff - compare two files.
89  */
90 
91 /*
92  *	Uses an algorithm due to Harold Stone, which finds
93  *	a pair of longest identical subsequences in the two
94  *	files.
95  *
96  *	The major goal is to generate the match vector J.
97  *	J[i] is the index of the line in file1 corresponding
98  *	to line i file0. J[i] = 0 if there is no
99  *	such line in file1.
100  *
101  *	Lines are hashed so as to work in core. All potential
102  *	matches are located by sorting the lines of each file
103  *	on the hash (called ``value''). In particular, this
104  *	collects the equivalence classes in file1 together.
105  *	Subroutine equiv replaces the value of each line in
106  *	file0 by the index of the first element of its
107  *	matching equivalence in (the reordered) file1.
108  *	To save space equiv squeezes file1 into a single
109  *	array member in which the equivalence classes
110  *	are simply concatenated, except that their first
111  *	members are flagged by changing sign.
112  *
113  *	Next the indices that point into member are unsorted into
114  *	array class according to the original order of file0.
115  *
116  *	The cleverness lies in routine stone. This marches
117  *	through the lines of file0, developing a vector klist
118  *	of "k-candidates". At step i a k-candidate is a matched
119  *	pair of lines x,y (x in file0 y in file1) such that
120  *	there is a common subsequence of length k
121  *	between the first i lines of file0 and the first y
122  *	lines of file1, but there is no such subsequence for
123  *	any smaller y. x is the earliest possible mate to y
124  *	that occurs in such a subsequence.
125  *
126  *	Whenever any of the members of the equivalence class of
127  *	lines in file1 matable to a line in file0 has serial number
128  *	less than the y of some k-candidate, that k-candidate
129  *	with the smallest such y is replaced. The new
130  *	k-candidate is chained (via pred) to the current
131  *	k-1 candidate so that the actual subsequence can
132  *	be recovered. When a member has serial number greater
133  *	that the y of all k-candidates, the klist is extended.
134  *	At the end, the longest subsequence is pulled out
135  *	and placed in the array J by unravel
136  *
137  *	With J in hand, the matches there recorded are
138  *	check'ed against reality to assure that no spurious
139  *	matches have crept in due to hashing. If they have,
140  *	they are broken, and "jackpot" is recorded--a harmless
141  *	matter except that a true match for a spuriously
142  *	mated line may now be unnecessarily reported as a change.
143  *
144  *	Much of the complexity of the program comes simply
145  *	from trying to minimize core utilization and
146  *	maximize the range of doable problems by dynamically
147  *	allocating what is needed and reusing what is not.
148  *	The core requirements for problems larger than somewhat
149  *	are (in words) 2*length(file0) + length(file1) +
150  *	3*(number of k-candidates installed),  typically about
151  *	6n words for files of length n.
152  */
153 
154 struct cand {
155 	int	x;
156 	int	y;
157 	int	pred;
158 };
159 
160 struct line {
161 	int	serial;
162 	int	value;
163 } *file[2];
164 
165 /*
166  * The following struct is used to record change information when
167  * doing a "context" or "unified" diff.  (see routine "change" to
168  * understand the highly mnemonic field names)
169  */
170 struct context_vec {
171 	int	a;		/* start line in old file */
172 	int	b;		/* end line in old file */
173 	int	c;		/* start line in new file */
174 	int	d;		/* end line in new file */
175 };
176 
177 static FILE	*opentemp(const char *);
178 static void	 output(char *, FILE *, char *, FILE *, int);
179 static void	 check(char *, FILE *, char *, FILE *, int);
180 static void	 range(int, int, char *);
181 static void	 uni_range(int, int);
182 static void	 dump_context_vec(FILE *, FILE *, int);
183 static void	 dump_unified_vec(FILE *, FILE *, int);
184 static void	 prepare(int, FILE *, off_t, int);
185 static void	 prune(void);
186 static void	 equiv(struct line *, int, struct line *, int, int *);
187 static void	 unravel(int);
188 static void	 unsort(struct line *, int, int *);
189 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
190 static void	 sort(struct line *, int);
191 static void	 print_header(const char *, const char *);
192 static int	 ignoreline(char *);
193 static int	 asciifile(FILE *);
194 static int	 fetch(long *, int, int, FILE *, int, int, int);
195 static int	 newcand(int, int, int);
196 static int	 search(int *, int, int);
197 static int	 skipline(FILE *);
198 static int	 isqrt(int);
199 static int	 stone(int *, int, int *, int *, int);
200 static int	 readhash(FILE *, int);
201 static int	 files_differ(FILE *, FILE *, int);
202 static char	*match_function(const long *, int, FILE *);
203 static char	*preadline(int, size_t, off_t);
204 
205 static int  *J;			/* will be overlaid on class */
206 static int  *class;		/* will be overlaid on file[0] */
207 static int  *klist;		/* will be overlaid on file[0] after class */
208 static int  *member;		/* will be overlaid on file[1] */
209 static int   clen;
210 static int   inifdef;		/* whether or not we are in a #ifdef block */
211 static int   len[2];
212 static int   pref, suff;	/* length of prefix and suffix */
213 static int   slen[2];
214 static int   anychange;
215 static long *ixnew;		/* will be overlaid on file[1] */
216 static long *ixold;		/* will be overlaid on klist */
217 static struct cand *clist;	/* merely a free storage pot for candidates */
218 static int   clistlen;		/* the length of clist */
219 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
220 static u_char *chrtran;		/* translation table for case-folding */
221 static struct context_vec *context_vec_start;
222 static struct context_vec *context_vec_end;
223 static struct context_vec *context_vec_ptr;
224 
225 #define FUNCTION_CONTEXT_SIZE	55
226 static char lastbuf[FUNCTION_CONTEXT_SIZE];
227 static int lastline;
228 static int lastmatchline;
229 
230 
231 /*
232  * chrtran points to one of 2 translation tables: cup2low if folding upper to
233  * lower case clow2low if not folding case
234  */
235 u_char clow2low[256] = {
236 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
237 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
238 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
239 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
240 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
241 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
242 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
243 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
244 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
245 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
246 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
247 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
248 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
249 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
250 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
251 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
252 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
253 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
254 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
255 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
256 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
257 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
258 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
259 	0xfd, 0xfe, 0xff
260 };
261 
262 u_char cup2low[256] = {
263 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
264 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
265 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
266 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
267 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
268 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
269 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
270 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
271 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
272 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
273 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
274 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
275 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
276 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
277 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
278 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
279 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
280 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
281 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
282 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
283 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
284 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
285 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
286 	0xfd, 0xfe, 0xff
287 };
288 
289 int
290 diffreg(char *file1, char *file2, int flags)
291 {
292 	FILE *f1, *f2;
293 	int i, rval, ostdout = -1;
294 	pid_t pid = -1;
295 
296 	f1 = f2 = NULL;
297 	rval = D_SAME;
298 	anychange = 0;
299 	lastline = 0;
300 	lastmatchline = 0;
301 	context_vec_ptr = context_vec_start - 1;
302 	if (flags & D_IGNORECASE)
303 		chrtran = cup2low;
304 	else
305 		chrtran = clow2low;
306 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
307 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
308 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
309 		goto closem;
310 
311 	if (flags & D_EMPTY1)
312 		f1 = fopen(_PATH_DEVNULL, "r");
313 	else {
314 		if (!S_ISREG(stb1.st_mode)) {
315 			if ((f1 = opentemp(file1)) == NULL ||
316 			    fstat(fileno(f1), &stb1) < 0) {
317 				warn("%s", file1);
318 				status |= 2;
319 				goto closem;
320 			}
321 		} else if (strcmp(file1, "-") == 0)
322 			f1 = stdin;
323 		else
324 			f1 = fopen(file1, "r");
325 	}
326 	if (f1 == NULL) {
327 		warn("%s", file1);
328 		status |= 2;
329 		goto closem;
330 	}
331 
332 	if (flags & D_EMPTY2)
333 		f2 = fopen(_PATH_DEVNULL, "r");
334 	else {
335 		if (!S_ISREG(stb2.st_mode)) {
336 			if ((f2 = opentemp(file2)) == NULL ||
337 			    fstat(fileno(f2), &stb2) < 0) {
338 				warn("%s", file2);
339 				status |= 2;
340 				goto closem;
341 			}
342 		} else if (strcmp(file2, "-") == 0)
343 			f2 = stdin;
344 		else
345 			f2 = fopen(file2, "r");
346 	}
347 	if (f2 == NULL) {
348 		warn("%s", file2);
349 		status |= 2;
350 		goto closem;
351 	}
352 
353 	switch (files_differ(f1, f2, flags)) {
354 	case 0:
355 		goto closem;
356 	case 1:
357 		break;
358 	default:
359 		/* error */
360 		status |= 2;
361 		goto closem;
362 	}
363 
364 	if ((flags & D_FORCEASCII) == 0 &&
365 	    (!asciifile(f1) || !asciifile(f2))) {
366 		rval = D_BINARY;
367 		status |= 1;
368 		goto closem;
369 	}
370 	if (lflag) {
371 		/* redirect stdout to pr */
372 		int pfd[2];
373 		char *header;
374 		int len;
375 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
376 
377 		len = strlen(diffargs) + strlen(file1) + strlen(file2) + 10;
378 		if(!(header = malloc(len)))
379 			errx(1, "diffreg can't alloc");
380 		snprintf(header, len, "%s %s %s", diffargs, file1, file2);
381 		prargv[2] = header;
382 		fflush(stdout);
383 		rewind(stdout);
384 		pipe(pfd);
385 		switch ((pid = fork())) {
386 		case -1:
387 			warnx("No more processes");
388 			status |= 2;
389 			xfree(header);
390 			return (D_ERROR);
391 		case 0:
392 			/* child */
393 			if (pfd[0] != STDIN_FILENO) {
394 				dup2(pfd[0], STDIN_FILENO);
395 				close(pfd[0]);
396 			}
397 			close(pfd[1]);
398 			execv(_PATH_PR, prargv);
399 			_exit(127);
400 		default:
401 			/* parent */
402 			if (pfd[1] != STDOUT_FILENO) {
403 				ostdout = dup(STDOUT_FILENO);
404 				dup2(pfd[1], STDOUT_FILENO);
405 				close(pfd[1]);
406 			}
407 			close(pfd[0]);
408 			rewind(stdout);
409 			xfree(header);
410 		}
411 	}
412 	prepare(0, f1, stb1.st_size, flags);
413 	prepare(1, f2, stb2.st_size, flags);
414 
415 	prune();
416 	sort(sfile[0], slen[0]);
417 	sort(sfile[1], slen[1]);
418 
419 	member = (int *)file[1];
420 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
421 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
422 
423 	class = (int *)file[0];
424 	unsort(sfile[0], slen[0], class);
425 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
426 
427 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
428 	clen = 0;
429 	clistlen = 100;
430 	clist = xcalloc(clistlen, sizeof(*clist));
431 	i = stone(class, slen[0], member, klist, flags);
432 	xfree(member);
433 	xfree(class);
434 
435 	J = xrealloc(J, len[0] + 2, sizeof(*J));
436 	unravel(klist[i]);
437 	xfree(clist);
438 	xfree(klist);
439 
440 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
441 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
442 	check(file1, f1, file2, f2, flags);
443 	output(file1, f1, file2, f2, flags);
444 	if (ostdout != -1) {
445 		int wstatus;
446 
447 		/* close the pipe to pr and restore stdout */
448 		fflush(stdout);
449 		rewind(stdout);
450 		if (ostdout != STDOUT_FILENO) {
451 			close(STDOUT_FILENO);
452 			dup2(ostdout, STDOUT_FILENO);
453 			close(ostdout);
454 		}
455 		waitpid(pid, &wstatus, 0);
456 	}
457 closem:
458 	if (anychange) {
459 		status |= 1;
460 		if (rval == D_SAME)
461 			rval = D_DIFFER;
462 	}
463 	if (f1 != NULL)
464 		fclose(f1);
465 	if (f2 != NULL)
466 		fclose(f2);
467 
468 	return (rval);
469 }
470 
471 /*
472  * Check to see if the given files differ.
473  * Returns 0 if they are the same, 1 if different, and -1 on error.
474  * XXX - could use code from cmp(1) [faster]
475  */
476 static int
477 files_differ(FILE *f1, FILE *f2, int flags)
478 {
479 	char buf1[BUFSIZ], buf2[BUFSIZ];
480 	size_t i, j;
481 
482 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
483 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
484 		return (1);
485 	for (;;) {
486 		i = fread(buf1, 1, sizeof(buf1), f1);
487 		j = fread(buf2, 1, sizeof(buf2), f2);
488 		if (i != j)
489 			return (1);
490 		if (i == 0 && j == 0) {
491 			if (ferror(f1) || ferror(f2))
492 				return (1);
493 			return (0);
494 		}
495 		if (memcmp(buf1, buf2, i) != 0)
496 			return (1);
497 	}
498 }
499 
500 static FILE *
501 opentemp(const char *file)
502 {
503 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
504 	ssize_t nread;
505 	int ifd, ofd;
506 
507 	if (strcmp(file, "-") == 0)
508 		ifd = STDIN_FILENO;
509 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
510 		return (NULL);
511 
512 	if ((tempdir = getenv("TMPDIR")) == NULL)
513 		tempdir = _PATH_TMP;
514 
515 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
516 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
517 	    sizeof(tempfile)) {
518 		close(ifd);
519 		errno = ENAMETOOLONG;
520 		return (NULL);
521 	}
522 
523 	if ((ofd = mkstemp(tempfile)) < 0) {
524 		close(ifd);
525 		return (NULL);
526 	}
527 	unlink(tempfile);
528 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
529 		if (write(ofd, buf, nread) != nread) {
530 			close(ifd);
531 			close(ofd);
532 			return (NULL);
533 		}
534 	}
535 	close(ifd);
536 	lseek(ofd, (off_t)0, SEEK_SET);
537 	return (fdopen(ofd, "r"));
538 }
539 
540 char *
541 splice(char *dir, char *file)
542 {
543 	char *tail, *buf;
544 	int len;
545 
546 	if ((tail = strrchr(file, '/')) == NULL)
547 		tail = file;
548 	else
549 		tail++;
550 	len = strlen(dir) + strlen(tail) + 1;
551 	if(!(buf = malloc(len)))
552 		errx(1, "splice can't alloc");
553 	snprintf(buf, len, "%s/%s", dir, tail);
554 	return (buf);
555 }
556 
557 static void
558 prepare(int i, FILE *fd, off_t filesize, int flags)
559 {
560 	struct line *p;
561 	int j, h;
562 	size_t sz;
563 
564 	rewind(fd);
565 
566 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
567 	if (sz < 100)
568 		sz = 100;
569 
570 	p = xcalloc(sz + 3, sizeof(*p));
571 	for (j = 0; (h = readhash(fd, flags));) {
572 		if (j == sz) {
573 			sz = sz * 3 / 2;
574 			p = xrealloc(p, sz + 3, sizeof(*p));
575 		}
576 		p[++j].value = h;
577 	}
578 	len[i] = j;
579 	file[i] = p;
580 }
581 
582 static void
583 prune(void)
584 {
585 	int i, j;
586 
587 	for (pref = 0; pref < len[0] && pref < len[1] &&
588 	    file[0][pref + 1].value == file[1][pref + 1].value;
589 	    pref++)
590 		;
591 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
592 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
593 	    suff++)
594 		;
595 	for (j = 0; j < 2; j++) {
596 		sfile[j] = file[j] + pref;
597 		slen[j] = len[j] - pref - suff;
598 		for (i = 0; i <= slen[j]; i++)
599 			sfile[j][i].serial = i;
600 	}
601 }
602 
603 static void
604 equiv(struct line *a, int n, struct line *b, int m, int *c)
605 {
606 	int i, j;
607 
608 	i = j = 1;
609 	while (i <= n && j <= m) {
610 		if (a[i].value < b[j].value)
611 			a[i++].value = 0;
612 		else if (a[i].value == b[j].value)
613 			a[i++].value = j;
614 		else
615 			j++;
616 	}
617 	while (i <= n)
618 		a[i++].value = 0;
619 	b[m + 1].value = 0;
620 	j = 0;
621 	while (++j <= m) {
622 		c[j] = -b[j].serial;
623 		while (b[j + 1].value == b[j].value) {
624 			j++;
625 			c[j] = b[j].serial;
626 		}
627 	}
628 	c[j] = -1;
629 }
630 
631 /* Code taken from ping.c */
632 static int
633 isqrt(int n)
634 {
635 	int y, x = 1;
636 
637 	if (n == 0)
638 		return (0);
639 
640 	do { /* newton was a stinker */
641 		y = x;
642 		x = n / x;
643 		x += y;
644 		x /= 2;
645 	} while ((x - y) > 1 || (x - y) < -1);
646 
647 	return (x);
648 }
649 
650 static int
651 stone(int *a, int n, int *b, int *c, int flags)
652 {
653 	int i, k, y, j, l;
654 	int oldc, tc, oldl;
655 	u_int numtries;
656 
657 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
658 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
659 	    MAX(256, isqrt(n));
660 
661 	k = 0;
662 	c[0] = newcand(0, 0, 0);
663 	for (i = 1; i <= n; i++) {
664 		j = a[i];
665 		if (j == 0)
666 			continue;
667 		y = -b[j];
668 		oldl = 0;
669 		oldc = c[0];
670 		numtries = 0;
671 		do {
672 			if (y <= clist[oldc].y)
673 				continue;
674 			l = search(c, k, y);
675 			if (l != oldl + 1)
676 				oldc = c[l - 1];
677 			if (l <= k) {
678 				if (clist[c[l]].y <= y)
679 					continue;
680 				tc = c[l];
681 				c[l] = newcand(i, y, oldc);
682 				oldc = tc;
683 				oldl = l;
684 				numtries++;
685 			} else {
686 				c[l] = newcand(i, y, oldc);
687 				k++;
688 				break;
689 			}
690 		} while ((y = b[++j]) > 0 && numtries < bound);
691 	}
692 	return (k);
693 }
694 
695 static int
696 newcand(int x, int y, int pred)
697 {
698 	struct cand *q;
699 
700 	if (clen == clistlen) {
701 		clistlen = clistlen * 11 / 10;
702 		clist = xrealloc(clist, clistlen, sizeof(*clist));
703 	}
704 	q = clist + clen;
705 	q->x = x;
706 	q->y = y;
707 	q->pred = pred;
708 	return (clen++);
709 }
710 
711 static int
712 search(int *c, int k, int y)
713 {
714 	int i, j, l, t;
715 
716 	if (clist[c[k]].y < y)	/* quick look for typical case */
717 		return (k + 1);
718 	i = 0;
719 	j = k + 1;
720 	for (;;) {
721 		l = (i + j) / 2;
722 		if (l <= i)
723 			break;
724 		t = clist[c[l]].y;
725 		if (t > y)
726 			j = l;
727 		else if (t < y)
728 			i = l;
729 		else
730 			return (l);
731 	}
732 	return (l + 1);
733 }
734 
735 static void
736 unravel(int p)
737 {
738 	struct cand *q;
739 	int i;
740 
741 	for (i = 0; i <= len[0]; i++)
742 		J[i] = i <= pref ? i :
743 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
744 	for (q = clist + p; q->y != 0; q = clist + q->pred)
745 		J[q->x + pref] = q->y + pref;
746 }
747 
748 /*
749  * Check does double duty:
750  *  1.	ferret out any fortuitous correspondences due
751  *	to confounding by hashing (which result in "jackpot")
752  *  2.  collect random access indexes to the two files
753  */
754 static void
755 check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
756 {
757 	int i, j, jackpot, c, d;
758 	long ctold, ctnew;
759 
760 	rewind(f1);
761 	rewind(f2);
762 	j = 1;
763 	ixold[0] = ixnew[0] = 0;
764 	jackpot = 0;
765 	ctold = ctnew = 0;
766 	for (i = 1; i <= len[0]; i++) {
767 		if (J[i] == 0) {
768 			ixold[i] = ctold += skipline(f1);
769 			continue;
770 		}
771 		while (j < J[i]) {
772 			ixnew[j] = ctnew += skipline(f2);
773 			j++;
774 		}
775 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
776 			for (;;) {
777 				c = getc(f1);
778 				d = getc(f2);
779 				/*
780 				 * GNU diff ignores a missing newline
781 				 * in one file for -b or -w.
782 				 */
783 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
784 				    ((c == EOF && d == '\n') ||
785 				    (c == '\n' && d == EOF))) {
786 					break;
787 				}
788 				ctold++;
789 				ctnew++;
790 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
791 				    isspace(d)) {
792 					do {
793 						if (c == '\n')
794 							break;
795 						ctold++;
796 					} while (isspace(c = getc(f1)));
797 					do {
798 						if (d == '\n')
799 							break;
800 						ctnew++;
801 					} while (isspace(d = getc(f2)));
802 				} else if ((flags & D_IGNOREBLANKS)) {
803 					while (isspace(c) && c != '\n') {
804 						c = getc(f1);
805 						ctold++;
806 					}
807 					while (isspace(d) && d != '\n') {
808 						d = getc(f2);
809 						ctnew++;
810 					}
811 				}
812 				if (chrtran[c] != chrtran[d]) {
813 					jackpot++;
814 					J[i] = 0;
815 					if (c != '\n' && c != EOF)
816 						ctold += skipline(f1);
817 					if (d != '\n' && c != EOF)
818 						ctnew += skipline(f2);
819 					break;
820 				}
821 				if (c == '\n' || c == EOF)
822 					break;
823 			}
824 		} else {
825 			for (;;) {
826 				ctold++;
827 				ctnew++;
828 				if ((c = getc(f1)) != (d = getc(f2))) {
829 					/* jackpot++; */
830 					J[i] = 0;
831 					if (c != '\n' && c != EOF)
832 						ctold += skipline(f1);
833 					if (d != '\n' && c != EOF)
834 						ctnew += skipline(f2);
835 					break;
836 				}
837 				if (c == '\n' || c == EOF)
838 					break;
839 			}
840 		}
841 		ixold[i] = ctold;
842 		ixnew[j] = ctnew;
843 		j++;
844 	}
845 	for (; j <= len[1]; j++)
846 		ixnew[j] = ctnew += skipline(f2);
847 	/*
848 	 * if (jackpot)
849 	 *	fprintf(stderr, "jackpot\n");
850 	 */
851 }
852 
853 /* shellsort CACM #201 */
854 static void
855 sort(struct line *a, int n)
856 {
857 	struct line *ai, *aim, w;
858 	int j, m = 0, k;
859 
860 	if (n == 0)
861 		return;
862 	for (j = 1; j <= n; j *= 2)
863 		m = 2 * j - 1;
864 	for (m /= 2; m != 0; m /= 2) {
865 		k = n - m;
866 		for (j = 1; j <= k; j++) {
867 			for (ai = &a[j]; ai > a; ai -= m) {
868 				aim = &ai[m];
869 				if (aim < ai)
870 					break;	/* wraparound */
871 				if (aim->value > ai[0].value ||
872 				    (aim->value == ai[0].value &&
873 					aim->serial > ai[0].serial))
874 					break;
875 				w.value = ai[0].value;
876 				ai[0].value = aim->value;
877 				aim->value = w.value;
878 				w.serial = ai[0].serial;
879 				ai[0].serial = aim->serial;
880 				aim->serial = w.serial;
881 			}
882 		}
883 	}
884 }
885 
886 static void
887 unsort(struct line *f, int l, int *b)
888 {
889 	int *a, i;
890 
891 	a = xcalloc(l + 1, sizeof(*a));
892 	for (i = 1; i <= l; i++)
893 		a[f[i].serial] = f[i].value;
894 	for (i = 1; i <= l; i++)
895 		b[i] = a[i];
896 	xfree(a);
897 }
898 
899 static int
900 skipline(FILE *f)
901 {
902 	int i, c;
903 
904 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
905 		continue;
906 	return (i);
907 }
908 
909 static void
910 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
911 {
912 	int m, i0, i1, j0, j1;
913 
914 	rewind(f1);
915 	rewind(f2);
916 	m = len[0];
917 	J[0] = 0;
918 	J[m + 1] = len[1] + 1;
919 	if (diff_format != D_EDIT) {
920 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
921 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
922 				i0++;
923 			j0 = J[i0 - 1] + 1;
924 			i1 = i0 - 1;
925 			while (i1 < m && J[i1 + 1] == 0)
926 				i1++;
927 			j1 = J[i1 + 1] - 1;
928 			J[i1] = j1;
929 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
930 		}
931 	} else {
932 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
933 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
934 				i0--;
935 			j0 = J[i0 + 1] - 1;
936 			i1 = i0 + 1;
937 			while (i1 > 1 && J[i1 - 1] == 0)
938 				i1--;
939 			j1 = J[i1 - 1] + 1;
940 			J[i1] = j1;
941 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
942 		}
943 	}
944 	if (m == 0)
945 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
946 	if (diff_format == D_IFDEF) {
947 		for (;;) {
948 #define	c i0
949 			if ((c = getc(f1)) == EOF)
950 				return;
951 			putchar(c);
952 		}
953 #undef c
954 	}
955 	if (anychange != 0) {
956 		if (diff_format == D_CONTEXT)
957 			dump_context_vec(f1, f2, flags);
958 		else if (diff_format == D_UNIFIED)
959 			dump_unified_vec(f1, f2, flags);
960 	}
961 }
962 
963 static void
964 range(int a, int b, char *separator)
965 {
966 	printf("%d", a > b ? b : a);
967 	if (a < b)
968 		printf("%s%d", separator, b);
969 }
970 
971 static void
972 uni_range(int a, int b)
973 {
974 	if (a < b)
975 		printf("%d,%d", a, b - a + 1);
976 	else if (a == b)
977 		printf("%d", b);
978 	else
979 		printf("%d,0", b);
980 }
981 
982 static char *
983 preadline(int fd, size_t rlen, off_t off)
984 {
985 	char *line;
986 	ssize_t nr;
987 
988 	line = xmalloc(rlen + 1);
989 	if ((nr = pread(fd, line, rlen, off)) < 0)
990 		err(1, "preadline");
991 	if (nr > 0 && line[nr-1] == '\n')
992 		nr--;
993 	line[nr] = '\0';
994 	return (line);
995 }
996 
997 static int
998 ignoreline(char *line)
999 {
1000 	int ret;
1001 
1002 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1003 	xfree(line);
1004 	return (ret == 0);	/* if it matched, it should be ignored. */
1005 }
1006 
1007 /*
1008  * Indicate that there is a difference between lines a and b of the from file
1009  * to get to lines c to d of the to file.  If a is greater then b then there
1010  * are no lines in the from file involved and this means that there were
1011  * lines appended (beginning at b).  If c is greater than d then there are
1012  * lines missing from the to file.
1013  */
1014 static void
1015 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1016     int *pflags)
1017 {
1018 	static size_t max_context = 64;
1019 	int i;
1020 
1021 restart:
1022 	if (diff_format != D_IFDEF && a > b && c > d)
1023 		return;
1024 	if (ignore_pats != NULL) {
1025 		char *line;
1026 		/*
1027 		 * All lines in the change, insert, or delete must
1028 		 * match an ignore pattern for the change to be
1029 		 * ignored.
1030 		 */
1031 		if (a <= b) {		/* Changes and deletes. */
1032 			for (i = a; i <= b; i++) {
1033 				line = preadline(fileno(f1),
1034 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1035 				if (!ignoreline(line))
1036 					goto proceed;
1037 			}
1038 		}
1039 		if (a > b || c <= d) {	/* Changes and inserts. */
1040 			for (i = c; i <= d; i++) {
1041 				line = preadline(fileno(f2),
1042 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1043 				if (!ignoreline(line))
1044 					goto proceed;
1045 			}
1046 		}
1047 		return;
1048 	}
1049 proceed:
1050 	if (*pflags & D_HEADER) {
1051 		printf("%s %s %s\n", diffargs, file1, file2);
1052 		*pflags &= ~D_HEADER;
1053 	}
1054 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1055 		/*
1056 		 * Allocate change records as needed.
1057 		 */
1058 		if (context_vec_ptr == context_vec_end - 1) {
1059 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1060 			max_context <<= 1;
1061 			context_vec_start = xrealloc(context_vec_start,
1062 			    max_context, sizeof(*context_vec_start));
1063 			context_vec_end = context_vec_start + max_context;
1064 			context_vec_ptr = context_vec_start + offset;
1065 		}
1066 		if (anychange == 0) {
1067 			/*
1068 			 * Print the context/unidiff header first time through.
1069 			 */
1070 			print_header(file1, file2);
1071 			anychange = 1;
1072 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1073 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1074 			/*
1075 			 * If this change is more than 'diff_context' lines from the
1076 			 * previous change, dump the record and reset it.
1077 			 */
1078 			if (diff_format == D_CONTEXT)
1079 				dump_context_vec(f1, f2, *pflags);
1080 			else
1081 				dump_unified_vec(f1, f2, *pflags);
1082 		}
1083 		context_vec_ptr++;
1084 		context_vec_ptr->a = a;
1085 		context_vec_ptr->b = b;
1086 		context_vec_ptr->c = c;
1087 		context_vec_ptr->d = d;
1088 		return;
1089 	}
1090 	if (anychange == 0)
1091 		anychange = 1;
1092 	switch (diff_format) {
1093 	case D_BRIEF:
1094 		return;
1095 	case D_NORMAL:
1096 	case D_EDIT:
1097 		range(a, b, ",");
1098 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1099 		if (diff_format == D_NORMAL)
1100 			range(c, d, ",");
1101 		putchar('\n');
1102 		break;
1103 	case D_REVERSE:
1104 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1105 		range(a, b, " ");
1106 		putchar('\n');
1107 		break;
1108 	case D_NREVERSE:
1109 		if (a > b)
1110 			printf("a%d %d\n", b, d - c + 1);
1111 		else {
1112 			printf("d%d %d\n", a, b - a + 1);
1113 			if (!(c > d))
1114 				/* add changed lines */
1115 				printf("a%d %d\n", b, d - c + 1);
1116 		}
1117 		break;
1118 	}
1119 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1120 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1121 		if (a <= b && c <= d && diff_format == D_NORMAL)
1122 			puts("---");
1123 	}
1124 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1125 	if (i != 0 && diff_format == D_EDIT) {
1126 		/*
1127 		 * A non-zero return value for D_EDIT indicates that the
1128 		 * last line printed was a bare dot (".") that has been
1129 		 * escaped as ".." to prevent ed(1) from misinterpreting
1130 		 * it.  We have to add a substitute command to change this
1131 		 * back and restart where we left off.
1132 		 */
1133 		puts(".");
1134 		printf("%ds/^\\.\\././\n", a);
1135 		a += i;
1136 		c += i;
1137 		goto restart;
1138 	}
1139 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1140 		puts(".");
1141 	if (inifdef) {
1142 		printf("#endif /* %s */\n", ifdefname);
1143 		inifdef = 0;
1144 	}
1145 }
1146 
1147 static int
1148 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1149 {
1150 	int i, j, c, lastc, col, nc;
1151 
1152 	/*
1153 	 * When doing #ifdef's, copy down to current line
1154 	 * if this is the first file, so that stuff makes it to output.
1155 	 */
1156 	if (diff_format == D_IFDEF && oldfile) {
1157 		long curpos = ftell(lb);
1158 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1159 		nc = f[a > b ? b : a - 1] - curpos;
1160 		for (i = 0; i < nc; i++)
1161 			putchar(getc(lb));
1162 	}
1163 	if (a > b)
1164 		return (0);
1165 	if (diff_format == D_IFDEF) {
1166 		if (inifdef) {
1167 			printf("#else /* %s%s */\n",
1168 			    oldfile == 1 ? "!" : "", ifdefname);
1169 		} else {
1170 			if (oldfile)
1171 				printf("#ifndef %s\n", ifdefname);
1172 			else
1173 				printf("#ifdef %s\n", ifdefname);
1174 		}
1175 		inifdef = 1 + oldfile;
1176 	}
1177 	for (i = a; i <= b; i++) {
1178 		fseek(lb, f[i - 1], SEEK_SET);
1179 		nc = f[i] - f[i - 1];
1180 		if (diff_format != D_IFDEF && ch != '\0') {
1181 			putchar(ch);
1182 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1183 			    || diff_format == D_UNIFIED))
1184 				putchar('\t');
1185 			else if (diff_format != D_UNIFIED)
1186 				putchar(' ');
1187 		}
1188 		col = 0;
1189 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1190 			if ((c = getc(lb)) == EOF) {
1191 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1192 				    diff_format == D_NREVERSE)
1193 					warnx("No newline at end of file");
1194 				else
1195 					puts("\n\\ No newline at end of file");
1196 				return (0);
1197 			}
1198 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1199 				do {
1200 					putchar(' ');
1201 				} while (++col & 7);
1202 			} else {
1203 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1204 				    && lastc == '.') {
1205 					/*
1206 					 * Don't print a bare "." line
1207 					 * since that will confuse ed(1).
1208 					 * Print ".." instead and return,
1209 					 * giving the caller an offset
1210 					 * from which to restart.
1211 					 */
1212 					puts(".");
1213 					return (i - a + 1);
1214 				}
1215 				putchar(c);
1216 				col++;
1217 			}
1218 		}
1219 	}
1220 	return (0);
1221 }
1222 
1223 /*
1224  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1225  */
1226 static int
1227 readhash(FILE *f, int flags)
1228 {
1229 	int i, t, space;
1230 	int sum;
1231 
1232 	sum = 1;
1233 	space = 0;
1234 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1235 		if (flags & D_IGNORECASE)
1236 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1237 				if (t == EOF) {
1238 					if (i == 0)
1239 						return (0);
1240 					break;
1241 				}
1242 				sum = sum * 127 + chrtran[t];
1243 			}
1244 		else
1245 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1246 				if (t == EOF) {
1247 					if (i == 0)
1248 						return (0);
1249 					break;
1250 				}
1251 				sum = sum * 127 + t;
1252 			}
1253 	} else {
1254 		for (i = 0;;) {
1255 			switch (t = getc(f)) {
1256 			case '\t':
1257 			case '\r':
1258 			case '\v':
1259 			case '\f':
1260 			case ' ':
1261 				space++;
1262 				continue;
1263 			default:
1264 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1265 					i++;
1266 					space = 0;
1267 				}
1268 				sum = sum * 127 + chrtran[t];
1269 				i++;
1270 				continue;
1271 			case EOF:
1272 				if (i == 0)
1273 					return (0);
1274 				/* FALLTHROUGH */
1275 			case '\n':
1276 				break;
1277 			}
1278 			break;
1279 		}
1280 	}
1281 	/*
1282 	 * There is a remote possibility that we end up with a zero sum.
1283 	 * Zero is used as an EOF marker, so return 1 instead.
1284 	 */
1285 	return (sum == 0 ? 1 : sum);
1286 }
1287 
1288 static int
1289 asciifile(FILE *f)
1290 {
1291 	unsigned char buf[BUFSIZ];
1292 	size_t i, cnt;
1293 
1294 	if (f == NULL)
1295 		return (1);
1296 
1297 	rewind(f);
1298 	cnt = fread(buf, 1, sizeof(buf), f);
1299 	for (i = 0; i < cnt; i++)
1300 		if (!isprint(buf[i]) && !isspace(buf[i]))
1301 			return (0);
1302 	return (1);
1303 }
1304 
1305 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1306 
1307 static char *
1308 match_function(const long *f, int pos, FILE *fp)
1309 {
1310 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1311 	size_t nc;
1312 	int last = lastline;
1313 	char *state = NULL;
1314 
1315 	lastline = pos;
1316 	while (pos > last) {
1317 		fseek(fp, f[pos - 1], SEEK_SET);
1318 		nc = f[pos] - f[pos - 1];
1319 		if (nc >= sizeof(buf))
1320 			nc = sizeof(buf) - 1;
1321 		nc = fread((char *) buf, 1, nc, fp);
1322 		if (nc > 0) {
1323 			buf[nc] = '\0';
1324 			buf[strcspn((char *) buf, "\n")] = '\0';
1325 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1326 				if (begins_with((char *)buf, "private:")) {
1327 					if (!state)
1328 						state = " (private)";
1329 				} else if (begins_with((char *)buf, "protected:")) {
1330 					if (!state)
1331 						state = " (protected)";
1332 				} else if (begins_with((char *)buf, "public:")) {
1333 					if (!state)
1334 						state = " (public)";
1335 				} else {
1336 					strlcpy(lastbuf, (char *) buf, sizeof lastbuf);
1337 					if (state)
1338 						strlcat(lastbuf, state,
1339 						    sizeof lastbuf);
1340 					lastmatchline = pos;
1341 					return lastbuf;
1342 				}
1343 			}
1344 		}
1345 		pos--;
1346 	}
1347 	return lastmatchline > 0 ? lastbuf : NULL;
1348 }
1349 
1350 /* dump accumulated "context" diff changes */
1351 static void
1352 dump_context_vec(FILE *f1, FILE *f2, int flags)
1353 {
1354 	struct context_vec *cvp = context_vec_start;
1355 	int lowa, upb, lowc, upd, do_output;
1356 	int a, b, c, d;
1357 	char ch, *f;
1358 
1359 	if (context_vec_start > context_vec_ptr)
1360 		return;
1361 
1362 	b = d = 0;		/* gcc */
1363 	lowa = MAX(1, cvp->a - diff_context);
1364 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1365 	lowc = MAX(1, cvp->c - diff_context);
1366 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1367 
1368 	printf("***************");
1369 	if ((flags & D_PROTOTYPE)) {
1370 		f = match_function(ixold, lowa-1, f1);
1371 		if (f != NULL) {
1372 			putchar(' ');
1373 			fputs(f, stdout);
1374 		}
1375 	}
1376 	printf("\n*** ");
1377 	range(lowa, upb, ",");
1378 	printf(" ****\n");
1379 
1380 	/*
1381 	 * Output changes to the "old" file.  The first loop suppresses
1382 	 * output if there were no changes to the "old" file (we'll see
1383 	 * the "old" lines as context in the "new" list).
1384 	 */
1385 	do_output = 0;
1386 	for (; cvp <= context_vec_ptr; cvp++)
1387 		if (cvp->a <= cvp->b) {
1388 			cvp = context_vec_start;
1389 			do_output++;
1390 			break;
1391 		}
1392 	if (do_output) {
1393 		while (cvp <= context_vec_ptr) {
1394 			a = cvp->a;
1395 			b = cvp->b;
1396 			c = cvp->c;
1397 			d = cvp->d;
1398 
1399 			if (a <= b && c <= d)
1400 				ch = 'c';
1401 			else
1402 				ch = (a <= b) ? 'd' : 'a';
1403 
1404 			if (ch == 'a')
1405 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1406 			else {
1407 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1408 				fetch(ixold, a, b, f1,
1409 				    ch == 'c' ? '!' : '-', 0, flags);
1410 			}
1411 			lowa = b + 1;
1412 			cvp++;
1413 		}
1414 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1415 	}
1416 	/* output changes to the "new" file */
1417 	printf("--- ");
1418 	range(lowc, upd, ",");
1419 	printf(" ----\n");
1420 
1421 	do_output = 0;
1422 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1423 		if (cvp->c <= cvp->d) {
1424 			cvp = context_vec_start;
1425 			do_output++;
1426 			break;
1427 		}
1428 	if (do_output) {
1429 		while (cvp <= context_vec_ptr) {
1430 			a = cvp->a;
1431 			b = cvp->b;
1432 			c = cvp->c;
1433 			d = cvp->d;
1434 
1435 			if (a <= b && c <= d)
1436 				ch = 'c';
1437 			else
1438 				ch = (a <= b) ? 'd' : 'a';
1439 
1440 			if (ch == 'd')
1441 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1442 			else {
1443 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1444 				fetch(ixnew, c, d, f2,
1445 				    ch == 'c' ? '!' : '+', 0, flags);
1446 			}
1447 			lowc = d + 1;
1448 			cvp++;
1449 		}
1450 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1451 	}
1452 	context_vec_ptr = context_vec_start - 1;
1453 }
1454 
1455 /* dump accumulated "unified" diff changes */
1456 static void
1457 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1458 {
1459 	struct context_vec *cvp = context_vec_start;
1460 	int lowa, upb, lowc, upd;
1461 	int a, b, c, d;
1462 	char ch, *f;
1463 
1464 	if (context_vec_start > context_vec_ptr)
1465 		return;
1466 
1467 	b = d = 0;		/* gcc */
1468 	lowa = MAX(1, cvp->a - diff_context);
1469 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1470 	lowc = MAX(1, cvp->c - diff_context);
1471 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1472 
1473 	fputs("@@ -", stdout);
1474 	uni_range(lowa, upb);
1475 	fputs(" +", stdout);
1476 	uni_range(lowc, upd);
1477 	fputs(" @@", stdout);
1478 	if ((flags & D_PROTOTYPE)) {
1479 		f = match_function(ixold, lowa-1, f1);
1480 		if (f != NULL) {
1481 			putchar(' ');
1482 			fputs(f, stdout);
1483 		}
1484 	}
1485 	putchar('\n');
1486 
1487 	/*
1488 	 * Output changes in "unified" diff format--the old and new lines
1489 	 * are printed together.
1490 	 */
1491 	for (; cvp <= context_vec_ptr; cvp++) {
1492 		a = cvp->a;
1493 		b = cvp->b;
1494 		c = cvp->c;
1495 		d = cvp->d;
1496 
1497 		/*
1498 		 * c: both new and old changes
1499 		 * d: only changes in the old file
1500 		 * a: only changes in the new file
1501 		 */
1502 		if (a <= b && c <= d)
1503 			ch = 'c';
1504 		else
1505 			ch = (a <= b) ? 'd' : 'a';
1506 
1507 		switch (ch) {
1508 		case 'c':
1509 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1510 			fetch(ixold, a, b, f1, '-', 0, flags);
1511 			fetch(ixnew, c, d, f2, '+', 0, flags);
1512 			break;
1513 		case 'd':
1514 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515 			fetch(ixold, a, b, f1, '-', 0, flags);
1516 			break;
1517 		case 'a':
1518 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1519 			fetch(ixnew, c, d, f2, '+', 0, flags);
1520 			break;
1521 		}
1522 		lowa = b + 1;
1523 		lowc = d + 1;
1524 	}
1525 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1526 
1527 	context_vec_ptr = context_vec_start - 1;
1528 }
1529 
1530 static void
1531 print_header(const char *file1, const char *file2)
1532 {
1533 	if (label[0] != NULL)
1534 		printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1535 		    label[0]);
1536 	else
1537 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1538 		    file1, ctime(&stb1.st_mtime));
1539 	if (label[1] != NULL)
1540 		printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1541 		    label[1]);
1542 	else
1543 		printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1544 		    file2, ctime(&stb2.st_mtime));
1545 }
1546