xref: /openbsd-src/usr.bin/diff/diffreg.c (revision d226b3cdb54524d153d4343cbf0da8b5938e3623)
1 /*	$OpenBSD: diffreg.c,v 1.10 2003/06/25 03:46:45 deraadt Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <string.h>
43 
44 #include "diff.h"
45 #include "pathnames.h"
46 
47 #if 0
48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90";
49 #endif
50 
51 /*
52  * diff - compare two files.
53  */
54 
55 /*
56  *	Uses an algorithm due to Harold Stone, which finds
57  *	a pair of longest identical subsequences in the two
58  *	files.
59  *
60  *	The major goal is to generate the match vector J.
61  *	J[i] is the index of the line in file1 corresponding
62  *	to line i file0. J[i] = 0 if there is no
63  *	such line in file1.
64  *
65  *	Lines are hashed so as to work in core. All potential
66  *	matches are located by sorting the lines of each file
67  *	on the hash (called ``value''). In particular, this
68  *	collects the equivalence classes in file1 together.
69  *	Subroutine equiv replaces the value of each line in
70  *	file0 by the index of the first element of its
71  *	matching equivalence in (the reordered) file1.
72  *	To save space equiv squeezes file1 into a single
73  *	array member in which the equivalence classes
74  *	are simply concatenated, except that their first
75  *	members are flagged by changing sign.
76  *
77  *	Next the indices that point into member are unsorted into
78  *	array class according to the original order of file0.
79  *
80  *	The cleverness lies in routine stone. This marches
81  *	through the lines of file0, developing a vector klist
82  *	of "k-candidates". At step i a k-candidate is a matched
83  *	pair of lines x,y (x in file0 y in file1) such that
84  *	there is a common subsequence of length k
85  *	between the first i lines of file0 and the first y
86  *	lines of file1, but there is no such subsequence for
87  *	any smaller y. x is the earliest possible mate to y
88  *	that occurs in such a subsequence.
89  *
90  *	Whenever any of the members of the equivalence class of
91  *	lines in file1 matable to a line in file0 has serial number
92  *	less than the y of some k-candidate, that k-candidate
93  *	with the smallest such y is replaced. The new
94  *	k-candidate is chained (via pred) to the current
95  *	k-1 candidate so that the actual subsequence can
96  *	be recovered. When a member has serial number greater
97  *	that the y of all k-candidates, the klist is extended.
98  *	At the end, the longest subsequence is pulled out
99  *	and placed in the array J by unravel
100  *
101  *	With J in hand, the matches there recorded are
102  *	check'ed against reality to assure that no spurious
103  *	matches have crept in due to hashing. If they have,
104  *	they are broken, and "jackpot" is recorded--a harmless
105  *	matter except that a true match for a spuriously
106  *	mated line may now be unnecessarily reported as a change.
107  *
108  *	Much of the complexity of the program comes simply
109  *	from trying to minimize core utilization and
110  *	maximize the range of doable problems by dynamically
111  *	allocating what is needed and reusing what is not.
112  *	The core requirements for problems larger than somewhat
113  *	are (in words) 2*length(file0) + length(file1) +
114  *	3*(number of k-candidates installed),  typically about
115  *	6n words for files of length n.
116  */
117 
118 #define	prints(s)	fputs(s,stdout)
119 
120 FILE *input[2];
121 
122 struct cand {
123 	int x;
124 	int y;
125 	int pred;
126 } cand;
127 
128 struct line {
129 	int serial;
130 	int value;
131 } *file[2];
132 
133 int len[2];
134 struct line *sfile[2];	/* shortened by pruning common prefix and suffix */
135 int slen[2];
136 int pref, suff;			/* length of prefix and suffix */
137 int *class;			/* will be overlaid on file[0] */
138 int *member;			/* will be overlaid on file[1] */
139 int *klist;			/* will be overlaid on file[0] after class */
140 struct cand *clist;		/* merely a free storage pot for candidates */
141 int clen = 0;
142 int *J;				/* will be overlaid on class */
143 long *ixold;			/* will be overlaid on klist */
144 long *ixnew;			/* will be overlaid on file[1] */
145 u_char *chrtran;		/* translation table for case-folding */
146 
147 static void fetch(long *, int, int, FILE *, char *, int);
148 static void output(void);
149 static void check(void);
150 static void range(int, int, char *);
151 static void dump_context_vec(void);
152 static void prepare(int, FILE *);
153 static void prune(void);
154 static void equiv(struct line *, int, struct line *, int, int *);
155 static void unravel(int);
156 static void unsort(struct line *, int, int *);
157 static void change(int, int, int, int);
158 static void sort(struct line *, int);
159 static int newcand(int, int, int);
160 static int search(int *, int, int);
161 static int skipline(int);
162 static int asciifile(FILE *);
163 static int stone(int *, int, int *, int *);
164 static int readhash(FILE *);
165 
166 /*
167  * chrtran points to one of 2 translation tables: cup2low if folding upper to
168  * lower case clow2low if not folding case
169  */
170 u_char clow2low[256] = {
171 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
172 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
173 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
174 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
175 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
176 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
177 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
178 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
179 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
180 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
181 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
182 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
183 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
184 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
185 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
186 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
187 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
188 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
189 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
190 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
191 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
192 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
193 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
194 	0xfd, 0xfe, 0xff
195 };
196 
197 u_char cup2low[256] = {
198 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
199 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
200 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
201 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
202 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
203 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
204 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
205 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
206 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
207 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
208 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
209 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
210 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
211 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
212 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
213 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
214 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
215 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
216 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
217 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
218 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
219 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
220 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
221 	0xfd, 0xfe, 0xff
222 };
223 
224 void
225 diffreg(void)
226 {
227 	char buf1[BUFSIZ], buf2[BUFSIZ];
228 	FILE *f1, *f2;
229 	int i, j;
230 
231 	if (hflag) {
232 		diffargv[0] = "diffh";
233 		execv(diffh, diffargv);
234 		fprintf(stderr, "diff: ");
235 		perror(diffh);
236 		done(0);
237 	}
238 	chrtran = (iflag ? cup2low : clow2low);
239 	if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
240 		file1 = splice(file1, file2);
241 		if (stat(file1, &stb1) < 0) {
242 			fprintf(stderr, "diff: ");
243 			perror(file1);
244 			done(0);
245 		}
246 	} else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
247 		file2 = splice(file2, file1);
248 		if (stat(file2, &stb2) < 0) {
249 			fprintf(stderr, "diff: ");
250 			perror(file2);
251 			done(0);
252 		}
253 	} else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
254 		if (!strcmp(file2, "-")) {
255 			fprintf(stderr, "diff: can't specify - -\n");
256 			done(0);
257 		}
258 		file1 = copytemp();
259 		if (stat(file1, &stb1) < 0) {
260 			fprintf(stderr, "diff: ");
261 			perror(file1);
262 			done(0);
263 		}
264 	} else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
265 		file2 = copytemp();
266 		if (stat(file2, &stb2) < 0) {
267 			fprintf(stderr, "diff: ");
268 			perror(file2);
269 			done(0);
270 		}
271 	}
272 	if ((f1 = fopen(file1, "r")) == NULL) {
273 		fprintf(stderr, "diff: ");
274 		perror(file1);
275 		done(0);
276 	}
277 	if ((f2 = fopen(file2, "r")) == NULL) {
278 		fprintf(stderr, "diff: ");
279 		perror(file2);
280 		fclose(f1);
281 		done(0);
282 	}
283 	if (stb1.st_size != stb2.st_size)
284 		goto notsame;
285 	for (;;) {
286 		i = fread(buf1, 1, BUFSIZ, f1);
287 		j = fread(buf2, 1, BUFSIZ, f2);
288 		if (i < 0 || j < 0 || i != j)
289 			goto notsame;
290 		if (i == 0 && j == 0) {
291 			fclose(f1);
292 			fclose(f2);
293 			status = 0;	/* files don't differ */
294 			goto same;
295 		}
296 		for (j = 0; j < i; j++)
297 			if (buf1[j] != buf2[j])
298 				goto notsame;
299 	}
300 notsame:
301 	/*
302 	 *	Files certainly differ at this point; set status accordingly
303 	 */
304 	status = 1;
305 	if (!asciifile(f1) || !asciifile(f2)) {
306 		printf("Binary files %s and %s differ\n", file1, file2);
307 		fclose(f1);
308 		fclose(f2);
309 		done(0);
310 	}
311 	prepare(0, f1);
312 	prepare(1, f2);
313 	fclose(f1);
314 	fclose(f2);
315 	prune();
316 	sort(sfile[0], slen[0]);
317 	sort(sfile[1], slen[1]);
318 
319 	member = (int *)file[1];
320 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
321 	member = ralloc(member, (slen[1] + 2) * sizeof(int));
322 
323 	class = (int *)file[0];
324 	unsort(sfile[0], slen[0], class);
325 	class = ralloc(class, (slen[0] + 2) * sizeof(int));
326 
327 	klist = talloc((slen[0] + 2) * sizeof(int));
328 	clist = talloc(sizeof(cand));
329 	i = stone(class, slen[0], member, klist);
330 	free(member);
331 	free(class);
332 
333 	J = talloc((len[0] + 2) * sizeof(int));
334 	unravel(klist[i]);
335 	free(clist);
336 	free(klist);
337 
338 	ixold = talloc((len[0] + 2) * sizeof(long));
339 	ixnew = talloc((len[1] + 2) * sizeof(long));
340 	check();
341 	output();
342 	status = anychange;
343 same:
344 	if (opt == D_CONTEXT && anychange == 0)
345 		printf("No differences encountered\n");
346 	done(0);
347 }
348 
349 char *tempfile = _PATH_TMP;
350 
351 char *
352 copytemp(void)
353 {
354 	char buf[BUFSIZ];
355 	int i, f;
356 
357 	signal(SIGHUP, catchsig);
358 	signal(SIGINT, catchsig);
359 	signal(SIGPIPE, catchsig);
360 	signal(SIGTERM, catchsig);
361 	f = mkstemp(tempfile);
362 	if (f < 0) {
363 		fprintf(stderr, "diff: ");
364 		perror(tempfile);
365 		done(0);
366 	}
367 	while ((i = read(0, buf, BUFSIZ)) > 0)
368 		if (write(f, buf, i) != i) {
369 			fprintf(stderr, "diff: ");
370 			perror(tempfile);
371 			done(0);
372 		}
373 	close(f);
374 	return (tempfile);
375 }
376 
377 char *
378 splice(char *dir, char *file)
379 {
380 	char *tail, buf[BUFSIZ];
381 
382 	if (!strcmp(file, "-")) {
383 		fprintf(stderr, "diff: can't specify - with other arg directory\n");
384 		done(0);
385 	}
386 	tail = strrchr(file, '/');
387 	if (tail == 0)
388 		tail = file;
389 	else
390 		tail++;
391 	snprintf(buf, sizeof buf, "%s/%s", dir, tail);
392 	return (strdup(buf));
393 }
394 
395 static void
396 prepare(int i, FILE *fd)
397 {
398 	struct line *p;
399 	int j, h;
400 
401 	fseek(fd, 0, 0);
402 	p = talloc(3 * sizeof(struct line));
403 	for (j = 0; (h = readhash(fd));) {
404 		p = ralloc(p, (++j + 3) * sizeof(struct line));
405 		p[j].value = h;
406 	}
407 	len[i] = j;
408 	file[i] = p;
409 }
410 
411 static void
412 prune(void)
413 {
414 	int i, j;
415 
416 	for (pref = 0; pref < len[0] && pref < len[1] &&
417 	    file[0][pref + 1].value == file[1][pref + 1].value;
418 	    pref++);
419 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
420 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
421 	    suff++);
422 	for (j = 0; j < 2; j++) {
423 		sfile[j] = file[j] + pref;
424 		slen[j] = len[j] - pref - suff;
425 		for (i = 0; i <= slen[j]; i++)
426 			sfile[j][i].serial = i;
427 	}
428 }
429 
430 static void
431 equiv(struct line *a, int n, struct line *b, int m, int *c)
432 {
433 	int i, j;
434 
435 	i = j = 1;
436 	while (i <= n && j <= m) {
437 		if (a[i].value < b[j].value)
438 			a[i++].value = 0;
439 		else if (a[i].value == b[j].value)
440 			a[i++].value = j;
441 		else
442 			j++;
443 	}
444 	while (i <= n)
445 		a[i++].value = 0;
446 	b[m + 1].value = 0;
447 	j = 0;
448 	while (++j <= m) {
449 		c[j] = -b[j].serial;
450 		while (b[j + 1].value == b[j].value) {
451 			j++;
452 			c[j] = b[j].serial;
453 		}
454 	}
455 	c[j] = -1;
456 }
457 
458 static int
459 stone(int *a, int n, int *b, int *c)
460 {
461 	int i, k, y, j, l;
462 	int oldc, tc, oldl;
463 
464 	k = 0;
465 	c[0] = newcand(0, 0, 0);
466 	for (i = 1; i <= n; i++) {
467 		j = a[i];
468 		if (j == 0)
469 			continue;
470 		y = -b[j];
471 		oldl = 0;
472 		oldc = c[0];
473 		do {
474 			if (y <= clist[oldc].y)
475 				continue;
476 			l = search(c, k, y);
477 			if (l != oldl + 1)
478 				oldc = c[l - 1];
479 			if (l <= k) {
480 				if (clist[c[l]].y <= y)
481 					continue;
482 				tc = c[l];
483 				c[l] = newcand(i, y, oldc);
484 				oldc = tc;
485 				oldl = l;
486 			} else {
487 				c[l] = newcand(i, y, oldc);
488 				k++;
489 				break;
490 			}
491 		} while ((y = b[++j]) > 0);
492 	}
493 	return (k);
494 }
495 
496 static int
497 newcand(int x, int y, int pred)
498 {
499 	struct cand *q;
500 
501 	clist = ralloc(clist, ++clen * sizeof(cand));
502 	q = clist + clen - 1;
503 	q->x = x;
504 	q->y = y;
505 	q->pred = pred;
506 	return (clen - 1);
507 }
508 
509 static int
510 search(int *c, int k, int y)
511 {
512 	int i, j, l, t;
513 
514 	if (clist[c[k]].y < y)	/* quick look for typical case */
515 		return (k + 1);
516 	i = 0;
517 	j = k + 1;
518 	while (1) {
519 		l = i + j;
520 		if ((l >>= 1) <= i)
521 			break;
522 		t = clist[c[l]].y;
523 		if (t > y)
524 			j = l;
525 		else if (t < y)
526 			i = l;
527 		else
528 			return (l);
529 	}
530 	return (l + 1);
531 }
532 
533 static void
534 unravel(int p)
535 {
536 	struct cand *q;
537 	int i;
538 
539 	for (i = 0; i <= len[0]; i++)
540 		J[i] = i <= pref ? i :
541 		    i > len[0] - suff ? i + len[1] - len[0] :
542 		    0;
543 	for (q = clist + p; q->y != 0; q = clist + q->pred)
544 		J[q->x + pref] = q->y + pref;
545 }
546 
547 /*
548  * check does double duty: 1.  ferret out any fortuitous correspondences due
549  * to confounding by hashing (which result in "jackpot") 2.  collect random
550  * access indexes to the two files
551  */
552 
553 static void
554 check(void)
555 {
556 	int i, j, jackpot, c, d;
557 	long ctold, ctnew;
558 
559 	if ((input[0] = fopen(file1, "r")) == NULL) {
560 		perror(file1);
561 		done(0);
562 	}
563 	if ((input[1] = fopen(file2, "r")) == NULL) {
564 		perror(file2);
565 		done(0);
566 	}
567 	j = 1;
568 	ixold[0] = ixnew[0] = 0;
569 	jackpot = 0;
570 	ctold = ctnew = 0;
571 	for (i = 1; i <= len[0]; i++) {
572 		if (J[i] == 0) {
573 			ixold[i] = ctold += skipline(0);
574 			continue;
575 		}
576 		while (j < J[i]) {
577 			ixnew[j] = ctnew += skipline(1);
578 			j++;
579 		}
580 		if (bflag || wflag || iflag) {
581 			for (;;) {
582 				c = getc(input[0]);
583 				d = getc(input[1]);
584 				ctold++;
585 				ctnew++;
586 				if (bflag && isspace(c) && isspace(d)) {
587 					do {
588 						if (c == '\n')
589 							break;
590 						ctold++;
591 					} while (isspace(c = getc(input[0])));
592 					do {
593 						if (d == '\n')
594 							break;
595 						ctnew++;
596 					} while (isspace(d = getc(input[1])));
597 				} else if (wflag) {
598 					while (isspace(c) && c != '\n') {
599 						c = getc(input[0]);
600 						ctold++;
601 					}
602 					while (isspace(d) && d != '\n') {
603 						d = getc(input[1]);
604 						ctnew++;
605 					}
606 				}
607 				if (chrtran[c] != chrtran[d]) {
608 					jackpot++;
609 					J[i] = 0;
610 					if (c != '\n')
611 						ctold += skipline(0);
612 					if (d != '\n')
613 						ctnew += skipline(1);
614 					break;
615 				}
616 				if (c == '\n')
617 					break;
618 			}
619 		} else {
620 			for (;;) {
621 				ctold++;
622 				ctnew++;
623 				if ((c = getc(input[0])) != (d = getc(input[1]))) {
624 					/* jackpot++; */
625 					J[i] = 0;
626 					if (c != '\n')
627 						ctold += skipline(0);
628 					if (d != '\n')
629 						ctnew += skipline(1);
630 					break;
631 				}
632 				if (c == '\n')
633 					break;
634 			}
635 		}
636 		ixold[i] = ctold;
637 		ixnew[j] = ctnew;
638 		j++;
639 	}
640 	for (; j <= len[1]; j++) {
641 		ixnew[j] = ctnew += skipline(1);
642 	}
643 	fclose(input[0]);
644 	fclose(input[1]);
645 	/*
646 	 * if (jackpot)
647 	 *	fprintf(stderr, "jackpot\n");
648 	 */
649 }
650 
651 /* shellsort CACM #201 */
652 static void
653 sort(struct line *a, int n)
654 {
655 	struct line *ai, *aim, w;
656 	int j, m = 0, k;
657 
658 	if (n == 0)
659 		return;
660 	for (j = 1; j <= n; j *= 2)
661 		m = 2 * j - 1;
662 	for (m /= 2; m != 0; m /= 2) {
663 		k = n - m;
664 		for (j = 1; j <= k; j++) {
665 			for (ai = &a[j]; ai > a; ai -= m) {
666 				aim = &ai[m];
667 				if (aim < ai)
668 					break;	/* wraparound */
669 				if (aim->value > ai[0].value ||
670 				    (aim->value == ai[0].value &&
671 					aim->serial > ai[0].serial))
672 					break;
673 				w.value = ai[0].value;
674 				ai[0].value = aim->value;
675 				aim->value = w.value;
676 				w.serial = ai[0].serial;
677 				ai[0].serial = aim->serial;
678 				aim->serial = w.serial;
679 			}
680 		}
681 	}
682 }
683 
684 static void
685 unsort(struct line *f, int l, int *b)
686 {
687 	int *a, i;
688 
689 	a = talloc((l + 1) * sizeof(int));
690 	for (i = 1; i <= l; i++)
691 		a[f[i].serial] = f[i].value;
692 	for (i = 1; i <= l; i++)
693 		b[i] = a[i];
694 	free(a);
695 }
696 
697 static int
698 skipline(int f)
699 {
700 	int i, c;
701 
702 	for (i = 1; (c = getc(input[f])) != '\n'; i++)
703 		if (c < 0)
704 			return (i);
705 	return (i);
706 }
707 
708 static void
709 output(void)
710 {
711 	int m, i0, i1, j0, j1;
712 
713 	input[0] = fopen(file1, "r");
714 	input[1] = fopen(file2, "r");
715 	m = len[0];
716 	J[0] = 0;
717 	J[m + 1] = len[1] + 1;
718 	if (opt != D_EDIT) {
719 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
720 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
721 				i0++;
722 			j0 = J[i0 - 1] + 1;
723 			i1 = i0 - 1;
724 			while (i1 < m && J[i1 + 1] == 0)
725 				i1++;
726 			j1 = J[i1 + 1] - 1;
727 			J[i1] = j1;
728 			change(i0, i1, j0, j1);
729 		}
730 	} else {
731 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
732 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
733 				i0--;
734 			j0 = J[i0 + 1] - 1;
735 			i1 = i0 + 1;
736 			while (i1 > 1 && J[i1 - 1] == 0)
737 				i1--;
738 			j1 = J[i1 - 1] + 1;
739 			J[i1] = j1;
740 			change(i1, i0, j1, j0);
741 		}
742 	}
743 	if (m == 0)
744 		change(1, 0, 1, len[1]);
745 	if (opt == D_IFDEF) {
746 		for (;;) {
747 #define	c i0
748 			c = getc(input[0]);
749 			if (c < 0)
750 				return;
751 			putchar(c);
752 		}
753 #undef c
754 	}
755 	if (anychange && opt == D_CONTEXT)
756 		dump_context_vec();
757 }
758 
759 /*
760  * The following struct is used to record change information when
761  * doing a "context" diff.  (see routine "change" to understand the
762  * highly mneumonic field names)
763  */
764 struct context_vec {
765 	int a;			/* start line in old file */
766 	int b;			/* end line in old file */
767 	int c;			/* start line in new file */
768 	int d;			/* end line in new file */
769 };
770 
771 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr;
772 
773 #define	MAX_CONTEXT	128
774 
775 /*
776  * indicate that there is a difference between lines a and b of the from file
777  * to get to lines c to d of the to file. If a is greater then b then there
778  * are no lines in the from file involved and this means that there were
779  * lines appended (beginning at b). If c is greater than d then there are
780  * lines missing from the to file.
781  */
782 static void
783 change(int a, int b, int c, int d)
784 {
785 	struct stat stbuf;
786 
787 	if (opt != D_IFDEF && a > b && c > d)
788 		return;
789 	if (anychange == 0) {
790 		anychange = 1;
791 		if (opt == D_CONTEXT) {
792 			printf("*** %s	", file1);
793 			stat(file1, &stbuf);
794 			printf("%s--- %s	",
795 			    ctime(&stbuf.st_mtime), file2);
796 			stat(file2, &stbuf);
797 			printf("%s", ctime(&stbuf.st_mtime));
798 
799 			context_vec_start = talloc(MAX_CONTEXT *
800 			    sizeof(struct context_vec));
801 			context_vec_end = context_vec_start + MAX_CONTEXT;
802 			context_vec_ptr = context_vec_start - 1;
803 		}
804 	}
805 	if (opt == D_CONTEXT) {
806 		/*
807 		 * if this new change is within 'context' lines of
808 		 * the previous change, just add it to the change
809 		 * record.  If the record is full or if this
810 		 * change is more than 'context' lines from the previous
811 		 * change, dump the record, reset it & add the new change.
812 		 */
813 		if (context_vec_ptr >= context_vec_end ||
814 		    (context_vec_ptr >= context_vec_start &&
815 			a > (context_vec_ptr->b + 2 * context) &&
816 			c > (context_vec_ptr->d + 2 * context)))
817 			dump_context_vec();
818 
819 		context_vec_ptr++;
820 		context_vec_ptr->a = a;
821 		context_vec_ptr->b = b;
822 		context_vec_ptr->c = c;
823 		context_vec_ptr->d = d;
824 		return;
825 	}
826 	switch (opt) {
827 
828 	case D_NORMAL:
829 	case D_EDIT:
830 		range(a, b, ",");
831 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
832 		if (opt == D_NORMAL)
833 			range(c, d, ",");
834 		putchar('\n');
835 		break;
836 	case D_REVERSE:
837 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
838 		range(a, b, " ");
839 		putchar('\n');
840 		break;
841 	case D_NREVERSE:
842 		if (a > b)
843 			printf("a%d %d\n", b, d - c + 1);
844 		else {
845 			printf("d%d %d\n", a, b - a + 1);
846 			if (!(c > d))
847 				/* add changed lines */
848 				printf("a%d %d\n", b, d - c + 1);
849 		}
850 		break;
851 	}
852 	if (opt == D_NORMAL || opt == D_IFDEF) {
853 		fetch(ixold, a, b, input[0], "< ", 1);
854 		if (a <= b && c <= d && opt == D_NORMAL)
855 			prints("---\n");
856 	}
857 	fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0);
858 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
859 		prints(".\n");
860 	if (inifdef) {
861 		fprintf(stdout, "#endif /* %s */\n", endifname);
862 		inifdef = 0;
863 	}
864 }
865 
866 static void
867 range(int a, int b, char *separator)
868 {
869 	printf("%d", a > b ? b : a);
870 	if (a < b)
871 		printf("%s%d", separator, b);
872 }
873 
874 static void
875 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile)
876 {
877 	int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
878 	int i, j, c, col, nc;
879 
880 	/*
881 	 * When doing #ifdef's, copy down to current line
882 	 * if this is the first file, so that stuff makes it to output.
883 	 */
884 	if (opt == D_IFDEF && oldfile) {
885 		long curpos = ftell(lb);
886 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
887 		nc = f[a > b ? b : a - 1] - curpos;
888 		for (i = 0; i < nc; i++)
889 			putchar(getc(lb));
890 	}
891 	if (a > b)
892 		return;
893 	if (opt == D_IFDEF) {
894 		if (inifdef)
895 			fprintf(stdout, "#else /* %s%s */\n",
896 			    oneflag && oldfile == 1 ? "!" : "", ifdef2);
897 		else {
898 			if (oneflag) {
899 				/* There was only one ifdef given */
900 				endifname = ifdef2;
901 				if (oldfile)
902 					fprintf(stdout, "#ifndef %s\n", endifname);
903 				else
904 					fprintf(stdout, "#ifdef %s\n", endifname);
905 			} else {
906 				endifname = oldfile ? ifdef1 : ifdef2;
907 				fprintf(stdout, "#ifdef %s\n", endifname);
908 			}
909 		}
910 		inifdef = 1 + oldfile;
911 	}
912 	for (i = a; i <= b; i++) {
913 		fseek(lb, f[i - 1], 0);
914 		nc = f[i] - f[i - 1];
915 		if (opt != D_IFDEF)
916 			prints(s);
917 		col = 0;
918 		for (j = 0; j < nc; j++) {
919 			c = getc(lb);
920 			if (c == '\t' && tflag)
921 				do
922 					putchar(' ');
923 				while (++col & 7);
924 			else {
925 				putchar(c);
926 				col++;
927 			}
928 		}
929 	}
930 
931 	if (inifdef && !wantelses) {
932 		fprintf(stdout, "#endif /* %s */\n", endifname);
933 		inifdef = 0;
934 	}
935 }
936 
937 #define POW2			/* define only if HALFLONG is 2**n */
938 #define HALFLONG 16
939 #define low(x)	(x&((1L<<HALFLONG)-1))
940 #define high(x)	(x>>HALFLONG)
941 
942 /*
943  * hashing has the effect of
944  * arranging line in 7-bit bytes and then
945  * summing 1-s complement in 16-bit hunks
946  */
947 static int
948 readhash(FILE *f)
949 {
950 	unsigned int shift;
951 	int t, space;
952 	long sum;
953 
954 	sum = 1;
955 	space = 0;
956 	if (!bflag && !wflag) {
957 		if (iflag)
958 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
959 				if (t == -1)
960 					return (0);
961 				sum += (long)chrtran[t] << (shift
962 #ifdef POW2
963 				    &= HALFLONG - 1);
964 #else
965 				    %= HALFLONG);
966 #endif
967 			}
968 		else
969 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
970 				if (t == -1)
971 					return (0);
972 				sum += (long)t << (shift
973 #ifdef POW2
974 				    &= HALFLONG - 1);
975 #else
976 				    %= HALFLONG);
977 #endif
978 			}
979 	} else {
980 		for (shift = 0;;) {
981 			switch (t = getc(f)) {
982 			case -1:
983 				return (0);
984 			case '\t':
985 			case ' ':
986 				space++;
987 				continue;
988 			default:
989 				if (space && !wflag) {
990 					shift += 7;
991 					space = 0;
992 				}
993 				sum += (long)chrtran[t] << (shift
994 #ifdef POW2
995 				    &= HALFLONG - 1);
996 #else
997 				    %= HALFLONG);
998 #endif
999 				shift += 7;
1000 				continue;
1001 			case '\n':
1002 				break;
1003 			}
1004 			break;
1005 		}
1006 	}
1007 	sum = low(sum) + high(sum);
1008 	return ((short) low(sum) + (short) high(sum));
1009 }
1010 
1011 static int
1012 asciifile(FILE *f)
1013 {
1014 	char buf[BUFSIZ], *cp;
1015 	int cnt;
1016 
1017 	fseek(f, 0, 0);
1018 	cnt = fread(buf, 1, BUFSIZ, f);
1019 	cp = buf;
1020 	while (--cnt >= 0)
1021 		if (*cp++ & 0200)
1022 			return (0);
1023 	return (1);
1024 }
1025 
1026 
1027 /* dump accumulated "context" diff changes */
1028 static void
1029 dump_context_vec(void)
1030 {
1031 	struct context_vec *cvp = context_vec_start;
1032 	int lowa, upb, lowc, upd, do_output;
1033 	int a, b, c, d;
1034 	char ch;
1035 
1036 	if (cvp > context_vec_ptr)
1037 		return;
1038 
1039 	b = d = 0;		/* gcc */
1040 	lowa = max(1, cvp->a - context);
1041 	upb = min(len[0], context_vec_ptr->b + context);
1042 	lowc = max(1, cvp->c - context);
1043 	upd = min(len[1], context_vec_ptr->d + context);
1044 
1045 	printf("***************\n*** ");
1046 	range(lowa, upb, ",");
1047 	printf(" ****\n");
1048 
1049 	/*
1050 	 * output changes to the "old" file.  The first loop suppresses
1051 	 * output if there were no changes to the "old" file (we'll see
1052 	 * the "old" lines as context in the "new" list).
1053 	 */
1054 	do_output = 0;
1055 	for (; cvp <= context_vec_ptr; cvp++)
1056 		if (cvp->a <= cvp->b) {
1057 			cvp = context_vec_start;
1058 			do_output++;
1059 			break;
1060 		}
1061 	if (do_output) {
1062 		while (cvp <= context_vec_ptr) {
1063 			a = cvp->a;
1064 			b = cvp->b;
1065 			c = cvp->c;
1066 			d = cvp->d;
1067 
1068 			if (a <= b && c <= d)
1069 				ch = 'c';
1070 			else
1071 				ch = (a <= b) ? 'd' : 'a';
1072 
1073 			if (ch == 'a')
1074 				fetch(ixold, lowa, b, input[0], "  ", 0);
1075 			else {
1076 				fetch(ixold, lowa, a - 1, input[0], "  ", 0);
1077 				fetch(ixold, a, b, input[0],
1078 				    ch == 'c' ? "! " : "- ", 0);
1079 			}
1080 			lowa = b + 1;
1081 			cvp++;
1082 		}
1083 		fetch(ixold, b + 1, upb, input[0], "  ", 0);
1084 	}
1085 	/* output changes to the "new" file */
1086 	printf("--- ");
1087 	range(lowc, upd, ",");
1088 	printf(" ----\n");
1089 
1090 	do_output = 0;
1091 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1092 		if (cvp->c <= cvp->d) {
1093 			cvp = context_vec_start;
1094 			do_output++;
1095 			break;
1096 		}
1097 	if (do_output) {
1098 		while (cvp <= context_vec_ptr) {
1099 			a = cvp->a;
1100 			b = cvp->b;
1101 			c = cvp->c;
1102 			d = cvp->d;
1103 
1104 			if (a <= b && c <= d)
1105 				ch = 'c';
1106 			else
1107 				ch = (a <= b) ? 'd' : 'a';
1108 
1109 			if (ch == 'd')
1110 				fetch(ixnew, lowc, d, input[1], "  ", 0);
1111 			else {
1112 				fetch(ixnew, lowc, c - 1, input[1], "  ", 0);
1113 				fetch(ixnew, c, d, input[1],
1114 				    ch == 'c' ? "! " : "+ ", 0);
1115 			}
1116 			lowc = d + 1;
1117 			cvp++;
1118 		}
1119 		fetch(ixnew, d + 1, upd, input[1], "  ", 0);
1120 	}
1121 	context_vec_ptr = context_vec_start - 1;
1122 }
1123