xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 5004e76e0957e576070580d43a0b7bf642d3309c)
1 /*	$OpenBSD: diffreg.c,v 1.7 2003/06/25 03:39:23 tedu Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <string.h>
43 
44 #include "diff.h"
45 #include "pathnames.h"
46 
47 #if 0
48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90";
49 #endif
50 
51 /*
52  * diff - compare two files.
53  */
54 
55 /*
56  *	Uses an algorithm due to Harold Stone, which finds
57  *	a pair of longest identical subsequences in the two
58  *	files.
59  *
60  *	The major goal is to generate the match vector J.
61  *	J[i] is the index of the line in file1 corresponding
62  *	to line i file0. J[i] = 0 if there is no
63  *	such line in file1.
64  *
65  *	Lines are hashed so as to work in core. All potential
66  *	matches are located by sorting the lines of each file
67  *	on the hash (called ``value''). In particular, this
68  *	collects the equivalence classes in file1 together.
69  *	Subroutine equiv replaces the value of each line in
70  *	file0 by the index of the first element of its
71  *	matching equivalence in (the reordered) file1.
72  *	To save space equiv squeezes file1 into a single
73  *	array member in which the equivalence classes
74  *	are simply concatenated, except that their first
75  *	members are flagged by changing sign.
76  *
77  *	Next the indices that point into member are unsorted into
78  *	array class according to the original order of file0.
79  *
80  *	The cleverness lies in routine stone. This marches
81  *	through the lines of file0, developing a vector klist
82  *	of "k-candidates". At step i a k-candidate is a matched
83  *	pair of lines x,y (x in file0 y in file1) such that
84  *	there is a common subsequence of length k
85  *	between the first i lines of file0 and the first y
86  *	lines of file1, but there is no such subsequence for
87  *	any smaller y. x is the earliest possible mate to y
88  *	that occurs in such a subsequence.
89  *
90  *	Whenever any of the members of the equivalence class of
91  *	lines in file1 matable to a line in file0 has serial number
92  *	less than the y of some k-candidate, that k-candidate
93  *	with the smallest such y is replaced. The new
94  *	k-candidate is chained (via pred) to the current
95  *	k-1 candidate so that the actual subsequence can
96  *	be recovered. When a member has serial number greater
97  *	that the y of all k-candidates, the klist is extended.
98  *	At the end, the longest subsequence is pulled out
99  *	and placed in the array J by unravel
100  *
101  *	With J in hand, the matches there recorded are
102  *	check'ed against reality to assure that no spurious
103  *	matches have crept in due to hashing. If they have,
104  *	they are broken, and "jackpot" is recorded--a harmless
105  *	matter except that a true match for a spuriously
106  *	mated line may now be unnecessarily reported as a change.
107  *
108  *	Much of the complexity of the program comes simply
109  *	from trying to minimize core utilization and
110  *	maximize the range of doable problems by dynamically
111  *	allocating what is needed and reusing what is not.
112  *	The core requirements for problems larger than somewhat
113  *	are (in words) 2*length(file0) + length(file1) +
114  *	3*(number of k-candidates installed),  typically about
115  *	6n words for files of length n.
116  */
117 
118 #define	prints(s)	fputs(s,stdout)
119 
120 FILE *input[2];
121 
122 struct cand {
123 	int x;
124 	int y;
125 	int pred;
126 } cand;
127 
128 struct line {
129 	int serial;
130 	int value;
131 } *file[2], line;
132 
133 int len[2];
134 struct line *sfile[2];	/* shortened by pruning common prefix and suffix */
135 int slen[2];
136 int pref, suff;			/* length of prefix and suffix */
137 int *class;			/* will be overlaid on file[0] */
138 int *member;			/* will be overlaid on file[1] */
139 int *klist;			/* will be overlaid on file[0] after class */
140 struct cand *clist;		/* merely a free storage pot for candidates */
141 int clen = 0;
142 int *J;				/* will be overlaid on class */
143 long *ixold;			/* will be overlaid on klist */
144 long *ixnew;			/* will be overlaid on file[1] */
145 u_char *chrtran;		/* translation table for case-folding */
146 
147 static void fetch(long *, int, int, FILE *, char *, int);
148 static void output(void);
149 static void check(void);
150 static void range(int, int, char *);
151 static void dump_context_vec(void);
152 static void prepare(int, FILE *);
153 static void prune(void);
154 static void equiv(struct line *, int, struct line *, int, int *);
155 static void unravel(int);
156 static void unsort(struct line *, int, int *);
157 static void change(int, int, int, int);
158 static void sort(struct line *, int);
159 static int newcand(int, int, int);
160 static int search(int *, int, int);
161 static int skipline(int);
162 static int asciifile(FILE *);
163 static int stone(int *, int, int *, int *);
164 static int readhash(FILE *);
165 
166 /*
167  * chrtran points to one of 2 translation tables: cup2low if folding upper to
168  * lower case clow2low if not folding case
169  */
170 u_char clow2low[256] = {
171 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
172 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
173 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
174 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
175 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
176 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
177 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
178 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
179 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
180 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
181 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
182 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
183 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
184 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
185 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
186 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
187 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
188 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
189 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
190 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
191 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
192 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
193 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
194 	0xfd, 0xfe, 0xff
195 };
196 
197 u_char cup2low[256] = {
198 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
199 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
200 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
201 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
202 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
203 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
204 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
205 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
206 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
207 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
208 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
209 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
210 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
211 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
212 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
213 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
214 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
215 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
216 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
217 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
218 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
219 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
220 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
221 	0xfd, 0xfe, 0xff
222 };
223 
224 void
225 diffreg(void)
226 {
227 	int i, j;
228 	FILE *f1, *f2;
229 	char buf1[BUFSIZ], buf2[BUFSIZ];
230 
231 	if (hflag) {
232 		diffargv[0] = "diffh";
233 		execv(diffh, diffargv);
234 		fprintf(stderr, "diff: ");
235 		perror(diffh);
236 		done(0);
237 	}
238 	chrtran = (iflag ? cup2low : clow2low);
239 	if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
240 		file1 = splice(file1, file2);
241 		if (stat(file1, &stb1) < 0) {
242 			fprintf(stderr, "diff: ");
243 			perror(file1);
244 			done(0);
245 		}
246 	} else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
247 		file2 = splice(file2, file1);
248 		if (stat(file2, &stb2) < 0) {
249 			fprintf(stderr, "diff: ");
250 			perror(file2);
251 			done(0);
252 		}
253 	} else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
254 		if (!strcmp(file2, "-")) {
255 			fprintf(stderr, "diff: can't specify - -\n");
256 			done(0);
257 		}
258 		file1 = copytemp();
259 		if (stat(file1, &stb1) < 0) {
260 			fprintf(stderr, "diff: ");
261 			perror(file1);
262 			done(0);
263 		}
264 	} else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
265 		file2 = copytemp();
266 		if (stat(file2, &stb2) < 0) {
267 			fprintf(stderr, "diff: ");
268 			perror(file2);
269 			done(0);
270 		}
271 	}
272 	if ((f1 = fopen(file1, "r")) == NULL) {
273 		fprintf(stderr, "diff: ");
274 		perror(file1);
275 		done(0);
276 	}
277 	if ((f2 = fopen(file2, "r")) == NULL) {
278 		fprintf(stderr, "diff: ");
279 		perror(file2);
280 		fclose(f1);
281 		done(0);
282 	}
283 	if (stb1.st_size != stb2.st_size)
284 		goto notsame;
285 	for (;;) {
286 		i = fread(buf1, 1, BUFSIZ, f1);
287 		j = fread(buf2, 1, BUFSIZ, f2);
288 		if (i < 0 || j < 0 || i != j)
289 			goto notsame;
290 		if (i == 0 && j == 0) {
291 			fclose(f1);
292 			fclose(f2);
293 			status = 0;	/* files don't differ */
294 			goto same;
295 		}
296 		for (j = 0; j < i; j++)
297 			if (buf1[j] != buf2[j])
298 				goto notsame;
299 	}
300 notsame:
301 	/*
302 	 *	Files certainly differ at this point; set status accordingly
303 	 */
304 	status = 1;
305 	if (!asciifile(f1) || !asciifile(f2)) {
306 		printf("Binary files %s and %s differ\n", file1, file2);
307 		fclose(f1);
308 		fclose(f2);
309 		done(0);
310 	}
311 	prepare(0, f1);
312 	prepare(1, f2);
313 	fclose(f1);
314 	fclose(f2);
315 	prune();
316 	sort(sfile[0], slen[0]);
317 	sort(sfile[1], slen[1]);
318 
319 	member = (int *)file[1];
320 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
321 	member = ralloc(member, (slen[1] + 2) * sizeof(int));
322 
323 	class = (int *)file[0];
324 	unsort(sfile[0], slen[0], class);
325 	class = ralloc(class, (slen[0] + 2) * sizeof(int));
326 
327 	klist = talloc((slen[0] + 2) * sizeof(int));
328 	clist = talloc(sizeof(cand));
329 	i = stone(class, slen[0], member, klist);
330 	free(member);
331 	free(class);
332 
333 	J = talloc((len[0] + 2) * sizeof(int));
334 	unravel(klist[i]);
335 	free(clist);
336 	free(klist);
337 
338 	ixold = talloc((len[0] + 2) * sizeof(long));
339 	ixnew = talloc((len[1] + 2) * sizeof(long));
340 	check();
341 	output();
342 	status = anychange;
343 same:
344 	if (opt == D_CONTEXT && anychange == 0)
345 		printf("No differences encountered\n");
346 	done(0);
347 }
348 
349 char *tempfile = _PATH_TMP;
350 
351 char *
352 copytemp(void)
353 {
354 	char buf[BUFSIZ];
355 	int i, f;
356 
357 	signal(SIGHUP, catchsig);
358 	signal(SIGINT, catchsig);
359 	signal(SIGPIPE, catchsig);
360 	signal(SIGTERM, catchsig);
361 	f = mkstemp(tempfile);
362 	if (f < 0) {
363 		fprintf(stderr, "diff: ");
364 		perror(tempfile);
365 		done(0);
366 	}
367 	while ((i = read(0, buf, BUFSIZ)) > 0)
368 		if (write(f, buf, i) != i) {
369 			fprintf(stderr, "diff: ");
370 			perror(tempfile);
371 			done(0);
372 		}
373 	close(f);
374 	return (tempfile);
375 }
376 
377 char *
378 splice(char *dir, char *file)
379 {
380 	char *tail;
381 	char buf[BUFSIZ];
382 
383 	if (!strcmp(file, "-")) {
384 		fprintf(stderr, "diff: can't specify - with other arg directory\n");
385 		done(0);
386 	}
387 	tail = rindex(file, '/');
388 	if (tail == 0)
389 		tail = file;
390 	else
391 		tail++;
392 	snprintf(buf, sizeof buf, "%s/%s", dir, tail);
393 	return (strdup(buf));
394 }
395 
396 static void
397 prepare(int i, FILE *fd)
398 {
399 	struct line *p;
400 	int j, h;
401 
402 	fseek(fd, 0, 0);
403 	p = talloc(3 * sizeof(line));
404 	for (j = 0; (h = readhash(fd));) {
405 		p = ralloc(p, (++j + 3) * sizeof(line));
406 		p[j].value = h;
407 	}
408 	len[i] = j;
409 	file[i] = p;
410 }
411 
412 static void
413 prune(void)
414 {
415 	int i, j;
416 	for (pref = 0; pref < len[0] && pref < len[1] &&
417 	    file[0][pref + 1].value == file[1][pref + 1].value;
418 	    pref++);
419 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
420 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
421 	    suff++);
422 	for (j = 0; j < 2; j++) {
423 		sfile[j] = file[j] + pref;
424 		slen[j] = len[j] - pref - suff;
425 		for (i = 0; i <= slen[j]; i++)
426 			sfile[j][i].serial = i;
427 	}
428 }
429 
430 static void
431 equiv(struct line *a, int n, struct line *b, int m, int *c)
432 {
433 	int i, j;
434 	i = j = 1;
435 	while (i <= n && j <= m) {
436 		if (a[i].value < b[j].value)
437 			a[i++].value = 0;
438 		else if (a[i].value == b[j].value)
439 			a[i++].value = j;
440 		else
441 			j++;
442 	}
443 	while (i <= n)
444 		a[i++].value = 0;
445 	b[m + 1].value = 0;
446 	j = 0;
447 	while (++j <= m) {
448 		c[j] = -b[j].serial;
449 		while (b[j + 1].value == b[j].value) {
450 			j++;
451 			c[j] = b[j].serial;
452 		}
453 	}
454 	c[j] = -1;
455 }
456 
457 static int
458 stone(int *a, int n, int *b, int *c)
459 {
460 	int i, k, y;
461 	int j, l;
462 	int oldc, tc;
463 	int oldl;
464 	k = 0;
465 	c[0] = newcand(0, 0, 0);
466 	for (i = 1; i <= n; i++) {
467 		j = a[i];
468 		if (j == 0)
469 			continue;
470 		y = -b[j];
471 		oldl = 0;
472 		oldc = c[0];
473 		do {
474 			if (y <= clist[oldc].y)
475 				continue;
476 			l = search(c, k, y);
477 			if (l != oldl + 1)
478 				oldc = c[l - 1];
479 			if (l <= k) {
480 				if (clist[c[l]].y <= y)
481 					continue;
482 				tc = c[l];
483 				c[l] = newcand(i, y, oldc);
484 				oldc = tc;
485 				oldl = l;
486 			} else {
487 				c[l] = newcand(i, y, oldc);
488 				k++;
489 				break;
490 			}
491 		} while ((y = b[++j]) > 0);
492 	}
493 	return (k);
494 }
495 
496 static int
497 newcand(int x, int y, int pred)
498 {
499 	struct cand *q;
500 
501 	clist = ralloc(clist, ++clen * sizeof(cand));
502 	q = clist + clen - 1;
503 	q->x = x;
504 	q->y = y;
505 	q->pred = pred;
506 	return (clen - 1);
507 }
508 
509 static int
510 search(int *c, int k, int y)
511 {
512 	int i, j, l;
513 	int t;
514 	if (clist[c[k]].y < y)	/* quick look for typical case */
515 		return (k + 1);
516 	i = 0;
517 	j = k + 1;
518 	while (1) {
519 		l = i + j;
520 		if ((l >>= 1) <= i)
521 			break;
522 		t = clist[c[l]].y;
523 		if (t > y)
524 			j = l;
525 		else if (t < y)
526 			i = l;
527 		else
528 			return (l);
529 	}
530 	return (l + 1);
531 }
532 
533 static void
534 unravel(int p)
535 {
536 	int i;
537 	struct cand *q;
538 	for (i = 0; i <= len[0]; i++)
539 		J[i] = i <= pref ? i :
540 		    i > len[0] - suff ? i + len[1] - len[0] :
541 		    0;
542 	for (q = clist + p; q->y != 0; q = clist + q->pred)
543 		J[q->x + pref] = q->y + pref;
544 }
545 
546 /*
547  * check does double duty: 1.  ferret out any fortuitous correspondences due
548  * to confounding by hashing (which result in "jackpot") 2.  collect random
549  * access indexes to the two files
550  */
551 
552 static void
553 check(void)
554 {
555 	int i, j;
556 	int jackpot;
557 	long ctold, ctnew;
558 	int c, d;
559 
560 	if ((input[0] = fopen(file1, "r")) == NULL) {
561 		perror(file1);
562 		done(0);
563 	}
564 	if ((input[1] = fopen(file2, "r")) == NULL) {
565 		perror(file2);
566 		done(0);
567 	}
568 	j = 1;
569 	ixold[0] = ixnew[0] = 0;
570 	jackpot = 0;
571 	ctold = ctnew = 0;
572 	for (i = 1; i <= len[0]; i++) {
573 		if (J[i] == 0) {
574 			ixold[i] = ctold += skipline(0);
575 			continue;
576 		}
577 		while (j < J[i]) {
578 			ixnew[j] = ctnew += skipline(1);
579 			j++;
580 		}
581 		if (bflag || wflag || iflag) {
582 			for (;;) {
583 				c = getc(input[0]);
584 				d = getc(input[1]);
585 				ctold++;
586 				ctnew++;
587 				if (bflag && isspace(c) && isspace(d)) {
588 					do {
589 						if (c == '\n')
590 							break;
591 						ctold++;
592 					} while (isspace(c = getc(input[0])));
593 					do {
594 						if (d == '\n')
595 							break;
596 						ctnew++;
597 					} while (isspace(d = getc(input[1])));
598 				} else if (wflag) {
599 					while (isspace(c) && c != '\n') {
600 						c = getc(input[0]);
601 						ctold++;
602 					}
603 					while (isspace(d) && d != '\n') {
604 						d = getc(input[1]);
605 						ctnew++;
606 					}
607 				}
608 				if (chrtran[c] != chrtran[d]) {
609 					jackpot++;
610 					J[i] = 0;
611 					if (c != '\n')
612 						ctold += skipline(0);
613 					if (d != '\n')
614 						ctnew += skipline(1);
615 					break;
616 				}
617 				if (c == '\n')
618 					break;
619 			}
620 		} else {
621 			for (;;) {
622 				ctold++;
623 				ctnew++;
624 				if ((c = getc(input[0])) != (d = getc(input[1]))) {
625 					/* jackpot++; */
626 					J[i] = 0;
627 					if (c != '\n')
628 						ctold += skipline(0);
629 					if (d != '\n')
630 						ctnew += skipline(1);
631 					break;
632 				}
633 				if (c == '\n')
634 					break;
635 			}
636 		}
637 		ixold[i] = ctold;
638 		ixnew[j] = ctnew;
639 		j++;
640 	}
641 	for (; j <= len[1]; j++) {
642 		ixnew[j] = ctnew += skipline(1);
643 	}
644 	fclose(input[0]);
645 	fclose(input[1]);
646 	/*
647 		if(jackpot)
648 			fprintf(stderr, "jackpot\n");
649 	*/
650 }
651 
652 static void
653 sort(struct line *a, int n)
654 {				/* shellsort CACM #201 */
655 	struct line w;
656 	int j, m = 0;		/* gcc */
657 	struct line *ai;
658 	struct line *aim;
659 	int k;
660 
661 	if (n == 0)
662 		return;
663 	for (j = 1; j <= n; j *= 2)
664 		m = 2 * j - 1;
665 	for (m /= 2; m != 0; m /= 2) {
666 		k = n - m;
667 		for (j = 1; j <= k; j++) {
668 			for (ai = &a[j]; ai > a; ai -= m) {
669 				aim = &ai[m];
670 				if (aim < ai)
671 					break;	/* wraparound */
672 				if (aim->value > ai[0].value ||
673 				    (aim->value == ai[0].value &&
674 					aim->serial > ai[0].serial))
675 					break;
676 				w.value = ai[0].value;
677 				ai[0].value = aim->value;
678 				aim->value = w.value;
679 				w.serial = ai[0].serial;
680 				ai[0].serial = aim->serial;
681 				aim->serial = w.serial;
682 			}
683 		}
684 	}
685 }
686 
687 static void
688 unsort(struct line *f, int l, int *b)
689 {
690 	int *a;
691 	int i;
692 
693 	a = talloc((l + 1) * sizeof(int));
694 	for (i = 1; i <= l; i++)
695 		a[f[i].serial] = f[i].value;
696 	for (i = 1; i <= l; i++)
697 		b[i] = a[i];
698 	free(a);
699 }
700 
701 static int
702 skipline(int f)
703 {
704 	int i, c;
705 
706 	for (i = 1; (c = getc(input[f])) != '\n'; i++)
707 		if (c < 0)
708 			return (i);
709 	return (i);
710 }
711 
712 static void
713 output(void)
714 {
715 	int m;
716 	int i0, i1, j1;
717 	int j0;
718 	input[0] = fopen(file1, "r");
719 	input[1] = fopen(file2, "r");
720 	m = len[0];
721 	J[0] = 0;
722 	J[m + 1] = len[1] + 1;
723 	if (opt != D_EDIT) {
724 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
725 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
726 				i0++;
727 			j0 = J[i0 - 1] + 1;
728 			i1 = i0 - 1;
729 			while (i1 < m && J[i1 + 1] == 0)
730 				i1++;
731 			j1 = J[i1 + 1] - 1;
732 			J[i1] = j1;
733 			change(i0, i1, j0, j1);
734 		}
735 	} else {
736 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
737 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
738 				i0--;
739 			j0 = J[i0 + 1] - 1;
740 			i1 = i0 + 1;
741 			while (i1 > 1 && J[i1 - 1] == 0)
742 				i1--;
743 			j1 = J[i1 - 1] + 1;
744 			J[i1] = j1;
745 			change(i1, i0, j1, j0);
746 		}
747 	}
748 	if (m == 0)
749 		change(1, 0, 1, len[1]);
750 	if (opt == D_IFDEF) {
751 		for (;;) {
752 #define	c i0
753 			c = getc(input[0]);
754 			if (c < 0)
755 				return;
756 			putchar(c);
757 		}
758 #undef c
759 	}
760 	if (anychange && opt == D_CONTEXT)
761 		dump_context_vec();
762 }
763 
764 /*
765  * The following struct is used to record change information when
766  * doing a "context" diff.  (see routine "change" to understand the
767  * highly mneumonic field names)
768  */
769 struct context_vec {
770 	int a;			/* start line in old file */
771 	int b;			/* end line in old file */
772 	int c;			/* start line in new file */
773 	int d;			/* end line in new file */
774 };
775 
776 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr;
777 
778 #define	MAX_CONTEXT	128
779 
780 /*
781  * indicate that there is a difference between lines a and b of the from file
782  * to get to lines c to d of the to file. If a is greater then b then there
783  * are no lines in the from file involved and this means that there were
784  * lines appended (beginning at b). If c is greater than d then there are
785  * lines missing from the to file.
786  */
787 static void
788 change(int a, int b, int c, int d)
789 {
790 	struct stat stbuf;
791 
792 	if (opt != D_IFDEF && a > b && c > d)
793 		return;
794 	if (anychange == 0) {
795 		anychange = 1;
796 		if (opt == D_CONTEXT) {
797 			printf("*** %s	", file1);
798 			stat(file1, &stbuf);
799 			printf("%s--- %s	",
800 			    ctime(&stbuf.st_mtime), file2);
801 			stat(file2, &stbuf);
802 			printf("%s", ctime(&stbuf.st_mtime));
803 
804 			context_vec_start = talloc(MAX_CONTEXT *
805 			    sizeof(struct context_vec));
806 			context_vec_end = context_vec_start + MAX_CONTEXT;
807 			context_vec_ptr = context_vec_start - 1;
808 		}
809 	}
810 	if (opt == D_CONTEXT) {
811 		/*
812 		 * if this new change is within 'context' lines of
813 		 * the previous change, just add it to the change
814 		 * record.  If the record is full or if this
815 		 * change is more than 'context' lines from the previous
816 		 * change, dump the record, reset it & add the new change.
817 		 */
818 		if (context_vec_ptr >= context_vec_end ||
819 		    (context_vec_ptr >= context_vec_start &&
820 			a > (context_vec_ptr->b + 2 * context) &&
821 			c > (context_vec_ptr->d + 2 * context)))
822 			dump_context_vec();
823 
824 		context_vec_ptr++;
825 		context_vec_ptr->a = a;
826 		context_vec_ptr->b = b;
827 		context_vec_ptr->c = c;
828 		context_vec_ptr->d = d;
829 		return;
830 	}
831 	switch (opt) {
832 
833 	case D_NORMAL:
834 	case D_EDIT:
835 		range(a, b, ",");
836 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
837 		if (opt == D_NORMAL)
838 			range(c, d, ",");
839 		putchar('\n');
840 		break;
841 	case D_REVERSE:
842 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
843 		range(a, b, " ");
844 		putchar('\n');
845 		break;
846 	case D_NREVERSE:
847 		if (a > b)
848 			printf("a%d %d\n", b, d - c + 1);
849 		else {
850 			printf("d%d %d\n", a, b - a + 1);
851 			if (!(c > d))
852 				/* add changed lines */
853 				printf("a%d %d\n", b, d - c + 1);
854 		}
855 		break;
856 	}
857 	if (opt == D_NORMAL || opt == D_IFDEF) {
858 		fetch(ixold, a, b, input[0], "< ", 1);
859 		if (a <= b && c <= d && opt == D_NORMAL)
860 			prints("---\n");
861 	}
862 	fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0);
863 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
864 		prints(".\n");
865 	if (inifdef) {
866 		fprintf(stdout, "#endif /* %s */\n", endifname);
867 		inifdef = 0;
868 	}
869 }
870 
871 static void
872 range(int a, int b, char *separator)
873 {
874 	printf("%d", a > b ? b : a);
875 	if (a < b) {
876 		printf("%s%d", separator, b);
877 	}
878 }
879 
880 static void
881 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile)
882 {
883 	int i, j;
884 	int c;
885 	int col;
886 	int nc;
887 	int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
888 
889 	/*
890 	 * When doing #ifdef's, copy down to current line
891 	 * if this is the first file, so that stuff makes it to output.
892 	 */
893 	if (opt == D_IFDEF && oldfile) {
894 		long curpos = ftell(lb);
895 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
896 		nc = f[a > b ? b : a - 1] - curpos;
897 		for (i = 0; i < nc; i++)
898 			putchar(getc(lb));
899 	}
900 	if (a > b)
901 		return;
902 	if (opt == D_IFDEF) {
903 		if (inifdef)
904 			fprintf(stdout, "#else /* %s%s */\n", oneflag && oldfile == 1 ? "!" : "", ifdef2);
905 		else {
906 			if (oneflag) {
907 				/* There was only one ifdef given */
908 				endifname = ifdef2;
909 				if (oldfile)
910 					fprintf(stdout, "#ifndef %s\n", endifname);
911 				else
912 					fprintf(stdout, "#ifdef %s\n", endifname);
913 			} else {
914 				endifname = oldfile ? ifdef1 : ifdef2;
915 				fprintf(stdout, "#ifdef %s\n", endifname);
916 			}
917 		}
918 		inifdef = 1 + oldfile;
919 	}
920 	for (i = a; i <= b; i++) {
921 		fseek(lb, f[i - 1], 0);
922 		nc = f[i] - f[i - 1];
923 		if (opt != D_IFDEF)
924 			prints(s);
925 		col = 0;
926 		for (j = 0; j < nc; j++) {
927 			c = getc(lb);
928 			if (c == '\t' && tflag)
929 				do
930 					putchar(' ');
931 				while (++col & 7);
932 			else {
933 				putchar(c);
934 				col++;
935 			}
936 		}
937 	}
938 
939 	if (inifdef && !wantelses) {
940 		fprintf(stdout, "#endif /* %s */\n", endifname);
941 		inifdef = 0;
942 	}
943 }
944 
945 #define POW2			/* define only if HALFLONG is 2**n */
946 #define HALFLONG 16
947 #define low(x)	(x&((1L<<HALFLONG)-1))
948 #define high(x)	(x>>HALFLONG)
949 
950 /*
951  * hashing has the effect of
952  * arranging line in 7-bit bytes and then
953  * summing 1-s complement in 16-bit hunks
954  */
955 static int
956 readhash(FILE *f)
957 {
958 	long sum;
959 	unsigned shift;
960 	int t;
961 	int space;
962 
963 	sum = 1;
964 	space = 0;
965 	if (!bflag && !wflag) {
966 		if (iflag)
967 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
968 				if (t == -1)
969 					return (0);
970 				sum += (long)chrtran[t] << (shift
971 #ifdef POW2
972 				    &= HALFLONG - 1);
973 #else
974 				    %= HALFLONG);
975 #endif
976 			}
977 		else
978 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
979 				if (t == -1)
980 					return (0);
981 				sum += (long)t << (shift
982 #ifdef POW2
983 				    &= HALFLONG - 1);
984 #else
985 				    %= HALFLONG);
986 #endif
987 			}
988 	} else {
989 		for (shift = 0;;) {
990 			switch (t = getc(f)) {
991 			case -1:
992 				return (0);
993 			case '\t':
994 			case ' ':
995 				space++;
996 				continue;
997 			default:
998 				if (space && !wflag) {
999 					shift += 7;
1000 					space = 0;
1001 				}
1002 				sum += (long)chrtran[t] << (shift
1003 #ifdef POW2
1004 				    &= HALFLONG - 1);
1005 #else
1006 				    %= HALFLONG);
1007 #endif
1008 				shift += 7;
1009 				continue;
1010 			case '\n':
1011 				break;
1012 			}
1013 			break;
1014 		}
1015 	}
1016 	sum = low(sum) + high(sum);
1017 	return ((short) low(sum) + (short) high(sum));
1018 }
1019 
1020 static int
1021 asciifile(FILE *f)
1022 {
1023 	char buf[BUFSIZ];
1024 	int cnt;
1025 	char *cp;
1026 
1027 	fseek(f, 0, 0);
1028 	cnt = fread(buf, 1, BUFSIZ, f);
1029 	cp = buf;
1030 	while (--cnt >= 0)
1031 		if (*cp++ & 0200)
1032 			return (0);
1033 	return (1);
1034 }
1035 
1036 
1037 /* dump accumulated "context" diff changes */
1038 static void
1039 dump_context_vec(void)
1040 {
1041 	int a, b, c, d;
1042 	char ch;
1043 	struct context_vec *cvp = context_vec_start;
1044 	int lowa, upb, lowc, upd;
1045 	int do_output;
1046 
1047 	if (cvp > context_vec_ptr)
1048 		return;
1049 
1050 	b = d = 0;		/* gcc */
1051 	lowa = max(1, cvp->a - context);
1052 	upb = min(len[0], context_vec_ptr->b + context);
1053 	lowc = max(1, cvp->c - context);
1054 	upd = min(len[1], context_vec_ptr->d + context);
1055 
1056 	printf("***************\n*** ");
1057 	range(lowa, upb, ",");
1058 	printf(" ****\n");
1059 
1060 	/*
1061 	 * output changes to the "old" file.  The first loop suppresses
1062 	 * output if there were no changes to the "old" file (we'll see
1063 	 * the "old" lines as context in the "new" list).
1064 	 */
1065 	do_output = 0;
1066 	for (; cvp <= context_vec_ptr; cvp++)
1067 		if (cvp->a <= cvp->b) {
1068 			cvp = context_vec_start;
1069 			do_output++;
1070 			break;
1071 		}
1072 	if (do_output) {
1073 		while (cvp <= context_vec_ptr) {
1074 			a = cvp->a;
1075 			b = cvp->b;
1076 			c = cvp->c;
1077 			d = cvp->d;
1078 
1079 			if (a <= b && c <= d)
1080 				ch = 'c';
1081 			else
1082 				ch = (a <= b) ? 'd' : 'a';
1083 
1084 			if (ch == 'a')
1085 				fetch(ixold, lowa, b, input[0], "  ", 0);
1086 			else {
1087 				fetch(ixold, lowa, a - 1, input[0], "  ", 0);
1088 				fetch(ixold, a, b, input[0], ch == 'c' ? "! " : "- ", 0);
1089 			}
1090 			lowa = b + 1;
1091 			cvp++;
1092 		}
1093 		fetch(ixold, b + 1, upb, input[0], "  ", 0);
1094 	}
1095 	/* output changes to the "new" file */
1096 	printf("--- ");
1097 	range(lowc, upd, ",");
1098 	printf(" ----\n");
1099 
1100 	do_output = 0;
1101 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1102 		if (cvp->c <= cvp->d) {
1103 			cvp = context_vec_start;
1104 			do_output++;
1105 			break;
1106 		}
1107 	if (do_output) {
1108 		while (cvp <= context_vec_ptr) {
1109 			a = cvp->a;
1110 			b = cvp->b;
1111 			c = cvp->c;
1112 			d = cvp->d;
1113 
1114 			if (a <= b && c <= d)
1115 				ch = 'c';
1116 			else
1117 				ch = (a <= b) ? 'd' : 'a';
1118 
1119 			if (ch == 'd')
1120 				fetch(ixnew, lowc, d, input[1], "  ", 0);
1121 			else {
1122 				fetch(ixnew, lowc, c - 1, input[1], "  ", 0);
1123 				fetch(ixnew, c, d, input[1], ch == 'c' ? "! " : "+ ", 0);
1124 			}
1125 			lowc = d + 1;
1126 			cvp++;
1127 		}
1128 		fetch(ixnew, d + 1, upd, input[1], "  ", 0);
1129 	}
1130 	context_vec_ptr = context_vec_start - 1;
1131 }
1132