xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 49dffe13d7fe900b7adc58a2f8d6809cffbe02d7)
1 /*	$OpenBSD: diffreg.c,v 1.16 2003/06/25 21:43:49 millert Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <string.h>
43 
44 #include "diff.h"
45 #include "pathnames.h"
46 
47 #if 0
48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90";
49 #endif
50 
51 /*
52  * diff - compare two files.
53  */
54 
55 /*
56  *	Uses an algorithm due to Harold Stone, which finds
57  *	a pair of longest identical subsequences in the two
58  *	files.
59  *
60  *	The major goal is to generate the match vector J.
61  *	J[i] is the index of the line in file1 corresponding
62  *	to line i file0. J[i] = 0 if there is no
63  *	such line in file1.
64  *
65  *	Lines are hashed so as to work in core. All potential
66  *	matches are located by sorting the lines of each file
67  *	on the hash (called ``value''). In particular, this
68  *	collects the equivalence classes in file1 together.
69  *	Subroutine equiv replaces the value of each line in
70  *	file0 by the index of the first element of its
71  *	matching equivalence in (the reordered) file1.
72  *	To save space equiv squeezes file1 into a single
73  *	array member in which the equivalence classes
74  *	are simply concatenated, except that their first
75  *	members are flagged by changing sign.
76  *
77  *	Next the indices that point into member are unsorted into
78  *	array class according to the original order of file0.
79  *
80  *	The cleverness lies in routine stone. This marches
81  *	through the lines of file0, developing a vector klist
82  *	of "k-candidates". At step i a k-candidate is a matched
83  *	pair of lines x,y (x in file0 y in file1) such that
84  *	there is a common subsequence of length k
85  *	between the first i lines of file0 and the first y
86  *	lines of file1, but there is no such subsequence for
87  *	any smaller y. x is the earliest possible mate to y
88  *	that occurs in such a subsequence.
89  *
90  *	Whenever any of the members of the equivalence class of
91  *	lines in file1 matable to a line in file0 has serial number
92  *	less than the y of some k-candidate, that k-candidate
93  *	with the smallest such y is replaced. The new
94  *	k-candidate is chained (via pred) to the current
95  *	k-1 candidate so that the actual subsequence can
96  *	be recovered. When a member has serial number greater
97  *	that the y of all k-candidates, the klist is extended.
98  *	At the end, the longest subsequence is pulled out
99  *	and placed in the array J by unravel
100  *
101  *	With J in hand, the matches there recorded are
102  *	check'ed against reality to assure that no spurious
103  *	matches have crept in due to hashing. If they have,
104  *	they are broken, and "jackpot" is recorded--a harmless
105  *	matter except that a true match for a spuriously
106  *	mated line may now be unnecessarily reported as a change.
107  *
108  *	Much of the complexity of the program comes simply
109  *	from trying to minimize core utilization and
110  *	maximize the range of doable problems by dynamically
111  *	allocating what is needed and reusing what is not.
112  *	The core requirements for problems larger than somewhat
113  *	are (in words) 2*length(file0) + length(file1) +
114  *	3*(number of k-candidates installed),  typically about
115  *	6n words for files of length n.
116  */
117 
118 #define	prints(s)	fputs(s,stdout)
119 
120 FILE *input[2];
121 
122 struct cand {
123 	int x;
124 	int y;
125 	int pred;
126 } cand;
127 
128 struct line {
129 	int serial;
130 	int value;
131 } *file[2];
132 
133 int len[2];
134 struct line *sfile[2];	/* shortened by pruning common prefix and suffix */
135 int slen[2];
136 int pref, suff;			/* length of prefix and suffix */
137 int *class;			/* will be overlaid on file[0] */
138 int *member;			/* will be overlaid on file[1] */
139 int *klist;			/* will be overlaid on file[0] after class */
140 struct cand *clist;		/* merely a free storage pot for candidates */
141 int clen = 0;
142 int *J;				/* will be overlaid on class */
143 long *ixold;			/* will be overlaid on klist */
144 long *ixnew;			/* will be overlaid on file[1] */
145 u_char *chrtran;		/* translation table for case-folding */
146 
147 static void fetch(long *, int, int, FILE *, char *, int);
148 static void output(void);
149 static void check(void);
150 static void range(int, int, char *);
151 static void dump_context_vec(void);
152 static void prepare(int, FILE *);
153 static void prune(void);
154 static void equiv(struct line *, int, struct line *, int, int *);
155 static void unravel(int);
156 static void unsort(struct line *, int, int *);
157 static void change(int, int, int, int);
158 static void sort(struct line *, int);
159 static int newcand(int, int, int);
160 static int search(int *, int, int);
161 static int skipline(int);
162 static int asciifile(FILE *);
163 static int stone(int *, int, int *, int *);
164 static int readhash(FILE *);
165 
166 /*
167  * chrtran points to one of 2 translation tables: cup2low if folding upper to
168  * lower case clow2low if not folding case
169  */
170 u_char clow2low[256] = {
171 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
172 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
173 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
174 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
175 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
176 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
177 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
178 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
179 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
180 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
181 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
182 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
183 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
184 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
185 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
186 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
187 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
188 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
189 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
190 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
191 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
192 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
193 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
194 	0xfd, 0xfe, 0xff
195 };
196 
197 u_char cup2low[256] = {
198 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
199 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
200 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
201 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
202 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
203 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
204 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
205 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
206 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
207 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
208 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
209 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
210 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
211 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
212 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
213 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
214 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
215 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
216 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
217 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
218 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
219 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
220 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
221 	0xfd, 0xfe, 0xff
222 };
223 
224 void
225 diffreg(void)
226 {
227 	char buf1[BUFSIZ], buf2[BUFSIZ];
228 	FILE *f1, *f2;
229 	int i, j;
230 
231 	if (hflag) {
232 		diffargv[0] = "diffh";
233 		execv(diffh, diffargv);
234 		warn("%s", diffh);
235 		done(0);
236 	}
237 	chrtran = (iflag ? cup2low : clow2low);
238 	if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
239 		file1 = splice(file1, file2);
240 		if (stat(file1, &stb1) < 0) {
241 			warn("%s", file1);
242 			done(0);
243 		}
244 	} else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
245 		file2 = splice(file2, file1);
246 		if (stat(file2, &stb2) < 0) {
247 			warn("%s", file2);
248 			done(0);
249 		}
250 	} else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
251 		if (!strcmp(file2, "-")) {
252 			warnx("can't specify - -");
253 			done(0);
254 		}
255 		file1 = copytemp();
256 		if (stat(file1, &stb1) < 0) {
257 			warn("%s", file1);
258 			done(0);
259 		}
260 	} else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
261 		file2 = copytemp();
262 		if (stat(file2, &stb2) < 0) {
263 			warn("%s", file2);
264 			done(0);
265 		}
266 	}
267 	if ((f1 = fopen(file1, "r")) == NULL) {
268 		warn("%s", file1);
269 		done(0);
270 	}
271 	if ((f2 = fopen(file2, "r")) == NULL) {
272 		warn("%s", file2);
273 		fclose(f1);
274 		done(0);
275 	}
276 	if (stb1.st_size != stb2.st_size)
277 		goto notsame;
278 	for (;;) {
279 		i = fread(buf1, 1, BUFSIZ, f1);
280 		j = fread(buf2, 1, BUFSIZ, f2);
281 		if (i < 0 || j < 0 || i != j)
282 			goto notsame;
283 		if (i == 0 && j == 0) {
284 			fclose(f1);
285 			fclose(f2);
286 			status = 0;	/* files don't differ */
287 			goto same;
288 		}
289 		for (j = 0; j < i; j++)
290 			if (buf1[j] != buf2[j])
291 				goto notsame;
292 	}
293 notsame:
294 	/*
295 	 * Files certainly differ at this point; set status accordingly
296 	 */
297 	status = 1;
298 	if (!asciifile(f1) || !asciifile(f2)) {
299 		printf("Binary files %s and %s differ\n", file1, file2);
300 		fclose(f1);
301 		fclose(f2);
302 		done(0);
303 	}
304 	prepare(0, f1);
305 	prepare(1, f2);
306 	fclose(f1);
307 	fclose(f2);
308 	prune();
309 	sort(sfile[0], slen[0]);
310 	sort(sfile[1], slen[1]);
311 
312 	member = (int *)file[1];
313 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
314 	member = erealloc(member, (slen[1] + 2) * sizeof(int));
315 
316 	class = (int *)file[0];
317 	unsort(sfile[0], slen[0], class);
318 	class = erealloc(class, (slen[0] + 2) * sizeof(int));
319 
320 	klist = emalloc((slen[0] + 2) * sizeof(int));
321 	clist = emalloc(sizeof(cand));
322 	i = stone(class, slen[0], member, klist);
323 	free(member);
324 	free(class);
325 
326 	J = emalloc((len[0] + 2) * sizeof(int));
327 	unravel(klist[i]);
328 	free(clist);
329 	free(klist);
330 
331 	ixold = emalloc((len[0] + 2) * sizeof(long));
332 	ixnew = emalloc((len[1] + 2) * sizeof(long));
333 	check();
334 	output();
335 	status = anychange;
336 same:
337 	if (opt == D_CONTEXT && anychange == 0)
338 		printf("No differences encountered\n");
339 	done(0);
340 }
341 
342 char *tempfile = _PATH_TMP;
343 
344 char *
345 copytemp(void)
346 {
347 	char buf[BUFSIZ];
348 	int i, f;
349 
350 	signal(SIGHUP, done);
351 	signal(SIGINT, done);
352 	signal(SIGPIPE, done);
353 	signal(SIGTERM, done);
354 	f = mkstemp(tempfile);
355 	if (f < 0) {
356 		warn("%s", tempfile);
357 		done(0);
358 	}
359 	while ((i = read(0, buf, BUFSIZ)) > 0)
360 		if (write(f, buf, i) != i) {
361 			warn("%s", tempfile);
362 			done(0);
363 		}
364 	close(f);
365 	return (tempfile);
366 }
367 
368 char *
369 splice(char *dir, char *file)
370 {
371 	char *tail, *buf;
372 	size_t len;
373 
374 	if (!strcmp(file, "-")) {
375 		warnx("can't specify - with other arg directory");
376 		done(0);
377 	}
378 	tail = strrchr(file, '/');
379 	if (tail == NULL)
380 		tail = file;
381 	else
382 		tail++;
383 	len = strlen(dir) + strlen(tail) + 1;
384 	buf = emalloc(len);
385 	snprintf(buf, len, "%s/%s", dir, tail);
386 	return (buf);
387 }
388 
389 static void
390 prepare(int i, FILE *fd)
391 {
392 	struct line *p;
393 	int j, h;
394 
395 	fseek(fd, 0L, SEEK_SET);
396 	p = emalloc(3 * sizeof(struct line));
397 	for (j = 0; (h = readhash(fd));) {
398 		p = erealloc(p, (++j + 3) * sizeof(struct line));
399 		p[j].value = h;
400 	}
401 	len[i] = j;
402 	file[i] = p;
403 }
404 
405 static void
406 prune(void)
407 {
408 	int i, j;
409 
410 	for (pref = 0; pref < len[0] && pref < len[1] &&
411 	    file[0][pref + 1].value == file[1][pref + 1].value;
412 	    pref++)
413 		;
414 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
415 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
416 	    suff++)
417 		;
418 	for (j = 0; j < 2; j++) {
419 		sfile[j] = file[j] + pref;
420 		slen[j] = len[j] - pref - suff;
421 		for (i = 0; i <= slen[j]; i++)
422 			sfile[j][i].serial = i;
423 	}
424 }
425 
426 static void
427 equiv(struct line *a, int n, struct line *b, int m, int *c)
428 {
429 	int i, j;
430 
431 	i = j = 1;
432 	while (i <= n && j <= m) {
433 		if (a[i].value < b[j].value)
434 			a[i++].value = 0;
435 		else if (a[i].value == b[j].value)
436 			a[i++].value = j;
437 		else
438 			j++;
439 	}
440 	while (i <= n)
441 		a[i++].value = 0;
442 	b[m + 1].value = 0;
443 	j = 0;
444 	while (++j <= m) {
445 		c[j] = -b[j].serial;
446 		while (b[j + 1].value == b[j].value) {
447 			j++;
448 			c[j] = b[j].serial;
449 		}
450 	}
451 	c[j] = -1;
452 }
453 
454 static int
455 stone(int *a, int n, int *b, int *c)
456 {
457 	int i, k, y, j, l;
458 	int oldc, tc, oldl;
459 
460 	k = 0;
461 	c[0] = newcand(0, 0, 0);
462 	for (i = 1; i <= n; i++) {
463 		j = a[i];
464 		if (j == 0)
465 			continue;
466 		y = -b[j];
467 		oldl = 0;
468 		oldc = c[0];
469 		do {
470 			if (y <= clist[oldc].y)
471 				continue;
472 			l = search(c, k, y);
473 			if (l != oldl + 1)
474 				oldc = c[l - 1];
475 			if (l <= k) {
476 				if (clist[c[l]].y <= y)
477 					continue;
478 				tc = c[l];
479 				c[l] = newcand(i, y, oldc);
480 				oldc = tc;
481 				oldl = l;
482 			} else {
483 				c[l] = newcand(i, y, oldc);
484 				k++;
485 				break;
486 			}
487 		} while ((y = b[++j]) > 0);
488 	}
489 	return (k);
490 }
491 
492 static int
493 newcand(int x, int y, int pred)
494 {
495 	struct cand *q;
496 
497 	clist = erealloc(clist, ++clen * sizeof(cand));
498 	q = clist + clen - 1;
499 	q->x = x;
500 	q->y = y;
501 	q->pred = pred;
502 	return (clen - 1);
503 }
504 
505 static int
506 search(int *c, int k, int y)
507 {
508 	int i, j, l, t;
509 
510 	if (clist[c[k]].y < y)	/* quick look for typical case */
511 		return (k + 1);
512 	i = 0;
513 	j = k + 1;
514 	while (1) {
515 		l = i + j;
516 		if ((l >>= 1) <= i)
517 			break;
518 		t = clist[c[l]].y;
519 		if (t > y)
520 			j = l;
521 		else if (t < y)
522 			i = l;
523 		else
524 			return (l);
525 	}
526 	return (l + 1);
527 }
528 
529 static void
530 unravel(int p)
531 {
532 	struct cand *q;
533 	int i;
534 
535 	for (i = 0; i <= len[0]; i++)
536 		J[i] = i <= pref ? i :
537 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
538 	for (q = clist + p; q->y != 0; q = clist + q->pred)
539 		J[q->x + pref] = q->y + pref;
540 }
541 
542 /*
543  * Check does double duty:
544  *  1.	ferret out any fortuitous correspondences due
545  *	to confounding by hashing (which result in "jackpot")
546  *  2.  collect random access indexes to the two files
547  */
548 static void
549 check(void)
550 {
551 	int i, j, jackpot, c, d;
552 	long ctold, ctnew;
553 
554 	if ((input[0] = fopen(file1, "r")) == NULL) {
555 		perror(file1);
556 		done(0);
557 	}
558 	if ((input[1] = fopen(file2, "r")) == NULL) {
559 		perror(file2);
560 		done(0);
561 	}
562 	j = 1;
563 	ixold[0] = ixnew[0] = 0;
564 	jackpot = 0;
565 	ctold = ctnew = 0;
566 	for (i = 1; i <= len[0]; i++) {
567 		if (J[i] == 0) {
568 			ixold[i] = ctold += skipline(0);
569 			continue;
570 		}
571 		while (j < J[i]) {
572 			ixnew[j] = ctnew += skipline(1);
573 			j++;
574 		}
575 		if (bflag || wflag || iflag) {
576 			for (;;) {
577 				c = getc(input[0]);
578 				d = getc(input[1]);
579 				ctold++;
580 				ctnew++;
581 				if (bflag && isspace(c) && isspace(d)) {
582 					do {
583 						if (c == '\n')
584 							break;
585 						ctold++;
586 					} while (isspace(c = getc(input[0])));
587 					do {
588 						if (d == '\n')
589 							break;
590 						ctnew++;
591 					} while (isspace(d = getc(input[1])));
592 				} else if (wflag) {
593 					while (isspace(c) && c != '\n') {
594 						c = getc(input[0]);
595 						ctold++;
596 					}
597 					while (isspace(d) && d != '\n') {
598 						d = getc(input[1]);
599 						ctnew++;
600 					}
601 				}
602 				if (chrtran[c] != chrtran[d]) {
603 					jackpot++;
604 					J[i] = 0;
605 					if (c != '\n')
606 						ctold += skipline(0);
607 					if (d != '\n')
608 						ctnew += skipline(1);
609 					break;
610 				}
611 				if (c == '\n')
612 					break;
613 			}
614 		} else {
615 			for (;;) {
616 				ctold++;
617 				ctnew++;
618 				if ((c = getc(input[0])) != (d = getc(input[1]))) {
619 					/* jackpot++; */
620 					J[i] = 0;
621 					if (c != '\n')
622 						ctold += skipline(0);
623 					if (d != '\n')
624 						ctnew += skipline(1);
625 					break;
626 				}
627 				if (c == '\n')
628 					break;
629 			}
630 		}
631 		ixold[i] = ctold;
632 		ixnew[j] = ctnew;
633 		j++;
634 	}
635 	for (; j <= len[1]; j++) {
636 		ixnew[j] = ctnew += skipline(1);
637 	}
638 	fclose(input[0]);
639 	fclose(input[1]);
640 	/*
641 	 * if (jackpot)
642 	 *	fprintf(stderr, "jackpot\n");
643 	 */
644 }
645 
646 /* shellsort CACM #201 */
647 static void
648 sort(struct line *a, int n)
649 {
650 	struct line *ai, *aim, w;
651 	int j, m = 0, k;
652 
653 	if (n == 0)
654 		return;
655 	for (j = 1; j <= n; j *= 2)
656 		m = 2 * j - 1;
657 	for (m /= 2; m != 0; m /= 2) {
658 		k = n - m;
659 		for (j = 1; j <= k; j++) {
660 			for (ai = &a[j]; ai > a; ai -= m) {
661 				aim = &ai[m];
662 				if (aim < ai)
663 					break;	/* wraparound */
664 				if (aim->value > ai[0].value ||
665 				    (aim->value == ai[0].value &&
666 					aim->serial > ai[0].serial))
667 					break;
668 				w.value = ai[0].value;
669 				ai[0].value = aim->value;
670 				aim->value = w.value;
671 				w.serial = ai[0].serial;
672 				ai[0].serial = aim->serial;
673 				aim->serial = w.serial;
674 			}
675 		}
676 	}
677 }
678 
679 static void
680 unsort(struct line *f, int l, int *b)
681 {
682 	int *a, i;
683 
684 	a = emalloc((l + 1) * sizeof(int));
685 	for (i = 1; i <= l; i++)
686 		a[f[i].serial] = f[i].value;
687 	for (i = 1; i <= l; i++)
688 		b[i] = a[i];
689 	free(a);
690 }
691 
692 static int
693 skipline(int f)
694 {
695 	int i, c;
696 
697 	for (i = 1; (c = getc(input[f])) != '\n'; i++)
698 		if (c < 0)
699 			return (i);
700 	return (i);
701 }
702 
703 static void
704 output(void)
705 {
706 	int m, i0, i1, j0, j1;
707 
708 	input[0] = fopen(file1, "r");
709 	input[1] = fopen(file2, "r");
710 	m = len[0];
711 	J[0] = 0;
712 	J[m + 1] = len[1] + 1;
713 	if (opt != D_EDIT) {
714 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
715 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
716 				i0++;
717 			j0 = J[i0 - 1] + 1;
718 			i1 = i0 - 1;
719 			while (i1 < m && J[i1 + 1] == 0)
720 				i1++;
721 			j1 = J[i1 + 1] - 1;
722 			J[i1] = j1;
723 			change(i0, i1, j0, j1);
724 		}
725 	} else {
726 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
727 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
728 				i0--;
729 			j0 = J[i0 + 1] - 1;
730 			i1 = i0 + 1;
731 			while (i1 > 1 && J[i1 - 1] == 0)
732 				i1--;
733 			j1 = J[i1 - 1] + 1;
734 			J[i1] = j1;
735 			change(i1, i0, j1, j0);
736 		}
737 	}
738 	if (m == 0)
739 		change(1, 0, 1, len[1]);
740 	if (opt == D_IFDEF) {
741 		for (;;) {
742 #define	c i0
743 			c = getc(input[0]);
744 			if (c < 0)
745 				return;
746 			putchar(c);
747 		}
748 #undef c
749 	}
750 	if (anychange && opt == D_CONTEXT)
751 		dump_context_vec();
752 }
753 
754 /*
755  * The following struct is used to record change information when
756  * doing a "context" diff.  (see routine "change" to understand the
757  * highly mneumonic field names)
758  */
759 struct context_vec {
760 	int a;			/* start line in old file */
761 	int b;			/* end line in old file */
762 	int c;			/* start line in new file */
763 	int d;			/* end line in new file */
764 };
765 
766 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr;
767 
768 #define	MAX_CONTEXT	128
769 
770 /*
771  * indicate that there is a difference between lines a and b of the from file
772  * to get to lines c to d of the to file. If a is greater then b then there
773  * are no lines in the from file involved and this means that there were
774  * lines appended (beginning at b). If c is greater than d then there are
775  * lines missing from the to file.
776  */
777 static void
778 change(int a, int b, int c, int d)
779 {
780 	struct stat stbuf;
781 
782 	if (opt != D_IFDEF && a > b && c > d)
783 		return;
784 	if (anychange == 0) {
785 		anychange = 1;
786 		if (opt == D_CONTEXT) {
787 			printf("*** %s	", file1);
788 			stat(file1, &stbuf);
789 			printf("%s--- %s	",
790 			    ctime(&stbuf.st_mtime), file2);
791 			stat(file2, &stbuf);
792 			printf("%s", ctime(&stbuf.st_mtime));
793 
794 			context_vec_start = emalloc(MAX_CONTEXT *
795 			    sizeof(struct context_vec));
796 			context_vec_end = context_vec_start + MAX_CONTEXT;
797 			context_vec_ptr = context_vec_start - 1;
798 		}
799 	}
800 	if (opt == D_CONTEXT) {
801 		/*
802 		 * if this new change is within 'context' lines of
803 		 * the previous change, just add it to the change
804 		 * record.  If the record is full or if this
805 		 * change is more than 'context' lines from the previous
806 		 * change, dump the record, reset it & add the new change.
807 		 */
808 		if (context_vec_ptr >= context_vec_end ||
809 		    (context_vec_ptr >= context_vec_start &&
810 			a > (context_vec_ptr->b + 2 * context) &&
811 			c > (context_vec_ptr->d + 2 * context)))
812 			dump_context_vec();
813 
814 		context_vec_ptr++;
815 		context_vec_ptr->a = a;
816 		context_vec_ptr->b = b;
817 		context_vec_ptr->c = c;
818 		context_vec_ptr->d = d;
819 		return;
820 	}
821 	switch (opt) {
822 
823 	case D_NORMAL:
824 	case D_EDIT:
825 		range(a, b, ",");
826 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
827 		if (opt == D_NORMAL)
828 			range(c, d, ",");
829 		putchar('\n');
830 		break;
831 	case D_REVERSE:
832 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
833 		range(a, b, " ");
834 		putchar('\n');
835 		break;
836 	case D_NREVERSE:
837 		if (a > b)
838 			printf("a%d %d\n", b, d - c + 1);
839 		else {
840 			printf("d%d %d\n", a, b - a + 1);
841 			if (!(c > d))
842 				/* add changed lines */
843 				printf("a%d %d\n", b, d - c + 1);
844 		}
845 		break;
846 	}
847 	if (opt == D_NORMAL || opt == D_IFDEF) {
848 		fetch(ixold, a, b, input[0], "< ", 1);
849 		if (a <= b && c <= d && opt == D_NORMAL)
850 			prints("---\n");
851 	}
852 	fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0);
853 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
854 		prints(".\n");
855 	if (inifdef) {
856 		fprintf(stdout, "#endif /* %s */\n", endifname);
857 		inifdef = 0;
858 	}
859 }
860 
861 static void
862 range(int a, int b, char *separator)
863 {
864 	printf("%d", a > b ? b : a);
865 	if (a < b)
866 		printf("%s%d", separator, b);
867 }
868 
869 static void
870 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile)
871 {
872 	int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
873 	int i, j, c, col, nc;
874 
875 	/*
876 	 * When doing #ifdef's, copy down to current line
877 	 * if this is the first file, so that stuff makes it to output.
878 	 */
879 	if (opt == D_IFDEF && oldfile) {
880 		long curpos = ftell(lb);
881 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
882 		nc = f[a > b ? b : a - 1] - curpos;
883 		for (i = 0; i < nc; i++)
884 			putchar(getc(lb));
885 	}
886 	if (a > b)
887 		return;
888 	if (opt == D_IFDEF) {
889 		if (inifdef)
890 			fprintf(stdout, "#else /* %s%s */\n",
891 			    oneflag && oldfile == 1 ? "!" : "", ifdef2);
892 		else {
893 			if (oneflag) {
894 				/* There was only one ifdef given */
895 				endifname = ifdef2;
896 				if (oldfile)
897 					fprintf(stdout, "#ifndef %s\n", endifname);
898 				else
899 					fprintf(stdout, "#ifdef %s\n", endifname);
900 			} else {
901 				endifname = oldfile ? ifdef1 : ifdef2;
902 				fprintf(stdout, "#ifdef %s\n", endifname);
903 			}
904 		}
905 		inifdef = 1 + oldfile;
906 	}
907 	for (i = a; i <= b; i++) {
908 		fseek(lb, f[i - 1], SEEK_SET);
909 		nc = f[i] - f[i - 1];
910 		if (opt != D_IFDEF)
911 			prints(s);
912 		col = 0;
913 		for (j = 0; j < nc; j++) {
914 			c = getc(lb);
915 			if (c == '\t' && tflag)
916 				do
917 					putchar(' ');
918 				while (++col & 7);
919 			else {
920 				putchar(c);
921 				col++;
922 			}
923 		}
924 	}
925 
926 	if (inifdef && !wantelses) {
927 		fprintf(stdout, "#endif /* %s */\n", endifname);
928 		inifdef = 0;
929 	}
930 }
931 
932 #define POW2			/* define only if HALFLONG is 2**n */
933 #define HALFLONG 16
934 #define low(x)	(x&((1L<<HALFLONG)-1))
935 #define high(x)	(x>>HALFLONG)
936 
937 /*
938  * hashing has the effect of
939  * arranging line in 7-bit bytes and then
940  * summing 1-s complement in 16-bit hunks
941  */
942 static int
943 readhash(FILE *f)
944 {
945 	unsigned int shift;
946 	int t, space;
947 	long sum;
948 
949 	sum = 1;
950 	space = 0;
951 	if (!bflag && !wflag) {
952 		if (iflag)
953 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
954 				if (t == -1)
955 					return (0);
956 				sum += (long)chrtran[t] << (shift
957 #ifdef POW2
958 				    &= HALFLONG - 1);
959 #else
960 				    %= HALFLONG);
961 #endif
962 			}
963 		else
964 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
965 				if (t == -1)
966 					return (0);
967 				sum += (long)t << (shift
968 #ifdef POW2
969 				    &= HALFLONG - 1);
970 #else
971 				    %= HALFLONG);
972 #endif
973 			}
974 	} else {
975 		for (shift = 0;;) {
976 			switch (t = getc(f)) {
977 			case -1:
978 				return (0);
979 			case '\t':
980 			case ' ':
981 				space++;
982 				continue;
983 			default:
984 				if (space && !wflag) {
985 					shift += 7;
986 					space = 0;
987 				}
988 				sum += (long)chrtran[t] << (shift
989 #ifdef POW2
990 				    &= HALFLONG - 1);
991 #else
992 				    %= HALFLONG);
993 #endif
994 				shift += 7;
995 				continue;
996 			case '\n':
997 				break;
998 			}
999 			break;
1000 		}
1001 	}
1002 	sum = low(sum) + high(sum);
1003 	return ((short) low(sum) + (short) high(sum));
1004 }
1005 
1006 static int
1007 asciifile(FILE *f)
1008 {
1009 	char buf[BUFSIZ], *cp;
1010 	int cnt;
1011 
1012 	fseek(f, 0L, SEEK_SET);
1013 	cnt = fread(buf, 1, BUFSIZ, f);
1014 	cp = buf;
1015 	while (--cnt >= 0)
1016 		if (*cp++ & 0200)
1017 			return (0);
1018 	return (1);
1019 }
1020 
1021 /* dump accumulated "context" diff changes */
1022 static void
1023 dump_context_vec(void)
1024 {
1025 	struct context_vec *cvp = context_vec_start;
1026 	int lowa, upb, lowc, upd, do_output;
1027 	int a, b, c, d;
1028 	char ch;
1029 
1030 	if (cvp > context_vec_ptr)
1031 		return;
1032 
1033 	b = d = 0;		/* gcc */
1034 	lowa = max(1, cvp->a - context);
1035 	upb = min(len[0], context_vec_ptr->b + context);
1036 	lowc = max(1, cvp->c - context);
1037 	upd = min(len[1], context_vec_ptr->d + context);
1038 
1039 	printf("***************\n*** ");
1040 	range(lowa, upb, ",");
1041 	printf(" ****\n");
1042 
1043 	/*
1044 	 * output changes to the "old" file.  The first loop suppresses
1045 	 * output if there were no changes to the "old" file (we'll see
1046 	 * the "old" lines as context in the "new" list).
1047 	 */
1048 	do_output = 0;
1049 	for (; cvp <= context_vec_ptr; cvp++)
1050 		if (cvp->a <= cvp->b) {
1051 			cvp = context_vec_start;
1052 			do_output++;
1053 			break;
1054 		}
1055 	if (do_output) {
1056 		while (cvp <= context_vec_ptr) {
1057 			a = cvp->a;
1058 			b = cvp->b;
1059 			c = cvp->c;
1060 			d = cvp->d;
1061 
1062 			if (a <= b && c <= d)
1063 				ch = 'c';
1064 			else
1065 				ch = (a <= b) ? 'd' : 'a';
1066 
1067 			if (ch == 'a')
1068 				fetch(ixold, lowa, b, input[0], "  ", 0);
1069 			else {
1070 				fetch(ixold, lowa, a - 1, input[0], "  ", 0);
1071 				fetch(ixold, a, b, input[0],
1072 				    ch == 'c' ? "! " : "- ", 0);
1073 			}
1074 			lowa = b + 1;
1075 			cvp++;
1076 		}
1077 		fetch(ixold, b + 1, upb, input[0], "  ", 0);
1078 	}
1079 	/* output changes to the "new" file */
1080 	printf("--- ");
1081 	range(lowc, upd, ",");
1082 	printf(" ----\n");
1083 
1084 	do_output = 0;
1085 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1086 		if (cvp->c <= cvp->d) {
1087 			cvp = context_vec_start;
1088 			do_output++;
1089 			break;
1090 		}
1091 	if (do_output) {
1092 		while (cvp <= context_vec_ptr) {
1093 			a = cvp->a;
1094 			b = cvp->b;
1095 			c = cvp->c;
1096 			d = cvp->d;
1097 
1098 			if (a <= b && c <= d)
1099 				ch = 'c';
1100 			else
1101 				ch = (a <= b) ? 'd' : 'a';
1102 
1103 			if (ch == 'd')
1104 				fetch(ixnew, lowc, d, input[1], "  ", 0);
1105 			else {
1106 				fetch(ixnew, lowc, c - 1, input[1], "  ", 0);
1107 				fetch(ixnew, c, d, input[1],
1108 				    ch == 'c' ? "! " : "+ ", 0);
1109 			}
1110 			lowc = d + 1;
1111 			cvp++;
1112 		}
1113 		fetch(ixnew, d + 1, upd, input[1], "  ", 0);
1114 	}
1115 	context_vec_ptr = context_vec_start - 1;
1116 }
1117