xref: /openbsd-src/usr.bin/diff/diffreg.c (revision c42aed393163dc59fed20be357abb1e4a21a3996)
1 /*	$OpenBSD: diffreg.c,v 1.15 2003/06/25 17:49:22 millert Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <string.h>
43 
44 #include "diff.h"
45 #include "pathnames.h"
46 
47 #if 0
48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90";
49 #endif
50 
51 /*
52  * diff - compare two files.
53  */
54 
55 /*
56  *	Uses an algorithm due to Harold Stone, which finds
57  *	a pair of longest identical subsequences in the two
58  *	files.
59  *
60  *	The major goal is to generate the match vector J.
61  *	J[i] is the index of the line in file1 corresponding
62  *	to line i file0. J[i] = 0 if there is no
63  *	such line in file1.
64  *
65  *	Lines are hashed so as to work in core. All potential
66  *	matches are located by sorting the lines of each file
67  *	on the hash (called ``value''). In particular, this
68  *	collects the equivalence classes in file1 together.
69  *	Subroutine equiv replaces the value of each line in
70  *	file0 by the index of the first element of its
71  *	matching equivalence in (the reordered) file1.
72  *	To save space equiv squeezes file1 into a single
73  *	array member in which the equivalence classes
74  *	are simply concatenated, except that their first
75  *	members are flagged by changing sign.
76  *
77  *	Next the indices that point into member are unsorted into
78  *	array class according to the original order of file0.
79  *
80  *	The cleverness lies in routine stone. This marches
81  *	through the lines of file0, developing a vector klist
82  *	of "k-candidates". At step i a k-candidate is a matched
83  *	pair of lines x,y (x in file0 y in file1) such that
84  *	there is a common subsequence of length k
85  *	between the first i lines of file0 and the first y
86  *	lines of file1, but there is no such subsequence for
87  *	any smaller y. x is the earliest possible mate to y
88  *	that occurs in such a subsequence.
89  *
90  *	Whenever any of the members of the equivalence class of
91  *	lines in file1 matable to a line in file0 has serial number
92  *	less than the y of some k-candidate, that k-candidate
93  *	with the smallest such y is replaced. The new
94  *	k-candidate is chained (via pred) to the current
95  *	k-1 candidate so that the actual subsequence can
96  *	be recovered. When a member has serial number greater
97  *	that the y of all k-candidates, the klist is extended.
98  *	At the end, the longest subsequence is pulled out
99  *	and placed in the array J by unravel
100  *
101  *	With J in hand, the matches there recorded are
102  *	check'ed against reality to assure that no spurious
103  *	matches have crept in due to hashing. If they have,
104  *	they are broken, and "jackpot" is recorded--a harmless
105  *	matter except that a true match for a spuriously
106  *	mated line may now be unnecessarily reported as a change.
107  *
108  *	Much of the complexity of the program comes simply
109  *	from trying to minimize core utilization and
110  *	maximize the range of doable problems by dynamically
111  *	allocating what is needed and reusing what is not.
112  *	The core requirements for problems larger than somewhat
113  *	are (in words) 2*length(file0) + length(file1) +
114  *	3*(number of k-candidates installed),  typically about
115  *	6n words for files of length n.
116  */
117 
118 #define	prints(s)	fputs(s,stdout)
119 
120 FILE *input[2];
121 
122 struct cand {
123 	int x;
124 	int y;
125 	int pred;
126 } cand;
127 
128 struct line {
129 	int serial;
130 	int value;
131 } *file[2];
132 
133 int len[2];
134 struct line *sfile[2];	/* shortened by pruning common prefix and suffix */
135 int slen[2];
136 int pref, suff;			/* length of prefix and suffix */
137 int *class;			/* will be overlaid on file[0] */
138 int *member;			/* will be overlaid on file[1] */
139 int *klist;			/* will be overlaid on file[0] after class */
140 struct cand *clist;		/* merely a free storage pot for candidates */
141 int clen = 0;
142 int *J;				/* will be overlaid on class */
143 long *ixold;			/* will be overlaid on klist */
144 long *ixnew;			/* will be overlaid on file[1] */
145 u_char *chrtran;		/* translation table for case-folding */
146 
147 static void fetch(long *, int, int, FILE *, char *, int);
148 static void output(void);
149 static void check(void);
150 static void range(int, int, char *);
151 static void dump_context_vec(void);
152 static void prepare(int, FILE *);
153 static void prune(void);
154 static void equiv(struct line *, int, struct line *, int, int *);
155 static void unravel(int);
156 static void unsort(struct line *, int, int *);
157 static void change(int, int, int, int);
158 static void sort(struct line *, int);
159 static int newcand(int, int, int);
160 static int search(int *, int, int);
161 static int skipline(int);
162 static int asciifile(FILE *);
163 static int stone(int *, int, int *, int *);
164 static int readhash(FILE *);
165 
166 /*
167  * chrtran points to one of 2 translation tables: cup2low if folding upper to
168  * lower case clow2low if not folding case
169  */
170 u_char clow2low[256] = {
171 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
172 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
173 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
174 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
175 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
176 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
177 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
178 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
179 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
180 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
181 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
182 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
183 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
184 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
185 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
186 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
187 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
188 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
189 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
190 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
191 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
192 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
193 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
194 	0xfd, 0xfe, 0xff
195 };
196 
197 u_char cup2low[256] = {
198 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
199 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
200 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
201 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
202 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
203 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
204 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
205 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
206 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
207 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
208 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
209 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
210 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
211 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
212 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
213 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
214 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
215 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
216 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
217 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
218 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
219 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
220 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
221 	0xfd, 0xfe, 0xff
222 };
223 
224 void
225 diffreg(void)
226 {
227 	char buf1[BUFSIZ], buf2[BUFSIZ];
228 	FILE *f1, *f2;
229 	int i, j;
230 
231 	if (hflag) {
232 		diffargv[0] = "diffh";
233 		execv(diffh, diffargv);
234 		warn("%s", diffh);
235 		done(0);
236 	}
237 	chrtran = (iflag ? cup2low : clow2low);
238 	if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
239 		file1 = splice(file1, file2);
240 		if (stat(file1, &stb1) < 0) {
241 			warn("%s", file1);
242 			done(0);
243 		}
244 	} else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
245 		file2 = splice(file2, file1);
246 		if (stat(file2, &stb2) < 0) {
247 			warn("%s", file2);
248 			done(0);
249 		}
250 	} else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
251 		if (!strcmp(file2, "-")) {
252 			warnx("can't specify - -");
253 			done(0);
254 		}
255 		file1 = copytemp();
256 		if (stat(file1, &stb1) < 0) {
257 			warn("%s", file1);
258 			done(0);
259 		}
260 	} else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
261 		file2 = copytemp();
262 		if (stat(file2, &stb2) < 0) {
263 			warn("%s", file2);
264 			done(0);
265 		}
266 	}
267 	if ((f1 = fopen(file1, "r")) == NULL) {
268 		warn("%s", file1);
269 		done(0);
270 	}
271 	if ((f2 = fopen(file2, "r")) == NULL) {
272 		warn("%s", file2);
273 		fclose(f1);
274 		done(0);
275 	}
276 	if (stb1.st_size != stb2.st_size)
277 		goto notsame;
278 	for (;;) {
279 		i = fread(buf1, 1, BUFSIZ, f1);
280 		j = fread(buf2, 1, BUFSIZ, f2);
281 		if (i < 0 || j < 0 || i != j)
282 			goto notsame;
283 		if (i == 0 && j == 0) {
284 			fclose(f1);
285 			fclose(f2);
286 			status = 0;	/* files don't differ */
287 			goto same;
288 		}
289 		for (j = 0; j < i; j++)
290 			if (buf1[j] != buf2[j])
291 				goto notsame;
292 	}
293 notsame:
294 	/*
295 	 * Files certainly differ at this point; set status accordingly
296 	 */
297 	status = 1;
298 	if (!asciifile(f1) || !asciifile(f2)) {
299 		printf("Binary files %s and %s differ\n", file1, file2);
300 		fclose(f1);
301 		fclose(f2);
302 		done(0);
303 	}
304 	prepare(0, f1);
305 	prepare(1, f2);
306 	fclose(f1);
307 	fclose(f2);
308 	prune();
309 	sort(sfile[0], slen[0]);
310 	sort(sfile[1], slen[1]);
311 
312 	member = (int *)file[1];
313 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
314 	member = ralloc(member, (slen[1] + 2) * sizeof(int));
315 
316 	class = (int *)file[0];
317 	unsort(sfile[0], slen[0], class);
318 	class = ralloc(class, (slen[0] + 2) * sizeof(int));
319 
320 	klist = talloc((slen[0] + 2) * sizeof(int));
321 	clist = talloc(sizeof(cand));
322 	i = stone(class, slen[0], member, klist);
323 	free(member);
324 	free(class);
325 
326 	J = talloc((len[0] + 2) * sizeof(int));
327 	unravel(klist[i]);
328 	free(clist);
329 	free(klist);
330 
331 	ixold = talloc((len[0] + 2) * sizeof(long));
332 	ixnew = talloc((len[1] + 2) * sizeof(long));
333 	check();
334 	output();
335 	status = anychange;
336 same:
337 	if (opt == D_CONTEXT && anychange == 0)
338 		printf("No differences encountered\n");
339 	done(0);
340 }
341 
342 char *tempfile = _PATH_TMP;
343 
344 char *
345 copytemp(void)
346 {
347 	char buf[BUFSIZ];
348 	int i, f;
349 
350 	signal(SIGHUP, done);
351 	signal(SIGINT, done);
352 	signal(SIGPIPE, done);
353 	signal(SIGTERM, done);
354 	f = mkstemp(tempfile);
355 	if (f < 0) {
356 		warn("%s", tempfile);
357 		done(0);
358 	}
359 	while ((i = read(0, buf, BUFSIZ)) > 0)
360 		if (write(f, buf, i) != i) {
361 			warn("%s", tempfile);
362 			done(0);
363 		}
364 	close(f);
365 	return (tempfile);
366 }
367 
368 char *
369 splice(char *dir, char *file)
370 {
371 	char *tail, buf[BUFSIZ];
372 
373 	if (!strcmp(file, "-")) {
374 		warnx("can't specify - with other arg directory");
375 		done(0);
376 	}
377 	tail = strrchr(file, '/');
378 	if (tail == 0)
379 		tail = file;
380 	else
381 		tail++;
382 	snprintf(buf, sizeof buf, "%s/%s", dir, tail);
383 	return (strdup(buf));
384 }
385 
386 static void
387 prepare(int i, FILE *fd)
388 {
389 	struct line *p;
390 	int j, h;
391 
392 	fseek(fd, 0L, SEEK_SET);
393 	p = talloc(3 * sizeof(struct line));
394 	for (j = 0; (h = readhash(fd));) {
395 		p = ralloc(p, (++j + 3) * sizeof(struct line));
396 		p[j].value = h;
397 	}
398 	len[i] = j;
399 	file[i] = p;
400 }
401 
402 static void
403 prune(void)
404 {
405 	int i, j;
406 
407 	for (pref = 0; pref < len[0] && pref < len[1] &&
408 	    file[0][pref + 1].value == file[1][pref + 1].value;
409 	    pref++)
410 		;
411 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
412 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
413 	    suff++)
414 		;
415 	for (j = 0; j < 2; j++) {
416 		sfile[j] = file[j] + pref;
417 		slen[j] = len[j] - pref - suff;
418 		for (i = 0; i <= slen[j]; i++)
419 			sfile[j][i].serial = i;
420 	}
421 }
422 
423 static void
424 equiv(struct line *a, int n, struct line *b, int m, int *c)
425 {
426 	int i, j;
427 
428 	i = j = 1;
429 	while (i <= n && j <= m) {
430 		if (a[i].value < b[j].value)
431 			a[i++].value = 0;
432 		else if (a[i].value == b[j].value)
433 			a[i++].value = j;
434 		else
435 			j++;
436 	}
437 	while (i <= n)
438 		a[i++].value = 0;
439 	b[m + 1].value = 0;
440 	j = 0;
441 	while (++j <= m) {
442 		c[j] = -b[j].serial;
443 		while (b[j + 1].value == b[j].value) {
444 			j++;
445 			c[j] = b[j].serial;
446 		}
447 	}
448 	c[j] = -1;
449 }
450 
451 static int
452 stone(int *a, int n, int *b, int *c)
453 {
454 	int i, k, y, j, l;
455 	int oldc, tc, oldl;
456 
457 	k = 0;
458 	c[0] = newcand(0, 0, 0);
459 	for (i = 1; i <= n; i++) {
460 		j = a[i];
461 		if (j == 0)
462 			continue;
463 		y = -b[j];
464 		oldl = 0;
465 		oldc = c[0];
466 		do {
467 			if (y <= clist[oldc].y)
468 				continue;
469 			l = search(c, k, y);
470 			if (l != oldl + 1)
471 				oldc = c[l - 1];
472 			if (l <= k) {
473 				if (clist[c[l]].y <= y)
474 					continue;
475 				tc = c[l];
476 				c[l] = newcand(i, y, oldc);
477 				oldc = tc;
478 				oldl = l;
479 			} else {
480 				c[l] = newcand(i, y, oldc);
481 				k++;
482 				break;
483 			}
484 		} while ((y = b[++j]) > 0);
485 	}
486 	return (k);
487 }
488 
489 static int
490 newcand(int x, int y, int pred)
491 {
492 	struct cand *q;
493 
494 	clist = ralloc(clist, ++clen * sizeof(cand));
495 	q = clist + clen - 1;
496 	q->x = x;
497 	q->y = y;
498 	q->pred = pred;
499 	return (clen - 1);
500 }
501 
502 static int
503 search(int *c, int k, int y)
504 {
505 	int i, j, l, t;
506 
507 	if (clist[c[k]].y < y)	/* quick look for typical case */
508 		return (k + 1);
509 	i = 0;
510 	j = k + 1;
511 	while (1) {
512 		l = i + j;
513 		if ((l >>= 1) <= i)
514 			break;
515 		t = clist[c[l]].y;
516 		if (t > y)
517 			j = l;
518 		else if (t < y)
519 			i = l;
520 		else
521 			return (l);
522 	}
523 	return (l + 1);
524 }
525 
526 static void
527 unravel(int p)
528 {
529 	struct cand *q;
530 	int i;
531 
532 	for (i = 0; i <= len[0]; i++)
533 		J[i] = i <= pref ? i :
534 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
535 	for (q = clist + p; q->y != 0; q = clist + q->pred)
536 		J[q->x + pref] = q->y + pref;
537 }
538 
539 /*
540  * check does double duty: 1.  ferret out any fortuitous correspondences due
541  * to confounding by hashing (which result in "jackpot") 2.  collect random
542  * access indexes to the two files
543  */
544 
545 static void
546 check(void)
547 {
548 	int i, j, jackpot, c, d;
549 	long ctold, ctnew;
550 
551 	if ((input[0] = fopen(file1, "r")) == NULL) {
552 		perror(file1);
553 		done(0);
554 	}
555 	if ((input[1] = fopen(file2, "r")) == NULL) {
556 		perror(file2);
557 		done(0);
558 	}
559 	j = 1;
560 	ixold[0] = ixnew[0] = 0;
561 	jackpot = 0;
562 	ctold = ctnew = 0;
563 	for (i = 1; i <= len[0]; i++) {
564 		if (J[i] == 0) {
565 			ixold[i] = ctold += skipline(0);
566 			continue;
567 		}
568 		while (j < J[i]) {
569 			ixnew[j] = ctnew += skipline(1);
570 			j++;
571 		}
572 		if (bflag || wflag || iflag) {
573 			for (;;) {
574 				c = getc(input[0]);
575 				d = getc(input[1]);
576 				ctold++;
577 				ctnew++;
578 				if (bflag && isspace(c) && isspace(d)) {
579 					do {
580 						if (c == '\n')
581 							break;
582 						ctold++;
583 					} while (isspace(c = getc(input[0])));
584 					do {
585 						if (d == '\n')
586 							break;
587 						ctnew++;
588 					} while (isspace(d = getc(input[1])));
589 				} else if (wflag) {
590 					while (isspace(c) && c != '\n') {
591 						c = getc(input[0]);
592 						ctold++;
593 					}
594 					while (isspace(d) && d != '\n') {
595 						d = getc(input[1]);
596 						ctnew++;
597 					}
598 				}
599 				if (chrtran[c] != chrtran[d]) {
600 					jackpot++;
601 					J[i] = 0;
602 					if (c != '\n')
603 						ctold += skipline(0);
604 					if (d != '\n')
605 						ctnew += skipline(1);
606 					break;
607 				}
608 				if (c == '\n')
609 					break;
610 			}
611 		} else {
612 			for (;;) {
613 				ctold++;
614 				ctnew++;
615 				if ((c = getc(input[0])) != (d = getc(input[1]))) {
616 					/* jackpot++; */
617 					J[i] = 0;
618 					if (c != '\n')
619 						ctold += skipline(0);
620 					if (d != '\n')
621 						ctnew += skipline(1);
622 					break;
623 				}
624 				if (c == '\n')
625 					break;
626 			}
627 		}
628 		ixold[i] = ctold;
629 		ixnew[j] = ctnew;
630 		j++;
631 	}
632 	for (; j <= len[1]; j++) {
633 		ixnew[j] = ctnew += skipline(1);
634 	}
635 	fclose(input[0]);
636 	fclose(input[1]);
637 	/*
638 	 * if (jackpot)
639 	 *	fprintf(stderr, "jackpot\n");
640 	 */
641 }
642 
643 /* shellsort CACM #201 */
644 static void
645 sort(struct line *a, int n)
646 {
647 	struct line *ai, *aim, w;
648 	int j, m = 0, k;
649 
650 	if (n == 0)
651 		return;
652 	for (j = 1; j <= n; j *= 2)
653 		m = 2 * j - 1;
654 	for (m /= 2; m != 0; m /= 2) {
655 		k = n - m;
656 		for (j = 1; j <= k; j++) {
657 			for (ai = &a[j]; ai > a; ai -= m) {
658 				aim = &ai[m];
659 				if (aim < ai)
660 					break;	/* wraparound */
661 				if (aim->value > ai[0].value ||
662 				    (aim->value == ai[0].value &&
663 					aim->serial > ai[0].serial))
664 					break;
665 				w.value = ai[0].value;
666 				ai[0].value = aim->value;
667 				aim->value = w.value;
668 				w.serial = ai[0].serial;
669 				ai[0].serial = aim->serial;
670 				aim->serial = w.serial;
671 			}
672 		}
673 	}
674 }
675 
676 static void
677 unsort(struct line *f, int l, int *b)
678 {
679 	int *a, i;
680 
681 	a = talloc((l + 1) * sizeof(int));
682 	for (i = 1; i <= l; i++)
683 		a[f[i].serial] = f[i].value;
684 	for (i = 1; i <= l; i++)
685 		b[i] = a[i];
686 	free(a);
687 }
688 
689 static int
690 skipline(int f)
691 {
692 	int i, c;
693 
694 	for (i = 1; (c = getc(input[f])) != '\n'; i++)
695 		if (c < 0)
696 			return (i);
697 	return (i);
698 }
699 
700 static void
701 output(void)
702 {
703 	int m, i0, i1, j0, j1;
704 
705 	input[0] = fopen(file1, "r");
706 	input[1] = fopen(file2, "r");
707 	m = len[0];
708 	J[0] = 0;
709 	J[m + 1] = len[1] + 1;
710 	if (opt != D_EDIT) {
711 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
712 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
713 				i0++;
714 			j0 = J[i0 - 1] + 1;
715 			i1 = i0 - 1;
716 			while (i1 < m && J[i1 + 1] == 0)
717 				i1++;
718 			j1 = J[i1 + 1] - 1;
719 			J[i1] = j1;
720 			change(i0, i1, j0, j1);
721 		}
722 	} else {
723 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
724 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
725 				i0--;
726 			j0 = J[i0 + 1] - 1;
727 			i1 = i0 + 1;
728 			while (i1 > 1 && J[i1 - 1] == 0)
729 				i1--;
730 			j1 = J[i1 - 1] + 1;
731 			J[i1] = j1;
732 			change(i1, i0, j1, j0);
733 		}
734 	}
735 	if (m == 0)
736 		change(1, 0, 1, len[1]);
737 	if (opt == D_IFDEF) {
738 		for (;;) {
739 #define	c i0
740 			c = getc(input[0]);
741 			if (c < 0)
742 				return;
743 			putchar(c);
744 		}
745 #undef c
746 	}
747 	if (anychange && opt == D_CONTEXT)
748 		dump_context_vec();
749 }
750 
751 /*
752  * The following struct is used to record change information when
753  * doing a "context" diff.  (see routine "change" to understand the
754  * highly mneumonic field names)
755  */
756 struct context_vec {
757 	int a;			/* start line in old file */
758 	int b;			/* end line in old file */
759 	int c;			/* start line in new file */
760 	int d;			/* end line in new file */
761 };
762 
763 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr;
764 
765 #define	MAX_CONTEXT	128
766 
767 /*
768  * indicate that there is a difference between lines a and b of the from file
769  * to get to lines c to d of the to file. If a is greater then b then there
770  * are no lines in the from file involved and this means that there were
771  * lines appended (beginning at b). If c is greater than d then there are
772  * lines missing from the to file.
773  */
774 static void
775 change(int a, int b, int c, int d)
776 {
777 	struct stat stbuf;
778 
779 	if (opt != D_IFDEF && a > b && c > d)
780 		return;
781 	if (anychange == 0) {
782 		anychange = 1;
783 		if (opt == D_CONTEXT) {
784 			printf("*** %s	", file1);
785 			stat(file1, &stbuf);
786 			printf("%s--- %s	",
787 			    ctime(&stbuf.st_mtime), file2);
788 			stat(file2, &stbuf);
789 			printf("%s", ctime(&stbuf.st_mtime));
790 
791 			context_vec_start = talloc(MAX_CONTEXT *
792 			    sizeof(struct context_vec));
793 			context_vec_end = context_vec_start + MAX_CONTEXT;
794 			context_vec_ptr = context_vec_start - 1;
795 		}
796 	}
797 	if (opt == D_CONTEXT) {
798 		/*
799 		 * if this new change is within 'context' lines of
800 		 * the previous change, just add it to the change
801 		 * record.  If the record is full or if this
802 		 * change is more than 'context' lines from the previous
803 		 * change, dump the record, reset it & add the new change.
804 		 */
805 		if (context_vec_ptr >= context_vec_end ||
806 		    (context_vec_ptr >= context_vec_start &&
807 			a > (context_vec_ptr->b + 2 * context) &&
808 			c > (context_vec_ptr->d + 2 * context)))
809 			dump_context_vec();
810 
811 		context_vec_ptr++;
812 		context_vec_ptr->a = a;
813 		context_vec_ptr->b = b;
814 		context_vec_ptr->c = c;
815 		context_vec_ptr->d = d;
816 		return;
817 	}
818 	switch (opt) {
819 
820 	case D_NORMAL:
821 	case D_EDIT:
822 		range(a, b, ",");
823 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
824 		if (opt == D_NORMAL)
825 			range(c, d, ",");
826 		putchar('\n');
827 		break;
828 	case D_REVERSE:
829 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
830 		range(a, b, " ");
831 		putchar('\n');
832 		break;
833 	case D_NREVERSE:
834 		if (a > b)
835 			printf("a%d %d\n", b, d - c + 1);
836 		else {
837 			printf("d%d %d\n", a, b - a + 1);
838 			if (!(c > d))
839 				/* add changed lines */
840 				printf("a%d %d\n", b, d - c + 1);
841 		}
842 		break;
843 	}
844 	if (opt == D_NORMAL || opt == D_IFDEF) {
845 		fetch(ixold, a, b, input[0], "< ", 1);
846 		if (a <= b && c <= d && opt == D_NORMAL)
847 			prints("---\n");
848 	}
849 	fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0);
850 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
851 		prints(".\n");
852 	if (inifdef) {
853 		fprintf(stdout, "#endif /* %s */\n", endifname);
854 		inifdef = 0;
855 	}
856 }
857 
858 static void
859 range(int a, int b, char *separator)
860 {
861 	printf("%d", a > b ? b : a);
862 	if (a < b)
863 		printf("%s%d", separator, b);
864 }
865 
866 static void
867 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile)
868 {
869 	int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
870 	int i, j, c, col, nc;
871 
872 	/*
873 	 * When doing #ifdef's, copy down to current line
874 	 * if this is the first file, so that stuff makes it to output.
875 	 */
876 	if (opt == D_IFDEF && oldfile) {
877 		long curpos = ftell(lb);
878 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
879 		nc = f[a > b ? b : a - 1] - curpos;
880 		for (i = 0; i < nc; i++)
881 			putchar(getc(lb));
882 	}
883 	if (a > b)
884 		return;
885 	if (opt == D_IFDEF) {
886 		if (inifdef)
887 			fprintf(stdout, "#else /* %s%s */\n",
888 			    oneflag && oldfile == 1 ? "!" : "", ifdef2);
889 		else {
890 			if (oneflag) {
891 				/* There was only one ifdef given */
892 				endifname = ifdef2;
893 				if (oldfile)
894 					fprintf(stdout, "#ifndef %s\n", endifname);
895 				else
896 					fprintf(stdout, "#ifdef %s\n", endifname);
897 			} else {
898 				endifname = oldfile ? ifdef1 : ifdef2;
899 				fprintf(stdout, "#ifdef %s\n", endifname);
900 			}
901 		}
902 		inifdef = 1 + oldfile;
903 	}
904 	for (i = a; i <= b; i++) {
905 		fseek(lb, f[i - 1], SEEK_SET);
906 		nc = f[i] - f[i - 1];
907 		if (opt != D_IFDEF)
908 			prints(s);
909 		col = 0;
910 		for (j = 0; j < nc; j++) {
911 			c = getc(lb);
912 			if (c == '\t' && tflag)
913 				do
914 					putchar(' ');
915 				while (++col & 7);
916 			else {
917 				putchar(c);
918 				col++;
919 			}
920 		}
921 	}
922 
923 	if (inifdef && !wantelses) {
924 		fprintf(stdout, "#endif /* %s */\n", endifname);
925 		inifdef = 0;
926 	}
927 }
928 
929 #define POW2			/* define only if HALFLONG is 2**n */
930 #define HALFLONG 16
931 #define low(x)	(x&((1L<<HALFLONG)-1))
932 #define high(x)	(x>>HALFLONG)
933 
934 /*
935  * hashing has the effect of
936  * arranging line in 7-bit bytes and then
937  * summing 1-s complement in 16-bit hunks
938  */
939 static int
940 readhash(FILE *f)
941 {
942 	unsigned int shift;
943 	int t, space;
944 	long sum;
945 
946 	sum = 1;
947 	space = 0;
948 	if (!bflag && !wflag) {
949 		if (iflag)
950 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
951 				if (t == -1)
952 					return (0);
953 				sum += (long)chrtran[t] << (shift
954 #ifdef POW2
955 				    &= HALFLONG - 1);
956 #else
957 				    %= HALFLONG);
958 #endif
959 			}
960 		else
961 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
962 				if (t == -1)
963 					return (0);
964 				sum += (long)t << (shift
965 #ifdef POW2
966 				    &= HALFLONG - 1);
967 #else
968 				    %= HALFLONG);
969 #endif
970 			}
971 	} else {
972 		for (shift = 0;;) {
973 			switch (t = getc(f)) {
974 			case -1:
975 				return (0);
976 			case '\t':
977 			case ' ':
978 				space++;
979 				continue;
980 			default:
981 				if (space && !wflag) {
982 					shift += 7;
983 					space = 0;
984 				}
985 				sum += (long)chrtran[t] << (shift
986 #ifdef POW2
987 				    &= HALFLONG - 1);
988 #else
989 				    %= HALFLONG);
990 #endif
991 				shift += 7;
992 				continue;
993 			case '\n':
994 				break;
995 			}
996 			break;
997 		}
998 	}
999 	sum = low(sum) + high(sum);
1000 	return ((short) low(sum) + (short) high(sum));
1001 }
1002 
1003 static int
1004 asciifile(FILE *f)
1005 {
1006 	char buf[BUFSIZ], *cp;
1007 	int cnt;
1008 
1009 	fseek(f, 0L, SEEK_SET);
1010 	cnt = fread(buf, 1, BUFSIZ, f);
1011 	cp = buf;
1012 	while (--cnt >= 0)
1013 		if (*cp++ & 0200)
1014 			return (0);
1015 	return (1);
1016 }
1017 
1018 
1019 /* dump accumulated "context" diff changes */
1020 static void
1021 dump_context_vec(void)
1022 {
1023 	struct context_vec *cvp = context_vec_start;
1024 	int lowa, upb, lowc, upd, do_output;
1025 	int a, b, c, d;
1026 	char ch;
1027 
1028 	if (cvp > context_vec_ptr)
1029 		return;
1030 
1031 	b = d = 0;		/* gcc */
1032 	lowa = max(1, cvp->a - context);
1033 	upb = min(len[0], context_vec_ptr->b + context);
1034 	lowc = max(1, cvp->c - context);
1035 	upd = min(len[1], context_vec_ptr->d + context);
1036 
1037 	printf("***************\n*** ");
1038 	range(lowa, upb, ",");
1039 	printf(" ****\n");
1040 
1041 	/*
1042 	 * output changes to the "old" file.  The first loop suppresses
1043 	 * output if there were no changes to the "old" file (we'll see
1044 	 * the "old" lines as context in the "new" list).
1045 	 */
1046 	do_output = 0;
1047 	for (; cvp <= context_vec_ptr; cvp++)
1048 		if (cvp->a <= cvp->b) {
1049 			cvp = context_vec_start;
1050 			do_output++;
1051 			break;
1052 		}
1053 	if (do_output) {
1054 		while (cvp <= context_vec_ptr) {
1055 			a = cvp->a;
1056 			b = cvp->b;
1057 			c = cvp->c;
1058 			d = cvp->d;
1059 
1060 			if (a <= b && c <= d)
1061 				ch = 'c';
1062 			else
1063 				ch = (a <= b) ? 'd' : 'a';
1064 
1065 			if (ch == 'a')
1066 				fetch(ixold, lowa, b, input[0], "  ", 0);
1067 			else {
1068 				fetch(ixold, lowa, a - 1, input[0], "  ", 0);
1069 				fetch(ixold, a, b, input[0],
1070 				    ch == 'c' ? "! " : "- ", 0);
1071 			}
1072 			lowa = b + 1;
1073 			cvp++;
1074 		}
1075 		fetch(ixold, b + 1, upb, input[0], "  ", 0);
1076 	}
1077 	/* output changes to the "new" file */
1078 	printf("--- ");
1079 	range(lowc, upd, ",");
1080 	printf(" ----\n");
1081 
1082 	do_output = 0;
1083 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1084 		if (cvp->c <= cvp->d) {
1085 			cvp = context_vec_start;
1086 			do_output++;
1087 			break;
1088 		}
1089 	if (do_output) {
1090 		while (cvp <= context_vec_ptr) {
1091 			a = cvp->a;
1092 			b = cvp->b;
1093 			c = cvp->c;
1094 			d = cvp->d;
1095 
1096 			if (a <= b && c <= d)
1097 				ch = 'c';
1098 			else
1099 				ch = (a <= b) ? 'd' : 'a';
1100 
1101 			if (ch == 'd')
1102 				fetch(ixnew, lowc, d, input[1], "  ", 0);
1103 			else {
1104 				fetch(ixnew, lowc, c - 1, input[1], "  ", 0);
1105 				fetch(ixnew, c, d, input[1],
1106 				    ch == 'c' ? "! " : "+ ", 0);
1107 			}
1108 			lowc = d + 1;
1109 			cvp++;
1110 		}
1111 		fetch(ixnew, d + 1, upd, input[1], "  ", 0);
1112 	}
1113 	context_vec_ptr = context_vec_start - 1;
1114 }
1115