xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 0c9f493eda33a1be8b85995ddecb0196ef2f949c)
1 /*	$OpenBSD: diffreg.c,v 1.4 2003/06/25 03:25:29 tedu Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <string.h>
43 
44 #include "diff.h"
45 #include "pathnames.h"
46 
47 #if 0
48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90";
49 #endif
50 
51 /*
52  * diff - compare two files.
53  */
54 
55 /*
56  *	Uses an algorithm due to Harold Stone, which finds
57  *	a pair of longest identical subsequences in the two
58  *	files.
59  *
60  *	The major goal is to generate the match vector J.
61  *	J[i] is the index of the line in file1 corresponding
62  *	to line i file0. J[i] = 0 if there is no
63  *	such line in file1.
64  *
65  *	Lines are hashed so as to work in core. All potential
66  *	matches are located by sorting the lines of each file
67  *	on the hash (called ``value''). In particular, this
68  *	collects the equivalence classes in file1 together.
69  *	Subroutine equiv replaces the value of each line in
70  *	file0 by the index of the first element of its
71  *	matching equivalence in (the reordered) file1.
72  *	To save space equiv squeezes file1 into a single
73  *	array member in which the equivalence classes
74  *	are simply concatenated, except that their first
75  *	members are flagged by changing sign.
76  *
77  *	Next the indices that point into member are unsorted into
78  *	array class according to the original order of file0.
79  *
80  *	The cleverness lies in routine stone. This marches
81  *	through the lines of file0, developing a vector klist
82  *	of "k-candidates". At step i a k-candidate is a matched
83  *	pair of lines x,y (x in file0 y in file1) such that
84  *	there is a common subsequence of length k
85  *	between the first i lines of file0 and the first y
86  *	lines of file1, but there is no such subsequence for
87  *	any smaller y. x is the earliest possible mate to y
88  *	that occurs in such a subsequence.
89  *
90  *	Whenever any of the members of the equivalence class of
91  *	lines in file1 matable to a line in file0 has serial number
92  *	less than the y of some k-candidate, that k-candidate
93  *	with the smallest such y is replaced. The new
94  *	k-candidate is chained (via pred) to the current
95  *	k-1 candidate so that the actual subsequence can
96  *	be recovered. When a member has serial number greater
97  *	that the y of all k-candidates, the klist is extended.
98  *	At the end, the longest subsequence is pulled out
99  *	and placed in the array J by unravel
100  *
101  *	With J in hand, the matches there recorded are
102  *	check'ed against reality to assure that no spurious
103  *	matches have crept in due to hashing. If they have,
104  *	they are broken, and "jackpot" is recorded--a harmless
105  *	matter except that a true match for a spuriously
106  *	mated line may now be unnecessarily reported as a change.
107  *
108  *	Much of the complexity of the program comes simply
109  *	from trying to minimize core utilization and
110  *	maximize the range of doable problems by dynamically
111  *	allocating what is needed and reusing what is not.
112  *	The core requirements for problems larger than somewhat
113  *	are (in words) 2*length(file0) + length(file1) +
114  *	3*(number of k-candidates installed),  typically about
115  *	6n words for files of length n.
116  */
117 
118 #define	prints(s)	fputs(s,stdout)
119 
120 FILE *input[2];
121 FILE *fopen();
122 
123 struct cand {
124 	int x;
125 	int y;
126 	int pred;
127 } cand;
128 
129 struct line {
130 	int serial;
131 	int value;
132 } *file[2], line;
133 
134 int len[2];
135 struct line *sfile[2];	/* shortened by pruning common prefix and suffix */
136 int slen[2];
137 int pref, suff;			/* length of prefix and suffix */
138 int *class;			/* will be overlaid on file[0] */
139 int *member;			/* will be overlaid on file[1] */
140 int *klist;			/* will be overlaid on file[0] after class */
141 struct cand *clist;		/* merely a free storage pot for candidates */
142 int clen = 0;
143 int *J;				/* will be overlaid on class */
144 long *ixold;			/* will be overlaid on klist */
145 long *ixnew;			/* will be overlaid on file[1] */
146 char *chrtran;			/* translation table for case-folding */
147 
148 static void fetch(long *, int, int, FILE *, char *, int);
149 static void output(void);
150 static void check(void);
151 static void range(int, int, char *);
152 static void dump_context_vec(void);
153 static void prepare(int, FILE *);
154 static void prune(void);
155 static void equiv(struct line *, int, struct line *, int, int *);
156 static void unravel(int);
157 static void unsort(struct line *, int, int *);
158 static void change(int, int, int, int);
159 static void sort(struct line *, int);
160 static int newcand(int, int, int);
161 static int search(int *, int, int);
162 static int skipline(int);
163 static int asciifile(FILE *);
164 static int stone(int *, int, int *, int *);
165 static int readhash(FILE *);
166 
167 /*
168  * chrtran points to one of 2 translation tables: cup2low if folding upper to
169  * lower case clow2low if not folding case
170  */
171 char clow2low[256] = {
172 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
173 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
174 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
175 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
176 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
177 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
178 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
179 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
180 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
181 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
182 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
183 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
184 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
185 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
186 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
187 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
188 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
189 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
190 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
191 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
192 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
193 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
194 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
195 	0xfd, 0xfe, 0xff
196 };
197 
198 char cup2low[256] = {
199 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
200 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
201 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
202 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
203 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
204 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
205 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
206 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
207 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
208 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
209 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
210 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
211 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
212 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
213 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
214 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
215 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
216 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
217 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
218 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
219 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
220 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
221 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
222 	0xfd, 0xfe, 0xff
223 };
224 
225 void
226 diffreg(void)
227 {
228 	int i, j;
229 	FILE *f1, *f2;
230 	char buf1[BUFSIZ], buf2[BUFSIZ];
231 
232 	if (hflag) {
233 		diffargv[0] = "diffh";
234 		execv(diffh, diffargv);
235 		fprintf(stderr, "diff: ");
236 		perror(diffh);
237 		done();
238 	}
239 	chrtran = (iflag ? cup2low : clow2low);
240 	if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
241 		file1 = splice(file1, file2);
242 		if (stat(file1, &stb1) < 0) {
243 			fprintf(stderr, "diff: ");
244 			perror(file1);
245 			done();
246 		}
247 	} else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
248 		file2 = splice(file2, file1);
249 		if (stat(file2, &stb2) < 0) {
250 			fprintf(stderr, "diff: ");
251 			perror(file2);
252 			done();
253 		}
254 	} else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
255 		if (!strcmp(file2, "-")) {
256 			fprintf(stderr, "diff: can't specify - -\n");
257 			done();
258 		}
259 		file1 = copytemp();
260 		if (stat(file1, &stb1) < 0) {
261 			fprintf(stderr, "diff: ");
262 			perror(file1);
263 			done();
264 		}
265 	} else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
266 		file2 = copytemp();
267 		if (stat(file2, &stb2) < 0) {
268 			fprintf(stderr, "diff: ");
269 			perror(file2);
270 			done();
271 		}
272 	}
273 	if ((f1 = fopen(file1, "r")) == NULL) {
274 		fprintf(stderr, "diff: ");
275 		perror(file1);
276 		done();
277 	}
278 	if ((f2 = fopen(file2, "r")) == NULL) {
279 		fprintf(stderr, "diff: ");
280 		perror(file2);
281 		fclose(f1);
282 		done();
283 	}
284 	if (stb1.st_size != stb2.st_size)
285 		goto notsame;
286 	for (;;) {
287 		i = fread(buf1, 1, BUFSIZ, f1);
288 		j = fread(buf2, 1, BUFSIZ, f2);
289 		if (i < 0 || j < 0 || i != j)
290 			goto notsame;
291 		if (i == 0 && j == 0) {
292 			fclose(f1);
293 			fclose(f2);
294 			status = 0;	/* files don't differ */
295 			goto same;
296 		}
297 		for (j = 0; j < i; j++)
298 			if (buf1[j] != buf2[j])
299 				goto notsame;
300 	}
301 notsame:
302 	/*
303 	 *	Files certainly differ at this point; set status accordingly
304 	 */
305 	status = 1;
306 	if (!asciifile(f1) || !asciifile(f2)) {
307 		printf("Binary files %s and %s differ\n", file1, file2);
308 		fclose(f1);
309 		fclose(f2);
310 		done();
311 	}
312 	prepare(0, f1);
313 	prepare(1, f2);
314 	fclose(f1);
315 	fclose(f2);
316 	prune();
317 	sort(sfile[0], slen[0]);
318 	sort(sfile[1], slen[1]);
319 
320 	member = (int *)file[1];
321 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
322 	member = ralloc(member, (slen[1] + 2) * sizeof(int));
323 
324 	class = (int *)file[0];
325 	unsort(sfile[0], slen[0], class);
326 	class = ralloc(class, (slen[0] + 2) * sizeof(int));
327 
328 	klist = talloc((slen[0] + 2) * sizeof(int));
329 	clist = talloc(sizeof(cand));
330 	i = stone(class, slen[0], member, klist);
331 	free(member);
332 	free(class);
333 
334 	J = talloc((len[0] + 2) * sizeof(int));
335 	unravel(klist[i]);
336 	free(clist);
337 	free(klist);
338 
339 	ixold = talloc((len[0] + 2) * sizeof(long));
340 	ixnew = talloc((len[1] + 2) * sizeof(long));
341 	check();
342 	output();
343 	status = anychange;
344 same:
345 	if (opt == D_CONTEXT && anychange == 0)
346 		printf("No differences encountered\n");
347 	done();
348 }
349 
350 char *tempfile = _PATH_TMP;
351 
352 char *
353 copytemp(void)
354 {
355 	char buf[BUFSIZ];
356 	int i, f;
357 
358 	signal(SIGHUP, catchsig);
359 	signal(SIGINT, catchsig);
360 	signal(SIGPIPE, catchsig);
361 	signal(SIGTERM, catchsig);
362 	f = mkstemp(tempfile);
363 	if (f < 0) {
364 		fprintf(stderr, "diff: ");
365 		perror(tempfile);
366 		done();
367 	}
368 	while ((i = read(0, buf, BUFSIZ)) > 0)
369 		if (write(f, buf, i) != i) {
370 			fprintf(stderr, "diff: ");
371 			perror(tempfile);
372 			done();
373 		}
374 	close(f);
375 	return (tempfile);
376 }
377 
378 char *
379 splice(char *dir, char *file)
380 {
381 	char *tail;
382 	char buf[BUFSIZ];
383 
384 	if (!strcmp(file, "-")) {
385 		fprintf(stderr, "diff: can't specify - with other arg directory\n");
386 		done();
387 	}
388 	tail = rindex(file, '/');
389 	if (tail == 0)
390 		tail = file;
391 	else
392 		tail++;
393 	snprintf(buf, sizeof buf, "%s/%s", dir, tail);
394 	return (strdup(buf));
395 }
396 
397 static void
398 prepare(int i, FILE *fd)
399 {
400 	struct line *p;
401 	int j, h;
402 
403 	fseek(fd, 0, 0);
404 	p = talloc(3 * sizeof(line));
405 	for (j = 0; (h = readhash(fd));) {
406 		p = ralloc(p, (++j + 3) * sizeof(line));
407 		p[j].value = h;
408 	}
409 	len[i] = j;
410 	file[i] = p;
411 }
412 
413 static void
414 prune(void)
415 {
416 	int i, j;
417 	for (pref = 0; pref < len[0] && pref < len[1] &&
418 	    file[0][pref + 1].value == file[1][pref + 1].value;
419 	    pref++);
420 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
421 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
422 	    suff++);
423 	for (j = 0; j < 2; j++) {
424 		sfile[j] = file[j] + pref;
425 		slen[j] = len[j] - pref - suff;
426 		for (i = 0; i <= slen[j]; i++)
427 			sfile[j][i].serial = i;
428 	}
429 }
430 
431 static void
432 equiv(struct line *a, int n, struct line *b, int m, int *c)
433 {
434 	int i, j;
435 	i = j = 1;
436 	while (i <= n && j <= m) {
437 		if (a[i].value < b[j].value)
438 			a[i++].value = 0;
439 		else if (a[i].value == b[j].value)
440 			a[i++].value = j;
441 		else
442 			j++;
443 	}
444 	while (i <= n)
445 		a[i++].value = 0;
446 	b[m + 1].value = 0;
447 	j = 0;
448 	while (++j <= m) {
449 		c[j] = -b[j].serial;
450 		while (b[j + 1].value == b[j].value) {
451 			j++;
452 			c[j] = b[j].serial;
453 		}
454 	}
455 	c[j] = -1;
456 }
457 
458 static int
459 stone(int *a, int n, int *b, int *c)
460 {
461 	int i, k, y;
462 	int j, l;
463 	int oldc, tc;
464 	int oldl;
465 	k = 0;
466 	c[0] = newcand(0, 0, 0);
467 	for (i = 1; i <= n; i++) {
468 		j = a[i];
469 		if (j == 0)
470 			continue;
471 		y = -b[j];
472 		oldl = 0;
473 		oldc = c[0];
474 		do {
475 			if (y <= clist[oldc].y)
476 				continue;
477 			l = search(c, k, y);
478 			if (l != oldl + 1)
479 				oldc = c[l - 1];
480 			if (l <= k) {
481 				if (clist[c[l]].y <= y)
482 					continue;
483 				tc = c[l];
484 				c[l] = newcand(i, y, oldc);
485 				oldc = tc;
486 				oldl = l;
487 			} else {
488 				c[l] = newcand(i, y, oldc);
489 				k++;
490 				break;
491 			}
492 		} while ((y = b[++j]) > 0);
493 	}
494 	return (k);
495 }
496 
497 static int
498 newcand(int x, int y, int pred)
499 {
500 	struct cand *q;
501 
502 	clist = ralloc(clist, ++clen * sizeof(cand));
503 	q = clist + clen - 1;
504 	q->x = x;
505 	q->y = y;
506 	q->pred = pred;
507 	return (clen - 1);
508 }
509 
510 static int
511 search(int *c, int k, int y)
512 {
513 	int i, j, l;
514 	int t;
515 	if (clist[c[k]].y < y)	/* quick look for typical case */
516 		return (k + 1);
517 	i = 0;
518 	j = k + 1;
519 	while (1) {
520 		l = i + j;
521 		if ((l >>= 1) <= i)
522 			break;
523 		t = clist[c[l]].y;
524 		if (t > y)
525 			j = l;
526 		else if (t < y)
527 			i = l;
528 		else
529 			return (l);
530 	}
531 	return (l + 1);
532 }
533 
534 static void
535 unravel(int p)
536 {
537 	int i;
538 	struct cand *q;
539 	for (i = 0; i <= len[0]; i++)
540 		J[i] = i <= pref ? i :
541 		    i > len[0] - suff ? i + len[1] - len[0] :
542 		    0;
543 	for (q = clist + p; q->y != 0; q = clist + q->pred)
544 		J[q->x + pref] = q->y + pref;
545 }
546 
547 /*
548  * check does double duty: 1.  ferret out any fortuitous correspondences due
549  * to confounding by hashing (which result in "jackpot") 2.  collect random
550  * access indexes to the two files
551  */
552 
553 static void
554 check(void)
555 {
556 	int i, j;
557 	int jackpot;
558 	long ctold, ctnew;
559 	int c, d;
560 
561 	if ((input[0] = fopen(file1, "r")) == NULL) {
562 		perror(file1);
563 		done();
564 	}
565 	if ((input[1] = fopen(file2, "r")) == NULL) {
566 		perror(file2);
567 		done();
568 	}
569 	j = 1;
570 	ixold[0] = ixnew[0] = 0;
571 	jackpot = 0;
572 	ctold = ctnew = 0;
573 	for (i = 1; i <= len[0]; i++) {
574 		if (J[i] == 0) {
575 			ixold[i] = ctold += skipline(0);
576 			continue;
577 		}
578 		while (j < J[i]) {
579 			ixnew[j] = ctnew += skipline(1);
580 			j++;
581 		}
582 		if (bflag || wflag || iflag) {
583 			for (;;) {
584 				c = getc(input[0]);
585 				d = getc(input[1]);
586 				ctold++;
587 				ctnew++;
588 				if (bflag && isspace(c) && isspace(d)) {
589 					do {
590 						if (c == '\n')
591 							break;
592 						ctold++;
593 					} while (isspace(c = getc(input[0])));
594 					do {
595 						if (d == '\n')
596 							break;
597 						ctnew++;
598 					} while (isspace(d = getc(input[1])));
599 				} else if (wflag) {
600 					while (isspace(c) && c != '\n') {
601 						c = getc(input[0]);
602 						ctold++;
603 					}
604 					while (isspace(d) && d != '\n') {
605 						d = getc(input[1]);
606 						ctnew++;
607 					}
608 				}
609 				if (chrtran[c] != chrtran[d]) {
610 					jackpot++;
611 					J[i] = 0;
612 					if (c != '\n')
613 						ctold += skipline(0);
614 					if (d != '\n')
615 						ctnew += skipline(1);
616 					break;
617 				}
618 				if (c == '\n')
619 					break;
620 			}
621 		} else {
622 			for (;;) {
623 				ctold++;
624 				ctnew++;
625 				if ((c = getc(input[0])) != (d = getc(input[1]))) {
626 					/* jackpot++; */
627 					J[i] = 0;
628 					if (c != '\n')
629 						ctold += skipline(0);
630 					if (d != '\n')
631 						ctnew += skipline(1);
632 					break;
633 				}
634 				if (c == '\n')
635 					break;
636 			}
637 		}
638 		ixold[i] = ctold;
639 		ixnew[j] = ctnew;
640 		j++;
641 	}
642 	for (; j <= len[1]; j++) {
643 		ixnew[j] = ctnew += skipline(1);
644 	}
645 	fclose(input[0]);
646 	fclose(input[1]);
647 	/*
648 		if(jackpot)
649 			fprintf(stderr, "jackpot\n");
650 	*/
651 }
652 
653 static void
654 sort(struct line *a, int n)
655 {				/* shellsort CACM #201 */
656 	struct line w;
657 	int j, m = 0;		/* gcc */
658 	struct line *ai;
659 	struct line *aim;
660 	int k;
661 
662 	if (n == 0)
663 		return;
664 	for (j = 1; j <= n; j *= 2)
665 		m = 2 * j - 1;
666 	for (m /= 2; m != 0; m /= 2) {
667 		k = n - m;
668 		for (j = 1; j <= k; j++) {
669 			for (ai = &a[j]; ai > a; ai -= m) {
670 				aim = &ai[m];
671 				if (aim < ai)
672 					break;	/* wraparound */
673 				if (aim->value > ai[0].value ||
674 				    (aim->value == ai[0].value &&
675 					aim->serial > ai[0].serial))
676 					break;
677 				w.value = ai[0].value;
678 				ai[0].value = aim->value;
679 				aim->value = w.value;
680 				w.serial = ai[0].serial;
681 				ai[0].serial = aim->serial;
682 				aim->serial = w.serial;
683 			}
684 		}
685 	}
686 }
687 
688 static void
689 unsort(struct line *f, int l, int *b)
690 {
691 	int *a;
692 	int i;
693 
694 	a = talloc((l + 1) * sizeof(int));
695 	for (i = 1; i <= l; i++)
696 		a[f[i].serial] = f[i].value;
697 	for (i = 1; i <= l; i++)
698 		b[i] = a[i];
699 	free(a);
700 }
701 
702 static int
703 skipline(int f)
704 {
705 	int i, c;
706 
707 	for (i = 1; (c = getc(input[f])) != '\n'; i++)
708 		if (c < 0)
709 			return (i);
710 	return (i);
711 }
712 
713 static void
714 output(void)
715 {
716 	int m;
717 	int i0, i1, j1;
718 	int j0;
719 	input[0] = fopen(file1, "r");
720 	input[1] = fopen(file2, "r");
721 	m = len[0];
722 	J[0] = 0;
723 	J[m + 1] = len[1] + 1;
724 	if (opt != D_EDIT) {
725 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
726 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
727 				i0++;
728 			j0 = J[i0 - 1] + 1;
729 			i1 = i0 - 1;
730 			while (i1 < m && J[i1 + 1] == 0)
731 				i1++;
732 			j1 = J[i1 + 1] - 1;
733 			J[i1] = j1;
734 			change(i0, i1, j0, j1);
735 		}
736 	} else {
737 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
738 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
739 				i0--;
740 			j0 = J[i0 + 1] - 1;
741 			i1 = i0 + 1;
742 			while (i1 > 1 && J[i1 - 1] == 0)
743 				i1--;
744 			j1 = J[i1 - 1] + 1;
745 			J[i1] = j1;
746 			change(i1, i0, j1, j0);
747 		}
748 	}
749 	if (m == 0)
750 		change(1, 0, 1, len[1]);
751 	if (opt == D_IFDEF) {
752 		for (;;) {
753 #define	c i0
754 			c = getc(input[0]);
755 			if (c < 0)
756 				return;
757 			putchar(c);
758 		}
759 #undef c
760 	}
761 	if (anychange && opt == D_CONTEXT)
762 		dump_context_vec();
763 }
764 
765 /*
766  * The following struct is used to record change information when
767  * doing a "context" diff.  (see routine "change" to understand the
768  * highly mneumonic field names)
769  */
770 struct context_vec {
771 	int a;			/* start line in old file */
772 	int b;			/* end line in old file */
773 	int c;			/* start line in new file */
774 	int d;			/* end line in new file */
775 };
776 
777 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr;
778 
779 #define	MAX_CONTEXT	128
780 
781 /*
782  * indicate that there is a difference between lines a and b of the from file
783  * to get to lines c to d of the to file. If a is greater then b then there
784  * are no lines in the from file involved and this means that there were
785  * lines appended (beginning at b). If c is greater than d then there are
786  * lines missing from the to file.
787  */
788 static void
789 change(int a, int b, int c, int d)
790 {
791 	struct stat stbuf;
792 
793 	if (opt != D_IFDEF && a > b && c > d)
794 		return;
795 	if (anychange == 0) {
796 		anychange = 1;
797 		if (opt == D_CONTEXT) {
798 			printf("*** %s	", file1);
799 			stat(file1, &stbuf);
800 			printf("%s--- %s	",
801 			    ctime(&stbuf.st_mtime), file2);
802 			stat(file2, &stbuf);
803 			printf("%s", ctime(&stbuf.st_mtime));
804 
805 			context_vec_start = talloc(MAX_CONTEXT *
806 			    sizeof(struct context_vec));
807 			context_vec_end = context_vec_start + MAX_CONTEXT;
808 			context_vec_ptr = context_vec_start - 1;
809 		}
810 	}
811 	if (opt == D_CONTEXT) {
812 		/*
813 		 * if this new change is within 'context' lines of
814 		 * the previous change, just add it to the change
815 		 * record.  If the record is full or if this
816 		 * change is more than 'context' lines from the previous
817 		 * change, dump the record, reset it & add the new change.
818 		 */
819 		if (context_vec_ptr >= context_vec_end ||
820 		    (context_vec_ptr >= context_vec_start &&
821 			a > (context_vec_ptr->b + 2 * context) &&
822 			c > (context_vec_ptr->d + 2 * context)))
823 			dump_context_vec();
824 
825 		context_vec_ptr++;
826 		context_vec_ptr->a = a;
827 		context_vec_ptr->b = b;
828 		context_vec_ptr->c = c;
829 		context_vec_ptr->d = d;
830 		return;
831 	}
832 	switch (opt) {
833 
834 	case D_NORMAL:
835 	case D_EDIT:
836 		range(a, b, ",");
837 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
838 		if (opt == D_NORMAL)
839 			range(c, d, ",");
840 		putchar('\n');
841 		break;
842 	case D_REVERSE:
843 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
844 		range(a, b, " ");
845 		putchar('\n');
846 		break;
847 	case D_NREVERSE:
848 		if (a > b)
849 			printf("a%d %d\n", b, d - c + 1);
850 		else {
851 			printf("d%d %d\n", a, b - a + 1);
852 			if (!(c > d))
853 				/* add changed lines */
854 				printf("a%d %d\n", b, d - c + 1);
855 		}
856 		break;
857 	}
858 	if (opt == D_NORMAL || opt == D_IFDEF) {
859 		fetch(ixold, a, b, input[0], "< ", 1);
860 		if (a <= b && c <= d && opt == D_NORMAL)
861 			prints("---\n");
862 	}
863 	fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0);
864 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
865 		prints(".\n");
866 	if (inifdef) {
867 		fprintf(stdout, "#endif /* %s */\n", endifname);
868 		inifdef = 0;
869 	}
870 }
871 
872 static void
873 range(int a, int b, char *separator)
874 {
875 	printf("%d", a > b ? b : a);
876 	if (a < b) {
877 		printf("%s%d", separator, b);
878 	}
879 }
880 
881 static void
882 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile)
883 {
884 	int i, j;
885 	int c;
886 	int col;
887 	int nc;
888 	int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
889 
890 	/*
891 	 * When doing #ifdef's, copy down to current line
892 	 * if this is the first file, so that stuff makes it to output.
893 	 */
894 	if (opt == D_IFDEF && oldfile) {
895 		long curpos = ftell(lb);
896 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
897 		nc = f[a > b ? b : a - 1] - curpos;
898 		for (i = 0; i < nc; i++)
899 			putchar(getc(lb));
900 	}
901 	if (a > b)
902 		return;
903 	if (opt == D_IFDEF) {
904 		if (inifdef)
905 			fprintf(stdout, "#else /* %s%s */\n", oneflag && oldfile == 1 ? "!" : "", ifdef2);
906 		else {
907 			if (oneflag) {
908 				/* There was only one ifdef given */
909 				endifname = ifdef2;
910 				if (oldfile)
911 					fprintf(stdout, "#ifndef %s\n", endifname);
912 				else
913 					fprintf(stdout, "#ifdef %s\n", endifname);
914 			} else {
915 				endifname = oldfile ? ifdef1 : ifdef2;
916 				fprintf(stdout, "#ifdef %s\n", endifname);
917 			}
918 		}
919 		inifdef = 1 + oldfile;
920 	}
921 	for (i = a; i <= b; i++) {
922 		fseek(lb, f[i - 1], 0);
923 		nc = f[i] - f[i - 1];
924 		if (opt != D_IFDEF)
925 			prints(s);
926 		col = 0;
927 		for (j = 0; j < nc; j++) {
928 			c = getc(lb);
929 			if (c == '\t' && tflag)
930 				do
931 					putchar(' ');
932 				while (++col & 7);
933 			else {
934 				putchar(c);
935 				col++;
936 			}
937 		}
938 	}
939 
940 	if (inifdef && !wantelses) {
941 		fprintf(stdout, "#endif /* %s */\n", endifname);
942 		inifdef = 0;
943 	}
944 }
945 
946 #define POW2			/* define only if HALFLONG is 2**n */
947 #define HALFLONG 16
948 #define low(x)	(x&((1L<<HALFLONG)-1))
949 #define high(x)	(x>>HALFLONG)
950 
951 /*
952  * hashing has the effect of
953  * arranging line in 7-bit bytes and then
954  * summing 1-s complement in 16-bit hunks
955  */
956 static int
957 readhash(FILE *f)
958 {
959 	long sum;
960 	unsigned shift;
961 	int t;
962 	int space;
963 
964 	sum = 1;
965 	space = 0;
966 	if (!bflag && !wflag) {
967 		if (iflag)
968 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
969 				if (t == -1)
970 					return (0);
971 				sum += (long)chrtran[t] << (shift
972 #ifdef POW2
973 				    &= HALFLONG - 1);
974 #else
975 				    %= HALFLONG);
976 #endif
977 			}
978 		else
979 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
980 				if (t == -1)
981 					return (0);
982 				sum += (long)t << (shift
983 #ifdef POW2
984 				    &= HALFLONG - 1);
985 #else
986 				    %= HALFLONG);
987 #endif
988 			}
989 	} else {
990 		for (shift = 0;;) {
991 			switch (t = getc(f)) {
992 			case -1:
993 				return (0);
994 			case '\t':
995 			case ' ':
996 				space++;
997 				continue;
998 			default:
999 				if (space && !wflag) {
1000 					shift += 7;
1001 					space = 0;
1002 				}
1003 				sum += (long)chrtran[t] << (shift
1004 #ifdef POW2
1005 				    &= HALFLONG - 1);
1006 #else
1007 				    %= HALFLONG);
1008 #endif
1009 				shift += 7;
1010 				continue;
1011 			case '\n':
1012 				break;
1013 			}
1014 			break;
1015 		}
1016 	}
1017 	sum = low(sum) + high(sum);
1018 	return ((short) low(sum) + (short) high(sum));
1019 }
1020 
1021 static int
1022 asciifile(FILE *f)
1023 {
1024 	char buf[BUFSIZ];
1025 	int cnt;
1026 	char *cp;
1027 
1028 	fseek(f, 0, 0);
1029 	cnt = fread(buf, 1, BUFSIZ, f);
1030 	cp = buf;
1031 	while (--cnt >= 0)
1032 		if (*cp++ & 0200)
1033 			return (0);
1034 	return (1);
1035 }
1036 
1037 
1038 /* dump accumulated "context" diff changes */
1039 static void
1040 dump_context_vec(void)
1041 {
1042 	int a, b, c, d;
1043 	char ch;
1044 	struct context_vec *cvp = context_vec_start;
1045 	int lowa, upb, lowc, upd;
1046 	int do_output;
1047 
1048 	if (cvp > context_vec_ptr)
1049 		return;
1050 
1051 	b = d = 0;		/* gcc */
1052 	lowa = max(1, cvp->a - context);
1053 	upb = min(len[0], context_vec_ptr->b + context);
1054 	lowc = max(1, cvp->c - context);
1055 	upd = min(len[1], context_vec_ptr->d + context);
1056 
1057 	printf("***************\n*** ");
1058 	range(lowa, upb, ",");
1059 	printf(" ****\n");
1060 
1061 	/*
1062 	 * output changes to the "old" file.  The first loop suppresses
1063 	 * output if there were no changes to the "old" file (we'll see
1064 	 * the "old" lines as context in the "new" list).
1065 	 */
1066 	do_output = 0;
1067 	for (; cvp <= context_vec_ptr; cvp++)
1068 		if (cvp->a <= cvp->b) {
1069 			cvp = context_vec_start;
1070 			do_output++;
1071 			break;
1072 		}
1073 	if (do_output) {
1074 		while (cvp <= context_vec_ptr) {
1075 			a = cvp->a;
1076 			b = cvp->b;
1077 			c = cvp->c;
1078 			d = cvp->d;
1079 
1080 			if (a <= b && c <= d)
1081 				ch = 'c';
1082 			else
1083 				ch = (a <= b) ? 'd' : 'a';
1084 
1085 			if (ch == 'a')
1086 				fetch(ixold, lowa, b, input[0], "  ", 0);
1087 			else {
1088 				fetch(ixold, lowa, a - 1, input[0], "  ", 0);
1089 				fetch(ixold, a, b, input[0], ch == 'c' ? "! " : "- ", 0);
1090 			}
1091 			lowa = b + 1;
1092 			cvp++;
1093 		}
1094 		fetch(ixold, b + 1, upb, input[0], "  ", 0);
1095 	}
1096 	/* output changes to the "new" file */
1097 	printf("--- ");
1098 	range(lowc, upd, ",");
1099 	printf(" ----\n");
1100 
1101 	do_output = 0;
1102 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1103 		if (cvp->c <= cvp->d) {
1104 			cvp = context_vec_start;
1105 			do_output++;
1106 			break;
1107 		}
1108 	if (do_output) {
1109 		while (cvp <= context_vec_ptr) {
1110 			a = cvp->a;
1111 			b = cvp->b;
1112 			c = cvp->c;
1113 			d = cvp->d;
1114 
1115 			if (a <= b && c <= d)
1116 				ch = 'c';
1117 			else
1118 				ch = (a <= b) ? 'd' : 'a';
1119 
1120 			if (ch == 'd')
1121 				fetch(ixnew, lowc, d, input[1], "  ", 0);
1122 			else {
1123 				fetch(ixnew, lowc, c - 1, input[1], "  ", 0);
1124 				fetch(ixnew, c, d, input[1], ch == 'c' ? "! " : "+ ", 0);
1125 			}
1126 			lowc = d + 1;
1127 			cvp++;
1128 		}
1129 		fetch(ixnew, d + 1, upd, input[1], "  ", 0);
1130 	}
1131 	context_vec_ptr = context_vec_start - 1;
1132 }
1133