xref: /openbsd-src/usr.bin/diff/diffreg.c (revision 91cf91eeec978125080d4e998a2fe316f7b6c38a)
1 /*	$OpenBSD: diffreg.c,v 1.12 2003/06/25 03:53:59 deraadt Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 
39 #include <stdlib.h>
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <string.h>
43 
44 #include "diff.h"
45 #include "pathnames.h"
46 
47 #if 0
48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90";
49 #endif
50 
51 /*
52  * diff - compare two files.
53  */
54 
55 /*
56  *	Uses an algorithm due to Harold Stone, which finds
57  *	a pair of longest identical subsequences in the two
58  *	files.
59  *
60  *	The major goal is to generate the match vector J.
61  *	J[i] is the index of the line in file1 corresponding
62  *	to line i file0. J[i] = 0 if there is no
63  *	such line in file1.
64  *
65  *	Lines are hashed so as to work in core. All potential
66  *	matches are located by sorting the lines of each file
67  *	on the hash (called ``value''). In particular, this
68  *	collects the equivalence classes in file1 together.
69  *	Subroutine equiv replaces the value of each line in
70  *	file0 by the index of the first element of its
71  *	matching equivalence in (the reordered) file1.
72  *	To save space equiv squeezes file1 into a single
73  *	array member in which the equivalence classes
74  *	are simply concatenated, except that their first
75  *	members are flagged by changing sign.
76  *
77  *	Next the indices that point into member are unsorted into
78  *	array class according to the original order of file0.
79  *
80  *	The cleverness lies in routine stone. This marches
81  *	through the lines of file0, developing a vector klist
82  *	of "k-candidates". At step i a k-candidate is a matched
83  *	pair of lines x,y (x in file0 y in file1) such that
84  *	there is a common subsequence of length k
85  *	between the first i lines of file0 and the first y
86  *	lines of file1, but there is no such subsequence for
87  *	any smaller y. x is the earliest possible mate to y
88  *	that occurs in such a subsequence.
89  *
90  *	Whenever any of the members of the equivalence class of
91  *	lines in file1 matable to a line in file0 has serial number
92  *	less than the y of some k-candidate, that k-candidate
93  *	with the smallest such y is replaced. The new
94  *	k-candidate is chained (via pred) to the current
95  *	k-1 candidate so that the actual subsequence can
96  *	be recovered. When a member has serial number greater
97  *	that the y of all k-candidates, the klist is extended.
98  *	At the end, the longest subsequence is pulled out
99  *	and placed in the array J by unravel
100  *
101  *	With J in hand, the matches there recorded are
102  *	check'ed against reality to assure that no spurious
103  *	matches have crept in due to hashing. If they have,
104  *	they are broken, and "jackpot" is recorded--a harmless
105  *	matter except that a true match for a spuriously
106  *	mated line may now be unnecessarily reported as a change.
107  *
108  *	Much of the complexity of the program comes simply
109  *	from trying to minimize core utilization and
110  *	maximize the range of doable problems by dynamically
111  *	allocating what is needed and reusing what is not.
112  *	The core requirements for problems larger than somewhat
113  *	are (in words) 2*length(file0) + length(file1) +
114  *	3*(number of k-candidates installed),  typically about
115  *	6n words for files of length n.
116  */
117 
118 #define	prints(s)	fputs(s,stdout)
119 
120 FILE *input[2];
121 
122 struct cand {
123 	int x;
124 	int y;
125 	int pred;
126 } cand;
127 
128 struct line {
129 	int serial;
130 	int value;
131 } *file[2];
132 
133 int len[2];
134 struct line *sfile[2];	/* shortened by pruning common prefix and suffix */
135 int slen[2];
136 int pref, suff;			/* length of prefix and suffix */
137 int *class;			/* will be overlaid on file[0] */
138 int *member;			/* will be overlaid on file[1] */
139 int *klist;			/* will be overlaid on file[0] after class */
140 struct cand *clist;		/* merely a free storage pot for candidates */
141 int clen = 0;
142 int *J;				/* will be overlaid on class */
143 long *ixold;			/* will be overlaid on klist */
144 long *ixnew;			/* will be overlaid on file[1] */
145 u_char *chrtran;		/* translation table for case-folding */
146 
147 static void fetch(long *, int, int, FILE *, char *, int);
148 static void output(void);
149 static void check(void);
150 static void range(int, int, char *);
151 static void dump_context_vec(void);
152 static void prepare(int, FILE *);
153 static void prune(void);
154 static void equiv(struct line *, int, struct line *, int, int *);
155 static void unravel(int);
156 static void unsort(struct line *, int, int *);
157 static void change(int, int, int, int);
158 static void sort(struct line *, int);
159 static int newcand(int, int, int);
160 static int search(int *, int, int);
161 static int skipline(int);
162 static int asciifile(FILE *);
163 static int stone(int *, int, int *, int *);
164 static int readhash(FILE *);
165 
166 /*
167  * chrtran points to one of 2 translation tables: cup2low if folding upper to
168  * lower case clow2low if not folding case
169  */
170 u_char clow2low[256] = {
171 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
172 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
173 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
174 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
175 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
176 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
177 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
178 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
179 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
180 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
181 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
182 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
183 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
184 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
185 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
186 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
187 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
188 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
189 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
190 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
191 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
192 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
193 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
194 	0xfd, 0xfe, 0xff
195 };
196 
197 u_char cup2low[256] = {
198 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
199 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
200 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
201 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
202 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
203 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
204 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
205 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
206 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
207 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
208 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
209 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
210 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
211 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
212 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
213 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
214 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
215 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
216 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
217 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
218 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
219 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
220 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
221 	0xfd, 0xfe, 0xff
222 };
223 
224 void
225 diffreg(void)
226 {
227 	char buf1[BUFSIZ], buf2[BUFSIZ];
228 	FILE *f1, *f2;
229 	int i, j;
230 
231 	if (hflag) {
232 		diffargv[0] = "diffh";
233 		execv(diffh, diffargv);
234 		fprintf(stderr, "diff: ");
235 		perror(diffh);
236 		done(0);
237 	}
238 	chrtran = (iflag ? cup2low : clow2low);
239 	if ((stb1.st_mode & S_IFMT) == S_IFDIR) {
240 		file1 = splice(file1, file2);
241 		if (stat(file1, &stb1) < 0) {
242 			fprintf(stderr, "diff: ");
243 			perror(file1);
244 			done(0);
245 		}
246 	} else if ((stb2.st_mode & S_IFMT) == S_IFDIR) {
247 		file2 = splice(file2, file1);
248 		if (stat(file2, &stb2) < 0) {
249 			fprintf(stderr, "diff: ");
250 			perror(file2);
251 			done(0);
252 		}
253 	} else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) {
254 		if (!strcmp(file2, "-")) {
255 			fprintf(stderr, "diff: can't specify - -\n");
256 			done(0);
257 		}
258 		file1 = copytemp();
259 		if (stat(file1, &stb1) < 0) {
260 			fprintf(stderr, "diff: ");
261 			perror(file1);
262 			done(0);
263 		}
264 	} else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) {
265 		file2 = copytemp();
266 		if (stat(file2, &stb2) < 0) {
267 			fprintf(stderr, "diff: ");
268 			perror(file2);
269 			done(0);
270 		}
271 	}
272 	if ((f1 = fopen(file1, "r")) == NULL) {
273 		fprintf(stderr, "diff: ");
274 		perror(file1);
275 		done(0);
276 	}
277 	if ((f2 = fopen(file2, "r")) == NULL) {
278 		fprintf(stderr, "diff: ");
279 		perror(file2);
280 		fclose(f1);
281 		done(0);
282 	}
283 	if (stb1.st_size != stb2.st_size)
284 		goto notsame;
285 	for (;;) {
286 		i = fread(buf1, 1, BUFSIZ, f1);
287 		j = fread(buf2, 1, BUFSIZ, f2);
288 		if (i < 0 || j < 0 || i != j)
289 			goto notsame;
290 		if (i == 0 && j == 0) {
291 			fclose(f1);
292 			fclose(f2);
293 			status = 0;	/* files don't differ */
294 			goto same;
295 		}
296 		for (j = 0; j < i; j++)
297 			if (buf1[j] != buf2[j])
298 				goto notsame;
299 	}
300 notsame:
301 	/*
302 	 * Files certainly differ at this point; set status accordingly
303 	 */
304 	status = 1;
305 	if (!asciifile(f1) || !asciifile(f2)) {
306 		printf("Binary files %s and %s differ\n", file1, file2);
307 		fclose(f1);
308 		fclose(f2);
309 		done(0);
310 	}
311 	prepare(0, f1);
312 	prepare(1, f2);
313 	fclose(f1);
314 	fclose(f2);
315 	prune();
316 	sort(sfile[0], slen[0]);
317 	sort(sfile[1], slen[1]);
318 
319 	member = (int *)file[1];
320 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
321 	member = ralloc(member, (slen[1] + 2) * sizeof(int));
322 
323 	class = (int *)file[0];
324 	unsort(sfile[0], slen[0], class);
325 	class = ralloc(class, (slen[0] + 2) * sizeof(int));
326 
327 	klist = talloc((slen[0] + 2) * sizeof(int));
328 	clist = talloc(sizeof(cand));
329 	i = stone(class, slen[0], member, klist);
330 	free(member);
331 	free(class);
332 
333 	J = talloc((len[0] + 2) * sizeof(int));
334 	unravel(klist[i]);
335 	free(clist);
336 	free(klist);
337 
338 	ixold = talloc((len[0] + 2) * sizeof(long));
339 	ixnew = talloc((len[1] + 2) * sizeof(long));
340 	check();
341 	output();
342 	status = anychange;
343 same:
344 	if (opt == D_CONTEXT && anychange == 0)
345 		printf("No differences encountered\n");
346 	done(0);
347 }
348 
349 char *tempfile = _PATH_TMP;
350 
351 char *
352 copytemp(void)
353 {
354 	char buf[BUFSIZ];
355 	int i, f;
356 
357 	signal(SIGHUP, catchsig);
358 	signal(SIGINT, catchsig);
359 	signal(SIGPIPE, catchsig);
360 	signal(SIGTERM, catchsig);
361 	f = mkstemp(tempfile);
362 	if (f < 0) {
363 		fprintf(stderr, "diff: ");
364 		perror(tempfile);
365 		done(0);
366 	}
367 	while ((i = read(0, buf, BUFSIZ)) > 0)
368 		if (write(f, buf, i) != i) {
369 			fprintf(stderr, "diff: ");
370 			perror(tempfile);
371 			done(0);
372 		}
373 	close(f);
374 	return (tempfile);
375 }
376 
377 char *
378 splice(char *dir, char *file)
379 {
380 	char *tail, buf[BUFSIZ];
381 
382 	if (!strcmp(file, "-")) {
383 		fprintf(stderr, "diff: can't specify - with other arg directory\n");
384 		done(0);
385 	}
386 	tail = strrchr(file, '/');
387 	if (tail == 0)
388 		tail = file;
389 	else
390 		tail++;
391 	snprintf(buf, sizeof buf, "%s/%s", dir, tail);
392 	return (strdup(buf));
393 }
394 
395 static void
396 prepare(int i, FILE *fd)
397 {
398 	struct line *p;
399 	int j, h;
400 
401 	fseek(fd, (off_t)0, SEEK_SET);
402 	p = talloc(3 * sizeof(struct line));
403 	for (j = 0; (h = readhash(fd));) {
404 		p = ralloc(p, (++j + 3) * sizeof(struct line));
405 		p[j].value = h;
406 	}
407 	len[i] = j;
408 	file[i] = p;
409 }
410 
411 static void
412 prune(void)
413 {
414 	int i, j;
415 
416 	for (pref = 0; pref < len[0] && pref < len[1] &&
417 	    file[0][pref + 1].value == file[1][pref + 1].value;
418 	    pref++)
419 		;
420 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
421 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
422 	    suff++)
423 		;
424 	for (j = 0; j < 2; j++) {
425 		sfile[j] = file[j] + pref;
426 		slen[j] = len[j] - pref - suff;
427 		for (i = 0; i <= slen[j]; i++)
428 			sfile[j][i].serial = i;
429 	}
430 }
431 
432 static void
433 equiv(struct line *a, int n, struct line *b, int m, int *c)
434 {
435 	int i, j;
436 
437 	i = j = 1;
438 	while (i <= n && j <= m) {
439 		if (a[i].value < b[j].value)
440 			a[i++].value = 0;
441 		else if (a[i].value == b[j].value)
442 			a[i++].value = j;
443 		else
444 			j++;
445 	}
446 	while (i <= n)
447 		a[i++].value = 0;
448 	b[m + 1].value = 0;
449 	j = 0;
450 	while (++j <= m) {
451 		c[j] = -b[j].serial;
452 		while (b[j + 1].value == b[j].value) {
453 			j++;
454 			c[j] = b[j].serial;
455 		}
456 	}
457 	c[j] = -1;
458 }
459 
460 static int
461 stone(int *a, int n, int *b, int *c)
462 {
463 	int i, k, y, j, l;
464 	int oldc, tc, oldl;
465 
466 	k = 0;
467 	c[0] = newcand(0, 0, 0);
468 	for (i = 1; i <= n; i++) {
469 		j = a[i];
470 		if (j == 0)
471 			continue;
472 		y = -b[j];
473 		oldl = 0;
474 		oldc = c[0];
475 		do {
476 			if (y <= clist[oldc].y)
477 				continue;
478 			l = search(c, k, y);
479 			if (l != oldl + 1)
480 				oldc = c[l - 1];
481 			if (l <= k) {
482 				if (clist[c[l]].y <= y)
483 					continue;
484 				tc = c[l];
485 				c[l] = newcand(i, y, oldc);
486 				oldc = tc;
487 				oldl = l;
488 			} else {
489 				c[l] = newcand(i, y, oldc);
490 				k++;
491 				break;
492 			}
493 		} while ((y = b[++j]) > 0);
494 	}
495 	return (k);
496 }
497 
498 static int
499 newcand(int x, int y, int pred)
500 {
501 	struct cand *q;
502 
503 	clist = ralloc(clist, ++clen * sizeof(cand));
504 	q = clist + clen - 1;
505 	q->x = x;
506 	q->y = y;
507 	q->pred = pred;
508 	return (clen - 1);
509 }
510 
511 static int
512 search(int *c, int k, int y)
513 {
514 	int i, j, l, t;
515 
516 	if (clist[c[k]].y < y)	/* quick look for typical case */
517 		return (k + 1);
518 	i = 0;
519 	j = k + 1;
520 	while (1) {
521 		l = i + j;
522 		if ((l >>= 1) <= i)
523 			break;
524 		t = clist[c[l]].y;
525 		if (t > y)
526 			j = l;
527 		else if (t < y)
528 			i = l;
529 		else
530 			return (l);
531 	}
532 	return (l + 1);
533 }
534 
535 static void
536 unravel(int p)
537 {
538 	struct cand *q;
539 	int i;
540 
541 	for (i = 0; i <= len[0]; i++)
542 		J[i] = i <= pref ? i :
543 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
544 	for (q = clist + p; q->y != 0; q = clist + q->pred)
545 		J[q->x + pref] = q->y + pref;
546 }
547 
548 /*
549  * check does double duty: 1.  ferret out any fortuitous correspondences due
550  * to confounding by hashing (which result in "jackpot") 2.  collect random
551  * access indexes to the two files
552  */
553 
554 static void
555 check(void)
556 {
557 	int i, j, jackpot, c, d;
558 	long ctold, ctnew;
559 
560 	if ((input[0] = fopen(file1, "r")) == NULL) {
561 		perror(file1);
562 		done(0);
563 	}
564 	if ((input[1] = fopen(file2, "r")) == NULL) {
565 		perror(file2);
566 		done(0);
567 	}
568 	j = 1;
569 	ixold[0] = ixnew[0] = 0;
570 	jackpot = 0;
571 	ctold = ctnew = 0;
572 	for (i = 1; i <= len[0]; i++) {
573 		if (J[i] == 0) {
574 			ixold[i] = ctold += skipline(0);
575 			continue;
576 		}
577 		while (j < J[i]) {
578 			ixnew[j] = ctnew += skipline(1);
579 			j++;
580 		}
581 		if (bflag || wflag || iflag) {
582 			for (;;) {
583 				c = getc(input[0]);
584 				d = getc(input[1]);
585 				ctold++;
586 				ctnew++;
587 				if (bflag && isspace(c) && isspace(d)) {
588 					do {
589 						if (c == '\n')
590 							break;
591 						ctold++;
592 					} while (isspace(c = getc(input[0])));
593 					do {
594 						if (d == '\n')
595 							break;
596 						ctnew++;
597 					} while (isspace(d = getc(input[1])));
598 				} else if (wflag) {
599 					while (isspace(c) && c != '\n') {
600 						c = getc(input[0]);
601 						ctold++;
602 					}
603 					while (isspace(d) && d != '\n') {
604 						d = getc(input[1]);
605 						ctnew++;
606 					}
607 				}
608 				if (chrtran[c] != chrtran[d]) {
609 					jackpot++;
610 					J[i] = 0;
611 					if (c != '\n')
612 						ctold += skipline(0);
613 					if (d != '\n')
614 						ctnew += skipline(1);
615 					break;
616 				}
617 				if (c == '\n')
618 					break;
619 			}
620 		} else {
621 			for (;;) {
622 				ctold++;
623 				ctnew++;
624 				if ((c = getc(input[0])) != (d = getc(input[1]))) {
625 					/* jackpot++; */
626 					J[i] = 0;
627 					if (c != '\n')
628 						ctold += skipline(0);
629 					if (d != '\n')
630 						ctnew += skipline(1);
631 					break;
632 				}
633 				if (c == '\n')
634 					break;
635 			}
636 		}
637 		ixold[i] = ctold;
638 		ixnew[j] = ctnew;
639 		j++;
640 	}
641 	for (; j <= len[1]; j++) {
642 		ixnew[j] = ctnew += skipline(1);
643 	}
644 	fclose(input[0]);
645 	fclose(input[1]);
646 	/*
647 	 * if (jackpot)
648 	 *	fprintf(stderr, "jackpot\n");
649 	 */
650 }
651 
652 /* shellsort CACM #201 */
653 static void
654 sort(struct line *a, int n)
655 {
656 	struct line *ai, *aim, w;
657 	int j, m = 0, k;
658 
659 	if (n == 0)
660 		return;
661 	for (j = 1; j <= n; j *= 2)
662 		m = 2 * j - 1;
663 	for (m /= 2; m != 0; m /= 2) {
664 		k = n - m;
665 		for (j = 1; j <= k; j++) {
666 			for (ai = &a[j]; ai > a; ai -= m) {
667 				aim = &ai[m];
668 				if (aim < ai)
669 					break;	/* wraparound */
670 				if (aim->value > ai[0].value ||
671 				    (aim->value == ai[0].value &&
672 					aim->serial > ai[0].serial))
673 					break;
674 				w.value = ai[0].value;
675 				ai[0].value = aim->value;
676 				aim->value = w.value;
677 				w.serial = ai[0].serial;
678 				ai[0].serial = aim->serial;
679 				aim->serial = w.serial;
680 			}
681 		}
682 	}
683 }
684 
685 static void
686 unsort(struct line *f, int l, int *b)
687 {
688 	int *a, i;
689 
690 	a = talloc((l + 1) * sizeof(int));
691 	for (i = 1; i <= l; i++)
692 		a[f[i].serial] = f[i].value;
693 	for (i = 1; i <= l; i++)
694 		b[i] = a[i];
695 	free(a);
696 }
697 
698 static int
699 skipline(int f)
700 {
701 	int i, c;
702 
703 	for (i = 1; (c = getc(input[f])) != '\n'; i++)
704 		if (c < 0)
705 			return (i);
706 	return (i);
707 }
708 
709 static void
710 output(void)
711 {
712 	int m, i0, i1, j0, j1;
713 
714 	input[0] = fopen(file1, "r");
715 	input[1] = fopen(file2, "r");
716 	m = len[0];
717 	J[0] = 0;
718 	J[m + 1] = len[1] + 1;
719 	if (opt != D_EDIT) {
720 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
721 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
722 				i0++;
723 			j0 = J[i0 - 1] + 1;
724 			i1 = i0 - 1;
725 			while (i1 < m && J[i1 + 1] == 0)
726 				i1++;
727 			j1 = J[i1 + 1] - 1;
728 			J[i1] = j1;
729 			change(i0, i1, j0, j1);
730 		}
731 	} else {
732 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
733 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
734 				i0--;
735 			j0 = J[i0 + 1] - 1;
736 			i1 = i0 + 1;
737 			while (i1 > 1 && J[i1 - 1] == 0)
738 				i1--;
739 			j1 = J[i1 - 1] + 1;
740 			J[i1] = j1;
741 			change(i1, i0, j1, j0);
742 		}
743 	}
744 	if (m == 0)
745 		change(1, 0, 1, len[1]);
746 	if (opt == D_IFDEF) {
747 		for (;;) {
748 #define	c i0
749 			c = getc(input[0]);
750 			if (c < 0)
751 				return;
752 			putchar(c);
753 		}
754 #undef c
755 	}
756 	if (anychange && opt == D_CONTEXT)
757 		dump_context_vec();
758 }
759 
760 /*
761  * The following struct is used to record change information when
762  * doing a "context" diff.  (see routine "change" to understand the
763  * highly mneumonic field names)
764  */
765 struct context_vec {
766 	int a;			/* start line in old file */
767 	int b;			/* end line in old file */
768 	int c;			/* start line in new file */
769 	int d;			/* end line in new file */
770 };
771 
772 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr;
773 
774 #define	MAX_CONTEXT	128
775 
776 /*
777  * indicate that there is a difference between lines a and b of the from file
778  * to get to lines c to d of the to file. If a is greater then b then there
779  * are no lines in the from file involved and this means that there were
780  * lines appended (beginning at b). If c is greater than d then there are
781  * lines missing from the to file.
782  */
783 static void
784 change(int a, int b, int c, int d)
785 {
786 	struct stat stbuf;
787 
788 	if (opt != D_IFDEF && a > b && c > d)
789 		return;
790 	if (anychange == 0) {
791 		anychange = 1;
792 		if (opt == D_CONTEXT) {
793 			printf("*** %s	", file1);
794 			stat(file1, &stbuf);
795 			printf("%s--- %s	",
796 			    ctime(&stbuf.st_mtime), file2);
797 			stat(file2, &stbuf);
798 			printf("%s", ctime(&stbuf.st_mtime));
799 
800 			context_vec_start = talloc(MAX_CONTEXT *
801 			    sizeof(struct context_vec));
802 			context_vec_end = context_vec_start + MAX_CONTEXT;
803 			context_vec_ptr = context_vec_start - 1;
804 		}
805 	}
806 	if (opt == D_CONTEXT) {
807 		/*
808 		 * if this new change is within 'context' lines of
809 		 * the previous change, just add it to the change
810 		 * record.  If the record is full or if this
811 		 * change is more than 'context' lines from the previous
812 		 * change, dump the record, reset it & add the new change.
813 		 */
814 		if (context_vec_ptr >= context_vec_end ||
815 		    (context_vec_ptr >= context_vec_start &&
816 			a > (context_vec_ptr->b + 2 * context) &&
817 			c > (context_vec_ptr->d + 2 * context)))
818 			dump_context_vec();
819 
820 		context_vec_ptr++;
821 		context_vec_ptr->a = a;
822 		context_vec_ptr->b = b;
823 		context_vec_ptr->c = c;
824 		context_vec_ptr->d = d;
825 		return;
826 	}
827 	switch (opt) {
828 
829 	case D_NORMAL:
830 	case D_EDIT:
831 		range(a, b, ",");
832 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
833 		if (opt == D_NORMAL)
834 			range(c, d, ",");
835 		putchar('\n');
836 		break;
837 	case D_REVERSE:
838 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
839 		range(a, b, " ");
840 		putchar('\n');
841 		break;
842 	case D_NREVERSE:
843 		if (a > b)
844 			printf("a%d %d\n", b, d - c + 1);
845 		else {
846 			printf("d%d %d\n", a, b - a + 1);
847 			if (!(c > d))
848 				/* add changed lines */
849 				printf("a%d %d\n", b, d - c + 1);
850 		}
851 		break;
852 	}
853 	if (opt == D_NORMAL || opt == D_IFDEF) {
854 		fetch(ixold, a, b, input[0], "< ", 1);
855 		if (a <= b && c <= d && opt == D_NORMAL)
856 			prints("---\n");
857 	}
858 	fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0);
859 	if ((opt == D_EDIT || opt == D_REVERSE) && c <= d)
860 		prints(".\n");
861 	if (inifdef) {
862 		fprintf(stdout, "#endif /* %s */\n", endifname);
863 		inifdef = 0;
864 	}
865 }
866 
867 static void
868 range(int a, int b, char *separator)
869 {
870 	printf("%d", a > b ? b : a);
871 	if (a < b)
872 		printf("%s%d", separator, b);
873 }
874 
875 static void
876 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile)
877 {
878 	int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0');
879 	int i, j, c, col, nc;
880 
881 	/*
882 	 * When doing #ifdef's, copy down to current line
883 	 * if this is the first file, so that stuff makes it to output.
884 	 */
885 	if (opt == D_IFDEF && oldfile) {
886 		long curpos = ftell(lb);
887 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
888 		nc = f[a > b ? b : a - 1] - curpos;
889 		for (i = 0; i < nc; i++)
890 			putchar(getc(lb));
891 	}
892 	if (a > b)
893 		return;
894 	if (opt == D_IFDEF) {
895 		if (inifdef)
896 			fprintf(stdout, "#else /* %s%s */\n",
897 			    oneflag && oldfile == 1 ? "!" : "", ifdef2);
898 		else {
899 			if (oneflag) {
900 				/* There was only one ifdef given */
901 				endifname = ifdef2;
902 				if (oldfile)
903 					fprintf(stdout, "#ifndef %s\n", endifname);
904 				else
905 					fprintf(stdout, "#ifdef %s\n", endifname);
906 			} else {
907 				endifname = oldfile ? ifdef1 : ifdef2;
908 				fprintf(stdout, "#ifdef %s\n", endifname);
909 			}
910 		}
911 		inifdef = 1 + oldfile;
912 	}
913 	for (i = a; i <= b; i++) {
914 		fseek(lb, f[i - 1], SEEK_SET);
915 		nc = f[i] - f[i - 1];
916 		if (opt != D_IFDEF)
917 			prints(s);
918 		col = 0;
919 		for (j = 0; j < nc; j++) {
920 			c = getc(lb);
921 			if (c == '\t' && tflag)
922 				do
923 					putchar(' ');
924 				while (++col & 7)
925 					;
926 			else {
927 				putchar(c);
928 				col++;
929 			}
930 		}
931 	}
932 
933 	if (inifdef && !wantelses) {
934 		fprintf(stdout, "#endif /* %s */\n", endifname);
935 		inifdef = 0;
936 	}
937 }
938 
939 #define POW2			/* define only if HALFLONG is 2**n */
940 #define HALFLONG 16
941 #define low(x)	(x&((1L<<HALFLONG)-1))
942 #define high(x)	(x>>HALFLONG)
943 
944 /*
945  * hashing has the effect of
946  * arranging line in 7-bit bytes and then
947  * summing 1-s complement in 16-bit hunks
948  */
949 static int
950 readhash(FILE *f)
951 {
952 	unsigned int shift;
953 	int t, space;
954 	long sum;
955 
956 	sum = 1;
957 	space = 0;
958 	if (!bflag && !wflag) {
959 		if (iflag)
960 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
961 				if (t == -1)
962 					return (0);
963 				sum += (long)chrtran[t] << (shift
964 #ifdef POW2
965 				    &= HALFLONG - 1);
966 #else
967 				    %= HALFLONG);
968 #endif
969 			}
970 		else
971 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
972 				if (t == -1)
973 					return (0);
974 				sum += (long)t << (shift
975 #ifdef POW2
976 				    &= HALFLONG - 1);
977 #else
978 				    %= HALFLONG);
979 #endif
980 			}
981 	} else {
982 		for (shift = 0;;) {
983 			switch (t = getc(f)) {
984 			case -1:
985 				return (0);
986 			case '\t':
987 			case ' ':
988 				space++;
989 				continue;
990 			default:
991 				if (space && !wflag) {
992 					shift += 7;
993 					space = 0;
994 				}
995 				sum += (long)chrtran[t] << (shift
996 #ifdef POW2
997 				    &= HALFLONG - 1);
998 #else
999 				    %= HALFLONG);
1000 #endif
1001 				shift += 7;
1002 				continue;
1003 			case '\n':
1004 				break;
1005 			}
1006 			break;
1007 		}
1008 	}
1009 	sum = low(sum) + high(sum);
1010 	return ((short) low(sum) + (short) high(sum));
1011 }
1012 
1013 static int
1014 asciifile(FILE *f)
1015 {
1016 	char buf[BUFSIZ], *cp;
1017 	int cnt;
1018 
1019 	fseek(f, (off_t)0, SEEK_SET);
1020 	cnt = fread(buf, 1, BUFSIZ, f);
1021 	cp = buf;
1022 	while (--cnt >= 0)
1023 		if (*cp++ & 0200)
1024 			return (0);
1025 	return (1);
1026 }
1027 
1028 
1029 /* dump accumulated "context" diff changes */
1030 static void
1031 dump_context_vec(void)
1032 {
1033 	struct context_vec *cvp = context_vec_start;
1034 	int lowa, upb, lowc, upd, do_output;
1035 	int a, b, c, d;
1036 	char ch;
1037 
1038 	if (cvp > context_vec_ptr)
1039 		return;
1040 
1041 	b = d = 0;		/* gcc */
1042 	lowa = max(1, cvp->a - context);
1043 	upb = min(len[0], context_vec_ptr->b + context);
1044 	lowc = max(1, cvp->c - context);
1045 	upd = min(len[1], context_vec_ptr->d + context);
1046 
1047 	printf("***************\n*** ");
1048 	range(lowa, upb, ",");
1049 	printf(" ****\n");
1050 
1051 	/*
1052 	 * output changes to the "old" file.  The first loop suppresses
1053 	 * output if there were no changes to the "old" file (we'll see
1054 	 * the "old" lines as context in the "new" list).
1055 	 */
1056 	do_output = 0;
1057 	for (; cvp <= context_vec_ptr; cvp++)
1058 		if (cvp->a <= cvp->b) {
1059 			cvp = context_vec_start;
1060 			do_output++;
1061 			break;
1062 		}
1063 	if (do_output) {
1064 		while (cvp <= context_vec_ptr) {
1065 			a = cvp->a;
1066 			b = cvp->b;
1067 			c = cvp->c;
1068 			d = cvp->d;
1069 
1070 			if (a <= b && c <= d)
1071 				ch = 'c';
1072 			else
1073 				ch = (a <= b) ? 'd' : 'a';
1074 
1075 			if (ch == 'a')
1076 				fetch(ixold, lowa, b, input[0], "  ", 0);
1077 			else {
1078 				fetch(ixold, lowa, a - 1, input[0], "  ", 0);
1079 				fetch(ixold, a, b, input[0],
1080 				    ch == 'c' ? "! " : "- ", 0);
1081 			}
1082 			lowa = b + 1;
1083 			cvp++;
1084 		}
1085 		fetch(ixold, b + 1, upb, input[0], "  ", 0);
1086 	}
1087 	/* output changes to the "new" file */
1088 	printf("--- ");
1089 	range(lowc, upd, ",");
1090 	printf(" ----\n");
1091 
1092 	do_output = 0;
1093 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1094 		if (cvp->c <= cvp->d) {
1095 			cvp = context_vec_start;
1096 			do_output++;
1097 			break;
1098 		}
1099 	if (do_output) {
1100 		while (cvp <= context_vec_ptr) {
1101 			a = cvp->a;
1102 			b = cvp->b;
1103 			c = cvp->c;
1104 			d = cvp->d;
1105 
1106 			if (a <= b && c <= d)
1107 				ch = 'c';
1108 			else
1109 				ch = (a <= b) ? 'd' : 'a';
1110 
1111 			if (ch == 'd')
1112 				fetch(ixnew, lowc, d, input[1], "  ", 0);
1113 			else {
1114 				fetch(ixnew, lowc, c - 1, input[1], "  ", 0);
1115 				fetch(ixnew, c, d, input[1],
1116 				    ch == 'c' ? "! " : "+ ", 0);
1117 			}
1118 			lowc = d + 1;
1119 			cvp++;
1120 		}
1121 		fetch(ixnew, d + 1, upd, input[1], "  ", 0);
1122 	}
1123 	context_vec_ptr = context_vec_start - 1;
1124 }
1125