xref: /openbsd-src/usr.bin/diff3/diff3prog.c (revision f9bbbf4534171e957bb7652e706dd78782fbffa2)
1 /*	$OpenBSD: diff3prog.c,v 1.12 2012/03/04 04:05:15 fgsch Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diff3.c	8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <ctype.h>
71 #include <err.h>
72 
73 /* diff3 - 3-way differential file comparison */
74 
75 /* diff3 [-ex3EX] d13 d23 f1 f2 f3 [m1 m3]
76  *
77  * d13 = diff report on f1 vs f3
78  * d23 = diff report on f2 vs f3
79  * f1, f2, f3 the 3 files
80  * if changes in f1 overlap with changes in f3, m1 and m3 are used
81  * to mark the overlaps; otherwise, the file names f1 and f3 are used
82  * (only for options E and X).
83  */
84 
85 /*
86  * "from" is first in range of changed lines; "to" is last+1
87  * from=to=line after point of insertion for added lines.
88  */
89 struct  range {
90 	int from;
91 	int to;
92 };
93 struct diff {
94 	struct range old;
95 	struct range new;
96 };
97 
98 size_t szchanges;
99 
100 struct diff *d13;
101 struct diff *d23;
102 /*
103  * "de" is used to gather editing scripts.  These are later spewed out in
104  * reverse order.  Its first element must be all zero, the "new" component
105  * of "de" contains line positions or byte positions depending on when you
106  * look (!?).  Array overlap indicates which sections in "de" correspond to
107  * lines that are different in all three files.
108  */
109 struct diff *de;
110 char *overlap;
111 int  overlapcnt;
112 FILE *fp[3];
113 int cline[3];		/* # of the last-read line in each file (0-2) */
114 /*
115  * the latest known correspondence between line numbers of the 3 files
116  * is stored in last[1-3];
117  */
118 int last[4];
119 int eflag;
120 int oflag;		/* indicates whether to mark overlaps (-E or -X)*/
121 int debug  = 0;
122 char f1mark[40], f3mark[40];	/* markers for -E and -X */
123 
124 int duplicate(struct range *, struct range *);
125 int edit(struct diff *, int, int);
126 char *getchange(FILE *);
127 char *get_line(FILE *, size_t *);
128 int number(char **);
129 int readin(char *, struct diff **);
130 int skip(int, int, char *);
131 void change(int, struct range *, int);
132 void keep(int, struct range *);
133 void merge(int, int);
134 void prange(struct range *);
135 void repos(int);
136 void separate(const char *);
137 __dead void edscript(int);
138 __dead void trouble(void);
139 void increase(void);
140 __dead void usage(void);
141 
142 int
143 main(int argc, char **argv)
144 {
145 	int ch, i, m, n;
146 
147 	eflag = 0;
148 	oflag = 0;
149 	while ((ch = getopt(argc, argv, "EeXx3")) != -1) {
150 		switch (ch) {
151 		case 'E':
152 			eflag = 3;
153 			oflag = 1;
154 			break;
155 		case 'e':
156 			eflag = 3;
157 			break;
158 		case 'X':
159 			oflag = eflag = 1;
160 			break;
161 		case 'x':
162 			eflag = 1;
163 			break;
164 		case '3':
165 			eflag = 2;
166 			break;
167 		}
168 	}
169 	argc -= optind;
170 	argv += optind;
171 	/* XXX - argc usage seems wrong here */
172 	if (argc < 5)
173 		usage();
174 
175 	if (oflag) {
176 		(void)snprintf(f1mark, sizeof(f1mark), "<<<<<<< %s",
177 		    argc >= 6 ? argv[5] : argv[2]);
178 		(void)snprintf(f3mark, sizeof(f3mark), ">>>>>>> %s",
179 		    argc >= 7 ? argv[6] : argv[4]);
180 	}
181 
182 	increase();
183 	m = readin(argv[0], &d13);
184 	n = readin(argv[1], &d23);
185 	for (i = 0; i <= 2; i++) {
186 		if ((fp[i] = fopen(argv[i + 2], "r")) == NULL)
187 			err(EXIT_FAILURE, "can't open %s", argv[i + 2]);
188 	}
189 	merge(m, n);
190 	exit(EXIT_SUCCESS);
191 }
192 
193 /*
194  * Pick up the line numbers of all changes from one change file.
195  * (This puts the numbers in a vector, which is not strictly necessary,
196  * since the vector is processed in one sequential pass.
197  * The vector could be optimized out of existence)
198  */
199 int
200 readin(char *name, struct diff **dd)
201 {
202 	int a, b, c, d, i;
203 	char kind, *p;
204 
205 	fp[0] = fopen(name, "r");
206 	if (fp[0] == NULL)
207 		err(EXIT_FAILURE, "can't open %s", name);
208 	for (i=0; (p = getchange(fp[0])); i++) {
209 		if (i >= szchanges - 1)
210 			increase();
211 		a = b = number(&p);
212 		if (*p == ',') {
213 			p++;
214 			b = number(&p);
215 		}
216 		kind = *p++;
217 		c = d = number(&p);
218 		if (*p==',') {
219 			p++;
220 			d = number(&p);
221 		}
222 		if (kind == 'a')
223 			a++;
224 		if (kind == 'd')
225 			c++;
226 		b++;
227 		d++;
228 		(*dd)[i].old.from = a;
229 		(*dd)[i].old.to = b;
230 		(*dd)[i].new.from = c;
231 		(*dd)[i].new.to = d;
232 	}
233 	if (i) {
234 		(*dd)[i].old.from = (*dd)[i-1].old.to;
235 		(*dd)[i].new.from = (*dd)[i-1].new.to;
236 	}
237 	(void)fclose(fp[0]);
238 	return (i);
239 }
240 
241 int
242 number(char **lc)
243 {
244 	int nn;
245 	nn = 0;
246 	while (isdigit((unsigned char)(**lc)))
247 		nn = nn*10 + *(*lc)++ - '0';
248 	return (nn);
249 }
250 
251 char *
252 getchange(FILE *b)
253 {
254 	char *line;
255 
256 	while ((line = get_line(b, NULL))) {
257 		if (isdigit((unsigned char)line[0]))
258 			return (line);
259 	}
260 	return (NULL);
261 }
262 
263 char *
264 get_line(FILE *b, size_t *n)
265 {
266 	char *cp;
267 	size_t len;
268 	static char *buf;
269 	static size_t bufsize;
270 
271 	if ((cp = fgetln(b, &len)) == NULL)
272 		return (NULL);
273 
274 	if (cp[len - 1] != '\n')
275 		len++;
276 	if (len + 1 > bufsize) {
277 		do {
278 			bufsize += 1024;
279 		} while (len + 1 > bufsize);
280 		if ((buf = realloc(buf, bufsize)) == NULL)
281 			err(EXIT_FAILURE, NULL);
282 	}
283 	memcpy(buf, cp, len - 1);
284 	buf[len - 1] = '\n';
285 	buf[len] = '\0';
286 	if (n != NULL)
287 		*n = len;
288 	return (buf);
289 }
290 
291 void
292 merge(int m1, int m2)
293 {
294 	struct diff *d1, *d2, *d3;
295 	int dup, j, t1, t2;
296 
297 	d1 = d13;
298 	d2 = d23;
299 	j = 0;
300 	while ((t1 = d1 < d13 + m1) | (t2 = d2 < d23 + m2)) {
301 		if (debug) {
302 			printf("%d,%d=%d,%d %d,%d=%d,%d\n",
303 			d1->old.from,d1->old.to,
304 			d1->new.from,d1->new.to,
305 			d2->old.from,d2->old.to,
306 			d2->new.from,d2->new.to);
307 		}
308 		/* first file is different from others */
309 		if (!t2 || (t1 && d1->new.to < d2->new.from)) {
310 			/* stuff peculiar to 1st file */
311 			if (eflag==0) {
312 				separate("1");
313 				change(1, &d1->old, 0);
314 				keep(2, &d1->new);
315 				change(3, &d1->new, 0);
316 			}
317 			d1++;
318 			continue;
319 		}
320 		/* second file is different from others */
321 		if (!t1 || (t2 && d2->new.to < d1->new.from)) {
322 			if (eflag==0) {
323 				separate("2");
324 				keep(1, &d2->new);
325 				change(2, &d2->old, 0);
326 				change(3, &d2->new, 0);
327 			}
328 			d2++;
329 			continue;
330 		}
331 		/*
332 		 * Merge overlapping changes in first file
333 		 * this happens after extension (see below).
334 		 */
335 		if (d1 + 1 < d13 + m1 && d1->new.to >= d1[1].new.from) {
336 			d1[1].old.from = d1->old.from;
337 			d1[1].new.from = d1->new.from;
338 			d1++;
339 			continue;
340 		}
341 
342 		/* merge overlapping changes in second */
343 		if (d2 + 1 < d23 + m2 && d2->new.to >= d2[1].new.from) {
344 			d2[1].old.from = d2->old.from;
345 			d2[1].new.from = d2->new.from;
346 			d2++;
347 			continue;
348 		}
349 		/* stuff peculiar to third file or different in all */
350 		if (d1->new.from == d2->new.from && d1->new.to == d2->new.to) {
351 			dup = duplicate(&d1->old,&d2->old);
352 			/*
353 			 * dup = 0 means all files differ
354 			 * dup = 1 means files 1 and 2 identical
355 			 */
356 			if (eflag==0) {
357 				separate(dup ? "3" : "");
358 				change(1, &d1->old, dup);
359 				change(2, &d2->old, 0);
360 				d3 = d1->old.to > d1->old.from ? d1 : d2;
361 				change(3, &d3->new, 0);
362 			} else
363 				j = edit(d1, dup, j);
364 			d1++;
365 			d2++;
366 			continue;
367 		}
368 		/*
369 		 * Overlapping changes from file 1 and 2; extend changes
370 		 * appropriately to make them coincide.
371 		 */
372 		if (d1->new.from < d2->new.from) {
373 			d2->old.from -= d2->new.from-d1->new.from;
374 			d2->new.from = d1->new.from;
375 		} else if (d2->new.from < d1->new.from) {
376 			d1->old.from -= d1->new.from-d2->new.from;
377 			d1->new.from = d2->new.from;
378 		}
379 		if (d1->new.to > d2->new.to) {
380 			d2->old.to += d1->new.to - d2->new.to;
381 			d2->new.to = d1->new.to;
382 		} else if (d2->new.to > d1->new.to) {
383 			d1->old.to += d2->new.to - d1->new.to;
384 			d1->new.to = d2->new.to;
385 		}
386 	}
387 	if (eflag)
388 		edscript(j);
389 }
390 
391 void
392 separate(const char *s)
393 {
394 	printf("====%s\n", s);
395 }
396 
397 /*
398  * The range of lines rold.from thru rold.to in file i is to be changed.
399  * It is to be printed only if it does not duplicate something to be
400  * printed later.
401  */
402 void
403 change(int i, struct range *rold, int dup)
404 {
405 	printf("%d:", i);
406 	last[i] = rold->to;
407 	prange(rold);
408 	if (dup || debug)
409 		return;
410 	i--;
411 	(void)skip(i, rold->from, NULL);
412 	(void)skip(i, rold->to, "  ");
413 }
414 
415 /*
416  * print the range of line numbers, rold.from thru rold.to, as n1,n2 or n1
417  */
418 void
419 prange(struct range *rold)
420 {
421 	if (rold->to <= rold->from)
422 		printf("%da\n", rold->from - 1);
423 	else {
424 		printf("%d", rold->from);
425 		if (rold->to > rold->from+1)
426 			printf(",%d", rold->to - 1);
427 		printf("c\n");
428 	}
429 }
430 
431 /*
432  * No difference was reported by diff between file 1 (or 2) and file 3,
433  * and an artificial dummy difference (trange) must be ginned up to
434  * correspond to the change reported in the other file.
435  */
436 void
437 keep(int i, struct range *rnew)
438 {
439 	int delta;
440 	struct range trange;
441 
442 	delta = last[3] - last[i];
443 	trange.from = rnew->from - delta;
444 	trange.to = rnew->to - delta;
445 	change(i, &trange, 1);
446 }
447 
448 /*
449  * skip to just before line number from in file "i".  If "pr" is non-NULL,
450  * print all skipped stuff with string pr as a prefix.
451  */
452 int
453 skip(int i, int from, char *pr)
454 {
455 	size_t j, n;
456 	char *line;
457 
458 	for (n = 0; cline[i] < from - 1; n += j) {
459 		if ((line = get_line(fp[i], &j)) == NULL)
460 			trouble();
461 		if (pr != NULL)
462 			printf("%s%s", pr, line);
463 		cline[i]++;
464 	}
465 	return ((int) n);
466 }
467 
468 /*
469  * Return 1 or 0 according as the old range (in file 1) contains exactly
470  * the same data as the new range (in file 2).
471  */
472 int
473 duplicate(struct range *r1, struct range *r2)
474 {
475 	int c,d;
476 	int nchar;
477 	int nline;
478 
479 	if (r1->to-r1->from != r2->to-r2->from)
480 		return (0);
481 	(void)skip(0, r1->from, NULL);
482 	(void)skip(1, r2->from, NULL);
483 	nchar = 0;
484 	for (nline=0; nline < r1->to - r1->from; nline++) {
485 		do {
486 			c = getc(fp[0]);
487 			d = getc(fp[1]);
488 			if (c == -1 || d== -1)
489 				trouble();
490 			nchar++;
491 			if (c != d) {
492 				repos(nchar);
493 				return (0);
494 			}
495 		} while (c != '\n');
496 	}
497 	repos(nchar);
498 	return (1);
499 }
500 
501 void
502 repos(int nchar)
503 {
504 	int i;
505 
506 	for (i = 0; i < 2; i++)
507 		(void)fseek(fp[i], (long)-nchar, SEEK_CUR);
508 }
509 
510 __dead void
511 trouble(void)
512 {
513 	errx(EXIT_FAILURE, "logic error");
514 }
515 
516 /*
517  * collect an editing script for later regurgitation
518  */
519 int
520 edit(struct diff *diff, int dup, int j)
521 {
522 	if (((dup + 1) & eflag) == 0)
523 		return (j);
524 	j++;
525 	overlap[j] = !dup;
526 	if (!dup)
527 		overlapcnt++;
528 	de[j].old.from = diff->old.from;
529 	de[j].old.to = diff->old.to;
530 	de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, NULL);
531 	de[j].new.to = de[j].new.from + skip(2, diff->new.to, NULL);
532 	return (j);
533 }
534 
535 /* regurgitate */
536 __dead void
537 edscript(int n)
538 {
539 	int j,k;
540 	char block[BUFSIZ];
541 
542 	for (n = n; n > 0; n--) {
543 		if (!oflag || !overlap[n])
544 			prange(&de[n].old);
545 		else
546 			printf("%da\n=======\n", de[n].old.to -1);
547 		(void)fseek(fp[2], (long)de[n].new.from, SEEK_SET);
548 		for (k = de[n].new.to-de[n].new.from; k > 0; k-= j) {
549 			j = k > BUFSIZ ? BUFSIZ : k;
550 			if (fread(block, 1, j, fp[2]) != j)
551 				trouble();
552 			(void)fwrite(block, 1, j, stdout);
553 		}
554 		if (!oflag || !overlap[n])
555 			printf(".\n");
556 		else {
557 			printf("%s\n.\n", f3mark);
558 			printf("%da\n%s\n.\n", de[n].old.from - 1, f1mark);
559 		}
560 	}
561 	exit(overlapcnt);
562 }
563 
564 void
565 increase(void)
566 {
567 	struct diff *p;
568 	char *q;
569 	size_t newsz, incr;
570 
571 	/* are the memset(3) calls needed? */
572 	newsz = szchanges == 0 ? 64 : 2 * szchanges;
573 	incr = newsz - szchanges;
574 
575 	p = realloc(d13, newsz * sizeof(struct diff));
576 	if (p == NULL)
577 		err(1, NULL);
578 	memset(p + szchanges, 0, incr * sizeof(struct diff));
579 	d13 = p;
580 	p = realloc(d23, newsz * sizeof(struct diff));
581 	if (p == NULL)
582 		err(1, NULL);
583 	memset(p + szchanges, 0, incr * sizeof(struct diff));
584 	d23 = p;
585 	p = realloc(de, newsz * sizeof(struct diff));
586 	if (p == NULL)
587 		err(1, NULL);
588 	memset(p + szchanges, 0, incr * sizeof(struct diff));
589 	de = p;
590 	q = realloc(overlap, newsz * sizeof(char));
591 	if (q == NULL)
592 		err(1, NULL);
593 	memset(q + szchanges, 0, incr * sizeof(char));
594 	overlap = q;
595 	szchanges = newsz;
596 }
597 
598 
599 __dead void
600 usage(void)
601 {
602 	extern char *__progname;
603 
604 	fprintf(stderr, "usage: %s [-exEX3] /tmp/d3a.?????????? "
605 	    "/tmp/d3b.?????????? file1 file2 file3\n", __progname);
606 	exit(EXIT_FAILURE);
607 }
608