xref: /openbsd-src/usr.bin/diff3/diff3prog.c (revision de8cc8edbc71bd3e3bc7fbffa27ba0e564c37d8b)
1 /*	$OpenBSD: diff3prog.c,v 1.20 2020/06/26 07:28:47 stsp Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diff3.c	8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <ctype.h>
68 #include <err.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <unistd.h>
73 
74 /* diff3 - 3-way differential file comparison */
75 
76 /* diff3 [-ex3EX] d13 d23 f1 f2 f3 [m1 m3]
77  *
78  * d13 = diff report on f1 vs f3
79  * d23 = diff report on f2 vs f3
80  * f1, f2, f3 the 3 files
81  * if changes in f1 overlap with changes in f3, m1 and m3 are used
82  * to mark the overlaps; otherwise, the file names f1 and f3 are used
83  * (only for options E and X).
84  */
85 
86 /*
87  * "from" is first in range of changed lines; "to" is last+1
88  * from=to=line after point of insertion for added lines.
89  */
90 struct  range {
91 	int from;
92 	int to;
93 };
94 struct diff {
95 	struct range old;
96 	struct range new;
97 };
98 
99 size_t szchanges;
100 
101 struct diff *d13;
102 struct diff *d23;
103 /*
104  * "de" is used to gather editing scripts.  These are later spewed out in
105  * reverse order.  Its first element must be all zero, the "new" component
106  * of "de" contains line positions or byte positions depending on when you
107  * look (!?).  Array overlap indicates which sections in "de" correspond to
108  * lines that are different in all three files.
109  */
110 struct diff *de;
111 char *overlap;
112 int  overlapcnt;
113 FILE *fp[3];
114 int cline[3];		/* # of the last-read line in each file (0-2) */
115 /*
116  * the latest known correspondence between line numbers of the 3 files
117  * is stored in last[1-3];
118  */
119 int last[4];
120 int eflag;
121 int oflag;		/* indicates whether to mark overlaps (-E or -X)*/
122 int debug  = 0;
123 char f1mark[40], f3mark[40];	/* markers for -E and -X */
124 
125 int duplicate(struct range *, struct range *);
126 int edit(struct diff *, int, int);
127 char *getchange(FILE *);
128 char *get_line(FILE *, size_t *);
129 int number(char **);
130 int readin(char *, struct diff **);
131 int skip(int, int, char *);
132 void change(int, struct range *, int);
133 void keep(int, struct range *);
134 void merge(int, int);
135 void prange(struct range *);
136 void repos(int);
137 void separate(const char *);
138 __dead void edscript(int);
139 __dead void trouble(void);
140 void increase(void);
141 __dead void usage(void);
142 
143 int
144 main(int argc, char **argv)
145 {
146 	int ch, i, m, n;
147 
148 	if (pledge("stdio rpath", NULL) == -1)
149 		err(1, "pledge");
150 
151 	eflag = 0;
152 	oflag = 0;
153 	while ((ch = getopt(argc, argv, "EeXx3")) != -1) {
154 		switch (ch) {
155 		case 'E':
156 			eflag = 3;
157 			oflag = 1;
158 			break;
159 		case 'e':
160 			eflag = 3;
161 			break;
162 		case 'X':
163 			oflag = eflag = 1;
164 			break;
165 		case 'x':
166 			eflag = 1;
167 			break;
168 		case '3':
169 			eflag = 2;
170 			break;
171 		}
172 	}
173 	argc -= optind;
174 	argv += optind;
175 	/* XXX - argc usage seems wrong here */
176 	if (argc < 5)
177 		usage();
178 
179 	if (oflag) {
180 		(void)snprintf(f1mark, sizeof(f1mark), "<<<<<<< %s",
181 		    argc >= 6 ? argv[5] : argv[2]);
182 		(void)snprintf(f3mark, sizeof(f3mark), ">>>>>>> %s",
183 		    argc >= 7 ? argv[6] : argv[4]);
184 	}
185 
186 	increase();
187 	m = readin(argv[0], &d13);
188 	n = readin(argv[1], &d23);
189 	for (i = 0; i <= 2; i++) {
190 		if ((fp[i] = fopen(argv[i + 2], "r")) == NULL)
191 			err(EXIT_FAILURE, "can't open %s", argv[i + 2]);
192 	}
193 	merge(m, n);
194 	exit(EXIT_SUCCESS);
195 }
196 
197 /*
198  * Pick up the line numbers of all changes from one change file.
199  * (This puts the numbers in a vector, which is not strictly necessary,
200  * since the vector is processed in one sequential pass.
201  * The vector could be optimized out of existence)
202  */
203 int
204 readin(char *name, struct diff **dd)
205 {
206 	int a, b, c, d, i;
207 	char kind, *p;
208 
209 	fp[0] = fopen(name, "r");
210 	if (fp[0] == NULL)
211 		err(EXIT_FAILURE, "can't open %s", name);
212 	for (i=0; (p = getchange(fp[0])); i++) {
213 		if (i >= szchanges - 1)
214 			increase();
215 		a = b = number(&p);
216 		if (*p == ',') {
217 			p++;
218 			b = number(&p);
219 		}
220 		kind = *p++;
221 		c = d = number(&p);
222 		if (*p==',') {
223 			p++;
224 			d = number(&p);
225 		}
226 		if (kind == 'a')
227 			a++;
228 		if (kind == 'd')
229 			c++;
230 		b++;
231 		d++;
232 		(*dd)[i].old.from = a;
233 		(*dd)[i].old.to = b;
234 		(*dd)[i].new.from = c;
235 		(*dd)[i].new.to = d;
236 	}
237 	if (i) {
238 		(*dd)[i].old.from = (*dd)[i-1].old.to;
239 		(*dd)[i].new.from = (*dd)[i-1].new.to;
240 	}
241 	(void)fclose(fp[0]);
242 	return (i);
243 }
244 
245 int
246 number(char **lc)
247 {
248 	int nn;
249 	nn = 0;
250 	while (isdigit((unsigned char)(**lc)))
251 		nn = nn*10 + *(*lc)++ - '0';
252 	return (nn);
253 }
254 
255 char *
256 getchange(FILE *b)
257 {
258 	char *line;
259 
260 	while ((line = get_line(b, NULL))) {
261 		if (isdigit((unsigned char)line[0]))
262 			return (line);
263 	}
264 	return (NULL);
265 }
266 
267 char *
268 get_line(FILE *b, size_t *n)
269 {
270 	char *cp;
271 	size_t len;
272 	static char *buf;
273 	static size_t bufsize;
274 
275 	if ((cp = fgetln(b, &len)) == NULL)
276 		return (NULL);
277 
278 	if (cp[len - 1] != '\n')
279 		len++;
280 	if (len + 1 > bufsize) {
281 		do {
282 			bufsize += 1024;
283 		} while (len + 1 > bufsize);
284 		if ((buf = realloc(buf, bufsize)) == NULL)
285 			err(EXIT_FAILURE, NULL);
286 	}
287 	memcpy(buf, cp, len - 1);
288 	buf[len - 1] = '\n';
289 	buf[len] = '\0';
290 	if (n != NULL)
291 		*n = len;
292 	return (buf);
293 }
294 
295 void
296 merge(int m1, int m2)
297 {
298 	struct diff *d1, *d2, *d3;
299 	int dup, j, t1, t2;
300 
301 	d1 = d13;
302 	d2 = d23;
303 	j = 0;
304 	for (;;) {
305 		t1 = (d1 < d13 + m1);
306 		t2 = (d2 < d23 + m2);
307 		if (!t1 && !t2)
308 			break;
309 
310 		if (debug) {
311 			printf("%d,%d=%d,%d %d,%d=%d,%d\n",
312 			d1->old.from,d1->old.to,
313 			d1->new.from,d1->new.to,
314 			d2->old.from,d2->old.to,
315 			d2->new.from,d2->new.to);
316 		}
317 		/* first file is different from others */
318 		if (!t2 || (t1 && d1->new.to < d2->new.from)) {
319 			/* stuff peculiar to 1st file */
320 			if (eflag==0) {
321 				separate("1");
322 				change(1, &d1->old, 0);
323 				keep(2, &d1->new);
324 				change(3, &d1->new, 0);
325 			}
326 			d1++;
327 			continue;
328 		}
329 		/* second file is different from others */
330 		if (!t1 || (t2 && d2->new.to < d1->new.from)) {
331 			if (eflag==0) {
332 				separate("2");
333 				keep(1, &d2->new);
334 				change(2, &d2->old, 0);
335 				change(3, &d2->new, 0);
336 			}
337 			d2++;
338 			continue;
339 		}
340 		/*
341 		 * Merge overlapping changes in first file
342 		 * this happens after extension (see below).
343 		 */
344 		if (d1 + 1 < d13 + m1 && d1->new.to >= d1[1].new.from) {
345 			d1[1].old.from = d1->old.from;
346 			d1[1].new.from = d1->new.from;
347 			d1++;
348 			continue;
349 		}
350 
351 		/* merge overlapping changes in second */
352 		if (d2 + 1 < d23 + m2 && d2->new.to >= d2[1].new.from) {
353 			d2[1].old.from = d2->old.from;
354 			d2[1].new.from = d2->new.from;
355 			d2++;
356 			continue;
357 		}
358 		/* stuff peculiar to third file or different in all */
359 		if (d1->new.from == d2->new.from && d1->new.to == d2->new.to) {
360 			dup = duplicate(&d1->old,&d2->old);
361 			/*
362 			 * dup = 0 means all files differ
363 			 * dup = 1 means files 1 and 2 identical
364 			 */
365 			if (eflag==0) {
366 				separate(dup ? "3" : "");
367 				change(1, &d1->old, dup);
368 				change(2, &d2->old, 0);
369 				d3 = d1->old.to > d1->old.from ? d1 : d2;
370 				change(3, &d3->new, 0);
371 			} else
372 				j = edit(d1, dup, j);
373 			d1++;
374 			d2++;
375 			continue;
376 		}
377 		/*
378 		 * Overlapping changes from file 1 and 2; extend changes
379 		 * appropriately to make them coincide.
380 		 */
381 		if (d1->new.from < d2->new.from) {
382 			d2->old.from -= d2->new.from-d1->new.from;
383 			d2->new.from = d1->new.from;
384 		} else if (d2->new.from < d1->new.from) {
385 			d1->old.from -= d1->new.from-d2->new.from;
386 			d1->new.from = d2->new.from;
387 		}
388 		if (d1->new.to > d2->new.to) {
389 			d2->old.to += d1->new.to - d2->new.to;
390 			d2->new.to = d1->new.to;
391 		} else if (d2->new.to > d1->new.to) {
392 			d1->old.to += d2->new.to - d1->new.to;
393 			d1->new.to = d2->new.to;
394 		}
395 	}
396 	if (eflag)
397 		edscript(j);
398 }
399 
400 void
401 separate(const char *s)
402 {
403 	printf("====%s\n", s);
404 }
405 
406 /*
407  * The range of lines rold.from thru rold.to in file i is to be changed.
408  * It is to be printed only if it does not duplicate something to be
409  * printed later.
410  */
411 void
412 change(int i, struct range *rold, int dup)
413 {
414 	printf("%d:", i);
415 	last[i] = rold->to;
416 	prange(rold);
417 	if (dup || debug)
418 		return;
419 	i--;
420 	(void)skip(i, rold->from, NULL);
421 	(void)skip(i, rold->to, "  ");
422 }
423 
424 /*
425  * print the range of line numbers, rold.from thru rold.to, as n1,n2 or n1
426  */
427 void
428 prange(struct range *rold)
429 {
430 	if (rold->to <= rold->from)
431 		printf("%da\n", rold->from - 1);
432 	else {
433 		printf("%d", rold->from);
434 		if (rold->to > rold->from+1)
435 			printf(",%d", rold->to - 1);
436 		printf("c\n");
437 	}
438 }
439 
440 /*
441  * No difference was reported by diff between file 1 (or 2) and file 3,
442  * and an artificial dummy difference (trange) must be ginned up to
443  * correspond to the change reported in the other file.
444  */
445 void
446 keep(int i, struct range *rnew)
447 {
448 	int delta;
449 	struct range trange;
450 
451 	delta = last[3] - last[i];
452 	trange.from = rnew->from - delta;
453 	trange.to = rnew->to - delta;
454 	change(i, &trange, 1);
455 }
456 
457 /*
458  * skip to just before line number from in file "i".  If "pr" is non-NULL,
459  * print all skipped stuff with string pr as a prefix.
460  */
461 int
462 skip(int i, int from, char *pr)
463 {
464 	size_t j, n;
465 	char *line;
466 
467 	for (n = 0; cline[i] < from - 1; n += j) {
468 		if ((line = get_line(fp[i], &j)) == NULL)
469 			trouble();
470 		if (pr != NULL)
471 			printf("%s%s", pr, line);
472 		cline[i]++;
473 	}
474 	return ((int) n);
475 }
476 
477 /*
478  * Return 1 or 0 according as the old range (in file 1) contains exactly
479  * the same data as the new range (in file 2).
480  */
481 int
482 duplicate(struct range *r1, struct range *r2)
483 {
484 	int c,d;
485 	int nchar;
486 	int nline;
487 
488 	if (r1->to-r1->from != r2->to-r2->from)
489 		return (0);
490 	(void)skip(0, r1->from, NULL);
491 	(void)skip(1, r2->from, NULL);
492 	nchar = 0;
493 	for (nline=0; nline < r1->to - r1->from; nline++) {
494 		do {
495 			c = getc(fp[0]);
496 			d = getc(fp[1]);
497 			if (c == -1 || d== -1)
498 				trouble();
499 			nchar++;
500 			if (c != d) {
501 				repos(nchar);
502 				return (0);
503 			}
504 		} while (c != '\n');
505 	}
506 	repos(nchar);
507 	return (1);
508 }
509 
510 void
511 repos(int nchar)
512 {
513 	int i;
514 
515 	for (i = 0; i < 2; i++)
516 		(void)fseek(fp[i], (long)-nchar, SEEK_CUR);
517 }
518 
519 __dead void
520 trouble(void)
521 {
522 	errx(EXIT_FAILURE, "logic error");
523 }
524 
525 /*
526  * collect an editing script for later regurgitation
527  */
528 int
529 edit(struct diff *diff, int dup, int j)
530 {
531 	if (((dup + 1) & eflag) == 0)
532 		return (j);
533 	j++;
534 	overlap[j] = !dup;
535 	if (!dup)
536 		overlapcnt++;
537 	de[j].old.from = diff->old.from;
538 	de[j].old.to = diff->old.to;
539 	de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, NULL);
540 	de[j].new.to = de[j].new.from + skip(2, diff->new.to, NULL);
541 	return (j);
542 }
543 
544 /* regurgitate */
545 __dead void
546 edscript(int n)
547 {
548 	int j,k;
549 	char block[BUFSIZ];
550 
551 	for (; n > 0; n--) {
552 		if (!oflag || !overlap[n])
553 			prange(&de[n].old);
554 		else
555 			printf("%da\n=======\n", de[n].old.to -1);
556 		(void)fseek(fp[2], (long)de[n].new.from, SEEK_SET);
557 		for (k = de[n].new.to-de[n].new.from; k > 0; k-= j) {
558 			size_t r;
559 			j = k > BUFSIZ ? BUFSIZ : k;
560 			r = fread(block, 1, j, fp[2]);
561 			if (r == 0) {
562 				if (feof(fp[2]))
563 					break;
564 				trouble();
565 			}
566 			if (r != j)
567 				j = r;
568 			(void)fwrite(block, 1, j, stdout);
569 		}
570 		if (!oflag || !overlap[n])
571 			printf(".\n");
572 		else {
573 			printf("%s\n.\n", f3mark);
574 			printf("%da\n%s\n.\n", de[n].old.from - 1, f1mark);
575 		}
576 	}
577 	exit(overlapcnt);
578 }
579 
580 void
581 increase(void)
582 {
583 	struct diff *p;
584 	char *q;
585 	size_t newsz, incr;
586 
587 	/* are the memset(3) calls needed? */
588 	newsz = szchanges == 0 ? 64 : 2 * szchanges;
589 	incr = newsz - szchanges;
590 
591 	p = reallocarray(d13, newsz, sizeof(struct diff));
592 	if (p == NULL)
593 		err(1, NULL);
594 	memset(p + szchanges, 0, incr * sizeof(struct diff));
595 	d13 = p;
596 	p = reallocarray(d23, newsz, sizeof(struct diff));
597 	if (p == NULL)
598 		err(1, NULL);
599 	memset(p + szchanges, 0, incr * sizeof(struct diff));
600 	d23 = p;
601 	p = reallocarray(de, newsz, sizeof(struct diff));
602 	if (p == NULL)
603 		err(1, NULL);
604 	memset(p + szchanges, 0, incr * sizeof(struct diff));
605 	de = p;
606 	q = reallocarray(overlap, newsz, sizeof(char));
607 	if (q == NULL)
608 		err(1, NULL);
609 	memset(q + szchanges, 0, incr * sizeof(char));
610 	overlap = q;
611 	szchanges = newsz;
612 }
613 
614 
615 __dead void
616 usage(void)
617 {
618 	extern char *__progname;
619 
620 	fprintf(stderr, "usage: %s [-exEX3] /tmp/d3a.?????????? "
621 	    "/tmp/d3b.?????????? file1 file2 file3\n", __progname);
622 	exit(EXIT_FAILURE);
623 }
624