xref: /openbsd-src/usr.bin/diff3/diff3prog.c (revision 68496fbfeef6367e95fd4b17bd92bf121b8a1e63)
1 /*	$OpenBSD: diff3prog.c,v 1.7 2005/08/01 08:17:57 otto Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diff3.c	8.1 (Berkeley) 6/6/93
65  */
66 
67 #ifndef lint
68 static const char copyright[] =
69 "@(#) Copyright (c) 1991, 1993\n\
70 	The Regents of the University of California.  All rights reserved.\n";
71 #endif /* not lint */
72 
73 #ifndef lint
74 static const char rcsid[] = "$OpenBSD: diff3prog.c,v 1.7 2005/08/01 08:17:57 otto Exp $";
75 #endif /* not lint */
76 
#include <ctype.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
82 
83 /* diff3 - 3-way differential file comparison */
84 
/* diff3 [-ex3EX] d13 d23 f1 f2 f3 [m1 m3]
 *
 * d13 = diff report on f1 vs f3
 * d23 = diff report on f2 vs f3
 * f1, f2, f3 the 3 files
 * When changes in f1 overlap with changes in f3, the overlaps are marked
 * with m1 and m3 if they were given on the command line; otherwise the
 * file names f1 and f3 are used (only for options E and X).
 */
94 
/*
 * "from" is first in range of changed lines; "to" is last+1
 * from=to=line after point of insertion for added lines.
 */
struct  range {
	int from;	/* first line of the range */
	int to;		/* one past the last line of the range */
};
/*
 * One change taken from a diff report.  readin() stores the line numbers
 * found to the left of the command letter in "old" and those to the right
 * in "new"; merge() treats "old" as positions in file 1 or 2 and "new" as
 * positions in file 3.
 */
struct diff {
	struct range old;
	struct range new;
};
107 
/* number of elements currently allocated for d13, d23, de and overlap */
size_t szchanges;

/* change lists filled by readin() from the first and second diff report */
struct diff *d13;
struct diff *d23;
/*
 * "de" is used to gather editing scripts.  These are later spewed out in
 * reverse order.  Its first element must be all zero, the "new" component
 * of "de" contains line positions or byte positions depending on when you
 * look (!?).  Array overlap indicates which sections in "de" correspond to
 * lines that are different in all three files.
 */
struct diff *de;
char *overlap;
/* number of "de" entries flagged in overlap; edscript() exits with it */
int  overlapcnt;
/* the three input files; fp[0] is also borrowed by readin() */
FILE *fp[3];
int cline[3];		/* # of the last-read line in each file (0-2) */
/*
 * the latest known correspondence between line numbers of the 3 files
 * is stored in last[1-3];
 */
int last[4];
/*
 * Selects which changes go into the ed script: main() sets 3 for -e/-E,
 * 1 for -x/-X and 2 for -3; 0 means print the plain three-way report.
 */
int eflag;
int oflag;		/* indicates whether to mark overlaps (-E or -X)*/
/* when non-zero, merge() traces ranges and change() prints no file text */
int debug  = 0;
char f1mark[40], f3mark[40];	/* markers for -E and -X */
133 
/*
 * Forward declarations for the functions defined in this file.
 * The functions marked __dead never return to their caller.
 */
int duplicate(struct range *, struct range *);
int edit(struct diff *, int, int);
char *getchange(FILE *);
char *getline(FILE *, size_t *);
int number(char **);
int readin(char *, struct diff **);
int skip(int, int, char *);
void change(int, struct range *, int);
void keep(int, struct range *);
void merge(int, int);
void prange(struct range *);
void repos(int);
void separate(const char *);
__dead void edscript(int);
__dead void trouble(void);
void increase(void);
__dead void usage(void);
151 
152 int
153 main(int argc, char **argv)
154 {
155 	int ch, i, m, n;
156 
157 	eflag = 0;
158 	oflag = 0;
159 	while ((ch = getopt(argc, argv, "EeXx3")) != -1) {
160 		switch (ch) {
161 		case 'E':
162 			eflag = 3;
163 			oflag = 1;
164 			break;
165 		case 'e':
166 			eflag = 3;
167 			break;
168 		case 'X':
169 			oflag = eflag = 1;
170 			break;
171 		case 'x':
172 			eflag = 1;
173 			break;
174 		case '3':
175 			eflag = 2;
176 			break;
177 		}
178 	}
179 	argc -= optind;
180 	argv += optind;
181 	/* XXX - argc usage seems wrong here */
182 	if (argc < 5)
183 		usage();
184 
185 	if (oflag) {
186 		(void)snprintf(f1mark, sizeof(f1mark), "<<<<<<< %s",
187 		    argc >= 6 ? argv[5] : argv[2]);
188 		(void)snprintf(f3mark, sizeof(f3mark), ">>>>>>> %s",
189 		    argc >= 7 ? argv[6] : argv[4]);
190 	}
191 
192 	increase();
193 	m = readin(argv[0], &d13);
194 	n = readin(argv[1], &d23);
195 	for (i = 0; i <= 2; i++) {
196 		if ((fp[i] = fopen(argv[i + 2], "r")) == NULL) {
197 			printf("diff3: can't open %s\n", argv[i + 2]);
198 			exit(EXIT_FAILURE);
199 		}
200 	}
201 	merge(m, n);
202 	exit(EXIT_SUCCESS);
203 }
204 
205 /*
206  * Pick up the line numbers of all changes from one change file.
207  * (This puts the numbers in a vector, which is not strictly necessary,
208  * since the vector is processed in one sequential pass.
209  * The vector could be optimized out of existence)
210  */
211 int
212 readin(char *name, struct diff **dd)
213 {
214 	int a, b, c, d, i;
215 	char kind, *p;
216 
217 	fp[0] = fopen(name, "r");
218 	for (i=0; (p = getchange(fp[0])); i++) {
219 		if (i >= szchanges - 1)
220 			increase();
221 		a = b = number(&p);
222 		if (*p == ',') {
223 			p++;
224 			b = number(&p);
225 		}
226 		kind = *p++;
227 		c = d = number(&p);
228 		if (*p==',') {
229 			p++;
230 			d = number(&p);
231 		}
232 		if (kind == 'a')
233 			a++;
234 		if (kind == 'd')
235 			c++;
236 		b++;
237 		d++;
238 		(*dd)[i].old.from = a;
239 		(*dd)[i].old.to = b;
240 		(*dd)[i].new.from = c;
241 		(*dd)[i].new.to = d;
242 	}
243 	(*dd)[i].old.from = (*dd)[i-1].old.to;
244 	(*dd)[i].new.from = (*dd)[i-1].new.to;
245 	(void)fclose(fp[0]);
246 	return (i);
247 }
248 
/*
 * Parse the run of decimal digits at *lc and return its value, leaving
 * *lc pointing at the first character that is not a digit.  Returns 0
 * and leaves *lc untouched when no digit is present.
 */
int
number(char **lc)
{
	char *cursor = *lc;
	int value = 0;

	for (; isdigit((unsigned char)*cursor); cursor++)
		value = value * 10 + (*cursor - '0');
	*lc = cursor;
	return (value);
}
258 
/*
 * Return the next line of b that starts with a digit, skipping every
 * other line, or NULL once getline() reports no more input.  The result
 * is the buffer handed back by getline().
 */
char *
getchange(FILE *b)
{
	char *candidate;

	do {
		candidate = getline(b, NULL);
	} while (candidate != NULL && !isdigit((unsigned char)candidate[0]));
	return (candidate);
}
270 
/*
 * Read one line from b with fgetln(3) and return it as a NUL-terminated
 * string that always ends in a newline, or NULL when fgetln() returns
 * NULL.  If n is not NULL, *n receives the length of the returned string
 * (newline included).  The string lives in a static buffer that is
 * reused, and possibly moved, by the next call.
 */
char *
getline(FILE *b, size_t *n)
{
	static char *linebuf;
	static size_t linecap;
	char *src;
	size_t len, need;

	src = fgetln(b, &len);
	if (src == NULL)
		return (NULL);

	/* make room for the newline a final, unterminated line lacks */
	if (src[len - 1] != '\n')
		len++;
	need = len + 1;
	if (need > linecap) {
		/* grow in 1024 byte steps */
		while (linecap < need)
			linecap += 1024;
		linebuf = realloc(linebuf, linecap);
		if (linebuf == NULL)
			err(EXIT_FAILURE, NULL);
	}
	memcpy(linebuf, src, len - 1);
	linebuf[len - 1] = '\n';
	linebuf[len] = '\0';
	if (n != NULL)
		*n = len;
	return (linebuf);
}
298 
/*
 * Walk the two change lists in parallel: d13 holds m1 changes and d23
 * holds m2 changes.  The changes are compared by their position in
 * file 3 (the "new" ranges).  With eflag == 0 every difference is printed
 * as a "====" section; otherwise the selected changes are collected with
 * edit() and written out by edscript(), which does not return.
 * Entries of both lists are modified in place while overlapping changes
 * are combined.
 */
void
merge(int m1, int m2)
{
	struct diff *d1, *d2, *d3;
	int dup, j, t1, t2;

	d1 = d13;
	d2 = d23;
	j = 0;
	/*
	 * t1/t2 tell whether list 1/2 still has entries.  The bitwise "|"
	 * evaluates both sides, so both flags are set on every pass.
	 */
	while ((t1 = d1 < d13 + m1) | (t2 = d2 < d23 + m2)) {
		if (debug) {
			printf("%d,%d=%d,%d %d,%d=%d,%d\n",
			d1->old.from,d1->old.to,
			d1->new.from,d1->new.to,
			d2->old.from,d2->old.to,
			d2->new.from,d2->new.to);
		}
		/* first file is different from others */
		if (!t2 || (t1 && d1->new.to < d2->new.from)) {
			/* stuff peculiar to 1st file */
			if (eflag==0) {
				separate("1");
				change(1, &d1->old, 0);
				keep(2, &d1->new);
				change(3, &d1->new, 0);
			}
			d1++;
			continue;
		}
		/* second file is different from others */
		if (!t1 || (t2 && d2->new.to < d1->new.from)) {
			if (eflag==0) {
				separate("2");
				keep(1, &d2->new);
				change(2, &d2->old, 0);
				change(3, &d2->new, 0);
			}
			d2++;
			continue;
		}
		/*
		 * Merge overlapping changes in first file
		 * this happens after extension (see below).
		 */
		if (d1 + 1 < d13 + m1 && d1->new.to >= d1[1].new.from) {
			/* fold this entry into the next one and move on to it */
			d1[1].old.from = d1->old.from;
			d1[1].new.from = d1->new.from;
			d1++;
			continue;
		}

		/* merge overlapping changes in second */
		if (d2 + 1 < d23 + m2 && d2->new.to >= d2[1].new.from) {
			d2[1].old.from = d2->old.from;
			d2[1].new.from = d2->new.from;
			d2++;
			continue;
		}
		/* stuff peculiar to third file or different in all */
		if (d1->new.from == d2->new.from && d1->new.to == d2->new.to) {
			dup = duplicate(&d1->old,&d2->old);
			/*
			 * dup = 0 means all files differ
			 * dup = 1 means files 1 and 2 identical
			 */
			if (eflag==0) {
				separate(dup ? "3" : "");
				change(1, &d1->old, dup);
				change(2, &d2->old, 0);
				/* take the file 3 range from d1 unless its old range is empty */
				d3 = d1->old.to > d1->old.from ? d1 : d2;
				change(3, &d3->new, 0);
			} else
				j = edit(d1, dup, j);
			d1++;
			d2++;
			continue;
		}
		/*
		 * Overlapping changes from file 1 and 2; extend changes
		 * appropriately to make them coincide.
		 */
		/* pull the later start back, shifting its old range by as much */
		if (d1->new.from < d2->new.from) {
			d2->old.from -= d2->new.from-d1->new.from;
			d2->new.from = d1->new.from;
		} else if (d2->new.from < d1->new.from) {
			d1->old.from -= d1->new.from-d2->new.from;
			d1->new.from = d2->new.from;
		}
		/* push the earlier end forward in the same way */
		if (d1->new.to > d2->new.to) {
			d2->old.to += d1->new.to - d2->new.to;
			d2->new.to = d1->new.to;
		} else if (d2->new.to > d1->new.to) {
			d1->old.to += d2->new.to - d1->new.to;
			d1->new.to = d2->new.to;
		}
	}
	/* j is the index of the last entry stored by edit() */
	if (eflag)
		edscript(j);
}
398 
/*
 * Print the header line of one difference section: "====" followed by
 * the tag s and a newline.
 */
void
separate(const char *s)
{
	(void)fputs("====", stdout);
	(void)puts(s);
}
404 
405 /*
406  * The range of lines rold.from thru rold.to in file i is to be changed.
407  * It is to be printed only if it does not duplicate something to be
408  * printed later.
409  */
410 void
411 change(int i, struct range *rold, int dup)
412 {
413 	printf("%d:", i);
414 	last[i] = rold->to;
415 	prange(rold);
416 	if (dup || debug)
417 		return;
418 	i--;
419 	(void)skip(i, rold->from, NULL);
420 	(void)skip(i, rold->to, "  ");
421 }
422 
423 /*
424  * print the range of line numbers, rold.from thru rold.to, as n1,n2 or n1
425  */
426 void
427 prange(struct range *rold)
428 {
429 	if (rold->to <= rold->from)
430 		printf("%da\n", rold->from - 1);
431 	else {
432 		printf("%d", rold->from);
433 		if (rold->to > rold->from+1)
434 			printf(",%d", rold->to - 1);
435 		printf("c\n");
436 	}
437 }
438 
439 /*
440  * No difference was reported by diff between file 1 (or 2) and file 3,
441  * and an artificial dummy difference (trange) must be ginned up to
442  * correspond to the change reported in the other file.
443  */
444 void
445 keep(int i, struct range *rnew)
446 {
447 	int delta;
448 	struct range trange;
449 
450 	delta = last[3] - last[i];
451 	trange.from = rnew->from - delta;
452 	trange.to = rnew->to - delta;
453 	change(i, &trange, 1);
454 }
455 
456 /*
457  * skip to just before line number from in file "i".  If "pr" is non-NULL,
458  * print all skipped stuff with string pr as a prefix.
459  */
460 int
461 skip(int i, int from, char *pr)
462 {
463 	size_t j, n;
464 	char *line;
465 
466 	for (n = 0; cline[i] < from - 1; n += j) {
467 		if ((line = getline(fp[i], &j)) == NULL)
468 			trouble();
469 		if (pr != NULL)
470 			printf("%s%s", pr, line);
471 		cline[i]++;
472 	}
473 	return ((int) n);
474 }
475 
476 /*
477  * Return 1 or 0 according as the old range (in file 1) contains exactly
478  * the same data as the new range (in file 2).
479  */
480 int
481 duplicate(struct range *r1, struct range *r2)
482 {
483 	int c,d;
484 	int nchar;
485 	int nline;
486 
487 	if (r1->to-r1->from != r2->to-r2->from)
488 		return (0);
489 	(void)skip(0, r1->from, NULL);
490 	(void)skip(1, r2->from, NULL);
491 	nchar = 0;
492 	for (nline=0; nline < r1->to - r1->from; nline++) {
493 		do {
494 			c = getc(fp[0]);
495 			d = getc(fp[1]);
496 			if (c == -1 || d== -1)
497 				trouble();
498 			nchar++;
499 			if (c != d) {
500 				repos(nchar);
501 				return (0);
502 			}
503 		} while (c != '\n');
504 	}
505 	repos(nchar);
506 	return (1);
507 }
508 
509 void
510 repos(int nchar)
511 {
512 	int i;
513 
514 	for (i = 0; i < 2; i++)
515 		(void)fseek(fp[i], (long)-nchar, 1);
516 }
517 
518 __dead void
519 trouble(void)
520 {
521 	errx(EXIT_FAILURE, "logic error");
522 }
523 
524 /*
525  * collect an editing script for later regurgitation
526  */
527 int
528 edit(struct diff *diff, int dup, int j)
529 {
530 	if (((dup + 1) & eflag) == 0)
531 		return (j);
532 	j++;
533 	overlap[j] = !dup;
534 	if (!dup)
535 		overlapcnt++;
536 	de[j].old.from = diff->old.from;
537 	de[j].old.to = diff->old.to;
538 	de[j].new.from = de[j-1].new.to + skip(2, diff->new.from, NULL);
539 	de[j].new.to = de[j].new.from + skip(2, diff->new.to, NULL);
540 	return (j);
541 }
542 
543 /* regurgitate */
544 __dead void
545 edscript(int n)
546 {
547 	int j,k;
548 	char block[BUFSIZ];
549 
550 	for (n = n; n > 0; n--) {
551 		if (!oflag || !overlap[n])
552 			prange(&de[n].old);
553 		else
554 			printf("%da\n=======\n", de[n].old.to -1);
555 		(void)fseek(fp[2], (long)de[n].new.from, 0);
556 		for (k = de[n].new.to-de[n].new.from; k > 0; k-= j) {
557 			j = k > BUFSIZ ? BUFSIZ : k;
558 			if (fread(block, 1, j, fp[2]) != j)
559 				trouble();
560 			(void)fwrite(block, 1, j, stdout);
561 		}
562 		if (!oflag || !overlap[n])
563 			printf(".\n");
564 		else {
565 			printf("%s\n.\n", f3mark);
566 			printf("%da\n%s\n.\n", de[n].old.from - 1, f1mark);
567 		}
568 	}
569 	exit(overlapcnt);
570 }
571 
572 void
573 increase(void)
574 {
575 	struct diff *p;
576 	char *q;
577 	size_t newsz, incr;
578 
579 	/* are the memset(3) calls needed? */
580 	newsz = szchanges == 0 ? 64 : 2 * szchanges;
581 	incr = newsz - szchanges;
582 
583 	p = realloc(d13, newsz * sizeof(struct diff));
584 	if (p == NULL)
585 		err(1, NULL);
586 	memset(p + szchanges, 0, incr * sizeof(struct diff));
587 	d13 = p;
588 	p = realloc(d23, newsz * sizeof(struct diff));
589 	if (p == NULL)
590 		err(1, NULL);
591 	memset(p + szchanges, 0, incr * sizeof(struct diff));
592 	d23 = p;
593 	p = realloc(de, newsz * sizeof(struct diff));
594 	if (p == NULL)
595 		err(1, NULL);
596 	memset(p + szchanges, 0, incr * sizeof(struct diff));
597 	de = p;
598 	q = realloc(overlap, newsz * sizeof(char));
599 	if (q == NULL)
600 		err(1, NULL);
601 	memset(q + szchanges, 0, incr * sizeof(char));
602 	overlap = q;
603 	szchanges = newsz;
604 }
605 
606 
607 __dead void
608 usage(void)
609 {
610 	extern char *__progname;
611 
612 	fprintf(stderr, "usage: %s [-exEX3] /tmp/d3a.?????????? "
613 	    "/tmp/d3b.?????????? file1 file2 file3\n", __progname);
614 	exit(EXIT_FAILURE);
615 }
616