12144Seric # include "../hdr/defines.h"
22144Seric 
3*37837Sbostic static char Sccsid[] = "@(#)bdiff.c	4.5	05/10/89";
42144Seric 
/*
	This program segments two files into pieces of <= seglim lines
	(seglim is taken from the third argument, or defaults to 3500)
	and then executes 'diff' upon the pieces. The output of
	'diff' is then processed to make it look as if 'diff' had
	processed the files whole. The reason for all this is that seglim
	is a reasonable upper limit on the size of files that 'diff' can
	process.
	NOTE -- by segmenting the files in this manner, it cannot be
	guaranteed that the 'diffing' of the segments will generate
	a minimal set of differences.
	This process is most definitely not equivalent to 'diffing'
	the files whole, assuming 'diff' could handle such large files.

	'diff' is executed by a child process, generated by forking,
	and communicates with this program through a pipe.
*/
222144Seric 
int seglim;	/* limit of size of file segment to be generated */

char diff[] = "/usr/bin/diff";	/* program executed on each pair of segments */

/*
	Template used to generate temp file names.
	It ends in six X's: POSIX mktemp()/mkstemp() reject a template
	with fewer than six trailing X's, which would make every
	temporary file creation fail on such systems.
	The result (13 characters plus the terminator) still fits
	the 32-byte name buffers below.
*/
char tempskel[] = "/tmp/bdXXXXXX";
char tempfile[32];		/* file used to save the tail of one input */
char otmp[32], ntmp[32];	/* old-file and new-file segment files */
int linenum;	/* offset added to the line numbers reported by 'diff' */
302144Seric 
main(argc,argv)312144Seric main(argc,argv)
322144Seric int argc;
332144Seric char *argv[];
342144Seric {
352144Seric 	FILE *poldfile, *pnewfile, *tptr;
362144Seric 	char oline[BUFSIZ], nline[BUFSIZ], diffline[BUFSIZ];
372144Seric 	char *olp, *nlp, *dp;
382144Seric 	int i, otcnt, ntcnt;
392144Seric 	int pfd[2];
402144Seric 	FILE *poldtemp, *pnewtemp, *pipeinp;
412144Seric 	int status;
422144Seric 
432144Seric 	/*
442144Seric 	Set flags for 'fatal' so that it will clean up,
452144Seric 	produce a message, and terminate.
462144Seric 	*/
472144Seric 	Fflags = FTLMSG | FTLCLN | FTLEXIT;
482144Seric 
492144Seric 	setsig();
502144Seric 
512144Seric 	if (argc < 3 || argc > 5)
522144Seric 		fatal("arg count (bd1)");
532144Seric 
542144Seric 	if (equal(argv[1],"-") && equal(argv[2],"-"))
552144Seric 		fatal("both files standard input (bd2)");
562144Seric 	if (equal(argv[1],"-"))
572144Seric 		poldfile = stdin;
582144Seric 	else
592144Seric 		poldfile = xfopen(argv[1],0);
602144Seric 	if (equal(argv[2],"-"))
612144Seric 		pnewfile = stdin;
622144Seric 	else
632144Seric 		pnewfile = xfopen(argv[2],0);
642144Seric 
652144Seric 	seglim = 3500;
662144Seric 
672144Seric 	if (argc > 3) {
682144Seric 		if (argv[3][0] == '-' && argv[3][1] == 's')
6930498Slepreau 			Fflags &= ~FTLMSG;
702144Seric 		else {
712144Seric 			if ((seglim = patoi(argv[3])) == -1)
722144Seric 				fatal("non-numeric limit (bd4)");
732144Seric 			if (argc == 5 && argv[4][0] == '-' &&
742144Seric 					argv[4][1] == 's')
7530498Slepreau 				Fflags &= ~FTLMSG;
762144Seric 		}
772144Seric 	}
782144Seric 
792144Seric 	linenum = 0;
802144Seric 
812144Seric 	/*
822144Seric 	The following while-loop will prevent any lines
832144Seric 	common to the beginning of both files from being
842144Seric 	sent to 'diff'. Since the running time of 'diff' is
852144Seric 	non-linear, this will help improve performance.
862144Seric 	If, during this process, both files reach EOF, then
872144Seric 	the files are equal and the program will terminate.
882144Seric 	If either file reaches EOF before the other, the
892144Seric 	program will generate the appropriate 'diff' output
902144Seric 	itself, since this can be easily determined and will
912144Seric 	avoid executing 'diff' completely.
922144Seric 	*/
932144Seric 	while (1) {
942144Seric 		olp = fgets(oline,BUFSIZ,poldfile);
952144Seric 		nlp = fgets(nline,BUFSIZ,pnewfile);
962144Seric 
972144Seric 		if (!olp && !nlp)	/* files are equal */
982144Seric 			exit(0);
992144Seric 
1002144Seric 		if (!olp) {
1012144Seric 			/*
1022144Seric 			The entire old file is a prefix of the
1032144Seric 			new file. Generate the appropriate "append"
1042144Seric 			'diff'-like output, which is of the form:
1052144Seric 					nan,n
1062144Seric 			where 'n' represents a line-number.
1072144Seric 			*/
1082144Seric 			addgen(nline,pnewfile);
1092144Seric 		}
1102144Seric 
1112144Seric 		if (!nlp) {
1122144Seric 			/*
1132144Seric 			The entire new file is a prefix of the
1142144Seric 			old file. Generate the appropriate "delete"
1152144Seric 			'diff'-like output, which is of the form:
1162144Seric 					n,ndn
1172144Seric 			where 'n' represents a line-number.
1182144Seric 			*/
1192144Seric 			delgen(oline,poldfile);
1202144Seric 		}
1212144Seric 
1222144Seric 		if (equal(olp,nlp))
1232144Seric 			linenum++;
1242144Seric 		else
1252144Seric 			break;
1262144Seric 	}
1272144Seric 
1282144Seric 	/*
1292144Seric 	Here, first 'linenum' lines are equal.
1302144Seric 	The following while-loop segments both files into
1312144Seric 	seglim segments, forks and executes 'diff' on the
1322144Seric 	segments, and processes the resulting output of
1332144Seric 	'diff', which is read from a pipe.
1342144Seric 	*/
1352144Seric 	while (1) {
1362144Seric 		/*
1372144Seric 		If both files are at EOF, everything is done.
1382144Seric 		*/
1392144Seric 		if (!olp && !nlp)	/* finished */
1402144Seric 			exit(0);
1412144Seric 
1422144Seric 		if (!olp) {
1432144Seric 			/*
1442144Seric 			Generate appropriate "append"
1452144Seric 			output without executing 'diff'.
1462144Seric 			*/
1472144Seric 			addgen(nline,pnewfile);
1482144Seric 		}
1492144Seric 
1502144Seric 		if (!nlp) {
1512144Seric 			/*
1522144Seric 			Generate appropriate "delete"
1532144Seric 			output without executing 'diff'.
1542144Seric 			*/
1552144Seric 			delgen(oline,poldfile);
1562144Seric 		}
1572144Seric 
1582144Seric 		/*
1592144Seric 		Create a temporary file to hold a segment
1602144Seric 		from the old file, and write it.
1612144Seric 		*/
1622144Seric 		poldtemp = maket(otmp);
1632144Seric 		otcnt = 0;
1642144Seric 		while(olp && otcnt < seglim) {
1652144Seric 			fputs(oline,poldtemp);
1662144Seric 			olp = fgets(oline,BUFSIZ,poldfile);
1672144Seric 			otcnt++;
1682144Seric 		}
1692144Seric 		fclose(poldtemp);
1702144Seric 
1712144Seric 		/*
1722144Seric 		Create a temporary file to hold a segment
1732144Seric 		from the new file, and write it.
1742144Seric 		*/
1752144Seric 		pnewtemp = maket(ntmp);
1762144Seric 		ntcnt = 0;
1772144Seric 		while(nlp && ntcnt < seglim) {
1782144Seric 			fputs(nline,pnewtemp);
1792144Seric 			nlp = fgets(nline,BUFSIZ,pnewfile);
1802144Seric 			ntcnt++;
1812144Seric 		}
1822144Seric 		fclose(pnewtemp);
1832144Seric 
1842144Seric 		/*
1852144Seric 		Create pipes and fork.
1862144Seric 		*/
1872144Seric 		xpipe(pfd);
1882144Seric 		if ((i = fork()) < 0) {
1892144Seric 			close(pfd[0]);
1902144Seric 			close(pfd[1]);
1912144Seric 			fatal("cannot fork, try again (bd3)");
1922144Seric 		}
1932144Seric 		else if (i == 0) {	/* child process */
1942144Seric 			close(pfd[0]);
19519941Ssam 			dup2(pfd[1], 1);
19619941Ssam 			if (pfd[1] != 1)
19719941Ssam 				close(pfd[1]);
1982144Seric 
1992144Seric 			/*
2002144Seric 			Execute 'diff' on the segment files.
2012144Seric 			*/
2022144Seric 			execl(diff,diff,otmp,ntmp,0);
2032144Seric 			close(1);
20433423Sbostic 			sprintf(Error,"cannot execute '%s' (bd5)",diff);
20533423Sbostic 			fatal(Error);
2062144Seric 		}
2072144Seric 		else {			/* parent process */
2082144Seric 			close(pfd[1]);
20919941Ssam 			pipeinp = fdopen(pfd[0],"r");
2102144Seric 
2112144Seric 			/*
2122144Seric 			Process 'diff' output.
2132144Seric 			*/
2142144Seric 			while ((dp = fgets(diffline,BUFSIZ,pipeinp))) {
2152144Seric 				if (numeric(*dp))
2162144Seric 					fixnum(diffline);
2172144Seric 				else
2182144Seric 					printf("%s",diffline);
2192144Seric 			}
2202144Seric 
2212144Seric 			fclose(pipeinp);
2222144Seric 
2232144Seric 			/*
2242144Seric 			EOF on pipe.
2252144Seric 			*/
2262144Seric 			wait(&status);
22733423Sbostic 			if (status&~0x100) {
22833423Sbostic 				sprintf(Error,"'%s' failed (bd6)",diff);
22933423Sbostic 				fatal(Error);
23033423Sbostic 			}
2312144Seric 		}
23230498Slepreau 		linenum += seglim;
2332144Seric 
2342144Seric 		/*
2352144Seric 		Remove temporary files.
2362144Seric 		*/
2372144Seric 		unlink(otmp);
2382144Seric 		unlink(ntmp);
2392144Seric 	}
2402144Seric }
2412144Seric 
2422144Seric 
2432144Seric /*
2442144Seric 	Routine to save remainder of a file.
2452144Seric */
saverest(line,iptr)2462144Seric saverest(line,iptr)
2472144Seric char *line;
2482144Seric FILE *iptr;
2492144Seric {
2502144Seric 	register char *lp;
2512144Seric 	FILE *temptr;
2522144Seric 
2532144Seric 	temptr = maket(tempfile);
2542144Seric 
2552144Seric 	lp = line;
2562144Seric 
2572144Seric 	while (lp) {
2582144Seric 		fputs(line,temptr);
2592144Seric 		linenum++;
2602144Seric 		lp = fgets(line,BUFSIZ,iptr);
2612144Seric 	}
2622144Seric 	fclose(temptr);
2632144Seric }
2642144Seric 
2652144Seric 
2662144Seric /*
2672144Seric 	Routine to write out data saved by
2682144Seric 	'saverest' routine and to remove the file.
2692144Seric */
putsave(line,type)2702144Seric putsave(line,type)
2712144Seric char *line;
2722144Seric char type;
2732144Seric {
2742144Seric 	FILE *temptr;
2752144Seric 
2762144Seric 	temptr = xfopen(tempfile,0);
2772144Seric 
2782144Seric 	while (fgets(line,BUFSIZ,temptr))
2792144Seric 		printf("%c %s",type,line);
2802144Seric 
2812144Seric 	fclose(temptr);
2822144Seric 
2832144Seric 	xunlink(tempfile);
2842144Seric }
2852144Seric 
2862144Seric 
fixnum(lp)2872144Seric fixnum(lp)
2882144Seric char *lp;
2892144Seric {
2902144Seric 	int num;
2912144Seric 
2922144Seric 	while (*lp) {
2932144Seric 		switch (*lp) {
2942144Seric 
2952144Seric 		case 'a':
2962144Seric 		case 'c':
2972144Seric 		case 'd':
2982144Seric 		case ',':
2992144Seric 		case '\n':
3002144Seric 			printf("%c",*lp);
3012144Seric 			lp++;
3022144Seric 			break;
3032144Seric 
3042144Seric 		default:
3052144Seric 			lp = satoi(lp,&num);
30630498Slepreau 			num += linenum;
3072144Seric 			printf("%d",num);
3082144Seric 		}
3092144Seric 	}
3102144Seric }
3112144Seric 
3122144Seric 
addgen(lp,fp)3132144Seric addgen(lp,fp)
3142144Seric char *lp;
3152144Seric FILE *fp;
3162144Seric {
3172144Seric 	printf("%da%d,",linenum,linenum+1);
3182144Seric 
3192144Seric 	/*
3202144Seric 	Save lines of new file.
3212144Seric 	*/
3222144Seric 	saverest(lp,fp);
3232144Seric 
3242144Seric 	printf("%d\n",linenum);
3252144Seric 
3262144Seric 	/*
3272144Seric 	Output saved lines, as 'diff' would.
3282144Seric 	*/
3292144Seric 	putsave(lp,'>');
3302144Seric 
3312144Seric 	exit(0);
3322144Seric }
3332144Seric 
3342144Seric 
delgen(lp,fp)3352144Seric delgen(lp,fp)
3362144Seric char *lp;
3372144Seric FILE *fp;
3382144Seric {
3392144Seric 	int savenum;
3402144Seric 
3412144Seric 	printf("%d,",linenum+1);
3422144Seric 	savenum = linenum;
3432144Seric 
3442144Seric 	/*
3452144Seric 	Save lines of old file.
3462144Seric 	*/
3472144Seric 	saverest(lp,fp);
3482144Seric 
3492144Seric 	printf("%dd%d\n",linenum,savenum);
3502144Seric 
3512144Seric 	/*
3522144Seric 	Output saved lines, as 'diff' would.
3532144Seric 	*/
3542144Seric 	putsave(lp,'<');
3552144Seric 
3562144Seric 	exit(0);
3572144Seric }
3582144Seric 
3592144Seric 
clean_up()3602144Seric clean_up()
3612144Seric {
3622144Seric 	unlink(tempfile);
3632144Seric 	unlink(otmp);
3642144Seric 	unlink(ntmp);
3652144Seric }
3662144Seric 
3672144Seric 
maket(file)3682144Seric maket(file)
3692144Seric char *file;
3702144Seric {
3712144Seric 	FILE *iop;
3722144Seric 
3732144Seric 	copy(tempskel,file);
3742144Seric 	iop = xfcreat(mktemp(file),0644);
3752144Seric 
3762144Seric 	return(iop);
3772144Seric }
378