12144Seric # include "../hdr/defines.h"
22144Seric 
3*30498Slepreau static char Sccsid[] = "@(#)bdiff.c	4.3	02/15/87";
42144Seric 
52144Seric /*
62144Seric 	This program segments two files into pieces of <= seglim lines
72144Seric 	(which is passed as a third argument or defaulted to some number)
82144Seric 	and then executes diff upon the pieces. The output of
92144Seric 	'diff' is then processed to make it look as if 'diff' had
102144Seric 	processed the files whole. The reason for all this is that seglim
112144Seric 	is a reasonable upper limit on the size of files that diff can
122144Seric 	process.
132144Seric 	NOTE -- by segmenting the files in this manner, it cannot be
142144Seric 	guaranteed that the 'diffing' of the segments will generate
152144Seric 	a minimal set of differences.
162144Seric 	This process is most definitely not equivalent to 'diffing'
172144Seric 	the files whole, assuming 'diff' could handle such large files.
182144Seric 
192144Seric 	'diff' is executed by a child process, generated by forking,
202144Seric 	and communicates with this program through pipes.
212144Seric */
222144Seric 
int seglim;	/* limit of size of file segment to be generated */

char diff[] = "/bin/diff";	/* program executed on each pair of segment files */
/*
	Skeleton copied by 'maket' and handed to mktemp() to generate
	temp file names. The template ends in six X's, which is what
	mktemp() is specified to require; the resulting name still fits
	easily in the 32-byte name buffers below.
*/
char tempskel[] = "/tmp/bdXXXXXX";
char tempfile[32];	/* temp file written by 'saverest', read back by 'putsave' */
char otmp[32], ntmp[32];	/* temp files holding the old and new segments */
/*
	Running line count: counts the leading lines common to both
	files, is advanced by seglim after each segment, is bumped by
	'saverest' for every line it saves, and is added by 'fixnum'
	to the numbers 'diff' reports.
*/
int linenum;
302144Seric 
312144Seric main(argc,argv)
322144Seric int argc;
332144Seric char *argv[];
342144Seric {
352144Seric 	FILE *poldfile, *pnewfile, *tptr;
362144Seric 	char oline[BUFSIZ], nline[BUFSIZ], diffline[BUFSIZ];
372144Seric 	char *olp, *nlp, *dp;
382144Seric 	int i, otcnt, ntcnt;
392144Seric 	int pfd[2];
402144Seric 	FILE *poldtemp, *pnewtemp, *pipeinp;
412144Seric 	int status;
422144Seric 
432144Seric 	/*
442144Seric 	Set flags for 'fatal' so that it will clean up,
452144Seric 	produce a message, and terminate.
462144Seric 	*/
472144Seric 	Fflags = FTLMSG | FTLCLN | FTLEXIT;
482144Seric 
492144Seric 	setsig();
502144Seric 
512144Seric 	if (argc < 3 || argc > 5)
522144Seric 		fatal("arg count (bd1)");
532144Seric 
542144Seric 	if (equal(argv[1],"-") && equal(argv[2],"-"))
552144Seric 		fatal("both files standard input (bd2)");
562144Seric 	if (equal(argv[1],"-"))
572144Seric 		poldfile = stdin;
582144Seric 	else
592144Seric 		poldfile = xfopen(argv[1],0);
602144Seric 	if (equal(argv[2],"-"))
612144Seric 		pnewfile = stdin;
622144Seric 	else
632144Seric 		pnewfile = xfopen(argv[2],0);
642144Seric 
652144Seric 	seglim = 3500;
662144Seric 
672144Seric 	if (argc > 3) {
682144Seric 		if (argv[3][0] == '-' && argv[3][1] == 's')
69*30498Slepreau 			Fflags &= ~FTLMSG;
702144Seric 		else {
712144Seric 			if ((seglim = patoi(argv[3])) == -1)
722144Seric 				fatal("non-numeric limit (bd4)");
732144Seric 			if (argc == 5 && argv[4][0] == '-' &&
742144Seric 					argv[4][1] == 's')
75*30498Slepreau 				Fflags &= ~FTLMSG;
762144Seric 		}
772144Seric 	}
782144Seric 
792144Seric 	linenum = 0;
802144Seric 
812144Seric 	/*
822144Seric 	The following while-loop will prevent any lines
832144Seric 	common to the beginning of both files from being
842144Seric 	sent to 'diff'. Since the running time of 'diff' is
852144Seric 	non-linear, this will help improve performance.
862144Seric 	If, during this process, both files reach EOF, then
872144Seric 	the files are equal and the program will terminate.
882144Seric 	If either file reaches EOF before the other, the
892144Seric 	program will generate the appropriate 'diff' output
902144Seric 	itself, since this can be easily determined and will
912144Seric 	avoid executing 'diff' completely.
922144Seric 	*/
932144Seric 	while (1) {
942144Seric 		olp = fgets(oline,BUFSIZ,poldfile);
952144Seric 		nlp = fgets(nline,BUFSIZ,pnewfile);
962144Seric 
972144Seric 		if (!olp && !nlp)	/* files are equal */
982144Seric 			exit(0);
992144Seric 
1002144Seric 		if (!olp) {
1012144Seric 			/*
1022144Seric 			The entire old file is a prefix of the
1032144Seric 			new file. Generate the appropriate "append"
1042144Seric 			'diff'-like output, which is of the form:
1052144Seric 					nan,n
1062144Seric 			where 'n' represents a line-number.
1072144Seric 			*/
1082144Seric 			addgen(nline,pnewfile);
1092144Seric 		}
1102144Seric 
1112144Seric 		if (!nlp) {
1122144Seric 			/*
1132144Seric 			The entire new file is a prefix of the
1142144Seric 			old file. Generate the appropriate "delete"
1152144Seric 			'diff'-like output, which is of the form:
1162144Seric 					n,ndn
1172144Seric 			where 'n' represents a line-number.
1182144Seric 			*/
1192144Seric 			delgen(oline,poldfile);
1202144Seric 		}
1212144Seric 
1222144Seric 		if (equal(olp,nlp))
1232144Seric 			linenum++;
1242144Seric 		else
1252144Seric 			break;
1262144Seric 	}
1272144Seric 
1282144Seric 	/*
1292144Seric 	Here, first 'linenum' lines are equal.
1302144Seric 	The following while-loop segments both files into
1312144Seric 	seglim segments, forks and executes 'diff' on the
1322144Seric 	segments, and processes the resulting output of
1332144Seric 	'diff', which is read from a pipe.
1342144Seric 	*/
1352144Seric 	while (1) {
1362144Seric 		/*
1372144Seric 		If both files are at EOF, everything is done.
1382144Seric 		*/
1392144Seric 		if (!olp && !nlp)	/* finished */
1402144Seric 			exit(0);
1412144Seric 
1422144Seric 		if (!olp) {
1432144Seric 			/*
1442144Seric 			Generate appropriate "append"
1452144Seric 			output without executing 'diff'.
1462144Seric 			*/
1472144Seric 			addgen(nline,pnewfile);
1482144Seric 		}
1492144Seric 
1502144Seric 		if (!nlp) {
1512144Seric 			/*
1522144Seric 			Generate appropriate "delete"
1532144Seric 			output without executing 'diff'.
1542144Seric 			*/
1552144Seric 			delgen(oline,poldfile);
1562144Seric 		}
1572144Seric 
1582144Seric 		/*
1592144Seric 		Create a temporary file to hold a segment
1602144Seric 		from the old file, and write it.
1612144Seric 		*/
1622144Seric 		poldtemp = maket(otmp);
1632144Seric 		otcnt = 0;
1642144Seric 		while(olp && otcnt < seglim) {
1652144Seric 			fputs(oline,poldtemp);
1662144Seric 			olp = fgets(oline,BUFSIZ,poldfile);
1672144Seric 			otcnt++;
1682144Seric 		}
1692144Seric 		fclose(poldtemp);
1702144Seric 
1712144Seric 		/*
1722144Seric 		Create a temporary file to hold a segment
1732144Seric 		from the new file, and write it.
1742144Seric 		*/
1752144Seric 		pnewtemp = maket(ntmp);
1762144Seric 		ntcnt = 0;
1772144Seric 		while(nlp && ntcnt < seglim) {
1782144Seric 			fputs(nline,pnewtemp);
1792144Seric 			nlp = fgets(nline,BUFSIZ,pnewfile);
1802144Seric 			ntcnt++;
1812144Seric 		}
1822144Seric 		fclose(pnewtemp);
1832144Seric 
1842144Seric 		/*
1852144Seric 		Create pipes and fork.
1862144Seric 		*/
1872144Seric 		xpipe(pfd);
1882144Seric 		if ((i = fork()) < 0) {
1892144Seric 			close(pfd[0]);
1902144Seric 			close(pfd[1]);
1912144Seric 			fatal("cannot fork, try again (bd3)");
1922144Seric 		}
1932144Seric 		else if (i == 0) {	/* child process */
1942144Seric 			close(pfd[0]);
19519941Ssam 			dup2(pfd[1], 1);
19619941Ssam 			if (pfd[1] != 1)
19719941Ssam 				close(pfd[1]);
1982144Seric 
1992144Seric 			/*
2002144Seric 			Execute 'diff' on the segment files.
2012144Seric 			*/
2022144Seric 			execl(diff,diff,otmp,ntmp,0);
2032144Seric 			close(1);
2042144Seric 			fatal(sprintf(Error,"cannot execute '%s' (bd5)",diff));
2052144Seric 		}
2062144Seric 		else {			/* parent process */
2072144Seric 			close(pfd[1]);
20819941Ssam 			pipeinp = fdopen(pfd[0],"r");
2092144Seric 
2102144Seric 			/*
2112144Seric 			Process 'diff' output.
2122144Seric 			*/
2132144Seric 			while ((dp = fgets(diffline,BUFSIZ,pipeinp))) {
2142144Seric 				if (numeric(*dp))
2152144Seric 					fixnum(diffline);
2162144Seric 				else
2172144Seric 					printf("%s",diffline);
2182144Seric 			}
2192144Seric 
2202144Seric 			fclose(pipeinp);
2212144Seric 
2222144Seric 			/*
2232144Seric 			EOF on pipe.
2242144Seric 			*/
2252144Seric 			wait(&status);
2262144Seric 			if (status&~0x100)
2272144Seric 				fatal(sprintf(Error,"'%s' failed (bd6)",diff));
2282144Seric 		}
229*30498Slepreau 		linenum += seglim;
2302144Seric 
2312144Seric 		/*
2322144Seric 		Remove temporary files.
2332144Seric 		*/
2342144Seric 		unlink(otmp);
2352144Seric 		unlink(ntmp);
2362144Seric 	}
2372144Seric }
2382144Seric 
2392144Seric 
2402144Seric /*
2412144Seric 	Routine to save remainder of a file.
2422144Seric */
2432144Seric saverest(line,iptr)
2442144Seric char *line;
2452144Seric FILE *iptr;
2462144Seric {
2472144Seric 	register char *lp;
2482144Seric 	FILE *temptr;
2492144Seric 
2502144Seric 	temptr = maket(tempfile);
2512144Seric 
2522144Seric 	lp = line;
2532144Seric 
2542144Seric 	while (lp) {
2552144Seric 		fputs(line,temptr);
2562144Seric 		linenum++;
2572144Seric 		lp = fgets(line,BUFSIZ,iptr);
2582144Seric 	}
2592144Seric 	fclose(temptr);
2602144Seric }
2612144Seric 
2622144Seric 
2632144Seric /*
2642144Seric 	Routine to write out data saved by
2652144Seric 	'saverest' routine and to remove the file.
2662144Seric */
2672144Seric putsave(line,type)
2682144Seric char *line;
2692144Seric char type;
2702144Seric {
2712144Seric 	FILE *temptr;
2722144Seric 
2732144Seric 	temptr = xfopen(tempfile,0);
2742144Seric 
2752144Seric 	while (fgets(line,BUFSIZ,temptr))
2762144Seric 		printf("%c %s",type,line);
2772144Seric 
2782144Seric 	fclose(temptr);
2792144Seric 
2802144Seric 	xunlink(tempfile);
2812144Seric }
2822144Seric 
2832144Seric 
2842144Seric fixnum(lp)
2852144Seric char *lp;
2862144Seric {
2872144Seric 	int num;
2882144Seric 
2892144Seric 	while (*lp) {
2902144Seric 		switch (*lp) {
2912144Seric 
2922144Seric 		case 'a':
2932144Seric 		case 'c':
2942144Seric 		case 'd':
2952144Seric 		case ',':
2962144Seric 		case '\n':
2972144Seric 			printf("%c",*lp);
2982144Seric 			lp++;
2992144Seric 			break;
3002144Seric 
3012144Seric 		default:
3022144Seric 			lp = satoi(lp,&num);
303*30498Slepreau 			num += linenum;
3042144Seric 			printf("%d",num);
3052144Seric 		}
3062144Seric 	}
3072144Seric }
3082144Seric 
3092144Seric 
3102144Seric addgen(lp,fp)
3112144Seric char *lp;
3122144Seric FILE *fp;
3132144Seric {
3142144Seric 	printf("%da%d,",linenum,linenum+1);
3152144Seric 
3162144Seric 	/*
3172144Seric 	Save lines of new file.
3182144Seric 	*/
3192144Seric 	saverest(lp,fp);
3202144Seric 
3212144Seric 	printf("%d\n",linenum);
3222144Seric 
3232144Seric 	/*
3242144Seric 	Output saved lines, as 'diff' would.
3252144Seric 	*/
3262144Seric 	putsave(lp,'>');
3272144Seric 
3282144Seric 	exit(0);
3292144Seric }
3302144Seric 
3312144Seric 
3322144Seric delgen(lp,fp)
3332144Seric char *lp;
3342144Seric FILE *fp;
3352144Seric {
3362144Seric 	int savenum;
3372144Seric 
3382144Seric 	printf("%d,",linenum+1);
3392144Seric 	savenum = linenum;
3402144Seric 
3412144Seric 	/*
3422144Seric 	Save lines of old file.
3432144Seric 	*/
3442144Seric 	saverest(lp,fp);
3452144Seric 
3462144Seric 	printf("%dd%d\n",linenum,savenum);
3472144Seric 
3482144Seric 	/*
3492144Seric 	Output saved lines, as 'diff' would.
3502144Seric 	*/
3512144Seric 	putsave(lp,'<');
3522144Seric 
3532144Seric 	exit(0);
3542144Seric }
3552144Seric 
3562144Seric 
3572144Seric clean_up()
3582144Seric {
3592144Seric 	unlink(tempfile);
3602144Seric 	unlink(otmp);
3612144Seric 	unlink(ntmp);
3622144Seric }
3632144Seric 
3642144Seric 
3652144Seric maket(file)
3662144Seric char *file;
3672144Seric {
3682144Seric 	FILE *iop;
3692144Seric 
3702144Seric 	copy(tempskel,file);
3712144Seric 	iop = xfcreat(mktemp(file),0644);
3722144Seric 
3732144Seric 	return(iop);
3742144Seric }
375