1*2144Seric # include "../hdr/defines.h"
2*2144Seric 
3*2144Seric SCCSID(@(#)bdiff.c	4.1);
4*2144Seric 
5*2144Seric /*
6*2144Seric 	This program segments two files into pieces of <= seglim lines
7*2144Seric 	(which is passed as a third argument or defaulted to some number)
8*2144Seric 	and then executes diff upon the pieces. The output of
9*2144Seric 	'diff' is then processed to make it look as if 'diff' had
10*2144Seric 	processed the files whole. The reason for all this is that seglim
11*2144Seric 	is a reasonable upper limit on the size of files that diff can
12*2144Seric 	process.
13*2144Seric 	NOTE -- by segmenting the files in this manner, it cannot be
14*2144Seric 	guaranteed that the 'diffing' of the segments will generate
15*2144Seric 	a minimal set of differences.
16*2144Seric 	This process is most definitely not equivalent to 'diffing'
17*2144Seric 	the files whole, assuming 'diff' could handle such large files.
18*2144Seric 
19*2144Seric 	'diff' is executed by a child process, generated by forking,
20*2144Seric 	and communicates with this program through pipes.
21*2144Seric */
22*2144Seric 
int seglim;	/* limit of size of file segment to be generated */

/*
	Initializers are written with '=': the old form without it
	(char diff[] "/bin/diff";) is rejected by standard C compilers.
*/
char diff[] = "/bin/diff";	/* program run on each pair of segments */
/*
	Skeleton handed to mktemp() for every temporary file name.
	POSIX requires the template to end in six 'X' characters.
*/
char tempskel[] = "/tmp/bdXXXXXX";
char tempfile[32];		/* name of the file written by saverest */
char otmp[32], ntmp[32];	/* names of the old/new segment files */
int linenum;			/* line offset added to diff's line numbers */
30*2144Seric 
31*2144Seric main(argc,argv)
32*2144Seric int argc;
33*2144Seric char *argv[];
34*2144Seric {
35*2144Seric 	FILE *poldfile, *pnewfile, *tptr;
36*2144Seric 	char oline[BUFSIZ], nline[BUFSIZ], diffline[BUFSIZ];
37*2144Seric 	char *olp, *nlp, *dp;
38*2144Seric 	int i, otcnt, ntcnt;
39*2144Seric 	int pfd[2];
40*2144Seric 	FILE *poldtemp, *pnewtemp, *pipeinp;
41*2144Seric 	int status;
42*2144Seric 
43*2144Seric 	/*
44*2144Seric 	Set flags for 'fatal' so that it will clean up,
45*2144Seric 	produce a message, and terminate.
46*2144Seric 	*/
47*2144Seric 	Fflags = FTLMSG | FTLCLN | FTLEXIT;
48*2144Seric 
49*2144Seric 	setsig();
50*2144Seric 
51*2144Seric 	if (argc < 3 || argc > 5)
52*2144Seric 		fatal("arg count (bd1)");
53*2144Seric 
54*2144Seric 	if (equal(argv[1],"-") && equal(argv[2],"-"))
55*2144Seric 		fatal("both files standard input (bd2)");
56*2144Seric 	if (equal(argv[1],"-"))
57*2144Seric 		poldfile = stdin;
58*2144Seric 	else
59*2144Seric 		poldfile = xfopen(argv[1],0);
60*2144Seric 	if (equal(argv[2],"-"))
61*2144Seric 		pnewfile = stdin;
62*2144Seric 	else
63*2144Seric 		pnewfile = xfopen(argv[2],0);
64*2144Seric 
65*2144Seric 	seglim = 3500;
66*2144Seric 
67*2144Seric 	if (argc > 3) {
68*2144Seric 		if (argv[3][0] == '-' && argv[3][1] == 's')
69*2144Seric 			Fflags =& ~FTLMSG;
70*2144Seric 		else {
71*2144Seric 			if ((seglim = patoi(argv[3])) == -1)
72*2144Seric 				fatal("non-numeric limit (bd4)");
73*2144Seric 			if (argc == 5 && argv[4][0] == '-' &&
74*2144Seric 					argv[4][1] == 's')
75*2144Seric 				Fflags =& ~FTLMSG;
76*2144Seric 		}
77*2144Seric 	}
78*2144Seric 
79*2144Seric 	linenum = 0;
80*2144Seric 
81*2144Seric 	/*
82*2144Seric 	The following while-loop will prevent any lines
83*2144Seric 	common to the beginning of both files from being
84*2144Seric 	sent to 'diff'. Since the running time of 'diff' is
85*2144Seric 	non-linear, this will help improve performance.
86*2144Seric 	If, during this process, both files reach EOF, then
87*2144Seric 	the files are equal and the program will terminate.
88*2144Seric 	If either file reaches EOF before the other, the
89*2144Seric 	program will generate the appropriate 'diff' output
90*2144Seric 	itself, since this can be easily determined and will
91*2144Seric 	avoid executing 'diff' completely.
92*2144Seric 	*/
93*2144Seric 	while (1) {
94*2144Seric 		olp = fgets(oline,BUFSIZ,poldfile);
95*2144Seric 		nlp = fgets(nline,BUFSIZ,pnewfile);
96*2144Seric 
97*2144Seric 		if (!olp && !nlp)	/* files are equal */
98*2144Seric 			exit(0);
99*2144Seric 
100*2144Seric 		if (!olp) {
101*2144Seric 			/*
102*2144Seric 			The entire old file is a prefix of the
103*2144Seric 			new file. Generate the appropriate "append"
104*2144Seric 			'diff'-like output, which is of the form:
105*2144Seric 					nan,n
106*2144Seric 			where 'n' represents a line-number.
107*2144Seric 			*/
108*2144Seric 			addgen(nline,pnewfile);
109*2144Seric 		}
110*2144Seric 
111*2144Seric 		if (!nlp) {
112*2144Seric 			/*
113*2144Seric 			The entire new file is a prefix of the
114*2144Seric 			old file. Generate the appropriate "delete"
115*2144Seric 			'diff'-like output, which is of the form:
116*2144Seric 					n,ndn
117*2144Seric 			where 'n' represents a line-number.
118*2144Seric 			*/
119*2144Seric 			delgen(oline,poldfile);
120*2144Seric 		}
121*2144Seric 
122*2144Seric 		if (equal(olp,nlp))
123*2144Seric 			linenum++;
124*2144Seric 		else
125*2144Seric 			break;
126*2144Seric 	}
127*2144Seric 
128*2144Seric 	/*
129*2144Seric 	Here, first 'linenum' lines are equal.
130*2144Seric 	The following while-loop segments both files into
131*2144Seric 	seglim segments, forks and executes 'diff' on the
132*2144Seric 	segments, and processes the resulting output of
133*2144Seric 	'diff', which is read from a pipe.
134*2144Seric 	*/
135*2144Seric 	while (1) {
136*2144Seric 		/*
137*2144Seric 		If both files are at EOF, everything is done.
138*2144Seric 		*/
139*2144Seric 		if (!olp && !nlp)	/* finished */
140*2144Seric 			exit(0);
141*2144Seric 
142*2144Seric 		if (!olp) {
143*2144Seric 			/*
144*2144Seric 			Generate appropriate "append"
145*2144Seric 			output without executing 'diff'.
146*2144Seric 			*/
147*2144Seric 			addgen(nline,pnewfile);
148*2144Seric 		}
149*2144Seric 
150*2144Seric 		if (!nlp) {
151*2144Seric 			/*
152*2144Seric 			Generate appropriate "delete"
153*2144Seric 			output without executing 'diff'.
154*2144Seric 			*/
155*2144Seric 			delgen(oline,poldfile);
156*2144Seric 		}
157*2144Seric 
158*2144Seric 		/*
159*2144Seric 		Create a temporary file to hold a segment
160*2144Seric 		from the old file, and write it.
161*2144Seric 		*/
162*2144Seric 		poldtemp = maket(otmp);
163*2144Seric 		otcnt = 0;
164*2144Seric 		while(olp && otcnt < seglim) {
165*2144Seric 			fputs(oline,poldtemp);
166*2144Seric 			olp = fgets(oline,BUFSIZ,poldfile);
167*2144Seric 			otcnt++;
168*2144Seric 		}
169*2144Seric 		fclose(poldtemp);
170*2144Seric 
171*2144Seric 		/*
172*2144Seric 		Create a temporary file to hold a segment
173*2144Seric 		from the new file, and write it.
174*2144Seric 		*/
175*2144Seric 		pnewtemp = maket(ntmp);
176*2144Seric 		ntcnt = 0;
177*2144Seric 		while(nlp && ntcnt < seglim) {
178*2144Seric 			fputs(nline,pnewtemp);
179*2144Seric 			nlp = fgets(nline,BUFSIZ,pnewfile);
180*2144Seric 			ntcnt++;
181*2144Seric 		}
182*2144Seric 		fclose(pnewtemp);
183*2144Seric 
184*2144Seric 		/*
185*2144Seric 		Create pipes and fork.
186*2144Seric 		*/
187*2144Seric 		xpipe(pfd);
188*2144Seric 		if ((i = fork()) < 0) {
189*2144Seric 			close(pfd[0]);
190*2144Seric 			close(pfd[1]);
191*2144Seric 			fatal("cannot fork, try again (bd3)");
192*2144Seric 		}
193*2144Seric 		else if (i == 0) {	/* child process */
194*2144Seric 			close(pfd[0]);
195*2144Seric 			close(1);
196*2144Seric 			dup(pfd[1]);
197*2144Seric 			close(pfd[1]);
198*2144Seric 
199*2144Seric 			/*
200*2144Seric 			Execute 'diff' on the segment files.
201*2144Seric 			*/
202*2144Seric 			execl(diff,diff,otmp,ntmp,0);
203*2144Seric 			close(1);
204*2144Seric 			fatal(sprintf(Error,"cannot execute '%s' (bd5)",diff));
205*2144Seric 		}
206*2144Seric 		else {			/* parent process */
207*2144Seric 			close(pfd[1]);
208*2144Seric 			pipeinp = fdfopen(pfd[0],0);
209*2144Seric 
210*2144Seric 			/*
211*2144Seric 			Process 'diff' output.
212*2144Seric 			*/
213*2144Seric 			while ((dp = fgets(diffline,BUFSIZ,pipeinp))) {
214*2144Seric 				if (numeric(*dp))
215*2144Seric 					fixnum(diffline);
216*2144Seric 				else
217*2144Seric 					printf("%s",diffline);
218*2144Seric 			}
219*2144Seric 
220*2144Seric 			fclose(pipeinp);
221*2144Seric 
222*2144Seric 			/*
223*2144Seric 			EOF on pipe.
224*2144Seric 			*/
225*2144Seric 			wait(&status);
226*2144Seric 			if (status&~0x100)
227*2144Seric 				fatal(sprintf(Error,"'%s' failed (bd6)",diff));
228*2144Seric 		}
229*2144Seric 		linenum =+ seglim;
230*2144Seric 
231*2144Seric 		/*
232*2144Seric 		Remove temporary files.
233*2144Seric 		*/
234*2144Seric 		unlink(otmp);
235*2144Seric 		unlink(ntmp);
236*2144Seric 	}
237*2144Seric }
238*2144Seric 
239*2144Seric 
240*2144Seric /*
241*2144Seric 	Routine to save remainder of a file.
242*2144Seric */
243*2144Seric saverest(line,iptr)
244*2144Seric char *line;
245*2144Seric FILE *iptr;
246*2144Seric {
247*2144Seric 	register char *lp;
248*2144Seric 	FILE *temptr;
249*2144Seric 
250*2144Seric 	temptr = maket(tempfile);
251*2144Seric 
252*2144Seric 	lp = line;
253*2144Seric 
254*2144Seric 	while (lp) {
255*2144Seric 		fputs(line,temptr);
256*2144Seric 		linenum++;
257*2144Seric 		lp = fgets(line,BUFSIZ,iptr);
258*2144Seric 	}
259*2144Seric 	fclose(temptr);
260*2144Seric }
261*2144Seric 
262*2144Seric 
263*2144Seric /*
264*2144Seric 	Routine to write out data saved by
265*2144Seric 	'saverest' routine and to remove the file.
266*2144Seric */
267*2144Seric putsave(line,type)
268*2144Seric char *line;
269*2144Seric char type;
270*2144Seric {
271*2144Seric 	FILE *temptr;
272*2144Seric 
273*2144Seric 	temptr = xfopen(tempfile,0);
274*2144Seric 
275*2144Seric 	while (fgets(line,BUFSIZ,temptr))
276*2144Seric 		printf("%c %s",type,line);
277*2144Seric 
278*2144Seric 	fclose(temptr);
279*2144Seric 
280*2144Seric 	xunlink(tempfile);
281*2144Seric }
282*2144Seric 
283*2144Seric 
284*2144Seric fixnum(lp)
285*2144Seric char *lp;
286*2144Seric {
287*2144Seric 	int num;
288*2144Seric 
289*2144Seric 	while (*lp) {
290*2144Seric 		switch (*lp) {
291*2144Seric 
292*2144Seric 		case 'a':
293*2144Seric 		case 'c':
294*2144Seric 		case 'd':
295*2144Seric 		case ',':
296*2144Seric 		case '\n':
297*2144Seric 			printf("%c",*lp);
298*2144Seric 			lp++;
299*2144Seric 			break;
300*2144Seric 
301*2144Seric 		default:
302*2144Seric 			lp = satoi(lp,&num);
303*2144Seric 			num =+ linenum;
304*2144Seric 			printf("%d",num);
305*2144Seric 		}
306*2144Seric 	}
307*2144Seric }
308*2144Seric 
309*2144Seric 
310*2144Seric addgen(lp,fp)
311*2144Seric char *lp;
312*2144Seric FILE *fp;
313*2144Seric {
314*2144Seric 	printf("%da%d,",linenum,linenum+1);
315*2144Seric 
316*2144Seric 	/*
317*2144Seric 	Save lines of new file.
318*2144Seric 	*/
319*2144Seric 	saverest(lp,fp);
320*2144Seric 
321*2144Seric 	printf("%d\n",linenum);
322*2144Seric 
323*2144Seric 	/*
324*2144Seric 	Output saved lines, as 'diff' would.
325*2144Seric 	*/
326*2144Seric 	putsave(lp,'>');
327*2144Seric 
328*2144Seric 	exit(0);
329*2144Seric }
330*2144Seric 
331*2144Seric 
332*2144Seric delgen(lp,fp)
333*2144Seric char *lp;
334*2144Seric FILE *fp;
335*2144Seric {
336*2144Seric 	int savenum;
337*2144Seric 
338*2144Seric 	printf("%d,",linenum+1);
339*2144Seric 	savenum = linenum;
340*2144Seric 
341*2144Seric 	/*
342*2144Seric 	Save lines of old file.
343*2144Seric 	*/
344*2144Seric 	saverest(lp,fp);
345*2144Seric 
346*2144Seric 	printf("%dd%d\n",linenum,savenum);
347*2144Seric 
348*2144Seric 	/*
349*2144Seric 	Output saved lines, as 'diff' would.
350*2144Seric 	*/
351*2144Seric 	putsave(lp,'<');
352*2144Seric 
353*2144Seric 	exit(0);
354*2144Seric }
355*2144Seric 
356*2144Seric 
357*2144Seric clean_up()
358*2144Seric {
359*2144Seric 	unlink(tempfile);
360*2144Seric 	unlink(otmp);
361*2144Seric 	unlink(ntmp);
362*2144Seric }
363*2144Seric 
364*2144Seric 
365*2144Seric maket(file)
366*2144Seric char *file;
367*2144Seric {
368*2144Seric 	FILE *iop;
369*2144Seric 
370*2144Seric 	copy(tempskel,file);
371*2144Seric 	iop = xfcreat(mktemp(file),0644);
372*2144Seric 
373*2144Seric 	return(iop);
374*2144Seric }
375