12144Seric # include "../hdr/defines.h"
22144Seric
3*37837Sbostic static char Sccsid[] = "@(#)bdiff.c 4.5 05/10/89";
42144Seric
/*
	This program segments two files into pieces of <= seglim lines
	(which is passed as a third argument or defaulted to some number)
	and then executes diff upon the pieces. The output of
	'diff' is then processed to make it look as if 'diff' had
	processed the files whole. The reason for all this is that seglim
	is a reasonable upper limit on the size of files that diff can
	process.
	NOTE -- by segmenting the files in this manner, it cannot be
	guaranteed that the 'diffing' of the segments will generate
	a minimal set of differences.
	This process is most definitely not equivalent to 'diffing'
	the files whole, assuming 'diff' could handle such large files.

	'diff' is executed by a child process, generated by forking,
	and communicates with this program through pipes.
*/

int seglim;	/* limit of size of file segment to be generated */

char diff[] = "/usr/bin/diff";	/* program executed on each pair of segments */
/*
	Template used to generate temp file names. mktemp requires the
	template to end in six X's; a shorter run is rejected on
	current systems.
*/
char tempskel[] = "/tmp/bdXXXXXX";
char tempfile[32];	/* name of the file written by saverest */
char otmp[32], ntmp[32];	/* names of the old and new segment files */
int linenum;	/* offset added to the line numbers that 'diff' reports */

main(argc,argv)312144Seric main(argc,argv)
322144Seric int argc;
332144Seric char *argv[];
342144Seric {
352144Seric FILE *poldfile, *pnewfile, *tptr;
362144Seric char oline[BUFSIZ], nline[BUFSIZ], diffline[BUFSIZ];
372144Seric char *olp, *nlp, *dp;
382144Seric int i, otcnt, ntcnt;
392144Seric int pfd[2];
402144Seric FILE *poldtemp, *pnewtemp, *pipeinp;
412144Seric int status;
422144Seric
432144Seric /*
442144Seric Set flags for 'fatal' so that it will clean up,
452144Seric produce a message, and terminate.
462144Seric */
472144Seric Fflags = FTLMSG | FTLCLN | FTLEXIT;
482144Seric
492144Seric setsig();
502144Seric
512144Seric if (argc < 3 || argc > 5)
522144Seric fatal("arg count (bd1)");
532144Seric
542144Seric if (equal(argv[1],"-") && equal(argv[2],"-"))
552144Seric fatal("both files standard input (bd2)");
562144Seric if (equal(argv[1],"-"))
572144Seric poldfile = stdin;
582144Seric else
592144Seric poldfile = xfopen(argv[1],0);
602144Seric if (equal(argv[2],"-"))
612144Seric pnewfile = stdin;
622144Seric else
632144Seric pnewfile = xfopen(argv[2],0);
642144Seric
652144Seric seglim = 3500;
662144Seric
672144Seric if (argc > 3) {
682144Seric if (argv[3][0] == '-' && argv[3][1] == 's')
6930498Slepreau Fflags &= ~FTLMSG;
702144Seric else {
712144Seric if ((seglim = patoi(argv[3])) == -1)
722144Seric fatal("non-numeric limit (bd4)");
732144Seric if (argc == 5 && argv[4][0] == '-' &&
742144Seric argv[4][1] == 's')
7530498Slepreau Fflags &= ~FTLMSG;
762144Seric }
772144Seric }
782144Seric
792144Seric linenum = 0;
802144Seric
812144Seric /*
822144Seric The following while-loop will prevent any lines
832144Seric common to the beginning of both files from being
842144Seric sent to 'diff'. Since the running time of 'diff' is
852144Seric non-linear, this will help improve performance.
862144Seric If, during this process, both files reach EOF, then
872144Seric the files are equal and the program will terminate.
882144Seric If either file reaches EOF before the other, the
892144Seric program will generate the appropriate 'diff' output
902144Seric itself, since this can be easily determined and will
912144Seric avoid executing 'diff' completely.
922144Seric */
932144Seric while (1) {
942144Seric olp = fgets(oline,BUFSIZ,poldfile);
952144Seric nlp = fgets(nline,BUFSIZ,pnewfile);
962144Seric
972144Seric if (!olp && !nlp) /* files are equal */
982144Seric exit(0);
992144Seric
1002144Seric if (!olp) {
1012144Seric /*
1022144Seric The entire old file is a prefix of the
1032144Seric new file. Generate the appropriate "append"
1042144Seric 'diff'-like output, which is of the form:
1052144Seric nan,n
1062144Seric where 'n' represents a line-number.
1072144Seric */
1082144Seric addgen(nline,pnewfile);
1092144Seric }
1102144Seric
1112144Seric if (!nlp) {
1122144Seric /*
1132144Seric The entire new file is a prefix of the
1142144Seric old file. Generate the appropriate "delete"
1152144Seric 'diff'-like output, which is of the form:
1162144Seric n,ndn
1172144Seric where 'n' represents a line-number.
1182144Seric */
1192144Seric delgen(oline,poldfile);
1202144Seric }
1212144Seric
1222144Seric if (equal(olp,nlp))
1232144Seric linenum++;
1242144Seric else
1252144Seric break;
1262144Seric }
1272144Seric
1282144Seric /*
1292144Seric Here, first 'linenum' lines are equal.
1302144Seric The following while-loop segments both files into
1312144Seric seglim segments, forks and executes 'diff' on the
1322144Seric segments, and processes the resulting output of
1332144Seric 'diff', which is read from a pipe.
1342144Seric */
1352144Seric while (1) {
1362144Seric /*
1372144Seric If both files are at EOF, everything is done.
1382144Seric */
1392144Seric if (!olp && !nlp) /* finished */
1402144Seric exit(0);
1412144Seric
1422144Seric if (!olp) {
1432144Seric /*
1442144Seric Generate appropriate "append"
1452144Seric output without executing 'diff'.
1462144Seric */
1472144Seric addgen(nline,pnewfile);
1482144Seric }
1492144Seric
1502144Seric if (!nlp) {
1512144Seric /*
1522144Seric Generate appropriate "delete"
1532144Seric output without executing 'diff'.
1542144Seric */
1552144Seric delgen(oline,poldfile);
1562144Seric }
1572144Seric
1582144Seric /*
1592144Seric Create a temporary file to hold a segment
1602144Seric from the old file, and write it.
1612144Seric */
1622144Seric poldtemp = maket(otmp);
1632144Seric otcnt = 0;
1642144Seric while(olp && otcnt < seglim) {
1652144Seric fputs(oline,poldtemp);
1662144Seric olp = fgets(oline,BUFSIZ,poldfile);
1672144Seric otcnt++;
1682144Seric }
1692144Seric fclose(poldtemp);
1702144Seric
1712144Seric /*
1722144Seric Create a temporary file to hold a segment
1732144Seric from the new file, and write it.
1742144Seric */
1752144Seric pnewtemp = maket(ntmp);
1762144Seric ntcnt = 0;
1772144Seric while(nlp && ntcnt < seglim) {
1782144Seric fputs(nline,pnewtemp);
1792144Seric nlp = fgets(nline,BUFSIZ,pnewfile);
1802144Seric ntcnt++;
1812144Seric }
1822144Seric fclose(pnewtemp);
1832144Seric
1842144Seric /*
1852144Seric Create pipes and fork.
1862144Seric */
1872144Seric xpipe(pfd);
1882144Seric if ((i = fork()) < 0) {
1892144Seric close(pfd[0]);
1902144Seric close(pfd[1]);
1912144Seric fatal("cannot fork, try again (bd3)");
1922144Seric }
1932144Seric else if (i == 0) { /* child process */
1942144Seric close(pfd[0]);
19519941Ssam dup2(pfd[1], 1);
19619941Ssam if (pfd[1] != 1)
19719941Ssam close(pfd[1]);
1982144Seric
1992144Seric /*
2002144Seric Execute 'diff' on the segment files.
2012144Seric */
2022144Seric execl(diff,diff,otmp,ntmp,0);
2032144Seric close(1);
20433423Sbostic sprintf(Error,"cannot execute '%s' (bd5)",diff);
20533423Sbostic fatal(Error);
2062144Seric }
2072144Seric else { /* parent process */
2082144Seric close(pfd[1]);
20919941Ssam pipeinp = fdopen(pfd[0],"r");
2102144Seric
2112144Seric /*
2122144Seric Process 'diff' output.
2132144Seric */
2142144Seric while ((dp = fgets(diffline,BUFSIZ,pipeinp))) {
2152144Seric if (numeric(*dp))
2162144Seric fixnum(diffline);
2172144Seric else
2182144Seric printf("%s",diffline);
2192144Seric }
2202144Seric
2212144Seric fclose(pipeinp);
2222144Seric
2232144Seric /*
2242144Seric EOF on pipe.
2252144Seric */
2262144Seric wait(&status);
22733423Sbostic if (status&~0x100) {
22833423Sbostic sprintf(Error,"'%s' failed (bd6)",diff);
22933423Sbostic fatal(Error);
23033423Sbostic }
2312144Seric }
23230498Slepreau linenum += seglim;
2332144Seric
2342144Seric /*
2352144Seric Remove temporary files.
2362144Seric */
2372144Seric unlink(otmp);
2382144Seric unlink(ntmp);
2392144Seric }
2402144Seric }
2412144Seric
2422144Seric
2432144Seric /*
2442144Seric Routine to save remainder of a file.
2452144Seric */
saverest(line,iptr)2462144Seric saverest(line,iptr)
2472144Seric char *line;
2482144Seric FILE *iptr;
2492144Seric {
2502144Seric register char *lp;
2512144Seric FILE *temptr;
2522144Seric
2532144Seric temptr = maket(tempfile);
2542144Seric
2552144Seric lp = line;
2562144Seric
2572144Seric while (lp) {
2582144Seric fputs(line,temptr);
2592144Seric linenum++;
2602144Seric lp = fgets(line,BUFSIZ,iptr);
2612144Seric }
2622144Seric fclose(temptr);
2632144Seric }
2642144Seric
2652144Seric
2662144Seric /*
2672144Seric Routine to write out data saved by
2682144Seric 'saverest' routine and to remove the file.
2692144Seric */
putsave(line,type)2702144Seric putsave(line,type)
2712144Seric char *line;
2722144Seric char type;
2732144Seric {
2742144Seric FILE *temptr;
2752144Seric
2762144Seric temptr = xfopen(tempfile,0);
2772144Seric
2782144Seric while (fgets(line,BUFSIZ,temptr))
2792144Seric printf("%c %s",type,line);
2802144Seric
2812144Seric fclose(temptr);
2822144Seric
2832144Seric xunlink(tempfile);
2842144Seric }
2852144Seric
2862144Seric
fixnum(lp)2872144Seric fixnum(lp)
2882144Seric char *lp;
2892144Seric {
2902144Seric int num;
2912144Seric
2922144Seric while (*lp) {
2932144Seric switch (*lp) {
2942144Seric
2952144Seric case 'a':
2962144Seric case 'c':
2972144Seric case 'd':
2982144Seric case ',':
2992144Seric case '\n':
3002144Seric printf("%c",*lp);
3012144Seric lp++;
3022144Seric break;
3032144Seric
3042144Seric default:
3052144Seric lp = satoi(lp,&num);
30630498Slepreau num += linenum;
3072144Seric printf("%d",num);
3082144Seric }
3092144Seric }
3102144Seric }
3112144Seric
3122144Seric
addgen(lp,fp)3132144Seric addgen(lp,fp)
3142144Seric char *lp;
3152144Seric FILE *fp;
3162144Seric {
3172144Seric printf("%da%d,",linenum,linenum+1);
3182144Seric
3192144Seric /*
3202144Seric Save lines of new file.
3212144Seric */
3222144Seric saverest(lp,fp);
3232144Seric
3242144Seric printf("%d\n",linenum);
3252144Seric
3262144Seric /*
3272144Seric Output saved lines, as 'diff' would.
3282144Seric */
3292144Seric putsave(lp,'>');
3302144Seric
3312144Seric exit(0);
3322144Seric }
3332144Seric
3342144Seric
delgen(lp,fp)3352144Seric delgen(lp,fp)
3362144Seric char *lp;
3372144Seric FILE *fp;
3382144Seric {
3392144Seric int savenum;
3402144Seric
3412144Seric printf("%d,",linenum+1);
3422144Seric savenum = linenum;
3432144Seric
3442144Seric /*
3452144Seric Save lines of old file.
3462144Seric */
3472144Seric saverest(lp,fp);
3482144Seric
3492144Seric printf("%dd%d\n",linenum,savenum);
3502144Seric
3512144Seric /*
3522144Seric Output saved lines, as 'diff' would.
3532144Seric */
3542144Seric putsave(lp,'<');
3552144Seric
3562144Seric exit(0);
3572144Seric }
3582144Seric
3592144Seric
clean_up()3602144Seric clean_up()
3612144Seric {
3622144Seric unlink(tempfile);
3632144Seric unlink(otmp);
3642144Seric unlink(ntmp);
3652144Seric }
3662144Seric
3672144Seric
maket(file)3682144Seric maket(file)
3692144Seric char *file;
3702144Seric {
3712144Seric FILE *iop;
3722144Seric
3732144Seric copy(tempskel,file);
3742144Seric iop = xfcreat(mktemp(file),0644);
3752144Seric
3762144Seric return(iop);
3772144Seric }
378