12144Seric # include "../hdr/defines.h" 22144Seric 3*30498Slepreau static char Sccsid[] = "@(#)bdiff.c 4.3 02/15/87"; 42144Seric 52144Seric /* 62144Seric This program segments two files into pieces of <= seglim lines 72144Seric (which is passed as a third argument or defaulted to some number) 82144Seric and then executes diff upon the pieces. The output of 92144Seric 'diff' is then processed to make it look as if 'diff' had 102144Seric processed the files whole. The reason for all this is that seglim 112144Seric is a reasonable upper limit on the size of files that diff can 122144Seric process. 132144Seric NOTE -- by segmenting the files in this manner, it cannot be 142144Seric guaranteed that the 'diffing' of the segments will generate 152144Seric a minimal set of differences. 162144Seric This process is most definitely not equivalent to 'diffing' 172144Seric the files whole, assuming 'diff' could handle such large files. 182144Seric 192144Seric 'diff' is executed by a child process, generated by forking, 202144Seric and communicates with this program through pipes. 212144Seric */ 222144Seric 232144Seric int seglim; /* limit of size of file segment to be generated */ 242144Seric 25*30498Slepreau char diff[] = "/bin/diff"; 26*30498Slepreau char tempskel[] = "/tmp/bdXXXXX"; /* used to generate temp file names */ 272144Seric char tempfile[32]; 282144Seric char otmp[32], ntmp[32]; 292144Seric int linenum; 302144Seric 312144Seric main(argc,argv) 322144Seric int argc; 332144Seric char *argv[]; 342144Seric { 352144Seric FILE *poldfile, *pnewfile, *tptr; 362144Seric char oline[BUFSIZ], nline[BUFSIZ], diffline[BUFSIZ]; 372144Seric char *olp, *nlp, *dp; 382144Seric int i, otcnt, ntcnt; 392144Seric int pfd[2]; 402144Seric FILE *poldtemp, *pnewtemp, *pipeinp; 412144Seric int status; 422144Seric 432144Seric /* 442144Seric Set flags for 'fatal' so that it will clean up, 452144Seric produce a message, and terminate. 462144Seric */ 472144Seric Fflags = FTLMSG | FTLCLN | FTLEXIT; 482144Seric 492144Seric setsig(); 502144Seric 512144Seric if (argc < 3 || argc > 5) 522144Seric fatal("arg count (bd1)"); 532144Seric 542144Seric if (equal(argv[1],"-") && equal(argv[2],"-")) 552144Seric fatal("both files standard input (bd2)"); 562144Seric if (equal(argv[1],"-")) 572144Seric poldfile = stdin; 582144Seric else 592144Seric poldfile = xfopen(argv[1],0); 602144Seric if (equal(argv[2],"-")) 612144Seric pnewfile = stdin; 622144Seric else 632144Seric pnewfile = xfopen(argv[2],0); 642144Seric 652144Seric seglim = 3500; 662144Seric 672144Seric if (argc > 3) { 682144Seric if (argv[3][0] == '-' && argv[3][1] == 's') 69*30498Slepreau Fflags &= ~FTLMSG; 702144Seric else { 712144Seric if ((seglim = patoi(argv[3])) == -1) 722144Seric fatal("non-numeric limit (bd4)"); 732144Seric if (argc == 5 && argv[4][0] == '-' && 742144Seric argv[4][1] == 's') 75*30498Slepreau Fflags &= ~FTLMSG; 762144Seric } 772144Seric } 782144Seric 792144Seric linenum = 0; 802144Seric 812144Seric /* 822144Seric The following while-loop will prevent any lines 832144Seric common to the beginning of both files from being 842144Seric sent to 'diff'. Since the running time of 'diff' is 852144Seric non-linear, this will help improve performance. 862144Seric If, during this process, both files reach EOF, then 872144Seric the files are equal and the program will terminate. 882144Seric If either file reaches EOF before the other, the 892144Seric program will generate the appropriate 'diff' output 902144Seric itself, since this can be easily determined and will 912144Seric avoid executing 'diff' completely. 922144Seric */ 932144Seric while (1) { 942144Seric olp = fgets(oline,BUFSIZ,poldfile); 952144Seric nlp = fgets(nline,BUFSIZ,pnewfile); 962144Seric 972144Seric if (!olp && !nlp) /* files are equal */ 982144Seric exit(0); 992144Seric 1002144Seric if (!olp) { 1012144Seric /* 1022144Seric The entire old file is a prefix of the 1032144Seric new file. Generate the appropriate "append" 1042144Seric 'diff'-like output, which is of the form: 1052144Seric nan,n 1062144Seric where 'n' represents a line-number. 1072144Seric */ 1082144Seric addgen(nline,pnewfile); 1092144Seric } 1102144Seric 1112144Seric if (!nlp) { 1122144Seric /* 1132144Seric The entire new file is a prefix of the 1142144Seric old file. Generate the appropriate "delete" 1152144Seric 'diff'-like output, which is of the form: 1162144Seric n,ndn 1172144Seric where 'n' represents a line-number. 1182144Seric */ 1192144Seric delgen(oline,poldfile); 1202144Seric } 1212144Seric 1222144Seric if (equal(olp,nlp)) 1232144Seric linenum++; 1242144Seric else 1252144Seric break; 1262144Seric } 1272144Seric 1282144Seric /* 1292144Seric Here, first 'linenum' lines are equal. 1302144Seric The following while-loop segments both files into 1312144Seric seglim segments, forks and executes 'diff' on the 1322144Seric segments, and processes the resulting output of 1332144Seric 'diff', which is read from a pipe. 1342144Seric */ 1352144Seric while (1) { 1362144Seric /* 1372144Seric If both files are at EOF, everything is done. 1382144Seric */ 1392144Seric if (!olp && !nlp) /* finished */ 1402144Seric exit(0); 1412144Seric 1422144Seric if (!olp) { 1432144Seric /* 1442144Seric Generate appropriate "append" 1452144Seric output without executing 'diff'. 1462144Seric */ 1472144Seric addgen(nline,pnewfile); 1482144Seric } 1492144Seric 1502144Seric if (!nlp) { 1512144Seric /* 1522144Seric Generate appropriate "delete" 1532144Seric output without executing 'diff'. 1542144Seric */ 1552144Seric delgen(oline,poldfile); 1562144Seric } 1572144Seric 1582144Seric /* 1592144Seric Create a temporary file to hold a segment 1602144Seric from the old file, and write it. 1612144Seric */ 1622144Seric poldtemp = maket(otmp); 1632144Seric otcnt = 0; 1642144Seric while(olp && otcnt < seglim) { 1652144Seric fputs(oline,poldtemp); 1662144Seric olp = fgets(oline,BUFSIZ,poldfile); 1672144Seric otcnt++; 1682144Seric } 1692144Seric fclose(poldtemp); 1702144Seric 1712144Seric /* 1722144Seric Create a temporary file to hold a segment 1732144Seric from the new file, and write it. 1742144Seric */ 1752144Seric pnewtemp = maket(ntmp); 1762144Seric ntcnt = 0; 1772144Seric while(nlp && ntcnt < seglim) { 1782144Seric fputs(nline,pnewtemp); 1792144Seric nlp = fgets(nline,BUFSIZ,pnewfile); 1802144Seric ntcnt++; 1812144Seric } 1822144Seric fclose(pnewtemp); 1832144Seric 1842144Seric /* 1852144Seric Create pipes and fork. 1862144Seric */ 1872144Seric xpipe(pfd); 1882144Seric if ((i = fork()) < 0) { 1892144Seric close(pfd[0]); 1902144Seric close(pfd[1]); 1912144Seric fatal("cannot fork, try again (bd3)"); 1922144Seric } 1932144Seric else if (i == 0) { /* child process */ 1942144Seric close(pfd[0]); 19519941Ssam dup2(pfd[1], 1); 19619941Ssam if (pfd[1] != 1) 19719941Ssam close(pfd[1]); 1982144Seric 1992144Seric /* 2002144Seric Execute 'diff' on the segment files. 2012144Seric */ 2022144Seric execl(diff,diff,otmp,ntmp,0); 2032144Seric close(1); 2042144Seric fatal(sprintf(Error,"cannot execute '%s' (bd5)",diff)); 2052144Seric } 2062144Seric else { /* parent process */ 2072144Seric close(pfd[1]); 20819941Ssam pipeinp = fdopen(pfd[0],"r"); 2092144Seric 2102144Seric /* 2112144Seric Process 'diff' output. 2122144Seric */ 2132144Seric while ((dp = fgets(diffline,BUFSIZ,pipeinp))) { 2142144Seric if (numeric(*dp)) 2152144Seric fixnum(diffline); 2162144Seric else 2172144Seric printf("%s",diffline); 2182144Seric } 2192144Seric 2202144Seric fclose(pipeinp); 2212144Seric 2222144Seric /* 2232144Seric EOF on pipe. 2242144Seric */ 2252144Seric wait(&status); 2262144Seric if (status&~0x100) 2272144Seric fatal(sprintf(Error,"'%s' failed (bd6)",diff)); 2282144Seric } 229*30498Slepreau linenum += seglim; 2302144Seric 2312144Seric /* 2322144Seric Remove temporary files. 2332144Seric */ 2342144Seric unlink(otmp); 2352144Seric unlink(ntmp); 2362144Seric } 2372144Seric } 2382144Seric 2392144Seric 2402144Seric /* 2412144Seric Routine to save remainder of a file. 2422144Seric */ 2432144Seric saverest(line,iptr) 2442144Seric char *line; 2452144Seric FILE *iptr; 2462144Seric { 2472144Seric register char *lp; 2482144Seric FILE *temptr; 2492144Seric 2502144Seric temptr = maket(tempfile); 2512144Seric 2522144Seric lp = line; 2532144Seric 2542144Seric while (lp) { 2552144Seric fputs(line,temptr); 2562144Seric linenum++; 2572144Seric lp = fgets(line,BUFSIZ,iptr); 2582144Seric } 2592144Seric fclose(temptr); 2602144Seric } 2612144Seric 2622144Seric 2632144Seric /* 2642144Seric Routine to write out data saved by 2652144Seric 'saverest' routine and to remove the file. 2662144Seric */ 2672144Seric putsave(line,type) 2682144Seric char *line; 2692144Seric char type; 2702144Seric { 2712144Seric FILE *temptr; 2722144Seric 2732144Seric temptr = xfopen(tempfile,0); 2742144Seric 2752144Seric while (fgets(line,BUFSIZ,temptr)) 2762144Seric printf("%c %s",type,line); 2772144Seric 2782144Seric fclose(temptr); 2792144Seric 2802144Seric xunlink(tempfile); 2812144Seric } 2822144Seric 2832144Seric 2842144Seric fixnum(lp) 2852144Seric char *lp; 2862144Seric { 2872144Seric int num; 2882144Seric 2892144Seric while (*lp) { 2902144Seric switch (*lp) { 2912144Seric 2922144Seric case 'a': 2932144Seric case 'c': 2942144Seric case 'd': 2952144Seric case ',': 2962144Seric case '\n': 2972144Seric printf("%c",*lp); 2982144Seric lp++; 2992144Seric break; 3002144Seric 3012144Seric default: 3022144Seric lp = satoi(lp,&num); 303*30498Slepreau num += linenum; 3042144Seric printf("%d",num); 3052144Seric } 3062144Seric } 3072144Seric } 3082144Seric 3092144Seric 3102144Seric addgen(lp,fp) 3112144Seric char *lp; 3122144Seric FILE *fp; 3132144Seric { 3142144Seric printf("%da%d,",linenum,linenum+1); 3152144Seric 3162144Seric /* 3172144Seric Save lines of new file. 3182144Seric */ 3192144Seric saverest(lp,fp); 3202144Seric 3212144Seric printf("%d\n",linenum); 3222144Seric 3232144Seric /* 3242144Seric Output saved lines, as 'diff' would. 3252144Seric */ 3262144Seric putsave(lp,'>'); 3272144Seric 3282144Seric exit(0); 3292144Seric } 3302144Seric 3312144Seric 3322144Seric delgen(lp,fp) 3332144Seric char *lp; 3342144Seric FILE *fp; 3352144Seric { 3362144Seric int savenum; 3372144Seric 3382144Seric printf("%d,",linenum+1); 3392144Seric savenum = linenum; 3402144Seric 3412144Seric /* 3422144Seric Save lines of old file. 3432144Seric */ 3442144Seric saverest(lp,fp); 3452144Seric 3462144Seric printf("%dd%d\n",linenum,savenum); 3472144Seric 3482144Seric /* 3492144Seric Output saved lines, as 'diff' would. 3502144Seric */ 3512144Seric putsave(lp,'<'); 3522144Seric 3532144Seric exit(0); 3542144Seric } 3552144Seric 3562144Seric 3572144Seric clean_up() 3582144Seric { 3592144Seric unlink(tempfile); 3602144Seric unlink(otmp); 3612144Seric unlink(ntmp); 3622144Seric } 3632144Seric 3642144Seric 3652144Seric maket(file) 3662144Seric char *file; 3672144Seric { 3682144Seric FILE *iop; 3692144Seric 3702144Seric copy(tempskel,file); 3712144Seric iop = xfcreat(mktemp(file),0644); 3722144Seric 3732144Seric return(iop); 3742144Seric } 375