12144Seric # include "../hdr/defines.h" 22144Seric 3*33423Sbostic static char Sccsid[] = "@(#)bdiff.c 4.4 02/02/88"; 42144Seric 52144Seric /* 62144Seric This program segments two files into pieces of <= seglim lines 72144Seric (which is passed as a third argument or defaulted to some number) 82144Seric and then executes diff upon the pieces. The output of 92144Seric 'diff' is then processed to make it look as if 'diff' had 102144Seric processed the files whole. The reason for all this is that seglim 112144Seric is a reasonable upper limit on the size of files that diff can 122144Seric process. 132144Seric NOTE -- by segmenting the files in this manner, it cannot be 142144Seric guaranteed that the 'diffing' of the segments will generate 152144Seric a minimal set of differences. 162144Seric This process is most definitely not equivalent to 'diffing' 172144Seric the files whole, assuming 'diff' could handle such large files. 182144Seric 192144Seric 'diff' is executed by a child process, generated by forking, 202144Seric and communicates with this program through pipes. 212144Seric */ 222144Seric 232144Seric int seglim; /* limit of size of file segment to be generated */ 242144Seric 2530498Slepreau char diff[] = "/bin/diff"; 2630498Slepreau char tempskel[] = "/tmp/bdXXXXX"; /* used to generate temp file names */ 272144Seric char tempfile[32]; 282144Seric char otmp[32], ntmp[32]; 292144Seric int linenum; 302144Seric 312144Seric main(argc,argv) 322144Seric int argc; 332144Seric char *argv[]; 342144Seric { 352144Seric FILE *poldfile, *pnewfile, *tptr; 362144Seric char oline[BUFSIZ], nline[BUFSIZ], diffline[BUFSIZ]; 372144Seric char *olp, *nlp, *dp; 382144Seric int i, otcnt, ntcnt; 392144Seric int pfd[2]; 402144Seric FILE *poldtemp, *pnewtemp, *pipeinp; 412144Seric int status; 422144Seric 432144Seric /* 442144Seric Set flags for 'fatal' so that it will clean up, 452144Seric produce a message, and terminate. 462144Seric */ 472144Seric Fflags = FTLMSG | FTLCLN | FTLEXIT; 482144Seric 492144Seric setsig(); 502144Seric 512144Seric if (argc < 3 || argc > 5) 522144Seric fatal("arg count (bd1)"); 532144Seric 542144Seric if (equal(argv[1],"-") && equal(argv[2],"-")) 552144Seric fatal("both files standard input (bd2)"); 562144Seric if (equal(argv[1],"-")) 572144Seric poldfile = stdin; 582144Seric else 592144Seric poldfile = xfopen(argv[1],0); 602144Seric if (equal(argv[2],"-")) 612144Seric pnewfile = stdin; 622144Seric else 632144Seric pnewfile = xfopen(argv[2],0); 642144Seric 652144Seric seglim = 3500; 662144Seric 672144Seric if (argc > 3) { 682144Seric if (argv[3][0] == '-' && argv[3][1] == 's') 6930498Slepreau Fflags &= ~FTLMSG; 702144Seric else { 712144Seric if ((seglim = patoi(argv[3])) == -1) 722144Seric fatal("non-numeric limit (bd4)"); 732144Seric if (argc == 5 && argv[4][0] == '-' && 742144Seric argv[4][1] == 's') 7530498Slepreau Fflags &= ~FTLMSG; 762144Seric } 772144Seric } 782144Seric 792144Seric linenum = 0; 802144Seric 812144Seric /* 822144Seric The following while-loop will prevent any lines 832144Seric common to the beginning of both files from being 842144Seric sent to 'diff'. Since the running time of 'diff' is 852144Seric non-linear, this will help improve performance. 862144Seric If, during this process, both files reach EOF, then 872144Seric the files are equal and the program will terminate. 882144Seric If either file reaches EOF before the other, the 892144Seric program will generate the appropriate 'diff' output 902144Seric itself, since this can be easily determined and will 912144Seric avoid executing 'diff' completely. 922144Seric */ 932144Seric while (1) { 942144Seric olp = fgets(oline,BUFSIZ,poldfile); 952144Seric nlp = fgets(nline,BUFSIZ,pnewfile); 962144Seric 972144Seric if (!olp && !nlp) /* files are equal */ 982144Seric exit(0); 992144Seric 1002144Seric if (!olp) { 1012144Seric /* 1022144Seric The entire old file is a prefix of the 1032144Seric new file. Generate the appropriate "append" 1042144Seric 'diff'-like output, which is of the form: 1052144Seric nan,n 1062144Seric where 'n' represents a line-number. 1072144Seric */ 1082144Seric addgen(nline,pnewfile); 1092144Seric } 1102144Seric 1112144Seric if (!nlp) { 1122144Seric /* 1132144Seric The entire new file is a prefix of the 1142144Seric old file. Generate the appropriate "delete" 1152144Seric 'diff'-like output, which is of the form: 1162144Seric n,ndn 1172144Seric where 'n' represents a line-number. 1182144Seric */ 1192144Seric delgen(oline,poldfile); 1202144Seric } 1212144Seric 1222144Seric if (equal(olp,nlp)) 1232144Seric linenum++; 1242144Seric else 1252144Seric break; 1262144Seric } 1272144Seric 1282144Seric /* 1292144Seric Here, first 'linenum' lines are equal. 1302144Seric The following while-loop segments both files into 1312144Seric seglim segments, forks and executes 'diff' on the 1322144Seric segments, and processes the resulting output of 1332144Seric 'diff', which is read from a pipe. 1342144Seric */ 1352144Seric while (1) { 1362144Seric /* 1372144Seric If both files are at EOF, everything is done. 1382144Seric */ 1392144Seric if (!olp && !nlp) /* finished */ 1402144Seric exit(0); 1412144Seric 1422144Seric if (!olp) { 1432144Seric /* 1442144Seric Generate appropriate "append" 1452144Seric output without executing 'diff'. 1462144Seric */ 1472144Seric addgen(nline,pnewfile); 1482144Seric } 1492144Seric 1502144Seric if (!nlp) { 1512144Seric /* 1522144Seric Generate appropriate "delete" 1532144Seric output without executing 'diff'. 1542144Seric */ 1552144Seric delgen(oline,poldfile); 1562144Seric } 1572144Seric 1582144Seric /* 1592144Seric Create a temporary file to hold a segment 1602144Seric from the old file, and write it. 1612144Seric */ 1622144Seric poldtemp = maket(otmp); 1632144Seric otcnt = 0; 1642144Seric while(olp && otcnt < seglim) { 1652144Seric fputs(oline,poldtemp); 1662144Seric olp = fgets(oline,BUFSIZ,poldfile); 1672144Seric otcnt++; 1682144Seric } 1692144Seric fclose(poldtemp); 1702144Seric 1712144Seric /* 1722144Seric Create a temporary file to hold a segment 1732144Seric from the new file, and write it. 1742144Seric */ 1752144Seric pnewtemp = maket(ntmp); 1762144Seric ntcnt = 0; 1772144Seric while(nlp && ntcnt < seglim) { 1782144Seric fputs(nline,pnewtemp); 1792144Seric nlp = fgets(nline,BUFSIZ,pnewfile); 1802144Seric ntcnt++; 1812144Seric } 1822144Seric fclose(pnewtemp); 1832144Seric 1842144Seric /* 1852144Seric Create pipes and fork. 1862144Seric */ 1872144Seric xpipe(pfd); 1882144Seric if ((i = fork()) < 0) { 1892144Seric close(pfd[0]); 1902144Seric close(pfd[1]); 1912144Seric fatal("cannot fork, try again (bd3)"); 1922144Seric } 1932144Seric else if (i == 0) { /* child process */ 1942144Seric close(pfd[0]); 19519941Ssam dup2(pfd[1], 1); 19619941Ssam if (pfd[1] != 1) 19719941Ssam close(pfd[1]); 1982144Seric 1992144Seric /* 2002144Seric Execute 'diff' on the segment files. 2012144Seric */ 2022144Seric execl(diff,diff,otmp,ntmp,0); 2032144Seric close(1); 204*33423Sbostic sprintf(Error,"cannot execute '%s' (bd5)",diff); 205*33423Sbostic fatal(Error); 2062144Seric } 2072144Seric else { /* parent process */ 2082144Seric close(pfd[1]); 20919941Ssam pipeinp = fdopen(pfd[0],"r"); 2102144Seric 2112144Seric /* 2122144Seric Process 'diff' output. 2132144Seric */ 2142144Seric while ((dp = fgets(diffline,BUFSIZ,pipeinp))) { 2152144Seric if (numeric(*dp)) 2162144Seric fixnum(diffline); 2172144Seric else 2182144Seric printf("%s",diffline); 2192144Seric } 2202144Seric 2212144Seric fclose(pipeinp); 2222144Seric 2232144Seric /* 2242144Seric EOF on pipe. 2252144Seric */ 2262144Seric wait(&status); 227*33423Sbostic if (status&~0x100) { 228*33423Sbostic sprintf(Error,"'%s' failed (bd6)",diff); 229*33423Sbostic fatal(Error); 230*33423Sbostic } 2312144Seric } 23230498Slepreau linenum += seglim; 2332144Seric 2342144Seric /* 2352144Seric Remove temporary files. 2362144Seric */ 2372144Seric unlink(otmp); 2382144Seric unlink(ntmp); 2392144Seric } 2402144Seric } 2412144Seric 2422144Seric 2432144Seric /* 2442144Seric Routine to save remainder of a file. 2452144Seric */ 2462144Seric saverest(line,iptr) 2472144Seric char *line; 2482144Seric FILE *iptr; 2492144Seric { 2502144Seric register char *lp; 2512144Seric FILE *temptr; 2522144Seric 2532144Seric temptr = maket(tempfile); 2542144Seric 2552144Seric lp = line; 2562144Seric 2572144Seric while (lp) { 2582144Seric fputs(line,temptr); 2592144Seric linenum++; 2602144Seric lp = fgets(line,BUFSIZ,iptr); 2612144Seric } 2622144Seric fclose(temptr); 2632144Seric } 2642144Seric 2652144Seric 2662144Seric /* 2672144Seric Routine to write out data saved by 2682144Seric 'saverest' routine and to remove the file. 2692144Seric */ 2702144Seric putsave(line,type) 2712144Seric char *line; 2722144Seric char type; 2732144Seric { 2742144Seric FILE *temptr; 2752144Seric 2762144Seric temptr = xfopen(tempfile,0); 2772144Seric 2782144Seric while (fgets(line,BUFSIZ,temptr)) 2792144Seric printf("%c %s",type,line); 2802144Seric 2812144Seric fclose(temptr); 2822144Seric 2832144Seric xunlink(tempfile); 2842144Seric } 2852144Seric 2862144Seric 2872144Seric fixnum(lp) 2882144Seric char *lp; 2892144Seric { 2902144Seric int num; 2912144Seric 2922144Seric while (*lp) { 2932144Seric switch (*lp) { 2942144Seric 2952144Seric case 'a': 2962144Seric case 'c': 2972144Seric case 'd': 2982144Seric case ',': 2992144Seric case '\n': 3002144Seric printf("%c",*lp); 3012144Seric lp++; 3022144Seric break; 3032144Seric 3042144Seric default: 3052144Seric lp = satoi(lp,&num); 30630498Slepreau num += linenum; 3072144Seric printf("%d",num); 3082144Seric } 3092144Seric } 3102144Seric } 3112144Seric 3122144Seric 3132144Seric addgen(lp,fp) 3142144Seric char *lp; 3152144Seric FILE *fp; 3162144Seric { 3172144Seric printf("%da%d,",linenum,linenum+1); 3182144Seric 3192144Seric /* 3202144Seric Save lines of new file. 3212144Seric */ 3222144Seric saverest(lp,fp); 3232144Seric 3242144Seric printf("%d\n",linenum); 3252144Seric 3262144Seric /* 3272144Seric Output saved lines, as 'diff' would. 3282144Seric */ 3292144Seric putsave(lp,'>'); 3302144Seric 3312144Seric exit(0); 3322144Seric } 3332144Seric 3342144Seric 3352144Seric delgen(lp,fp) 3362144Seric char *lp; 3372144Seric FILE *fp; 3382144Seric { 3392144Seric int savenum; 3402144Seric 3412144Seric printf("%d,",linenum+1); 3422144Seric savenum = linenum; 3432144Seric 3442144Seric /* 3452144Seric Save lines of old file. 3462144Seric */ 3472144Seric saverest(lp,fp); 3482144Seric 3492144Seric printf("%dd%d\n",linenum,savenum); 3502144Seric 3512144Seric /* 3522144Seric Output saved lines, as 'diff' would. 3532144Seric */ 3542144Seric putsave(lp,'<'); 3552144Seric 3562144Seric exit(0); 3572144Seric } 3582144Seric 3592144Seric 3602144Seric clean_up() 3612144Seric { 3622144Seric unlink(tempfile); 3632144Seric unlink(otmp); 3642144Seric unlink(ntmp); 3652144Seric } 3662144Seric 3672144Seric 3682144Seric maket(file) 3692144Seric char *file; 3702144Seric { 3712144Seric FILE *iop; 3722144Seric 3732144Seric copy(tempskel,file); 3742144Seric iop = xfcreat(mktemp(file),0644); 3752144Seric 3762144Seric return(iop); 3772144Seric } 378