1*2144Seric # include "../hdr/defines.h" 2*2144Seric 3*2144Seric SCCSID(@(#)bdiff.c 4.1); 4*2144Seric 5*2144Seric /* 6*2144Seric This program segments two files into pieces of <= seglim lines 7*2144Seric (which is passed as a third argument or defaulted to some number) 8*2144Seric and then executes diff upon the pieces. The output of 9*2144Seric 'diff' is then processed to make it look as if 'diff' had 10*2144Seric processed the files whole. The reason for all this is that seglim 11*2144Seric is a reasonable upper limit on the size of files that diff can 12*2144Seric process. 13*2144Seric NOTE -- by segmenting the files in this manner, it cannot be 14*2144Seric guaranteed that the 'diffing' of the segments will generate 15*2144Seric a minimal set of differences. 16*2144Seric This process is most definitely not equivalent to 'diffing' 17*2144Seric the files whole, assuming 'diff' could handle such large files. 18*2144Seric 19*2144Seric 'diff' is executed by a child process, generated by forking, 20*2144Seric and communicates with this program through pipes. 21*2144Seric */ 22*2144Seric 23*2144Seric int seglim; /* limit of size of file segment to be generated */ 24*2144Seric 25*2144Seric char diff[] "/bin/diff"; 26*2144Seric char tempskel[] "/tmp/bdXXXXX"; /* used to generate temp file names */ 27*2144Seric char tempfile[32]; 28*2144Seric char otmp[32], ntmp[32]; 29*2144Seric int linenum; 30*2144Seric 31*2144Seric main(argc,argv) 32*2144Seric int argc; 33*2144Seric char *argv[]; 34*2144Seric { 35*2144Seric FILE *poldfile, *pnewfile, *tptr; 36*2144Seric char oline[BUFSIZ], nline[BUFSIZ], diffline[BUFSIZ]; 37*2144Seric char *olp, *nlp, *dp; 38*2144Seric int i, otcnt, ntcnt; 39*2144Seric int pfd[2]; 40*2144Seric FILE *poldtemp, *pnewtemp, *pipeinp; 41*2144Seric int status; 42*2144Seric 43*2144Seric /* 44*2144Seric Set flags for 'fatal' so that it will clean up, 45*2144Seric produce a message, and terminate. 46*2144Seric */ 47*2144Seric Fflags = FTLMSG | FTLCLN | FTLEXIT; 48*2144Seric 49*2144Seric setsig(); 50*2144Seric 51*2144Seric if (argc < 3 || argc > 5) 52*2144Seric fatal("arg count (bd1)"); 53*2144Seric 54*2144Seric if (equal(argv[1],"-") && equal(argv[2],"-")) 55*2144Seric fatal("both files standard input (bd2)"); 56*2144Seric if (equal(argv[1],"-")) 57*2144Seric poldfile = stdin; 58*2144Seric else 59*2144Seric poldfile = xfopen(argv[1],0); 60*2144Seric if (equal(argv[2],"-")) 61*2144Seric pnewfile = stdin; 62*2144Seric else 63*2144Seric pnewfile = xfopen(argv[2],0); 64*2144Seric 65*2144Seric seglim = 3500; 66*2144Seric 67*2144Seric if (argc > 3) { 68*2144Seric if (argv[3][0] == '-' && argv[3][1] == 's') 69*2144Seric Fflags =& ~FTLMSG; 70*2144Seric else { 71*2144Seric if ((seglim = patoi(argv[3])) == -1) 72*2144Seric fatal("non-numeric limit (bd4)"); 73*2144Seric if (argc == 5 && argv[4][0] == '-' && 74*2144Seric argv[4][1] == 's') 75*2144Seric Fflags =& ~FTLMSG; 76*2144Seric } 77*2144Seric } 78*2144Seric 79*2144Seric linenum = 0; 80*2144Seric 81*2144Seric /* 82*2144Seric The following while-loop will prevent any lines 83*2144Seric common to the beginning of both files from being 84*2144Seric sent to 'diff'. Since the running time of 'diff' is 85*2144Seric non-linear, this will help improve performance. 86*2144Seric If, during this process, both files reach EOF, then 87*2144Seric the files are equal and the program will terminate. 88*2144Seric If either file reaches EOF before the other, the 89*2144Seric program will generate the appropriate 'diff' output 90*2144Seric itself, since this can be easily determined and will 91*2144Seric avoid executing 'diff' completely. 92*2144Seric */ 93*2144Seric while (1) { 94*2144Seric olp = fgets(oline,BUFSIZ,poldfile); 95*2144Seric nlp = fgets(nline,BUFSIZ,pnewfile); 96*2144Seric 97*2144Seric if (!olp && !nlp) /* files are equal */ 98*2144Seric exit(0); 99*2144Seric 100*2144Seric if (!olp) { 101*2144Seric /* 102*2144Seric The entire old file is a prefix of the 103*2144Seric new file. Generate the appropriate "append" 104*2144Seric 'diff'-like output, which is of the form: 105*2144Seric nan,n 106*2144Seric where 'n' represents a line-number. 107*2144Seric */ 108*2144Seric addgen(nline,pnewfile); 109*2144Seric } 110*2144Seric 111*2144Seric if (!nlp) { 112*2144Seric /* 113*2144Seric The entire new file is a prefix of the 114*2144Seric old file. Generate the appropriate "delete" 115*2144Seric 'diff'-like output, which is of the form: 116*2144Seric n,ndn 117*2144Seric where 'n' represents a line-number. 118*2144Seric */ 119*2144Seric delgen(oline,poldfile); 120*2144Seric } 121*2144Seric 122*2144Seric if (equal(olp,nlp)) 123*2144Seric linenum++; 124*2144Seric else 125*2144Seric break; 126*2144Seric } 127*2144Seric 128*2144Seric /* 129*2144Seric Here, first 'linenum' lines are equal. 130*2144Seric The following while-loop segments both files into 131*2144Seric seglim segments, forks and executes 'diff' on the 132*2144Seric segments, and processes the resulting output of 133*2144Seric 'diff', which is read from a pipe. 134*2144Seric */ 135*2144Seric while (1) { 136*2144Seric /* 137*2144Seric If both files are at EOF, everything is done. 138*2144Seric */ 139*2144Seric if (!olp && !nlp) /* finished */ 140*2144Seric exit(0); 141*2144Seric 142*2144Seric if (!olp) { 143*2144Seric /* 144*2144Seric Generate appropriate "append" 145*2144Seric output without executing 'diff'. 146*2144Seric */ 147*2144Seric addgen(nline,pnewfile); 148*2144Seric } 149*2144Seric 150*2144Seric if (!nlp) { 151*2144Seric /* 152*2144Seric Generate appropriate "delete" 153*2144Seric output without executing 'diff'. 154*2144Seric */ 155*2144Seric delgen(oline,poldfile); 156*2144Seric } 157*2144Seric 158*2144Seric /* 159*2144Seric Create a temporary file to hold a segment 160*2144Seric from the old file, and write it. 161*2144Seric */ 162*2144Seric poldtemp = maket(otmp); 163*2144Seric otcnt = 0; 164*2144Seric while(olp && otcnt < seglim) { 165*2144Seric fputs(oline,poldtemp); 166*2144Seric olp = fgets(oline,BUFSIZ,poldfile); 167*2144Seric otcnt++; 168*2144Seric } 169*2144Seric fclose(poldtemp); 170*2144Seric 171*2144Seric /* 172*2144Seric Create a temporary file to hold a segment 173*2144Seric from the new file, and write it. 174*2144Seric */ 175*2144Seric pnewtemp = maket(ntmp); 176*2144Seric ntcnt = 0; 177*2144Seric while(nlp && ntcnt < seglim) { 178*2144Seric fputs(nline,pnewtemp); 179*2144Seric nlp = fgets(nline,BUFSIZ,pnewfile); 180*2144Seric ntcnt++; 181*2144Seric } 182*2144Seric fclose(pnewtemp); 183*2144Seric 184*2144Seric /* 185*2144Seric Create pipes and fork. 186*2144Seric */ 187*2144Seric xpipe(pfd); 188*2144Seric if ((i = fork()) < 0) { 189*2144Seric close(pfd[0]); 190*2144Seric close(pfd[1]); 191*2144Seric fatal("cannot fork, try again (bd3)"); 192*2144Seric } 193*2144Seric else if (i == 0) { /* child process */ 194*2144Seric close(pfd[0]); 195*2144Seric close(1); 196*2144Seric dup(pfd[1]); 197*2144Seric close(pfd[1]); 198*2144Seric 199*2144Seric /* 200*2144Seric Execute 'diff' on the segment files. 201*2144Seric */ 202*2144Seric execl(diff,diff,otmp,ntmp,0); 203*2144Seric close(1); 204*2144Seric fatal(sprintf(Error,"cannot execute '%s' (bd5)",diff)); 205*2144Seric } 206*2144Seric else { /* parent process */ 207*2144Seric close(pfd[1]); 208*2144Seric pipeinp = fdfopen(pfd[0],0); 209*2144Seric 210*2144Seric /* 211*2144Seric Process 'diff' output. 212*2144Seric */ 213*2144Seric while ((dp = fgets(diffline,BUFSIZ,pipeinp))) { 214*2144Seric if (numeric(*dp)) 215*2144Seric fixnum(diffline); 216*2144Seric else 217*2144Seric printf("%s",diffline); 218*2144Seric } 219*2144Seric 220*2144Seric fclose(pipeinp); 221*2144Seric 222*2144Seric /* 223*2144Seric EOF on pipe. 224*2144Seric */ 225*2144Seric wait(&status); 226*2144Seric if (status&~0x100) 227*2144Seric fatal(sprintf(Error,"'%s' failed (bd6)",diff)); 228*2144Seric } 229*2144Seric linenum =+ seglim; 230*2144Seric 231*2144Seric /* 232*2144Seric Remove temporary files. 233*2144Seric */ 234*2144Seric unlink(otmp); 235*2144Seric unlink(ntmp); 236*2144Seric } 237*2144Seric } 238*2144Seric 239*2144Seric 240*2144Seric /* 241*2144Seric Routine to save remainder of a file. 242*2144Seric */ 243*2144Seric saverest(line,iptr) 244*2144Seric char *line; 245*2144Seric FILE *iptr; 246*2144Seric { 247*2144Seric register char *lp; 248*2144Seric FILE *temptr; 249*2144Seric 250*2144Seric temptr = maket(tempfile); 251*2144Seric 252*2144Seric lp = line; 253*2144Seric 254*2144Seric while (lp) { 255*2144Seric fputs(line,temptr); 256*2144Seric linenum++; 257*2144Seric lp = fgets(line,BUFSIZ,iptr); 258*2144Seric } 259*2144Seric fclose(temptr); 260*2144Seric } 261*2144Seric 262*2144Seric 263*2144Seric /* 264*2144Seric Routine to write out data saved by 265*2144Seric 'saverest' routine and to remove the file. 266*2144Seric */ 267*2144Seric putsave(line,type) 268*2144Seric char *line; 269*2144Seric char type; 270*2144Seric { 271*2144Seric FILE *temptr; 272*2144Seric 273*2144Seric temptr = xfopen(tempfile,0); 274*2144Seric 275*2144Seric while (fgets(line,BUFSIZ,temptr)) 276*2144Seric printf("%c %s",type,line); 277*2144Seric 278*2144Seric fclose(temptr); 279*2144Seric 280*2144Seric xunlink(tempfile); 281*2144Seric } 282*2144Seric 283*2144Seric 284*2144Seric fixnum(lp) 285*2144Seric char *lp; 286*2144Seric { 287*2144Seric int num; 288*2144Seric 289*2144Seric while (*lp) { 290*2144Seric switch (*lp) { 291*2144Seric 292*2144Seric case 'a': 293*2144Seric case 'c': 294*2144Seric case 'd': 295*2144Seric case ',': 296*2144Seric case '\n': 297*2144Seric printf("%c",*lp); 298*2144Seric lp++; 299*2144Seric break; 300*2144Seric 301*2144Seric default: 302*2144Seric lp = satoi(lp,&num); 303*2144Seric num =+ linenum; 304*2144Seric printf("%d",num); 305*2144Seric } 306*2144Seric } 307*2144Seric } 308*2144Seric 309*2144Seric 310*2144Seric addgen(lp,fp) 311*2144Seric char *lp; 312*2144Seric FILE *fp; 313*2144Seric { 314*2144Seric printf("%da%d,",linenum,linenum+1); 315*2144Seric 316*2144Seric /* 317*2144Seric Save lines of new file. 318*2144Seric */ 319*2144Seric saverest(lp,fp); 320*2144Seric 321*2144Seric printf("%d\n",linenum); 322*2144Seric 323*2144Seric /* 324*2144Seric Output saved lines, as 'diff' would. 325*2144Seric */ 326*2144Seric putsave(lp,'>'); 327*2144Seric 328*2144Seric exit(0); 329*2144Seric } 330*2144Seric 331*2144Seric 332*2144Seric delgen(lp,fp) 333*2144Seric char *lp; 334*2144Seric FILE *fp; 335*2144Seric { 336*2144Seric int savenum; 337*2144Seric 338*2144Seric printf("%d,",linenum+1); 339*2144Seric savenum = linenum; 340*2144Seric 341*2144Seric /* 342*2144Seric Save lines of old file. 343*2144Seric */ 344*2144Seric saverest(lp,fp); 345*2144Seric 346*2144Seric printf("%dd%d\n",linenum,savenum); 347*2144Seric 348*2144Seric /* 349*2144Seric Output saved lines, as 'diff' would. 350*2144Seric */ 351*2144Seric putsave(lp,'<'); 352*2144Seric 353*2144Seric exit(0); 354*2144Seric } 355*2144Seric 356*2144Seric 357*2144Seric clean_up() 358*2144Seric { 359*2144Seric unlink(tempfile); 360*2144Seric unlink(otmp); 361*2144Seric unlink(ntmp); 362*2144Seric } 363*2144Seric 364*2144Seric 365*2144Seric maket(file) 366*2144Seric char *file; 367*2144Seric { 368*2144Seric FILE *iop; 369*2144Seric 370*2144Seric copy(tempskel,file); 371*2144Seric iop = xfcreat(mktemp(file),0644); 372*2144Seric 373*2144Seric return(iop); 374*2144Seric } 375