1 /* $OpenBSD: diffreg.c,v 1.12 2003/06/25 03:53:59 deraadt Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void prepare(int, FILE *); 153 static void prune(void); 154 static void equiv(struct line *, int, struct line *, int, int *); 155 static void unravel(int); 156 static void unsort(struct line *, int, int *); 157 static void change(int, int, int, int); 158 static void sort(struct line *, int); 159 static int newcand(int, int, int); 160 static int search(int *, int, int); 161 static int skipline(int); 162 static int asciifile(FILE *); 163 static int stone(int *, int, int *, int *); 164 static int readhash(FILE *); 165 166 /* 167 * chrtran points to one of 2 translation tables: cup2low if folding upper to 168 * lower case clow2low if not folding case 169 */ 170 u_char clow2low[256] = { 171 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 172 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 173 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 174 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 175 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 176 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 177 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 178 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 179 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 180 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 181 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 182 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 183 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 184 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 185 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 186 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 187 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 188 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 189 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 190 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 191 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 192 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 193 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 194 0xfd, 0xfe, 0xff 195 }; 196 197 u_char cup2low[256] = { 198 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 199 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 200 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 201 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 202 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 203 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 204 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 205 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 206 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 207 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 208 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 209 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 210 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 211 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 212 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 213 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 214 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 215 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 216 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 217 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 218 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 219 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 220 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 221 0xfd, 0xfe, 0xff 222 }; 223 224 void 225 diffreg(void) 226 { 227 char buf1[BUFSIZ], buf2[BUFSIZ]; 228 FILE *f1, *f2; 229 int i, j; 230 231 if (hflag) { 232 diffargv[0] = "diffh"; 233 execv(diffh, diffargv); 234 fprintf(stderr, "diff: "); 235 perror(diffh); 236 done(0); 237 } 238 chrtran = (iflag ? cup2low : clow2low); 239 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 240 file1 = splice(file1, file2); 241 if (stat(file1, &stb1) < 0) { 242 fprintf(stderr, "diff: "); 243 perror(file1); 244 done(0); 245 } 246 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 247 file2 = splice(file2, file1); 248 if (stat(file2, &stb2) < 0) { 249 fprintf(stderr, "diff: "); 250 perror(file2); 251 done(0); 252 } 253 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 254 if (!strcmp(file2, "-")) { 255 fprintf(stderr, "diff: can't specify - -\n"); 256 done(0); 257 } 258 file1 = copytemp(); 259 if (stat(file1, &stb1) < 0) { 260 fprintf(stderr, "diff: "); 261 perror(file1); 262 done(0); 263 } 264 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 265 file2 = copytemp(); 266 if (stat(file2, &stb2) < 0) { 267 fprintf(stderr, "diff: "); 268 perror(file2); 269 done(0); 270 } 271 } 272 if ((f1 = fopen(file1, "r")) == NULL) { 273 fprintf(stderr, "diff: "); 274 perror(file1); 275 done(0); 276 } 277 if ((f2 = fopen(file2, "r")) == NULL) { 278 fprintf(stderr, "diff: "); 279 perror(file2); 280 fclose(f1); 281 done(0); 282 } 283 if (stb1.st_size != stb2.st_size) 284 goto notsame; 285 for (;;) { 286 i = fread(buf1, 1, BUFSIZ, f1); 287 j = fread(buf2, 1, BUFSIZ, f2); 288 if (i < 0 || j < 0 || i != j) 289 goto notsame; 290 if (i == 0 && j == 0) { 291 fclose(f1); 292 fclose(f2); 293 status = 0; /* files don't differ */ 294 goto same; 295 } 296 for (j = 0; j < i; j++) 297 if (buf1[j] != buf2[j]) 298 goto notsame; 299 } 300 notsame: 301 /* 302 * Files certainly differ at this point; set status accordingly 303 */ 304 status = 1; 305 if (!asciifile(f1) || !asciifile(f2)) { 306 printf("Binary files %s and %s differ\n", file1, file2); 307 fclose(f1); 308 fclose(f2); 309 done(0); 310 } 311 prepare(0, f1); 312 prepare(1, f2); 313 fclose(f1); 314 fclose(f2); 315 prune(); 316 sort(sfile[0], slen[0]); 317 sort(sfile[1], slen[1]); 318 319 member = (int *)file[1]; 320 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 321 member = ralloc(member, (slen[1] + 2) * sizeof(int)); 322 323 class = (int *)file[0]; 324 unsort(sfile[0], slen[0], class); 325 class = ralloc(class, (slen[0] + 2) * sizeof(int)); 326 327 klist = talloc((slen[0] + 2) * sizeof(int)); 328 clist = talloc(sizeof(cand)); 329 i = stone(class, slen[0], member, klist); 330 free(member); 331 free(class); 332 333 J = talloc((len[0] + 2) * sizeof(int)); 334 unravel(klist[i]); 335 free(clist); 336 free(klist); 337 338 ixold = talloc((len[0] + 2) * sizeof(long)); 339 ixnew = talloc((len[1] + 2) * sizeof(long)); 340 check(); 341 output(); 342 status = anychange; 343 same: 344 if (opt == D_CONTEXT && anychange == 0) 345 printf("No differences encountered\n"); 346 done(0); 347 } 348 349 char *tempfile = _PATH_TMP; 350 351 char * 352 copytemp(void) 353 { 354 char buf[BUFSIZ]; 355 int i, f; 356 357 signal(SIGHUP, catchsig); 358 signal(SIGINT, catchsig); 359 signal(SIGPIPE, catchsig); 360 signal(SIGTERM, catchsig); 361 f = mkstemp(tempfile); 362 if (f < 0) { 363 fprintf(stderr, "diff: "); 364 perror(tempfile); 365 done(0); 366 } 367 while ((i = read(0, buf, BUFSIZ)) > 0) 368 if (write(f, buf, i) != i) { 369 fprintf(stderr, "diff: "); 370 perror(tempfile); 371 done(0); 372 } 373 close(f); 374 return (tempfile); 375 } 376 377 char * 378 splice(char *dir, char *file) 379 { 380 char *tail, buf[BUFSIZ]; 381 382 if (!strcmp(file, "-")) { 383 fprintf(stderr, "diff: can't specify - with other arg directory\n"); 384 done(0); 385 } 386 tail = strrchr(file, '/'); 387 if (tail == 0) 388 tail = file; 389 else 390 tail++; 391 snprintf(buf, sizeof buf, "%s/%s", dir, tail); 392 return (strdup(buf)); 393 } 394 395 static void 396 prepare(int i, FILE *fd) 397 { 398 struct line *p; 399 int j, h; 400 401 fseek(fd, (off_t)0, SEEK_SET); 402 p = talloc(3 * sizeof(struct line)); 403 for (j = 0; (h = readhash(fd));) { 404 p = ralloc(p, (++j + 3) * sizeof(struct line)); 405 p[j].value = h; 406 } 407 len[i] = j; 408 file[i] = p; 409 } 410 411 static void 412 prune(void) 413 { 414 int i, j; 415 416 for (pref = 0; pref < len[0] && pref < len[1] && 417 file[0][pref + 1].value == file[1][pref + 1].value; 418 pref++) 419 ; 420 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 421 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 422 suff++) 423 ; 424 for (j = 0; j < 2; j++) { 425 sfile[j] = file[j] + pref; 426 slen[j] = len[j] - pref - suff; 427 for (i = 0; i <= slen[j]; i++) 428 sfile[j][i].serial = i; 429 } 430 } 431 432 static void 433 equiv(struct line *a, int n, struct line *b, int m, int *c) 434 { 435 int i, j; 436 437 i = j = 1; 438 while (i <= n && j <= m) { 439 if (a[i].value < b[j].value) 440 a[i++].value = 0; 441 else if (a[i].value == b[j].value) 442 a[i++].value = j; 443 else 444 j++; 445 } 446 while (i <= n) 447 a[i++].value = 0; 448 b[m + 1].value = 0; 449 j = 0; 450 while (++j <= m) { 451 c[j] = -b[j].serial; 452 while (b[j + 1].value == b[j].value) { 453 j++; 454 c[j] = b[j].serial; 455 } 456 } 457 c[j] = -1; 458 } 459 460 static int 461 stone(int *a, int n, int *b, int *c) 462 { 463 int i, k, y, j, l; 464 int oldc, tc, oldl; 465 466 k = 0; 467 c[0] = newcand(0, 0, 0); 468 for (i = 1; i <= n; i++) { 469 j = a[i]; 470 if (j == 0) 471 continue; 472 y = -b[j]; 473 oldl = 0; 474 oldc = c[0]; 475 do { 476 if (y <= clist[oldc].y) 477 continue; 478 l = search(c, k, y); 479 if (l != oldl + 1) 480 oldc = c[l - 1]; 481 if (l <= k) { 482 if (clist[c[l]].y <= y) 483 continue; 484 tc = c[l]; 485 c[l] = newcand(i, y, oldc); 486 oldc = tc; 487 oldl = l; 488 } else { 489 c[l] = newcand(i, y, oldc); 490 k++; 491 break; 492 } 493 } while ((y = b[++j]) > 0); 494 } 495 return (k); 496 } 497 498 static int 499 newcand(int x, int y, int pred) 500 { 501 struct cand *q; 502 503 clist = ralloc(clist, ++clen * sizeof(cand)); 504 q = clist + clen - 1; 505 q->x = x; 506 q->y = y; 507 q->pred = pred; 508 return (clen - 1); 509 } 510 511 static int 512 search(int *c, int k, int y) 513 { 514 int i, j, l, t; 515 516 if (clist[c[k]].y < y) /* quick look for typical case */ 517 return (k + 1); 518 i = 0; 519 j = k + 1; 520 while (1) { 521 l = i + j; 522 if ((l >>= 1) <= i) 523 break; 524 t = clist[c[l]].y; 525 if (t > y) 526 j = l; 527 else if (t < y) 528 i = l; 529 else 530 return (l); 531 } 532 return (l + 1); 533 } 534 535 static void 536 unravel(int p) 537 { 538 struct cand *q; 539 int i; 540 541 for (i = 0; i <= len[0]; i++) 542 J[i] = i <= pref ? i : 543 i > len[0] - suff ? i + len[1] - len[0] : 0; 544 for (q = clist + p; q->y != 0; q = clist + q->pred) 545 J[q->x + pref] = q->y + pref; 546 } 547 548 /* 549 * check does double duty: 1. ferret out any fortuitous correspondences due 550 * to confounding by hashing (which result in "jackpot") 2. collect random 551 * access indexes to the two files 552 */ 553 554 static void 555 check(void) 556 { 557 int i, j, jackpot, c, d; 558 long ctold, ctnew; 559 560 if ((input[0] = fopen(file1, "r")) == NULL) { 561 perror(file1); 562 done(0); 563 } 564 if ((input[1] = fopen(file2, "r")) == NULL) { 565 perror(file2); 566 done(0); 567 } 568 j = 1; 569 ixold[0] = ixnew[0] = 0; 570 jackpot = 0; 571 ctold = ctnew = 0; 572 for (i = 1; i <= len[0]; i++) { 573 if (J[i] == 0) { 574 ixold[i] = ctold += skipline(0); 575 continue; 576 } 577 while (j < J[i]) { 578 ixnew[j] = ctnew += skipline(1); 579 j++; 580 } 581 if (bflag || wflag || iflag) { 582 for (;;) { 583 c = getc(input[0]); 584 d = getc(input[1]); 585 ctold++; 586 ctnew++; 587 if (bflag && isspace(c) && isspace(d)) { 588 do { 589 if (c == '\n') 590 break; 591 ctold++; 592 } while (isspace(c = getc(input[0]))); 593 do { 594 if (d == '\n') 595 break; 596 ctnew++; 597 } while (isspace(d = getc(input[1]))); 598 } else if (wflag) { 599 while (isspace(c) && c != '\n') { 600 c = getc(input[0]); 601 ctold++; 602 } 603 while (isspace(d) && d != '\n') { 604 d = getc(input[1]); 605 ctnew++; 606 } 607 } 608 if (chrtran[c] != chrtran[d]) { 609 jackpot++; 610 J[i] = 0; 611 if (c != '\n') 612 ctold += skipline(0); 613 if (d != '\n') 614 ctnew += skipline(1); 615 break; 616 } 617 if (c == '\n') 618 break; 619 } 620 } else { 621 for (;;) { 622 ctold++; 623 ctnew++; 624 if ((c = getc(input[0])) != (d = getc(input[1]))) { 625 /* jackpot++; */ 626 J[i] = 0; 627 if (c != '\n') 628 ctold += skipline(0); 629 if (d != '\n') 630 ctnew += skipline(1); 631 break; 632 } 633 if (c == '\n') 634 break; 635 } 636 } 637 ixold[i] = ctold; 638 ixnew[j] = ctnew; 639 j++; 640 } 641 for (; j <= len[1]; j++) { 642 ixnew[j] = ctnew += skipline(1); 643 } 644 fclose(input[0]); 645 fclose(input[1]); 646 /* 647 * if (jackpot) 648 * fprintf(stderr, "jackpot\n"); 649 */ 650 } 651 652 /* shellsort CACM #201 */ 653 static void 654 sort(struct line *a, int n) 655 { 656 struct line *ai, *aim, w; 657 int j, m = 0, k; 658 659 if (n == 0) 660 return; 661 for (j = 1; j <= n; j *= 2) 662 m = 2 * j - 1; 663 for (m /= 2; m != 0; m /= 2) { 664 k = n - m; 665 for (j = 1; j <= k; j++) { 666 for (ai = &a[j]; ai > a; ai -= m) { 667 aim = &ai[m]; 668 if (aim < ai) 669 break; /* wraparound */ 670 if (aim->value > ai[0].value || 671 (aim->value == ai[0].value && 672 aim->serial > ai[0].serial)) 673 break; 674 w.value = ai[0].value; 675 ai[0].value = aim->value; 676 aim->value = w.value; 677 w.serial = ai[0].serial; 678 ai[0].serial = aim->serial; 679 aim->serial = w.serial; 680 } 681 } 682 } 683 } 684 685 static void 686 unsort(struct line *f, int l, int *b) 687 { 688 int *a, i; 689 690 a = talloc((l + 1) * sizeof(int)); 691 for (i = 1; i <= l; i++) 692 a[f[i].serial] = f[i].value; 693 for (i = 1; i <= l; i++) 694 b[i] = a[i]; 695 free(a); 696 } 697 698 static int 699 skipline(int f) 700 { 701 int i, c; 702 703 for (i = 1; (c = getc(input[f])) != '\n'; i++) 704 if (c < 0) 705 return (i); 706 return (i); 707 } 708 709 static void 710 output(void) 711 { 712 int m, i0, i1, j0, j1; 713 714 input[0] = fopen(file1, "r"); 715 input[1] = fopen(file2, "r"); 716 m = len[0]; 717 J[0] = 0; 718 J[m + 1] = len[1] + 1; 719 if (opt != D_EDIT) { 720 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 721 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 722 i0++; 723 j0 = J[i0 - 1] + 1; 724 i1 = i0 - 1; 725 while (i1 < m && J[i1 + 1] == 0) 726 i1++; 727 j1 = J[i1 + 1] - 1; 728 J[i1] = j1; 729 change(i0, i1, j0, j1); 730 } 731 } else { 732 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 733 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 734 i0--; 735 j0 = J[i0 + 1] - 1; 736 i1 = i0 + 1; 737 while (i1 > 1 && J[i1 - 1] == 0) 738 i1--; 739 j1 = J[i1 - 1] + 1; 740 J[i1] = j1; 741 change(i1, i0, j1, j0); 742 } 743 } 744 if (m == 0) 745 change(1, 0, 1, len[1]); 746 if (opt == D_IFDEF) { 747 for (;;) { 748 #define c i0 749 c = getc(input[0]); 750 if (c < 0) 751 return; 752 putchar(c); 753 } 754 #undef c 755 } 756 if (anychange && opt == D_CONTEXT) 757 dump_context_vec(); 758 } 759 760 /* 761 * The following struct is used to record change information when 762 * doing a "context" diff. (see routine "change" to understand the 763 * highly mneumonic field names) 764 */ 765 struct context_vec { 766 int a; /* start line in old file */ 767 int b; /* end line in old file */ 768 int c; /* start line in new file */ 769 int d; /* end line in new file */ 770 }; 771 772 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 773 774 #define MAX_CONTEXT 128 775 776 /* 777 * indicate that there is a difference between lines a and b of the from file 778 * to get to lines c to d of the to file. If a is greater then b then there 779 * are no lines in the from file involved and this means that there were 780 * lines appended (beginning at b). If c is greater than d then there are 781 * lines missing from the to file. 782 */ 783 static void 784 change(int a, int b, int c, int d) 785 { 786 struct stat stbuf; 787 788 if (opt != D_IFDEF && a > b && c > d) 789 return; 790 if (anychange == 0) { 791 anychange = 1; 792 if (opt == D_CONTEXT) { 793 printf("*** %s ", file1); 794 stat(file1, &stbuf); 795 printf("%s--- %s ", 796 ctime(&stbuf.st_mtime), file2); 797 stat(file2, &stbuf); 798 printf("%s", ctime(&stbuf.st_mtime)); 799 800 context_vec_start = talloc(MAX_CONTEXT * 801 sizeof(struct context_vec)); 802 context_vec_end = context_vec_start + MAX_CONTEXT; 803 context_vec_ptr = context_vec_start - 1; 804 } 805 } 806 if (opt == D_CONTEXT) { 807 /* 808 * if this new change is within 'context' lines of 809 * the previous change, just add it to the change 810 * record. If the record is full or if this 811 * change is more than 'context' lines from the previous 812 * change, dump the record, reset it & add the new change. 813 */ 814 if (context_vec_ptr >= context_vec_end || 815 (context_vec_ptr >= context_vec_start && 816 a > (context_vec_ptr->b + 2 * context) && 817 c > (context_vec_ptr->d + 2 * context))) 818 dump_context_vec(); 819 820 context_vec_ptr++; 821 context_vec_ptr->a = a; 822 context_vec_ptr->b = b; 823 context_vec_ptr->c = c; 824 context_vec_ptr->d = d; 825 return; 826 } 827 switch (opt) { 828 829 case D_NORMAL: 830 case D_EDIT: 831 range(a, b, ","); 832 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 833 if (opt == D_NORMAL) 834 range(c, d, ","); 835 putchar('\n'); 836 break; 837 case D_REVERSE: 838 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 839 range(a, b, " "); 840 putchar('\n'); 841 break; 842 case D_NREVERSE: 843 if (a > b) 844 printf("a%d %d\n", b, d - c + 1); 845 else { 846 printf("d%d %d\n", a, b - a + 1); 847 if (!(c > d)) 848 /* add changed lines */ 849 printf("a%d %d\n", b, d - c + 1); 850 } 851 break; 852 } 853 if (opt == D_NORMAL || opt == D_IFDEF) { 854 fetch(ixold, a, b, input[0], "< ", 1); 855 if (a <= b && c <= d && opt == D_NORMAL) 856 prints("---\n"); 857 } 858 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 859 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 860 prints(".\n"); 861 if (inifdef) { 862 fprintf(stdout, "#endif /* %s */\n", endifname); 863 inifdef = 0; 864 } 865 } 866 867 static void 868 range(int a, int b, char *separator) 869 { 870 printf("%d", a > b ? b : a); 871 if (a < b) 872 printf("%s%d", separator, b); 873 } 874 875 static void 876 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 877 { 878 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 879 int i, j, c, col, nc; 880 881 /* 882 * When doing #ifdef's, copy down to current line 883 * if this is the first file, so that stuff makes it to output. 884 */ 885 if (opt == D_IFDEF && oldfile) { 886 long curpos = ftell(lb); 887 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 888 nc = f[a > b ? b : a - 1] - curpos; 889 for (i = 0; i < nc; i++) 890 putchar(getc(lb)); 891 } 892 if (a > b) 893 return; 894 if (opt == D_IFDEF) { 895 if (inifdef) 896 fprintf(stdout, "#else /* %s%s */\n", 897 oneflag && oldfile == 1 ? "!" : "", ifdef2); 898 else { 899 if (oneflag) { 900 /* There was only one ifdef given */ 901 endifname = ifdef2; 902 if (oldfile) 903 fprintf(stdout, "#ifndef %s\n", endifname); 904 else 905 fprintf(stdout, "#ifdef %s\n", endifname); 906 } else { 907 endifname = oldfile ? ifdef1 : ifdef2; 908 fprintf(stdout, "#ifdef %s\n", endifname); 909 } 910 } 911 inifdef = 1 + oldfile; 912 } 913 for (i = a; i <= b; i++) { 914 fseek(lb, f[i - 1], SEEK_SET); 915 nc = f[i] - f[i - 1]; 916 if (opt != D_IFDEF) 917 prints(s); 918 col = 0; 919 for (j = 0; j < nc; j++) { 920 c = getc(lb); 921 if (c == '\t' && tflag) 922 do 923 putchar(' '); 924 while (++col & 7) 925 ; 926 else { 927 putchar(c); 928 col++; 929 } 930 } 931 } 932 933 if (inifdef && !wantelses) { 934 fprintf(stdout, "#endif /* %s */\n", endifname); 935 inifdef = 0; 936 } 937 } 938 939 #define POW2 /* define only if HALFLONG is 2**n */ 940 #define HALFLONG 16 941 #define low(x) (x&((1L<<HALFLONG)-1)) 942 #define high(x) (x>>HALFLONG) 943 944 /* 945 * hashing has the effect of 946 * arranging line in 7-bit bytes and then 947 * summing 1-s complement in 16-bit hunks 948 */ 949 static int 950 readhash(FILE *f) 951 { 952 unsigned int shift; 953 int t, space; 954 long sum; 955 956 sum = 1; 957 space = 0; 958 if (!bflag && !wflag) { 959 if (iflag) 960 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 961 if (t == -1) 962 return (0); 963 sum += (long)chrtran[t] << (shift 964 #ifdef POW2 965 &= HALFLONG - 1); 966 #else 967 %= HALFLONG); 968 #endif 969 } 970 else 971 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 972 if (t == -1) 973 return (0); 974 sum += (long)t << (shift 975 #ifdef POW2 976 &= HALFLONG - 1); 977 #else 978 %= HALFLONG); 979 #endif 980 } 981 } else { 982 for (shift = 0;;) { 983 switch (t = getc(f)) { 984 case -1: 985 return (0); 986 case '\t': 987 case ' ': 988 space++; 989 continue; 990 default: 991 if (space && !wflag) { 992 shift += 7; 993 space = 0; 994 } 995 sum += (long)chrtran[t] << (shift 996 #ifdef POW2 997 &= HALFLONG - 1); 998 #else 999 %= HALFLONG); 1000 #endif 1001 shift += 7; 1002 continue; 1003 case '\n': 1004 break; 1005 } 1006 break; 1007 } 1008 } 1009 sum = low(sum) + high(sum); 1010 return ((short) low(sum) + (short) high(sum)); 1011 } 1012 1013 static int 1014 asciifile(FILE *f) 1015 { 1016 char buf[BUFSIZ], *cp; 1017 int cnt; 1018 1019 fseek(f, (off_t)0, SEEK_SET); 1020 cnt = fread(buf, 1, BUFSIZ, f); 1021 cp = buf; 1022 while (--cnt >= 0) 1023 if (*cp++ & 0200) 1024 return (0); 1025 return (1); 1026 } 1027 1028 1029 /* dump accumulated "context" diff changes */ 1030 static void 1031 dump_context_vec(void) 1032 { 1033 struct context_vec *cvp = context_vec_start; 1034 int lowa, upb, lowc, upd, do_output; 1035 int a, b, c, d; 1036 char ch; 1037 1038 if (cvp > context_vec_ptr) 1039 return; 1040 1041 b = d = 0; /* gcc */ 1042 lowa = max(1, cvp->a - context); 1043 upb = min(len[0], context_vec_ptr->b + context); 1044 lowc = max(1, cvp->c - context); 1045 upd = min(len[1], context_vec_ptr->d + context); 1046 1047 printf("***************\n*** "); 1048 range(lowa, upb, ","); 1049 printf(" ****\n"); 1050 1051 /* 1052 * output changes to the "old" file. The first loop suppresses 1053 * output if there were no changes to the "old" file (we'll see 1054 * the "old" lines as context in the "new" list). 1055 */ 1056 do_output = 0; 1057 for (; cvp <= context_vec_ptr; cvp++) 1058 if (cvp->a <= cvp->b) { 1059 cvp = context_vec_start; 1060 do_output++; 1061 break; 1062 } 1063 if (do_output) { 1064 while (cvp <= context_vec_ptr) { 1065 a = cvp->a; 1066 b = cvp->b; 1067 c = cvp->c; 1068 d = cvp->d; 1069 1070 if (a <= b && c <= d) 1071 ch = 'c'; 1072 else 1073 ch = (a <= b) ? 'd' : 'a'; 1074 1075 if (ch == 'a') 1076 fetch(ixold, lowa, b, input[0], " ", 0); 1077 else { 1078 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1079 fetch(ixold, a, b, input[0], 1080 ch == 'c' ? "! " : "- ", 0); 1081 } 1082 lowa = b + 1; 1083 cvp++; 1084 } 1085 fetch(ixold, b + 1, upb, input[0], " ", 0); 1086 } 1087 /* output changes to the "new" file */ 1088 printf("--- "); 1089 range(lowc, upd, ","); 1090 printf(" ----\n"); 1091 1092 do_output = 0; 1093 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1094 if (cvp->c <= cvp->d) { 1095 cvp = context_vec_start; 1096 do_output++; 1097 break; 1098 } 1099 if (do_output) { 1100 while (cvp <= context_vec_ptr) { 1101 a = cvp->a; 1102 b = cvp->b; 1103 c = cvp->c; 1104 d = cvp->d; 1105 1106 if (a <= b && c <= d) 1107 ch = 'c'; 1108 else 1109 ch = (a <= b) ? 'd' : 'a'; 1110 1111 if (ch == 'd') 1112 fetch(ixnew, lowc, d, input[1], " ", 0); 1113 else { 1114 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1115 fetch(ixnew, c, d, input[1], 1116 ch == 'c' ? "! " : "+ ", 0); 1117 } 1118 lowc = d + 1; 1119 cvp++; 1120 } 1121 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1122 } 1123 context_vec_ptr = context_vec_start - 1; 1124 } 1125