1 /* $OpenBSD: diffreg.c,v 1.15 2003/06/25 17:49:22 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void prepare(int, FILE *); 153 static void prune(void); 154 static void equiv(struct line *, int, struct line *, int, int *); 155 static void unravel(int); 156 static void unsort(struct line *, int, int *); 157 static void change(int, int, int, int); 158 static void sort(struct line *, int); 159 static int newcand(int, int, int); 160 static int search(int *, int, int); 161 static int skipline(int); 162 static int asciifile(FILE *); 163 static int stone(int *, int, int *, int *); 164 static int readhash(FILE *); 165 166 /* 167 * chrtran points to one of 2 translation tables: cup2low if folding upper to 168 * lower case clow2low if not folding case 169 */ 170 u_char clow2low[256] = { 171 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 172 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 173 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 174 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 175 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 176 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 177 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 178 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 179 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 180 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 181 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 182 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 183 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 184 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 185 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 186 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 187 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 188 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 189 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 190 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 191 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 192 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 193 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 194 0xfd, 0xfe, 0xff 195 }; 196 197 u_char cup2low[256] = { 198 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 199 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 200 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 201 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 202 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 203 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 204 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 205 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 206 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 207 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 208 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 209 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 210 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 211 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 212 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 213 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 214 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 215 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 216 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 217 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 218 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 219 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 220 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 221 0xfd, 0xfe, 0xff 222 }; 223 224 void 225 diffreg(void) 226 { 227 char buf1[BUFSIZ], buf2[BUFSIZ]; 228 FILE *f1, *f2; 229 int i, j; 230 231 if (hflag) { 232 diffargv[0] = "diffh"; 233 execv(diffh, diffargv); 234 warn("%s", diffh); 235 done(0); 236 } 237 chrtran = (iflag ? cup2low : clow2low); 238 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 239 file1 = splice(file1, file2); 240 if (stat(file1, &stb1) < 0) { 241 warn("%s", file1); 242 done(0); 243 } 244 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 245 file2 = splice(file2, file1); 246 if (stat(file2, &stb2) < 0) { 247 warn("%s", file2); 248 done(0); 249 } 250 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 251 if (!strcmp(file2, "-")) { 252 warnx("can't specify - -"); 253 done(0); 254 } 255 file1 = copytemp(); 256 if (stat(file1, &stb1) < 0) { 257 warn("%s", file1); 258 done(0); 259 } 260 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 261 file2 = copytemp(); 262 if (stat(file2, &stb2) < 0) { 263 warn("%s", file2); 264 done(0); 265 } 266 } 267 if ((f1 = fopen(file1, "r")) == NULL) { 268 warn("%s", file1); 269 done(0); 270 } 271 if ((f2 = fopen(file2, "r")) == NULL) { 272 warn("%s", file2); 273 fclose(f1); 274 done(0); 275 } 276 if (stb1.st_size != stb2.st_size) 277 goto notsame; 278 for (;;) { 279 i = fread(buf1, 1, BUFSIZ, f1); 280 j = fread(buf2, 1, BUFSIZ, f2); 281 if (i < 0 || j < 0 || i != j) 282 goto notsame; 283 if (i == 0 && j == 0) { 284 fclose(f1); 285 fclose(f2); 286 status = 0; /* files don't differ */ 287 goto same; 288 } 289 for (j = 0; j < i; j++) 290 if (buf1[j] != buf2[j]) 291 goto notsame; 292 } 293 notsame: 294 /* 295 * Files certainly differ at this point; set status accordingly 296 */ 297 status = 1; 298 if (!asciifile(f1) || !asciifile(f2)) { 299 printf("Binary files %s and %s differ\n", file1, file2); 300 fclose(f1); 301 fclose(f2); 302 done(0); 303 } 304 prepare(0, f1); 305 prepare(1, f2); 306 fclose(f1); 307 fclose(f2); 308 prune(); 309 sort(sfile[0], slen[0]); 310 sort(sfile[1], slen[1]); 311 312 member = (int *)file[1]; 313 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 314 member = ralloc(member, (slen[1] + 2) * sizeof(int)); 315 316 class = (int *)file[0]; 317 unsort(sfile[0], slen[0], class); 318 class = ralloc(class, (slen[0] + 2) * sizeof(int)); 319 320 klist = talloc((slen[0] + 2) * sizeof(int)); 321 clist = talloc(sizeof(cand)); 322 i = stone(class, slen[0], member, klist); 323 free(member); 324 free(class); 325 326 J = talloc((len[0] + 2) * sizeof(int)); 327 unravel(klist[i]); 328 free(clist); 329 free(klist); 330 331 ixold = talloc((len[0] + 2) * sizeof(long)); 332 ixnew = talloc((len[1] + 2) * sizeof(long)); 333 check(); 334 output(); 335 status = anychange; 336 same: 337 if (opt == D_CONTEXT && anychange == 0) 338 printf("No differences encountered\n"); 339 done(0); 340 } 341 342 char *tempfile = _PATH_TMP; 343 344 char * 345 copytemp(void) 346 { 347 char buf[BUFSIZ]; 348 int i, f; 349 350 signal(SIGHUP, done); 351 signal(SIGINT, done); 352 signal(SIGPIPE, done); 353 signal(SIGTERM, done); 354 f = mkstemp(tempfile); 355 if (f < 0) { 356 warn("%s", tempfile); 357 done(0); 358 } 359 while ((i = read(0, buf, BUFSIZ)) > 0) 360 if (write(f, buf, i) != i) { 361 warn("%s", tempfile); 362 done(0); 363 } 364 close(f); 365 return (tempfile); 366 } 367 368 char * 369 splice(char *dir, char *file) 370 { 371 char *tail, buf[BUFSIZ]; 372 373 if (!strcmp(file, "-")) { 374 warnx("can't specify - with other arg directory"); 375 done(0); 376 } 377 tail = strrchr(file, '/'); 378 if (tail == 0) 379 tail = file; 380 else 381 tail++; 382 snprintf(buf, sizeof buf, "%s/%s", dir, tail); 383 return (strdup(buf)); 384 } 385 386 static void 387 prepare(int i, FILE *fd) 388 { 389 struct line *p; 390 int j, h; 391 392 fseek(fd, 0L, SEEK_SET); 393 p = talloc(3 * sizeof(struct line)); 394 for (j = 0; (h = readhash(fd));) { 395 p = ralloc(p, (++j + 3) * sizeof(struct line)); 396 p[j].value = h; 397 } 398 len[i] = j; 399 file[i] = p; 400 } 401 402 static void 403 prune(void) 404 { 405 int i, j; 406 407 for (pref = 0; pref < len[0] && pref < len[1] && 408 file[0][pref + 1].value == file[1][pref + 1].value; 409 pref++) 410 ; 411 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 412 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 413 suff++) 414 ; 415 for (j = 0; j < 2; j++) { 416 sfile[j] = file[j] + pref; 417 slen[j] = len[j] - pref - suff; 418 for (i = 0; i <= slen[j]; i++) 419 sfile[j][i].serial = i; 420 } 421 } 422 423 static void 424 equiv(struct line *a, int n, struct line *b, int m, int *c) 425 { 426 int i, j; 427 428 i = j = 1; 429 while (i <= n && j <= m) { 430 if (a[i].value < b[j].value) 431 a[i++].value = 0; 432 else if (a[i].value == b[j].value) 433 a[i++].value = j; 434 else 435 j++; 436 } 437 while (i <= n) 438 a[i++].value = 0; 439 b[m + 1].value = 0; 440 j = 0; 441 while (++j <= m) { 442 c[j] = -b[j].serial; 443 while (b[j + 1].value == b[j].value) { 444 j++; 445 c[j] = b[j].serial; 446 } 447 } 448 c[j] = -1; 449 } 450 451 static int 452 stone(int *a, int n, int *b, int *c) 453 { 454 int i, k, y, j, l; 455 int oldc, tc, oldl; 456 457 k = 0; 458 c[0] = newcand(0, 0, 0); 459 for (i = 1; i <= n; i++) { 460 j = a[i]; 461 if (j == 0) 462 continue; 463 y = -b[j]; 464 oldl = 0; 465 oldc = c[0]; 466 do { 467 if (y <= clist[oldc].y) 468 continue; 469 l = search(c, k, y); 470 if (l != oldl + 1) 471 oldc = c[l - 1]; 472 if (l <= k) { 473 if (clist[c[l]].y <= y) 474 continue; 475 tc = c[l]; 476 c[l] = newcand(i, y, oldc); 477 oldc = tc; 478 oldl = l; 479 } else { 480 c[l] = newcand(i, y, oldc); 481 k++; 482 break; 483 } 484 } while ((y = b[++j]) > 0); 485 } 486 return (k); 487 } 488 489 static int 490 newcand(int x, int y, int pred) 491 { 492 struct cand *q; 493 494 clist = ralloc(clist, ++clen * sizeof(cand)); 495 q = clist + clen - 1; 496 q->x = x; 497 q->y = y; 498 q->pred = pred; 499 return (clen - 1); 500 } 501 502 static int 503 search(int *c, int k, int y) 504 { 505 int i, j, l, t; 506 507 if (clist[c[k]].y < y) /* quick look for typical case */ 508 return (k + 1); 509 i = 0; 510 j = k + 1; 511 while (1) { 512 l = i + j; 513 if ((l >>= 1) <= i) 514 break; 515 t = clist[c[l]].y; 516 if (t > y) 517 j = l; 518 else if (t < y) 519 i = l; 520 else 521 return (l); 522 } 523 return (l + 1); 524 } 525 526 static void 527 unravel(int p) 528 { 529 struct cand *q; 530 int i; 531 532 for (i = 0; i <= len[0]; i++) 533 J[i] = i <= pref ? i : 534 i > len[0] - suff ? i + len[1] - len[0] : 0; 535 for (q = clist + p; q->y != 0; q = clist + q->pred) 536 J[q->x + pref] = q->y + pref; 537 } 538 539 /* 540 * check does double duty: 1. ferret out any fortuitous correspondences due 541 * to confounding by hashing (which result in "jackpot") 2. collect random 542 * access indexes to the two files 543 */ 544 545 static void 546 check(void) 547 { 548 int i, j, jackpot, c, d; 549 long ctold, ctnew; 550 551 if ((input[0] = fopen(file1, "r")) == NULL) { 552 perror(file1); 553 done(0); 554 } 555 if ((input[1] = fopen(file2, "r")) == NULL) { 556 perror(file2); 557 done(0); 558 } 559 j = 1; 560 ixold[0] = ixnew[0] = 0; 561 jackpot = 0; 562 ctold = ctnew = 0; 563 for (i = 1; i <= len[0]; i++) { 564 if (J[i] == 0) { 565 ixold[i] = ctold += skipline(0); 566 continue; 567 } 568 while (j < J[i]) { 569 ixnew[j] = ctnew += skipline(1); 570 j++; 571 } 572 if (bflag || wflag || iflag) { 573 for (;;) { 574 c = getc(input[0]); 575 d = getc(input[1]); 576 ctold++; 577 ctnew++; 578 if (bflag && isspace(c) && isspace(d)) { 579 do { 580 if (c == '\n') 581 break; 582 ctold++; 583 } while (isspace(c = getc(input[0]))); 584 do { 585 if (d == '\n') 586 break; 587 ctnew++; 588 } while (isspace(d = getc(input[1]))); 589 } else if (wflag) { 590 while (isspace(c) && c != '\n') { 591 c = getc(input[0]); 592 ctold++; 593 } 594 while (isspace(d) && d != '\n') { 595 d = getc(input[1]); 596 ctnew++; 597 } 598 } 599 if (chrtran[c] != chrtran[d]) { 600 jackpot++; 601 J[i] = 0; 602 if (c != '\n') 603 ctold += skipline(0); 604 if (d != '\n') 605 ctnew += skipline(1); 606 break; 607 } 608 if (c == '\n') 609 break; 610 } 611 } else { 612 for (;;) { 613 ctold++; 614 ctnew++; 615 if ((c = getc(input[0])) != (d = getc(input[1]))) { 616 /* jackpot++; */ 617 J[i] = 0; 618 if (c != '\n') 619 ctold += skipline(0); 620 if (d != '\n') 621 ctnew += skipline(1); 622 break; 623 } 624 if (c == '\n') 625 break; 626 } 627 } 628 ixold[i] = ctold; 629 ixnew[j] = ctnew; 630 j++; 631 } 632 for (; j <= len[1]; j++) { 633 ixnew[j] = ctnew += skipline(1); 634 } 635 fclose(input[0]); 636 fclose(input[1]); 637 /* 638 * if (jackpot) 639 * fprintf(stderr, "jackpot\n"); 640 */ 641 } 642 643 /* shellsort CACM #201 */ 644 static void 645 sort(struct line *a, int n) 646 { 647 struct line *ai, *aim, w; 648 int j, m = 0, k; 649 650 if (n == 0) 651 return; 652 for (j = 1; j <= n; j *= 2) 653 m = 2 * j - 1; 654 for (m /= 2; m != 0; m /= 2) { 655 k = n - m; 656 for (j = 1; j <= k; j++) { 657 for (ai = &a[j]; ai > a; ai -= m) { 658 aim = &ai[m]; 659 if (aim < ai) 660 break; /* wraparound */ 661 if (aim->value > ai[0].value || 662 (aim->value == ai[0].value && 663 aim->serial > ai[0].serial)) 664 break; 665 w.value = ai[0].value; 666 ai[0].value = aim->value; 667 aim->value = w.value; 668 w.serial = ai[0].serial; 669 ai[0].serial = aim->serial; 670 aim->serial = w.serial; 671 } 672 } 673 } 674 } 675 676 static void 677 unsort(struct line *f, int l, int *b) 678 { 679 int *a, i; 680 681 a = talloc((l + 1) * sizeof(int)); 682 for (i = 1; i <= l; i++) 683 a[f[i].serial] = f[i].value; 684 for (i = 1; i <= l; i++) 685 b[i] = a[i]; 686 free(a); 687 } 688 689 static int 690 skipline(int f) 691 { 692 int i, c; 693 694 for (i = 1; (c = getc(input[f])) != '\n'; i++) 695 if (c < 0) 696 return (i); 697 return (i); 698 } 699 700 static void 701 output(void) 702 { 703 int m, i0, i1, j0, j1; 704 705 input[0] = fopen(file1, "r"); 706 input[1] = fopen(file2, "r"); 707 m = len[0]; 708 J[0] = 0; 709 J[m + 1] = len[1] + 1; 710 if (opt != D_EDIT) { 711 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 712 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 713 i0++; 714 j0 = J[i0 - 1] + 1; 715 i1 = i0 - 1; 716 while (i1 < m && J[i1 + 1] == 0) 717 i1++; 718 j1 = J[i1 + 1] - 1; 719 J[i1] = j1; 720 change(i0, i1, j0, j1); 721 } 722 } else { 723 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 724 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 725 i0--; 726 j0 = J[i0 + 1] - 1; 727 i1 = i0 + 1; 728 while (i1 > 1 && J[i1 - 1] == 0) 729 i1--; 730 j1 = J[i1 - 1] + 1; 731 J[i1] = j1; 732 change(i1, i0, j1, j0); 733 } 734 } 735 if (m == 0) 736 change(1, 0, 1, len[1]); 737 if (opt == D_IFDEF) { 738 for (;;) { 739 #define c i0 740 c = getc(input[0]); 741 if (c < 0) 742 return; 743 putchar(c); 744 } 745 #undef c 746 } 747 if (anychange && opt == D_CONTEXT) 748 dump_context_vec(); 749 } 750 751 /* 752 * The following struct is used to record change information when 753 * doing a "context" diff. (see routine "change" to understand the 754 * highly mneumonic field names) 755 */ 756 struct context_vec { 757 int a; /* start line in old file */ 758 int b; /* end line in old file */ 759 int c; /* start line in new file */ 760 int d; /* end line in new file */ 761 }; 762 763 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 764 765 #define MAX_CONTEXT 128 766 767 /* 768 * indicate that there is a difference between lines a and b of the from file 769 * to get to lines c to d of the to file. If a is greater then b then there 770 * are no lines in the from file involved and this means that there were 771 * lines appended (beginning at b). If c is greater than d then there are 772 * lines missing from the to file. 773 */ 774 static void 775 change(int a, int b, int c, int d) 776 { 777 struct stat stbuf; 778 779 if (opt != D_IFDEF && a > b && c > d) 780 return; 781 if (anychange == 0) { 782 anychange = 1; 783 if (opt == D_CONTEXT) { 784 printf("*** %s ", file1); 785 stat(file1, &stbuf); 786 printf("%s--- %s ", 787 ctime(&stbuf.st_mtime), file2); 788 stat(file2, &stbuf); 789 printf("%s", ctime(&stbuf.st_mtime)); 790 791 context_vec_start = talloc(MAX_CONTEXT * 792 sizeof(struct context_vec)); 793 context_vec_end = context_vec_start + MAX_CONTEXT; 794 context_vec_ptr = context_vec_start - 1; 795 } 796 } 797 if (opt == D_CONTEXT) { 798 /* 799 * if this new change is within 'context' lines of 800 * the previous change, just add it to the change 801 * record. If the record is full or if this 802 * change is more than 'context' lines from the previous 803 * change, dump the record, reset it & add the new change. 804 */ 805 if (context_vec_ptr >= context_vec_end || 806 (context_vec_ptr >= context_vec_start && 807 a > (context_vec_ptr->b + 2 * context) && 808 c > (context_vec_ptr->d + 2 * context))) 809 dump_context_vec(); 810 811 context_vec_ptr++; 812 context_vec_ptr->a = a; 813 context_vec_ptr->b = b; 814 context_vec_ptr->c = c; 815 context_vec_ptr->d = d; 816 return; 817 } 818 switch (opt) { 819 820 case D_NORMAL: 821 case D_EDIT: 822 range(a, b, ","); 823 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 824 if (opt == D_NORMAL) 825 range(c, d, ","); 826 putchar('\n'); 827 break; 828 case D_REVERSE: 829 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 830 range(a, b, " "); 831 putchar('\n'); 832 break; 833 case D_NREVERSE: 834 if (a > b) 835 printf("a%d %d\n", b, d - c + 1); 836 else { 837 printf("d%d %d\n", a, b - a + 1); 838 if (!(c > d)) 839 /* add changed lines */ 840 printf("a%d %d\n", b, d - c + 1); 841 } 842 break; 843 } 844 if (opt == D_NORMAL || opt == D_IFDEF) { 845 fetch(ixold, a, b, input[0], "< ", 1); 846 if (a <= b && c <= d && opt == D_NORMAL) 847 prints("---\n"); 848 } 849 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 850 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 851 prints(".\n"); 852 if (inifdef) { 853 fprintf(stdout, "#endif /* %s */\n", endifname); 854 inifdef = 0; 855 } 856 } 857 858 static void 859 range(int a, int b, char *separator) 860 { 861 printf("%d", a > b ? b : a); 862 if (a < b) 863 printf("%s%d", separator, b); 864 } 865 866 static void 867 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 868 { 869 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 870 int i, j, c, col, nc; 871 872 /* 873 * When doing #ifdef's, copy down to current line 874 * if this is the first file, so that stuff makes it to output. 875 */ 876 if (opt == D_IFDEF && oldfile) { 877 long curpos = ftell(lb); 878 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 879 nc = f[a > b ? b : a - 1] - curpos; 880 for (i = 0; i < nc; i++) 881 putchar(getc(lb)); 882 } 883 if (a > b) 884 return; 885 if (opt == D_IFDEF) { 886 if (inifdef) 887 fprintf(stdout, "#else /* %s%s */\n", 888 oneflag && oldfile == 1 ? "!" : "", ifdef2); 889 else { 890 if (oneflag) { 891 /* There was only one ifdef given */ 892 endifname = ifdef2; 893 if (oldfile) 894 fprintf(stdout, "#ifndef %s\n", endifname); 895 else 896 fprintf(stdout, "#ifdef %s\n", endifname); 897 } else { 898 endifname = oldfile ? ifdef1 : ifdef2; 899 fprintf(stdout, "#ifdef %s\n", endifname); 900 } 901 } 902 inifdef = 1 + oldfile; 903 } 904 for (i = a; i <= b; i++) { 905 fseek(lb, f[i - 1], SEEK_SET); 906 nc = f[i] - f[i - 1]; 907 if (opt != D_IFDEF) 908 prints(s); 909 col = 0; 910 for (j = 0; j < nc; j++) { 911 c = getc(lb); 912 if (c == '\t' && tflag) 913 do 914 putchar(' '); 915 while (++col & 7); 916 else { 917 putchar(c); 918 col++; 919 } 920 } 921 } 922 923 if (inifdef && !wantelses) { 924 fprintf(stdout, "#endif /* %s */\n", endifname); 925 inifdef = 0; 926 } 927 } 928 929 #define POW2 /* define only if HALFLONG is 2**n */ 930 #define HALFLONG 16 931 #define low(x) (x&((1L<<HALFLONG)-1)) 932 #define high(x) (x>>HALFLONG) 933 934 /* 935 * hashing has the effect of 936 * arranging line in 7-bit bytes and then 937 * summing 1-s complement in 16-bit hunks 938 */ 939 static int 940 readhash(FILE *f) 941 { 942 unsigned int shift; 943 int t, space; 944 long sum; 945 946 sum = 1; 947 space = 0; 948 if (!bflag && !wflag) { 949 if (iflag) 950 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 951 if (t == -1) 952 return (0); 953 sum += (long)chrtran[t] << (shift 954 #ifdef POW2 955 &= HALFLONG - 1); 956 #else 957 %= HALFLONG); 958 #endif 959 } 960 else 961 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 962 if (t == -1) 963 return (0); 964 sum += (long)t << (shift 965 #ifdef POW2 966 &= HALFLONG - 1); 967 #else 968 %= HALFLONG); 969 #endif 970 } 971 } else { 972 for (shift = 0;;) { 973 switch (t = getc(f)) { 974 case -1: 975 return (0); 976 case '\t': 977 case ' ': 978 space++; 979 continue; 980 default: 981 if (space && !wflag) { 982 shift += 7; 983 space = 0; 984 } 985 sum += (long)chrtran[t] << (shift 986 #ifdef POW2 987 &= HALFLONG - 1); 988 #else 989 %= HALFLONG); 990 #endif 991 shift += 7; 992 continue; 993 case '\n': 994 break; 995 } 996 break; 997 } 998 } 999 sum = low(sum) + high(sum); 1000 return ((short) low(sum) + (short) high(sum)); 1001 } 1002 1003 static int 1004 asciifile(FILE *f) 1005 { 1006 char buf[BUFSIZ], *cp; 1007 int cnt; 1008 1009 fseek(f, 0L, SEEK_SET); 1010 cnt = fread(buf, 1, BUFSIZ, f); 1011 cp = buf; 1012 while (--cnt >= 0) 1013 if (*cp++ & 0200) 1014 return (0); 1015 return (1); 1016 } 1017 1018 1019 /* dump accumulated "context" diff changes */ 1020 static void 1021 dump_context_vec(void) 1022 { 1023 struct context_vec *cvp = context_vec_start; 1024 int lowa, upb, lowc, upd, do_output; 1025 int a, b, c, d; 1026 char ch; 1027 1028 if (cvp > context_vec_ptr) 1029 return; 1030 1031 b = d = 0; /* gcc */ 1032 lowa = max(1, cvp->a - context); 1033 upb = min(len[0], context_vec_ptr->b + context); 1034 lowc = max(1, cvp->c - context); 1035 upd = min(len[1], context_vec_ptr->d + context); 1036 1037 printf("***************\n*** "); 1038 range(lowa, upb, ","); 1039 printf(" ****\n"); 1040 1041 /* 1042 * output changes to the "old" file. The first loop suppresses 1043 * output if there were no changes to the "old" file (we'll see 1044 * the "old" lines as context in the "new" list). 1045 */ 1046 do_output = 0; 1047 for (; cvp <= context_vec_ptr; cvp++) 1048 if (cvp->a <= cvp->b) { 1049 cvp = context_vec_start; 1050 do_output++; 1051 break; 1052 } 1053 if (do_output) { 1054 while (cvp <= context_vec_ptr) { 1055 a = cvp->a; 1056 b = cvp->b; 1057 c = cvp->c; 1058 d = cvp->d; 1059 1060 if (a <= b && c <= d) 1061 ch = 'c'; 1062 else 1063 ch = (a <= b) ? 'd' : 'a'; 1064 1065 if (ch == 'a') 1066 fetch(ixold, lowa, b, input[0], " ", 0); 1067 else { 1068 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1069 fetch(ixold, a, b, input[0], 1070 ch == 'c' ? "! " : "- ", 0); 1071 } 1072 lowa = b + 1; 1073 cvp++; 1074 } 1075 fetch(ixold, b + 1, upb, input[0], " ", 0); 1076 } 1077 /* output changes to the "new" file */ 1078 printf("--- "); 1079 range(lowc, upd, ","); 1080 printf(" ----\n"); 1081 1082 do_output = 0; 1083 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1084 if (cvp->c <= cvp->d) { 1085 cvp = context_vec_start; 1086 do_output++; 1087 break; 1088 } 1089 if (do_output) { 1090 while (cvp <= context_vec_ptr) { 1091 a = cvp->a; 1092 b = cvp->b; 1093 c = cvp->c; 1094 d = cvp->d; 1095 1096 if (a <= b && c <= d) 1097 ch = 'c'; 1098 else 1099 ch = (a <= b) ? 'd' : 'a'; 1100 1101 if (ch == 'd') 1102 fetch(ixnew, lowc, d, input[1], " ", 0); 1103 else { 1104 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1105 fetch(ixnew, c, d, input[1], 1106 ch == 'c' ? "! " : "+ ", 0); 1107 } 1108 lowc = d + 1; 1109 cvp++; 1110 } 1111 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1112 } 1113 context_vec_ptr = context_vec_start - 1; 1114 } 1115