1 /* $OpenBSD: diffreg.c,v 1.4 2003/06/25 03:25:29 tedu Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 FILE *fopen(); 122 123 struct cand { 124 int x; 125 int y; 126 int pred; 127 } cand; 128 129 struct line { 130 int serial; 131 int value; 132 } *file[2], line; 133 134 int len[2]; 135 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 136 int slen[2]; 137 int pref, suff; /* length of prefix and suffix */ 138 int *class; /* will be overlaid on file[0] */ 139 int *member; /* will be overlaid on file[1] */ 140 int *klist; /* will be overlaid on file[0] after class */ 141 struct cand *clist; /* merely a free storage pot for candidates */ 142 int clen = 0; 143 int *J; /* will be overlaid on class */ 144 long *ixold; /* will be overlaid on klist */ 145 long *ixnew; /* will be overlaid on file[1] */ 146 char *chrtran; /* translation table for case-folding */ 147 148 static void fetch(long *, int, int, FILE *, char *, int); 149 static void output(void); 150 static void check(void); 151 static void range(int, int, char *); 152 static void dump_context_vec(void); 153 static void prepare(int, FILE *); 154 static void prune(void); 155 static void equiv(struct line *, int, struct line *, int, int *); 156 static void unravel(int); 157 static void unsort(struct line *, int, int *); 158 static void change(int, int, int, int); 159 static void sort(struct line *, int); 160 static int newcand(int, int, int); 161 static int search(int *, int, int); 162 static int skipline(int); 163 static int asciifile(FILE *); 164 static int stone(int *, int, int *, int *); 165 static int readhash(FILE *); 166 167 /* 168 * chrtran points to one of 2 translation tables: cup2low if folding upper to 169 * lower case clow2low if not folding case 170 */ 171 char clow2low[256] = { 172 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 173 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 174 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 175 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 176 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 177 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 178 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 179 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 180 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 181 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 182 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 183 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 184 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 185 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 186 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 187 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 188 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 189 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 190 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 191 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 192 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 193 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 194 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 195 0xfd, 0xfe, 0xff 196 }; 197 198 char cup2low[256] = { 199 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 200 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 201 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 202 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 203 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 204 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 205 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 206 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 207 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 208 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 209 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 210 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 211 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 212 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 213 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 214 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 215 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 216 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 217 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 218 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 219 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 220 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 221 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 222 0xfd, 0xfe, 0xff 223 }; 224 225 void 226 diffreg(void) 227 { 228 int i, j; 229 FILE *f1, *f2; 230 char buf1[BUFSIZ], buf2[BUFSIZ]; 231 232 if (hflag) { 233 diffargv[0] = "diffh"; 234 execv(diffh, diffargv); 235 fprintf(stderr, "diff: "); 236 perror(diffh); 237 done(); 238 } 239 chrtran = (iflag ? cup2low : clow2low); 240 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 241 file1 = splice(file1, file2); 242 if (stat(file1, &stb1) < 0) { 243 fprintf(stderr, "diff: "); 244 perror(file1); 245 done(); 246 } 247 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 248 file2 = splice(file2, file1); 249 if (stat(file2, &stb2) < 0) { 250 fprintf(stderr, "diff: "); 251 perror(file2); 252 done(); 253 } 254 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 255 if (!strcmp(file2, "-")) { 256 fprintf(stderr, "diff: can't specify - -\n"); 257 done(); 258 } 259 file1 = copytemp(); 260 if (stat(file1, &stb1) < 0) { 261 fprintf(stderr, "diff: "); 262 perror(file1); 263 done(); 264 } 265 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 266 file2 = copytemp(); 267 if (stat(file2, &stb2) < 0) { 268 fprintf(stderr, "diff: "); 269 perror(file2); 270 done(); 271 } 272 } 273 if ((f1 = fopen(file1, "r")) == NULL) { 274 fprintf(stderr, "diff: "); 275 perror(file1); 276 done(); 277 } 278 if ((f2 = fopen(file2, "r")) == NULL) { 279 fprintf(stderr, "diff: "); 280 perror(file2); 281 fclose(f1); 282 done(); 283 } 284 if (stb1.st_size != stb2.st_size) 285 goto notsame; 286 for (;;) { 287 i = fread(buf1, 1, BUFSIZ, f1); 288 j = fread(buf2, 1, BUFSIZ, f2); 289 if (i < 0 || j < 0 || i != j) 290 goto notsame; 291 if (i == 0 && j == 0) { 292 fclose(f1); 293 fclose(f2); 294 status = 0; /* files don't differ */ 295 goto same; 296 } 297 for (j = 0; j < i; j++) 298 if (buf1[j] != buf2[j]) 299 goto notsame; 300 } 301 notsame: 302 /* 303 * Files certainly differ at this point; set status accordingly 304 */ 305 status = 1; 306 if (!asciifile(f1) || !asciifile(f2)) { 307 printf("Binary files %s and %s differ\n", file1, file2); 308 fclose(f1); 309 fclose(f2); 310 done(); 311 } 312 prepare(0, f1); 313 prepare(1, f2); 314 fclose(f1); 315 fclose(f2); 316 prune(); 317 sort(sfile[0], slen[0]); 318 sort(sfile[1], slen[1]); 319 320 member = (int *)file[1]; 321 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 322 member = ralloc(member, (slen[1] + 2) * sizeof(int)); 323 324 class = (int *)file[0]; 325 unsort(sfile[0], slen[0], class); 326 class = ralloc(class, (slen[0] + 2) * sizeof(int)); 327 328 klist = talloc((slen[0] + 2) * sizeof(int)); 329 clist = talloc(sizeof(cand)); 330 i = stone(class, slen[0], member, klist); 331 free(member); 332 free(class); 333 334 J = talloc((len[0] + 2) * sizeof(int)); 335 unravel(klist[i]); 336 free(clist); 337 free(klist); 338 339 ixold = talloc((len[0] + 2) * sizeof(long)); 340 ixnew = talloc((len[1] + 2) * sizeof(long)); 341 check(); 342 output(); 343 status = anychange; 344 same: 345 if (opt == D_CONTEXT && anychange == 0) 346 printf("No differences encountered\n"); 347 done(); 348 } 349 350 char *tempfile = _PATH_TMP; 351 352 char * 353 copytemp(void) 354 { 355 char buf[BUFSIZ]; 356 int i, f; 357 358 signal(SIGHUP, catchsig); 359 signal(SIGINT, catchsig); 360 signal(SIGPIPE, catchsig); 361 signal(SIGTERM, catchsig); 362 f = mkstemp(tempfile); 363 if (f < 0) { 364 fprintf(stderr, "diff: "); 365 perror(tempfile); 366 done(); 367 } 368 while ((i = read(0, buf, BUFSIZ)) > 0) 369 if (write(f, buf, i) != i) { 370 fprintf(stderr, "diff: "); 371 perror(tempfile); 372 done(); 373 } 374 close(f); 375 return (tempfile); 376 } 377 378 char * 379 splice(char *dir, char *file) 380 { 381 char *tail; 382 char buf[BUFSIZ]; 383 384 if (!strcmp(file, "-")) { 385 fprintf(stderr, "diff: can't specify - with other arg directory\n"); 386 done(); 387 } 388 tail = rindex(file, '/'); 389 if (tail == 0) 390 tail = file; 391 else 392 tail++; 393 snprintf(buf, sizeof buf, "%s/%s", dir, tail); 394 return (strdup(buf)); 395 } 396 397 static void 398 prepare(int i, FILE *fd) 399 { 400 struct line *p; 401 int j, h; 402 403 fseek(fd, 0, 0); 404 p = talloc(3 * sizeof(line)); 405 for (j = 0; (h = readhash(fd));) { 406 p = ralloc(p, (++j + 3) * sizeof(line)); 407 p[j].value = h; 408 } 409 len[i] = j; 410 file[i] = p; 411 } 412 413 static void 414 prune(void) 415 { 416 int i, j; 417 for (pref = 0; pref < len[0] && pref < len[1] && 418 file[0][pref + 1].value == file[1][pref + 1].value; 419 pref++); 420 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 421 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 422 suff++); 423 for (j = 0; j < 2; j++) { 424 sfile[j] = file[j] + pref; 425 slen[j] = len[j] - pref - suff; 426 for (i = 0; i <= slen[j]; i++) 427 sfile[j][i].serial = i; 428 } 429 } 430 431 static void 432 equiv(struct line *a, int n, struct line *b, int m, int *c) 433 { 434 int i, j; 435 i = j = 1; 436 while (i <= n && j <= m) { 437 if (a[i].value < b[j].value) 438 a[i++].value = 0; 439 else if (a[i].value == b[j].value) 440 a[i++].value = j; 441 else 442 j++; 443 } 444 while (i <= n) 445 a[i++].value = 0; 446 b[m + 1].value = 0; 447 j = 0; 448 while (++j <= m) { 449 c[j] = -b[j].serial; 450 while (b[j + 1].value == b[j].value) { 451 j++; 452 c[j] = b[j].serial; 453 } 454 } 455 c[j] = -1; 456 } 457 458 static int 459 stone(int *a, int n, int *b, int *c) 460 { 461 int i, k, y; 462 int j, l; 463 int oldc, tc; 464 int oldl; 465 k = 0; 466 c[0] = newcand(0, 0, 0); 467 for (i = 1; i <= n; i++) { 468 j = a[i]; 469 if (j == 0) 470 continue; 471 y = -b[j]; 472 oldl = 0; 473 oldc = c[0]; 474 do { 475 if (y <= clist[oldc].y) 476 continue; 477 l = search(c, k, y); 478 if (l != oldl + 1) 479 oldc = c[l - 1]; 480 if (l <= k) { 481 if (clist[c[l]].y <= y) 482 continue; 483 tc = c[l]; 484 c[l] = newcand(i, y, oldc); 485 oldc = tc; 486 oldl = l; 487 } else { 488 c[l] = newcand(i, y, oldc); 489 k++; 490 break; 491 } 492 } while ((y = b[++j]) > 0); 493 } 494 return (k); 495 } 496 497 static int 498 newcand(int x, int y, int pred) 499 { 500 struct cand *q; 501 502 clist = ralloc(clist, ++clen * sizeof(cand)); 503 q = clist + clen - 1; 504 q->x = x; 505 q->y = y; 506 q->pred = pred; 507 return (clen - 1); 508 } 509 510 static int 511 search(int *c, int k, int y) 512 { 513 int i, j, l; 514 int t; 515 if (clist[c[k]].y < y) /* quick look for typical case */ 516 return (k + 1); 517 i = 0; 518 j = k + 1; 519 while (1) { 520 l = i + j; 521 if ((l >>= 1) <= i) 522 break; 523 t = clist[c[l]].y; 524 if (t > y) 525 j = l; 526 else if (t < y) 527 i = l; 528 else 529 return (l); 530 } 531 return (l + 1); 532 } 533 534 static void 535 unravel(int p) 536 { 537 int i; 538 struct cand *q; 539 for (i = 0; i <= len[0]; i++) 540 J[i] = i <= pref ? i : 541 i > len[0] - suff ? i + len[1] - len[0] : 542 0; 543 for (q = clist + p; q->y != 0; q = clist + q->pred) 544 J[q->x + pref] = q->y + pref; 545 } 546 547 /* 548 * check does double duty: 1. ferret out any fortuitous correspondences due 549 * to confounding by hashing (which result in "jackpot") 2. collect random 550 * access indexes to the two files 551 */ 552 553 static void 554 check(void) 555 { 556 int i, j; 557 int jackpot; 558 long ctold, ctnew; 559 int c, d; 560 561 if ((input[0] = fopen(file1, "r")) == NULL) { 562 perror(file1); 563 done(); 564 } 565 if ((input[1] = fopen(file2, "r")) == NULL) { 566 perror(file2); 567 done(); 568 } 569 j = 1; 570 ixold[0] = ixnew[0] = 0; 571 jackpot = 0; 572 ctold = ctnew = 0; 573 for (i = 1; i <= len[0]; i++) { 574 if (J[i] == 0) { 575 ixold[i] = ctold += skipline(0); 576 continue; 577 } 578 while (j < J[i]) { 579 ixnew[j] = ctnew += skipline(1); 580 j++; 581 } 582 if (bflag || wflag || iflag) { 583 for (;;) { 584 c = getc(input[0]); 585 d = getc(input[1]); 586 ctold++; 587 ctnew++; 588 if (bflag && isspace(c) && isspace(d)) { 589 do { 590 if (c == '\n') 591 break; 592 ctold++; 593 } while (isspace(c = getc(input[0]))); 594 do { 595 if (d == '\n') 596 break; 597 ctnew++; 598 } while (isspace(d = getc(input[1]))); 599 } else if (wflag) { 600 while (isspace(c) && c != '\n') { 601 c = getc(input[0]); 602 ctold++; 603 } 604 while (isspace(d) && d != '\n') { 605 d = getc(input[1]); 606 ctnew++; 607 } 608 } 609 if (chrtran[c] != chrtran[d]) { 610 jackpot++; 611 J[i] = 0; 612 if (c != '\n') 613 ctold += skipline(0); 614 if (d != '\n') 615 ctnew += skipline(1); 616 break; 617 } 618 if (c == '\n') 619 break; 620 } 621 } else { 622 for (;;) { 623 ctold++; 624 ctnew++; 625 if ((c = getc(input[0])) != (d = getc(input[1]))) { 626 /* jackpot++; */ 627 J[i] = 0; 628 if (c != '\n') 629 ctold += skipline(0); 630 if (d != '\n') 631 ctnew += skipline(1); 632 break; 633 } 634 if (c == '\n') 635 break; 636 } 637 } 638 ixold[i] = ctold; 639 ixnew[j] = ctnew; 640 j++; 641 } 642 for (; j <= len[1]; j++) { 643 ixnew[j] = ctnew += skipline(1); 644 } 645 fclose(input[0]); 646 fclose(input[1]); 647 /* 648 if(jackpot) 649 fprintf(stderr, "jackpot\n"); 650 */ 651 } 652 653 static void 654 sort(struct line *a, int n) 655 { /* shellsort CACM #201 */ 656 struct line w; 657 int j, m = 0; /* gcc */ 658 struct line *ai; 659 struct line *aim; 660 int k; 661 662 if (n == 0) 663 return; 664 for (j = 1; j <= n; j *= 2) 665 m = 2 * j - 1; 666 for (m /= 2; m != 0; m /= 2) { 667 k = n - m; 668 for (j = 1; j <= k; j++) { 669 for (ai = &a[j]; ai > a; ai -= m) { 670 aim = &ai[m]; 671 if (aim < ai) 672 break; /* wraparound */ 673 if (aim->value > ai[0].value || 674 (aim->value == ai[0].value && 675 aim->serial > ai[0].serial)) 676 break; 677 w.value = ai[0].value; 678 ai[0].value = aim->value; 679 aim->value = w.value; 680 w.serial = ai[0].serial; 681 ai[0].serial = aim->serial; 682 aim->serial = w.serial; 683 } 684 } 685 } 686 } 687 688 static void 689 unsort(struct line *f, int l, int *b) 690 { 691 int *a; 692 int i; 693 694 a = talloc((l + 1) * sizeof(int)); 695 for (i = 1; i <= l; i++) 696 a[f[i].serial] = f[i].value; 697 for (i = 1; i <= l; i++) 698 b[i] = a[i]; 699 free(a); 700 } 701 702 static int 703 skipline(int f) 704 { 705 int i, c; 706 707 for (i = 1; (c = getc(input[f])) != '\n'; i++) 708 if (c < 0) 709 return (i); 710 return (i); 711 } 712 713 static void 714 output(void) 715 { 716 int m; 717 int i0, i1, j1; 718 int j0; 719 input[0] = fopen(file1, "r"); 720 input[1] = fopen(file2, "r"); 721 m = len[0]; 722 J[0] = 0; 723 J[m + 1] = len[1] + 1; 724 if (opt != D_EDIT) { 725 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 726 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 727 i0++; 728 j0 = J[i0 - 1] + 1; 729 i1 = i0 - 1; 730 while (i1 < m && J[i1 + 1] == 0) 731 i1++; 732 j1 = J[i1 + 1] - 1; 733 J[i1] = j1; 734 change(i0, i1, j0, j1); 735 } 736 } else { 737 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 738 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 739 i0--; 740 j0 = J[i0 + 1] - 1; 741 i1 = i0 + 1; 742 while (i1 > 1 && J[i1 - 1] == 0) 743 i1--; 744 j1 = J[i1 - 1] + 1; 745 J[i1] = j1; 746 change(i1, i0, j1, j0); 747 } 748 } 749 if (m == 0) 750 change(1, 0, 1, len[1]); 751 if (opt == D_IFDEF) { 752 for (;;) { 753 #define c i0 754 c = getc(input[0]); 755 if (c < 0) 756 return; 757 putchar(c); 758 } 759 #undef c 760 } 761 if (anychange && opt == D_CONTEXT) 762 dump_context_vec(); 763 } 764 765 /* 766 * The following struct is used to record change information when 767 * doing a "context" diff. (see routine "change" to understand the 768 * highly mneumonic field names) 769 */ 770 struct context_vec { 771 int a; /* start line in old file */ 772 int b; /* end line in old file */ 773 int c; /* start line in new file */ 774 int d; /* end line in new file */ 775 }; 776 777 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 778 779 #define MAX_CONTEXT 128 780 781 /* 782 * indicate that there is a difference between lines a and b of the from file 783 * to get to lines c to d of the to file. If a is greater then b then there 784 * are no lines in the from file involved and this means that there were 785 * lines appended (beginning at b). If c is greater than d then there are 786 * lines missing from the to file. 787 */ 788 static void 789 change(int a, int b, int c, int d) 790 { 791 struct stat stbuf; 792 793 if (opt != D_IFDEF && a > b && c > d) 794 return; 795 if (anychange == 0) { 796 anychange = 1; 797 if (opt == D_CONTEXT) { 798 printf("*** %s ", file1); 799 stat(file1, &stbuf); 800 printf("%s--- %s ", 801 ctime(&stbuf.st_mtime), file2); 802 stat(file2, &stbuf); 803 printf("%s", ctime(&stbuf.st_mtime)); 804 805 context_vec_start = talloc(MAX_CONTEXT * 806 sizeof(struct context_vec)); 807 context_vec_end = context_vec_start + MAX_CONTEXT; 808 context_vec_ptr = context_vec_start - 1; 809 } 810 } 811 if (opt == D_CONTEXT) { 812 /* 813 * if this new change is within 'context' lines of 814 * the previous change, just add it to the change 815 * record. If the record is full or if this 816 * change is more than 'context' lines from the previous 817 * change, dump the record, reset it & add the new change. 818 */ 819 if (context_vec_ptr >= context_vec_end || 820 (context_vec_ptr >= context_vec_start && 821 a > (context_vec_ptr->b + 2 * context) && 822 c > (context_vec_ptr->d + 2 * context))) 823 dump_context_vec(); 824 825 context_vec_ptr++; 826 context_vec_ptr->a = a; 827 context_vec_ptr->b = b; 828 context_vec_ptr->c = c; 829 context_vec_ptr->d = d; 830 return; 831 } 832 switch (opt) { 833 834 case D_NORMAL: 835 case D_EDIT: 836 range(a, b, ","); 837 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 838 if (opt == D_NORMAL) 839 range(c, d, ","); 840 putchar('\n'); 841 break; 842 case D_REVERSE: 843 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 844 range(a, b, " "); 845 putchar('\n'); 846 break; 847 case D_NREVERSE: 848 if (a > b) 849 printf("a%d %d\n", b, d - c + 1); 850 else { 851 printf("d%d %d\n", a, b - a + 1); 852 if (!(c > d)) 853 /* add changed lines */ 854 printf("a%d %d\n", b, d - c + 1); 855 } 856 break; 857 } 858 if (opt == D_NORMAL || opt == D_IFDEF) { 859 fetch(ixold, a, b, input[0], "< ", 1); 860 if (a <= b && c <= d && opt == D_NORMAL) 861 prints("---\n"); 862 } 863 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 864 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 865 prints(".\n"); 866 if (inifdef) { 867 fprintf(stdout, "#endif /* %s */\n", endifname); 868 inifdef = 0; 869 } 870 } 871 872 static void 873 range(int a, int b, char *separator) 874 { 875 printf("%d", a > b ? b : a); 876 if (a < b) { 877 printf("%s%d", separator, b); 878 } 879 } 880 881 static void 882 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 883 { 884 int i, j; 885 int c; 886 int col; 887 int nc; 888 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 889 890 /* 891 * When doing #ifdef's, copy down to current line 892 * if this is the first file, so that stuff makes it to output. 893 */ 894 if (opt == D_IFDEF && oldfile) { 895 long curpos = ftell(lb); 896 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 897 nc = f[a > b ? b : a - 1] - curpos; 898 for (i = 0; i < nc; i++) 899 putchar(getc(lb)); 900 } 901 if (a > b) 902 return; 903 if (opt == D_IFDEF) { 904 if (inifdef) 905 fprintf(stdout, "#else /* %s%s */\n", oneflag && oldfile == 1 ? "!" : "", ifdef2); 906 else { 907 if (oneflag) { 908 /* There was only one ifdef given */ 909 endifname = ifdef2; 910 if (oldfile) 911 fprintf(stdout, "#ifndef %s\n", endifname); 912 else 913 fprintf(stdout, "#ifdef %s\n", endifname); 914 } else { 915 endifname = oldfile ? ifdef1 : ifdef2; 916 fprintf(stdout, "#ifdef %s\n", endifname); 917 } 918 } 919 inifdef = 1 + oldfile; 920 } 921 for (i = a; i <= b; i++) { 922 fseek(lb, f[i - 1], 0); 923 nc = f[i] - f[i - 1]; 924 if (opt != D_IFDEF) 925 prints(s); 926 col = 0; 927 for (j = 0; j < nc; j++) { 928 c = getc(lb); 929 if (c == '\t' && tflag) 930 do 931 putchar(' '); 932 while (++col & 7); 933 else { 934 putchar(c); 935 col++; 936 } 937 } 938 } 939 940 if (inifdef && !wantelses) { 941 fprintf(stdout, "#endif /* %s */\n", endifname); 942 inifdef = 0; 943 } 944 } 945 946 #define POW2 /* define only if HALFLONG is 2**n */ 947 #define HALFLONG 16 948 #define low(x) (x&((1L<<HALFLONG)-1)) 949 #define high(x) (x>>HALFLONG) 950 951 /* 952 * hashing has the effect of 953 * arranging line in 7-bit bytes and then 954 * summing 1-s complement in 16-bit hunks 955 */ 956 static int 957 readhash(FILE *f) 958 { 959 long sum; 960 unsigned shift; 961 int t; 962 int space; 963 964 sum = 1; 965 space = 0; 966 if (!bflag && !wflag) { 967 if (iflag) 968 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 969 if (t == -1) 970 return (0); 971 sum += (long)chrtran[t] << (shift 972 #ifdef POW2 973 &= HALFLONG - 1); 974 #else 975 %= HALFLONG); 976 #endif 977 } 978 else 979 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 980 if (t == -1) 981 return (0); 982 sum += (long)t << (shift 983 #ifdef POW2 984 &= HALFLONG - 1); 985 #else 986 %= HALFLONG); 987 #endif 988 } 989 } else { 990 for (shift = 0;;) { 991 switch (t = getc(f)) { 992 case -1: 993 return (0); 994 case '\t': 995 case ' ': 996 space++; 997 continue; 998 default: 999 if (space && !wflag) { 1000 shift += 7; 1001 space = 0; 1002 } 1003 sum += (long)chrtran[t] << (shift 1004 #ifdef POW2 1005 &= HALFLONG - 1); 1006 #else 1007 %= HALFLONG); 1008 #endif 1009 shift += 7; 1010 continue; 1011 case '\n': 1012 break; 1013 } 1014 break; 1015 } 1016 } 1017 sum = low(sum) + high(sum); 1018 return ((short) low(sum) + (short) high(sum)); 1019 } 1020 1021 static int 1022 asciifile(FILE *f) 1023 { 1024 char buf[BUFSIZ]; 1025 int cnt; 1026 char *cp; 1027 1028 fseek(f, 0, 0); 1029 cnt = fread(buf, 1, BUFSIZ, f); 1030 cp = buf; 1031 while (--cnt >= 0) 1032 if (*cp++ & 0200) 1033 return (0); 1034 return (1); 1035 } 1036 1037 1038 /* dump accumulated "context" diff changes */ 1039 static void 1040 dump_context_vec(void) 1041 { 1042 int a, b, c, d; 1043 char ch; 1044 struct context_vec *cvp = context_vec_start; 1045 int lowa, upb, lowc, upd; 1046 int do_output; 1047 1048 if (cvp > context_vec_ptr) 1049 return; 1050 1051 b = d = 0; /* gcc */ 1052 lowa = max(1, cvp->a - context); 1053 upb = min(len[0], context_vec_ptr->b + context); 1054 lowc = max(1, cvp->c - context); 1055 upd = min(len[1], context_vec_ptr->d + context); 1056 1057 printf("***************\n*** "); 1058 range(lowa, upb, ","); 1059 printf(" ****\n"); 1060 1061 /* 1062 * output changes to the "old" file. The first loop suppresses 1063 * output if there were no changes to the "old" file (we'll see 1064 * the "old" lines as context in the "new" list). 1065 */ 1066 do_output = 0; 1067 for (; cvp <= context_vec_ptr; cvp++) 1068 if (cvp->a <= cvp->b) { 1069 cvp = context_vec_start; 1070 do_output++; 1071 break; 1072 } 1073 if (do_output) { 1074 while (cvp <= context_vec_ptr) { 1075 a = cvp->a; 1076 b = cvp->b; 1077 c = cvp->c; 1078 d = cvp->d; 1079 1080 if (a <= b && c <= d) 1081 ch = 'c'; 1082 else 1083 ch = (a <= b) ? 'd' : 'a'; 1084 1085 if (ch == 'a') 1086 fetch(ixold, lowa, b, input[0], " ", 0); 1087 else { 1088 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1089 fetch(ixold, a, b, input[0], ch == 'c' ? "! " : "- ", 0); 1090 } 1091 lowa = b + 1; 1092 cvp++; 1093 } 1094 fetch(ixold, b + 1, upb, input[0], " ", 0); 1095 } 1096 /* output changes to the "new" file */ 1097 printf("--- "); 1098 range(lowc, upd, ","); 1099 printf(" ----\n"); 1100 1101 do_output = 0; 1102 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1103 if (cvp->c <= cvp->d) { 1104 cvp = context_vec_start; 1105 do_output++; 1106 break; 1107 } 1108 if (do_output) { 1109 while (cvp <= context_vec_ptr) { 1110 a = cvp->a; 1111 b = cvp->b; 1112 c = cvp->c; 1113 d = cvp->d; 1114 1115 if (a <= b && c <= d) 1116 ch = 'c'; 1117 else 1118 ch = (a <= b) ? 'd' : 'a'; 1119 1120 if (ch == 'd') 1121 fetch(ixnew, lowc, d, input[1], " ", 0); 1122 else { 1123 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1124 fetch(ixnew, c, d, input[1], ch == 'c' ? "! " : "+ ", 0); 1125 } 1126 lowc = d + 1; 1127 cvp++; 1128 } 1129 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1130 } 1131 context_vec_ptr = context_vec_start - 1; 1132 } 1133