1 /* $OpenBSD: diffreg.c,v 1.10 2003/06/25 03:46:45 deraadt Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void prepare(int, FILE *); 153 static void prune(void); 154 static void equiv(struct line *, int, struct line *, int, int *); 155 static void unravel(int); 156 static void unsort(struct line *, int, int *); 157 static void change(int, int, int, int); 158 static void sort(struct line *, int); 159 static int newcand(int, int, int); 160 static int search(int *, int, int); 161 static int skipline(int); 162 static int asciifile(FILE *); 163 static int stone(int *, int, int *, int *); 164 static int readhash(FILE *); 165 166 /* 167 * chrtran points to one of 2 translation tables: cup2low if folding upper to 168 * lower case clow2low if not folding case 169 */ 170 u_char clow2low[256] = { 171 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 172 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 173 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 174 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 175 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 176 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 177 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 178 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 179 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 180 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 181 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 182 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 183 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 184 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 185 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 186 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 187 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 188 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 189 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 190 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 191 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 192 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 193 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 194 0xfd, 0xfe, 0xff 195 }; 196 197 u_char cup2low[256] = { 198 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 199 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 200 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 201 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 202 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 203 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 204 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 205 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 206 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 207 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 208 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 209 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 210 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 211 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 212 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 213 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 214 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 215 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 216 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 217 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 218 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 219 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 220 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 221 0xfd, 0xfe, 0xff 222 }; 223 224 void 225 diffreg(void) 226 { 227 char buf1[BUFSIZ], buf2[BUFSIZ]; 228 FILE *f1, *f2; 229 int i, j; 230 231 if (hflag) { 232 diffargv[0] = "diffh"; 233 execv(diffh, diffargv); 234 fprintf(stderr, "diff: "); 235 perror(diffh); 236 done(0); 237 } 238 chrtran = (iflag ? cup2low : clow2low); 239 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 240 file1 = splice(file1, file2); 241 if (stat(file1, &stb1) < 0) { 242 fprintf(stderr, "diff: "); 243 perror(file1); 244 done(0); 245 } 246 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 247 file2 = splice(file2, file1); 248 if (stat(file2, &stb2) < 0) { 249 fprintf(stderr, "diff: "); 250 perror(file2); 251 done(0); 252 } 253 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 254 if (!strcmp(file2, "-")) { 255 fprintf(stderr, "diff: can't specify - -\n"); 256 done(0); 257 } 258 file1 = copytemp(); 259 if (stat(file1, &stb1) < 0) { 260 fprintf(stderr, "diff: "); 261 perror(file1); 262 done(0); 263 } 264 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 265 file2 = copytemp(); 266 if (stat(file2, &stb2) < 0) { 267 fprintf(stderr, "diff: "); 268 perror(file2); 269 done(0); 270 } 271 } 272 if ((f1 = fopen(file1, "r")) == NULL) { 273 fprintf(stderr, "diff: "); 274 perror(file1); 275 done(0); 276 } 277 if ((f2 = fopen(file2, "r")) == NULL) { 278 fprintf(stderr, "diff: "); 279 perror(file2); 280 fclose(f1); 281 done(0); 282 } 283 if (stb1.st_size != stb2.st_size) 284 goto notsame; 285 for (;;) { 286 i = fread(buf1, 1, BUFSIZ, f1); 287 j = fread(buf2, 1, BUFSIZ, f2); 288 if (i < 0 || j < 0 || i != j) 289 goto notsame; 290 if (i == 0 && j == 0) { 291 fclose(f1); 292 fclose(f2); 293 status = 0; /* files don't differ */ 294 goto same; 295 } 296 for (j = 0; j < i; j++) 297 if (buf1[j] != buf2[j]) 298 goto notsame; 299 } 300 notsame: 301 /* 302 * Files certainly differ at this point; set status accordingly 303 */ 304 status = 1; 305 if (!asciifile(f1) || !asciifile(f2)) { 306 printf("Binary files %s and %s differ\n", file1, file2); 307 fclose(f1); 308 fclose(f2); 309 done(0); 310 } 311 prepare(0, f1); 312 prepare(1, f2); 313 fclose(f1); 314 fclose(f2); 315 prune(); 316 sort(sfile[0], slen[0]); 317 sort(sfile[1], slen[1]); 318 319 member = (int *)file[1]; 320 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 321 member = ralloc(member, (slen[1] + 2) * sizeof(int)); 322 323 class = (int *)file[0]; 324 unsort(sfile[0], slen[0], class); 325 class = ralloc(class, (slen[0] + 2) * sizeof(int)); 326 327 klist = talloc((slen[0] + 2) * sizeof(int)); 328 clist = talloc(sizeof(cand)); 329 i = stone(class, slen[0], member, klist); 330 free(member); 331 free(class); 332 333 J = talloc((len[0] + 2) * sizeof(int)); 334 unravel(klist[i]); 335 free(clist); 336 free(klist); 337 338 ixold = talloc((len[0] + 2) * sizeof(long)); 339 ixnew = talloc((len[1] + 2) * sizeof(long)); 340 check(); 341 output(); 342 status = anychange; 343 same: 344 if (opt == D_CONTEXT && anychange == 0) 345 printf("No differences encountered\n"); 346 done(0); 347 } 348 349 char *tempfile = _PATH_TMP; 350 351 char * 352 copytemp(void) 353 { 354 char buf[BUFSIZ]; 355 int i, f; 356 357 signal(SIGHUP, catchsig); 358 signal(SIGINT, catchsig); 359 signal(SIGPIPE, catchsig); 360 signal(SIGTERM, catchsig); 361 f = mkstemp(tempfile); 362 if (f < 0) { 363 fprintf(stderr, "diff: "); 364 perror(tempfile); 365 done(0); 366 } 367 while ((i = read(0, buf, BUFSIZ)) > 0) 368 if (write(f, buf, i) != i) { 369 fprintf(stderr, "diff: "); 370 perror(tempfile); 371 done(0); 372 } 373 close(f); 374 return (tempfile); 375 } 376 377 char * 378 splice(char *dir, char *file) 379 { 380 char *tail, buf[BUFSIZ]; 381 382 if (!strcmp(file, "-")) { 383 fprintf(stderr, "diff: can't specify - with other arg directory\n"); 384 done(0); 385 } 386 tail = strrchr(file, '/'); 387 if (tail == 0) 388 tail = file; 389 else 390 tail++; 391 snprintf(buf, sizeof buf, "%s/%s", dir, tail); 392 return (strdup(buf)); 393 } 394 395 static void 396 prepare(int i, FILE *fd) 397 { 398 struct line *p; 399 int j, h; 400 401 fseek(fd, 0, 0); 402 p = talloc(3 * sizeof(struct line)); 403 for (j = 0; (h = readhash(fd));) { 404 p = ralloc(p, (++j + 3) * sizeof(struct line)); 405 p[j].value = h; 406 } 407 len[i] = j; 408 file[i] = p; 409 } 410 411 static void 412 prune(void) 413 { 414 int i, j; 415 416 for (pref = 0; pref < len[0] && pref < len[1] && 417 file[0][pref + 1].value == file[1][pref + 1].value; 418 pref++); 419 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 420 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 421 suff++); 422 for (j = 0; j < 2; j++) { 423 sfile[j] = file[j] + pref; 424 slen[j] = len[j] - pref - suff; 425 for (i = 0; i <= slen[j]; i++) 426 sfile[j][i].serial = i; 427 } 428 } 429 430 static void 431 equiv(struct line *a, int n, struct line *b, int m, int *c) 432 { 433 int i, j; 434 435 i = j = 1; 436 while (i <= n && j <= m) { 437 if (a[i].value < b[j].value) 438 a[i++].value = 0; 439 else if (a[i].value == b[j].value) 440 a[i++].value = j; 441 else 442 j++; 443 } 444 while (i <= n) 445 a[i++].value = 0; 446 b[m + 1].value = 0; 447 j = 0; 448 while (++j <= m) { 449 c[j] = -b[j].serial; 450 while (b[j + 1].value == b[j].value) { 451 j++; 452 c[j] = b[j].serial; 453 } 454 } 455 c[j] = -1; 456 } 457 458 static int 459 stone(int *a, int n, int *b, int *c) 460 { 461 int i, k, y, j, l; 462 int oldc, tc, oldl; 463 464 k = 0; 465 c[0] = newcand(0, 0, 0); 466 for (i = 1; i <= n; i++) { 467 j = a[i]; 468 if (j == 0) 469 continue; 470 y = -b[j]; 471 oldl = 0; 472 oldc = c[0]; 473 do { 474 if (y <= clist[oldc].y) 475 continue; 476 l = search(c, k, y); 477 if (l != oldl + 1) 478 oldc = c[l - 1]; 479 if (l <= k) { 480 if (clist[c[l]].y <= y) 481 continue; 482 tc = c[l]; 483 c[l] = newcand(i, y, oldc); 484 oldc = tc; 485 oldl = l; 486 } else { 487 c[l] = newcand(i, y, oldc); 488 k++; 489 break; 490 } 491 } while ((y = b[++j]) > 0); 492 } 493 return (k); 494 } 495 496 static int 497 newcand(int x, int y, int pred) 498 { 499 struct cand *q; 500 501 clist = ralloc(clist, ++clen * sizeof(cand)); 502 q = clist + clen - 1; 503 q->x = x; 504 q->y = y; 505 q->pred = pred; 506 return (clen - 1); 507 } 508 509 static int 510 search(int *c, int k, int y) 511 { 512 int i, j, l, t; 513 514 if (clist[c[k]].y < y) /* quick look for typical case */ 515 return (k + 1); 516 i = 0; 517 j = k + 1; 518 while (1) { 519 l = i + j; 520 if ((l >>= 1) <= i) 521 break; 522 t = clist[c[l]].y; 523 if (t > y) 524 j = l; 525 else if (t < y) 526 i = l; 527 else 528 return (l); 529 } 530 return (l + 1); 531 } 532 533 static void 534 unravel(int p) 535 { 536 struct cand *q; 537 int i; 538 539 for (i = 0; i <= len[0]; i++) 540 J[i] = i <= pref ? i : 541 i > len[0] - suff ? i + len[1] - len[0] : 542 0; 543 for (q = clist + p; q->y != 0; q = clist + q->pred) 544 J[q->x + pref] = q->y + pref; 545 } 546 547 /* 548 * check does double duty: 1. ferret out any fortuitous correspondences due 549 * to confounding by hashing (which result in "jackpot") 2. collect random 550 * access indexes to the two files 551 */ 552 553 static void 554 check(void) 555 { 556 int i, j, jackpot, c, d; 557 long ctold, ctnew; 558 559 if ((input[0] = fopen(file1, "r")) == NULL) { 560 perror(file1); 561 done(0); 562 } 563 if ((input[1] = fopen(file2, "r")) == NULL) { 564 perror(file2); 565 done(0); 566 } 567 j = 1; 568 ixold[0] = ixnew[0] = 0; 569 jackpot = 0; 570 ctold = ctnew = 0; 571 for (i = 1; i <= len[0]; i++) { 572 if (J[i] == 0) { 573 ixold[i] = ctold += skipline(0); 574 continue; 575 } 576 while (j < J[i]) { 577 ixnew[j] = ctnew += skipline(1); 578 j++; 579 } 580 if (bflag || wflag || iflag) { 581 for (;;) { 582 c = getc(input[0]); 583 d = getc(input[1]); 584 ctold++; 585 ctnew++; 586 if (bflag && isspace(c) && isspace(d)) { 587 do { 588 if (c == '\n') 589 break; 590 ctold++; 591 } while (isspace(c = getc(input[0]))); 592 do { 593 if (d == '\n') 594 break; 595 ctnew++; 596 } while (isspace(d = getc(input[1]))); 597 } else if (wflag) { 598 while (isspace(c) && c != '\n') { 599 c = getc(input[0]); 600 ctold++; 601 } 602 while (isspace(d) && d != '\n') { 603 d = getc(input[1]); 604 ctnew++; 605 } 606 } 607 if (chrtran[c] != chrtran[d]) { 608 jackpot++; 609 J[i] = 0; 610 if (c != '\n') 611 ctold += skipline(0); 612 if (d != '\n') 613 ctnew += skipline(1); 614 break; 615 } 616 if (c == '\n') 617 break; 618 } 619 } else { 620 for (;;) { 621 ctold++; 622 ctnew++; 623 if ((c = getc(input[0])) != (d = getc(input[1]))) { 624 /* jackpot++; */ 625 J[i] = 0; 626 if (c != '\n') 627 ctold += skipline(0); 628 if (d != '\n') 629 ctnew += skipline(1); 630 break; 631 } 632 if (c == '\n') 633 break; 634 } 635 } 636 ixold[i] = ctold; 637 ixnew[j] = ctnew; 638 j++; 639 } 640 for (; j <= len[1]; j++) { 641 ixnew[j] = ctnew += skipline(1); 642 } 643 fclose(input[0]); 644 fclose(input[1]); 645 /* 646 * if (jackpot) 647 * fprintf(stderr, "jackpot\n"); 648 */ 649 } 650 651 /* shellsort CACM #201 */ 652 static void 653 sort(struct line *a, int n) 654 { 655 struct line *ai, *aim, w; 656 int j, m = 0, k; 657 658 if (n == 0) 659 return; 660 for (j = 1; j <= n; j *= 2) 661 m = 2 * j - 1; 662 for (m /= 2; m != 0; m /= 2) { 663 k = n - m; 664 for (j = 1; j <= k; j++) { 665 for (ai = &a[j]; ai > a; ai -= m) { 666 aim = &ai[m]; 667 if (aim < ai) 668 break; /* wraparound */ 669 if (aim->value > ai[0].value || 670 (aim->value == ai[0].value && 671 aim->serial > ai[0].serial)) 672 break; 673 w.value = ai[0].value; 674 ai[0].value = aim->value; 675 aim->value = w.value; 676 w.serial = ai[0].serial; 677 ai[0].serial = aim->serial; 678 aim->serial = w.serial; 679 } 680 } 681 } 682 } 683 684 static void 685 unsort(struct line *f, int l, int *b) 686 { 687 int *a, i; 688 689 a = talloc((l + 1) * sizeof(int)); 690 for (i = 1; i <= l; i++) 691 a[f[i].serial] = f[i].value; 692 for (i = 1; i <= l; i++) 693 b[i] = a[i]; 694 free(a); 695 } 696 697 static int 698 skipline(int f) 699 { 700 int i, c; 701 702 for (i = 1; (c = getc(input[f])) != '\n'; i++) 703 if (c < 0) 704 return (i); 705 return (i); 706 } 707 708 static void 709 output(void) 710 { 711 int m, i0, i1, j0, j1; 712 713 input[0] = fopen(file1, "r"); 714 input[1] = fopen(file2, "r"); 715 m = len[0]; 716 J[0] = 0; 717 J[m + 1] = len[1] + 1; 718 if (opt != D_EDIT) { 719 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 720 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 721 i0++; 722 j0 = J[i0 - 1] + 1; 723 i1 = i0 - 1; 724 while (i1 < m && J[i1 + 1] == 0) 725 i1++; 726 j1 = J[i1 + 1] - 1; 727 J[i1] = j1; 728 change(i0, i1, j0, j1); 729 } 730 } else { 731 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 732 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 733 i0--; 734 j0 = J[i0 + 1] - 1; 735 i1 = i0 + 1; 736 while (i1 > 1 && J[i1 - 1] == 0) 737 i1--; 738 j1 = J[i1 - 1] + 1; 739 J[i1] = j1; 740 change(i1, i0, j1, j0); 741 } 742 } 743 if (m == 0) 744 change(1, 0, 1, len[1]); 745 if (opt == D_IFDEF) { 746 for (;;) { 747 #define c i0 748 c = getc(input[0]); 749 if (c < 0) 750 return; 751 putchar(c); 752 } 753 #undef c 754 } 755 if (anychange && opt == D_CONTEXT) 756 dump_context_vec(); 757 } 758 759 /* 760 * The following struct is used to record change information when 761 * doing a "context" diff. (see routine "change" to understand the 762 * highly mneumonic field names) 763 */ 764 struct context_vec { 765 int a; /* start line in old file */ 766 int b; /* end line in old file */ 767 int c; /* start line in new file */ 768 int d; /* end line in new file */ 769 }; 770 771 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 772 773 #define MAX_CONTEXT 128 774 775 /* 776 * indicate that there is a difference between lines a and b of the from file 777 * to get to lines c to d of the to file. If a is greater then b then there 778 * are no lines in the from file involved and this means that there were 779 * lines appended (beginning at b). If c is greater than d then there are 780 * lines missing from the to file. 781 */ 782 static void 783 change(int a, int b, int c, int d) 784 { 785 struct stat stbuf; 786 787 if (opt != D_IFDEF && a > b && c > d) 788 return; 789 if (anychange == 0) { 790 anychange = 1; 791 if (opt == D_CONTEXT) { 792 printf("*** %s ", file1); 793 stat(file1, &stbuf); 794 printf("%s--- %s ", 795 ctime(&stbuf.st_mtime), file2); 796 stat(file2, &stbuf); 797 printf("%s", ctime(&stbuf.st_mtime)); 798 799 context_vec_start = talloc(MAX_CONTEXT * 800 sizeof(struct context_vec)); 801 context_vec_end = context_vec_start + MAX_CONTEXT; 802 context_vec_ptr = context_vec_start - 1; 803 } 804 } 805 if (opt == D_CONTEXT) { 806 /* 807 * if this new change is within 'context' lines of 808 * the previous change, just add it to the change 809 * record. If the record is full or if this 810 * change is more than 'context' lines from the previous 811 * change, dump the record, reset it & add the new change. 812 */ 813 if (context_vec_ptr >= context_vec_end || 814 (context_vec_ptr >= context_vec_start && 815 a > (context_vec_ptr->b + 2 * context) && 816 c > (context_vec_ptr->d + 2 * context))) 817 dump_context_vec(); 818 819 context_vec_ptr++; 820 context_vec_ptr->a = a; 821 context_vec_ptr->b = b; 822 context_vec_ptr->c = c; 823 context_vec_ptr->d = d; 824 return; 825 } 826 switch (opt) { 827 828 case D_NORMAL: 829 case D_EDIT: 830 range(a, b, ","); 831 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 832 if (opt == D_NORMAL) 833 range(c, d, ","); 834 putchar('\n'); 835 break; 836 case D_REVERSE: 837 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 838 range(a, b, " "); 839 putchar('\n'); 840 break; 841 case D_NREVERSE: 842 if (a > b) 843 printf("a%d %d\n", b, d - c + 1); 844 else { 845 printf("d%d %d\n", a, b - a + 1); 846 if (!(c > d)) 847 /* add changed lines */ 848 printf("a%d %d\n", b, d - c + 1); 849 } 850 break; 851 } 852 if (opt == D_NORMAL || opt == D_IFDEF) { 853 fetch(ixold, a, b, input[0], "< ", 1); 854 if (a <= b && c <= d && opt == D_NORMAL) 855 prints("---\n"); 856 } 857 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 858 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 859 prints(".\n"); 860 if (inifdef) { 861 fprintf(stdout, "#endif /* %s */\n", endifname); 862 inifdef = 0; 863 } 864 } 865 866 static void 867 range(int a, int b, char *separator) 868 { 869 printf("%d", a > b ? b : a); 870 if (a < b) 871 printf("%s%d", separator, b); 872 } 873 874 static void 875 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 876 { 877 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 878 int i, j, c, col, nc; 879 880 /* 881 * When doing #ifdef's, copy down to current line 882 * if this is the first file, so that stuff makes it to output. 883 */ 884 if (opt == D_IFDEF && oldfile) { 885 long curpos = ftell(lb); 886 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 887 nc = f[a > b ? b : a - 1] - curpos; 888 for (i = 0; i < nc; i++) 889 putchar(getc(lb)); 890 } 891 if (a > b) 892 return; 893 if (opt == D_IFDEF) { 894 if (inifdef) 895 fprintf(stdout, "#else /* %s%s */\n", 896 oneflag && oldfile == 1 ? "!" : "", ifdef2); 897 else { 898 if (oneflag) { 899 /* There was only one ifdef given */ 900 endifname = ifdef2; 901 if (oldfile) 902 fprintf(stdout, "#ifndef %s\n", endifname); 903 else 904 fprintf(stdout, "#ifdef %s\n", endifname); 905 } else { 906 endifname = oldfile ? ifdef1 : ifdef2; 907 fprintf(stdout, "#ifdef %s\n", endifname); 908 } 909 } 910 inifdef = 1 + oldfile; 911 } 912 for (i = a; i <= b; i++) { 913 fseek(lb, f[i - 1], 0); 914 nc = f[i] - f[i - 1]; 915 if (opt != D_IFDEF) 916 prints(s); 917 col = 0; 918 for (j = 0; j < nc; j++) { 919 c = getc(lb); 920 if (c == '\t' && tflag) 921 do 922 putchar(' '); 923 while (++col & 7); 924 else { 925 putchar(c); 926 col++; 927 } 928 } 929 } 930 931 if (inifdef && !wantelses) { 932 fprintf(stdout, "#endif /* %s */\n", endifname); 933 inifdef = 0; 934 } 935 } 936 937 #define POW2 /* define only if HALFLONG is 2**n */ 938 #define HALFLONG 16 939 #define low(x) (x&((1L<<HALFLONG)-1)) 940 #define high(x) (x>>HALFLONG) 941 942 /* 943 * hashing has the effect of 944 * arranging line in 7-bit bytes and then 945 * summing 1-s complement in 16-bit hunks 946 */ 947 static int 948 readhash(FILE *f) 949 { 950 unsigned int shift; 951 int t, space; 952 long sum; 953 954 sum = 1; 955 space = 0; 956 if (!bflag && !wflag) { 957 if (iflag) 958 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 959 if (t == -1) 960 return (0); 961 sum += (long)chrtran[t] << (shift 962 #ifdef POW2 963 &= HALFLONG - 1); 964 #else 965 %= HALFLONG); 966 #endif 967 } 968 else 969 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 970 if (t == -1) 971 return (0); 972 sum += (long)t << (shift 973 #ifdef POW2 974 &= HALFLONG - 1); 975 #else 976 %= HALFLONG); 977 #endif 978 } 979 } else { 980 for (shift = 0;;) { 981 switch (t = getc(f)) { 982 case -1: 983 return (0); 984 case '\t': 985 case ' ': 986 space++; 987 continue; 988 default: 989 if (space && !wflag) { 990 shift += 7; 991 space = 0; 992 } 993 sum += (long)chrtran[t] << (shift 994 #ifdef POW2 995 &= HALFLONG - 1); 996 #else 997 %= HALFLONG); 998 #endif 999 shift += 7; 1000 continue; 1001 case '\n': 1002 break; 1003 } 1004 break; 1005 } 1006 } 1007 sum = low(sum) + high(sum); 1008 return ((short) low(sum) + (short) high(sum)); 1009 } 1010 1011 static int 1012 asciifile(FILE *f) 1013 { 1014 char buf[BUFSIZ], *cp; 1015 int cnt; 1016 1017 fseek(f, 0, 0); 1018 cnt = fread(buf, 1, BUFSIZ, f); 1019 cp = buf; 1020 while (--cnt >= 0) 1021 if (*cp++ & 0200) 1022 return (0); 1023 return (1); 1024 } 1025 1026 1027 /* dump accumulated "context" diff changes */ 1028 static void 1029 dump_context_vec(void) 1030 { 1031 struct context_vec *cvp = context_vec_start; 1032 int lowa, upb, lowc, upd, do_output; 1033 int a, b, c, d; 1034 char ch; 1035 1036 if (cvp > context_vec_ptr) 1037 return; 1038 1039 b = d = 0; /* gcc */ 1040 lowa = max(1, cvp->a - context); 1041 upb = min(len[0], context_vec_ptr->b + context); 1042 lowc = max(1, cvp->c - context); 1043 upd = min(len[1], context_vec_ptr->d + context); 1044 1045 printf("***************\n*** "); 1046 range(lowa, upb, ","); 1047 printf(" ****\n"); 1048 1049 /* 1050 * output changes to the "old" file. The first loop suppresses 1051 * output if there were no changes to the "old" file (we'll see 1052 * the "old" lines as context in the "new" list). 1053 */ 1054 do_output = 0; 1055 for (; cvp <= context_vec_ptr; cvp++) 1056 if (cvp->a <= cvp->b) { 1057 cvp = context_vec_start; 1058 do_output++; 1059 break; 1060 } 1061 if (do_output) { 1062 while (cvp <= context_vec_ptr) { 1063 a = cvp->a; 1064 b = cvp->b; 1065 c = cvp->c; 1066 d = cvp->d; 1067 1068 if (a <= b && c <= d) 1069 ch = 'c'; 1070 else 1071 ch = (a <= b) ? 'd' : 'a'; 1072 1073 if (ch == 'a') 1074 fetch(ixold, lowa, b, input[0], " ", 0); 1075 else { 1076 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1077 fetch(ixold, a, b, input[0], 1078 ch == 'c' ? "! " : "- ", 0); 1079 } 1080 lowa = b + 1; 1081 cvp++; 1082 } 1083 fetch(ixold, b + 1, upb, input[0], " ", 0); 1084 } 1085 /* output changes to the "new" file */ 1086 printf("--- "); 1087 range(lowc, upd, ","); 1088 printf(" ----\n"); 1089 1090 do_output = 0; 1091 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1092 if (cvp->c <= cvp->d) { 1093 cvp = context_vec_start; 1094 do_output++; 1095 break; 1096 } 1097 if (do_output) { 1098 while (cvp <= context_vec_ptr) { 1099 a = cvp->a; 1100 b = cvp->b; 1101 c = cvp->c; 1102 d = cvp->d; 1103 1104 if (a <= b && c <= d) 1105 ch = 'c'; 1106 else 1107 ch = (a <= b) ? 'd' : 'a'; 1108 1109 if (ch == 'd') 1110 fetch(ixnew, lowc, d, input[1], " ", 0); 1111 else { 1112 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1113 fetch(ixnew, c, d, input[1], 1114 ch == 'c' ? "! " : "+ ", 0); 1115 } 1116 lowc = d + 1; 1117 cvp++; 1118 } 1119 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1120 } 1121 context_vec_ptr = context_vec_start - 1; 1122 } 1123