1 /* $OpenBSD: diffreg.c,v 1.14 2003/06/25 07:26:59 tedu Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void prepare(int, FILE *); 153 static void prune(void); 154 static void equiv(struct line *, int, struct line *, int, int *); 155 static void unravel(int); 156 static void unsort(struct line *, int, int *); 157 static void change(int, int, int, int); 158 static void sort(struct line *, int); 159 static int newcand(int, int, int); 160 static int search(int *, int, int); 161 static int skipline(int); 162 static int asciifile(FILE *); 163 static int stone(int *, int, int *, int *); 164 static int readhash(FILE *); 165 166 /* 167 * chrtran points to one of 2 translation tables: cup2low if folding upper to 168 * lower case clow2low if not folding case 169 */ 170 u_char clow2low[256] = { 171 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 172 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 173 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 174 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 175 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 176 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 177 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 178 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 179 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 180 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 181 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 182 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 183 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 184 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 185 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 186 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 187 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 188 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 189 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 190 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 191 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 192 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 193 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 194 0xfd, 0xfe, 0xff 195 }; 196 197 u_char cup2low[256] = { 198 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 199 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 200 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 201 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 202 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 203 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 204 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 205 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 206 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 207 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 208 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 209 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 210 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 211 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 212 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 213 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 214 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 215 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 216 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 217 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 218 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 219 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 220 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 221 0xfd, 0xfe, 0xff 222 }; 223 224 void 225 diffreg(void) 226 { 227 char buf1[BUFSIZ], buf2[BUFSIZ]; 228 FILE *f1, *f2; 229 int i, j; 230 231 if (hflag) { 232 diffargv[0] = "diffh"; 233 execv(diffh, diffargv); 234 fprintf(stderr, "diff: "); 235 perror(diffh); 236 done(0); 237 } 238 chrtran = (iflag ? cup2low : clow2low); 239 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 240 file1 = splice(file1, file2); 241 if (stat(file1, &stb1) < 0) { 242 fprintf(stderr, "diff: "); 243 perror(file1); 244 done(0); 245 } 246 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 247 file2 = splice(file2, file1); 248 if (stat(file2, &stb2) < 0) { 249 fprintf(stderr, "diff: "); 250 perror(file2); 251 done(0); 252 } 253 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 254 if (!strcmp(file2, "-")) { 255 fprintf(stderr, "diff: can't specify - -\n"); 256 done(0); 257 } 258 file1 = copytemp(); 259 if (stat(file1, &stb1) < 0) { 260 fprintf(stderr, "diff: "); 261 perror(file1); 262 done(0); 263 } 264 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 265 file2 = copytemp(); 266 if (stat(file2, &stb2) < 0) { 267 fprintf(stderr, "diff: "); 268 perror(file2); 269 done(0); 270 } 271 } 272 if ((f1 = fopen(file1, "r")) == NULL) { 273 fprintf(stderr, "diff: "); 274 perror(file1); 275 done(0); 276 } 277 if ((f2 = fopen(file2, "r")) == NULL) { 278 fprintf(stderr, "diff: "); 279 perror(file2); 280 fclose(f1); 281 done(0); 282 } 283 if (stb1.st_size != stb2.st_size) 284 goto notsame; 285 for (;;) { 286 i = fread(buf1, 1, BUFSIZ, f1); 287 j = fread(buf2, 1, BUFSIZ, f2); 288 if (i < 0 || j < 0 || i != j) 289 goto notsame; 290 if (i == 0 && j == 0) { 291 fclose(f1); 292 fclose(f2); 293 status = 0; /* files don't differ */ 294 goto same; 295 } 296 for (j = 0; j < i; j++) 297 if (buf1[j] != buf2[j]) 298 goto notsame; 299 } 300 notsame: 301 /* 302 * Files certainly differ at this point; set status accordingly 303 */ 304 status = 1; 305 if (!asciifile(f1) || !asciifile(f2)) { 306 printf("Binary files %s and %s differ\n", file1, file2); 307 fclose(f1); 308 fclose(f2); 309 done(0); 310 } 311 prepare(0, f1); 312 prepare(1, f2); 313 fclose(f1); 314 fclose(f2); 315 prune(); 316 sort(sfile[0], slen[0]); 317 sort(sfile[1], slen[1]); 318 319 member = (int *)file[1]; 320 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 321 member = ralloc(member, (slen[1] + 2) * sizeof(int)); 322 323 class = (int *)file[0]; 324 unsort(sfile[0], slen[0], class); 325 class = ralloc(class, (slen[0] + 2) * sizeof(int)); 326 327 klist = talloc((slen[0] + 2) * sizeof(int)); 328 clist = talloc(sizeof(cand)); 329 i = stone(class, slen[0], member, klist); 330 free(member); 331 free(class); 332 333 J = talloc((len[0] + 2) * sizeof(int)); 334 unravel(klist[i]); 335 free(clist); 336 free(klist); 337 338 ixold = talloc((len[0] + 2) * sizeof(long)); 339 ixnew = talloc((len[1] + 2) * sizeof(long)); 340 check(); 341 output(); 342 status = anychange; 343 same: 344 if (opt == D_CONTEXT && anychange == 0) 345 printf("No differences encountered\n"); 346 done(0); 347 } 348 349 char *tempfile = _PATH_TMP; 350 351 char * 352 copytemp(void) 353 { 354 char buf[BUFSIZ]; 355 int i, f; 356 357 signal(SIGHUP, done); 358 signal(SIGINT, done); 359 signal(SIGPIPE, done); 360 signal(SIGTERM, done); 361 f = mkstemp(tempfile); 362 if (f < 0) { 363 fprintf(stderr, "diff: "); 364 perror(tempfile); 365 done(0); 366 } 367 while ((i = read(0, buf, BUFSIZ)) > 0) 368 if (write(f, buf, i) != i) { 369 fprintf(stderr, "diff: "); 370 perror(tempfile); 371 done(0); 372 } 373 close(f); 374 return (tempfile); 375 } 376 377 char * 378 splice(char *dir, char *file) 379 { 380 char *tail, buf[BUFSIZ]; 381 382 if (!strcmp(file, "-")) { 383 fprintf(stderr, "diff: can't specify - with other arg directory\n"); 384 done(0); 385 } 386 tail = strrchr(file, '/'); 387 if (tail == 0) 388 tail = file; 389 else 390 tail++; 391 snprintf(buf, sizeof buf, "%s/%s", dir, tail); 392 return (strdup(buf)); 393 } 394 395 static void 396 prepare(int i, FILE *fd) 397 { 398 struct line *p; 399 int j, h; 400 401 fseek(fd, 0L, SEEK_SET); 402 p = talloc(3 * sizeof(struct line)); 403 for (j = 0; (h = readhash(fd));) { 404 p = ralloc(p, (++j + 3) * sizeof(struct line)); 405 p[j].value = h; 406 } 407 len[i] = j; 408 file[i] = p; 409 } 410 411 static void 412 prune(void) 413 { 414 int i, j; 415 416 for (pref = 0; pref < len[0] && pref < len[1] && 417 file[0][pref + 1].value == file[1][pref + 1].value; 418 pref++) 419 ; 420 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 421 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 422 suff++) 423 ; 424 for (j = 0; j < 2; j++) { 425 sfile[j] = file[j] + pref; 426 slen[j] = len[j] - pref - suff; 427 for (i = 0; i <= slen[j]; i++) 428 sfile[j][i].serial = i; 429 } 430 } 431 432 static void 433 equiv(struct line *a, int n, struct line *b, int m, int *c) 434 { 435 int i, j; 436 437 i = j = 1; 438 while (i <= n && j <= m) { 439 if (a[i].value < b[j].value) 440 a[i++].value = 0; 441 else if (a[i].value == b[j].value) 442 a[i++].value = j; 443 else 444 j++; 445 } 446 while (i <= n) 447 a[i++].value = 0; 448 b[m + 1].value = 0; 449 j = 0; 450 while (++j <= m) { 451 c[j] = -b[j].serial; 452 while (b[j + 1].value == b[j].value) { 453 j++; 454 c[j] = b[j].serial; 455 } 456 } 457 c[j] = -1; 458 } 459 460 static int 461 stone(int *a, int n, int *b, int *c) 462 { 463 int i, k, y, j, l; 464 int oldc, tc, oldl; 465 466 k = 0; 467 c[0] = newcand(0, 0, 0); 468 for (i = 1; i <= n; i++) { 469 j = a[i]; 470 if (j == 0) 471 continue; 472 y = -b[j]; 473 oldl = 0; 474 oldc = c[0]; 475 do { 476 if (y <= clist[oldc].y) 477 continue; 478 l = search(c, k, y); 479 if (l != oldl + 1) 480 oldc = c[l - 1]; 481 if (l <= k) { 482 if (clist[c[l]].y <= y) 483 continue; 484 tc = c[l]; 485 c[l] = newcand(i, y, oldc); 486 oldc = tc; 487 oldl = l; 488 } else { 489 c[l] = newcand(i, y, oldc); 490 k++; 491 break; 492 } 493 } while ((y = b[++j]) > 0); 494 } 495 return (k); 496 } 497 498 static int 499 newcand(int x, int y, int pred) 500 { 501 struct cand *q; 502 503 clist = ralloc(clist, ++clen * sizeof(cand)); 504 q = clist + clen - 1; 505 q->x = x; 506 q->y = y; 507 q->pred = pred; 508 return (clen - 1); 509 } 510 511 static int 512 search(int *c, int k, int y) 513 { 514 int i, j, l, t; 515 516 if (clist[c[k]].y < y) /* quick look for typical case */ 517 return (k + 1); 518 i = 0; 519 j = k + 1; 520 while (1) { 521 l = i + j; 522 if ((l >>= 1) <= i) 523 break; 524 t = clist[c[l]].y; 525 if (t > y) 526 j = l; 527 else if (t < y) 528 i = l; 529 else 530 return (l); 531 } 532 return (l + 1); 533 } 534 535 static void 536 unravel(int p) 537 { 538 struct cand *q; 539 int i; 540 541 for (i = 0; i <= len[0]; i++) 542 J[i] = i <= pref ? i : 543 i > len[0] - suff ? i + len[1] - len[0] : 0; 544 for (q = clist + p; q->y != 0; q = clist + q->pred) 545 J[q->x + pref] = q->y + pref; 546 } 547 548 /* 549 * check does double duty: 1. ferret out any fortuitous correspondences due 550 * to confounding by hashing (which result in "jackpot") 2. collect random 551 * access indexes to the two files 552 */ 553 554 static void 555 check(void) 556 { 557 int i, j, jackpot, c, d; 558 long ctold, ctnew; 559 560 if ((input[0] = fopen(file1, "r")) == NULL) { 561 perror(file1); 562 done(0); 563 } 564 if ((input[1] = fopen(file2, "r")) == NULL) { 565 perror(file2); 566 done(0); 567 } 568 j = 1; 569 ixold[0] = ixnew[0] = 0; 570 jackpot = 0; 571 ctold = ctnew = 0; 572 for (i = 1; i <= len[0]; i++) { 573 if (J[i] == 0) { 574 ixold[i] = ctold += skipline(0); 575 continue; 576 } 577 while (j < J[i]) { 578 ixnew[j] = ctnew += skipline(1); 579 j++; 580 } 581 if (bflag || wflag || iflag) { 582 for (;;) { 583 c = getc(input[0]); 584 d = getc(input[1]); 585 ctold++; 586 ctnew++; 587 if (bflag && isspace(c) && isspace(d)) { 588 do { 589 if (c == '\n') 590 break; 591 ctold++; 592 } while (isspace(c = getc(input[0]))); 593 do { 594 if (d == '\n') 595 break; 596 ctnew++; 597 } while (isspace(d = getc(input[1]))); 598 } else if (wflag) { 599 while (isspace(c) && c != '\n') { 600 c = getc(input[0]); 601 ctold++; 602 } 603 while (isspace(d) && d != '\n') { 604 d = getc(input[1]); 605 ctnew++; 606 } 607 } 608 if (chrtran[c] != chrtran[d]) { 609 jackpot++; 610 J[i] = 0; 611 if (c != '\n') 612 ctold += skipline(0); 613 if (d != '\n') 614 ctnew += skipline(1); 615 break; 616 } 617 if (c == '\n') 618 break; 619 } 620 } else { 621 for (;;) { 622 ctold++; 623 ctnew++; 624 if ((c = getc(input[0])) != (d = getc(input[1]))) { 625 /* jackpot++; */ 626 J[i] = 0; 627 if (c != '\n') 628 ctold += skipline(0); 629 if (d != '\n') 630 ctnew += skipline(1); 631 break; 632 } 633 if (c == '\n') 634 break; 635 } 636 } 637 ixold[i] = ctold; 638 ixnew[j] = ctnew; 639 j++; 640 } 641 for (; j <= len[1]; j++) { 642 ixnew[j] = ctnew += skipline(1); 643 } 644 fclose(input[0]); 645 fclose(input[1]); 646 /* 647 * if (jackpot) 648 * fprintf(stderr, "jackpot\n"); 649 */ 650 } 651 652 /* shellsort CACM #201 */ 653 static void 654 sort(struct line *a, int n) 655 { 656 struct line *ai, *aim, w; 657 int j, m = 0, k; 658 659 if (n == 0) 660 return; 661 for (j = 1; j <= n; j *= 2) 662 m = 2 * j - 1; 663 for (m /= 2; m != 0; m /= 2) { 664 k = n - m; 665 for (j = 1; j <= k; j++) { 666 for (ai = &a[j]; ai > a; ai -= m) { 667 aim = &ai[m]; 668 if (aim < ai) 669 break; /* wraparound */ 670 if (aim->value > ai[0].value || 671 (aim->value == ai[0].value && 672 aim->serial > ai[0].serial)) 673 break; 674 w.value = ai[0].value; 675 ai[0].value = aim->value; 676 aim->value = w.value; 677 w.serial = ai[0].serial; 678 ai[0].serial = aim->serial; 679 aim->serial = w.serial; 680 } 681 } 682 } 683 } 684 685 static void 686 unsort(struct line *f, int l, int *b) 687 { 688 int *a, i; 689 690 a = talloc((l + 1) * sizeof(int)); 691 for (i = 1; i <= l; i++) 692 a[f[i].serial] = f[i].value; 693 for (i = 1; i <= l; i++) 694 b[i] = a[i]; 695 free(a); 696 } 697 698 static int 699 skipline(int f) 700 { 701 int i, c; 702 703 for (i = 1; (c = getc(input[f])) != '\n'; i++) 704 if (c < 0) 705 return (i); 706 return (i); 707 } 708 709 static void 710 output(void) 711 { 712 int m, i0, i1, j0, j1; 713 714 input[0] = fopen(file1, "r"); 715 input[1] = fopen(file2, "r"); 716 m = len[0]; 717 J[0] = 0; 718 J[m + 1] = len[1] + 1; 719 if (opt != D_EDIT) { 720 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 721 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 722 i0++; 723 j0 = J[i0 - 1] + 1; 724 i1 = i0 - 1; 725 while (i1 < m && J[i1 + 1] == 0) 726 i1++; 727 j1 = J[i1 + 1] - 1; 728 J[i1] = j1; 729 change(i0, i1, j0, j1); 730 } 731 } else { 732 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 733 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 734 i0--; 735 j0 = J[i0 + 1] - 1; 736 i1 = i0 + 1; 737 while (i1 > 1 && J[i1 - 1] == 0) 738 i1--; 739 j1 = J[i1 - 1] + 1; 740 J[i1] = j1; 741 change(i1, i0, j1, j0); 742 } 743 } 744 if (m == 0) 745 change(1, 0, 1, len[1]); 746 if (opt == D_IFDEF) { 747 for (;;) { 748 #define c i0 749 c = getc(input[0]); 750 if (c < 0) 751 return; 752 putchar(c); 753 } 754 #undef c 755 } 756 if (anychange && opt == D_CONTEXT) 757 dump_context_vec(); 758 } 759 760 /* 761 * The following struct is used to record change information when 762 * doing a "context" diff. (see routine "change" to understand the 763 * highly mneumonic field names) 764 */ 765 struct context_vec { 766 int a; /* start line in old file */ 767 int b; /* end line in old file */ 768 int c; /* start line in new file */ 769 int d; /* end line in new file */ 770 }; 771 772 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 773 774 #define MAX_CONTEXT 128 775 776 /* 777 * indicate that there is a difference between lines a and b of the from file 778 * to get to lines c to d of the to file. If a is greater then b then there 779 * are no lines in the from file involved and this means that there were 780 * lines appended (beginning at b). If c is greater than d then there are 781 * lines missing from the to file. 782 */ 783 static void 784 change(int a, int b, int c, int d) 785 { 786 struct stat stbuf; 787 788 if (opt != D_IFDEF && a > b && c > d) 789 return; 790 if (anychange == 0) { 791 anychange = 1; 792 if (opt == D_CONTEXT) { 793 printf("*** %s ", file1); 794 stat(file1, &stbuf); 795 printf("%s--- %s ", 796 ctime(&stbuf.st_mtime), file2); 797 stat(file2, &stbuf); 798 printf("%s", ctime(&stbuf.st_mtime)); 799 800 context_vec_start = talloc(MAX_CONTEXT * 801 sizeof(struct context_vec)); 802 context_vec_end = context_vec_start + MAX_CONTEXT; 803 context_vec_ptr = context_vec_start - 1; 804 } 805 } 806 if (opt == D_CONTEXT) { 807 /* 808 * if this new change is within 'context' lines of 809 * the previous change, just add it to the change 810 * record. If the record is full or if this 811 * change is more than 'context' lines from the previous 812 * change, dump the record, reset it & add the new change. 813 */ 814 if (context_vec_ptr >= context_vec_end || 815 (context_vec_ptr >= context_vec_start && 816 a > (context_vec_ptr->b + 2 * context) && 817 c > (context_vec_ptr->d + 2 * context))) 818 dump_context_vec(); 819 820 context_vec_ptr++; 821 context_vec_ptr->a = a; 822 context_vec_ptr->b = b; 823 context_vec_ptr->c = c; 824 context_vec_ptr->d = d; 825 return; 826 } 827 switch (opt) { 828 829 case D_NORMAL: 830 case D_EDIT: 831 range(a, b, ","); 832 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 833 if (opt == D_NORMAL) 834 range(c, d, ","); 835 putchar('\n'); 836 break; 837 case D_REVERSE: 838 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 839 range(a, b, " "); 840 putchar('\n'); 841 break; 842 case D_NREVERSE: 843 if (a > b) 844 printf("a%d %d\n", b, d - c + 1); 845 else { 846 printf("d%d %d\n", a, b - a + 1); 847 if (!(c > d)) 848 /* add changed lines */ 849 printf("a%d %d\n", b, d - c + 1); 850 } 851 break; 852 } 853 if (opt == D_NORMAL || opt == D_IFDEF) { 854 fetch(ixold, a, b, input[0], "< ", 1); 855 if (a <= b && c <= d && opt == D_NORMAL) 856 prints("---\n"); 857 } 858 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 859 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 860 prints(".\n"); 861 if (inifdef) { 862 fprintf(stdout, "#endif /* %s */\n", endifname); 863 inifdef = 0; 864 } 865 } 866 867 static void 868 range(int a, int b, char *separator) 869 { 870 printf("%d", a > b ? b : a); 871 if (a < b) 872 printf("%s%d", separator, b); 873 } 874 875 static void 876 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 877 { 878 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 879 int i, j, c, col, nc; 880 881 /* 882 * When doing #ifdef's, copy down to current line 883 * if this is the first file, so that stuff makes it to output. 884 */ 885 if (opt == D_IFDEF && oldfile) { 886 long curpos = ftell(lb); 887 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 888 nc = f[a > b ? b : a - 1] - curpos; 889 for (i = 0; i < nc; i++) 890 putchar(getc(lb)); 891 } 892 if (a > b) 893 return; 894 if (opt == D_IFDEF) { 895 if (inifdef) 896 fprintf(stdout, "#else /* %s%s */\n", 897 oneflag && oldfile == 1 ? "!" : "", ifdef2); 898 else { 899 if (oneflag) { 900 /* There was only one ifdef given */ 901 endifname = ifdef2; 902 if (oldfile) 903 fprintf(stdout, "#ifndef %s\n", endifname); 904 else 905 fprintf(stdout, "#ifdef %s\n", endifname); 906 } else { 907 endifname = oldfile ? ifdef1 : ifdef2; 908 fprintf(stdout, "#ifdef %s\n", endifname); 909 } 910 } 911 inifdef = 1 + oldfile; 912 } 913 for (i = a; i <= b; i++) { 914 fseek(lb, f[i - 1], SEEK_SET); 915 nc = f[i] - f[i - 1]; 916 if (opt != D_IFDEF) 917 prints(s); 918 col = 0; 919 for (j = 0; j < nc; j++) { 920 c = getc(lb); 921 if (c == '\t' && tflag) 922 do 923 putchar(' '); 924 while (++col & 7); 925 else { 926 putchar(c); 927 col++; 928 } 929 } 930 } 931 932 if (inifdef && !wantelses) { 933 fprintf(stdout, "#endif /* %s */\n", endifname); 934 inifdef = 0; 935 } 936 } 937 938 #define POW2 /* define only if HALFLONG is 2**n */ 939 #define HALFLONG 16 940 #define low(x) (x&((1L<<HALFLONG)-1)) 941 #define high(x) (x>>HALFLONG) 942 943 /* 944 * hashing has the effect of 945 * arranging line in 7-bit bytes and then 946 * summing 1-s complement in 16-bit hunks 947 */ 948 static int 949 readhash(FILE *f) 950 { 951 unsigned int shift; 952 int t, space; 953 long sum; 954 955 sum = 1; 956 space = 0; 957 if (!bflag && !wflag) { 958 if (iflag) 959 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 960 if (t == -1) 961 return (0); 962 sum += (long)chrtran[t] << (shift 963 #ifdef POW2 964 &= HALFLONG - 1); 965 #else 966 %= HALFLONG); 967 #endif 968 } 969 else 970 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 971 if (t == -1) 972 return (0); 973 sum += (long)t << (shift 974 #ifdef POW2 975 &= HALFLONG - 1); 976 #else 977 %= HALFLONG); 978 #endif 979 } 980 } else { 981 for (shift = 0;;) { 982 switch (t = getc(f)) { 983 case -1: 984 return (0); 985 case '\t': 986 case ' ': 987 space++; 988 continue; 989 default: 990 if (space && !wflag) { 991 shift += 7; 992 space = 0; 993 } 994 sum += (long)chrtran[t] << (shift 995 #ifdef POW2 996 &= HALFLONG - 1); 997 #else 998 %= HALFLONG); 999 #endif 1000 shift += 7; 1001 continue; 1002 case '\n': 1003 break; 1004 } 1005 break; 1006 } 1007 } 1008 sum = low(sum) + high(sum); 1009 return ((short) low(sum) + (short) high(sum)); 1010 } 1011 1012 static int 1013 asciifile(FILE *f) 1014 { 1015 char buf[BUFSIZ], *cp; 1016 int cnt; 1017 1018 fseek(f, 0L, SEEK_SET); 1019 cnt = fread(buf, 1, BUFSIZ, f); 1020 cp = buf; 1021 while (--cnt >= 0) 1022 if (*cp++ & 0200) 1023 return (0); 1024 return (1); 1025 } 1026 1027 1028 /* dump accumulated "context" diff changes */ 1029 static void 1030 dump_context_vec(void) 1031 { 1032 struct context_vec *cvp = context_vec_start; 1033 int lowa, upb, lowc, upd, do_output; 1034 int a, b, c, d; 1035 char ch; 1036 1037 if (cvp > context_vec_ptr) 1038 return; 1039 1040 b = d = 0; /* gcc */ 1041 lowa = max(1, cvp->a - context); 1042 upb = min(len[0], context_vec_ptr->b + context); 1043 lowc = max(1, cvp->c - context); 1044 upd = min(len[1], context_vec_ptr->d + context); 1045 1046 printf("***************\n*** "); 1047 range(lowa, upb, ","); 1048 printf(" ****\n"); 1049 1050 /* 1051 * output changes to the "old" file. The first loop suppresses 1052 * output if there were no changes to the "old" file (we'll see 1053 * the "old" lines as context in the "new" list). 1054 */ 1055 do_output = 0; 1056 for (; cvp <= context_vec_ptr; cvp++) 1057 if (cvp->a <= cvp->b) { 1058 cvp = context_vec_start; 1059 do_output++; 1060 break; 1061 } 1062 if (do_output) { 1063 while (cvp <= context_vec_ptr) { 1064 a = cvp->a; 1065 b = cvp->b; 1066 c = cvp->c; 1067 d = cvp->d; 1068 1069 if (a <= b && c <= d) 1070 ch = 'c'; 1071 else 1072 ch = (a <= b) ? 'd' : 'a'; 1073 1074 if (ch == 'a') 1075 fetch(ixold, lowa, b, input[0], " ", 0); 1076 else { 1077 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1078 fetch(ixold, a, b, input[0], 1079 ch == 'c' ? "! " : "- ", 0); 1080 } 1081 lowa = b + 1; 1082 cvp++; 1083 } 1084 fetch(ixold, b + 1, upb, input[0], " ", 0); 1085 } 1086 /* output changes to the "new" file */ 1087 printf("--- "); 1088 range(lowc, upd, ","); 1089 printf(" ----\n"); 1090 1091 do_output = 0; 1092 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1093 if (cvp->c <= cvp->d) { 1094 cvp = context_vec_start; 1095 do_output++; 1096 break; 1097 } 1098 if (do_output) { 1099 while (cvp <= context_vec_ptr) { 1100 a = cvp->a; 1101 b = cvp->b; 1102 c = cvp->c; 1103 d = cvp->d; 1104 1105 if (a <= b && c <= d) 1106 ch = 'c'; 1107 else 1108 ch = (a <= b) ? 'd' : 'a'; 1109 1110 if (ch == 'd') 1111 fetch(ixnew, lowc, d, input[1], " ", 0); 1112 else { 1113 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1114 fetch(ixnew, c, d, input[1], 1115 ch == 'c' ? "! " : "+ ", 0); 1116 } 1117 lowc = d + 1; 1118 cvp++; 1119 } 1120 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1121 } 1122 context_vec_ptr = context_vec_start - 1; 1123 } 1124