1 /* $OpenBSD: diffreg.c,v 1.16 2003/06/25 21:43:49 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void prepare(int, FILE *); 153 static void prune(void); 154 static void equiv(struct line *, int, struct line *, int, int *); 155 static void unravel(int); 156 static void unsort(struct line *, int, int *); 157 static void change(int, int, int, int); 158 static void sort(struct line *, int); 159 static int newcand(int, int, int); 160 static int search(int *, int, int); 161 static int skipline(int); 162 static int asciifile(FILE *); 163 static int stone(int *, int, int *, int *); 164 static int readhash(FILE *); 165 166 /* 167 * chrtran points to one of 2 translation tables: cup2low if folding upper to 168 * lower case clow2low if not folding case 169 */ 170 u_char clow2low[256] = { 171 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 172 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 173 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 174 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 175 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 176 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 177 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 178 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 179 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 180 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 181 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 182 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 183 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 184 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 185 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 186 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 187 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 188 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 189 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 190 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 191 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 192 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 193 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 194 0xfd, 0xfe, 0xff 195 }; 196 197 u_char cup2low[256] = { 198 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 199 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 200 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 201 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 202 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 203 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 204 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 205 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 206 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 207 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 208 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 209 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 210 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 211 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 212 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 213 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 214 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 215 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 216 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 217 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 218 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 219 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 220 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 221 0xfd, 0xfe, 0xff 222 }; 223 224 void 225 diffreg(void) 226 { 227 char buf1[BUFSIZ], buf2[BUFSIZ]; 228 FILE *f1, *f2; 229 int i, j; 230 231 if (hflag) { 232 diffargv[0] = "diffh"; 233 execv(diffh, diffargv); 234 warn("%s", diffh); 235 done(0); 236 } 237 chrtran = (iflag ? cup2low : clow2low); 238 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 239 file1 = splice(file1, file2); 240 if (stat(file1, &stb1) < 0) { 241 warn("%s", file1); 242 done(0); 243 } 244 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 245 file2 = splice(file2, file1); 246 if (stat(file2, &stb2) < 0) { 247 warn("%s", file2); 248 done(0); 249 } 250 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 251 if (!strcmp(file2, "-")) { 252 warnx("can't specify - -"); 253 done(0); 254 } 255 file1 = copytemp(); 256 if (stat(file1, &stb1) < 0) { 257 warn("%s", file1); 258 done(0); 259 } 260 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 261 file2 = copytemp(); 262 if (stat(file2, &stb2) < 0) { 263 warn("%s", file2); 264 done(0); 265 } 266 } 267 if ((f1 = fopen(file1, "r")) == NULL) { 268 warn("%s", file1); 269 done(0); 270 } 271 if ((f2 = fopen(file2, "r")) == NULL) { 272 warn("%s", file2); 273 fclose(f1); 274 done(0); 275 } 276 if (stb1.st_size != stb2.st_size) 277 goto notsame; 278 for (;;) { 279 i = fread(buf1, 1, BUFSIZ, f1); 280 j = fread(buf2, 1, BUFSIZ, f2); 281 if (i < 0 || j < 0 || i != j) 282 goto notsame; 283 if (i == 0 && j == 0) { 284 fclose(f1); 285 fclose(f2); 286 status = 0; /* files don't differ */ 287 goto same; 288 } 289 for (j = 0; j < i; j++) 290 if (buf1[j] != buf2[j]) 291 goto notsame; 292 } 293 notsame: 294 /* 295 * Files certainly differ at this point; set status accordingly 296 */ 297 status = 1; 298 if (!asciifile(f1) || !asciifile(f2)) { 299 printf("Binary files %s and %s differ\n", file1, file2); 300 fclose(f1); 301 fclose(f2); 302 done(0); 303 } 304 prepare(0, f1); 305 prepare(1, f2); 306 fclose(f1); 307 fclose(f2); 308 prune(); 309 sort(sfile[0], slen[0]); 310 sort(sfile[1], slen[1]); 311 312 member = (int *)file[1]; 313 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 314 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 315 316 class = (int *)file[0]; 317 unsort(sfile[0], slen[0], class); 318 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 319 320 klist = emalloc((slen[0] + 2) * sizeof(int)); 321 clist = emalloc(sizeof(cand)); 322 i = stone(class, slen[0], member, klist); 323 free(member); 324 free(class); 325 326 J = emalloc((len[0] + 2) * sizeof(int)); 327 unravel(klist[i]); 328 free(clist); 329 free(klist); 330 331 ixold = emalloc((len[0] + 2) * sizeof(long)); 332 ixnew = emalloc((len[1] + 2) * sizeof(long)); 333 check(); 334 output(); 335 status = anychange; 336 same: 337 if (opt == D_CONTEXT && anychange == 0) 338 printf("No differences encountered\n"); 339 done(0); 340 } 341 342 char *tempfile = _PATH_TMP; 343 344 char * 345 copytemp(void) 346 { 347 char buf[BUFSIZ]; 348 int i, f; 349 350 signal(SIGHUP, done); 351 signal(SIGINT, done); 352 signal(SIGPIPE, done); 353 signal(SIGTERM, done); 354 f = mkstemp(tempfile); 355 if (f < 0) { 356 warn("%s", tempfile); 357 done(0); 358 } 359 while ((i = read(0, buf, BUFSIZ)) > 0) 360 if (write(f, buf, i) != i) { 361 warn("%s", tempfile); 362 done(0); 363 } 364 close(f); 365 return (tempfile); 366 } 367 368 char * 369 splice(char *dir, char *file) 370 { 371 char *tail, *buf; 372 size_t len; 373 374 if (!strcmp(file, "-")) { 375 warnx("can't specify - with other arg directory"); 376 done(0); 377 } 378 tail = strrchr(file, '/'); 379 if (tail == NULL) 380 tail = file; 381 else 382 tail++; 383 len = strlen(dir) + strlen(tail) + 1; 384 buf = emalloc(len); 385 snprintf(buf, len, "%s/%s", dir, tail); 386 return (buf); 387 } 388 389 static void 390 prepare(int i, FILE *fd) 391 { 392 struct line *p; 393 int j, h; 394 395 fseek(fd, 0L, SEEK_SET); 396 p = emalloc(3 * sizeof(struct line)); 397 for (j = 0; (h = readhash(fd));) { 398 p = erealloc(p, (++j + 3) * sizeof(struct line)); 399 p[j].value = h; 400 } 401 len[i] = j; 402 file[i] = p; 403 } 404 405 static void 406 prune(void) 407 { 408 int i, j; 409 410 for (pref = 0; pref < len[0] && pref < len[1] && 411 file[0][pref + 1].value == file[1][pref + 1].value; 412 pref++) 413 ; 414 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 415 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 416 suff++) 417 ; 418 for (j = 0; j < 2; j++) { 419 sfile[j] = file[j] + pref; 420 slen[j] = len[j] - pref - suff; 421 for (i = 0; i <= slen[j]; i++) 422 sfile[j][i].serial = i; 423 } 424 } 425 426 static void 427 equiv(struct line *a, int n, struct line *b, int m, int *c) 428 { 429 int i, j; 430 431 i = j = 1; 432 while (i <= n && j <= m) { 433 if (a[i].value < b[j].value) 434 a[i++].value = 0; 435 else if (a[i].value == b[j].value) 436 a[i++].value = j; 437 else 438 j++; 439 } 440 while (i <= n) 441 a[i++].value = 0; 442 b[m + 1].value = 0; 443 j = 0; 444 while (++j <= m) { 445 c[j] = -b[j].serial; 446 while (b[j + 1].value == b[j].value) { 447 j++; 448 c[j] = b[j].serial; 449 } 450 } 451 c[j] = -1; 452 } 453 454 static int 455 stone(int *a, int n, int *b, int *c) 456 { 457 int i, k, y, j, l; 458 int oldc, tc, oldl; 459 460 k = 0; 461 c[0] = newcand(0, 0, 0); 462 for (i = 1; i <= n; i++) { 463 j = a[i]; 464 if (j == 0) 465 continue; 466 y = -b[j]; 467 oldl = 0; 468 oldc = c[0]; 469 do { 470 if (y <= clist[oldc].y) 471 continue; 472 l = search(c, k, y); 473 if (l != oldl + 1) 474 oldc = c[l - 1]; 475 if (l <= k) { 476 if (clist[c[l]].y <= y) 477 continue; 478 tc = c[l]; 479 c[l] = newcand(i, y, oldc); 480 oldc = tc; 481 oldl = l; 482 } else { 483 c[l] = newcand(i, y, oldc); 484 k++; 485 break; 486 } 487 } while ((y = b[++j]) > 0); 488 } 489 return (k); 490 } 491 492 static int 493 newcand(int x, int y, int pred) 494 { 495 struct cand *q; 496 497 clist = erealloc(clist, ++clen * sizeof(cand)); 498 q = clist + clen - 1; 499 q->x = x; 500 q->y = y; 501 q->pred = pred; 502 return (clen - 1); 503 } 504 505 static int 506 search(int *c, int k, int y) 507 { 508 int i, j, l, t; 509 510 if (clist[c[k]].y < y) /* quick look for typical case */ 511 return (k + 1); 512 i = 0; 513 j = k + 1; 514 while (1) { 515 l = i + j; 516 if ((l >>= 1) <= i) 517 break; 518 t = clist[c[l]].y; 519 if (t > y) 520 j = l; 521 else if (t < y) 522 i = l; 523 else 524 return (l); 525 } 526 return (l + 1); 527 } 528 529 static void 530 unravel(int p) 531 { 532 struct cand *q; 533 int i; 534 535 for (i = 0; i <= len[0]; i++) 536 J[i] = i <= pref ? i : 537 i > len[0] - suff ? i + len[1] - len[0] : 0; 538 for (q = clist + p; q->y != 0; q = clist + q->pred) 539 J[q->x + pref] = q->y + pref; 540 } 541 542 /* 543 * Check does double duty: 544 * 1. ferret out any fortuitous correspondences due 545 * to confounding by hashing (which result in "jackpot") 546 * 2. collect random access indexes to the two files 547 */ 548 static void 549 check(void) 550 { 551 int i, j, jackpot, c, d; 552 long ctold, ctnew; 553 554 if ((input[0] = fopen(file1, "r")) == NULL) { 555 perror(file1); 556 done(0); 557 } 558 if ((input[1] = fopen(file2, "r")) == NULL) { 559 perror(file2); 560 done(0); 561 } 562 j = 1; 563 ixold[0] = ixnew[0] = 0; 564 jackpot = 0; 565 ctold = ctnew = 0; 566 for (i = 1; i <= len[0]; i++) { 567 if (J[i] == 0) { 568 ixold[i] = ctold += skipline(0); 569 continue; 570 } 571 while (j < J[i]) { 572 ixnew[j] = ctnew += skipline(1); 573 j++; 574 } 575 if (bflag || wflag || iflag) { 576 for (;;) { 577 c = getc(input[0]); 578 d = getc(input[1]); 579 ctold++; 580 ctnew++; 581 if (bflag && isspace(c) && isspace(d)) { 582 do { 583 if (c == '\n') 584 break; 585 ctold++; 586 } while (isspace(c = getc(input[0]))); 587 do { 588 if (d == '\n') 589 break; 590 ctnew++; 591 } while (isspace(d = getc(input[1]))); 592 } else if (wflag) { 593 while (isspace(c) && c != '\n') { 594 c = getc(input[0]); 595 ctold++; 596 } 597 while (isspace(d) && d != '\n') { 598 d = getc(input[1]); 599 ctnew++; 600 } 601 } 602 if (chrtran[c] != chrtran[d]) { 603 jackpot++; 604 J[i] = 0; 605 if (c != '\n') 606 ctold += skipline(0); 607 if (d != '\n') 608 ctnew += skipline(1); 609 break; 610 } 611 if (c == '\n') 612 break; 613 } 614 } else { 615 for (;;) { 616 ctold++; 617 ctnew++; 618 if ((c = getc(input[0])) != (d = getc(input[1]))) { 619 /* jackpot++; */ 620 J[i] = 0; 621 if (c != '\n') 622 ctold += skipline(0); 623 if (d != '\n') 624 ctnew += skipline(1); 625 break; 626 } 627 if (c == '\n') 628 break; 629 } 630 } 631 ixold[i] = ctold; 632 ixnew[j] = ctnew; 633 j++; 634 } 635 for (; j <= len[1]; j++) { 636 ixnew[j] = ctnew += skipline(1); 637 } 638 fclose(input[0]); 639 fclose(input[1]); 640 /* 641 * if (jackpot) 642 * fprintf(stderr, "jackpot\n"); 643 */ 644 } 645 646 /* shellsort CACM #201 */ 647 static void 648 sort(struct line *a, int n) 649 { 650 struct line *ai, *aim, w; 651 int j, m = 0, k; 652 653 if (n == 0) 654 return; 655 for (j = 1; j <= n; j *= 2) 656 m = 2 * j - 1; 657 for (m /= 2; m != 0; m /= 2) { 658 k = n - m; 659 for (j = 1; j <= k; j++) { 660 for (ai = &a[j]; ai > a; ai -= m) { 661 aim = &ai[m]; 662 if (aim < ai) 663 break; /* wraparound */ 664 if (aim->value > ai[0].value || 665 (aim->value == ai[0].value && 666 aim->serial > ai[0].serial)) 667 break; 668 w.value = ai[0].value; 669 ai[0].value = aim->value; 670 aim->value = w.value; 671 w.serial = ai[0].serial; 672 ai[0].serial = aim->serial; 673 aim->serial = w.serial; 674 } 675 } 676 } 677 } 678 679 static void 680 unsort(struct line *f, int l, int *b) 681 { 682 int *a, i; 683 684 a = emalloc((l + 1) * sizeof(int)); 685 for (i = 1; i <= l; i++) 686 a[f[i].serial] = f[i].value; 687 for (i = 1; i <= l; i++) 688 b[i] = a[i]; 689 free(a); 690 } 691 692 static int 693 skipline(int f) 694 { 695 int i, c; 696 697 for (i = 1; (c = getc(input[f])) != '\n'; i++) 698 if (c < 0) 699 return (i); 700 return (i); 701 } 702 703 static void 704 output(void) 705 { 706 int m, i0, i1, j0, j1; 707 708 input[0] = fopen(file1, "r"); 709 input[1] = fopen(file2, "r"); 710 m = len[0]; 711 J[0] = 0; 712 J[m + 1] = len[1] + 1; 713 if (opt != D_EDIT) { 714 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 715 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 716 i0++; 717 j0 = J[i0 - 1] + 1; 718 i1 = i0 - 1; 719 while (i1 < m && J[i1 + 1] == 0) 720 i1++; 721 j1 = J[i1 + 1] - 1; 722 J[i1] = j1; 723 change(i0, i1, j0, j1); 724 } 725 } else { 726 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 727 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 728 i0--; 729 j0 = J[i0 + 1] - 1; 730 i1 = i0 + 1; 731 while (i1 > 1 && J[i1 - 1] == 0) 732 i1--; 733 j1 = J[i1 - 1] + 1; 734 J[i1] = j1; 735 change(i1, i0, j1, j0); 736 } 737 } 738 if (m == 0) 739 change(1, 0, 1, len[1]); 740 if (opt == D_IFDEF) { 741 for (;;) { 742 #define c i0 743 c = getc(input[0]); 744 if (c < 0) 745 return; 746 putchar(c); 747 } 748 #undef c 749 } 750 if (anychange && opt == D_CONTEXT) 751 dump_context_vec(); 752 } 753 754 /* 755 * The following struct is used to record change information when 756 * doing a "context" diff. (see routine "change" to understand the 757 * highly mneumonic field names) 758 */ 759 struct context_vec { 760 int a; /* start line in old file */ 761 int b; /* end line in old file */ 762 int c; /* start line in new file */ 763 int d; /* end line in new file */ 764 }; 765 766 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 767 768 #define MAX_CONTEXT 128 769 770 /* 771 * indicate that there is a difference between lines a and b of the from file 772 * to get to lines c to d of the to file. If a is greater then b then there 773 * are no lines in the from file involved and this means that there were 774 * lines appended (beginning at b). If c is greater than d then there are 775 * lines missing from the to file. 776 */ 777 static void 778 change(int a, int b, int c, int d) 779 { 780 struct stat stbuf; 781 782 if (opt != D_IFDEF && a > b && c > d) 783 return; 784 if (anychange == 0) { 785 anychange = 1; 786 if (opt == D_CONTEXT) { 787 printf("*** %s ", file1); 788 stat(file1, &stbuf); 789 printf("%s--- %s ", 790 ctime(&stbuf.st_mtime), file2); 791 stat(file2, &stbuf); 792 printf("%s", ctime(&stbuf.st_mtime)); 793 794 context_vec_start = emalloc(MAX_CONTEXT * 795 sizeof(struct context_vec)); 796 context_vec_end = context_vec_start + MAX_CONTEXT; 797 context_vec_ptr = context_vec_start - 1; 798 } 799 } 800 if (opt == D_CONTEXT) { 801 /* 802 * if this new change is within 'context' lines of 803 * the previous change, just add it to the change 804 * record. If the record is full or if this 805 * change is more than 'context' lines from the previous 806 * change, dump the record, reset it & add the new change. 807 */ 808 if (context_vec_ptr >= context_vec_end || 809 (context_vec_ptr >= context_vec_start && 810 a > (context_vec_ptr->b + 2 * context) && 811 c > (context_vec_ptr->d + 2 * context))) 812 dump_context_vec(); 813 814 context_vec_ptr++; 815 context_vec_ptr->a = a; 816 context_vec_ptr->b = b; 817 context_vec_ptr->c = c; 818 context_vec_ptr->d = d; 819 return; 820 } 821 switch (opt) { 822 823 case D_NORMAL: 824 case D_EDIT: 825 range(a, b, ","); 826 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 827 if (opt == D_NORMAL) 828 range(c, d, ","); 829 putchar('\n'); 830 break; 831 case D_REVERSE: 832 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 833 range(a, b, " "); 834 putchar('\n'); 835 break; 836 case D_NREVERSE: 837 if (a > b) 838 printf("a%d %d\n", b, d - c + 1); 839 else { 840 printf("d%d %d\n", a, b - a + 1); 841 if (!(c > d)) 842 /* add changed lines */ 843 printf("a%d %d\n", b, d - c + 1); 844 } 845 break; 846 } 847 if (opt == D_NORMAL || opt == D_IFDEF) { 848 fetch(ixold, a, b, input[0], "< ", 1); 849 if (a <= b && c <= d && opt == D_NORMAL) 850 prints("---\n"); 851 } 852 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 853 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 854 prints(".\n"); 855 if (inifdef) { 856 fprintf(stdout, "#endif /* %s */\n", endifname); 857 inifdef = 0; 858 } 859 } 860 861 static void 862 range(int a, int b, char *separator) 863 { 864 printf("%d", a > b ? b : a); 865 if (a < b) 866 printf("%s%d", separator, b); 867 } 868 869 static void 870 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 871 { 872 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 873 int i, j, c, col, nc; 874 875 /* 876 * When doing #ifdef's, copy down to current line 877 * if this is the first file, so that stuff makes it to output. 878 */ 879 if (opt == D_IFDEF && oldfile) { 880 long curpos = ftell(lb); 881 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 882 nc = f[a > b ? b : a - 1] - curpos; 883 for (i = 0; i < nc; i++) 884 putchar(getc(lb)); 885 } 886 if (a > b) 887 return; 888 if (opt == D_IFDEF) { 889 if (inifdef) 890 fprintf(stdout, "#else /* %s%s */\n", 891 oneflag && oldfile == 1 ? "!" : "", ifdef2); 892 else { 893 if (oneflag) { 894 /* There was only one ifdef given */ 895 endifname = ifdef2; 896 if (oldfile) 897 fprintf(stdout, "#ifndef %s\n", endifname); 898 else 899 fprintf(stdout, "#ifdef %s\n", endifname); 900 } else { 901 endifname = oldfile ? ifdef1 : ifdef2; 902 fprintf(stdout, "#ifdef %s\n", endifname); 903 } 904 } 905 inifdef = 1 + oldfile; 906 } 907 for (i = a; i <= b; i++) { 908 fseek(lb, f[i - 1], SEEK_SET); 909 nc = f[i] - f[i - 1]; 910 if (opt != D_IFDEF) 911 prints(s); 912 col = 0; 913 for (j = 0; j < nc; j++) { 914 c = getc(lb); 915 if (c == '\t' && tflag) 916 do 917 putchar(' '); 918 while (++col & 7); 919 else { 920 putchar(c); 921 col++; 922 } 923 } 924 } 925 926 if (inifdef && !wantelses) { 927 fprintf(stdout, "#endif /* %s */\n", endifname); 928 inifdef = 0; 929 } 930 } 931 932 #define POW2 /* define only if HALFLONG is 2**n */ 933 #define HALFLONG 16 934 #define low(x) (x&((1L<<HALFLONG)-1)) 935 #define high(x) (x>>HALFLONG) 936 937 /* 938 * hashing has the effect of 939 * arranging line in 7-bit bytes and then 940 * summing 1-s complement in 16-bit hunks 941 */ 942 static int 943 readhash(FILE *f) 944 { 945 unsigned int shift; 946 int t, space; 947 long sum; 948 949 sum = 1; 950 space = 0; 951 if (!bflag && !wflag) { 952 if (iflag) 953 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 954 if (t == -1) 955 return (0); 956 sum += (long)chrtran[t] << (shift 957 #ifdef POW2 958 &= HALFLONG - 1); 959 #else 960 %= HALFLONG); 961 #endif 962 } 963 else 964 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 965 if (t == -1) 966 return (0); 967 sum += (long)t << (shift 968 #ifdef POW2 969 &= HALFLONG - 1); 970 #else 971 %= HALFLONG); 972 #endif 973 } 974 } else { 975 for (shift = 0;;) { 976 switch (t = getc(f)) { 977 case -1: 978 return (0); 979 case '\t': 980 case ' ': 981 space++; 982 continue; 983 default: 984 if (space && !wflag) { 985 shift += 7; 986 space = 0; 987 } 988 sum += (long)chrtran[t] << (shift 989 #ifdef POW2 990 &= HALFLONG - 1); 991 #else 992 %= HALFLONG); 993 #endif 994 shift += 7; 995 continue; 996 case '\n': 997 break; 998 } 999 break; 1000 } 1001 } 1002 sum = low(sum) + high(sum); 1003 return ((short) low(sum) + (short) high(sum)); 1004 } 1005 1006 static int 1007 asciifile(FILE *f) 1008 { 1009 char buf[BUFSIZ], *cp; 1010 int cnt; 1011 1012 fseek(f, 0L, SEEK_SET); 1013 cnt = fread(buf, 1, BUFSIZ, f); 1014 cp = buf; 1015 while (--cnt >= 0) 1016 if (*cp++ & 0200) 1017 return (0); 1018 return (1); 1019 } 1020 1021 /* dump accumulated "context" diff changes */ 1022 static void 1023 dump_context_vec(void) 1024 { 1025 struct context_vec *cvp = context_vec_start; 1026 int lowa, upb, lowc, upd, do_output; 1027 int a, b, c, d; 1028 char ch; 1029 1030 if (cvp > context_vec_ptr) 1031 return; 1032 1033 b = d = 0; /* gcc */ 1034 lowa = max(1, cvp->a - context); 1035 upb = min(len[0], context_vec_ptr->b + context); 1036 lowc = max(1, cvp->c - context); 1037 upd = min(len[1], context_vec_ptr->d + context); 1038 1039 printf("***************\n*** "); 1040 range(lowa, upb, ","); 1041 printf(" ****\n"); 1042 1043 /* 1044 * output changes to the "old" file. The first loop suppresses 1045 * output if there were no changes to the "old" file (we'll see 1046 * the "old" lines as context in the "new" list). 1047 */ 1048 do_output = 0; 1049 for (; cvp <= context_vec_ptr; cvp++) 1050 if (cvp->a <= cvp->b) { 1051 cvp = context_vec_start; 1052 do_output++; 1053 break; 1054 } 1055 if (do_output) { 1056 while (cvp <= context_vec_ptr) { 1057 a = cvp->a; 1058 b = cvp->b; 1059 c = cvp->c; 1060 d = cvp->d; 1061 1062 if (a <= b && c <= d) 1063 ch = 'c'; 1064 else 1065 ch = (a <= b) ? 'd' : 'a'; 1066 1067 if (ch == 'a') 1068 fetch(ixold, lowa, b, input[0], " ", 0); 1069 else { 1070 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1071 fetch(ixold, a, b, input[0], 1072 ch == 'c' ? "! " : "- ", 0); 1073 } 1074 lowa = b + 1; 1075 cvp++; 1076 } 1077 fetch(ixold, b + 1, upb, input[0], " ", 0); 1078 } 1079 /* output changes to the "new" file */ 1080 printf("--- "); 1081 range(lowc, upd, ","); 1082 printf(" ----\n"); 1083 1084 do_output = 0; 1085 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1086 if (cvp->c <= cvp->d) { 1087 cvp = context_vec_start; 1088 do_output++; 1089 break; 1090 } 1091 if (do_output) { 1092 while (cvp <= context_vec_ptr) { 1093 a = cvp->a; 1094 b = cvp->b; 1095 c = cvp->c; 1096 d = cvp->d; 1097 1098 if (a <= b && c <= d) 1099 ch = 'c'; 1100 else 1101 ch = (a <= b) ? 'd' : 'a'; 1102 1103 if (ch == 'd') 1104 fetch(ixnew, lowc, d, input[1], " ", 0); 1105 else { 1106 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1107 fetch(ixnew, c, d, input[1], 1108 ch == 'c' ? "! " : "+ ", 0); 1109 } 1110 lowc = d + 1; 1111 cvp++; 1112 } 1113 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1114 } 1115 context_vec_ptr = context_vec_start - 1; 1116 } 1117