1 /* $OpenBSD: diffreg.c,v 1.7 2003/06/25 03:39:23 tedu Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2], line; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void prepare(int, FILE *); 153 static void prune(void); 154 static void equiv(struct line *, int, struct line *, int, int *); 155 static void unravel(int); 156 static void unsort(struct line *, int, int *); 157 static void change(int, int, int, int); 158 static void sort(struct line *, int); 159 static int newcand(int, int, int); 160 static int search(int *, int, int); 161 static int skipline(int); 162 static int asciifile(FILE *); 163 static int stone(int *, int, int *, int *); 164 static int readhash(FILE *); 165 166 /* 167 * chrtran points to one of 2 translation tables: cup2low if folding upper to 168 * lower case clow2low if not folding case 169 */ 170 u_char clow2low[256] = { 171 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 172 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 173 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 174 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 175 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 176 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 177 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 178 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 179 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 180 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 181 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 182 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 183 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 184 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 185 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 186 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 187 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 188 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 189 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 190 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 191 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 192 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 193 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 194 0xfd, 0xfe, 0xff 195 }; 196 197 u_char cup2low[256] = { 198 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 199 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 200 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 201 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 202 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 203 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 204 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 205 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 206 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 207 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 208 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 209 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 210 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 211 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 212 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 213 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 214 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 215 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 216 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 217 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 218 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 219 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 220 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 221 0xfd, 0xfe, 0xff 222 }; 223 224 void 225 diffreg(void) 226 { 227 int i, j; 228 FILE *f1, *f2; 229 char buf1[BUFSIZ], buf2[BUFSIZ]; 230 231 if (hflag) { 232 diffargv[0] = "diffh"; 233 execv(diffh, diffargv); 234 fprintf(stderr, "diff: "); 235 perror(diffh); 236 done(0); 237 } 238 chrtran = (iflag ? cup2low : clow2low); 239 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 240 file1 = splice(file1, file2); 241 if (stat(file1, &stb1) < 0) { 242 fprintf(stderr, "diff: "); 243 perror(file1); 244 done(0); 245 } 246 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 247 file2 = splice(file2, file1); 248 if (stat(file2, &stb2) < 0) { 249 fprintf(stderr, "diff: "); 250 perror(file2); 251 done(0); 252 } 253 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 254 if (!strcmp(file2, "-")) { 255 fprintf(stderr, "diff: can't specify - -\n"); 256 done(0); 257 } 258 file1 = copytemp(); 259 if (stat(file1, &stb1) < 0) { 260 fprintf(stderr, "diff: "); 261 perror(file1); 262 done(0); 263 } 264 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 265 file2 = copytemp(); 266 if (stat(file2, &stb2) < 0) { 267 fprintf(stderr, "diff: "); 268 perror(file2); 269 done(0); 270 } 271 } 272 if ((f1 = fopen(file1, "r")) == NULL) { 273 fprintf(stderr, "diff: "); 274 perror(file1); 275 done(0); 276 } 277 if ((f2 = fopen(file2, "r")) == NULL) { 278 fprintf(stderr, "diff: "); 279 perror(file2); 280 fclose(f1); 281 done(0); 282 } 283 if (stb1.st_size != stb2.st_size) 284 goto notsame; 285 for (;;) { 286 i = fread(buf1, 1, BUFSIZ, f1); 287 j = fread(buf2, 1, BUFSIZ, f2); 288 if (i < 0 || j < 0 || i != j) 289 goto notsame; 290 if (i == 0 && j == 0) { 291 fclose(f1); 292 fclose(f2); 293 status = 0; /* files don't differ */ 294 goto same; 295 } 296 for (j = 0; j < i; j++) 297 if (buf1[j] != buf2[j]) 298 goto notsame; 299 } 300 notsame: 301 /* 302 * Files certainly differ at this point; set status accordingly 303 */ 304 status = 1; 305 if (!asciifile(f1) || !asciifile(f2)) { 306 printf("Binary files %s and %s differ\n", file1, file2); 307 fclose(f1); 308 fclose(f2); 309 done(0); 310 } 311 prepare(0, f1); 312 prepare(1, f2); 313 fclose(f1); 314 fclose(f2); 315 prune(); 316 sort(sfile[0], slen[0]); 317 sort(sfile[1], slen[1]); 318 319 member = (int *)file[1]; 320 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 321 member = ralloc(member, (slen[1] + 2) * sizeof(int)); 322 323 class = (int *)file[0]; 324 unsort(sfile[0], slen[0], class); 325 class = ralloc(class, (slen[0] + 2) * sizeof(int)); 326 327 klist = talloc((slen[0] + 2) * sizeof(int)); 328 clist = talloc(sizeof(cand)); 329 i = stone(class, slen[0], member, klist); 330 free(member); 331 free(class); 332 333 J = talloc((len[0] + 2) * sizeof(int)); 334 unravel(klist[i]); 335 free(clist); 336 free(klist); 337 338 ixold = talloc((len[0] + 2) * sizeof(long)); 339 ixnew = talloc((len[1] + 2) * sizeof(long)); 340 check(); 341 output(); 342 status = anychange; 343 same: 344 if (opt == D_CONTEXT && anychange == 0) 345 printf("No differences encountered\n"); 346 done(0); 347 } 348 349 char *tempfile = _PATH_TMP; 350 351 char * 352 copytemp(void) 353 { 354 char buf[BUFSIZ]; 355 int i, f; 356 357 signal(SIGHUP, catchsig); 358 signal(SIGINT, catchsig); 359 signal(SIGPIPE, catchsig); 360 signal(SIGTERM, catchsig); 361 f = mkstemp(tempfile); 362 if (f < 0) { 363 fprintf(stderr, "diff: "); 364 perror(tempfile); 365 done(0); 366 } 367 while ((i = read(0, buf, BUFSIZ)) > 0) 368 if (write(f, buf, i) != i) { 369 fprintf(stderr, "diff: "); 370 perror(tempfile); 371 done(0); 372 } 373 close(f); 374 return (tempfile); 375 } 376 377 char * 378 splice(char *dir, char *file) 379 { 380 char *tail; 381 char buf[BUFSIZ]; 382 383 if (!strcmp(file, "-")) { 384 fprintf(stderr, "diff: can't specify - with other arg directory\n"); 385 done(0); 386 } 387 tail = rindex(file, '/'); 388 if (tail == 0) 389 tail = file; 390 else 391 tail++; 392 snprintf(buf, sizeof buf, "%s/%s", dir, tail); 393 return (strdup(buf)); 394 } 395 396 static void 397 prepare(int i, FILE *fd) 398 { 399 struct line *p; 400 int j, h; 401 402 fseek(fd, 0, 0); 403 p = talloc(3 * sizeof(line)); 404 for (j = 0; (h = readhash(fd));) { 405 p = ralloc(p, (++j + 3) * sizeof(line)); 406 p[j].value = h; 407 } 408 len[i] = j; 409 file[i] = p; 410 } 411 412 static void 413 prune(void) 414 { 415 int i, j; 416 for (pref = 0; pref < len[0] && pref < len[1] && 417 file[0][pref + 1].value == file[1][pref + 1].value; 418 pref++); 419 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 420 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 421 suff++); 422 for (j = 0; j < 2; j++) { 423 sfile[j] = file[j] + pref; 424 slen[j] = len[j] - pref - suff; 425 for (i = 0; i <= slen[j]; i++) 426 sfile[j][i].serial = i; 427 } 428 } 429 430 static void 431 equiv(struct line *a, int n, struct line *b, int m, int *c) 432 { 433 int i, j; 434 i = j = 1; 435 while (i <= n && j <= m) { 436 if (a[i].value < b[j].value) 437 a[i++].value = 0; 438 else if (a[i].value == b[j].value) 439 a[i++].value = j; 440 else 441 j++; 442 } 443 while (i <= n) 444 a[i++].value = 0; 445 b[m + 1].value = 0; 446 j = 0; 447 while (++j <= m) { 448 c[j] = -b[j].serial; 449 while (b[j + 1].value == b[j].value) { 450 j++; 451 c[j] = b[j].serial; 452 } 453 } 454 c[j] = -1; 455 } 456 457 static int 458 stone(int *a, int n, int *b, int *c) 459 { 460 int i, k, y; 461 int j, l; 462 int oldc, tc; 463 int oldl; 464 k = 0; 465 c[0] = newcand(0, 0, 0); 466 for (i = 1; i <= n; i++) { 467 j = a[i]; 468 if (j == 0) 469 continue; 470 y = -b[j]; 471 oldl = 0; 472 oldc = c[0]; 473 do { 474 if (y <= clist[oldc].y) 475 continue; 476 l = search(c, k, y); 477 if (l != oldl + 1) 478 oldc = c[l - 1]; 479 if (l <= k) { 480 if (clist[c[l]].y <= y) 481 continue; 482 tc = c[l]; 483 c[l] = newcand(i, y, oldc); 484 oldc = tc; 485 oldl = l; 486 } else { 487 c[l] = newcand(i, y, oldc); 488 k++; 489 break; 490 } 491 } while ((y = b[++j]) > 0); 492 } 493 return (k); 494 } 495 496 static int 497 newcand(int x, int y, int pred) 498 { 499 struct cand *q; 500 501 clist = ralloc(clist, ++clen * sizeof(cand)); 502 q = clist + clen - 1; 503 q->x = x; 504 q->y = y; 505 q->pred = pred; 506 return (clen - 1); 507 } 508 509 static int 510 search(int *c, int k, int y) 511 { 512 int i, j, l; 513 int t; 514 if (clist[c[k]].y < y) /* quick look for typical case */ 515 return (k + 1); 516 i = 0; 517 j = k + 1; 518 while (1) { 519 l = i + j; 520 if ((l >>= 1) <= i) 521 break; 522 t = clist[c[l]].y; 523 if (t > y) 524 j = l; 525 else if (t < y) 526 i = l; 527 else 528 return (l); 529 } 530 return (l + 1); 531 } 532 533 static void 534 unravel(int p) 535 { 536 int i; 537 struct cand *q; 538 for (i = 0; i <= len[0]; i++) 539 J[i] = i <= pref ? i : 540 i > len[0] - suff ? i + len[1] - len[0] : 541 0; 542 for (q = clist + p; q->y != 0; q = clist + q->pred) 543 J[q->x + pref] = q->y + pref; 544 } 545 546 /* 547 * check does double duty: 1. ferret out any fortuitous correspondences due 548 * to confounding by hashing (which result in "jackpot") 2. collect random 549 * access indexes to the two files 550 */ 551 552 static void 553 check(void) 554 { 555 int i, j; 556 int jackpot; 557 long ctold, ctnew; 558 int c, d; 559 560 if ((input[0] = fopen(file1, "r")) == NULL) { 561 perror(file1); 562 done(0); 563 } 564 if ((input[1] = fopen(file2, "r")) == NULL) { 565 perror(file2); 566 done(0); 567 } 568 j = 1; 569 ixold[0] = ixnew[0] = 0; 570 jackpot = 0; 571 ctold = ctnew = 0; 572 for (i = 1; i <= len[0]; i++) { 573 if (J[i] == 0) { 574 ixold[i] = ctold += skipline(0); 575 continue; 576 } 577 while (j < J[i]) { 578 ixnew[j] = ctnew += skipline(1); 579 j++; 580 } 581 if (bflag || wflag || iflag) { 582 for (;;) { 583 c = getc(input[0]); 584 d = getc(input[1]); 585 ctold++; 586 ctnew++; 587 if (bflag && isspace(c) && isspace(d)) { 588 do { 589 if (c == '\n') 590 break; 591 ctold++; 592 } while (isspace(c = getc(input[0]))); 593 do { 594 if (d == '\n') 595 break; 596 ctnew++; 597 } while (isspace(d = getc(input[1]))); 598 } else if (wflag) { 599 while (isspace(c) && c != '\n') { 600 c = getc(input[0]); 601 ctold++; 602 } 603 while (isspace(d) && d != '\n') { 604 d = getc(input[1]); 605 ctnew++; 606 } 607 } 608 if (chrtran[c] != chrtran[d]) { 609 jackpot++; 610 J[i] = 0; 611 if (c != '\n') 612 ctold += skipline(0); 613 if (d != '\n') 614 ctnew += skipline(1); 615 break; 616 } 617 if (c == '\n') 618 break; 619 } 620 } else { 621 for (;;) { 622 ctold++; 623 ctnew++; 624 if ((c = getc(input[0])) != (d = getc(input[1]))) { 625 /* jackpot++; */ 626 J[i] = 0; 627 if (c != '\n') 628 ctold += skipline(0); 629 if (d != '\n') 630 ctnew += skipline(1); 631 break; 632 } 633 if (c == '\n') 634 break; 635 } 636 } 637 ixold[i] = ctold; 638 ixnew[j] = ctnew; 639 j++; 640 } 641 for (; j <= len[1]; j++) { 642 ixnew[j] = ctnew += skipline(1); 643 } 644 fclose(input[0]); 645 fclose(input[1]); 646 /* 647 if(jackpot) 648 fprintf(stderr, "jackpot\n"); 649 */ 650 } 651 652 static void 653 sort(struct line *a, int n) 654 { /* shellsort CACM #201 */ 655 struct line w; 656 int j, m = 0; /* gcc */ 657 struct line *ai; 658 struct line *aim; 659 int k; 660 661 if (n == 0) 662 return; 663 for (j = 1; j <= n; j *= 2) 664 m = 2 * j - 1; 665 for (m /= 2; m != 0; m /= 2) { 666 k = n - m; 667 for (j = 1; j <= k; j++) { 668 for (ai = &a[j]; ai > a; ai -= m) { 669 aim = &ai[m]; 670 if (aim < ai) 671 break; /* wraparound */ 672 if (aim->value > ai[0].value || 673 (aim->value == ai[0].value && 674 aim->serial > ai[0].serial)) 675 break; 676 w.value = ai[0].value; 677 ai[0].value = aim->value; 678 aim->value = w.value; 679 w.serial = ai[0].serial; 680 ai[0].serial = aim->serial; 681 aim->serial = w.serial; 682 } 683 } 684 } 685 } 686 687 static void 688 unsort(struct line *f, int l, int *b) 689 { 690 int *a; 691 int i; 692 693 a = talloc((l + 1) * sizeof(int)); 694 for (i = 1; i <= l; i++) 695 a[f[i].serial] = f[i].value; 696 for (i = 1; i <= l; i++) 697 b[i] = a[i]; 698 free(a); 699 } 700 701 static int 702 skipline(int f) 703 { 704 int i, c; 705 706 for (i = 1; (c = getc(input[f])) != '\n'; i++) 707 if (c < 0) 708 return (i); 709 return (i); 710 } 711 712 static void 713 output(void) 714 { 715 int m; 716 int i0, i1, j1; 717 int j0; 718 input[0] = fopen(file1, "r"); 719 input[1] = fopen(file2, "r"); 720 m = len[0]; 721 J[0] = 0; 722 J[m + 1] = len[1] + 1; 723 if (opt != D_EDIT) { 724 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 725 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 726 i0++; 727 j0 = J[i0 - 1] + 1; 728 i1 = i0 - 1; 729 while (i1 < m && J[i1 + 1] == 0) 730 i1++; 731 j1 = J[i1 + 1] - 1; 732 J[i1] = j1; 733 change(i0, i1, j0, j1); 734 } 735 } else { 736 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 737 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 738 i0--; 739 j0 = J[i0 + 1] - 1; 740 i1 = i0 + 1; 741 while (i1 > 1 && J[i1 - 1] == 0) 742 i1--; 743 j1 = J[i1 - 1] + 1; 744 J[i1] = j1; 745 change(i1, i0, j1, j0); 746 } 747 } 748 if (m == 0) 749 change(1, 0, 1, len[1]); 750 if (opt == D_IFDEF) { 751 for (;;) { 752 #define c i0 753 c = getc(input[0]); 754 if (c < 0) 755 return; 756 putchar(c); 757 } 758 #undef c 759 } 760 if (anychange && opt == D_CONTEXT) 761 dump_context_vec(); 762 } 763 764 /* 765 * The following struct is used to record change information when 766 * doing a "context" diff. (see routine "change" to understand the 767 * highly mneumonic field names) 768 */ 769 struct context_vec { 770 int a; /* start line in old file */ 771 int b; /* end line in old file */ 772 int c; /* start line in new file */ 773 int d; /* end line in new file */ 774 }; 775 776 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 777 778 #define MAX_CONTEXT 128 779 780 /* 781 * indicate that there is a difference between lines a and b of the from file 782 * to get to lines c to d of the to file. If a is greater then b then there 783 * are no lines in the from file involved and this means that there were 784 * lines appended (beginning at b). If c is greater than d then there are 785 * lines missing from the to file. 786 */ 787 static void 788 change(int a, int b, int c, int d) 789 { 790 struct stat stbuf; 791 792 if (opt != D_IFDEF && a > b && c > d) 793 return; 794 if (anychange == 0) { 795 anychange = 1; 796 if (opt == D_CONTEXT) { 797 printf("*** %s ", file1); 798 stat(file1, &stbuf); 799 printf("%s--- %s ", 800 ctime(&stbuf.st_mtime), file2); 801 stat(file2, &stbuf); 802 printf("%s", ctime(&stbuf.st_mtime)); 803 804 context_vec_start = talloc(MAX_CONTEXT * 805 sizeof(struct context_vec)); 806 context_vec_end = context_vec_start + MAX_CONTEXT; 807 context_vec_ptr = context_vec_start - 1; 808 } 809 } 810 if (opt == D_CONTEXT) { 811 /* 812 * if this new change is within 'context' lines of 813 * the previous change, just add it to the change 814 * record. If the record is full or if this 815 * change is more than 'context' lines from the previous 816 * change, dump the record, reset it & add the new change. 817 */ 818 if (context_vec_ptr >= context_vec_end || 819 (context_vec_ptr >= context_vec_start && 820 a > (context_vec_ptr->b + 2 * context) && 821 c > (context_vec_ptr->d + 2 * context))) 822 dump_context_vec(); 823 824 context_vec_ptr++; 825 context_vec_ptr->a = a; 826 context_vec_ptr->b = b; 827 context_vec_ptr->c = c; 828 context_vec_ptr->d = d; 829 return; 830 } 831 switch (opt) { 832 833 case D_NORMAL: 834 case D_EDIT: 835 range(a, b, ","); 836 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 837 if (opt == D_NORMAL) 838 range(c, d, ","); 839 putchar('\n'); 840 break; 841 case D_REVERSE: 842 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 843 range(a, b, " "); 844 putchar('\n'); 845 break; 846 case D_NREVERSE: 847 if (a > b) 848 printf("a%d %d\n", b, d - c + 1); 849 else { 850 printf("d%d %d\n", a, b - a + 1); 851 if (!(c > d)) 852 /* add changed lines */ 853 printf("a%d %d\n", b, d - c + 1); 854 } 855 break; 856 } 857 if (opt == D_NORMAL || opt == D_IFDEF) { 858 fetch(ixold, a, b, input[0], "< ", 1); 859 if (a <= b && c <= d && opt == D_NORMAL) 860 prints("---\n"); 861 } 862 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 863 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 864 prints(".\n"); 865 if (inifdef) { 866 fprintf(stdout, "#endif /* %s */\n", endifname); 867 inifdef = 0; 868 } 869 } 870 871 static void 872 range(int a, int b, char *separator) 873 { 874 printf("%d", a > b ? b : a); 875 if (a < b) { 876 printf("%s%d", separator, b); 877 } 878 } 879 880 static void 881 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 882 { 883 int i, j; 884 int c; 885 int col; 886 int nc; 887 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 888 889 /* 890 * When doing #ifdef's, copy down to current line 891 * if this is the first file, so that stuff makes it to output. 892 */ 893 if (opt == D_IFDEF && oldfile) { 894 long curpos = ftell(lb); 895 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 896 nc = f[a > b ? b : a - 1] - curpos; 897 for (i = 0; i < nc; i++) 898 putchar(getc(lb)); 899 } 900 if (a > b) 901 return; 902 if (opt == D_IFDEF) { 903 if (inifdef) 904 fprintf(stdout, "#else /* %s%s */\n", oneflag && oldfile == 1 ? "!" : "", ifdef2); 905 else { 906 if (oneflag) { 907 /* There was only one ifdef given */ 908 endifname = ifdef2; 909 if (oldfile) 910 fprintf(stdout, "#ifndef %s\n", endifname); 911 else 912 fprintf(stdout, "#ifdef %s\n", endifname); 913 } else { 914 endifname = oldfile ? ifdef1 : ifdef2; 915 fprintf(stdout, "#ifdef %s\n", endifname); 916 } 917 } 918 inifdef = 1 + oldfile; 919 } 920 for (i = a; i <= b; i++) { 921 fseek(lb, f[i - 1], 0); 922 nc = f[i] - f[i - 1]; 923 if (opt != D_IFDEF) 924 prints(s); 925 col = 0; 926 for (j = 0; j < nc; j++) { 927 c = getc(lb); 928 if (c == '\t' && tflag) 929 do 930 putchar(' '); 931 while (++col & 7); 932 else { 933 putchar(c); 934 col++; 935 } 936 } 937 } 938 939 if (inifdef && !wantelses) { 940 fprintf(stdout, "#endif /* %s */\n", endifname); 941 inifdef = 0; 942 } 943 } 944 945 #define POW2 /* define only if HALFLONG is 2**n */ 946 #define HALFLONG 16 947 #define low(x) (x&((1L<<HALFLONG)-1)) 948 #define high(x) (x>>HALFLONG) 949 950 /* 951 * hashing has the effect of 952 * arranging line in 7-bit bytes and then 953 * summing 1-s complement in 16-bit hunks 954 */ 955 static int 956 readhash(FILE *f) 957 { 958 long sum; 959 unsigned shift; 960 int t; 961 int space; 962 963 sum = 1; 964 space = 0; 965 if (!bflag && !wflag) { 966 if (iflag) 967 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 968 if (t == -1) 969 return (0); 970 sum += (long)chrtran[t] << (shift 971 #ifdef POW2 972 &= HALFLONG - 1); 973 #else 974 %= HALFLONG); 975 #endif 976 } 977 else 978 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 979 if (t == -1) 980 return (0); 981 sum += (long)t << (shift 982 #ifdef POW2 983 &= HALFLONG - 1); 984 #else 985 %= HALFLONG); 986 #endif 987 } 988 } else { 989 for (shift = 0;;) { 990 switch (t = getc(f)) { 991 case -1: 992 return (0); 993 case '\t': 994 case ' ': 995 space++; 996 continue; 997 default: 998 if (space && !wflag) { 999 shift += 7; 1000 space = 0; 1001 } 1002 sum += (long)chrtran[t] << (shift 1003 #ifdef POW2 1004 &= HALFLONG - 1); 1005 #else 1006 %= HALFLONG); 1007 #endif 1008 shift += 7; 1009 continue; 1010 case '\n': 1011 break; 1012 } 1013 break; 1014 } 1015 } 1016 sum = low(sum) + high(sum); 1017 return ((short) low(sum) + (short) high(sum)); 1018 } 1019 1020 static int 1021 asciifile(FILE *f) 1022 { 1023 char buf[BUFSIZ]; 1024 int cnt; 1025 char *cp; 1026 1027 fseek(f, 0, 0); 1028 cnt = fread(buf, 1, BUFSIZ, f); 1029 cp = buf; 1030 while (--cnt >= 0) 1031 if (*cp++ & 0200) 1032 return (0); 1033 return (1); 1034 } 1035 1036 1037 /* dump accumulated "context" diff changes */ 1038 static void 1039 dump_context_vec(void) 1040 { 1041 int a, b, c, d; 1042 char ch; 1043 struct context_vec *cvp = context_vec_start; 1044 int lowa, upb, lowc, upd; 1045 int do_output; 1046 1047 if (cvp > context_vec_ptr) 1048 return; 1049 1050 b = d = 0; /* gcc */ 1051 lowa = max(1, cvp->a - context); 1052 upb = min(len[0], context_vec_ptr->b + context); 1053 lowc = max(1, cvp->c - context); 1054 upd = min(len[1], context_vec_ptr->d + context); 1055 1056 printf("***************\n*** "); 1057 range(lowa, upb, ","); 1058 printf(" ****\n"); 1059 1060 /* 1061 * output changes to the "old" file. The first loop suppresses 1062 * output if there were no changes to the "old" file (we'll see 1063 * the "old" lines as context in the "new" list). 1064 */ 1065 do_output = 0; 1066 for (; cvp <= context_vec_ptr; cvp++) 1067 if (cvp->a <= cvp->b) { 1068 cvp = context_vec_start; 1069 do_output++; 1070 break; 1071 } 1072 if (do_output) { 1073 while (cvp <= context_vec_ptr) { 1074 a = cvp->a; 1075 b = cvp->b; 1076 c = cvp->c; 1077 d = cvp->d; 1078 1079 if (a <= b && c <= d) 1080 ch = 'c'; 1081 else 1082 ch = (a <= b) ? 'd' : 'a'; 1083 1084 if (ch == 'a') 1085 fetch(ixold, lowa, b, input[0], " ", 0); 1086 else { 1087 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1088 fetch(ixold, a, b, input[0], ch == 'c' ? "! " : "- ", 0); 1089 } 1090 lowa = b + 1; 1091 cvp++; 1092 } 1093 fetch(ixold, b + 1, upb, input[0], " ", 0); 1094 } 1095 /* output changes to the "new" file */ 1096 printf("--- "); 1097 range(lowc, upd, ","); 1098 printf(" ----\n"); 1099 1100 do_output = 0; 1101 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1102 if (cvp->c <= cvp->d) { 1103 cvp = context_vec_start; 1104 do_output++; 1105 break; 1106 } 1107 if (do_output) { 1108 while (cvp <= context_vec_ptr) { 1109 a = cvp->a; 1110 b = cvp->b; 1111 c = cvp->c; 1112 d = cvp->d; 1113 1114 if (a <= b && c <= d) 1115 ch = 'c'; 1116 else 1117 ch = (a <= b) ? 'd' : 'a'; 1118 1119 if (ch == 'd') 1120 fetch(ixnew, lowc, d, input[1], " ", 0); 1121 else { 1122 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1123 fetch(ixnew, c, d, input[1], ch == 'c' ? "! " : "+ ", 0); 1124 } 1125 lowc = d + 1; 1126 cvp++; 1127 } 1128 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1129 } 1130 context_vec_ptr = context_vec_start - 1; 1131 } 1132