1 /* $OpenBSD: diffreg.c,v 1.20 2003/06/26 04:52:26 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void dump_unified_vec(void); 153 static void prepare(int, FILE *); 154 static void prune(void); 155 static void equiv(struct line *, int, struct line *, int, int *); 156 static void unravel(int); 157 static void unsort(struct line *, int, int *); 158 static void change(int, int, int, int); 159 static void sort(struct line *, int); 160 static int newcand(int, int, int); 161 static int search(int *, int, int); 162 static int skipline(int); 163 static int asciifile(FILE *); 164 static int stone(int *, int, int *, int *); 165 static int readhash(FILE *); 166 167 /* 168 * chrtran points to one of 2 translation tables: cup2low if folding upper to 169 * lower case clow2low if not folding case 170 */ 171 u_char clow2low[256] = { 172 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 173 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 174 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 175 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 176 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 177 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 178 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 179 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 180 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 181 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 182 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 183 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 184 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 185 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 186 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 187 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 188 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 189 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 190 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 191 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 192 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 193 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 194 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 195 0xfd, 0xfe, 0xff 196 }; 197 198 u_char cup2low[256] = { 199 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 200 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 201 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 202 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 203 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 204 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 205 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 206 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 207 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 208 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 209 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 210 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 211 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 212 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 213 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 214 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 215 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 216 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 217 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 218 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 219 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 220 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 221 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 222 0xfd, 0xfe, 0xff 223 }; 224 225 void 226 diffreg(void) 227 { 228 char buf1[BUFSIZ], buf2[BUFSIZ]; 229 FILE *f1, *f2; 230 int i, j; 231 232 if (hflag) { 233 diffargv[0] = "diffh"; 234 execv(diffh, diffargv); 235 warn("%s", diffh); 236 done(0); 237 } 238 chrtran = (iflag ? cup2low : clow2low); 239 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) { 240 warnx("can't specify - -"); 241 done(0); 242 } 243 if (S_ISDIR(stb1.st_mode)) { 244 file1 = splice(file1, file2); 245 if (stat(file1, &stb1) < 0) { 246 warn("%s", file1); 247 done(0); 248 } 249 } else if (!S_ISREG(stb1.st_mode) || strcmp(file1, "-") == 0) { 250 file1 = copytemp(file1, 1); 251 if (stat(file1, &stb1) < 0) { 252 warn("%s", file1); 253 done(0); 254 } 255 } 256 if (S_ISDIR(stb2.st_mode)) { 257 file2 = splice(file2, file1); 258 if (stat(file2, &stb2) < 0) { 259 warn("%s", file2); 260 done(0); 261 } 262 } else if (!S_ISREG(stb2.st_mode) || strcmp(file2, "-") == 0) { 263 file2 = copytemp(file2, 2); 264 if (stat(file2, &stb2) < 0) { 265 warn("%s", file2); 266 done(0); 267 } 268 } 269 if ((f1 = fopen(file1, "r")) == NULL) { 270 warn("%s", file1); 271 done(0); 272 } 273 if ((f2 = fopen(file2, "r")) == NULL) { 274 warn("%s", file2); 275 fclose(f1); 276 done(0); 277 } 278 if (S_ISREG(stb1.st_mode) && S_ISREG(stb2.st_mode) && 279 stb1.st_size != stb2.st_size) 280 goto notsame; 281 for (;;) { 282 i = fread(buf1, 1, BUFSIZ, f1); 283 j = fread(buf2, 1, BUFSIZ, f2); 284 if (i < 0 || j < 0 || i != j) 285 goto notsame; 286 if (i == 0 && j == 0) { 287 fclose(f1); 288 fclose(f2); 289 status = 0; /* files don't differ */ 290 goto same; 291 } 292 for (j = 0; j < i; j++) 293 if (buf1[j] != buf2[j]) 294 goto notsame; 295 } 296 notsame: 297 /* 298 * Files certainly differ at this point; set status accordingly 299 */ 300 status = 1; 301 if (!asciifile(f1) || !asciifile(f2)) { 302 printf("Binary files %s and %s differ\n", file1, file2); 303 fclose(f1); 304 fclose(f2); 305 done(0); 306 } 307 prepare(0, f1); 308 prepare(1, f2); 309 fclose(f1); 310 fclose(f2); 311 prune(); 312 sort(sfile[0], slen[0]); 313 sort(sfile[1], slen[1]); 314 315 member = (int *)file[1]; 316 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 317 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 318 319 class = (int *)file[0]; 320 unsort(sfile[0], slen[0], class); 321 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 322 323 klist = emalloc((slen[0] + 2) * sizeof(int)); 324 clist = emalloc(sizeof(cand)); 325 i = stone(class, slen[0], member, klist); 326 free(member); 327 free(class); 328 329 J = emalloc((len[0] + 2) * sizeof(int)); 330 unravel(klist[i]); 331 free(clist); 332 free(klist); 333 334 ixold = emalloc((len[0] + 2) * sizeof(long)); 335 ixnew = emalloc((len[1] + 2) * sizeof(long)); 336 check(); 337 output(); 338 status = anychange; 339 same: 340 if (anychange == 0 && (opt == D_CONTEXT || opt == D_UNIFIED)) 341 printf("No differences encountered\n"); 342 done(0); 343 } 344 345 char *tempfiles[2]; 346 347 char * 348 copytemp(const char *file, int n) 349 { 350 char buf[BUFSIZ], *tempdir, *tempfile; 351 int i, ifd, ofd; 352 353 if (n != 1 && n != 2) 354 return (NULL); 355 356 if (strcmp(file, "-") == 0) 357 ifd = STDIN_FILENO; 358 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) { 359 warn("%s", file); 360 done(0); 361 } 362 363 if ((tempdir = getenv("TMPDIR")) == NULL) 364 tempdir = _PATH_TMP; 365 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) { 366 warn(NULL); 367 done(0); 368 } 369 tempfiles[n - 1] = tempfile; 370 371 signal(SIGHUP, done); 372 signal(SIGINT, done); 373 signal(SIGPIPE, done); 374 signal(SIGTERM, done); 375 ofd = mkstemp(tempfile); 376 if (ofd < 0) { 377 warn("%s", tempfile); 378 done(0); 379 } 380 while ((i = read(ifd, buf, BUFSIZ)) > 0) 381 if (write(ofd, buf, i) != i) { 382 warn("%s", tempfile); 383 done(0); 384 } 385 close(ifd); 386 close(ofd); 387 return (tempfile); 388 } 389 390 char * 391 splice(char *dir, char *file) 392 { 393 char *tail, *buf; 394 size_t len; 395 396 if (!strcmp(file, "-")) { 397 warnx("can't specify - with other arg directory"); 398 done(0); 399 } 400 tail = strrchr(file, '/'); 401 if (tail == NULL) 402 tail = file; 403 else 404 tail++; 405 len = strlen(dir) + 1 + strlen(tail) + 1; 406 buf = emalloc(len); 407 snprintf(buf, len, "%s/%s", dir, tail); 408 return (buf); 409 } 410 411 static void 412 prepare(int i, FILE *fd) 413 { 414 struct line *p; 415 int j, h; 416 417 fseek(fd, 0L, SEEK_SET); 418 p = emalloc(3 * sizeof(struct line)); 419 for (j = 0; (h = readhash(fd));) { 420 p = erealloc(p, (++j + 3) * sizeof(struct line)); 421 p[j].value = h; 422 } 423 len[i] = j; 424 file[i] = p; 425 } 426 427 static void 428 prune(void) 429 { 430 int i, j; 431 432 for (pref = 0; pref < len[0] && pref < len[1] && 433 file[0][pref + 1].value == file[1][pref + 1].value; 434 pref++) 435 ; 436 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 437 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 438 suff++) 439 ; 440 for (j = 0; j < 2; j++) { 441 sfile[j] = file[j] + pref; 442 slen[j] = len[j] - pref - suff; 443 for (i = 0; i <= slen[j]; i++) 444 sfile[j][i].serial = i; 445 } 446 } 447 448 static void 449 equiv(struct line *a, int n, struct line *b, int m, int *c) 450 { 451 int i, j; 452 453 i = j = 1; 454 while (i <= n && j <= m) { 455 if (a[i].value < b[j].value) 456 a[i++].value = 0; 457 else if (a[i].value == b[j].value) 458 a[i++].value = j; 459 else 460 j++; 461 } 462 while (i <= n) 463 a[i++].value = 0; 464 b[m + 1].value = 0; 465 j = 0; 466 while (++j <= m) { 467 c[j] = -b[j].serial; 468 while (b[j + 1].value == b[j].value) { 469 j++; 470 c[j] = b[j].serial; 471 } 472 } 473 c[j] = -1; 474 } 475 476 static int 477 stone(int *a, int n, int *b, int *c) 478 { 479 int i, k, y, j, l; 480 int oldc, tc, oldl; 481 482 k = 0; 483 c[0] = newcand(0, 0, 0); 484 for (i = 1; i <= n; i++) { 485 j = a[i]; 486 if (j == 0) 487 continue; 488 y = -b[j]; 489 oldl = 0; 490 oldc = c[0]; 491 do { 492 if (y <= clist[oldc].y) 493 continue; 494 l = search(c, k, y); 495 if (l != oldl + 1) 496 oldc = c[l - 1]; 497 if (l <= k) { 498 if (clist[c[l]].y <= y) 499 continue; 500 tc = c[l]; 501 c[l] = newcand(i, y, oldc); 502 oldc = tc; 503 oldl = l; 504 } else { 505 c[l] = newcand(i, y, oldc); 506 k++; 507 break; 508 } 509 } while ((y = b[++j]) > 0); 510 } 511 return (k); 512 } 513 514 static int 515 newcand(int x, int y, int pred) 516 { 517 struct cand *q; 518 519 clist = erealloc(clist, ++clen * sizeof(cand)); 520 q = clist + clen - 1; 521 q->x = x; 522 q->y = y; 523 q->pred = pred; 524 return (clen - 1); 525 } 526 527 static int 528 search(int *c, int k, int y) 529 { 530 int i, j, l, t; 531 532 if (clist[c[k]].y < y) /* quick look for typical case */ 533 return (k + 1); 534 i = 0; 535 j = k + 1; 536 while (1) { 537 l = i + j; 538 if ((l >>= 1) <= i) 539 break; 540 t = clist[c[l]].y; 541 if (t > y) 542 j = l; 543 else if (t < y) 544 i = l; 545 else 546 return (l); 547 } 548 return (l + 1); 549 } 550 551 static void 552 unravel(int p) 553 { 554 struct cand *q; 555 int i; 556 557 for (i = 0; i <= len[0]; i++) 558 J[i] = i <= pref ? i : 559 i > len[0] - suff ? i + len[1] - len[0] : 0; 560 for (q = clist + p; q->y != 0; q = clist + q->pred) 561 J[q->x + pref] = q->y + pref; 562 } 563 564 /* 565 * Check does double duty: 566 * 1. ferret out any fortuitous correspondences due 567 * to confounding by hashing (which result in "jackpot") 568 * 2. collect random access indexes to the two files 569 */ 570 static void 571 check(void) 572 { 573 int i, j, jackpot, c, d; 574 long ctold, ctnew; 575 576 if ((input[0] = fopen(file1, "r")) == NULL) { 577 perror(file1); 578 done(0); 579 } 580 if ((input[1] = fopen(file2, "r")) == NULL) { 581 perror(file2); 582 done(0); 583 } 584 j = 1; 585 ixold[0] = ixnew[0] = 0; 586 jackpot = 0; 587 ctold = ctnew = 0; 588 for (i = 1; i <= len[0]; i++) { 589 if (J[i] == 0) { 590 ixold[i] = ctold += skipline(0); 591 continue; 592 } 593 while (j < J[i]) { 594 ixnew[j] = ctnew += skipline(1); 595 j++; 596 } 597 if (bflag || wflag || iflag) { 598 for (;;) { 599 c = getc(input[0]); 600 d = getc(input[1]); 601 ctold++; 602 ctnew++; 603 if (bflag && isspace(c) && isspace(d)) { 604 do { 605 if (c == '\n') 606 break; 607 ctold++; 608 } while (isspace(c = getc(input[0]))); 609 do { 610 if (d == '\n') 611 break; 612 ctnew++; 613 } while (isspace(d = getc(input[1]))); 614 } else if (wflag) { 615 while (isspace(c) && c != '\n') { 616 c = getc(input[0]); 617 ctold++; 618 } 619 while (isspace(d) && d != '\n') { 620 d = getc(input[1]); 621 ctnew++; 622 } 623 } 624 if (chrtran[c] != chrtran[d]) { 625 jackpot++; 626 J[i] = 0; 627 if (c != '\n') 628 ctold += skipline(0); 629 if (d != '\n') 630 ctnew += skipline(1); 631 break; 632 } 633 if (c == '\n') 634 break; 635 } 636 } else { 637 for (;;) { 638 ctold++; 639 ctnew++; 640 if ((c = getc(input[0])) != (d = getc(input[1]))) { 641 /* jackpot++; */ 642 J[i] = 0; 643 if (c != '\n') 644 ctold += skipline(0); 645 if (d != '\n') 646 ctnew += skipline(1); 647 break; 648 } 649 if (c == '\n') 650 break; 651 } 652 } 653 ixold[i] = ctold; 654 ixnew[j] = ctnew; 655 j++; 656 } 657 for (; j <= len[1]; j++) { 658 ixnew[j] = ctnew += skipline(1); 659 } 660 fclose(input[0]); 661 fclose(input[1]); 662 /* 663 * if (jackpot) 664 * fprintf(stderr, "jackpot\n"); 665 */ 666 } 667 668 /* shellsort CACM #201 */ 669 static void 670 sort(struct line *a, int n) 671 { 672 struct line *ai, *aim, w; 673 int j, m = 0, k; 674 675 if (n == 0) 676 return; 677 for (j = 1; j <= n; j *= 2) 678 m = 2 * j - 1; 679 for (m /= 2; m != 0; m /= 2) { 680 k = n - m; 681 for (j = 1; j <= k; j++) { 682 for (ai = &a[j]; ai > a; ai -= m) { 683 aim = &ai[m]; 684 if (aim < ai) 685 break; /* wraparound */ 686 if (aim->value > ai[0].value || 687 (aim->value == ai[0].value && 688 aim->serial > ai[0].serial)) 689 break; 690 w.value = ai[0].value; 691 ai[0].value = aim->value; 692 aim->value = w.value; 693 w.serial = ai[0].serial; 694 ai[0].serial = aim->serial; 695 aim->serial = w.serial; 696 } 697 } 698 } 699 } 700 701 static void 702 unsort(struct line *f, int l, int *b) 703 { 704 int *a, i; 705 706 a = emalloc((l + 1) * sizeof(int)); 707 for (i = 1; i <= l; i++) 708 a[f[i].serial] = f[i].value; 709 for (i = 1; i <= l; i++) 710 b[i] = a[i]; 711 free(a); 712 } 713 714 static int 715 skipline(int f) 716 { 717 int i, c; 718 719 for (i = 1; (c = getc(input[f])) != '\n'; i++) 720 if (c < 0) 721 return (i); 722 return (i); 723 } 724 725 static void 726 output(void) 727 { 728 int m, i0, i1, j0, j1; 729 730 input[0] = fopen(file1, "r"); 731 input[1] = fopen(file2, "r"); 732 m = len[0]; 733 J[0] = 0; 734 J[m + 1] = len[1] + 1; 735 if (opt != D_EDIT) { 736 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 737 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 738 i0++; 739 j0 = J[i0 - 1] + 1; 740 i1 = i0 - 1; 741 while (i1 < m && J[i1 + 1] == 0) 742 i1++; 743 j1 = J[i1 + 1] - 1; 744 J[i1] = j1; 745 change(i0, i1, j0, j1); 746 } 747 } else { 748 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 749 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 750 i0--; 751 j0 = J[i0 + 1] - 1; 752 i1 = i0 + 1; 753 while (i1 > 1 && J[i1 - 1] == 0) 754 i1--; 755 j1 = J[i1 - 1] + 1; 756 J[i1] = j1; 757 change(i1, i0, j1, j0); 758 } 759 } 760 if (m == 0) 761 change(1, 0, 1, len[1]); 762 if (opt == D_IFDEF) { 763 for (;;) { 764 #define c i0 765 c = getc(input[0]); 766 if (c < 0) 767 return; 768 putchar(c); 769 } 770 #undef c 771 } 772 if (anychange != 0) { 773 if (opt == D_CONTEXT) 774 dump_context_vec(); 775 else if (opt == D_UNIFIED) 776 dump_unified_vec(); 777 } 778 } 779 780 /* 781 * The following struct is used to record change information when 782 * doing a "context" diff. (see routine "change" to understand the 783 * highly mneumonic field names) 784 */ 785 struct context_vec { 786 int a; /* start line in old file */ 787 int b; /* end line in old file */ 788 int c; /* start line in new file */ 789 int d; /* end line in new file */ 790 }; 791 792 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 793 794 #define MAX_CONTEXT 128 795 796 /* 797 * indicate that there is a difference between lines a and b of the from file 798 * to get to lines c to d of the to file. If a is greater then b then there 799 * are no lines in the from file involved and this means that there were 800 * lines appended (beginning at b). If c is greater than d then there are 801 * lines missing from the to file. 802 */ 803 static void 804 change(int a, int b, int c, int d) 805 { 806 struct stat stbuf; 807 808 if (opt != D_IFDEF && a > b && c > d) 809 return; 810 if (anychange == 0) { 811 anychange = 1; 812 if (opt == D_CONTEXT || opt == D_UNIFIED) { 813 stat(file1, &stbuf); 814 printf("%s %s %s", opt == D_CONTEXT ? "***" : "---", 815 file1, ctime(&stbuf.st_mtime)); 816 stat(file2, &stbuf); 817 printf("%s %s %s", opt == D_CONTEXT ? "---" : "+++", 818 file2, ctime(&stbuf.st_mtime)); 819 context_vec_start = emalloc(MAX_CONTEXT * 820 sizeof(struct context_vec)); 821 context_vec_end = context_vec_start + MAX_CONTEXT; 822 context_vec_ptr = context_vec_start - 1; 823 } 824 } 825 if (opt == D_CONTEXT || opt == D_UNIFIED) { 826 /* 827 * if this new change is within 'context' lines of 828 * the previous change, just add it to the change 829 * record. If the record is full or if this 830 * change is more than 'context' lines from the previous 831 * change, dump the record, reset it & add the new change. 832 */ 833 if (context_vec_ptr >= context_vec_end || 834 (context_vec_ptr >= context_vec_start && 835 a > (context_vec_ptr->b + 2 * context) && 836 c > (context_vec_ptr->d + 2 * context))) { 837 if (opt == D_CONTEXT) 838 dump_context_vec(); 839 else 840 dump_unified_vec(); 841 } 842 context_vec_ptr++; 843 context_vec_ptr->a = a; 844 context_vec_ptr->b = b; 845 context_vec_ptr->c = c; 846 context_vec_ptr->d = d; 847 return; 848 } 849 switch (opt) { 850 851 case D_NORMAL: 852 case D_EDIT: 853 range(a, b, ","); 854 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 855 if (opt == D_NORMAL) 856 range(c, d, ","); 857 putchar('\n'); 858 break; 859 case D_REVERSE: 860 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 861 range(a, b, " "); 862 putchar('\n'); 863 break; 864 case D_NREVERSE: 865 if (a > b) 866 printf("a%d %d\n", b, d - c + 1); 867 else { 868 printf("d%d %d\n", a, b - a + 1); 869 if (!(c > d)) 870 /* add changed lines */ 871 printf("a%d %d\n", b, d - c + 1); 872 } 873 break; 874 } 875 if (opt == D_NORMAL || opt == D_IFDEF) { 876 fetch(ixold, a, b, input[0], "< ", 1); 877 if (a <= b && c <= d && opt == D_NORMAL) 878 prints("---\n"); 879 } 880 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 881 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 882 prints(".\n"); 883 if (inifdef) { 884 fprintf(stdout, "#endif /* %s */\n", endifname); 885 inifdef = 0; 886 } 887 } 888 889 static void 890 range(int a, int b, char *separator) 891 { 892 printf("%d", a > b ? b : a); 893 if (a < b) 894 printf("%s%d", separator, b); 895 } 896 897 static void 898 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 899 { 900 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 901 int i, j, c, col, nc; 902 903 /* 904 * When doing #ifdef's, copy down to current line 905 * if this is the first file, so that stuff makes it to output. 906 */ 907 if (opt == D_IFDEF && oldfile) { 908 long curpos = ftell(lb); 909 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 910 nc = f[a > b ? b : a - 1] - curpos; 911 for (i = 0; i < nc; i++) 912 putchar(getc(lb)); 913 } 914 if (a > b) 915 return; 916 if (opt == D_IFDEF) { 917 if (inifdef) 918 fprintf(stdout, "#else /* %s%s */\n", 919 oneflag && oldfile == 1 ? "!" : "", ifdef2); 920 else { 921 if (oneflag) { 922 /* There was only one ifdef given */ 923 endifname = ifdef2; 924 if (oldfile) 925 fprintf(stdout, "#ifndef %s\n", endifname); 926 else 927 fprintf(stdout, "#ifdef %s\n", endifname); 928 } else { 929 endifname = oldfile ? ifdef1 : ifdef2; 930 fprintf(stdout, "#ifdef %s\n", endifname); 931 } 932 } 933 inifdef = 1 + oldfile; 934 } 935 for (i = a; i <= b; i++) { 936 fseek(lb, f[i - 1], SEEK_SET); 937 nc = f[i] - f[i - 1]; 938 if (opt != D_IFDEF) 939 prints(s); 940 col = 0; 941 for (j = 0; j < nc; j++) { 942 c = getc(lb); 943 if (c == '\t' && tflag) 944 do 945 putchar(' '); 946 while (++col & 7); 947 else { 948 putchar(c); 949 col++; 950 } 951 } 952 } 953 954 if (inifdef && !wantelses) { 955 fprintf(stdout, "#endif /* %s */\n", endifname); 956 inifdef = 0; 957 } 958 } 959 960 #define POW2 /* define only if HALFLONG is 2**n */ 961 #define HALFLONG 16 962 #define low(x) (x&((1L<<HALFLONG)-1)) 963 #define high(x) (x>>HALFLONG) 964 965 /* 966 * hashing has the effect of 967 * arranging line in 7-bit bytes and then 968 * summing 1-s complement in 16-bit hunks 969 */ 970 static int 971 readhash(FILE *f) 972 { 973 unsigned int shift; 974 int t, space; 975 long sum; 976 977 sum = 1; 978 space = 0; 979 if (!bflag && !wflag) { 980 if (iflag) 981 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 982 if (t == -1) 983 return (0); 984 sum += (long)chrtran[t] << (shift 985 #ifdef POW2 986 &= HALFLONG - 1); 987 #else 988 %= HALFLONG); 989 #endif 990 } 991 else 992 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 993 if (t == -1) 994 return (0); 995 sum += (long)t << (shift 996 #ifdef POW2 997 &= HALFLONG - 1); 998 #else 999 %= HALFLONG); 1000 #endif 1001 } 1002 } else { 1003 for (shift = 0;;) { 1004 switch (t = getc(f)) { 1005 case -1: 1006 return (0); 1007 case '\t': 1008 case ' ': 1009 space++; 1010 continue; 1011 default: 1012 if (space && !wflag) { 1013 shift += 7; 1014 space = 0; 1015 } 1016 sum += (long)chrtran[t] << (shift 1017 #ifdef POW2 1018 &= HALFLONG - 1); 1019 #else 1020 %= HALFLONG); 1021 #endif 1022 shift += 7; 1023 continue; 1024 case '\n': 1025 break; 1026 } 1027 break; 1028 } 1029 } 1030 sum = low(sum) + high(sum); 1031 return ((short) low(sum) + (short) high(sum)); 1032 } 1033 1034 static int 1035 asciifile(FILE *f) 1036 { 1037 char buf[BUFSIZ], *cp; 1038 int cnt; 1039 1040 fseek(f, 0L, SEEK_SET); 1041 cnt = fread(buf, 1, BUFSIZ, f); 1042 cp = buf; 1043 while (--cnt >= 0) 1044 if (*cp++ & 0200) 1045 return (0); 1046 return (1); 1047 } 1048 1049 /* dump accumulated "context" diff changes */ 1050 static void 1051 dump_context_vec(void) 1052 { 1053 struct context_vec *cvp = context_vec_start; 1054 int lowa, upb, lowc, upd, do_output; 1055 int a, b, c, d; 1056 char ch; 1057 1058 if (cvp > context_vec_ptr) 1059 return; 1060 1061 b = d = 0; /* gcc */ 1062 lowa = max(1, cvp->a - context); 1063 upb = min(len[0], context_vec_ptr->b + context); 1064 lowc = max(1, cvp->c - context); 1065 upd = min(len[1], context_vec_ptr->d + context); 1066 1067 printf("***************\n*** "); 1068 range(lowa, upb, ","); 1069 printf(" ****\n"); 1070 1071 /* 1072 * output changes to the "old" file. The first loop suppresses 1073 * output if there were no changes to the "old" file (we'll see 1074 * the "old" lines as context in the "new" list). 1075 */ 1076 do_output = 0; 1077 for (; cvp <= context_vec_ptr; cvp++) 1078 if (cvp->a <= cvp->b) { 1079 cvp = context_vec_start; 1080 do_output++; 1081 break; 1082 } 1083 if (do_output) { 1084 while (cvp <= context_vec_ptr) { 1085 a = cvp->a; 1086 b = cvp->b; 1087 c = cvp->c; 1088 d = cvp->d; 1089 1090 if (a <= b && c <= d) 1091 ch = 'c'; 1092 else 1093 ch = (a <= b) ? 'd' : 'a'; 1094 1095 if (ch == 'a') 1096 fetch(ixold, lowa, b, input[0], " ", 0); 1097 else { 1098 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1099 fetch(ixold, a, b, input[0], 1100 ch == 'c' ? "! " : "- ", 0); 1101 } 1102 lowa = b + 1; 1103 cvp++; 1104 } 1105 fetch(ixold, b + 1, upb, input[0], " ", 0); 1106 } 1107 /* output changes to the "new" file */ 1108 printf("--- "); 1109 range(lowc, upd, ","); 1110 printf(" ----\n"); 1111 1112 do_output = 0; 1113 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1114 if (cvp->c <= cvp->d) { 1115 cvp = context_vec_start; 1116 do_output++; 1117 break; 1118 } 1119 if (do_output) { 1120 while (cvp <= context_vec_ptr) { 1121 a = cvp->a; 1122 b = cvp->b; 1123 c = cvp->c; 1124 d = cvp->d; 1125 1126 if (a <= b && c <= d) 1127 ch = 'c'; 1128 else 1129 ch = (a <= b) ? 'd' : 'a'; 1130 1131 if (ch == 'd') 1132 fetch(ixnew, lowc, d, input[1], " ", 0); 1133 else { 1134 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1135 fetch(ixnew, c, d, input[1], 1136 ch == 'c' ? "! " : "+ ", 0); 1137 } 1138 lowc = d + 1; 1139 cvp++; 1140 } 1141 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1142 } 1143 context_vec_ptr = context_vec_start - 1; 1144 } 1145 1146 /* dump accumulated "unified" diff changes */ 1147 static void 1148 dump_unified_vec(void) 1149 { 1150 struct context_vec *cvp = context_vec_start; 1151 int lowa, upb, lowc, upd; 1152 int a, b, c, d; 1153 char ch; 1154 1155 if (cvp > context_vec_ptr) 1156 return; 1157 1158 b = d = 0; /* gcc */ 1159 lowa = max(1, cvp->a - context); 1160 upb = min(len[0], context_vec_ptr->b + context); 1161 lowc = max(1, cvp->c - context); 1162 upd = min(len[1], context_vec_ptr->d + context); 1163 1164 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1165 lowc, upd - lowc + 1); 1166 1167 /* 1168 * Output changes in "unified" diff format--the old and new lines 1169 * are printed together. 1170 */ 1171 for (; cvp <= context_vec_ptr; cvp++) { 1172 a = cvp->a; 1173 b = cvp->b; 1174 c = cvp->c; 1175 d = cvp->d; 1176 1177 /* 1178 * c: both new and old changes 1179 * d: only changes in the old file 1180 * a: only changes in the new file 1181 */ 1182 if (a <= b && c <= d) 1183 ch = 'c'; 1184 else 1185 ch = (a <= b) ? 'd' : 'a'; 1186 1187 switch (ch) { 1188 case 'c': 1189 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1190 fetch(ixold, a, b, input[0], "-", 0); 1191 fetch(ixnew, c, d, input[1], "+", 0); 1192 break; 1193 case 'd': 1194 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1195 fetch(ixold, a, b, input[0], "-", 0); 1196 break; 1197 case 'a': 1198 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1199 fetch(ixnew, c, d, input[1], "+", 0); 1200 break; 1201 } 1202 lowa = b + 1; 1203 lowc = d + 1; 1204 } 1205 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1206 1207 context_vec_ptr = context_vec_start - 1; 1208 } 1209