1 /* $OpenBSD: diffreg.c,v 1.23 2003/06/27 20:28:13 tedu Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void dump_unified_vec(void); 153 static void prepare(int, FILE *); 154 static void prune(void); 155 static void equiv(struct line *, int, struct line *, int, int *); 156 static void unravel(int); 157 static void unsort(struct line *, int, int *); 158 static void change(int, int, int, int); 159 static void sort(struct line *, int); 160 static int newcand(int, int, int); 161 static int search(int *, int, int); 162 static int skipline(int); 163 static int asciifile(FILE *); 164 static int stone(int *, int, int *, int *); 165 static int readhash(FILE *); 166 167 /* 168 * chrtran points to one of 2 translation tables: cup2low if folding upper to 169 * lower case clow2low if not folding case 170 */ 171 u_char clow2low[256] = { 172 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 173 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 174 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 175 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 176 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 177 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 178 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 179 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 180 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 181 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 182 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 183 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 184 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 185 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 186 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 187 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 188 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 189 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 190 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 191 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 192 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 193 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 194 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 195 0xfd, 0xfe, 0xff 196 }; 197 198 u_char cup2low[256] = { 199 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 200 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 201 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 202 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 203 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 204 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 205 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 206 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 207 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 208 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 209 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 210 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 211 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 212 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 213 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 214 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 215 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 216 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 217 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 218 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 219 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 220 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 221 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 222 0xfd, 0xfe, 0xff 223 }; 224 225 void 226 diffreg(void) 227 { 228 char buf1[BUFSIZ], buf2[BUFSIZ]; 229 FILE *f1, *f2; 230 int i, j; 231 232 if (hflag) { 233 diffargv[0] = "diffh"; 234 execv(diffh, diffargv); 235 error("%s", diffh); 236 } 237 chrtran = (iflag ? cup2low : clow2low); 238 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 239 errorx("can't specify - -"); 240 if (S_ISDIR(stb1.st_mode)) { 241 file1 = splice(file1, file2); 242 if (stat(file1, &stb1) < 0) 243 error("%s", file1); 244 } else if (!S_ISREG(stb1.st_mode) || strcmp(file1, "-") == 0) { 245 file1 = copytemp(file1, 1); 246 if (stat(file1, &stb1) < 0) 247 error("%s", file1); 248 } 249 if (S_ISDIR(stb2.st_mode)) { 250 file2 = splice(file2, file1); 251 if (stat(file2, &stb2) < 0) 252 error("%s", file2); 253 } else if (!S_ISREG(stb2.st_mode) || strcmp(file2, "-") == 0) { 254 file2 = copytemp(file2, 2); 255 if (stat(file2, &stb2) < 0) 256 error("%s", file2); 257 } 258 if ((f1 = fopen(file1, "r")) == NULL) 259 error("%s", file1); 260 if ((f2 = fopen(file2, "r")) == NULL) 261 error("%s", file2); 262 if (S_ISREG(stb1.st_mode) && S_ISREG(stb2.st_mode) && 263 stb1.st_size != stb2.st_size) 264 goto notsame; 265 for (;;) { 266 i = fread(buf1, 1, BUFSIZ, f1); 267 j = fread(buf2, 1, BUFSIZ, f2); 268 if (i < 0 || j < 0 || i != j) 269 goto notsame; 270 if (i == 0 && j == 0) { 271 fclose(f1); 272 fclose(f2); 273 status = 0; /* files don't differ */ 274 goto same; 275 } 276 for (j = 0; j < i; j++) 277 if (buf1[j] != buf2[j]) 278 goto notsame; 279 } 280 notsame: 281 /* 282 * Files certainly differ at this point; set status accordingly 283 */ 284 status = 1; 285 if (!asciifile(f1) || !asciifile(f2)) { 286 printf("Binary files %s and %s differ\n", file1, file2); 287 exit(status); 288 } 289 prepare(0, f1); 290 prepare(1, f2); 291 fclose(f1); 292 fclose(f2); 293 prune(); 294 sort(sfile[0], slen[0]); 295 sort(sfile[1], slen[1]); 296 297 member = (int *)file[1]; 298 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 299 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 300 301 class = (int *)file[0]; 302 unsort(sfile[0], slen[0], class); 303 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 304 305 klist = emalloc((slen[0] + 2) * sizeof(int)); 306 clist = emalloc(sizeof(cand)); 307 i = stone(class, slen[0], member, klist); 308 free(member); 309 free(class); 310 311 J = emalloc((len[0] + 2) * sizeof(int)); 312 unravel(klist[i]); 313 free(clist); 314 free(klist); 315 316 ixold = emalloc((len[0] + 2) * sizeof(long)); 317 ixnew = emalloc((len[1] + 2) * sizeof(long)); 318 check(); 319 output(); 320 status = anychange; 321 same: 322 if (anychange == 0 && (opt == D_CONTEXT || opt == D_UNIFIED)) 323 printf("No differences encountered\n"); 324 } 325 326 char *tempfiles[2]; 327 328 char * 329 copytemp(const char *file, int n) 330 { 331 char buf[BUFSIZ], *tempdir, *tempfile; 332 int i, ifd, ofd; 333 334 if (n != 1 && n != 2) 335 return (NULL); 336 337 if (strcmp(file, "-") == 0) 338 ifd = STDIN_FILENO; 339 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 340 error("%s", file); 341 342 if ((tempdir = getenv("TMPDIR")) == NULL) 343 tempdir = _PATH_TMP; 344 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 345 error(NULL); 346 tempfiles[n - 1] = tempfile; 347 348 signal(SIGHUP, done); 349 signal(SIGINT, done); 350 signal(SIGPIPE, done); 351 signal(SIGTERM, done); 352 ofd = mkstemp(tempfile); 353 if (ofd < 0) 354 error("%s", tempfile); 355 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 356 if (write(ofd, buf, i) != i) 357 error("%s", tempfile); 358 } 359 close(ifd); 360 close(ofd); 361 return (tempfile); 362 } 363 364 char * 365 splice(char *dir, char *file) 366 { 367 char *tail, *buf; 368 size_t len; 369 370 if (!strcmp(file, "-")) 371 errorx("can't specify - with other arg directory"); 372 tail = strrchr(file, '/'); 373 if (tail == NULL) 374 tail = file; 375 else 376 tail++; 377 len = strlen(dir) + 1 + strlen(tail) + 1; 378 buf = emalloc(len); 379 snprintf(buf, len, "%s/%s", dir, tail); 380 return (buf); 381 } 382 383 static void 384 prepare(int i, FILE *fd) 385 { 386 struct line *p; 387 int j, h; 388 389 fseek(fd, 0L, SEEK_SET); 390 p = emalloc(3 * sizeof(struct line)); 391 for (j = 0; (h = readhash(fd));) { 392 p = erealloc(p, (++j + 3) * sizeof(struct line)); 393 p[j].value = h; 394 } 395 len[i] = j; 396 file[i] = p; 397 } 398 399 static void 400 prune(void) 401 { 402 int i, j; 403 404 for (pref = 0; pref < len[0] && pref < len[1] && 405 file[0][pref + 1].value == file[1][pref + 1].value; 406 pref++) 407 ; 408 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 409 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 410 suff++) 411 ; 412 for (j = 0; j < 2; j++) { 413 sfile[j] = file[j] + pref; 414 slen[j] = len[j] - pref - suff; 415 for (i = 0; i <= slen[j]; i++) 416 sfile[j][i].serial = i; 417 } 418 } 419 420 static void 421 equiv(struct line *a, int n, struct line *b, int m, int *c) 422 { 423 int i, j; 424 425 i = j = 1; 426 while (i <= n && j <= m) { 427 if (a[i].value < b[j].value) 428 a[i++].value = 0; 429 else if (a[i].value == b[j].value) 430 a[i++].value = j; 431 else 432 j++; 433 } 434 while (i <= n) 435 a[i++].value = 0; 436 b[m + 1].value = 0; 437 j = 0; 438 while (++j <= m) { 439 c[j] = -b[j].serial; 440 while (b[j + 1].value == b[j].value) { 441 j++; 442 c[j] = b[j].serial; 443 } 444 } 445 c[j] = -1; 446 } 447 448 static int 449 stone(int *a, int n, int *b, int *c) 450 { 451 int i, k, y, j, l; 452 int oldc, tc, oldl; 453 454 k = 0; 455 c[0] = newcand(0, 0, 0); 456 for (i = 1; i <= n; i++) { 457 j = a[i]; 458 if (j == 0) 459 continue; 460 y = -b[j]; 461 oldl = 0; 462 oldc = c[0]; 463 do { 464 if (y <= clist[oldc].y) 465 continue; 466 l = search(c, k, y); 467 if (l != oldl + 1) 468 oldc = c[l - 1]; 469 if (l <= k) { 470 if (clist[c[l]].y <= y) 471 continue; 472 tc = c[l]; 473 c[l] = newcand(i, y, oldc); 474 oldc = tc; 475 oldl = l; 476 } else { 477 c[l] = newcand(i, y, oldc); 478 k++; 479 break; 480 } 481 } while ((y = b[++j]) > 0); 482 } 483 return (k); 484 } 485 486 static int 487 newcand(int x, int y, int pred) 488 { 489 struct cand *q; 490 491 clist = erealloc(clist, ++clen * sizeof(cand)); 492 q = clist + clen - 1; 493 q->x = x; 494 q->y = y; 495 q->pred = pred; 496 return (clen - 1); 497 } 498 499 static int 500 search(int *c, int k, int y) 501 { 502 int i, j, l, t; 503 504 if (clist[c[k]].y < y) /* quick look for typical case */ 505 return (k + 1); 506 i = 0; 507 j = k + 1; 508 while (1) { 509 l = i + j; 510 if ((l >>= 1) <= i) 511 break; 512 t = clist[c[l]].y; 513 if (t > y) 514 j = l; 515 else if (t < y) 516 i = l; 517 else 518 return (l); 519 } 520 return (l + 1); 521 } 522 523 static void 524 unravel(int p) 525 { 526 struct cand *q; 527 int i; 528 529 for (i = 0; i <= len[0]; i++) 530 J[i] = i <= pref ? i : 531 i > len[0] - suff ? i + len[1] - len[0] : 0; 532 for (q = clist + p; q->y != 0; q = clist + q->pred) 533 J[q->x + pref] = q->y + pref; 534 } 535 536 /* 537 * Check does double duty: 538 * 1. ferret out any fortuitous correspondences due 539 * to confounding by hashing (which result in "jackpot") 540 * 2. collect random access indexes to the two files 541 */ 542 static void 543 check(void) 544 { 545 int i, j, jackpot, c, d; 546 long ctold, ctnew; 547 548 if ((input[0] = fopen(file1, "r")) == NULL) 549 error("%s", file1); 550 if ((input[1] = fopen(file2, "r")) == NULL) 551 error("%s", file2); 552 j = 1; 553 ixold[0] = ixnew[0] = 0; 554 jackpot = 0; 555 ctold = ctnew = 0; 556 for (i = 1; i <= len[0]; i++) { 557 if (J[i] == 0) { 558 ixold[i] = ctold += skipline(0); 559 continue; 560 } 561 while (j < J[i]) { 562 ixnew[j] = ctnew += skipline(1); 563 j++; 564 } 565 if (bflag || wflag || iflag) { 566 for (;;) { 567 c = getc(input[0]); 568 d = getc(input[1]); 569 ctold++; 570 ctnew++; 571 if (bflag && isspace(c) && isspace(d)) { 572 do { 573 if (c == '\n') 574 break; 575 ctold++; 576 } while (isspace(c = getc(input[0]))); 577 do { 578 if (d == '\n') 579 break; 580 ctnew++; 581 } while (isspace(d = getc(input[1]))); 582 } else if (wflag) { 583 while (isspace(c) && c != '\n') { 584 c = getc(input[0]); 585 ctold++; 586 } 587 while (isspace(d) && d != '\n') { 588 d = getc(input[1]); 589 ctnew++; 590 } 591 } 592 if (chrtran[c] != chrtran[d]) { 593 jackpot++; 594 J[i] = 0; 595 if (c != '\n') 596 ctold += skipline(0); 597 if (d != '\n') 598 ctnew += skipline(1); 599 break; 600 } 601 if (c == '\n') 602 break; 603 } 604 } else { 605 for (;;) { 606 ctold++; 607 ctnew++; 608 if ((c = getc(input[0])) != (d = getc(input[1]))) { 609 /* jackpot++; */ 610 J[i] = 0; 611 if (c != '\n') 612 ctold += skipline(0); 613 if (d != '\n') 614 ctnew += skipline(1); 615 break; 616 } 617 if (c == '\n') 618 break; 619 } 620 } 621 ixold[i] = ctold; 622 ixnew[j] = ctnew; 623 j++; 624 } 625 for (; j <= len[1]; j++) { 626 ixnew[j] = ctnew += skipline(1); 627 } 628 fclose(input[0]); 629 fclose(input[1]); 630 /* 631 * if (jackpot) 632 * fprintf(stderr, "jackpot\n"); 633 */ 634 } 635 636 /* shellsort CACM #201 */ 637 static void 638 sort(struct line *a, int n) 639 { 640 struct line *ai, *aim, w; 641 int j, m = 0, k; 642 643 if (n == 0) 644 return; 645 for (j = 1; j <= n; j *= 2) 646 m = 2 * j - 1; 647 for (m /= 2; m != 0; m /= 2) { 648 k = n - m; 649 for (j = 1; j <= k; j++) { 650 for (ai = &a[j]; ai > a; ai -= m) { 651 aim = &ai[m]; 652 if (aim < ai) 653 break; /* wraparound */ 654 if (aim->value > ai[0].value || 655 (aim->value == ai[0].value && 656 aim->serial > ai[0].serial)) 657 break; 658 w.value = ai[0].value; 659 ai[0].value = aim->value; 660 aim->value = w.value; 661 w.serial = ai[0].serial; 662 ai[0].serial = aim->serial; 663 aim->serial = w.serial; 664 } 665 } 666 } 667 } 668 669 static void 670 unsort(struct line *f, int l, int *b) 671 { 672 int *a, i; 673 674 a = emalloc((l + 1) * sizeof(int)); 675 for (i = 1; i <= l; i++) 676 a[f[i].serial] = f[i].value; 677 for (i = 1; i <= l; i++) 678 b[i] = a[i]; 679 free(a); 680 } 681 682 static int 683 skipline(int f) 684 { 685 int i, c; 686 687 for (i = 1; (c = getc(input[f])) != '\n'; i++) 688 if (c < 0) 689 return (i); 690 return (i); 691 } 692 693 static void 694 output(void) 695 { 696 int m, i0, i1, j0, j1; 697 698 input[0] = fopen(file1, "r"); 699 input[1] = fopen(file2, "r"); 700 m = len[0]; 701 J[0] = 0; 702 J[m + 1] = len[1] + 1; 703 if (opt != D_EDIT) { 704 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 705 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 706 i0++; 707 j0 = J[i0 - 1] + 1; 708 i1 = i0 - 1; 709 while (i1 < m && J[i1 + 1] == 0) 710 i1++; 711 j1 = J[i1 + 1] - 1; 712 J[i1] = j1; 713 change(i0, i1, j0, j1); 714 } 715 } else { 716 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 717 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 718 i0--; 719 j0 = J[i0 + 1] - 1; 720 i1 = i0 + 1; 721 while (i1 > 1 && J[i1 - 1] == 0) 722 i1--; 723 j1 = J[i1 - 1] + 1; 724 J[i1] = j1; 725 change(i1, i0, j1, j0); 726 } 727 } 728 if (m == 0) 729 change(1, 0, 1, len[1]); 730 if (opt == D_IFDEF) { 731 for (;;) { 732 #define c i0 733 c = getc(input[0]); 734 if (c < 0) 735 return; 736 putchar(c); 737 } 738 #undef c 739 } 740 if (anychange != 0) { 741 if (opt == D_CONTEXT) 742 dump_context_vec(); 743 else if (opt == D_UNIFIED) 744 dump_unified_vec(); 745 } 746 } 747 748 /* 749 * The following struct is used to record change information when 750 * doing a "context" diff. (see routine "change" to understand the 751 * highly mneumonic field names) 752 */ 753 struct context_vec { 754 int a; /* start line in old file */ 755 int b; /* end line in old file */ 756 int c; /* start line in new file */ 757 int d; /* end line in new file */ 758 }; 759 760 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 761 762 #define MAX_CONTEXT 128 763 764 /* 765 * indicate that there is a difference between lines a and b of the from file 766 * to get to lines c to d of the to file. If a is greater then b then there 767 * are no lines in the from file involved and this means that there were 768 * lines appended (beginning at b). If c is greater than d then there are 769 * lines missing from the to file. 770 */ 771 static void 772 change(int a, int b, int c, int d) 773 { 774 struct stat stbuf; 775 776 if (opt != D_IFDEF && a > b && c > d) 777 return; 778 if (anychange == 0) { 779 anychange = 1; 780 if (opt == D_CONTEXT || opt == D_UNIFIED) { 781 stat(file1, &stbuf); 782 printf("%s %s %s", opt == D_CONTEXT ? "***" : "---", 783 file1, ctime(&stbuf.st_mtime)); 784 stat(file2, &stbuf); 785 printf("%s %s %s", opt == D_CONTEXT ? "---" : "+++", 786 file2, ctime(&stbuf.st_mtime)); 787 context_vec_start = emalloc(MAX_CONTEXT * 788 sizeof(struct context_vec)); 789 context_vec_end = context_vec_start + MAX_CONTEXT; 790 context_vec_ptr = context_vec_start - 1; 791 } 792 } 793 if (opt == D_CONTEXT || opt == D_UNIFIED) { 794 /* 795 * If this new change is within 'context' lines of 796 * the previous change, just add it to the change 797 * record. If the record is full or if this 798 * change is more than 'context' lines from the previous 799 * change, dump the record, reset it & add the new change. 800 */ 801 if (context_vec_ptr >= context_vec_end || 802 (context_vec_ptr >= context_vec_start && 803 a > (context_vec_ptr->b + 2 * context) && 804 c > (context_vec_ptr->d + 2 * context))) { 805 if (opt == D_CONTEXT) 806 dump_context_vec(); 807 else 808 dump_unified_vec(); 809 } 810 context_vec_ptr++; 811 context_vec_ptr->a = a; 812 context_vec_ptr->b = b; 813 context_vec_ptr->c = c; 814 context_vec_ptr->d = d; 815 return; 816 } 817 switch (opt) { 818 819 case D_NORMAL: 820 case D_EDIT: 821 range(a, b, ","); 822 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 823 if (opt == D_NORMAL) 824 range(c, d, ","); 825 putchar('\n'); 826 break; 827 case D_REVERSE: 828 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 829 range(a, b, " "); 830 putchar('\n'); 831 break; 832 case D_NREVERSE: 833 if (a > b) 834 printf("a%d %d\n", b, d - c + 1); 835 else { 836 printf("d%d %d\n", a, b - a + 1); 837 if (!(c > d)) 838 /* add changed lines */ 839 printf("a%d %d\n", b, d - c + 1); 840 } 841 break; 842 } 843 if (opt == D_NORMAL || opt == D_IFDEF) { 844 fetch(ixold, a, b, input[0], "< ", 1); 845 if (a <= b && c <= d && opt == D_NORMAL) 846 prints("---\n"); 847 } 848 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 849 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 850 prints(".\n"); 851 if (inifdef) { 852 fprintf(stdout, "#endif /* %s */\n", ifdefname); 853 inifdef = 0; 854 } 855 } 856 857 static void 858 range(int a, int b, char *separator) 859 { 860 printf("%d", a > b ? b : a); 861 if (a < b) 862 printf("%s%d", separator, b); 863 } 864 865 static void 866 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 867 { 868 int i, j, c, col, nc; 869 870 /* 871 * When doing #ifdef's, copy down to current line 872 * if this is the first file, so that stuff makes it to output. 873 */ 874 if (opt == D_IFDEF && oldfile) { 875 long curpos = ftell(lb); 876 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 877 nc = f[a > b ? b : a - 1] - curpos; 878 for (i = 0; i < nc; i++) 879 putchar(getc(lb)); 880 } 881 if (a > b) 882 return; 883 if (opt == D_IFDEF) { 884 if (inifdef) { 885 fprintf(stdout, "#else /* %s%s */\n", 886 oldfile == 1 ? "!" : "", ifdefname); 887 } else { 888 if (oldfile) 889 fprintf(stdout, "#ifndef %s\n", ifdefname); 890 else 891 fprintf(stdout, "#ifdef %s\n", ifdefname); 892 } 893 inifdef = 1 + oldfile; 894 } 895 for (i = a; i <= b; i++) { 896 fseek(lb, f[i - 1], SEEK_SET); 897 nc = f[i] - f[i - 1]; 898 if (opt != D_IFDEF) 899 prints(s); 900 col = 0; 901 for (j = 0; j < nc; j++) { 902 c = getc(lb); 903 if (c == '\t' && tflag) 904 do 905 putchar(' '); 906 while (++col & 7); 907 else { 908 putchar(c); 909 col++; 910 } 911 } 912 } 913 } 914 915 #define POW2 /* define only if HALFLONG is 2**n */ 916 #define HALFLONG 16 917 #define low(x) (x&((1L<<HALFLONG)-1)) 918 #define high(x) (x>>HALFLONG) 919 920 /* 921 * hashing has the effect of 922 * arranging line in 7-bit bytes and then 923 * summing 1-s complement in 16-bit hunks 924 */ 925 static int 926 readhash(FILE *f) 927 { 928 unsigned int shift; 929 int t, space; 930 long sum; 931 932 sum = 1; 933 space = 0; 934 if (!bflag && !wflag) { 935 if (iflag) 936 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 937 if (t == -1) 938 return (0); 939 sum += (long)chrtran[t] << (shift 940 #ifdef POW2 941 &= HALFLONG - 1); 942 #else 943 %= HALFLONG); 944 #endif 945 } 946 else 947 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 948 if (t == -1) 949 return (0); 950 sum += (long)t << (shift 951 #ifdef POW2 952 &= HALFLONG - 1); 953 #else 954 %= HALFLONG); 955 #endif 956 } 957 } else { 958 for (shift = 0;;) { 959 switch (t = getc(f)) { 960 case -1: 961 return (0); 962 case '\t': 963 case ' ': 964 space++; 965 continue; 966 default: 967 if (space && !wflag) { 968 shift += 7; 969 space = 0; 970 } 971 sum += (long)chrtran[t] << (shift 972 #ifdef POW2 973 &= HALFLONG - 1); 974 #else 975 %= HALFLONG); 976 #endif 977 shift += 7; 978 continue; 979 case '\n': 980 break; 981 } 982 break; 983 } 984 } 985 sum = low(sum) + high(sum); 986 return ((short) low(sum) + (short) high(sum)); 987 } 988 989 static int 990 asciifile(FILE *f) 991 { 992 char buf[BUFSIZ], *cp; 993 int cnt; 994 995 if (aflag) 996 return (1); 997 998 fseek(f, 0L, SEEK_SET); 999 cnt = fread(buf, 1, BUFSIZ, f); 1000 cp = buf; 1001 while (--cnt >= 0) 1002 if (*cp++ & 0200) 1003 return (0); 1004 return (1); 1005 } 1006 1007 /* dump accumulated "context" diff changes */ 1008 static void 1009 dump_context_vec(void) 1010 { 1011 struct context_vec *cvp = context_vec_start; 1012 int lowa, upb, lowc, upd, do_output; 1013 int a, b, c, d; 1014 char ch; 1015 1016 if (context_vec_start > context_vec_ptr) 1017 return; 1018 1019 b = d = 0; /* gcc */ 1020 lowa = max(1, cvp->a - context); 1021 upb = min(len[0], context_vec_ptr->b + context); 1022 lowc = max(1, cvp->c - context); 1023 upd = min(len[1], context_vec_ptr->d + context); 1024 1025 printf("***************\n*** "); 1026 range(lowa, upb, ","); 1027 printf(" ****\n"); 1028 1029 /* 1030 * output changes to the "old" file. The first loop suppresses 1031 * output if there were no changes to the "old" file (we'll see 1032 * the "old" lines as context in the "new" list). 1033 */ 1034 do_output = 0; 1035 for (; cvp <= context_vec_ptr; cvp++) 1036 if (cvp->a <= cvp->b) { 1037 cvp = context_vec_start; 1038 do_output++; 1039 break; 1040 } 1041 if (do_output) { 1042 while (cvp <= context_vec_ptr) { 1043 a = cvp->a; 1044 b = cvp->b; 1045 c = cvp->c; 1046 d = cvp->d; 1047 1048 if (a <= b && c <= d) 1049 ch = 'c'; 1050 else 1051 ch = (a <= b) ? 'd' : 'a'; 1052 1053 if (ch == 'a') 1054 fetch(ixold, lowa, b, input[0], " ", 0); 1055 else { 1056 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1057 fetch(ixold, a, b, input[0], 1058 ch == 'c' ? "! " : "- ", 0); 1059 } 1060 lowa = b + 1; 1061 cvp++; 1062 } 1063 fetch(ixold, b + 1, upb, input[0], " ", 0); 1064 } 1065 /* output changes to the "new" file */ 1066 printf("--- "); 1067 range(lowc, upd, ","); 1068 printf(" ----\n"); 1069 1070 do_output = 0; 1071 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1072 if (cvp->c <= cvp->d) { 1073 cvp = context_vec_start; 1074 do_output++; 1075 break; 1076 } 1077 if (do_output) { 1078 while (cvp <= context_vec_ptr) { 1079 a = cvp->a; 1080 b = cvp->b; 1081 c = cvp->c; 1082 d = cvp->d; 1083 1084 if (a <= b && c <= d) 1085 ch = 'c'; 1086 else 1087 ch = (a <= b) ? 'd' : 'a'; 1088 1089 if (ch == 'd') 1090 fetch(ixnew, lowc, d, input[1], " ", 0); 1091 else { 1092 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1093 fetch(ixnew, c, d, input[1], 1094 ch == 'c' ? "! " : "+ ", 0); 1095 } 1096 lowc = d + 1; 1097 cvp++; 1098 } 1099 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1100 } 1101 context_vec_ptr = context_vec_start - 1; 1102 } 1103 1104 /* dump accumulated "unified" diff changes */ 1105 static void 1106 dump_unified_vec(void) 1107 { 1108 struct context_vec *cvp = context_vec_start; 1109 int lowa, upb, lowc, upd; 1110 int a, b, c, d; 1111 char ch; 1112 1113 if (context_vec_start > context_vec_ptr) 1114 return; 1115 1116 b = d = 0; /* gcc */ 1117 lowa = max(1, cvp->a - context); 1118 upb = min(len[0], context_vec_ptr->b + context); 1119 lowc = max(1, cvp->c - context); 1120 upd = min(len[1], context_vec_ptr->d + context); 1121 1122 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1123 lowc, upd - lowc + 1); 1124 1125 /* 1126 * Output changes in "unified" diff format--the old and new lines 1127 * are printed together. 1128 */ 1129 for (; cvp <= context_vec_ptr; cvp++) { 1130 a = cvp->a; 1131 b = cvp->b; 1132 c = cvp->c; 1133 d = cvp->d; 1134 1135 /* 1136 * c: both new and old changes 1137 * d: only changes in the old file 1138 * a: only changes in the new file 1139 */ 1140 if (a <= b && c <= d) 1141 ch = 'c'; 1142 else 1143 ch = (a <= b) ? 'd' : 'a'; 1144 1145 switch (ch) { 1146 case 'c': 1147 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1148 fetch(ixold, a, b, input[0], "-", 0); 1149 fetch(ixnew, c, d, input[1], "+", 0); 1150 break; 1151 case 'd': 1152 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1153 fetch(ixold, a, b, input[0], "-", 0); 1154 break; 1155 case 'a': 1156 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1157 fetch(ixnew, c, d, input[1], "+", 0); 1158 break; 1159 } 1160 lowa = b + 1; 1161 lowc = d + 1; 1162 } 1163 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1164 1165 context_vec_ptr = context_vec_start - 1; 1166 } 1167