1 /* $OpenBSD: diffreg.c,v 1.26 2003/07/04 17:37:07 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int inifdef; /* whether or not we are in a #ifdef block */ 138 int *class; /* will be overlaid on file[0] */ 139 int *member; /* will be overlaid on file[1] */ 140 int *klist; /* will be overlaid on file[0] after class */ 141 struct cand *clist; /* merely a free storage pot for candidates */ 142 int clen = 0; 143 int *J; /* will be overlaid on class */ 144 long *ixold; /* will be overlaid on klist */ 145 long *ixnew; /* will be overlaid on file[1] */ 146 u_char *chrtran; /* translation table for case-folding */ 147 148 static void fetch(long *, int, int, FILE *, char *, int); 149 static void output(void); 150 static void check(void); 151 static void range(int, int, char *); 152 static void dump_context_vec(void); 153 static void dump_unified_vec(void); 154 static void prepare(int, FILE *); 155 static void prune(void); 156 static void equiv(struct line *, int, struct line *, int, int *); 157 static void unravel(int); 158 static void unsort(struct line *, int, int *); 159 static void change(int, int, int, int); 160 static void sort(struct line *, int); 161 static int newcand(int, int, int); 162 static int search(int *, int, int); 163 static int skipline(int); 164 static int asciifile(FILE *); 165 static int stone(int *, int, int *, int *); 166 static int readhash(FILE *); 167 168 /* 169 * chrtran points to one of 2 translation tables: cup2low if folding upper to 170 * lower case clow2low if not folding case 171 */ 172 u_char clow2low[256] = { 173 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 174 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 175 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 176 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 177 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 178 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 179 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 180 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 181 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 182 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 183 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 184 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 185 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 186 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 187 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 188 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 189 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 190 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 191 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 192 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 193 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 194 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 195 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 196 0xfd, 0xfe, 0xff 197 }; 198 199 u_char cup2low[256] = { 200 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 201 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 202 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 203 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 204 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 205 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 206 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 207 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 208 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 209 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 210 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 211 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 212 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 213 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 214 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 215 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 216 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 217 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 218 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 219 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 220 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 221 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 222 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 223 0xfd, 0xfe, 0xff 224 }; 225 226 void 227 diffreg(void) 228 { 229 char buf1[BUFSIZ], buf2[BUFSIZ]; 230 FILE *f1, *f2; 231 int i, j; 232 233 chrtran = (iflag ? cup2low : clow2low); 234 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 235 errorx("can't specify - -"); 236 if (S_ISDIR(stb1.st_mode)) { 237 file1 = splice(file1, file2); 238 if (stat(file1, &stb1) < 0) 239 error("%s", file1); 240 } else if (strcmp(file1, "-") == 0 || 241 (!S_ISREG(stb1.st_mode) && strcmp(file1, _PATH_DEVNULL) != 0)) { 242 file1 = copytemp(file1, 1); 243 if (stat(file1, &stb1) < 0) 244 error("%s", file1); 245 } 246 if (S_ISDIR(stb2.st_mode)) { 247 file2 = splice(file2, file1); 248 if (stat(file2, &stb2) < 0) 249 error("%s", file2); 250 } else if (strcmp(file2, "-") == 0 || 251 (!S_ISREG(stb2.st_mode) && strcmp(file2, _PATH_DEVNULL) != 0)) { 252 file2 = copytemp(file2, 2); 253 if (stat(file2, &stb2) < 0) 254 error("%s", file2); 255 } 256 if ((f1 = fopen(file1, "r")) == NULL) 257 error("%s", file1); 258 if ((f2 = fopen(file2, "r")) == NULL) 259 error("%s", file2); 260 if ((stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT) || 261 stb1.st_size != stb2.st_size) 262 goto notsame; 263 for (;;) { 264 i = fread(buf1, 1, BUFSIZ, f1); 265 j = fread(buf2, 1, BUFSIZ, f2); 266 if (i < 0 || j < 0 || i != j) 267 goto notsame; 268 if (i == 0 && j == 0) { 269 fclose(f1); 270 fclose(f2); 271 status = 0; /* files don't differ */ 272 goto same; 273 } 274 for (j = 0; j < i; j++) 275 if (buf1[j] != buf2[j]) 276 goto notsame; 277 } 278 notsame: 279 /* 280 * Files certainly differ at this point; set status accordingly 281 */ 282 status = 1; 283 if (!asciifile(f1) || !asciifile(f2)) { 284 printf("Binary files %s and %s differ\n", file1, file2); 285 exit(status); 286 } 287 prepare(0, f1); 288 prepare(1, f2); 289 fclose(f1); 290 fclose(f2); 291 prune(); 292 sort(sfile[0], slen[0]); 293 sort(sfile[1], slen[1]); 294 295 member = (int *)file[1]; 296 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 297 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 298 299 class = (int *)file[0]; 300 unsort(sfile[0], slen[0], class); 301 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 302 303 klist = emalloc((slen[0] + 2) * sizeof(int)); 304 clist = emalloc(sizeof(cand)); 305 i = stone(class, slen[0], member, klist); 306 free(member); 307 free(class); 308 309 J = emalloc((len[0] + 2) * sizeof(int)); 310 unravel(klist[i]); 311 free(clist); 312 free(klist); 313 314 ixold = emalloc((len[0] + 2) * sizeof(long)); 315 ixnew = emalloc((len[1] + 2) * sizeof(long)); 316 check(); 317 output(); 318 status = anychange; 319 same: 320 if (anychange == 0 && (opt == D_CONTEXT || opt == D_UNIFIED)) 321 printf("No differences encountered\n"); 322 } 323 324 char *tempfiles[2]; 325 326 char * 327 copytemp(const char *file, int n) 328 { 329 char buf[BUFSIZ], *tempdir, *tempfile; 330 int i, ifd, ofd; 331 332 if (n != 1 && n != 2) 333 return (NULL); 334 335 if (strcmp(file, "-") == 0) 336 ifd = STDIN_FILENO; 337 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 338 error("%s", file); 339 340 if ((tempdir = getenv("TMPDIR")) == NULL) 341 tempdir = _PATH_TMP; 342 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 343 error(NULL); 344 tempfiles[n - 1] = tempfile; 345 346 signal(SIGHUP, done); 347 signal(SIGINT, done); 348 signal(SIGPIPE, done); 349 signal(SIGTERM, done); 350 ofd = mkstemp(tempfile); 351 if (ofd < 0) 352 error("%s", tempfile); 353 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 354 if (write(ofd, buf, i) != i) 355 error("%s", tempfile); 356 } 357 close(ifd); 358 close(ofd); 359 return (tempfile); 360 } 361 362 char * 363 splice(char *dir, char *file) 364 { 365 char *tail, *buf; 366 size_t len; 367 368 if (!strcmp(file, "-")) 369 errorx("can't specify - with other arg directory"); 370 tail = strrchr(file, '/'); 371 if (tail == NULL) 372 tail = file; 373 else 374 tail++; 375 len = strlen(dir) + 1 + strlen(tail) + 1; 376 buf = emalloc(len); 377 snprintf(buf, len, "%s/%s", dir, tail); 378 return (buf); 379 } 380 381 static void 382 prepare(int i, FILE *fd) 383 { 384 struct line *p; 385 int j, h; 386 387 fseek(fd, 0L, SEEK_SET); 388 p = emalloc(3 * sizeof(struct line)); 389 for (j = 0; (h = readhash(fd));) { 390 p = erealloc(p, (++j + 3) * sizeof(struct line)); 391 p[j].value = h; 392 } 393 len[i] = j; 394 file[i] = p; 395 } 396 397 static void 398 prune(void) 399 { 400 int i, j; 401 402 for (pref = 0; pref < len[0] && pref < len[1] && 403 file[0][pref + 1].value == file[1][pref + 1].value; 404 pref++) 405 ; 406 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 407 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 408 suff++) 409 ; 410 for (j = 0; j < 2; j++) { 411 sfile[j] = file[j] + pref; 412 slen[j] = len[j] - pref - suff; 413 for (i = 0; i <= slen[j]; i++) 414 sfile[j][i].serial = i; 415 } 416 } 417 418 static void 419 equiv(struct line *a, int n, struct line *b, int m, int *c) 420 { 421 int i, j; 422 423 i = j = 1; 424 while (i <= n && j <= m) { 425 if (a[i].value < b[j].value) 426 a[i++].value = 0; 427 else if (a[i].value == b[j].value) 428 a[i++].value = j; 429 else 430 j++; 431 } 432 while (i <= n) 433 a[i++].value = 0; 434 b[m + 1].value = 0; 435 j = 0; 436 while (++j <= m) { 437 c[j] = -b[j].serial; 438 while (b[j + 1].value == b[j].value) { 439 j++; 440 c[j] = b[j].serial; 441 } 442 } 443 c[j] = -1; 444 } 445 446 static int 447 stone(int *a, int n, int *b, int *c) 448 { 449 int i, k, y, j, l; 450 int oldc, tc, oldl; 451 452 k = 0; 453 c[0] = newcand(0, 0, 0); 454 for (i = 1; i <= n; i++) { 455 j = a[i]; 456 if (j == 0) 457 continue; 458 y = -b[j]; 459 oldl = 0; 460 oldc = c[0]; 461 do { 462 if (y <= clist[oldc].y) 463 continue; 464 l = search(c, k, y); 465 if (l != oldl + 1) 466 oldc = c[l - 1]; 467 if (l <= k) { 468 if (clist[c[l]].y <= y) 469 continue; 470 tc = c[l]; 471 c[l] = newcand(i, y, oldc); 472 oldc = tc; 473 oldl = l; 474 } else { 475 c[l] = newcand(i, y, oldc); 476 k++; 477 break; 478 } 479 } while ((y = b[++j]) > 0); 480 } 481 return (k); 482 } 483 484 static int 485 newcand(int x, int y, int pred) 486 { 487 struct cand *q; 488 489 clist = erealloc(clist, ++clen * sizeof(cand)); 490 q = clist + clen - 1; 491 q->x = x; 492 q->y = y; 493 q->pred = pred; 494 return (clen - 1); 495 } 496 497 static int 498 search(int *c, int k, int y) 499 { 500 int i, j, l, t; 501 502 if (clist[c[k]].y < y) /* quick look for typical case */ 503 return (k + 1); 504 i = 0; 505 j = k + 1; 506 while (1) { 507 l = i + j; 508 if ((l >>= 1) <= i) 509 break; 510 t = clist[c[l]].y; 511 if (t > y) 512 j = l; 513 else if (t < y) 514 i = l; 515 else 516 return (l); 517 } 518 return (l + 1); 519 } 520 521 static void 522 unravel(int p) 523 { 524 struct cand *q; 525 int i; 526 527 for (i = 0; i <= len[0]; i++) 528 J[i] = i <= pref ? i : 529 i > len[0] - suff ? i + len[1] - len[0] : 0; 530 for (q = clist + p; q->y != 0; q = clist + q->pred) 531 J[q->x + pref] = q->y + pref; 532 } 533 534 /* 535 * Check does double duty: 536 * 1. ferret out any fortuitous correspondences due 537 * to confounding by hashing (which result in "jackpot") 538 * 2. collect random access indexes to the two files 539 */ 540 static void 541 check(void) 542 { 543 int i, j, jackpot, c, d; 544 long ctold, ctnew; 545 546 if ((input[0] = fopen(file1, "r")) == NULL) 547 error("%s", file1); 548 if ((input[1] = fopen(file2, "r")) == NULL) 549 error("%s", file2); 550 j = 1; 551 ixold[0] = ixnew[0] = 0; 552 jackpot = 0; 553 ctold = ctnew = 0; 554 for (i = 1; i <= len[0]; i++) { 555 if (J[i] == 0) { 556 ixold[i] = ctold += skipline(0); 557 continue; 558 } 559 while (j < J[i]) { 560 ixnew[j] = ctnew += skipline(1); 561 j++; 562 } 563 if (bflag || wflag || iflag) { 564 for (;;) { 565 c = getc(input[0]); 566 d = getc(input[1]); 567 ctold++; 568 ctnew++; 569 if (bflag && isspace(c) && isspace(d)) { 570 do { 571 if (c == '\n') 572 break; 573 ctold++; 574 } while (isspace(c = getc(input[0]))); 575 do { 576 if (d == '\n') 577 break; 578 ctnew++; 579 } while (isspace(d = getc(input[1]))); 580 } else if (wflag) { 581 while (isspace(c) && c != '\n') { 582 c = getc(input[0]); 583 ctold++; 584 } 585 while (isspace(d) && d != '\n') { 586 d = getc(input[1]); 587 ctnew++; 588 } 589 } 590 if (chrtran[c] != chrtran[d]) { 591 jackpot++; 592 J[i] = 0; 593 if (c != '\n') 594 ctold += skipline(0); 595 if (d != '\n') 596 ctnew += skipline(1); 597 break; 598 } 599 if (c == '\n') 600 break; 601 } 602 } else { 603 for (;;) { 604 ctold++; 605 ctnew++; 606 if ((c = getc(input[0])) != (d = getc(input[1]))) { 607 /* jackpot++; */ 608 J[i] = 0; 609 if (c != '\n') 610 ctold += skipline(0); 611 if (d != '\n') 612 ctnew += skipline(1); 613 break; 614 } 615 if (c == '\n') 616 break; 617 } 618 } 619 ixold[i] = ctold; 620 ixnew[j] = ctnew; 621 j++; 622 } 623 for (; j <= len[1]; j++) { 624 ixnew[j] = ctnew += skipline(1); 625 } 626 fclose(input[0]); 627 fclose(input[1]); 628 /* 629 * if (jackpot) 630 * fprintf(stderr, "jackpot\n"); 631 */ 632 } 633 634 /* shellsort CACM #201 */ 635 static void 636 sort(struct line *a, int n) 637 { 638 struct line *ai, *aim, w; 639 int j, m = 0, k; 640 641 if (n == 0) 642 return; 643 for (j = 1; j <= n; j *= 2) 644 m = 2 * j - 1; 645 for (m /= 2; m != 0; m /= 2) { 646 k = n - m; 647 for (j = 1; j <= k; j++) { 648 for (ai = &a[j]; ai > a; ai -= m) { 649 aim = &ai[m]; 650 if (aim < ai) 651 break; /* wraparound */ 652 if (aim->value > ai[0].value || 653 (aim->value == ai[0].value && 654 aim->serial > ai[0].serial)) 655 break; 656 w.value = ai[0].value; 657 ai[0].value = aim->value; 658 aim->value = w.value; 659 w.serial = ai[0].serial; 660 ai[0].serial = aim->serial; 661 aim->serial = w.serial; 662 } 663 } 664 } 665 } 666 667 static void 668 unsort(struct line *f, int l, int *b) 669 { 670 int *a, i; 671 672 a = emalloc((l + 1) * sizeof(int)); 673 for (i = 1; i <= l; i++) 674 a[f[i].serial] = f[i].value; 675 for (i = 1; i <= l; i++) 676 b[i] = a[i]; 677 free(a); 678 } 679 680 static int 681 skipline(int f) 682 { 683 int i, c; 684 685 for (i = 1; (c = getc(input[f])) != '\n'; i++) 686 if (c < 0) 687 return (i); 688 return (i); 689 } 690 691 static void 692 output(void) 693 { 694 int m, i0, i1, j0, j1; 695 696 input[0] = fopen(file1, "r"); 697 input[1] = fopen(file2, "r"); 698 m = len[0]; 699 J[0] = 0; 700 J[m + 1] = len[1] + 1; 701 if (opt != D_EDIT) { 702 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 703 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 704 i0++; 705 j0 = J[i0 - 1] + 1; 706 i1 = i0 - 1; 707 while (i1 < m && J[i1 + 1] == 0) 708 i1++; 709 j1 = J[i1 + 1] - 1; 710 J[i1] = j1; 711 change(i0, i1, j0, j1); 712 } 713 } else { 714 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 715 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 716 i0--; 717 j0 = J[i0 + 1] - 1; 718 i1 = i0 + 1; 719 while (i1 > 1 && J[i1 - 1] == 0) 720 i1--; 721 j1 = J[i1 - 1] + 1; 722 J[i1] = j1; 723 change(i1, i0, j1, j0); 724 } 725 } 726 if (m == 0) 727 change(1, 0, 1, len[1]); 728 if (opt == D_IFDEF) { 729 for (;;) { 730 #define c i0 731 c = getc(input[0]); 732 if (c < 0) 733 return; 734 putchar(c); 735 } 736 #undef c 737 } 738 if (anychange != 0) { 739 if (opt == D_CONTEXT) 740 dump_context_vec(); 741 else if (opt == D_UNIFIED) 742 dump_unified_vec(); 743 } 744 } 745 746 /* 747 * The following struct is used to record change information when 748 * doing a "context" diff. (see routine "change" to understand the 749 * highly mneumonic field names) 750 */ 751 struct context_vec { 752 int a; /* start line in old file */ 753 int b; /* end line in old file */ 754 int c; /* start line in new file */ 755 int d; /* end line in new file */ 756 }; 757 758 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 759 760 #define MAX_CONTEXT 128 761 762 /* 763 * indicate that there is a difference between lines a and b of the from file 764 * to get to lines c to d of the to file. If a is greater then b then there 765 * are no lines in the from file involved and this means that there were 766 * lines appended (beginning at b). If c is greater than d then there are 767 * lines missing from the to file. 768 */ 769 static void 770 change(int a, int b, int c, int d) 771 { 772 struct stat stbuf; 773 774 if (opt != D_IFDEF && a > b && c > d) 775 return; 776 if (anychange == 0) { 777 anychange = 1; 778 if (opt == D_CONTEXT || opt == D_UNIFIED) { 779 stat(file1, &stbuf); 780 printf("%s %s %s", opt == D_CONTEXT ? "***" : "---", 781 file1, ctime(&stbuf.st_mtime)); 782 stat(file2, &stbuf); 783 printf("%s %s %s", opt == D_CONTEXT ? "---" : "+++", 784 file2, ctime(&stbuf.st_mtime)); 785 context_vec_start = emalloc(MAX_CONTEXT * 786 sizeof(struct context_vec)); 787 context_vec_end = context_vec_start + MAX_CONTEXT; 788 context_vec_ptr = context_vec_start - 1; 789 } 790 } 791 if (opt == D_CONTEXT || opt == D_UNIFIED) { 792 /* 793 * If this new change is within 'context' lines of 794 * the previous change, just add it to the change 795 * record. If the record is full or if this 796 * change is more than 'context' lines from the previous 797 * change, dump the record, reset it & add the new change. 798 */ 799 if (context_vec_ptr >= context_vec_end || 800 (context_vec_ptr >= context_vec_start && 801 a > (context_vec_ptr->b + 2 * context) && 802 c > (context_vec_ptr->d + 2 * context))) { 803 if (opt == D_CONTEXT) 804 dump_context_vec(); 805 else 806 dump_unified_vec(); 807 } 808 context_vec_ptr++; 809 context_vec_ptr->a = a; 810 context_vec_ptr->b = b; 811 context_vec_ptr->c = c; 812 context_vec_ptr->d = d; 813 return; 814 } 815 switch (opt) { 816 817 case D_NORMAL: 818 case D_EDIT: 819 range(a, b, ","); 820 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 821 if (opt == D_NORMAL) 822 range(c, d, ","); 823 putchar('\n'); 824 break; 825 case D_REVERSE: 826 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 827 range(a, b, " "); 828 putchar('\n'); 829 break; 830 case D_NREVERSE: 831 if (a > b) 832 printf("a%d %d\n", b, d - c + 1); 833 else { 834 printf("d%d %d\n", a, b - a + 1); 835 if (!(c > d)) 836 /* add changed lines */ 837 printf("a%d %d\n", b, d - c + 1); 838 } 839 break; 840 } 841 if (opt == D_NORMAL || opt == D_IFDEF) { 842 fetch(ixold, a, b, input[0], "< ", 1); 843 if (a <= b && c <= d && opt == D_NORMAL) 844 prints("---\n"); 845 } 846 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 847 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 848 prints(".\n"); 849 if (inifdef) { 850 fprintf(stdout, "#endif /* %s */\n", ifdefname); 851 inifdef = 0; 852 } 853 } 854 855 static void 856 range(int a, int b, char *separator) 857 { 858 printf("%d", a > b ? b : a); 859 if (a < b) 860 printf("%s%d", separator, b); 861 } 862 863 static void 864 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 865 { 866 int i, j, c, col, nc; 867 868 /* 869 * When doing #ifdef's, copy down to current line 870 * if this is the first file, so that stuff makes it to output. 871 */ 872 if (opt == D_IFDEF && oldfile) { 873 long curpos = ftell(lb); 874 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 875 nc = f[a > b ? b : a - 1] - curpos; 876 for (i = 0; i < nc; i++) 877 putchar(getc(lb)); 878 } 879 if (a > b) 880 return; 881 if (opt == D_IFDEF) { 882 if (inifdef) { 883 fprintf(stdout, "#else /* %s%s */\n", 884 oldfile == 1 ? "!" : "", ifdefname); 885 } else { 886 if (oldfile) 887 fprintf(stdout, "#ifndef %s\n", ifdefname); 888 else 889 fprintf(stdout, "#ifdef %s\n", ifdefname); 890 } 891 inifdef = 1 + oldfile; 892 } 893 for (i = a; i <= b; i++) { 894 fseek(lb, f[i - 1], SEEK_SET); 895 nc = f[i] - f[i - 1]; 896 if (opt != D_IFDEF) 897 prints(s); 898 col = 0; 899 for (j = 0; j < nc; j++) { 900 c = getc(lb); 901 if (c == '\t' && tflag) 902 do 903 putchar(' '); 904 while (++col & 7); 905 else { 906 putchar(c); 907 col++; 908 } 909 } 910 } 911 } 912 913 #define POW2 /* define only if HALFLONG is 2**n */ 914 #define HALFLONG 16 915 #define low(x) (x&((1L<<HALFLONG)-1)) 916 #define high(x) (x>>HALFLONG) 917 918 /* 919 * hashing has the effect of 920 * arranging line in 7-bit bytes and then 921 * summing 1-s complement in 16-bit hunks 922 */ 923 static int 924 readhash(FILE *f) 925 { 926 unsigned int shift; 927 int t, space; 928 long sum; 929 930 sum = 1; 931 space = 0; 932 if (!bflag && !wflag) { 933 if (iflag) 934 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 935 if (t == -1) 936 return (0); 937 sum += (long)chrtran[t] << (shift 938 #ifdef POW2 939 &= HALFLONG - 1); 940 #else 941 %= HALFLONG); 942 #endif 943 } 944 else 945 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 946 if (t == -1) 947 return (0); 948 sum += (long)t << (shift 949 #ifdef POW2 950 &= HALFLONG - 1); 951 #else 952 %= HALFLONG); 953 #endif 954 } 955 } else { 956 for (shift = 0;;) { 957 switch (t = getc(f)) { 958 case -1: 959 return (0); 960 case '\t': 961 case ' ': 962 space++; 963 continue; 964 default: 965 if (space && !wflag) { 966 shift += 7; 967 space = 0; 968 } 969 sum += (long)chrtran[t] << (shift 970 #ifdef POW2 971 &= HALFLONG - 1); 972 #else 973 %= HALFLONG); 974 #endif 975 shift += 7; 976 continue; 977 case '\n': 978 break; 979 } 980 break; 981 } 982 } 983 sum = low(sum) + high(sum); 984 return ((short) low(sum) + (short) high(sum)); 985 } 986 987 static int 988 asciifile(FILE *f) 989 { 990 char buf[BUFSIZ], *cp; 991 int cnt; 992 993 if (aflag) 994 return (1); 995 996 fseek(f, 0L, SEEK_SET); 997 cnt = fread(buf, 1, BUFSIZ, f); 998 cp = buf; 999 while (--cnt >= 0) 1000 if (*cp++ & 0200) 1001 return (0); 1002 return (1); 1003 } 1004 1005 /* dump accumulated "context" diff changes */ 1006 static void 1007 dump_context_vec(void) 1008 { 1009 struct context_vec *cvp = context_vec_start; 1010 int lowa, upb, lowc, upd, do_output; 1011 int a, b, c, d; 1012 char ch; 1013 1014 if (context_vec_start > context_vec_ptr) 1015 return; 1016 1017 b = d = 0; /* gcc */ 1018 lowa = max(1, cvp->a - context); 1019 upb = min(len[0], context_vec_ptr->b + context); 1020 lowc = max(1, cvp->c - context); 1021 upd = min(len[1], context_vec_ptr->d + context); 1022 1023 printf("***************\n*** "); 1024 range(lowa, upb, ","); 1025 printf(" ****\n"); 1026 1027 /* 1028 * output changes to the "old" file. The first loop suppresses 1029 * output if there were no changes to the "old" file (we'll see 1030 * the "old" lines as context in the "new" list). 1031 */ 1032 do_output = 0; 1033 for (; cvp <= context_vec_ptr; cvp++) 1034 if (cvp->a <= cvp->b) { 1035 cvp = context_vec_start; 1036 do_output++; 1037 break; 1038 } 1039 if (do_output) { 1040 while (cvp <= context_vec_ptr) { 1041 a = cvp->a; 1042 b = cvp->b; 1043 c = cvp->c; 1044 d = cvp->d; 1045 1046 if (a <= b && c <= d) 1047 ch = 'c'; 1048 else 1049 ch = (a <= b) ? 'd' : 'a'; 1050 1051 if (ch == 'a') 1052 fetch(ixold, lowa, b, input[0], " ", 0); 1053 else { 1054 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1055 fetch(ixold, a, b, input[0], 1056 ch == 'c' ? "! " : "- ", 0); 1057 } 1058 lowa = b + 1; 1059 cvp++; 1060 } 1061 fetch(ixold, b + 1, upb, input[0], " ", 0); 1062 } 1063 /* output changes to the "new" file */ 1064 printf("--- "); 1065 range(lowc, upd, ","); 1066 printf(" ----\n"); 1067 1068 do_output = 0; 1069 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1070 if (cvp->c <= cvp->d) { 1071 cvp = context_vec_start; 1072 do_output++; 1073 break; 1074 } 1075 if (do_output) { 1076 while (cvp <= context_vec_ptr) { 1077 a = cvp->a; 1078 b = cvp->b; 1079 c = cvp->c; 1080 d = cvp->d; 1081 1082 if (a <= b && c <= d) 1083 ch = 'c'; 1084 else 1085 ch = (a <= b) ? 'd' : 'a'; 1086 1087 if (ch == 'd') 1088 fetch(ixnew, lowc, d, input[1], " ", 0); 1089 else { 1090 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1091 fetch(ixnew, c, d, input[1], 1092 ch == 'c' ? "! " : "+ ", 0); 1093 } 1094 lowc = d + 1; 1095 cvp++; 1096 } 1097 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1098 } 1099 context_vec_ptr = context_vec_start - 1; 1100 } 1101 1102 /* dump accumulated "unified" diff changes */ 1103 static void 1104 dump_unified_vec(void) 1105 { 1106 struct context_vec *cvp = context_vec_start; 1107 int lowa, upb, lowc, upd; 1108 int a, b, c, d; 1109 char ch; 1110 1111 if (context_vec_start > context_vec_ptr) 1112 return; 1113 1114 b = d = 0; /* gcc */ 1115 lowa = max(1, cvp->a - context); 1116 upb = min(len[0], context_vec_ptr->b + context); 1117 lowc = max(1, cvp->c - context); 1118 upd = min(len[1], context_vec_ptr->d + context); 1119 1120 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1121 lowc, upd - lowc + 1); 1122 1123 /* 1124 * Output changes in "unified" diff format--the old and new lines 1125 * are printed together. 1126 */ 1127 for (; cvp <= context_vec_ptr; cvp++) { 1128 a = cvp->a; 1129 b = cvp->b; 1130 c = cvp->c; 1131 d = cvp->d; 1132 1133 /* 1134 * c: both new and old changes 1135 * d: only changes in the old file 1136 * a: only changes in the new file 1137 */ 1138 if (a <= b && c <= d) 1139 ch = 'c'; 1140 else 1141 ch = (a <= b) ? 'd' : 'a'; 1142 1143 switch (ch) { 1144 case 'c': 1145 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1146 fetch(ixold, a, b, input[0], "-", 0); 1147 fetch(ixnew, c, d, input[1], "+", 0); 1148 break; 1149 case 'd': 1150 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1151 fetch(ixold, a, b, input[0], "-", 0); 1152 break; 1153 case 'a': 1154 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1155 fetch(ixnew, c, d, input[1], "+", 0); 1156 break; 1157 } 1158 lowa = b + 1; 1159 lowc = d + 1; 1160 } 1161 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1162 1163 context_vec_ptr = context_vec_start - 1; 1164 } 1165