1 /* $OpenBSD: diffreg.c,v 1.21 2003/06/26 18:19:29 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void dump_unified_vec(void); 153 static void prepare(int, FILE *); 154 static void prune(void); 155 static void equiv(struct line *, int, struct line *, int, int *); 156 static void unravel(int); 157 static void unsort(struct line *, int, int *); 158 static void change(int, int, int, int); 159 static void sort(struct line *, int); 160 static int newcand(int, int, int); 161 static int search(int *, int, int); 162 static int skipline(int); 163 static int asciifile(FILE *); 164 static int stone(int *, int, int *, int *); 165 static int readhash(FILE *); 166 167 /* 168 * chrtran points to one of 2 translation tables: cup2low if folding upper to 169 * lower case clow2low if not folding case 170 */ 171 u_char clow2low[256] = { 172 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 173 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 174 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 175 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 176 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 177 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 178 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 179 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 180 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 181 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 182 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 183 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 184 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 185 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 186 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 187 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 188 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 189 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 190 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 191 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 192 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 193 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 194 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 195 0xfd, 0xfe, 0xff 196 }; 197 198 u_char cup2low[256] = { 199 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 200 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 201 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 202 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 203 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 204 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 205 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 206 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 207 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 208 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 209 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 210 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 211 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 212 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 213 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 214 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 215 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 216 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 217 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 218 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 219 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 220 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 221 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 222 0xfd, 0xfe, 0xff 223 }; 224 225 void 226 diffreg(void) 227 { 228 char buf1[BUFSIZ], buf2[BUFSIZ]; 229 FILE *f1, *f2; 230 int i, j; 231 232 if (hflag) { 233 diffargv[0] = "diffh"; 234 execv(diffh, diffargv); 235 error("%s", diffh); 236 } 237 chrtran = (iflag ? cup2low : clow2low); 238 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 239 errorx("can't specify - -"); 240 if (S_ISDIR(stb1.st_mode)) { 241 file1 = splice(file1, file2); 242 if (stat(file1, &stb1) < 0) 243 error("%s", file1); 244 } else if (!S_ISREG(stb1.st_mode) || strcmp(file1, "-") == 0) { 245 file1 = copytemp(file1, 1); 246 if (stat(file1, &stb1) < 0) 247 error("%s", file1); 248 } 249 if (S_ISDIR(stb2.st_mode)) { 250 file2 = splice(file2, file1); 251 if (stat(file2, &stb2) < 0) 252 error("%s", file2); 253 } else if (!S_ISREG(stb2.st_mode) || strcmp(file2, "-") == 0) { 254 file2 = copytemp(file2, 2); 255 if (stat(file2, &stb2) < 0) 256 error("%s", file2); 257 } 258 if ((f1 = fopen(file1, "r")) == NULL) 259 error("%s", file1); 260 if ((f2 = fopen(file2, "r")) == NULL) 261 error("%s", file2); 262 if (S_ISREG(stb1.st_mode) && S_ISREG(stb2.st_mode) && 263 stb1.st_size != stb2.st_size) 264 goto notsame; 265 for (;;) { 266 i = fread(buf1, 1, BUFSIZ, f1); 267 j = fread(buf2, 1, BUFSIZ, f2); 268 if (i < 0 || j < 0 || i != j) 269 goto notsame; 270 if (i == 0 && j == 0) { 271 fclose(f1); 272 fclose(f2); 273 status = 0; /* files don't differ */ 274 goto same; 275 } 276 for (j = 0; j < i; j++) 277 if (buf1[j] != buf2[j]) 278 goto notsame; 279 } 280 notsame: 281 /* 282 * Files certainly differ at this point; set status accordingly 283 */ 284 status = 1; 285 if (!asciifile(f1) || !asciifile(f2)) { 286 printf("Binary files %s and %s differ\n", file1, file2); 287 exit(status); 288 } 289 prepare(0, f1); 290 prepare(1, f2); 291 fclose(f1); 292 fclose(f2); 293 prune(); 294 sort(sfile[0], slen[0]); 295 sort(sfile[1], slen[1]); 296 297 member = (int *)file[1]; 298 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 299 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 300 301 class = (int *)file[0]; 302 unsort(sfile[0], slen[0], class); 303 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 304 305 klist = emalloc((slen[0] + 2) * sizeof(int)); 306 clist = emalloc(sizeof(cand)); 307 i = stone(class, slen[0], member, klist); 308 free(member); 309 free(class); 310 311 J = emalloc((len[0] + 2) * sizeof(int)); 312 unravel(klist[i]); 313 free(clist); 314 free(klist); 315 316 ixold = emalloc((len[0] + 2) * sizeof(long)); 317 ixnew = emalloc((len[1] + 2) * sizeof(long)); 318 check(); 319 output(); 320 status = anychange; 321 same: 322 if (anychange == 0 && (opt == D_CONTEXT || opt == D_UNIFIED)) 323 printf("No differences encountered\n"); 324 } 325 326 char *tempfiles[2]; 327 328 char * 329 copytemp(const char *file, int n) 330 { 331 char buf[BUFSIZ], *tempdir, *tempfile; 332 int i, ifd, ofd; 333 334 if (n != 1 && n != 2) 335 return (NULL); 336 337 if (strcmp(file, "-") == 0) 338 ifd = STDIN_FILENO; 339 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 340 error("%s", file); 341 342 if ((tempdir = getenv("TMPDIR")) == NULL) 343 tempdir = _PATH_TMP; 344 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 345 error(NULL); 346 tempfiles[n - 1] = tempfile; 347 348 signal(SIGHUP, done); 349 signal(SIGINT, done); 350 signal(SIGPIPE, done); 351 signal(SIGTERM, done); 352 ofd = mkstemp(tempfile); 353 if (ofd < 0) 354 error("%s", tempfile); 355 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 356 if (write(ofd, buf, i) != i) 357 error("%s", tempfile); 358 } 359 close(ifd); 360 close(ofd); 361 return (tempfile); 362 } 363 364 char * 365 splice(char *dir, char *file) 366 { 367 char *tail, *buf; 368 size_t len; 369 370 if (!strcmp(file, "-")) 371 errorx("can't specify - with other arg directory"); 372 tail = strrchr(file, '/'); 373 if (tail == NULL) 374 tail = file; 375 else 376 tail++; 377 len = strlen(dir) + 1 + strlen(tail) + 1; 378 buf = emalloc(len); 379 snprintf(buf, len, "%s/%s", dir, tail); 380 return (buf); 381 } 382 383 static void 384 prepare(int i, FILE *fd) 385 { 386 struct line *p; 387 int j, h; 388 389 fseek(fd, 0L, SEEK_SET); 390 p = emalloc(3 * sizeof(struct line)); 391 for (j = 0; (h = readhash(fd));) { 392 p = erealloc(p, (++j + 3) * sizeof(struct line)); 393 p[j].value = h; 394 } 395 len[i] = j; 396 file[i] = p; 397 } 398 399 static void 400 prune(void) 401 { 402 int i, j; 403 404 for (pref = 0; pref < len[0] && pref < len[1] && 405 file[0][pref + 1].value == file[1][pref + 1].value; 406 pref++) 407 ; 408 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 409 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 410 suff++) 411 ; 412 for (j = 0; j < 2; j++) { 413 sfile[j] = file[j] + pref; 414 slen[j] = len[j] - pref - suff; 415 for (i = 0; i <= slen[j]; i++) 416 sfile[j][i].serial = i; 417 } 418 } 419 420 static void 421 equiv(struct line *a, int n, struct line *b, int m, int *c) 422 { 423 int i, j; 424 425 i = j = 1; 426 while (i <= n && j <= m) { 427 if (a[i].value < b[j].value) 428 a[i++].value = 0; 429 else if (a[i].value == b[j].value) 430 a[i++].value = j; 431 else 432 j++; 433 } 434 while (i <= n) 435 a[i++].value = 0; 436 b[m + 1].value = 0; 437 j = 0; 438 while (++j <= m) { 439 c[j] = -b[j].serial; 440 while (b[j + 1].value == b[j].value) { 441 j++; 442 c[j] = b[j].serial; 443 } 444 } 445 c[j] = -1; 446 } 447 448 static int 449 stone(int *a, int n, int *b, int *c) 450 { 451 int i, k, y, j, l; 452 int oldc, tc, oldl; 453 454 k = 0; 455 c[0] = newcand(0, 0, 0); 456 for (i = 1; i <= n; i++) { 457 j = a[i]; 458 if (j == 0) 459 continue; 460 y = -b[j]; 461 oldl = 0; 462 oldc = c[0]; 463 do { 464 if (y <= clist[oldc].y) 465 continue; 466 l = search(c, k, y); 467 if (l != oldl + 1) 468 oldc = c[l - 1]; 469 if (l <= k) { 470 if (clist[c[l]].y <= y) 471 continue; 472 tc = c[l]; 473 c[l] = newcand(i, y, oldc); 474 oldc = tc; 475 oldl = l; 476 } else { 477 c[l] = newcand(i, y, oldc); 478 k++; 479 break; 480 } 481 } while ((y = b[++j]) > 0); 482 } 483 return (k); 484 } 485 486 static int 487 newcand(int x, int y, int pred) 488 { 489 struct cand *q; 490 491 clist = erealloc(clist, ++clen * sizeof(cand)); 492 q = clist + clen - 1; 493 q->x = x; 494 q->y = y; 495 q->pred = pred; 496 return (clen - 1); 497 } 498 499 static int 500 search(int *c, int k, int y) 501 { 502 int i, j, l, t; 503 504 if (clist[c[k]].y < y) /* quick look for typical case */ 505 return (k + 1); 506 i = 0; 507 j = k + 1; 508 while (1) { 509 l = i + j; 510 if ((l >>= 1) <= i) 511 break; 512 t = clist[c[l]].y; 513 if (t > y) 514 j = l; 515 else if (t < y) 516 i = l; 517 else 518 return (l); 519 } 520 return (l + 1); 521 } 522 523 static void 524 unravel(int p) 525 { 526 struct cand *q; 527 int i; 528 529 for (i = 0; i <= len[0]; i++) 530 J[i] = i <= pref ? i : 531 i > len[0] - suff ? i + len[1] - len[0] : 0; 532 for (q = clist + p; q->y != 0; q = clist + q->pred) 533 J[q->x + pref] = q->y + pref; 534 } 535 536 /* 537 * Check does double duty: 538 * 1. ferret out any fortuitous correspondences due 539 * to confounding by hashing (which result in "jackpot") 540 * 2. collect random access indexes to the two files 541 */ 542 static void 543 check(void) 544 { 545 int i, j, jackpot, c, d; 546 long ctold, ctnew; 547 548 if ((input[0] = fopen(file1, "r")) == NULL) 549 error("%s", file1); 550 if ((input[1] = fopen(file2, "r")) == NULL) 551 error("%s", file2); 552 j = 1; 553 ixold[0] = ixnew[0] = 0; 554 jackpot = 0; 555 ctold = ctnew = 0; 556 for (i = 1; i <= len[0]; i++) { 557 if (J[i] == 0) { 558 ixold[i] = ctold += skipline(0); 559 continue; 560 } 561 while (j < J[i]) { 562 ixnew[j] = ctnew += skipline(1); 563 j++; 564 } 565 if (bflag || wflag || iflag) { 566 for (;;) { 567 c = getc(input[0]); 568 d = getc(input[1]); 569 ctold++; 570 ctnew++; 571 if (bflag && isspace(c) && isspace(d)) { 572 do { 573 if (c == '\n') 574 break; 575 ctold++; 576 } while (isspace(c = getc(input[0]))); 577 do { 578 if (d == '\n') 579 break; 580 ctnew++; 581 } while (isspace(d = getc(input[1]))); 582 } else if (wflag) { 583 while (isspace(c) && c != '\n') { 584 c = getc(input[0]); 585 ctold++; 586 } 587 while (isspace(d) && d != '\n') { 588 d = getc(input[1]); 589 ctnew++; 590 } 591 } 592 if (chrtran[c] != chrtran[d]) { 593 jackpot++; 594 J[i] = 0; 595 if (c != '\n') 596 ctold += skipline(0); 597 if (d != '\n') 598 ctnew += skipline(1); 599 break; 600 } 601 if (c == '\n') 602 break; 603 } 604 } else { 605 for (;;) { 606 ctold++; 607 ctnew++; 608 if ((c = getc(input[0])) != (d = getc(input[1]))) { 609 /* jackpot++; */ 610 J[i] = 0; 611 if (c != '\n') 612 ctold += skipline(0); 613 if (d != '\n') 614 ctnew += skipline(1); 615 break; 616 } 617 if (c == '\n') 618 break; 619 } 620 } 621 ixold[i] = ctold; 622 ixnew[j] = ctnew; 623 j++; 624 } 625 for (; j <= len[1]; j++) { 626 ixnew[j] = ctnew += skipline(1); 627 } 628 fclose(input[0]); 629 fclose(input[1]); 630 /* 631 * if (jackpot) 632 * fprintf(stderr, "jackpot\n"); 633 */ 634 } 635 636 /* shellsort CACM #201 */ 637 static void 638 sort(struct line *a, int n) 639 { 640 struct line *ai, *aim, w; 641 int j, m = 0, k; 642 643 if (n == 0) 644 return; 645 for (j = 1; j <= n; j *= 2) 646 m = 2 * j - 1; 647 for (m /= 2; m != 0; m /= 2) { 648 k = n - m; 649 for (j = 1; j <= k; j++) { 650 for (ai = &a[j]; ai > a; ai -= m) { 651 aim = &ai[m]; 652 if (aim < ai) 653 break; /* wraparound */ 654 if (aim->value > ai[0].value || 655 (aim->value == ai[0].value && 656 aim->serial > ai[0].serial)) 657 break; 658 w.value = ai[0].value; 659 ai[0].value = aim->value; 660 aim->value = w.value; 661 w.serial = ai[0].serial; 662 ai[0].serial = aim->serial; 663 aim->serial = w.serial; 664 } 665 } 666 } 667 } 668 669 static void 670 unsort(struct line *f, int l, int *b) 671 { 672 int *a, i; 673 674 a = emalloc((l + 1) * sizeof(int)); 675 for (i = 1; i <= l; i++) 676 a[f[i].serial] = f[i].value; 677 for (i = 1; i <= l; i++) 678 b[i] = a[i]; 679 free(a); 680 } 681 682 static int 683 skipline(int f) 684 { 685 int i, c; 686 687 for (i = 1; (c = getc(input[f])) != '\n'; i++) 688 if (c < 0) 689 return (i); 690 return (i); 691 } 692 693 static void 694 output(void) 695 { 696 int m, i0, i1, j0, j1; 697 698 input[0] = fopen(file1, "r"); 699 input[1] = fopen(file2, "r"); 700 m = len[0]; 701 J[0] = 0; 702 J[m + 1] = len[1] + 1; 703 if (opt != D_EDIT) { 704 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 705 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 706 i0++; 707 j0 = J[i0 - 1] + 1; 708 i1 = i0 - 1; 709 while (i1 < m && J[i1 + 1] == 0) 710 i1++; 711 j1 = J[i1 + 1] - 1; 712 J[i1] = j1; 713 change(i0, i1, j0, j1); 714 } 715 } else { 716 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 717 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 718 i0--; 719 j0 = J[i0 + 1] - 1; 720 i1 = i0 + 1; 721 while (i1 > 1 && J[i1 - 1] == 0) 722 i1--; 723 j1 = J[i1 - 1] + 1; 724 J[i1] = j1; 725 change(i1, i0, j1, j0); 726 } 727 } 728 if (m == 0) 729 change(1, 0, 1, len[1]); 730 if (opt == D_IFDEF) { 731 for (;;) { 732 #define c i0 733 c = getc(input[0]); 734 if (c < 0) 735 return; 736 putchar(c); 737 } 738 #undef c 739 } 740 if (anychange != 0) { 741 if (opt == D_CONTEXT) 742 dump_context_vec(); 743 else if (opt == D_UNIFIED) 744 dump_unified_vec(); 745 } 746 } 747 748 /* 749 * The following struct is used to record change information when 750 * doing a "context" diff. (see routine "change" to understand the 751 * highly mneumonic field names) 752 */ 753 struct context_vec { 754 int a; /* start line in old file */ 755 int b; /* end line in old file */ 756 int c; /* start line in new file */ 757 int d; /* end line in new file */ 758 }; 759 760 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 761 762 #define MAX_CONTEXT 128 763 764 /* 765 * indicate that there is a difference between lines a and b of the from file 766 * to get to lines c to d of the to file. If a is greater then b then there 767 * are no lines in the from file involved and this means that there were 768 * lines appended (beginning at b). If c is greater than d then there are 769 * lines missing from the to file. 770 */ 771 static void 772 change(int a, int b, int c, int d) 773 { 774 struct stat stbuf; 775 776 if (opt != D_IFDEF && a > b && c > d) 777 return; 778 if (anychange == 0) { 779 anychange = 1; 780 if (opt == D_CONTEXT || opt == D_UNIFIED) { 781 stat(file1, &stbuf); 782 printf("%s %s %s", opt == D_CONTEXT ? "***" : "---", 783 file1, ctime(&stbuf.st_mtime)); 784 stat(file2, &stbuf); 785 printf("%s %s %s", opt == D_CONTEXT ? "---" : "+++", 786 file2, ctime(&stbuf.st_mtime)); 787 context_vec_start = emalloc(MAX_CONTEXT * 788 sizeof(struct context_vec)); 789 context_vec_end = context_vec_start + MAX_CONTEXT; 790 context_vec_ptr = context_vec_start - 1; 791 } 792 } 793 if (opt == D_CONTEXT || opt == D_UNIFIED) { 794 /* 795 * if this new change is within 'context' lines of 796 * the previous change, just add it to the change 797 * record. If the record is full or if this 798 * change is more than 'context' lines from the previous 799 * change, dump the record, reset it & add the new change. 800 */ 801 if (context_vec_ptr >= context_vec_end || 802 (context_vec_ptr >= context_vec_start && 803 a > (context_vec_ptr->b + 2 * context) && 804 c > (context_vec_ptr->d + 2 * context))) { 805 if (opt == D_CONTEXT) 806 dump_context_vec(); 807 else 808 dump_unified_vec(); 809 } 810 context_vec_ptr++; 811 context_vec_ptr->a = a; 812 context_vec_ptr->b = b; 813 context_vec_ptr->c = c; 814 context_vec_ptr->d = d; 815 return; 816 } 817 switch (opt) { 818 819 case D_NORMAL: 820 case D_EDIT: 821 range(a, b, ","); 822 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 823 if (opt == D_NORMAL) 824 range(c, d, ","); 825 putchar('\n'); 826 break; 827 case D_REVERSE: 828 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 829 range(a, b, " "); 830 putchar('\n'); 831 break; 832 case D_NREVERSE: 833 if (a > b) 834 printf("a%d %d\n", b, d - c + 1); 835 else { 836 printf("d%d %d\n", a, b - a + 1); 837 if (!(c > d)) 838 /* add changed lines */ 839 printf("a%d %d\n", b, d - c + 1); 840 } 841 break; 842 } 843 if (opt == D_NORMAL || opt == D_IFDEF) { 844 fetch(ixold, a, b, input[0], "< ", 1); 845 if (a <= b && c <= d && opt == D_NORMAL) 846 prints("---\n"); 847 } 848 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 849 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 850 prints(".\n"); 851 if (inifdef) { 852 fprintf(stdout, "#endif /* %s */\n", endifname); 853 inifdef = 0; 854 } 855 } 856 857 static void 858 range(int a, int b, char *separator) 859 { 860 printf("%d", a > b ? b : a); 861 if (a < b) 862 printf("%s%d", separator, b); 863 } 864 865 static void 866 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 867 { 868 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 869 int i, j, c, col, nc; 870 871 /* 872 * When doing #ifdef's, copy down to current line 873 * if this is the first file, so that stuff makes it to output. 874 */ 875 if (opt == D_IFDEF && oldfile) { 876 long curpos = ftell(lb); 877 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 878 nc = f[a > b ? b : a - 1] - curpos; 879 for (i = 0; i < nc; i++) 880 putchar(getc(lb)); 881 } 882 if (a > b) 883 return; 884 if (opt == D_IFDEF) { 885 if (inifdef) 886 fprintf(stdout, "#else /* %s%s */\n", 887 oneflag && oldfile == 1 ? "!" : "", ifdef2); 888 else { 889 if (oneflag) { 890 /* There was only one ifdef given */ 891 endifname = ifdef2; 892 if (oldfile) 893 fprintf(stdout, "#ifndef %s\n", endifname); 894 else 895 fprintf(stdout, "#ifdef %s\n", endifname); 896 } else { 897 endifname = oldfile ? ifdef1 : ifdef2; 898 fprintf(stdout, "#ifdef %s\n", endifname); 899 } 900 } 901 inifdef = 1 + oldfile; 902 } 903 for (i = a; i <= b; i++) { 904 fseek(lb, f[i - 1], SEEK_SET); 905 nc = f[i] - f[i - 1]; 906 if (opt != D_IFDEF) 907 prints(s); 908 col = 0; 909 for (j = 0; j < nc; j++) { 910 c = getc(lb); 911 if (c == '\t' && tflag) 912 do 913 putchar(' '); 914 while (++col & 7); 915 else { 916 putchar(c); 917 col++; 918 } 919 } 920 } 921 922 if (inifdef && !wantelses) { 923 fprintf(stdout, "#endif /* %s */\n", endifname); 924 inifdef = 0; 925 } 926 } 927 928 #define POW2 /* define only if HALFLONG is 2**n */ 929 #define HALFLONG 16 930 #define low(x) (x&((1L<<HALFLONG)-1)) 931 #define high(x) (x>>HALFLONG) 932 933 /* 934 * hashing has the effect of 935 * arranging line in 7-bit bytes and then 936 * summing 1-s complement in 16-bit hunks 937 */ 938 static int 939 readhash(FILE *f) 940 { 941 unsigned int shift; 942 int t, space; 943 long sum; 944 945 sum = 1; 946 space = 0; 947 if (!bflag && !wflag) { 948 if (iflag) 949 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 950 if (t == -1) 951 return (0); 952 sum += (long)chrtran[t] << (shift 953 #ifdef POW2 954 &= HALFLONG - 1); 955 #else 956 %= HALFLONG); 957 #endif 958 } 959 else 960 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 961 if (t == -1) 962 return (0); 963 sum += (long)t << (shift 964 #ifdef POW2 965 &= HALFLONG - 1); 966 #else 967 %= HALFLONG); 968 #endif 969 } 970 } else { 971 for (shift = 0;;) { 972 switch (t = getc(f)) { 973 case -1: 974 return (0); 975 case '\t': 976 case ' ': 977 space++; 978 continue; 979 default: 980 if (space && !wflag) { 981 shift += 7; 982 space = 0; 983 } 984 sum += (long)chrtran[t] << (shift 985 #ifdef POW2 986 &= HALFLONG - 1); 987 #else 988 %= HALFLONG); 989 #endif 990 shift += 7; 991 continue; 992 case '\n': 993 break; 994 } 995 break; 996 } 997 } 998 sum = low(sum) + high(sum); 999 return ((short) low(sum) + (short) high(sum)); 1000 } 1001 1002 static int 1003 asciifile(FILE *f) 1004 { 1005 char buf[BUFSIZ], *cp; 1006 int cnt; 1007 1008 fseek(f, 0L, SEEK_SET); 1009 cnt = fread(buf, 1, BUFSIZ, f); 1010 cp = buf; 1011 while (--cnt >= 0) 1012 if (*cp++ & 0200) 1013 return (0); 1014 return (1); 1015 } 1016 1017 /* dump accumulated "context" diff changes */ 1018 static void 1019 dump_context_vec(void) 1020 { 1021 struct context_vec *cvp = context_vec_start; 1022 int lowa, upb, lowc, upd, do_output; 1023 int a, b, c, d; 1024 char ch; 1025 1026 if (cvp > context_vec_ptr) 1027 return; 1028 1029 b = d = 0; /* gcc */ 1030 lowa = max(1, cvp->a - context); 1031 upb = min(len[0], context_vec_ptr->b + context); 1032 lowc = max(1, cvp->c - context); 1033 upd = min(len[1], context_vec_ptr->d + context); 1034 1035 printf("***************\n*** "); 1036 range(lowa, upb, ","); 1037 printf(" ****\n"); 1038 1039 /* 1040 * output changes to the "old" file. The first loop suppresses 1041 * output if there were no changes to the "old" file (we'll see 1042 * the "old" lines as context in the "new" list). 1043 */ 1044 do_output = 0; 1045 for (; cvp <= context_vec_ptr; cvp++) 1046 if (cvp->a <= cvp->b) { 1047 cvp = context_vec_start; 1048 do_output++; 1049 break; 1050 } 1051 if (do_output) { 1052 while (cvp <= context_vec_ptr) { 1053 a = cvp->a; 1054 b = cvp->b; 1055 c = cvp->c; 1056 d = cvp->d; 1057 1058 if (a <= b && c <= d) 1059 ch = 'c'; 1060 else 1061 ch = (a <= b) ? 'd' : 'a'; 1062 1063 if (ch == 'a') 1064 fetch(ixold, lowa, b, input[0], " ", 0); 1065 else { 1066 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1067 fetch(ixold, a, b, input[0], 1068 ch == 'c' ? "! " : "- ", 0); 1069 } 1070 lowa = b + 1; 1071 cvp++; 1072 } 1073 fetch(ixold, b + 1, upb, input[0], " ", 0); 1074 } 1075 /* output changes to the "new" file */ 1076 printf("--- "); 1077 range(lowc, upd, ","); 1078 printf(" ----\n"); 1079 1080 do_output = 0; 1081 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1082 if (cvp->c <= cvp->d) { 1083 cvp = context_vec_start; 1084 do_output++; 1085 break; 1086 } 1087 if (do_output) { 1088 while (cvp <= context_vec_ptr) { 1089 a = cvp->a; 1090 b = cvp->b; 1091 c = cvp->c; 1092 d = cvp->d; 1093 1094 if (a <= b && c <= d) 1095 ch = 'c'; 1096 else 1097 ch = (a <= b) ? 'd' : 'a'; 1098 1099 if (ch == 'd') 1100 fetch(ixnew, lowc, d, input[1], " ", 0); 1101 else { 1102 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1103 fetch(ixnew, c, d, input[1], 1104 ch == 'c' ? "! " : "+ ", 0); 1105 } 1106 lowc = d + 1; 1107 cvp++; 1108 } 1109 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1110 } 1111 context_vec_ptr = context_vec_start - 1; 1112 } 1113 1114 /* dump accumulated "unified" diff changes */ 1115 static void 1116 dump_unified_vec(void) 1117 { 1118 struct context_vec *cvp = context_vec_start; 1119 int lowa, upb, lowc, upd; 1120 int a, b, c, d; 1121 char ch; 1122 1123 if (cvp > context_vec_ptr) 1124 return; 1125 1126 b = d = 0; /* gcc */ 1127 lowa = max(1, cvp->a - context); 1128 upb = min(len[0], context_vec_ptr->b + context); 1129 lowc = max(1, cvp->c - context); 1130 upd = min(len[1], context_vec_ptr->d + context); 1131 1132 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1133 lowc, upd - lowc + 1); 1134 1135 /* 1136 * Output changes in "unified" diff format--the old and new lines 1137 * are printed together. 1138 */ 1139 for (; cvp <= context_vec_ptr; cvp++) { 1140 a = cvp->a; 1141 b = cvp->b; 1142 c = cvp->c; 1143 d = cvp->d; 1144 1145 /* 1146 * c: both new and old changes 1147 * d: only changes in the old file 1148 * a: only changes in the new file 1149 */ 1150 if (a <= b && c <= d) 1151 ch = 'c'; 1152 else 1153 ch = (a <= b) ? 'd' : 'a'; 1154 1155 switch (ch) { 1156 case 'c': 1157 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1158 fetch(ixold, a, b, input[0], "-", 0); 1159 fetch(ixnew, c, d, input[1], "+", 0); 1160 break; 1161 case 'd': 1162 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1163 fetch(ixold, a, b, input[0], "-", 0); 1164 break; 1165 case 'a': 1166 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1167 fetch(ixnew, c, d, input[1], "+", 0); 1168 break; 1169 } 1170 lowa = b + 1; 1171 lowc = d + 1; 1172 } 1173 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1174 1175 context_vec_ptr = context_vec_start - 1; 1176 } 1177