1 /* $OpenBSD: diffreg.c,v 1.24 2003/07/02 18:54:13 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void dump_unified_vec(void); 153 static void prepare(int, FILE *); 154 static void prune(void); 155 static void equiv(struct line *, int, struct line *, int, int *); 156 static void unravel(int); 157 static void unsort(struct line *, int, int *); 158 static void change(int, int, int, int); 159 static void sort(struct line *, int); 160 static int newcand(int, int, int); 161 static int search(int *, int, int); 162 static int skipline(int); 163 static int asciifile(FILE *); 164 static int stone(int *, int, int *, int *); 165 static int readhash(FILE *); 166 167 /* 168 * chrtran points to one of 2 translation tables: cup2low if folding upper to 169 * lower case clow2low if not folding case 170 */ 171 u_char clow2low[256] = { 172 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 173 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 174 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 175 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 176 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 177 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 178 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 179 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 180 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 181 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 182 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 183 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 184 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 185 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 186 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 187 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 188 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 189 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 190 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 191 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 192 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 193 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 194 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 195 0xfd, 0xfe, 0xff 196 }; 197 198 u_char cup2low[256] = { 199 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 200 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 201 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 202 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 203 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 204 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 205 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 206 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 207 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 208 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 209 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 210 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 211 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 212 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 213 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 214 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 215 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 216 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 217 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 218 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 219 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 220 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 221 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 222 0xfd, 0xfe, 0xff 223 }; 224 225 void 226 diffreg(void) 227 { 228 char buf1[BUFSIZ], buf2[BUFSIZ]; 229 FILE *f1, *f2; 230 int i, j; 231 232 if (hflag) { 233 diffargv[0] = "diffh"; 234 execv(diffh, diffargv); 235 error("%s", diffh); 236 } 237 chrtran = (iflag ? cup2low : clow2low); 238 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 239 errorx("can't specify - -"); 240 if (S_ISDIR(stb1.st_mode)) { 241 file1 = splice(file1, file2); 242 if (stat(file1, &stb1) < 0) 243 error("%s", file1); 244 } else if (strcmp(file1, "-") == 0 || 245 (!S_ISREG(stb1.st_mode) && strcmp(file1, _PATH_DEVNULL) != 0)) { 246 file1 = copytemp(file1, 1); 247 if (stat(file1, &stb1) < 0) 248 error("%s", file1); 249 } 250 if (S_ISDIR(stb2.st_mode)) { 251 file2 = splice(file2, file1); 252 if (stat(file2, &stb2) < 0) 253 error("%s", file2); 254 } else if (strcmp(file2, "-") == 0 || 255 (!S_ISREG(stb2.st_mode) && strcmp(file2, _PATH_DEVNULL) != 0)) { 256 file2 = copytemp(file2, 2); 257 if (stat(file2, &stb2) < 0) 258 error("%s", file2); 259 } 260 if ((f1 = fopen(file1, "r")) == NULL) 261 error("%s", file1); 262 if ((f2 = fopen(file2, "r")) == NULL) 263 error("%s", file2); 264 if ((stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT) || 265 stb1.st_size != stb2.st_size) 266 goto notsame; 267 for (;;) { 268 i = fread(buf1, 1, BUFSIZ, f1); 269 j = fread(buf2, 1, BUFSIZ, f2); 270 if (i < 0 || j < 0 || i != j) 271 goto notsame; 272 if (i == 0 && j == 0) { 273 fclose(f1); 274 fclose(f2); 275 status = 0; /* files don't differ */ 276 goto same; 277 } 278 for (j = 0; j < i; j++) 279 if (buf1[j] != buf2[j]) 280 goto notsame; 281 } 282 notsame: 283 /* 284 * Files certainly differ at this point; set status accordingly 285 */ 286 status = 1; 287 if (!asciifile(f1) || !asciifile(f2)) { 288 printf("Binary files %s and %s differ\n", file1, file2); 289 exit(status); 290 } 291 prepare(0, f1); 292 prepare(1, f2); 293 fclose(f1); 294 fclose(f2); 295 prune(); 296 sort(sfile[0], slen[0]); 297 sort(sfile[1], slen[1]); 298 299 member = (int *)file[1]; 300 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 301 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 302 303 class = (int *)file[0]; 304 unsort(sfile[0], slen[0], class); 305 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 306 307 klist = emalloc((slen[0] + 2) * sizeof(int)); 308 clist = emalloc(sizeof(cand)); 309 i = stone(class, slen[0], member, klist); 310 free(member); 311 free(class); 312 313 J = emalloc((len[0] + 2) * sizeof(int)); 314 unravel(klist[i]); 315 free(clist); 316 free(klist); 317 318 ixold = emalloc((len[0] + 2) * sizeof(long)); 319 ixnew = emalloc((len[1] + 2) * sizeof(long)); 320 check(); 321 output(); 322 status = anychange; 323 same: 324 if (anychange == 0 && (opt == D_CONTEXT || opt == D_UNIFIED)) 325 printf("No differences encountered\n"); 326 } 327 328 char *tempfiles[2]; 329 330 char * 331 copytemp(const char *file, int n) 332 { 333 char buf[BUFSIZ], *tempdir, *tempfile; 334 int i, ifd, ofd; 335 336 if (n != 1 && n != 2) 337 return (NULL); 338 339 if (strcmp(file, "-") == 0) 340 ifd = STDIN_FILENO; 341 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 342 error("%s", file); 343 344 if ((tempdir = getenv("TMPDIR")) == NULL) 345 tempdir = _PATH_TMP; 346 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 347 error(NULL); 348 tempfiles[n - 1] = tempfile; 349 350 signal(SIGHUP, done); 351 signal(SIGINT, done); 352 signal(SIGPIPE, done); 353 signal(SIGTERM, done); 354 ofd = mkstemp(tempfile); 355 if (ofd < 0) 356 error("%s", tempfile); 357 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 358 if (write(ofd, buf, i) != i) 359 error("%s", tempfile); 360 } 361 close(ifd); 362 close(ofd); 363 return (tempfile); 364 } 365 366 char * 367 splice(char *dir, char *file) 368 { 369 char *tail, *buf; 370 size_t len; 371 372 if (!strcmp(file, "-")) 373 errorx("can't specify - with other arg directory"); 374 tail = strrchr(file, '/'); 375 if (tail == NULL) 376 tail = file; 377 else 378 tail++; 379 len = strlen(dir) + 1 + strlen(tail) + 1; 380 buf = emalloc(len); 381 snprintf(buf, len, "%s/%s", dir, tail); 382 return (buf); 383 } 384 385 static void 386 prepare(int i, FILE *fd) 387 { 388 struct line *p; 389 int j, h; 390 391 fseek(fd, 0L, SEEK_SET); 392 p = emalloc(3 * sizeof(struct line)); 393 for (j = 0; (h = readhash(fd));) { 394 p = erealloc(p, (++j + 3) * sizeof(struct line)); 395 p[j].value = h; 396 } 397 len[i] = j; 398 file[i] = p; 399 } 400 401 static void 402 prune(void) 403 { 404 int i, j; 405 406 for (pref = 0; pref < len[0] && pref < len[1] && 407 file[0][pref + 1].value == file[1][pref + 1].value; 408 pref++) 409 ; 410 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 411 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 412 suff++) 413 ; 414 for (j = 0; j < 2; j++) { 415 sfile[j] = file[j] + pref; 416 slen[j] = len[j] - pref - suff; 417 for (i = 0; i <= slen[j]; i++) 418 sfile[j][i].serial = i; 419 } 420 } 421 422 static void 423 equiv(struct line *a, int n, struct line *b, int m, int *c) 424 { 425 int i, j; 426 427 i = j = 1; 428 while (i <= n && j <= m) { 429 if (a[i].value < b[j].value) 430 a[i++].value = 0; 431 else if (a[i].value == b[j].value) 432 a[i++].value = j; 433 else 434 j++; 435 } 436 while (i <= n) 437 a[i++].value = 0; 438 b[m + 1].value = 0; 439 j = 0; 440 while (++j <= m) { 441 c[j] = -b[j].serial; 442 while (b[j + 1].value == b[j].value) { 443 j++; 444 c[j] = b[j].serial; 445 } 446 } 447 c[j] = -1; 448 } 449 450 static int 451 stone(int *a, int n, int *b, int *c) 452 { 453 int i, k, y, j, l; 454 int oldc, tc, oldl; 455 456 k = 0; 457 c[0] = newcand(0, 0, 0); 458 for (i = 1; i <= n; i++) { 459 j = a[i]; 460 if (j == 0) 461 continue; 462 y = -b[j]; 463 oldl = 0; 464 oldc = c[0]; 465 do { 466 if (y <= clist[oldc].y) 467 continue; 468 l = search(c, k, y); 469 if (l != oldl + 1) 470 oldc = c[l - 1]; 471 if (l <= k) { 472 if (clist[c[l]].y <= y) 473 continue; 474 tc = c[l]; 475 c[l] = newcand(i, y, oldc); 476 oldc = tc; 477 oldl = l; 478 } else { 479 c[l] = newcand(i, y, oldc); 480 k++; 481 break; 482 } 483 } while ((y = b[++j]) > 0); 484 } 485 return (k); 486 } 487 488 static int 489 newcand(int x, int y, int pred) 490 { 491 struct cand *q; 492 493 clist = erealloc(clist, ++clen * sizeof(cand)); 494 q = clist + clen - 1; 495 q->x = x; 496 q->y = y; 497 q->pred = pred; 498 return (clen - 1); 499 } 500 501 static int 502 search(int *c, int k, int y) 503 { 504 int i, j, l, t; 505 506 if (clist[c[k]].y < y) /* quick look for typical case */ 507 return (k + 1); 508 i = 0; 509 j = k + 1; 510 while (1) { 511 l = i + j; 512 if ((l >>= 1) <= i) 513 break; 514 t = clist[c[l]].y; 515 if (t > y) 516 j = l; 517 else if (t < y) 518 i = l; 519 else 520 return (l); 521 } 522 return (l + 1); 523 } 524 525 static void 526 unravel(int p) 527 { 528 struct cand *q; 529 int i; 530 531 for (i = 0; i <= len[0]; i++) 532 J[i] = i <= pref ? i : 533 i > len[0] - suff ? i + len[1] - len[0] : 0; 534 for (q = clist + p; q->y != 0; q = clist + q->pred) 535 J[q->x + pref] = q->y + pref; 536 } 537 538 /* 539 * Check does double duty: 540 * 1. ferret out any fortuitous correspondences due 541 * to confounding by hashing (which result in "jackpot") 542 * 2. collect random access indexes to the two files 543 */ 544 static void 545 check(void) 546 { 547 int i, j, jackpot, c, d; 548 long ctold, ctnew; 549 550 if ((input[0] = fopen(file1, "r")) == NULL) 551 error("%s", file1); 552 if ((input[1] = fopen(file2, "r")) == NULL) 553 error("%s", file2); 554 j = 1; 555 ixold[0] = ixnew[0] = 0; 556 jackpot = 0; 557 ctold = ctnew = 0; 558 for (i = 1; i <= len[0]; i++) { 559 if (J[i] == 0) { 560 ixold[i] = ctold += skipline(0); 561 continue; 562 } 563 while (j < J[i]) { 564 ixnew[j] = ctnew += skipline(1); 565 j++; 566 } 567 if (bflag || wflag || iflag) { 568 for (;;) { 569 c = getc(input[0]); 570 d = getc(input[1]); 571 ctold++; 572 ctnew++; 573 if (bflag && isspace(c) && isspace(d)) { 574 do { 575 if (c == '\n') 576 break; 577 ctold++; 578 } while (isspace(c = getc(input[0]))); 579 do { 580 if (d == '\n') 581 break; 582 ctnew++; 583 } while (isspace(d = getc(input[1]))); 584 } else if (wflag) { 585 while (isspace(c) && c != '\n') { 586 c = getc(input[0]); 587 ctold++; 588 } 589 while (isspace(d) && d != '\n') { 590 d = getc(input[1]); 591 ctnew++; 592 } 593 } 594 if (chrtran[c] != chrtran[d]) { 595 jackpot++; 596 J[i] = 0; 597 if (c != '\n') 598 ctold += skipline(0); 599 if (d != '\n') 600 ctnew += skipline(1); 601 break; 602 } 603 if (c == '\n') 604 break; 605 } 606 } else { 607 for (;;) { 608 ctold++; 609 ctnew++; 610 if ((c = getc(input[0])) != (d = getc(input[1]))) { 611 /* jackpot++; */ 612 J[i] = 0; 613 if (c != '\n') 614 ctold += skipline(0); 615 if (d != '\n') 616 ctnew += skipline(1); 617 break; 618 } 619 if (c == '\n') 620 break; 621 } 622 } 623 ixold[i] = ctold; 624 ixnew[j] = ctnew; 625 j++; 626 } 627 for (; j <= len[1]; j++) { 628 ixnew[j] = ctnew += skipline(1); 629 } 630 fclose(input[0]); 631 fclose(input[1]); 632 /* 633 * if (jackpot) 634 * fprintf(stderr, "jackpot\n"); 635 */ 636 } 637 638 /* shellsort CACM #201 */ 639 static void 640 sort(struct line *a, int n) 641 { 642 struct line *ai, *aim, w; 643 int j, m = 0, k; 644 645 if (n == 0) 646 return; 647 for (j = 1; j <= n; j *= 2) 648 m = 2 * j - 1; 649 for (m /= 2; m != 0; m /= 2) { 650 k = n - m; 651 for (j = 1; j <= k; j++) { 652 for (ai = &a[j]; ai > a; ai -= m) { 653 aim = &ai[m]; 654 if (aim < ai) 655 break; /* wraparound */ 656 if (aim->value > ai[0].value || 657 (aim->value == ai[0].value && 658 aim->serial > ai[0].serial)) 659 break; 660 w.value = ai[0].value; 661 ai[0].value = aim->value; 662 aim->value = w.value; 663 w.serial = ai[0].serial; 664 ai[0].serial = aim->serial; 665 aim->serial = w.serial; 666 } 667 } 668 } 669 } 670 671 static void 672 unsort(struct line *f, int l, int *b) 673 { 674 int *a, i; 675 676 a = emalloc((l + 1) * sizeof(int)); 677 for (i = 1; i <= l; i++) 678 a[f[i].serial] = f[i].value; 679 for (i = 1; i <= l; i++) 680 b[i] = a[i]; 681 free(a); 682 } 683 684 static int 685 skipline(int f) 686 { 687 int i, c; 688 689 for (i = 1; (c = getc(input[f])) != '\n'; i++) 690 if (c < 0) 691 return (i); 692 return (i); 693 } 694 695 static void 696 output(void) 697 { 698 int m, i0, i1, j0, j1; 699 700 input[0] = fopen(file1, "r"); 701 input[1] = fopen(file2, "r"); 702 m = len[0]; 703 J[0] = 0; 704 J[m + 1] = len[1] + 1; 705 if (opt != D_EDIT) { 706 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 707 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 708 i0++; 709 j0 = J[i0 - 1] + 1; 710 i1 = i0 - 1; 711 while (i1 < m && J[i1 + 1] == 0) 712 i1++; 713 j1 = J[i1 + 1] - 1; 714 J[i1] = j1; 715 change(i0, i1, j0, j1); 716 } 717 } else { 718 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 719 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 720 i0--; 721 j0 = J[i0 + 1] - 1; 722 i1 = i0 + 1; 723 while (i1 > 1 && J[i1 - 1] == 0) 724 i1--; 725 j1 = J[i1 - 1] + 1; 726 J[i1] = j1; 727 change(i1, i0, j1, j0); 728 } 729 } 730 if (m == 0) 731 change(1, 0, 1, len[1]); 732 if (opt == D_IFDEF) { 733 for (;;) { 734 #define c i0 735 c = getc(input[0]); 736 if (c < 0) 737 return; 738 putchar(c); 739 } 740 #undef c 741 } 742 if (anychange != 0) { 743 if (opt == D_CONTEXT) 744 dump_context_vec(); 745 else if (opt == D_UNIFIED) 746 dump_unified_vec(); 747 } 748 } 749 750 /* 751 * The following struct is used to record change information when 752 * doing a "context" diff. (see routine "change" to understand the 753 * highly mneumonic field names) 754 */ 755 struct context_vec { 756 int a; /* start line in old file */ 757 int b; /* end line in old file */ 758 int c; /* start line in new file */ 759 int d; /* end line in new file */ 760 }; 761 762 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 763 764 #define MAX_CONTEXT 128 765 766 /* 767 * indicate that there is a difference between lines a and b of the from file 768 * to get to lines c to d of the to file. If a is greater then b then there 769 * are no lines in the from file involved and this means that there were 770 * lines appended (beginning at b). If c is greater than d then there are 771 * lines missing from the to file. 772 */ 773 static void 774 change(int a, int b, int c, int d) 775 { 776 struct stat stbuf; 777 778 if (opt != D_IFDEF && a > b && c > d) 779 return; 780 if (anychange == 0) { 781 anychange = 1; 782 if (opt == D_CONTEXT || opt == D_UNIFIED) { 783 stat(file1, &stbuf); 784 printf("%s %s %s", opt == D_CONTEXT ? "***" : "---", 785 file1, ctime(&stbuf.st_mtime)); 786 stat(file2, &stbuf); 787 printf("%s %s %s", opt == D_CONTEXT ? "---" : "+++", 788 file2, ctime(&stbuf.st_mtime)); 789 context_vec_start = emalloc(MAX_CONTEXT * 790 sizeof(struct context_vec)); 791 context_vec_end = context_vec_start + MAX_CONTEXT; 792 context_vec_ptr = context_vec_start - 1; 793 } 794 } 795 if (opt == D_CONTEXT || opt == D_UNIFIED) { 796 /* 797 * If this new change is within 'context' lines of 798 * the previous change, just add it to the change 799 * record. If the record is full or if this 800 * change is more than 'context' lines from the previous 801 * change, dump the record, reset it & add the new change. 802 */ 803 if (context_vec_ptr >= context_vec_end || 804 (context_vec_ptr >= context_vec_start && 805 a > (context_vec_ptr->b + 2 * context) && 806 c > (context_vec_ptr->d + 2 * context))) { 807 if (opt == D_CONTEXT) 808 dump_context_vec(); 809 else 810 dump_unified_vec(); 811 } 812 context_vec_ptr++; 813 context_vec_ptr->a = a; 814 context_vec_ptr->b = b; 815 context_vec_ptr->c = c; 816 context_vec_ptr->d = d; 817 return; 818 } 819 switch (opt) { 820 821 case D_NORMAL: 822 case D_EDIT: 823 range(a, b, ","); 824 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 825 if (opt == D_NORMAL) 826 range(c, d, ","); 827 putchar('\n'); 828 break; 829 case D_REVERSE: 830 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 831 range(a, b, " "); 832 putchar('\n'); 833 break; 834 case D_NREVERSE: 835 if (a > b) 836 printf("a%d %d\n", b, d - c + 1); 837 else { 838 printf("d%d %d\n", a, b - a + 1); 839 if (!(c > d)) 840 /* add changed lines */ 841 printf("a%d %d\n", b, d - c + 1); 842 } 843 break; 844 } 845 if (opt == D_NORMAL || opt == D_IFDEF) { 846 fetch(ixold, a, b, input[0], "< ", 1); 847 if (a <= b && c <= d && opt == D_NORMAL) 848 prints("---\n"); 849 } 850 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 851 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 852 prints(".\n"); 853 if (inifdef) { 854 fprintf(stdout, "#endif /* %s */\n", ifdefname); 855 inifdef = 0; 856 } 857 } 858 859 static void 860 range(int a, int b, char *separator) 861 { 862 printf("%d", a > b ? b : a); 863 if (a < b) 864 printf("%s%d", separator, b); 865 } 866 867 static void 868 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 869 { 870 int i, j, c, col, nc; 871 872 /* 873 * When doing #ifdef's, copy down to current line 874 * if this is the first file, so that stuff makes it to output. 875 */ 876 if (opt == D_IFDEF && oldfile) { 877 long curpos = ftell(lb); 878 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 879 nc = f[a > b ? b : a - 1] - curpos; 880 for (i = 0; i < nc; i++) 881 putchar(getc(lb)); 882 } 883 if (a > b) 884 return; 885 if (opt == D_IFDEF) { 886 if (inifdef) { 887 fprintf(stdout, "#else /* %s%s */\n", 888 oldfile == 1 ? "!" : "", ifdefname); 889 } else { 890 if (oldfile) 891 fprintf(stdout, "#ifndef %s\n", ifdefname); 892 else 893 fprintf(stdout, "#ifdef %s\n", ifdefname); 894 } 895 inifdef = 1 + oldfile; 896 } 897 for (i = a; i <= b; i++) { 898 fseek(lb, f[i - 1], SEEK_SET); 899 nc = f[i] - f[i - 1]; 900 if (opt != D_IFDEF) 901 prints(s); 902 col = 0; 903 for (j = 0; j < nc; j++) { 904 c = getc(lb); 905 if (c == '\t' && tflag) 906 do 907 putchar(' '); 908 while (++col & 7); 909 else { 910 putchar(c); 911 col++; 912 } 913 } 914 } 915 } 916 917 #define POW2 /* define only if HALFLONG is 2**n */ 918 #define HALFLONG 16 919 #define low(x) (x&((1L<<HALFLONG)-1)) 920 #define high(x) (x>>HALFLONG) 921 922 /* 923 * hashing has the effect of 924 * arranging line in 7-bit bytes and then 925 * summing 1-s complement in 16-bit hunks 926 */ 927 static int 928 readhash(FILE *f) 929 { 930 unsigned int shift; 931 int t, space; 932 long sum; 933 934 sum = 1; 935 space = 0; 936 if (!bflag && !wflag) { 937 if (iflag) 938 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 939 if (t == -1) 940 return (0); 941 sum += (long)chrtran[t] << (shift 942 #ifdef POW2 943 &= HALFLONG - 1); 944 #else 945 %= HALFLONG); 946 #endif 947 } 948 else 949 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 950 if (t == -1) 951 return (0); 952 sum += (long)t << (shift 953 #ifdef POW2 954 &= HALFLONG - 1); 955 #else 956 %= HALFLONG); 957 #endif 958 } 959 } else { 960 for (shift = 0;;) { 961 switch (t = getc(f)) { 962 case -1: 963 return (0); 964 case '\t': 965 case ' ': 966 space++; 967 continue; 968 default: 969 if (space && !wflag) { 970 shift += 7; 971 space = 0; 972 } 973 sum += (long)chrtran[t] << (shift 974 #ifdef POW2 975 &= HALFLONG - 1); 976 #else 977 %= HALFLONG); 978 #endif 979 shift += 7; 980 continue; 981 case '\n': 982 break; 983 } 984 break; 985 } 986 } 987 sum = low(sum) + high(sum); 988 return ((short) low(sum) + (short) high(sum)); 989 } 990 991 static int 992 asciifile(FILE *f) 993 { 994 char buf[BUFSIZ], *cp; 995 int cnt; 996 997 if (aflag) 998 return (1); 999 1000 fseek(f, 0L, SEEK_SET); 1001 cnt = fread(buf, 1, BUFSIZ, f); 1002 cp = buf; 1003 while (--cnt >= 0) 1004 if (*cp++ & 0200) 1005 return (0); 1006 return (1); 1007 } 1008 1009 /* dump accumulated "context" diff changes */ 1010 static void 1011 dump_context_vec(void) 1012 { 1013 struct context_vec *cvp = context_vec_start; 1014 int lowa, upb, lowc, upd, do_output; 1015 int a, b, c, d; 1016 char ch; 1017 1018 if (context_vec_start > context_vec_ptr) 1019 return; 1020 1021 b = d = 0; /* gcc */ 1022 lowa = max(1, cvp->a - context); 1023 upb = min(len[0], context_vec_ptr->b + context); 1024 lowc = max(1, cvp->c - context); 1025 upd = min(len[1], context_vec_ptr->d + context); 1026 1027 printf("***************\n*** "); 1028 range(lowa, upb, ","); 1029 printf(" ****\n"); 1030 1031 /* 1032 * output changes to the "old" file. The first loop suppresses 1033 * output if there were no changes to the "old" file (we'll see 1034 * the "old" lines as context in the "new" list). 1035 */ 1036 do_output = 0; 1037 for (; cvp <= context_vec_ptr; cvp++) 1038 if (cvp->a <= cvp->b) { 1039 cvp = context_vec_start; 1040 do_output++; 1041 break; 1042 } 1043 if (do_output) { 1044 while (cvp <= context_vec_ptr) { 1045 a = cvp->a; 1046 b = cvp->b; 1047 c = cvp->c; 1048 d = cvp->d; 1049 1050 if (a <= b && c <= d) 1051 ch = 'c'; 1052 else 1053 ch = (a <= b) ? 'd' : 'a'; 1054 1055 if (ch == 'a') 1056 fetch(ixold, lowa, b, input[0], " ", 0); 1057 else { 1058 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1059 fetch(ixold, a, b, input[0], 1060 ch == 'c' ? "! " : "- ", 0); 1061 } 1062 lowa = b + 1; 1063 cvp++; 1064 } 1065 fetch(ixold, b + 1, upb, input[0], " ", 0); 1066 } 1067 /* output changes to the "new" file */ 1068 printf("--- "); 1069 range(lowc, upd, ","); 1070 printf(" ----\n"); 1071 1072 do_output = 0; 1073 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1074 if (cvp->c <= cvp->d) { 1075 cvp = context_vec_start; 1076 do_output++; 1077 break; 1078 } 1079 if (do_output) { 1080 while (cvp <= context_vec_ptr) { 1081 a = cvp->a; 1082 b = cvp->b; 1083 c = cvp->c; 1084 d = cvp->d; 1085 1086 if (a <= b && c <= d) 1087 ch = 'c'; 1088 else 1089 ch = (a <= b) ? 'd' : 'a'; 1090 1091 if (ch == 'd') 1092 fetch(ixnew, lowc, d, input[1], " ", 0); 1093 else { 1094 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1095 fetch(ixnew, c, d, input[1], 1096 ch == 'c' ? "! " : "+ ", 0); 1097 } 1098 lowc = d + 1; 1099 cvp++; 1100 } 1101 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1102 } 1103 context_vec_ptr = context_vec_start - 1; 1104 } 1105 1106 /* dump accumulated "unified" diff changes */ 1107 static void 1108 dump_unified_vec(void) 1109 { 1110 struct context_vec *cvp = context_vec_start; 1111 int lowa, upb, lowc, upd; 1112 int a, b, c, d; 1113 char ch; 1114 1115 if (context_vec_start > context_vec_ptr) 1116 return; 1117 1118 b = d = 0; /* gcc */ 1119 lowa = max(1, cvp->a - context); 1120 upb = min(len[0], context_vec_ptr->b + context); 1121 lowc = max(1, cvp->c - context); 1122 upd = min(len[1], context_vec_ptr->d + context); 1123 1124 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1125 lowc, upd - lowc + 1); 1126 1127 /* 1128 * Output changes in "unified" diff format--the old and new lines 1129 * are printed together. 1130 */ 1131 for (; cvp <= context_vec_ptr; cvp++) { 1132 a = cvp->a; 1133 b = cvp->b; 1134 c = cvp->c; 1135 d = cvp->d; 1136 1137 /* 1138 * c: both new and old changes 1139 * d: only changes in the old file 1140 * a: only changes in the new file 1141 */ 1142 if (a <= b && c <= d) 1143 ch = 'c'; 1144 else 1145 ch = (a <= b) ? 'd' : 'a'; 1146 1147 switch (ch) { 1148 case 'c': 1149 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1150 fetch(ixold, a, b, input[0], "-", 0); 1151 fetch(ixnew, c, d, input[1], "+", 0); 1152 break; 1153 case 'd': 1154 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1155 fetch(ixold, a, b, input[0], "-", 0); 1156 break; 1157 case 'a': 1158 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1159 fetch(ixnew, c, d, input[1], "+", 0); 1160 break; 1161 } 1162 lowa = b + 1; 1163 lowc = d + 1; 1164 } 1165 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1166 1167 context_vec_ptr = context_vec_start - 1; 1168 } 1169