1 /* $OpenBSD: diffreg.c,v 1.25 2003/07/04 02:54:36 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int inifdef; /* whether or not we are in a #ifdef block */ 138 int *class; /* will be overlaid on file[0] */ 139 int *member; /* will be overlaid on file[1] */ 140 int *klist; /* will be overlaid on file[0] after class */ 141 struct cand *clist; /* merely a free storage pot for candidates */ 142 int clen = 0; 143 int *J; /* will be overlaid on class */ 144 long *ixold; /* will be overlaid on klist */ 145 long *ixnew; /* will be overlaid on file[1] */ 146 u_char *chrtran; /* translation table for case-folding */ 147 148 static void fetch(long *, int, int, FILE *, char *, int); 149 static void output(void); 150 static void check(void); 151 static void range(int, int, char *); 152 static void dump_context_vec(void); 153 static void dump_unified_vec(void); 154 static void prepare(int, FILE *); 155 static void prune(void); 156 static void equiv(struct line *, int, struct line *, int, int *); 157 static void unravel(int); 158 static void unsort(struct line *, int, int *); 159 static void change(int, int, int, int); 160 static void sort(struct line *, int); 161 static int newcand(int, int, int); 162 static int search(int *, int, int); 163 static int skipline(int); 164 static int asciifile(FILE *); 165 static int stone(int *, int, int *, int *); 166 static int readhash(FILE *); 167 168 /* 169 * chrtran points to one of 2 translation tables: cup2low if folding upper to 170 * lower case clow2low if not folding case 171 */ 172 u_char clow2low[256] = { 173 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 174 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 175 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 176 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 177 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 178 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 179 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 180 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 181 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 182 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 183 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 184 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 185 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 186 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 187 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 188 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 189 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 190 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 191 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 192 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 193 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 194 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 195 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 196 0xfd, 0xfe, 0xff 197 }; 198 199 u_char cup2low[256] = { 200 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 201 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 202 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 203 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 204 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 205 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 206 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 207 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 208 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 209 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 210 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 211 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 212 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 213 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 214 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 215 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 216 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 217 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 218 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 219 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 220 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 221 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 222 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 223 0xfd, 0xfe, 0xff 224 }; 225 226 void 227 diffreg(void) 228 { 229 char buf1[BUFSIZ], buf2[BUFSIZ]; 230 FILE *f1, *f2; 231 int i, j; 232 233 if (hflag) { 234 diffargv[0] = "diffh"; 235 execv(_PATH_DIFFH, diffargv); 236 error("%s", _PATH_DIFFH); 237 } 238 chrtran = (iflag ? cup2low : clow2low); 239 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 240 errorx("can't specify - -"); 241 if (S_ISDIR(stb1.st_mode)) { 242 file1 = splice(file1, file2); 243 if (stat(file1, &stb1) < 0) 244 error("%s", file1); 245 } else if (strcmp(file1, "-") == 0 || 246 (!S_ISREG(stb1.st_mode) && strcmp(file1, _PATH_DEVNULL) != 0)) { 247 file1 = copytemp(file1, 1); 248 if (stat(file1, &stb1) < 0) 249 error("%s", file1); 250 } 251 if (S_ISDIR(stb2.st_mode)) { 252 file2 = splice(file2, file1); 253 if (stat(file2, &stb2) < 0) 254 error("%s", file2); 255 } else if (strcmp(file2, "-") == 0 || 256 (!S_ISREG(stb2.st_mode) && strcmp(file2, _PATH_DEVNULL) != 0)) { 257 file2 = copytemp(file2, 2); 258 if (stat(file2, &stb2) < 0) 259 error("%s", file2); 260 } 261 if ((f1 = fopen(file1, "r")) == NULL) 262 error("%s", file1); 263 if ((f2 = fopen(file2, "r")) == NULL) 264 error("%s", file2); 265 if ((stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT) || 266 stb1.st_size != stb2.st_size) 267 goto notsame; 268 for (;;) { 269 i = fread(buf1, 1, BUFSIZ, f1); 270 j = fread(buf2, 1, BUFSIZ, f2); 271 if (i < 0 || j < 0 || i != j) 272 goto notsame; 273 if (i == 0 && j == 0) { 274 fclose(f1); 275 fclose(f2); 276 status = 0; /* files don't differ */ 277 goto same; 278 } 279 for (j = 0; j < i; j++) 280 if (buf1[j] != buf2[j]) 281 goto notsame; 282 } 283 notsame: 284 /* 285 * Files certainly differ at this point; set status accordingly 286 */ 287 status = 1; 288 if (!asciifile(f1) || !asciifile(f2)) { 289 printf("Binary files %s and %s differ\n", file1, file2); 290 exit(status); 291 } 292 prepare(0, f1); 293 prepare(1, f2); 294 fclose(f1); 295 fclose(f2); 296 prune(); 297 sort(sfile[0], slen[0]); 298 sort(sfile[1], slen[1]); 299 300 member = (int *)file[1]; 301 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 302 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 303 304 class = (int *)file[0]; 305 unsort(sfile[0], slen[0], class); 306 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 307 308 klist = emalloc((slen[0] + 2) * sizeof(int)); 309 clist = emalloc(sizeof(cand)); 310 i = stone(class, slen[0], member, klist); 311 free(member); 312 free(class); 313 314 J = emalloc((len[0] + 2) * sizeof(int)); 315 unravel(klist[i]); 316 free(clist); 317 free(klist); 318 319 ixold = emalloc((len[0] + 2) * sizeof(long)); 320 ixnew = emalloc((len[1] + 2) * sizeof(long)); 321 check(); 322 output(); 323 status = anychange; 324 same: 325 if (anychange == 0 && (opt == D_CONTEXT || opt == D_UNIFIED)) 326 printf("No differences encountered\n"); 327 } 328 329 char *tempfiles[2]; 330 331 char * 332 copytemp(const char *file, int n) 333 { 334 char buf[BUFSIZ], *tempdir, *tempfile; 335 int i, ifd, ofd; 336 337 if (n != 1 && n != 2) 338 return (NULL); 339 340 if (strcmp(file, "-") == 0) 341 ifd = STDIN_FILENO; 342 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 343 error("%s", file); 344 345 if ((tempdir = getenv("TMPDIR")) == NULL) 346 tempdir = _PATH_TMP; 347 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 348 error(NULL); 349 tempfiles[n - 1] = tempfile; 350 351 signal(SIGHUP, done); 352 signal(SIGINT, done); 353 signal(SIGPIPE, done); 354 signal(SIGTERM, done); 355 ofd = mkstemp(tempfile); 356 if (ofd < 0) 357 error("%s", tempfile); 358 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 359 if (write(ofd, buf, i) != i) 360 error("%s", tempfile); 361 } 362 close(ifd); 363 close(ofd); 364 return (tempfile); 365 } 366 367 char * 368 splice(char *dir, char *file) 369 { 370 char *tail, *buf; 371 size_t len; 372 373 if (!strcmp(file, "-")) 374 errorx("can't specify - with other arg directory"); 375 tail = strrchr(file, '/'); 376 if (tail == NULL) 377 tail = file; 378 else 379 tail++; 380 len = strlen(dir) + 1 + strlen(tail) + 1; 381 buf = emalloc(len); 382 snprintf(buf, len, "%s/%s", dir, tail); 383 return (buf); 384 } 385 386 static void 387 prepare(int i, FILE *fd) 388 { 389 struct line *p; 390 int j, h; 391 392 fseek(fd, 0L, SEEK_SET); 393 p = emalloc(3 * sizeof(struct line)); 394 for (j = 0; (h = readhash(fd));) { 395 p = erealloc(p, (++j + 3) * sizeof(struct line)); 396 p[j].value = h; 397 } 398 len[i] = j; 399 file[i] = p; 400 } 401 402 static void 403 prune(void) 404 { 405 int i, j; 406 407 for (pref = 0; pref < len[0] && pref < len[1] && 408 file[0][pref + 1].value == file[1][pref + 1].value; 409 pref++) 410 ; 411 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 412 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 413 suff++) 414 ; 415 for (j = 0; j < 2; j++) { 416 sfile[j] = file[j] + pref; 417 slen[j] = len[j] - pref - suff; 418 for (i = 0; i <= slen[j]; i++) 419 sfile[j][i].serial = i; 420 } 421 } 422 423 static void 424 equiv(struct line *a, int n, struct line *b, int m, int *c) 425 { 426 int i, j; 427 428 i = j = 1; 429 while (i <= n && j <= m) { 430 if (a[i].value < b[j].value) 431 a[i++].value = 0; 432 else if (a[i].value == b[j].value) 433 a[i++].value = j; 434 else 435 j++; 436 } 437 while (i <= n) 438 a[i++].value = 0; 439 b[m + 1].value = 0; 440 j = 0; 441 while (++j <= m) { 442 c[j] = -b[j].serial; 443 while (b[j + 1].value == b[j].value) { 444 j++; 445 c[j] = b[j].serial; 446 } 447 } 448 c[j] = -1; 449 } 450 451 static int 452 stone(int *a, int n, int *b, int *c) 453 { 454 int i, k, y, j, l; 455 int oldc, tc, oldl; 456 457 k = 0; 458 c[0] = newcand(0, 0, 0); 459 for (i = 1; i <= n; i++) { 460 j = a[i]; 461 if (j == 0) 462 continue; 463 y = -b[j]; 464 oldl = 0; 465 oldc = c[0]; 466 do { 467 if (y <= clist[oldc].y) 468 continue; 469 l = search(c, k, y); 470 if (l != oldl + 1) 471 oldc = c[l - 1]; 472 if (l <= k) { 473 if (clist[c[l]].y <= y) 474 continue; 475 tc = c[l]; 476 c[l] = newcand(i, y, oldc); 477 oldc = tc; 478 oldl = l; 479 } else { 480 c[l] = newcand(i, y, oldc); 481 k++; 482 break; 483 } 484 } while ((y = b[++j]) > 0); 485 } 486 return (k); 487 } 488 489 static int 490 newcand(int x, int y, int pred) 491 { 492 struct cand *q; 493 494 clist = erealloc(clist, ++clen * sizeof(cand)); 495 q = clist + clen - 1; 496 q->x = x; 497 q->y = y; 498 q->pred = pred; 499 return (clen - 1); 500 } 501 502 static int 503 search(int *c, int k, int y) 504 { 505 int i, j, l, t; 506 507 if (clist[c[k]].y < y) /* quick look for typical case */ 508 return (k + 1); 509 i = 0; 510 j = k + 1; 511 while (1) { 512 l = i + j; 513 if ((l >>= 1) <= i) 514 break; 515 t = clist[c[l]].y; 516 if (t > y) 517 j = l; 518 else if (t < y) 519 i = l; 520 else 521 return (l); 522 } 523 return (l + 1); 524 } 525 526 static void 527 unravel(int p) 528 { 529 struct cand *q; 530 int i; 531 532 for (i = 0; i <= len[0]; i++) 533 J[i] = i <= pref ? i : 534 i > len[0] - suff ? i + len[1] - len[0] : 0; 535 for (q = clist + p; q->y != 0; q = clist + q->pred) 536 J[q->x + pref] = q->y + pref; 537 } 538 539 /* 540 * Check does double duty: 541 * 1. ferret out any fortuitous correspondences due 542 * to confounding by hashing (which result in "jackpot") 543 * 2. collect random access indexes to the two files 544 */ 545 static void 546 check(void) 547 { 548 int i, j, jackpot, c, d; 549 long ctold, ctnew; 550 551 if ((input[0] = fopen(file1, "r")) == NULL) 552 error("%s", file1); 553 if ((input[1] = fopen(file2, "r")) == NULL) 554 error("%s", file2); 555 j = 1; 556 ixold[0] = ixnew[0] = 0; 557 jackpot = 0; 558 ctold = ctnew = 0; 559 for (i = 1; i <= len[0]; i++) { 560 if (J[i] == 0) { 561 ixold[i] = ctold += skipline(0); 562 continue; 563 } 564 while (j < J[i]) { 565 ixnew[j] = ctnew += skipline(1); 566 j++; 567 } 568 if (bflag || wflag || iflag) { 569 for (;;) { 570 c = getc(input[0]); 571 d = getc(input[1]); 572 ctold++; 573 ctnew++; 574 if (bflag && isspace(c) && isspace(d)) { 575 do { 576 if (c == '\n') 577 break; 578 ctold++; 579 } while (isspace(c = getc(input[0]))); 580 do { 581 if (d == '\n') 582 break; 583 ctnew++; 584 } while (isspace(d = getc(input[1]))); 585 } else if (wflag) { 586 while (isspace(c) && c != '\n') { 587 c = getc(input[0]); 588 ctold++; 589 } 590 while (isspace(d) && d != '\n') { 591 d = getc(input[1]); 592 ctnew++; 593 } 594 } 595 if (chrtran[c] != chrtran[d]) { 596 jackpot++; 597 J[i] = 0; 598 if (c != '\n') 599 ctold += skipline(0); 600 if (d != '\n') 601 ctnew += skipline(1); 602 break; 603 } 604 if (c == '\n') 605 break; 606 } 607 } else { 608 for (;;) { 609 ctold++; 610 ctnew++; 611 if ((c = getc(input[0])) != (d = getc(input[1]))) { 612 /* jackpot++; */ 613 J[i] = 0; 614 if (c != '\n') 615 ctold += skipline(0); 616 if (d != '\n') 617 ctnew += skipline(1); 618 break; 619 } 620 if (c == '\n') 621 break; 622 } 623 } 624 ixold[i] = ctold; 625 ixnew[j] = ctnew; 626 j++; 627 } 628 for (; j <= len[1]; j++) { 629 ixnew[j] = ctnew += skipline(1); 630 } 631 fclose(input[0]); 632 fclose(input[1]); 633 /* 634 * if (jackpot) 635 * fprintf(stderr, "jackpot\n"); 636 */ 637 } 638 639 /* shellsort CACM #201 */ 640 static void 641 sort(struct line *a, int n) 642 { 643 struct line *ai, *aim, w; 644 int j, m = 0, k; 645 646 if (n == 0) 647 return; 648 for (j = 1; j <= n; j *= 2) 649 m = 2 * j - 1; 650 for (m /= 2; m != 0; m /= 2) { 651 k = n - m; 652 for (j = 1; j <= k; j++) { 653 for (ai = &a[j]; ai > a; ai -= m) { 654 aim = &ai[m]; 655 if (aim < ai) 656 break; /* wraparound */ 657 if (aim->value > ai[0].value || 658 (aim->value == ai[0].value && 659 aim->serial > ai[0].serial)) 660 break; 661 w.value = ai[0].value; 662 ai[0].value = aim->value; 663 aim->value = w.value; 664 w.serial = ai[0].serial; 665 ai[0].serial = aim->serial; 666 aim->serial = w.serial; 667 } 668 } 669 } 670 } 671 672 static void 673 unsort(struct line *f, int l, int *b) 674 { 675 int *a, i; 676 677 a = emalloc((l + 1) * sizeof(int)); 678 for (i = 1; i <= l; i++) 679 a[f[i].serial] = f[i].value; 680 for (i = 1; i <= l; i++) 681 b[i] = a[i]; 682 free(a); 683 } 684 685 static int 686 skipline(int f) 687 { 688 int i, c; 689 690 for (i = 1; (c = getc(input[f])) != '\n'; i++) 691 if (c < 0) 692 return (i); 693 return (i); 694 } 695 696 static void 697 output(void) 698 { 699 int m, i0, i1, j0, j1; 700 701 input[0] = fopen(file1, "r"); 702 input[1] = fopen(file2, "r"); 703 m = len[0]; 704 J[0] = 0; 705 J[m + 1] = len[1] + 1; 706 if (opt != D_EDIT) { 707 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 708 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 709 i0++; 710 j0 = J[i0 - 1] + 1; 711 i1 = i0 - 1; 712 while (i1 < m && J[i1 + 1] == 0) 713 i1++; 714 j1 = J[i1 + 1] - 1; 715 J[i1] = j1; 716 change(i0, i1, j0, j1); 717 } 718 } else { 719 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 720 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 721 i0--; 722 j0 = J[i0 + 1] - 1; 723 i1 = i0 + 1; 724 while (i1 > 1 && J[i1 - 1] == 0) 725 i1--; 726 j1 = J[i1 - 1] + 1; 727 J[i1] = j1; 728 change(i1, i0, j1, j0); 729 } 730 } 731 if (m == 0) 732 change(1, 0, 1, len[1]); 733 if (opt == D_IFDEF) { 734 for (;;) { 735 #define c i0 736 c = getc(input[0]); 737 if (c < 0) 738 return; 739 putchar(c); 740 } 741 #undef c 742 } 743 if (anychange != 0) { 744 if (opt == D_CONTEXT) 745 dump_context_vec(); 746 else if (opt == D_UNIFIED) 747 dump_unified_vec(); 748 } 749 } 750 751 /* 752 * The following struct is used to record change information when 753 * doing a "context" diff. (see routine "change" to understand the 754 * highly mneumonic field names) 755 */ 756 struct context_vec { 757 int a; /* start line in old file */ 758 int b; /* end line in old file */ 759 int c; /* start line in new file */ 760 int d; /* end line in new file */ 761 }; 762 763 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 764 765 #define MAX_CONTEXT 128 766 767 /* 768 * indicate that there is a difference between lines a and b of the from file 769 * to get to lines c to d of the to file. If a is greater then b then there 770 * are no lines in the from file involved and this means that there were 771 * lines appended (beginning at b). If c is greater than d then there are 772 * lines missing from the to file. 773 */ 774 static void 775 change(int a, int b, int c, int d) 776 { 777 struct stat stbuf; 778 779 if (opt != D_IFDEF && a > b && c > d) 780 return; 781 if (anychange == 0) { 782 anychange = 1; 783 if (opt == D_CONTEXT || opt == D_UNIFIED) { 784 stat(file1, &stbuf); 785 printf("%s %s %s", opt == D_CONTEXT ? "***" : "---", 786 file1, ctime(&stbuf.st_mtime)); 787 stat(file2, &stbuf); 788 printf("%s %s %s", opt == D_CONTEXT ? "---" : "+++", 789 file2, ctime(&stbuf.st_mtime)); 790 context_vec_start = emalloc(MAX_CONTEXT * 791 sizeof(struct context_vec)); 792 context_vec_end = context_vec_start + MAX_CONTEXT; 793 context_vec_ptr = context_vec_start - 1; 794 } 795 } 796 if (opt == D_CONTEXT || opt == D_UNIFIED) { 797 /* 798 * If this new change is within 'context' lines of 799 * the previous change, just add it to the change 800 * record. If the record is full or if this 801 * change is more than 'context' lines from the previous 802 * change, dump the record, reset it & add the new change. 803 */ 804 if (context_vec_ptr >= context_vec_end || 805 (context_vec_ptr >= context_vec_start && 806 a > (context_vec_ptr->b + 2 * context) && 807 c > (context_vec_ptr->d + 2 * context))) { 808 if (opt == D_CONTEXT) 809 dump_context_vec(); 810 else 811 dump_unified_vec(); 812 } 813 context_vec_ptr++; 814 context_vec_ptr->a = a; 815 context_vec_ptr->b = b; 816 context_vec_ptr->c = c; 817 context_vec_ptr->d = d; 818 return; 819 } 820 switch (opt) { 821 822 case D_NORMAL: 823 case D_EDIT: 824 range(a, b, ","); 825 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 826 if (opt == D_NORMAL) 827 range(c, d, ","); 828 putchar('\n'); 829 break; 830 case D_REVERSE: 831 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 832 range(a, b, " "); 833 putchar('\n'); 834 break; 835 case D_NREVERSE: 836 if (a > b) 837 printf("a%d %d\n", b, d - c + 1); 838 else { 839 printf("d%d %d\n", a, b - a + 1); 840 if (!(c > d)) 841 /* add changed lines */ 842 printf("a%d %d\n", b, d - c + 1); 843 } 844 break; 845 } 846 if (opt == D_NORMAL || opt == D_IFDEF) { 847 fetch(ixold, a, b, input[0], "< ", 1); 848 if (a <= b && c <= d && opt == D_NORMAL) 849 prints("---\n"); 850 } 851 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 852 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 853 prints(".\n"); 854 if (inifdef) { 855 fprintf(stdout, "#endif /* %s */\n", ifdefname); 856 inifdef = 0; 857 } 858 } 859 860 static void 861 range(int a, int b, char *separator) 862 { 863 printf("%d", a > b ? b : a); 864 if (a < b) 865 printf("%s%d", separator, b); 866 } 867 868 static void 869 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 870 { 871 int i, j, c, col, nc; 872 873 /* 874 * When doing #ifdef's, copy down to current line 875 * if this is the first file, so that stuff makes it to output. 876 */ 877 if (opt == D_IFDEF && oldfile) { 878 long curpos = ftell(lb); 879 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 880 nc = f[a > b ? b : a - 1] - curpos; 881 for (i = 0; i < nc; i++) 882 putchar(getc(lb)); 883 } 884 if (a > b) 885 return; 886 if (opt == D_IFDEF) { 887 if (inifdef) { 888 fprintf(stdout, "#else /* %s%s */\n", 889 oldfile == 1 ? "!" : "", ifdefname); 890 } else { 891 if (oldfile) 892 fprintf(stdout, "#ifndef %s\n", ifdefname); 893 else 894 fprintf(stdout, "#ifdef %s\n", ifdefname); 895 } 896 inifdef = 1 + oldfile; 897 } 898 for (i = a; i <= b; i++) { 899 fseek(lb, f[i - 1], SEEK_SET); 900 nc = f[i] - f[i - 1]; 901 if (opt != D_IFDEF) 902 prints(s); 903 col = 0; 904 for (j = 0; j < nc; j++) { 905 c = getc(lb); 906 if (c == '\t' && tflag) 907 do 908 putchar(' '); 909 while (++col & 7); 910 else { 911 putchar(c); 912 col++; 913 } 914 } 915 } 916 } 917 918 #define POW2 /* define only if HALFLONG is 2**n */ 919 #define HALFLONG 16 920 #define low(x) (x&((1L<<HALFLONG)-1)) 921 #define high(x) (x>>HALFLONG) 922 923 /* 924 * hashing has the effect of 925 * arranging line in 7-bit bytes and then 926 * summing 1-s complement in 16-bit hunks 927 */ 928 static int 929 readhash(FILE *f) 930 { 931 unsigned int shift; 932 int t, space; 933 long sum; 934 935 sum = 1; 936 space = 0; 937 if (!bflag && !wflag) { 938 if (iflag) 939 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 940 if (t == -1) 941 return (0); 942 sum += (long)chrtran[t] << (shift 943 #ifdef POW2 944 &= HALFLONG - 1); 945 #else 946 %= HALFLONG); 947 #endif 948 } 949 else 950 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 951 if (t == -1) 952 return (0); 953 sum += (long)t << (shift 954 #ifdef POW2 955 &= HALFLONG - 1); 956 #else 957 %= HALFLONG); 958 #endif 959 } 960 } else { 961 for (shift = 0;;) { 962 switch (t = getc(f)) { 963 case -1: 964 return (0); 965 case '\t': 966 case ' ': 967 space++; 968 continue; 969 default: 970 if (space && !wflag) { 971 shift += 7; 972 space = 0; 973 } 974 sum += (long)chrtran[t] << (shift 975 #ifdef POW2 976 &= HALFLONG - 1); 977 #else 978 %= HALFLONG); 979 #endif 980 shift += 7; 981 continue; 982 case '\n': 983 break; 984 } 985 break; 986 } 987 } 988 sum = low(sum) + high(sum); 989 return ((short) low(sum) + (short) high(sum)); 990 } 991 992 static int 993 asciifile(FILE *f) 994 { 995 char buf[BUFSIZ], *cp; 996 int cnt; 997 998 if (aflag) 999 return (1); 1000 1001 fseek(f, 0L, SEEK_SET); 1002 cnt = fread(buf, 1, BUFSIZ, f); 1003 cp = buf; 1004 while (--cnt >= 0) 1005 if (*cp++ & 0200) 1006 return (0); 1007 return (1); 1008 } 1009 1010 /* dump accumulated "context" diff changes */ 1011 static void 1012 dump_context_vec(void) 1013 { 1014 struct context_vec *cvp = context_vec_start; 1015 int lowa, upb, lowc, upd, do_output; 1016 int a, b, c, d; 1017 char ch; 1018 1019 if (context_vec_start > context_vec_ptr) 1020 return; 1021 1022 b = d = 0; /* gcc */ 1023 lowa = max(1, cvp->a - context); 1024 upb = min(len[0], context_vec_ptr->b + context); 1025 lowc = max(1, cvp->c - context); 1026 upd = min(len[1], context_vec_ptr->d + context); 1027 1028 printf("***************\n*** "); 1029 range(lowa, upb, ","); 1030 printf(" ****\n"); 1031 1032 /* 1033 * output changes to the "old" file. The first loop suppresses 1034 * output if there were no changes to the "old" file (we'll see 1035 * the "old" lines as context in the "new" list). 1036 */ 1037 do_output = 0; 1038 for (; cvp <= context_vec_ptr; cvp++) 1039 if (cvp->a <= cvp->b) { 1040 cvp = context_vec_start; 1041 do_output++; 1042 break; 1043 } 1044 if (do_output) { 1045 while (cvp <= context_vec_ptr) { 1046 a = cvp->a; 1047 b = cvp->b; 1048 c = cvp->c; 1049 d = cvp->d; 1050 1051 if (a <= b && c <= d) 1052 ch = 'c'; 1053 else 1054 ch = (a <= b) ? 'd' : 'a'; 1055 1056 if (ch == 'a') 1057 fetch(ixold, lowa, b, input[0], " ", 0); 1058 else { 1059 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1060 fetch(ixold, a, b, input[0], 1061 ch == 'c' ? "! " : "- ", 0); 1062 } 1063 lowa = b + 1; 1064 cvp++; 1065 } 1066 fetch(ixold, b + 1, upb, input[0], " ", 0); 1067 } 1068 /* output changes to the "new" file */ 1069 printf("--- "); 1070 range(lowc, upd, ","); 1071 printf(" ----\n"); 1072 1073 do_output = 0; 1074 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1075 if (cvp->c <= cvp->d) { 1076 cvp = context_vec_start; 1077 do_output++; 1078 break; 1079 } 1080 if (do_output) { 1081 while (cvp <= context_vec_ptr) { 1082 a = cvp->a; 1083 b = cvp->b; 1084 c = cvp->c; 1085 d = cvp->d; 1086 1087 if (a <= b && c <= d) 1088 ch = 'c'; 1089 else 1090 ch = (a <= b) ? 'd' : 'a'; 1091 1092 if (ch == 'd') 1093 fetch(ixnew, lowc, d, input[1], " ", 0); 1094 else { 1095 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1096 fetch(ixnew, c, d, input[1], 1097 ch == 'c' ? "! " : "+ ", 0); 1098 } 1099 lowc = d + 1; 1100 cvp++; 1101 } 1102 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1103 } 1104 context_vec_ptr = context_vec_start - 1; 1105 } 1106 1107 /* dump accumulated "unified" diff changes */ 1108 static void 1109 dump_unified_vec(void) 1110 { 1111 struct context_vec *cvp = context_vec_start; 1112 int lowa, upb, lowc, upd; 1113 int a, b, c, d; 1114 char ch; 1115 1116 if (context_vec_start > context_vec_ptr) 1117 return; 1118 1119 b = d = 0; /* gcc */ 1120 lowa = max(1, cvp->a - context); 1121 upb = min(len[0], context_vec_ptr->b + context); 1122 lowc = max(1, cvp->c - context); 1123 upd = min(len[1], context_vec_ptr->d + context); 1124 1125 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1126 lowc, upd - lowc + 1); 1127 1128 /* 1129 * Output changes in "unified" diff format--the old and new lines 1130 * are printed together. 1131 */ 1132 for (; cvp <= context_vec_ptr; cvp++) { 1133 a = cvp->a; 1134 b = cvp->b; 1135 c = cvp->c; 1136 d = cvp->d; 1137 1138 /* 1139 * c: both new and old changes 1140 * d: only changes in the old file 1141 * a: only changes in the new file 1142 */ 1143 if (a <= b && c <= d) 1144 ch = 'c'; 1145 else 1146 ch = (a <= b) ? 'd' : 'a'; 1147 1148 switch (ch) { 1149 case 'c': 1150 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1151 fetch(ixold, a, b, input[0], "-", 0); 1152 fetch(ixnew, c, d, input[1], "+", 0); 1153 break; 1154 case 'd': 1155 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1156 fetch(ixold, a, b, input[0], "-", 0); 1157 break; 1158 case 'a': 1159 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1160 fetch(ixnew, c, d, input[1], "+", 0); 1161 break; 1162 } 1163 lowa = b + 1; 1164 lowc = d + 1; 1165 } 1166 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1167 1168 context_vec_ptr = context_vec_start - 1; 1169 } 1170