1 /* $OpenBSD: diffreg.c,v 1.17 2003/06/25 22:14:43 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 #include <sys/types.h> 38 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <fcntl.h> 42 #include <string.h> 43 44 #include "diff.h" 45 #include "pathnames.h" 46 47 #if 0 48 static char const sccsid[] = "@(#)diffreg.c 4.21 4/6/90"; 49 #endif 50 51 /* 52 * diff - compare two files. 53 */ 54 55 /* 56 * Uses an algorithm due to Harold Stone, which finds 57 * a pair of longest identical subsequences in the two 58 * files. 59 * 60 * The major goal is to generate the match vector J. 61 * J[i] is the index of the line in file1 corresponding 62 * to line i file0. J[i] = 0 if there is no 63 * such line in file1. 64 * 65 * Lines are hashed so as to work in core. All potential 66 * matches are located by sorting the lines of each file 67 * on the hash (called ``value''). In particular, this 68 * collects the equivalence classes in file1 together. 69 * Subroutine equiv replaces the value of each line in 70 * file0 by the index of the first element of its 71 * matching equivalence in (the reordered) file1. 72 * To save space equiv squeezes file1 into a single 73 * array member in which the equivalence classes 74 * are simply concatenated, except that their first 75 * members are flagged by changing sign. 76 * 77 * Next the indices that point into member are unsorted into 78 * array class according to the original order of file0. 79 * 80 * The cleverness lies in routine stone. This marches 81 * through the lines of file0, developing a vector klist 82 * of "k-candidates". At step i a k-candidate is a matched 83 * pair of lines x,y (x in file0 y in file1) such that 84 * there is a common subsequence of length k 85 * between the first i lines of file0 and the first y 86 * lines of file1, but there is no such subsequence for 87 * any smaller y. x is the earliest possible mate to y 88 * that occurs in such a subsequence. 89 * 90 * Whenever any of the members of the equivalence class of 91 * lines in file1 matable to a line in file0 has serial number 92 * less than the y of some k-candidate, that k-candidate 93 * with the smallest such y is replaced. The new 94 * k-candidate is chained (via pred) to the current 95 * k-1 candidate so that the actual subsequence can 96 * be recovered. When a member has serial number greater 97 * that the y of all k-candidates, the klist is extended. 98 * At the end, the longest subsequence is pulled out 99 * and placed in the array J by unravel 100 * 101 * With J in hand, the matches there recorded are 102 * check'ed against reality to assure that no spurious 103 * matches have crept in due to hashing. If they have, 104 * they are broken, and "jackpot" is recorded--a harmless 105 * matter except that a true match for a spuriously 106 * mated line may now be unnecessarily reported as a change. 107 * 108 * Much of the complexity of the program comes simply 109 * from trying to minimize core utilization and 110 * maximize the range of doable problems by dynamically 111 * allocating what is needed and reusing what is not. 112 * The core requirements for problems larger than somewhat 113 * are (in words) 2*length(file0) + length(file1) + 114 * 3*(number of k-candidates installed), typically about 115 * 6n words for files of length n. 116 */ 117 118 #define prints(s) fputs(s,stdout) 119 120 FILE *input[2]; 121 122 struct cand { 123 int x; 124 int y; 125 int pred; 126 } cand; 127 128 struct line { 129 int serial; 130 int value; 131 } *file[2]; 132 133 int len[2]; 134 struct line *sfile[2]; /* shortened by pruning common prefix and suffix */ 135 int slen[2]; 136 int pref, suff; /* length of prefix and suffix */ 137 int *class; /* will be overlaid on file[0] */ 138 int *member; /* will be overlaid on file[1] */ 139 int *klist; /* will be overlaid on file[0] after class */ 140 struct cand *clist; /* merely a free storage pot for candidates */ 141 int clen = 0; 142 int *J; /* will be overlaid on class */ 143 long *ixold; /* will be overlaid on klist */ 144 long *ixnew; /* will be overlaid on file[1] */ 145 u_char *chrtran; /* translation table for case-folding */ 146 147 static void fetch(long *, int, int, FILE *, char *, int); 148 static void output(void); 149 static void check(void); 150 static void range(int, int, char *); 151 static void dump_context_vec(void); 152 static void dump_unified_vec(void); 153 static void prepare(int, FILE *); 154 static void prune(void); 155 static void equiv(struct line *, int, struct line *, int, int *); 156 static void unravel(int); 157 static void unsort(struct line *, int, int *); 158 static void change(int, int, int, int); 159 static void sort(struct line *, int); 160 static int newcand(int, int, int); 161 static int search(int *, int, int); 162 static int skipline(int); 163 static int asciifile(FILE *); 164 static int stone(int *, int, int *, int *); 165 static int readhash(FILE *); 166 167 /* 168 * chrtran points to one of 2 translation tables: cup2low if folding upper to 169 * lower case clow2low if not folding case 170 */ 171 u_char clow2low[256] = { 172 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 173 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 174 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 175 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 176 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 177 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 178 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 179 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 180 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 181 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 182 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 183 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 184 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 185 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 186 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 187 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 188 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 189 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 190 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 191 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 192 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 193 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 194 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 195 0xfd, 0xfe, 0xff 196 }; 197 198 u_char cup2low[256] = { 199 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 200 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 201 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 202 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 203 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 204 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 205 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 206 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 207 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 208 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 209 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 210 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 211 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 212 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 213 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 214 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 215 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 216 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 217 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 218 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 219 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 220 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 221 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 222 0xfd, 0xfe, 0xff 223 }; 224 225 void 226 diffreg(void) 227 { 228 char buf1[BUFSIZ], buf2[BUFSIZ]; 229 FILE *f1, *f2; 230 int i, j; 231 232 if (hflag) { 233 diffargv[0] = "diffh"; 234 execv(diffh, diffargv); 235 warn("%s", diffh); 236 done(0); 237 } 238 chrtran = (iflag ? cup2low : clow2low); 239 if ((stb1.st_mode & S_IFMT) == S_IFDIR) { 240 file1 = splice(file1, file2); 241 if (stat(file1, &stb1) < 0) { 242 warn("%s", file1); 243 done(0); 244 } 245 } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { 246 file2 = splice(file2, file1); 247 if (stat(file2, &stb2) < 0) { 248 warn("%s", file2); 249 done(0); 250 } 251 } else if ((stb1.st_mode & S_IFMT) != S_IFREG || !strcmp(file1, "-")) { 252 if (!strcmp(file2, "-")) { 253 warnx("can't specify - -"); 254 done(0); 255 } 256 file1 = copytemp(); 257 if (stat(file1, &stb1) < 0) { 258 warn("%s", file1); 259 done(0); 260 } 261 } else if ((stb2.st_mode & S_IFMT) != S_IFREG || !strcmp(file2, "-")) { 262 file2 = copytemp(); 263 if (stat(file2, &stb2) < 0) { 264 warn("%s", file2); 265 done(0); 266 } 267 } 268 if ((f1 = fopen(file1, "r")) == NULL) { 269 warn("%s", file1); 270 done(0); 271 } 272 if ((f2 = fopen(file2, "r")) == NULL) { 273 warn("%s", file2); 274 fclose(f1); 275 done(0); 276 } 277 if (stb1.st_size != stb2.st_size) 278 goto notsame; 279 for (;;) { 280 i = fread(buf1, 1, BUFSIZ, f1); 281 j = fread(buf2, 1, BUFSIZ, f2); 282 if (i < 0 || j < 0 || i != j) 283 goto notsame; 284 if (i == 0 && j == 0) { 285 fclose(f1); 286 fclose(f2); 287 status = 0; /* files don't differ */ 288 goto same; 289 } 290 for (j = 0; j < i; j++) 291 if (buf1[j] != buf2[j]) 292 goto notsame; 293 } 294 notsame: 295 /* 296 * Files certainly differ at this point; set status accordingly 297 */ 298 status = 1; 299 if (!asciifile(f1) || !asciifile(f2)) { 300 printf("Binary files %s and %s differ\n", file1, file2); 301 fclose(f1); 302 fclose(f2); 303 done(0); 304 } 305 prepare(0, f1); 306 prepare(1, f2); 307 fclose(f1); 308 fclose(f2); 309 prune(); 310 sort(sfile[0], slen[0]); 311 sort(sfile[1], slen[1]); 312 313 member = (int *)file[1]; 314 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 315 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 316 317 class = (int *)file[0]; 318 unsort(sfile[0], slen[0], class); 319 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 320 321 klist = emalloc((slen[0] + 2) * sizeof(int)); 322 clist = emalloc(sizeof(cand)); 323 i = stone(class, slen[0], member, klist); 324 free(member); 325 free(class); 326 327 J = emalloc((len[0] + 2) * sizeof(int)); 328 unravel(klist[i]); 329 free(clist); 330 free(klist); 331 332 ixold = emalloc((len[0] + 2) * sizeof(long)); 333 ixnew = emalloc((len[1] + 2) * sizeof(long)); 334 check(); 335 output(); 336 status = anychange; 337 same: 338 if (anychange == 0 && (opt == D_CONTEXT || opt == D_UNIFIED)) 339 printf("No differences encountered\n"); 340 done(0); 341 } 342 343 char *tempfile = _PATH_TMP; 344 345 char * 346 copytemp(void) 347 { 348 char buf[BUFSIZ]; 349 int i, f; 350 351 signal(SIGHUP, done); 352 signal(SIGINT, done); 353 signal(SIGPIPE, done); 354 signal(SIGTERM, done); 355 f = mkstemp(tempfile); 356 if (f < 0) { 357 warn("%s", tempfile); 358 done(0); 359 } 360 while ((i = read(0, buf, BUFSIZ)) > 0) 361 if (write(f, buf, i) != i) { 362 warn("%s", tempfile); 363 done(0); 364 } 365 close(f); 366 return (tempfile); 367 } 368 369 char * 370 splice(char *dir, char *file) 371 { 372 char *tail, *buf; 373 size_t len; 374 375 if (!strcmp(file, "-")) { 376 warnx("can't specify - with other arg directory"); 377 done(0); 378 } 379 tail = strrchr(file, '/'); 380 if (tail == NULL) 381 tail = file; 382 else 383 tail++; 384 len = strlen(dir) + strlen(tail) + 1; 385 buf = emalloc(len); 386 snprintf(buf, len, "%s/%s", dir, tail); 387 return (buf); 388 } 389 390 static void 391 prepare(int i, FILE *fd) 392 { 393 struct line *p; 394 int j, h; 395 396 fseek(fd, 0L, SEEK_SET); 397 p = emalloc(3 * sizeof(struct line)); 398 for (j = 0; (h = readhash(fd));) { 399 p = erealloc(p, (++j + 3) * sizeof(struct line)); 400 p[j].value = h; 401 } 402 len[i] = j; 403 file[i] = p; 404 } 405 406 static void 407 prune(void) 408 { 409 int i, j; 410 411 for (pref = 0; pref < len[0] && pref < len[1] && 412 file[0][pref + 1].value == file[1][pref + 1].value; 413 pref++) 414 ; 415 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 416 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 417 suff++) 418 ; 419 for (j = 0; j < 2; j++) { 420 sfile[j] = file[j] + pref; 421 slen[j] = len[j] - pref - suff; 422 for (i = 0; i <= slen[j]; i++) 423 sfile[j][i].serial = i; 424 } 425 } 426 427 static void 428 equiv(struct line *a, int n, struct line *b, int m, int *c) 429 { 430 int i, j; 431 432 i = j = 1; 433 while (i <= n && j <= m) { 434 if (a[i].value < b[j].value) 435 a[i++].value = 0; 436 else if (a[i].value == b[j].value) 437 a[i++].value = j; 438 else 439 j++; 440 } 441 while (i <= n) 442 a[i++].value = 0; 443 b[m + 1].value = 0; 444 j = 0; 445 while (++j <= m) { 446 c[j] = -b[j].serial; 447 while (b[j + 1].value == b[j].value) { 448 j++; 449 c[j] = b[j].serial; 450 } 451 } 452 c[j] = -1; 453 } 454 455 static int 456 stone(int *a, int n, int *b, int *c) 457 { 458 int i, k, y, j, l; 459 int oldc, tc, oldl; 460 461 k = 0; 462 c[0] = newcand(0, 0, 0); 463 for (i = 1; i <= n; i++) { 464 j = a[i]; 465 if (j == 0) 466 continue; 467 y = -b[j]; 468 oldl = 0; 469 oldc = c[0]; 470 do { 471 if (y <= clist[oldc].y) 472 continue; 473 l = search(c, k, y); 474 if (l != oldl + 1) 475 oldc = c[l - 1]; 476 if (l <= k) { 477 if (clist[c[l]].y <= y) 478 continue; 479 tc = c[l]; 480 c[l] = newcand(i, y, oldc); 481 oldc = tc; 482 oldl = l; 483 } else { 484 c[l] = newcand(i, y, oldc); 485 k++; 486 break; 487 } 488 } while ((y = b[++j]) > 0); 489 } 490 return (k); 491 } 492 493 static int 494 newcand(int x, int y, int pred) 495 { 496 struct cand *q; 497 498 clist = erealloc(clist, ++clen * sizeof(cand)); 499 q = clist + clen - 1; 500 q->x = x; 501 q->y = y; 502 q->pred = pred; 503 return (clen - 1); 504 } 505 506 static int 507 search(int *c, int k, int y) 508 { 509 int i, j, l, t; 510 511 if (clist[c[k]].y < y) /* quick look for typical case */ 512 return (k + 1); 513 i = 0; 514 j = k + 1; 515 while (1) { 516 l = i + j; 517 if ((l >>= 1) <= i) 518 break; 519 t = clist[c[l]].y; 520 if (t > y) 521 j = l; 522 else if (t < y) 523 i = l; 524 else 525 return (l); 526 } 527 return (l + 1); 528 } 529 530 static void 531 unravel(int p) 532 { 533 struct cand *q; 534 int i; 535 536 for (i = 0; i <= len[0]; i++) 537 J[i] = i <= pref ? i : 538 i > len[0] - suff ? i + len[1] - len[0] : 0; 539 for (q = clist + p; q->y != 0; q = clist + q->pred) 540 J[q->x + pref] = q->y + pref; 541 } 542 543 /* 544 * Check does double duty: 545 * 1. ferret out any fortuitous correspondences due 546 * to confounding by hashing (which result in "jackpot") 547 * 2. collect random access indexes to the two files 548 */ 549 static void 550 check(void) 551 { 552 int i, j, jackpot, c, d; 553 long ctold, ctnew; 554 555 if ((input[0] = fopen(file1, "r")) == NULL) { 556 perror(file1); 557 done(0); 558 } 559 if ((input[1] = fopen(file2, "r")) == NULL) { 560 perror(file2); 561 done(0); 562 } 563 j = 1; 564 ixold[0] = ixnew[0] = 0; 565 jackpot = 0; 566 ctold = ctnew = 0; 567 for (i = 1; i <= len[0]; i++) { 568 if (J[i] == 0) { 569 ixold[i] = ctold += skipline(0); 570 continue; 571 } 572 while (j < J[i]) { 573 ixnew[j] = ctnew += skipline(1); 574 j++; 575 } 576 if (bflag || wflag || iflag) { 577 for (;;) { 578 c = getc(input[0]); 579 d = getc(input[1]); 580 ctold++; 581 ctnew++; 582 if (bflag && isspace(c) && isspace(d)) { 583 do { 584 if (c == '\n') 585 break; 586 ctold++; 587 } while (isspace(c = getc(input[0]))); 588 do { 589 if (d == '\n') 590 break; 591 ctnew++; 592 } while (isspace(d = getc(input[1]))); 593 } else if (wflag) { 594 while (isspace(c) && c != '\n') { 595 c = getc(input[0]); 596 ctold++; 597 } 598 while (isspace(d) && d != '\n') { 599 d = getc(input[1]); 600 ctnew++; 601 } 602 } 603 if (chrtran[c] != chrtran[d]) { 604 jackpot++; 605 J[i] = 0; 606 if (c != '\n') 607 ctold += skipline(0); 608 if (d != '\n') 609 ctnew += skipline(1); 610 break; 611 } 612 if (c == '\n') 613 break; 614 } 615 } else { 616 for (;;) { 617 ctold++; 618 ctnew++; 619 if ((c = getc(input[0])) != (d = getc(input[1]))) { 620 /* jackpot++; */ 621 J[i] = 0; 622 if (c != '\n') 623 ctold += skipline(0); 624 if (d != '\n') 625 ctnew += skipline(1); 626 break; 627 } 628 if (c == '\n') 629 break; 630 } 631 } 632 ixold[i] = ctold; 633 ixnew[j] = ctnew; 634 j++; 635 } 636 for (; j <= len[1]; j++) { 637 ixnew[j] = ctnew += skipline(1); 638 } 639 fclose(input[0]); 640 fclose(input[1]); 641 /* 642 * if (jackpot) 643 * fprintf(stderr, "jackpot\n"); 644 */ 645 } 646 647 /* shellsort CACM #201 */ 648 static void 649 sort(struct line *a, int n) 650 { 651 struct line *ai, *aim, w; 652 int j, m = 0, k; 653 654 if (n == 0) 655 return; 656 for (j = 1; j <= n; j *= 2) 657 m = 2 * j - 1; 658 for (m /= 2; m != 0; m /= 2) { 659 k = n - m; 660 for (j = 1; j <= k; j++) { 661 for (ai = &a[j]; ai > a; ai -= m) { 662 aim = &ai[m]; 663 if (aim < ai) 664 break; /* wraparound */ 665 if (aim->value > ai[0].value || 666 (aim->value == ai[0].value && 667 aim->serial > ai[0].serial)) 668 break; 669 w.value = ai[0].value; 670 ai[0].value = aim->value; 671 aim->value = w.value; 672 w.serial = ai[0].serial; 673 ai[0].serial = aim->serial; 674 aim->serial = w.serial; 675 } 676 } 677 } 678 } 679 680 static void 681 unsort(struct line *f, int l, int *b) 682 { 683 int *a, i; 684 685 a = emalloc((l + 1) * sizeof(int)); 686 for (i = 1; i <= l; i++) 687 a[f[i].serial] = f[i].value; 688 for (i = 1; i <= l; i++) 689 b[i] = a[i]; 690 free(a); 691 } 692 693 static int 694 skipline(int f) 695 { 696 int i, c; 697 698 for (i = 1; (c = getc(input[f])) != '\n'; i++) 699 if (c < 0) 700 return (i); 701 return (i); 702 } 703 704 static void 705 output(void) 706 { 707 int m, i0, i1, j0, j1; 708 709 input[0] = fopen(file1, "r"); 710 input[1] = fopen(file2, "r"); 711 m = len[0]; 712 J[0] = 0; 713 J[m + 1] = len[1] + 1; 714 if (opt != D_EDIT) { 715 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 716 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 717 i0++; 718 j0 = J[i0 - 1] + 1; 719 i1 = i0 - 1; 720 while (i1 < m && J[i1 + 1] == 0) 721 i1++; 722 j1 = J[i1 + 1] - 1; 723 J[i1] = j1; 724 change(i0, i1, j0, j1); 725 } 726 } else { 727 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 728 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 729 i0--; 730 j0 = J[i0 + 1] - 1; 731 i1 = i0 + 1; 732 while (i1 > 1 && J[i1 - 1] == 0) 733 i1--; 734 j1 = J[i1 - 1] + 1; 735 J[i1] = j1; 736 change(i1, i0, j1, j0); 737 } 738 } 739 if (m == 0) 740 change(1, 0, 1, len[1]); 741 if (opt == D_IFDEF) { 742 for (;;) { 743 #define c i0 744 c = getc(input[0]); 745 if (c < 0) 746 return; 747 putchar(c); 748 } 749 #undef c 750 } 751 if (anychange != 0) { 752 if (opt == D_CONTEXT) 753 dump_context_vec(); 754 else if (opt == D_UNIFIED) 755 dump_unified_vec(); 756 } 757 } 758 759 /* 760 * The following struct is used to record change information when 761 * doing a "context" diff. (see routine "change" to understand the 762 * highly mneumonic field names) 763 */ 764 struct context_vec { 765 int a; /* start line in old file */ 766 int b; /* end line in old file */ 767 int c; /* start line in new file */ 768 int d; /* end line in new file */ 769 }; 770 771 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 772 773 #define MAX_CONTEXT 128 774 775 /* 776 * indicate that there is a difference between lines a and b of the from file 777 * to get to lines c to d of the to file. If a is greater then b then there 778 * are no lines in the from file involved and this means that there were 779 * lines appended (beginning at b). If c is greater than d then there are 780 * lines missing from the to file. 781 */ 782 static void 783 change(int a, int b, int c, int d) 784 { 785 struct stat stbuf; 786 787 if (opt != D_IFDEF && a > b && c > d) 788 return; 789 if (anychange == 0) { 790 anychange = 1; 791 if (opt == D_CONTEXT || opt == D_UNIFIED) { 792 stat(file1, &stbuf); 793 printf("%s %s %s", opt == D_CONTEXT ? "***" : "---", 794 file1, ctime(&stbuf.st_mtime)); 795 stat(file2, &stbuf); 796 printf("%s %s %s", opt == D_CONTEXT ? "---" : "+++", 797 file2, ctime(&stbuf.st_mtime)); 798 context_vec_start = emalloc(MAX_CONTEXT * 799 sizeof(struct context_vec)); 800 context_vec_end = context_vec_start + MAX_CONTEXT; 801 context_vec_ptr = context_vec_start - 1; 802 } 803 } 804 if (opt == D_CONTEXT || opt == D_UNIFIED) { 805 /* 806 * if this new change is within 'context' lines of 807 * the previous change, just add it to the change 808 * record. If the record is full or if this 809 * change is more than 'context' lines from the previous 810 * change, dump the record, reset it & add the new change. 811 */ 812 if (context_vec_ptr >= context_vec_end || 813 (context_vec_ptr >= context_vec_start && 814 a > (context_vec_ptr->b + 2 * context) && 815 c > (context_vec_ptr->d + 2 * context))) { 816 if (opt == D_CONTEXT) 817 dump_context_vec(); 818 else 819 dump_unified_vec(); 820 } 821 context_vec_ptr++; 822 context_vec_ptr->a = a; 823 context_vec_ptr->b = b; 824 context_vec_ptr->c = c; 825 context_vec_ptr->d = d; 826 return; 827 } 828 switch (opt) { 829 830 case D_NORMAL: 831 case D_EDIT: 832 range(a, b, ","); 833 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 834 if (opt == D_NORMAL) 835 range(c, d, ","); 836 putchar('\n'); 837 break; 838 case D_REVERSE: 839 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 840 range(a, b, " "); 841 putchar('\n'); 842 break; 843 case D_NREVERSE: 844 if (a > b) 845 printf("a%d %d\n", b, d - c + 1); 846 else { 847 printf("d%d %d\n", a, b - a + 1); 848 if (!(c > d)) 849 /* add changed lines */ 850 printf("a%d %d\n", b, d - c + 1); 851 } 852 break; 853 } 854 if (opt == D_NORMAL || opt == D_IFDEF) { 855 fetch(ixold, a, b, input[0], "< ", 1); 856 if (a <= b && c <= d && opt == D_NORMAL) 857 prints("---\n"); 858 } 859 fetch(ixnew, c, d, input[1], opt == D_NORMAL ? "> " : "", 0); 860 if ((opt == D_EDIT || opt == D_REVERSE) && c <= d) 861 prints(".\n"); 862 if (inifdef) { 863 fprintf(stdout, "#endif /* %s */\n", endifname); 864 inifdef = 0; 865 } 866 } 867 868 static void 869 range(int a, int b, char *separator) 870 { 871 printf("%d", a > b ? b : a); 872 if (a < b) 873 printf("%s%d", separator, b); 874 } 875 876 static void 877 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 878 { 879 int oneflag = (*ifdef1 != '\0') != (*ifdef2 != '\0'); 880 int i, j, c, col, nc; 881 882 /* 883 * When doing #ifdef's, copy down to current line 884 * if this is the first file, so that stuff makes it to output. 885 */ 886 if (opt == D_IFDEF && oldfile) { 887 long curpos = ftell(lb); 888 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 889 nc = f[a > b ? b : a - 1] - curpos; 890 for (i = 0; i < nc; i++) 891 putchar(getc(lb)); 892 } 893 if (a > b) 894 return; 895 if (opt == D_IFDEF) { 896 if (inifdef) 897 fprintf(stdout, "#else /* %s%s */\n", 898 oneflag && oldfile == 1 ? "!" : "", ifdef2); 899 else { 900 if (oneflag) { 901 /* There was only one ifdef given */ 902 endifname = ifdef2; 903 if (oldfile) 904 fprintf(stdout, "#ifndef %s\n", endifname); 905 else 906 fprintf(stdout, "#ifdef %s\n", endifname); 907 } else { 908 endifname = oldfile ? ifdef1 : ifdef2; 909 fprintf(stdout, "#ifdef %s\n", endifname); 910 } 911 } 912 inifdef = 1 + oldfile; 913 } 914 for (i = a; i <= b; i++) { 915 fseek(lb, f[i - 1], SEEK_SET); 916 nc = f[i] - f[i - 1]; 917 if (opt != D_IFDEF) 918 prints(s); 919 col = 0; 920 for (j = 0; j < nc; j++) { 921 c = getc(lb); 922 if (c == '\t' && tflag) 923 do 924 putchar(' '); 925 while (++col & 7); 926 else { 927 putchar(c); 928 col++; 929 } 930 } 931 } 932 933 if (inifdef && !wantelses) { 934 fprintf(stdout, "#endif /* %s */\n", endifname); 935 inifdef = 0; 936 } 937 } 938 939 #define POW2 /* define only if HALFLONG is 2**n */ 940 #define HALFLONG 16 941 #define low(x) (x&((1L<<HALFLONG)-1)) 942 #define high(x) (x>>HALFLONG) 943 944 /* 945 * hashing has the effect of 946 * arranging line in 7-bit bytes and then 947 * summing 1-s complement in 16-bit hunks 948 */ 949 static int 950 readhash(FILE *f) 951 { 952 unsigned int shift; 953 int t, space; 954 long sum; 955 956 sum = 1; 957 space = 0; 958 if (!bflag && !wflag) { 959 if (iflag) 960 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 961 if (t == -1) 962 return (0); 963 sum += (long)chrtran[t] << (shift 964 #ifdef POW2 965 &= HALFLONG - 1); 966 #else 967 %= HALFLONG); 968 #endif 969 } 970 else 971 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 972 if (t == -1) 973 return (0); 974 sum += (long)t << (shift 975 #ifdef POW2 976 &= HALFLONG - 1); 977 #else 978 %= HALFLONG); 979 #endif 980 } 981 } else { 982 for (shift = 0;;) { 983 switch (t = getc(f)) { 984 case -1: 985 return (0); 986 case '\t': 987 case ' ': 988 space++; 989 continue; 990 default: 991 if (space && !wflag) { 992 shift += 7; 993 space = 0; 994 } 995 sum += (long)chrtran[t] << (shift 996 #ifdef POW2 997 &= HALFLONG - 1); 998 #else 999 %= HALFLONG); 1000 #endif 1001 shift += 7; 1002 continue; 1003 case '\n': 1004 break; 1005 } 1006 break; 1007 } 1008 } 1009 sum = low(sum) + high(sum); 1010 return ((short) low(sum) + (short) high(sum)); 1011 } 1012 1013 static int 1014 asciifile(FILE *f) 1015 { 1016 char buf[BUFSIZ], *cp; 1017 int cnt; 1018 1019 fseek(f, 0L, SEEK_SET); 1020 cnt = fread(buf, 1, BUFSIZ, f); 1021 cp = buf; 1022 while (--cnt >= 0) 1023 if (*cp++ & 0200) 1024 return (0); 1025 return (1); 1026 } 1027 1028 /* dump accumulated "context" diff changes */ 1029 static void 1030 dump_context_vec(void) 1031 { 1032 struct context_vec *cvp = context_vec_start; 1033 int lowa, upb, lowc, upd, do_output; 1034 int a, b, c, d; 1035 char ch; 1036 1037 if (cvp > context_vec_ptr) 1038 return; 1039 1040 b = d = 0; /* gcc */ 1041 lowa = max(1, cvp->a - context); 1042 upb = min(len[0], context_vec_ptr->b + context); 1043 lowc = max(1, cvp->c - context); 1044 upd = min(len[1], context_vec_ptr->d + context); 1045 1046 printf("***************\n*** "); 1047 range(lowa, upb, ","); 1048 printf(" ****\n"); 1049 1050 /* 1051 * output changes to the "old" file. The first loop suppresses 1052 * output if there were no changes to the "old" file (we'll see 1053 * the "old" lines as context in the "new" list). 1054 */ 1055 do_output = 0; 1056 for (; cvp <= context_vec_ptr; cvp++) 1057 if (cvp->a <= cvp->b) { 1058 cvp = context_vec_start; 1059 do_output++; 1060 break; 1061 } 1062 if (do_output) { 1063 while (cvp <= context_vec_ptr) { 1064 a = cvp->a; 1065 b = cvp->b; 1066 c = cvp->c; 1067 d = cvp->d; 1068 1069 if (a <= b && c <= d) 1070 ch = 'c'; 1071 else 1072 ch = (a <= b) ? 'd' : 'a'; 1073 1074 if (ch == 'a') 1075 fetch(ixold, lowa, b, input[0], " ", 0); 1076 else { 1077 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1078 fetch(ixold, a, b, input[0], 1079 ch == 'c' ? "! " : "- ", 0); 1080 } 1081 lowa = b + 1; 1082 cvp++; 1083 } 1084 fetch(ixold, b + 1, upb, input[0], " ", 0); 1085 } 1086 /* output changes to the "new" file */ 1087 printf("--- "); 1088 range(lowc, upd, ","); 1089 printf(" ----\n"); 1090 1091 do_output = 0; 1092 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1093 if (cvp->c <= cvp->d) { 1094 cvp = context_vec_start; 1095 do_output++; 1096 break; 1097 } 1098 if (do_output) { 1099 while (cvp <= context_vec_ptr) { 1100 a = cvp->a; 1101 b = cvp->b; 1102 c = cvp->c; 1103 d = cvp->d; 1104 1105 if (a <= b && c <= d) 1106 ch = 'c'; 1107 else 1108 ch = (a <= b) ? 'd' : 'a'; 1109 1110 if (ch == 'd') 1111 fetch(ixnew, lowc, d, input[1], " ", 0); 1112 else { 1113 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1114 fetch(ixnew, c, d, input[1], 1115 ch == 'c' ? "! " : "+ ", 0); 1116 } 1117 lowc = d + 1; 1118 cvp++; 1119 } 1120 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1121 } 1122 context_vec_ptr = context_vec_start - 1; 1123 } 1124 1125 /* dump accumulated "unified" diff changes */ 1126 static void 1127 dump_unified_vec(void) 1128 { 1129 struct context_vec *cvp = context_vec_start; 1130 int lowa, upb, lowc, upd; 1131 int a, b, c, d; 1132 char ch; 1133 1134 if (cvp > context_vec_ptr) 1135 return; 1136 1137 b = d = 0; /* gcc */ 1138 lowa = max(1, cvp->a - context); 1139 upb = min(len[0], context_vec_ptr->b + context); 1140 lowc = max(1, cvp->c - context); 1141 upd = min(len[1], context_vec_ptr->d + context); 1142 1143 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1144 lowc, upd - lowc + 1); 1145 1146 /* 1147 * Output changes in "unified" diff format--the old and new lines 1148 * are printed together. 1149 */ 1150 for (; cvp <= context_vec_ptr; cvp++) { 1151 a = cvp->a; 1152 b = cvp->b; 1153 c = cvp->c; 1154 d = cvp->d; 1155 1156 /* 1157 * c: both new and old changes 1158 * d: only changes in the old file 1159 * a: only changes in the new file 1160 */ 1161 if (a <= b && c <= d) 1162 ch = 'c'; 1163 else 1164 ch = (a <= b) ? 'd' : 'a'; 1165 1166 switch (ch) { 1167 case 'c': 1168 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1169 fetch(ixold, a, b, input[0], "- ", 0); 1170 fetch(ixnew, c, d, input[1], "+ ", 0); 1171 break; 1172 case 'd': 1173 fetch(ixold, lowa, a - 1, input[0], " ", 0); 1174 fetch(ixold, a, b, input[0], "- ", 0); 1175 break; 1176 case 'a': 1177 fetch(ixnew, lowc, c - 1, input[1], " ", 0); 1178 fetch(ixnew, c, d, input[1], "+ ", 0); 1179 break; 1180 } 1181 lowa = b + 1; 1182 lowc = d + 1; 1183 } 1184 fetch(ixnew, d + 1, upd, input[1], " ", 0); 1185 1186 context_vec_ptr = context_vec_start - 1; 1187 } 1188