1 /* $OpenBSD: diffreg.c,v 1.30 2003/07/08 04:51:30 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #ifndef lint 68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.30 2003/07/08 04:51:30 millert Exp $"; 69 #endif /* not lint */ 70 71 #include <sys/types.h> 72 #include <sys/stat.h> 73 74 #include <ctype.h> 75 #include <err.h> 76 #include <fcntl.h> 77 #include <libgen.h> 78 #include <paths.h> 79 #include <signal.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 85 #include "diff.h" 86 87 /* 88 * diff - compare two files. 89 */ 90 91 /* 92 * Uses an algorithm due to Harold Stone, which finds 93 * a pair of longest identical subsequences in the two 94 * files. 95 * 96 * The major goal is to generate the match vector J. 97 * J[i] is the index of the line in file1 corresponding 98 * to line i file0. J[i] = 0 if there is no 99 * such line in file1. 100 * 101 * Lines are hashed so as to work in core. All potential 102 * matches are located by sorting the lines of each file 103 * on the hash (called ``value''). In particular, this 104 * collects the equivalence classes in file1 together. 105 * Subroutine equiv replaces the value of each line in 106 * file0 by the index of the first element of its 107 * matching equivalence in (the reordered) file1. 108 * To save space equiv squeezes file1 into a single 109 * array member in which the equivalence classes 110 * are simply concatenated, except that their first 111 * members are flagged by changing sign. 112 * 113 * Next the indices that point into member are unsorted into 114 * array class according to the original order of file0. 115 * 116 * The cleverness lies in routine stone. This marches 117 * through the lines of file0, developing a vector klist 118 * of "k-candidates". At step i a k-candidate is a matched 119 * pair of lines x,y (x in file0 y in file1) such that 120 * there is a common subsequence of length k 121 * between the first i lines of file0 and the first y 122 * lines of file1, but there is no such subsequence for 123 * any smaller y. x is the earliest possible mate to y 124 * that occurs in such a subsequence. 125 * 126 * Whenever any of the members of the equivalence class of 127 * lines in file1 matable to a line in file0 has serial number 128 * less than the y of some k-candidate, that k-candidate 129 * with the smallest such y is replaced. The new 130 * k-candidate is chained (via pred) to the current 131 * k-1 candidate so that the actual subsequence can 132 * be recovered. When a member has serial number greater 133 * that the y of all k-candidates, the klist is extended. 134 * At the end, the longest subsequence is pulled out 135 * and placed in the array J by unravel 136 * 137 * With J in hand, the matches there recorded are 138 * check'ed against reality to assure that no spurious 139 * matches have crept in due to hashing. If they have, 140 * they are broken, and "jackpot" is recorded--a harmless 141 * matter except that a true match for a spuriously 142 * mated line may now be unnecessarily reported as a change. 143 * 144 * Much of the complexity of the program comes simply 145 * from trying to minimize core utilization and 146 * maximize the range of doable problems by dynamically 147 * allocating what is needed and reusing what is not. 148 * The core requirements for problems larger than somewhat 149 * are (in words) 2*length(file0) + length(file1) + 150 * 3*(number of k-candidates installed), typically about 151 * 6n words for files of length n. 152 */ 153 154 struct cand { 155 int x; 156 int y; 157 int pred; 158 } cand; 159 160 struct line { 161 int serial; 162 int value; 163 } *file[2]; 164 165 static int *J; /* will be overlaid on class */ 166 static int *class; /* will be overlaid on file[0] */ 167 static int *klist; /* will be overlaid on file[0] after class */ 168 static int *member; /* will be overlaid on file[1] */ 169 static int clen; 170 static int inifdef; /* whether or not we are in a #ifdef block */ 171 static int len[2]; 172 static int pref, suff; /* length of prefix and suffix */ 173 static int slen[2]; 174 static int anychange; 175 static long *ixnew; /* will be overlaid on file[1] */ 176 static long *ixold; /* will be overlaid on klist */ 177 static struct cand *clist; /* merely a free storage pot for candidates */ 178 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 179 static u_char *chrtran; /* translation table for case-folding */ 180 181 static void fetch(long *, int, int, FILE *, char *, int); 182 static void output(char *, FILE *, char *, FILE *); 183 static void check(char *, FILE *, char *, FILE *); 184 static void range(int, int, char *); 185 static void dump_context_vec(FILE *, FILE *); 186 static void dump_unified_vec(FILE *, FILE *); 187 static void prepare(int, FILE *); 188 static void prune(void); 189 static void equiv(struct line *, int, struct line *, int, int *); 190 static void unravel(int); 191 static void unsort(struct line *, int, int *); 192 static void change(char *, FILE *, char *, FILE *, int, int, int, int); 193 static void sort(struct line *, int); 194 static int asciifile(FILE *); 195 static int newcand(int, int, int); 196 static int search(int *, int, int); 197 static int skipline(FILE *); 198 static int stone(int *, int, int *, int *); 199 static int readhash(FILE *); 200 static int files_differ(FILE *, FILE *, int); 201 202 /* 203 * chrtran points to one of 2 translation tables: cup2low if folding upper to 204 * lower case clow2low if not folding case 205 */ 206 u_char clow2low[256] = { 207 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 208 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 209 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 210 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 211 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 212 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 213 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 214 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 215 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 216 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 217 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 218 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 219 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 220 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 221 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 222 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 223 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 224 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 225 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 226 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 227 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 228 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 229 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 230 0xfd, 0xfe, 0xff 231 }; 232 233 u_char cup2low[256] = { 234 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 235 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 236 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 237 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 238 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 239 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 240 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 241 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 242 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 243 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 244 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 245 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 246 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 247 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 248 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 249 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 250 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 251 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 252 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 253 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 254 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 255 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 256 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 257 0xfd, 0xfe, 0xff 258 }; 259 260 void 261 diffreg(char *ofile1, char *ofile2, int flags) 262 { 263 char *file1 = ofile1; 264 char *file2 = ofile2; 265 FILE *f1 = NULL; 266 FILE *f2 = NULL; 267 int i; 268 269 anychange = 0; 270 chrtran = (iflag ? cup2low : clow2low); 271 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 272 goto notsame; 273 274 /* XXX - only make temp file for stdin if not seekable? (millert) */ 275 if (flags & D_EMPTY1) 276 f1 = fopen(_PATH_DEVNULL, "r"); 277 else { 278 if (S_ISDIR(stb1.st_mode)) { 279 file1 = splice(file1, file2); 280 if (stat(file1, &stb1) < 0) { 281 warn("%s", file1); 282 status |= 2; 283 goto closem; 284 } 285 } else if (!S_ISREG(stb1.st_mode)) { 286 file1 = copytemp(file1, 1); 287 if (file1 == NULL || stat(file1, &stb1) < 0) { 288 warn("%s", file1); 289 status |= 2; 290 goto closem; 291 } 292 } 293 if (strcmp(file1, "-") == 0) 294 f1 = stdin; 295 else 296 f1 = fopen(file1, "r"); 297 } 298 if (f1 == NULL) { 299 warn("%s", file1); 300 status |= 2; 301 goto closem; 302 } 303 304 if (flags & D_EMPTY2) 305 f2 = fopen(_PATH_DEVNULL, "r"); 306 else { 307 if (S_ISDIR(stb2.st_mode)) { 308 file2 = splice(file2, file1); 309 if (stat(file2, &stb2) < 0) { 310 warn("%s", file2); 311 status |= 2; 312 goto closem; 313 } 314 } else if (!S_ISREG(stb2.st_mode)) { 315 file2 = copytemp(file2, 2); 316 if (file2 == NULL || stat(file2, &stb2) < 0) { 317 warn("%s", file2); 318 status |= 2; 319 goto closem; 320 } 321 } 322 if (strcmp(file2, "-") == 0) 323 f2 = stdin; 324 else 325 f2 = fopen(file2, "r"); 326 } 327 if (f2 == NULL) { 328 warn("%s", file2); 329 status |= 2; 330 goto closem; 331 } 332 333 switch (files_differ(f1, f2, flags)) { 334 case 0: 335 goto same; 336 case 1: 337 break; 338 default: 339 /* error */ 340 status |= 2; 341 goto closem; 342 } 343 344 notsame: 345 /* 346 * Files certainly differ at this point; set status accordingly 347 */ 348 status |= 1; 349 if (format == D_BRIEF) { 350 printf("Files %s and %s differ\n", file1, file2); 351 goto closem; 352 } 353 if (flags & D_HEADER) { 354 if (format == D_EDIT) 355 printf("ed - %s << '-*-END-*-'\n", basename(file1)); 356 else 357 printf("%s %s %s\n", diffargs, file1, file2); 358 } 359 if (!asciifile(f1) || !asciifile(f2)) { 360 printf("Binary files %s and %s differ\n", file1, file2); 361 goto closem; 362 } 363 prepare(0, f1); 364 prepare(1, f2); 365 prune(); 366 sort(sfile[0], slen[0]); 367 sort(sfile[1], slen[1]); 368 369 member = (int *)file[1]; 370 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 371 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 372 373 class = (int *)file[0]; 374 unsort(sfile[0], slen[0], class); 375 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 376 377 klist = emalloc((slen[0] + 2) * sizeof(int)); 378 clist = emalloc(sizeof(cand)); 379 i = stone(class, slen[0], member, klist); 380 free(member); 381 free(class); 382 383 J = erealloc(J, (len[0] + 2) * sizeof(int)); 384 unravel(klist[i]); 385 free(clist); 386 free(klist); 387 388 ixold = erealloc(ixold, (len[0] + 2) * sizeof(long)); 389 ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long)); 390 check(file1, f1, file2, f2); 391 output(file1, f1, file2, f2); 392 if ((flags & D_HEADER) && format == D_EDIT) 393 printf("w\nq\n-*-END-*-\n"); 394 same: 395 if (anychange == 0 && sflag != 0) 396 printf("Files %s and %s are identical\n", file1, file2); 397 398 closem: 399 if (f1 != NULL) 400 fclose(f1); 401 if (f2 != NULL) 402 fclose(f2); 403 if (tempfiles[0] != NULL) { 404 unlink(tempfiles[0]); 405 free(tempfiles[0]); 406 tempfiles[0] = NULL; 407 } else if (file1 != ofile1) 408 free(file1); 409 if (tempfiles[1] != NULL) { 410 unlink(tempfiles[1]); 411 free(tempfiles[1]); 412 tempfiles[1] = NULL; 413 } else if (file2 != ofile2) 414 free(file2); 415 } 416 417 /* 418 * Check to see if the given files differ. 419 * Returns 0 if they are the same, 1 if different, and -1 on error. 420 * XXX - could use code from cmp(1) [faster] 421 */ 422 static int 423 files_differ(FILE *f1, FILE *f2, int flags) 424 { 425 char buf1[BUFSIZ], buf2[BUFSIZ]; 426 size_t i, j; 427 428 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 429 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 430 return (1); 431 for (;;) { 432 i = fread(buf1, 1, sizeof(buf1), f1); 433 j = fread(buf2, 1, sizeof(buf2), f2); 434 if (i != j) 435 return (1); 436 if (i == 0 && j == 0) { 437 if (ferror(f1) || ferror(f2)) 438 return (1); 439 return (0); 440 } 441 if (memcmp(buf1, buf2, i) != 0) 442 return (1); 443 } 444 } 445 446 char *tempfiles[2]; 447 448 /* XXX - pass back a FILE * too (millert) */ 449 char * 450 copytemp(const char *file, int n) 451 { 452 char buf[BUFSIZ], *tempdir, *tempfile; 453 int i, ifd, ofd; 454 455 if (n != 1 && n != 2) 456 return (NULL); 457 458 if (strcmp(file, "-") == 0) 459 ifd = STDIN_FILENO; 460 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 461 return (NULL); 462 463 if ((tempdir = getenv("TMPDIR")) == NULL) 464 tempdir = _PATH_TMP; 465 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 466 return (NULL); 467 tempfiles[n - 1] = tempfile; 468 469 signal(SIGHUP, quit); 470 signal(SIGINT, quit); 471 signal(SIGPIPE, quit); 472 signal(SIGTERM, quit); 473 ofd = mkstemp(tempfile); 474 if (ofd < 0) 475 return (NULL); 476 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 477 if (write(ofd, buf, i) != i) 478 return (NULL); 479 } 480 close(ifd); 481 close(ofd); 482 return (tempfile); 483 } 484 485 char * 486 splice(char *dir, char *file) 487 { 488 char *tail, *buf; 489 size_t len; 490 491 tail = strrchr(file, '/'); 492 if (tail == NULL) 493 tail = file; 494 else 495 tail++; 496 len = strlen(dir) + 1 + strlen(tail) + 1; 497 buf = emalloc(len); 498 snprintf(buf, len, "%s/%s", dir, tail); 499 return (buf); 500 } 501 502 static void 503 prepare(int i, FILE *fd) 504 { 505 struct line *p; 506 int j, h; 507 508 rewind(fd); 509 p = emalloc(3 * sizeof(struct line)); 510 for (j = 0; (h = readhash(fd));) { 511 p = erealloc(p, (++j + 3) * sizeof(struct line)); 512 p[j].value = h; 513 } 514 len[i] = j; 515 file[i] = p; 516 } 517 518 static void 519 prune(void) 520 { 521 int i, j; 522 523 for (pref = 0; pref < len[0] && pref < len[1] && 524 file[0][pref + 1].value == file[1][pref + 1].value; 525 pref++) 526 ; 527 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 528 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 529 suff++) 530 ; 531 for (j = 0; j < 2; j++) { 532 sfile[j] = file[j] + pref; 533 slen[j] = len[j] - pref - suff; 534 for (i = 0; i <= slen[j]; i++) 535 sfile[j][i].serial = i; 536 } 537 } 538 539 static void 540 equiv(struct line *a, int n, struct line *b, int m, int *c) 541 { 542 int i, j; 543 544 i = j = 1; 545 while (i <= n && j <= m) { 546 if (a[i].value < b[j].value) 547 a[i++].value = 0; 548 else if (a[i].value == b[j].value) 549 a[i++].value = j; 550 else 551 j++; 552 } 553 while (i <= n) 554 a[i++].value = 0; 555 b[m + 1].value = 0; 556 j = 0; 557 while (++j <= m) { 558 c[j] = -b[j].serial; 559 while (b[j + 1].value == b[j].value) { 560 j++; 561 c[j] = b[j].serial; 562 } 563 } 564 c[j] = -1; 565 } 566 567 static int 568 stone(int *a, int n, int *b, int *c) 569 { 570 int i, k, y, j, l; 571 int oldc, tc, oldl; 572 573 k = 0; 574 c[0] = newcand(0, 0, 0); 575 for (i = 1; i <= n; i++) { 576 j = a[i]; 577 if (j == 0) 578 continue; 579 y = -b[j]; 580 oldl = 0; 581 oldc = c[0]; 582 do { 583 if (y <= clist[oldc].y) 584 continue; 585 l = search(c, k, y); 586 if (l != oldl + 1) 587 oldc = c[l - 1]; 588 if (l <= k) { 589 if (clist[c[l]].y <= y) 590 continue; 591 tc = c[l]; 592 c[l] = newcand(i, y, oldc); 593 oldc = tc; 594 oldl = l; 595 } else { 596 c[l] = newcand(i, y, oldc); 597 k++; 598 break; 599 } 600 } while ((y = b[++j]) > 0); 601 } 602 return (k); 603 } 604 605 static int 606 newcand(int x, int y, int pred) 607 { 608 struct cand *q; 609 610 clist = erealloc(clist, ++clen * sizeof(cand)); 611 q = clist + clen - 1; 612 q->x = x; 613 q->y = y; 614 q->pred = pred; 615 return (clen - 1); 616 } 617 618 static int 619 search(int *c, int k, int y) 620 { 621 int i, j, l, t; 622 623 if (clist[c[k]].y < y) /* quick look for typical case */ 624 return (k + 1); 625 i = 0; 626 j = k + 1; 627 while (1) { 628 l = i + j; 629 if ((l >>= 1) <= i) 630 break; 631 t = clist[c[l]].y; 632 if (t > y) 633 j = l; 634 else if (t < y) 635 i = l; 636 else 637 return (l); 638 } 639 return (l + 1); 640 } 641 642 static void 643 unravel(int p) 644 { 645 struct cand *q; 646 int i; 647 648 for (i = 0; i <= len[0]; i++) 649 J[i] = i <= pref ? i : 650 i > len[0] - suff ? i + len[1] - len[0] : 0; 651 for (q = clist + p; q->y != 0; q = clist + q->pred) 652 J[q->x + pref] = q->y + pref; 653 } 654 655 /* 656 * Check does double duty: 657 * 1. ferret out any fortuitous correspondences due 658 * to confounding by hashing (which result in "jackpot") 659 * 2. collect random access indexes to the two files 660 */ 661 static void 662 check(char *file1, FILE *f1, char *file2, FILE *f2) 663 { 664 int i, j, jackpot, c, d; 665 long ctold, ctnew; 666 667 rewind(f1); 668 rewind(f2); 669 j = 1; 670 ixold[0] = ixnew[0] = 0; 671 jackpot = 0; 672 ctold = ctnew = 0; 673 for (i = 1; i <= len[0]; i++) { 674 if (J[i] == 0) { 675 ixold[i] = ctold += skipline(f1); 676 continue; 677 } 678 while (j < J[i]) { 679 ixnew[j] = ctnew += skipline(f2); 680 j++; 681 } 682 if (bflag || wflag || iflag) { 683 for (;;) { 684 c = getc(f1); 685 d = getc(f2); 686 ctold++; 687 ctnew++; 688 if (bflag && isspace(c) && isspace(d)) { 689 do { 690 if (c == '\n') 691 break; 692 ctold++; 693 } while (isspace(c = getc(f1))); 694 do { 695 if (d == '\n') 696 break; 697 ctnew++; 698 } while (isspace(d = getc(f2))); 699 } else if (wflag) { 700 while (isspace(c) && c != '\n') { 701 c = getc(f1); 702 ctold++; 703 } 704 while (isspace(d) && d != '\n') { 705 d = getc(f2); 706 ctnew++; 707 } 708 } 709 if (chrtran[c] != chrtran[d]) { 710 jackpot++; 711 J[i] = 0; 712 if (c != '\n') 713 ctold += skipline(f1); 714 if (d != '\n') 715 ctnew += skipline(f2); 716 break; 717 } 718 if (c == '\n') 719 break; 720 } 721 } else { 722 for (;;) { 723 ctold++; 724 ctnew++; 725 if ((c = getc(f1)) != (d = getc(f2))) { 726 /* jackpot++; */ 727 J[i] = 0; 728 if (c != '\n') 729 ctold += skipline(f1); 730 if (d != '\n') 731 ctnew += skipline(f2); 732 break; 733 } 734 if (c == '\n') 735 break; 736 } 737 } 738 ixold[i] = ctold; 739 ixnew[j] = ctnew; 740 j++; 741 } 742 for (; j <= len[1]; j++) 743 ixnew[j] = ctnew += skipline(f2); 744 /* 745 * if (jackpot) 746 * fprintf(stderr, "jackpot\n"); 747 */ 748 } 749 750 /* shellsort CACM #201 */ 751 static void 752 sort(struct line *a, int n) 753 { 754 struct line *ai, *aim, w; 755 int j, m = 0, k; 756 757 if (n == 0) 758 return; 759 for (j = 1; j <= n; j *= 2) 760 m = 2 * j - 1; 761 for (m /= 2; m != 0; m /= 2) { 762 k = n - m; 763 for (j = 1; j <= k; j++) { 764 for (ai = &a[j]; ai > a; ai -= m) { 765 aim = &ai[m]; 766 if (aim < ai) 767 break; /* wraparound */ 768 if (aim->value > ai[0].value || 769 (aim->value == ai[0].value && 770 aim->serial > ai[0].serial)) 771 break; 772 w.value = ai[0].value; 773 ai[0].value = aim->value; 774 aim->value = w.value; 775 w.serial = ai[0].serial; 776 ai[0].serial = aim->serial; 777 aim->serial = w.serial; 778 } 779 } 780 } 781 } 782 783 static void 784 unsort(struct line *f, int l, int *b) 785 { 786 int *a, i; 787 788 a = emalloc((l + 1) * sizeof(int)); 789 for (i = 1; i <= l; i++) 790 a[f[i].serial] = f[i].value; 791 for (i = 1; i <= l; i++) 792 b[i] = a[i]; 793 free(a); 794 } 795 796 static int 797 skipline(FILE *f) 798 { 799 int i, c; 800 801 for (i = 1; (c = getc(f)) != '\n'; i++) 802 if (c < 0) 803 return (i); 804 return (i); 805 } 806 807 static void 808 output(char *file1, FILE *f1, char *file2, FILE *f2) 809 { 810 int m, i0, i1, j0, j1; 811 812 rewind(f1); 813 rewind(f2); 814 m = len[0]; 815 J[0] = 0; 816 J[m + 1] = len[1] + 1; 817 if (format != D_EDIT) { 818 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 819 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 820 i0++; 821 j0 = J[i0 - 1] + 1; 822 i1 = i0 - 1; 823 while (i1 < m && J[i1 + 1] == 0) 824 i1++; 825 j1 = J[i1 + 1] - 1; 826 J[i1] = j1; 827 change(file1, f1, file2, f2, i0, i1, j0, j1); 828 } 829 } else { 830 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 831 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 832 i0--; 833 j0 = J[i0 + 1] - 1; 834 i1 = i0 + 1; 835 while (i1 > 1 && J[i1 - 1] == 0) 836 i1--; 837 j1 = J[i1 - 1] + 1; 838 J[i1] = j1; 839 change(file1, f1, file2, f2, i1, i0, j1, j0); 840 } 841 } 842 if (m == 0) 843 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 844 if (format == D_IFDEF) { 845 for (;;) { 846 #define c i0 847 c = getc(f1); 848 if (c < 0) 849 return; 850 putchar(c); 851 } 852 #undef c 853 } 854 if (anychange != 0) { 855 if (format == D_CONTEXT) 856 dump_context_vec(f1, f2); 857 else if (format == D_UNIFIED) 858 dump_unified_vec(f1, f2); 859 } 860 } 861 862 /* 863 * The following struct is used to record change information when 864 * doing a "context" or "unified" diff. (see routine "change" to 865 * understand the highly mnemonic field names) 866 */ 867 struct context_vec { 868 int a; /* start line in old file */ 869 int b; /* end line in old file */ 870 int c; /* start line in new file */ 871 int d; /* end line in new file */ 872 }; 873 874 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 875 876 #define MAX_CONTEXT 128 877 878 /* 879 * Indicate that there is a difference between lines a and b of the from file 880 * to get to lines c to d of the to file. If a is greater then b then there 881 * are no lines in the from file involved and this means that there were 882 * lines appended (beginning at b). If c is greater than d then there are 883 * lines missing from the to file. 884 */ 885 static void 886 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) 887 { 888 if (format != D_IFDEF && a > b && c > d) 889 return; 890 if (anychange == 0) { 891 anychange = 1; 892 if (format == D_CONTEXT || format == D_UNIFIED) { 893 printf("%s %s %s", format == D_CONTEXT ? "***" : "---", 894 file1, ctime(&stb1.st_mtime)); 895 printf("%s %s %s", format == D_CONTEXT ? "---" : "+++", 896 file2, ctime(&stb2.st_mtime)); 897 if (context_vec_start == NULL) 898 context_vec_start = emalloc(MAX_CONTEXT * 899 sizeof(struct context_vec)); 900 context_vec_end = context_vec_start + MAX_CONTEXT; 901 context_vec_ptr = context_vec_start - 1; 902 } 903 } 904 if (format == D_CONTEXT || format == D_UNIFIED) { 905 /* 906 * If this new change is within 'context' lines of 907 * the previous change, just add it to the change 908 * record. If the record is full or if this 909 * change is more than 'context' lines from the previous 910 * change, dump the record, reset it & add the new change. 911 */ 912 if (context_vec_ptr >= context_vec_end || 913 (context_vec_ptr >= context_vec_start && 914 a > (context_vec_ptr->b + 2 * context) && 915 c > (context_vec_ptr->d + 2 * context))) { 916 if (format == D_CONTEXT) 917 dump_context_vec(f1, f2); 918 else 919 dump_unified_vec(f1, f2); 920 } 921 context_vec_ptr++; 922 context_vec_ptr->a = a; 923 context_vec_ptr->b = b; 924 context_vec_ptr->c = c; 925 context_vec_ptr->d = d; 926 return; 927 } 928 switch (format) { 929 930 case D_NORMAL: 931 case D_EDIT: 932 range(a, b, ","); 933 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 934 if (format == D_NORMAL) 935 range(c, d, ","); 936 putchar('\n'); 937 break; 938 case D_REVERSE: 939 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 940 range(a, b, " "); 941 putchar('\n'); 942 break; 943 case D_NREVERSE: 944 if (a > b) 945 printf("a%d %d\n", b, d - c + 1); 946 else { 947 printf("d%d %d\n", a, b - a + 1); 948 if (!(c > d)) 949 /* add changed lines */ 950 printf("a%d %d\n", b, d - c + 1); 951 } 952 break; 953 } 954 if (format == D_NORMAL || format == D_IFDEF) { 955 fetch(ixold, a, b, f1, "< ", 1); 956 if (a <= b && c <= d && format == D_NORMAL) 957 puts("---"); 958 } 959 fetch(ixnew, c, d, f2, format == D_NORMAL ? "> " : "", 0); 960 if ((format == D_EDIT || format == D_REVERSE) && c <= d) 961 puts("."); 962 if (inifdef) { 963 fprintf(stdout, "#endif /* %s */\n", ifdefname); 964 inifdef = 0; 965 } 966 } 967 968 static void 969 range(int a, int b, char *separator) 970 { 971 printf("%d", a > b ? b : a); 972 if (a < b) 973 printf("%s%d", separator, b); 974 } 975 976 static void 977 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 978 { 979 int i, j, c, col, nc; 980 981 /* 982 * When doing #ifdef's, copy down to current line 983 * if this is the first file, so that stuff makes it to output. 984 */ 985 if (format == D_IFDEF && oldfile) { 986 long curpos = ftell(lb); 987 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 988 nc = f[a > b ? b : a - 1] - curpos; 989 for (i = 0; i < nc; i++) 990 putchar(getc(lb)); 991 } 992 if (a > b) 993 return; 994 if (format == D_IFDEF) { 995 if (inifdef) { 996 fprintf(stdout, "#else /* %s%s */\n", 997 oldfile == 1 ? "!" : "", ifdefname); 998 } else { 999 if (oldfile) 1000 fprintf(stdout, "#ifndef %s\n", ifdefname); 1001 else 1002 fprintf(stdout, "#ifdef %s\n", ifdefname); 1003 } 1004 inifdef = 1 + oldfile; 1005 } 1006 for (i = a; i <= b; i++) { 1007 fseek(lb, f[i - 1], SEEK_SET); 1008 nc = f[i] - f[i - 1]; 1009 if (format != D_IFDEF) 1010 fputs(s, stdout); 1011 col = 0; 1012 for (j = 0; j < nc; j++) { 1013 c = getc(lb); 1014 if (c == '\t' && tflag) 1015 do 1016 putchar(' '); 1017 while (++col & 7); 1018 else { 1019 putchar(c); 1020 col++; 1021 } 1022 } 1023 } 1024 } 1025 1026 #define POW2 /* define only if HALFLONG is 2**n */ 1027 #define HALFLONG 16 1028 #define low(x) (x&((1L<<HALFLONG)-1)) 1029 #define high(x) (x>>HALFLONG) 1030 1031 /* 1032 * hashing has the effect of 1033 * arranging line in 7-bit bytes and then 1034 * summing 1-s complement in 16-bit hunks 1035 */ 1036 static int 1037 readhash(FILE *f) 1038 { 1039 unsigned int shift; 1040 int t, space; 1041 long sum; 1042 1043 sum = 1; 1044 space = 0; 1045 if (!bflag && !wflag) { 1046 if (iflag) 1047 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1048 if (t == -1) 1049 return (0); 1050 sum += (long)chrtran[t] << (shift 1051 #ifdef POW2 1052 &= HALFLONG - 1); 1053 #else 1054 %= HALFLONG); 1055 #endif 1056 } 1057 else 1058 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1059 if (t == -1) 1060 return (0); 1061 sum += (long)t << (shift 1062 #ifdef POW2 1063 &= HALFLONG - 1); 1064 #else 1065 %= HALFLONG); 1066 #endif 1067 } 1068 } else { 1069 for (shift = 0;;) { 1070 switch (t = getc(f)) { 1071 case -1: 1072 return (0); 1073 case '\t': 1074 case ' ': 1075 space++; 1076 continue; 1077 default: 1078 if (space && !wflag) { 1079 shift += 7; 1080 space = 0; 1081 } 1082 sum += (long)chrtran[t] << (shift 1083 #ifdef POW2 1084 &= HALFLONG - 1); 1085 #else 1086 %= HALFLONG); 1087 #endif 1088 shift += 7; 1089 continue; 1090 case '\n': 1091 break; 1092 } 1093 break; 1094 } 1095 } 1096 sum = low(sum) + high(sum); 1097 return ((short) low(sum) + (short) high(sum)); 1098 } 1099 1100 int 1101 asciifile(FILE *f) 1102 { 1103 char buf[BUFSIZ], *cp; 1104 int cnt; 1105 1106 if (aflag || f == NULL) 1107 return (1); 1108 1109 rewind(f); 1110 cnt = fread(buf, 1, sizeof(buf), f); 1111 cp = buf; 1112 while (--cnt >= 0) 1113 if (*cp++ & 0200) 1114 return (0); 1115 return (1); 1116 } 1117 1118 static __inline int min(int a, int b) 1119 { 1120 return (a < b ? a : b); 1121 } 1122 1123 static __inline int max(int a, int b) 1124 { 1125 return (a > b ? a : b); 1126 } 1127 1128 /* dump accumulated "context" diff changes */ 1129 static void 1130 dump_context_vec(FILE *f1, FILE *f2) 1131 { 1132 struct context_vec *cvp = context_vec_start; 1133 int lowa, upb, lowc, upd, do_output; 1134 int a, b, c, d; 1135 char ch; 1136 1137 if (context_vec_start > context_vec_ptr) 1138 return; 1139 1140 b = d = 0; /* gcc */ 1141 lowa = max(1, cvp->a - context); 1142 upb = min(len[0], context_vec_ptr->b + context); 1143 lowc = max(1, cvp->c - context); 1144 upd = min(len[1], context_vec_ptr->d + context); 1145 1146 printf("***************\n*** "); 1147 range(lowa, upb, ","); 1148 printf(" ****\n"); 1149 1150 /* 1151 * Output changes to the "old" file. The first loop suppresses 1152 * output if there were no changes to the "old" file (we'll see 1153 * the "old" lines as context in the "new" list). 1154 */ 1155 do_output = 0; 1156 for (; cvp <= context_vec_ptr; cvp++) 1157 if (cvp->a <= cvp->b) { 1158 cvp = context_vec_start; 1159 do_output++; 1160 break; 1161 } 1162 if (do_output) { 1163 while (cvp <= context_vec_ptr) { 1164 a = cvp->a; 1165 b = cvp->b; 1166 c = cvp->c; 1167 d = cvp->d; 1168 1169 if (a <= b && c <= d) 1170 ch = 'c'; 1171 else 1172 ch = (a <= b) ? 'd' : 'a'; 1173 1174 if (ch == 'a') 1175 fetch(ixold, lowa, b, f1, " ", 0); 1176 else { 1177 fetch(ixold, lowa, a - 1, f1, " ", 0); 1178 fetch(ixold, a, b, f1, 1179 ch == 'c' ? "! " : "- ", 0); 1180 } 1181 lowa = b + 1; 1182 cvp++; 1183 } 1184 fetch(ixold, b + 1, upb, f1, " ", 0); 1185 } 1186 /* output changes to the "new" file */ 1187 printf("--- "); 1188 range(lowc, upd, ","); 1189 printf(" ----\n"); 1190 1191 do_output = 0; 1192 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1193 if (cvp->c <= cvp->d) { 1194 cvp = context_vec_start; 1195 do_output++; 1196 break; 1197 } 1198 if (do_output) { 1199 while (cvp <= context_vec_ptr) { 1200 a = cvp->a; 1201 b = cvp->b; 1202 c = cvp->c; 1203 d = cvp->d; 1204 1205 if (a <= b && c <= d) 1206 ch = 'c'; 1207 else 1208 ch = (a <= b) ? 'd' : 'a'; 1209 1210 if (ch == 'd') 1211 fetch(ixnew, lowc, d, f2, " ", 0); 1212 else { 1213 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1214 fetch(ixnew, c, d, f2, 1215 ch == 'c' ? "! " : "+ ", 0); 1216 } 1217 lowc = d + 1; 1218 cvp++; 1219 } 1220 fetch(ixnew, d + 1, upd, f2, " ", 0); 1221 } 1222 context_vec_ptr = context_vec_start - 1; 1223 } 1224 1225 /* dump accumulated "unified" diff changes */ 1226 static void 1227 dump_unified_vec(FILE *f1, FILE *f2) 1228 { 1229 struct context_vec *cvp = context_vec_start; 1230 int lowa, upb, lowc, upd; 1231 int a, b, c, d; 1232 char ch; 1233 1234 if (context_vec_start > context_vec_ptr) 1235 return; 1236 1237 b = d = 0; /* gcc */ 1238 lowa = max(1, cvp->a - context); 1239 upb = min(len[0], context_vec_ptr->b + context); 1240 lowc = max(1, cvp->c - context); 1241 upd = min(len[1], context_vec_ptr->d + context); 1242 1243 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1244 lowc, upd - lowc + 1); 1245 1246 /* 1247 * Output changes in "unified" diff format--the old and new lines 1248 * are printed together. 1249 */ 1250 for (; cvp <= context_vec_ptr; cvp++) { 1251 a = cvp->a; 1252 b = cvp->b; 1253 c = cvp->c; 1254 d = cvp->d; 1255 1256 /* 1257 * c: both new and old changes 1258 * d: only changes in the old file 1259 * a: only changes in the new file 1260 */ 1261 if (a <= b && c <= d) 1262 ch = 'c'; 1263 else 1264 ch = (a <= b) ? 'd' : 'a'; 1265 1266 switch (ch) { 1267 case 'c': 1268 fetch(ixold, lowa, a - 1, f1, " ", 0); 1269 fetch(ixold, a, b, f1, "-", 0); 1270 fetch(ixnew, c, d, f2, "+", 0); 1271 break; 1272 case 'd': 1273 fetch(ixold, lowa, a - 1, f1, " ", 0); 1274 fetch(ixold, a, b, f1, "-", 0); 1275 break; 1276 case 'a': 1277 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1278 fetch(ixnew, c, d, f2, "+", 0); 1279 break; 1280 } 1281 lowa = b + 1; 1282 lowc = d + 1; 1283 } 1284 fetch(ixnew, d + 1, upd, f2, " ", 0); 1285 1286 context_vec_ptr = context_vec_start - 1; 1287 } 1288