1 /* $OpenBSD: diffreg.c,v 1.28 2003/07/06 22:17:21 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #ifndef lint 68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.28 2003/07/06 22:17:21 millert Exp $"; 69 #endif /* not lint */ 70 71 #include <sys/types.h> 72 #include <sys/stat.h> 73 74 #include <ctype.h> 75 #include <err.h> 76 #include <fcntl.h> 77 #include <libgen.h> 78 #include <paths.h> 79 #include <signal.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 85 #include "diff.h" 86 87 /* 88 * diff - compare two files. 89 */ 90 91 /* 92 * Uses an algorithm due to Harold Stone, which finds 93 * a pair of longest identical subsequences in the two 94 * files. 95 * 96 * The major goal is to generate the match vector J. 97 * J[i] is the index of the line in file1 corresponding 98 * to line i file0. J[i] = 0 if there is no 99 * such line in file1. 100 * 101 * Lines are hashed so as to work in core. All potential 102 * matches are located by sorting the lines of each file 103 * on the hash (called ``value''). In particular, this 104 * collects the equivalence classes in file1 together. 105 * Subroutine equiv replaces the value of each line in 106 * file0 by the index of the first element of its 107 * matching equivalence in (the reordered) file1. 108 * To save space equiv squeezes file1 into a single 109 * array member in which the equivalence classes 110 * are simply concatenated, except that their first 111 * members are flagged by changing sign. 112 * 113 * Next the indices that point into member are unsorted into 114 * array class according to the original order of file0. 115 * 116 * The cleverness lies in routine stone. This marches 117 * through the lines of file0, developing a vector klist 118 * of "k-candidates". At step i a k-candidate is a matched 119 * pair of lines x,y (x in file0 y in file1) such that 120 * there is a common subsequence of length k 121 * between the first i lines of file0 and the first y 122 * lines of file1, but there is no such subsequence for 123 * any smaller y. x is the earliest possible mate to y 124 * that occurs in such a subsequence. 125 * 126 * Whenever any of the members of the equivalence class of 127 * lines in file1 matable to a line in file0 has serial number 128 * less than the y of some k-candidate, that k-candidate 129 * with the smallest such y is replaced. The new 130 * k-candidate is chained (via pred) to the current 131 * k-1 candidate so that the actual subsequence can 132 * be recovered. When a member has serial number greater 133 * that the y of all k-candidates, the klist is extended. 134 * At the end, the longest subsequence is pulled out 135 * and placed in the array J by unravel 136 * 137 * With J in hand, the matches there recorded are 138 * check'ed against reality to assure that no spurious 139 * matches have crept in due to hashing. If they have, 140 * they are broken, and "jackpot" is recorded--a harmless 141 * matter except that a true match for a spuriously 142 * mated line may now be unnecessarily reported as a change. 143 * 144 * Much of the complexity of the program comes simply 145 * from trying to minimize core utilization and 146 * maximize the range of doable problems by dynamically 147 * allocating what is needed and reusing what is not. 148 * The core requirements for problems larger than somewhat 149 * are (in words) 2*length(file0) + length(file1) + 150 * 3*(number of k-candidates installed), typically about 151 * 6n words for files of length n. 152 */ 153 154 struct cand { 155 int x; 156 int y; 157 int pred; 158 } cand; 159 160 struct line { 161 int serial; 162 int value; 163 } *file[2]; 164 165 static int *J; /* will be overlaid on class */ 166 static int *class; /* will be overlaid on file[0] */ 167 static int *klist; /* will be overlaid on file[0] after class */ 168 static int *member; /* will be overlaid on file[1] */ 169 static int clen; 170 static int inifdef; /* whether or not we are in a #ifdef block */ 171 static int len[2]; 172 static int pref, suff; /* length of prefix and suffix */ 173 static int slen[2]; 174 static int anychange; 175 static long *ixnew; /* will be overlaid on file[1] */ 176 static long *ixold; /* will be overlaid on klist */ 177 static struct cand *clist; /* merely a free storage pot for candidates */ 178 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 179 static u_char *chrtran; /* translation table for case-folding */ 180 181 static void fetch(long *, int, int, FILE *, char *, int); 182 static void output(char *, FILE *, char *, FILE *); 183 static void check(char *, FILE *, char *, FILE *); 184 static void range(int, int, char *); 185 static void dump_context_vec(FILE *, FILE *); 186 static void dump_unified_vec(FILE *, FILE *); 187 static void prepare(int, FILE *); 188 static void prune(void); 189 static void equiv(struct line *, int, struct line *, int, int *); 190 static void unravel(int); 191 static void unsort(struct line *, int, int *); 192 static void change(char *, FILE *, char *, FILE *, int, int, int, int); 193 static void sort(struct line *, int); 194 static int asciifile(FILE *); 195 static int newcand(int, int, int); 196 static int search(int *, int, int); 197 static int skipline(FILE *); 198 static int stone(int *, int, int *, int *); 199 static int readhash(FILE *); 200 static int files_differ(FILE *, FILE *, int); 201 202 /* 203 * chrtran points to one of 2 translation tables: cup2low if folding upper to 204 * lower case clow2low if not folding case 205 */ 206 u_char clow2low[256] = { 207 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 208 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 209 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 210 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 211 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 212 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 213 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 214 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 215 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 216 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 217 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 218 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 219 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 220 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 221 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 222 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 223 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 224 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 225 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 226 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 227 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 228 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 229 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 230 0xfd, 0xfe, 0xff 231 }; 232 233 u_char cup2low[256] = { 234 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 235 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 236 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 237 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 238 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 239 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 240 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 241 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 242 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 243 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 244 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 245 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 246 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 247 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 248 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 249 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 250 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 251 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 252 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 253 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 254 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 255 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 256 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 257 0xfd, 0xfe, 0xff 258 }; 259 260 void 261 diffreg(char *ofile1, char *ofile2, int flags) 262 { 263 char *file1 = ofile1; 264 char *file2 = ofile2; 265 FILE *f1 = NULL; 266 FILE *f2 = NULL; 267 int i; 268 269 anychange = 0; 270 chrtran = (iflag ? cup2low : clow2low); 271 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 272 goto notsame; 273 274 /* XXX - only make temp file for stdin if not seekable? (millert) */ 275 if (flags & D_EMPTY1) 276 f1 = fopen(_PATH_DEVNULL, "r"); 277 else { 278 if (S_ISDIR(stb1.st_mode)) { 279 file1 = splice(file1, file2); 280 if (stat(file1, &stb1) < 0) { 281 warn("%s", file1); 282 status |= 2; 283 goto closem; 284 } 285 } else if (strcmp(file1, "-") == 0 || !S_ISREG(stb1.st_mode)) { 286 file1 = copytemp(file1, 1); 287 if (file1 == NULL || stat(file1, &stb1) < 0) { 288 warn("%s", file1); 289 status |= 2; 290 goto closem; 291 } 292 } 293 f1 = fopen(file1, "r"); 294 } 295 if (f1 == NULL) { 296 warn("%s", file1); 297 status |= 2; 298 goto closem; 299 } 300 301 if (flags & D_EMPTY2) 302 f2 = fopen(_PATH_DEVNULL, "r"); 303 else { 304 if (S_ISDIR(stb2.st_mode)) { 305 file2 = splice(file2, file1); 306 if (stat(file2, &stb2) < 0) { 307 warn("%s", file2); 308 status |= 2; 309 goto closem; 310 } 311 } else if (strcmp(file2, "-") == 0 || !S_ISREG(stb2.st_mode)) { 312 file2 = copytemp(file2, 2); 313 if (file2 == NULL || stat(file2, &stb2) < 0) { 314 warn("%s", file2); 315 status |= 2; 316 goto closem; 317 } 318 } 319 f2 = fopen(file2, "r"); 320 } 321 if (f2 == NULL) { 322 warn("%s", file2); 323 status |= 2; 324 goto closem; 325 } 326 327 switch (files_differ(f1, f2, flags)) { 328 case 0: 329 goto same; 330 case 1: 331 break; 332 default: 333 /* error */ 334 status |= 2; 335 goto closem; 336 } 337 338 notsame: 339 /* 340 * Files certainly differ at this point; set status accordingly 341 */ 342 status |= 1; 343 if (format == D_BRIEF) { 344 printf("Files %s and %s differ\n", file1, file2); 345 goto closem; 346 } 347 if (flags & D_HEADER) { 348 if (format == D_EDIT) 349 printf("ed - %s << '-*-END-*-'\n", basename(file1)); 350 else 351 printf("%s %s %s\n", diffargs, file1, file2); 352 } 353 if (!asciifile(f1) || !asciifile(f2)) { 354 printf("Binary files %s and %s differ\n", file1, file2); 355 goto closem; 356 } 357 prepare(0, f1); 358 prepare(1, f2); 359 prune(); 360 sort(sfile[0], slen[0]); 361 sort(sfile[1], slen[1]); 362 363 member = (int *)file[1]; 364 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 365 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 366 367 class = (int *)file[0]; 368 unsort(sfile[0], slen[0], class); 369 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 370 371 klist = emalloc((slen[0] + 2) * sizeof(int)); 372 clist = emalloc(sizeof(cand)); 373 i = stone(class, slen[0], member, klist); 374 free(member); 375 free(class); 376 377 J = erealloc(J, (len[0] + 2) * sizeof(int)); 378 unravel(klist[i]); 379 free(clist); 380 free(klist); 381 382 ixold = erealloc(ixold, (len[0] + 2) * sizeof(long)); 383 ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long)); 384 check(file1, f1, file2, f2); 385 output(file1, f1, file2, f2); 386 if ((flags & D_HEADER) && format == D_EDIT) 387 printf("w\nq\n-*-END-*-\n"); 388 same: 389 if (anychange == 0 && sflag != 0) 390 printf("Files %s and %s are identical\n", file1, file2); 391 392 closem: 393 if (f1 != NULL) 394 fclose(f1); 395 if (f2 != NULL) 396 fclose(f2); 397 if (tempfiles[0] != NULL) { 398 unlink(tempfiles[0]); 399 free(tempfiles[0]); 400 tempfiles[0] = NULL; 401 } 402 if (tempfiles[1] != NULL) { 403 unlink(tempfiles[1]); 404 free(tempfiles[1]); 405 tempfiles[1] = NULL; 406 } 407 if (file1 != ofile1) 408 free(file1); 409 if (file2 != ofile2) 410 free(file2); 411 } 412 413 /* 414 * Check to see if the given files differ. 415 * Returns 0 if they are the same, 1 if different, and -1 on error. 416 * XXX - could use code from cmp(1) [faster] 417 */ 418 static int 419 files_differ(FILE *f1, FILE *f2, int flags) 420 { 421 char buf1[BUFSIZ], buf2[BUFSIZ]; 422 size_t i, j; 423 424 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 425 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 426 return (1); 427 for (;;) { 428 i = fread(buf1, 1, sizeof(buf1), f1); 429 j = fread(buf2, 1, sizeof(buf2), f2); 430 if (i != j) 431 return (1); 432 if (i == 0 && j == 0) { 433 if (ferror(f1) || ferror(f2)) 434 return (1); 435 return (0); 436 } 437 if (memcmp(buf1, buf2, i) != 0) 438 return (1); 439 } 440 } 441 442 char *tempfiles[2]; 443 444 /* XXX - pass back a FILE * too (millert) */ 445 char * 446 copytemp(const char *file, int n) 447 { 448 char buf[BUFSIZ], *tempdir, *tempfile; 449 int i, ifd, ofd; 450 451 if (n != 1 && n != 2) 452 return (NULL); 453 454 if (strcmp(file, "-") == 0) 455 ifd = STDIN_FILENO; 456 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 457 return (NULL); 458 459 if ((tempdir = getenv("TMPDIR")) == NULL) 460 tempdir = _PATH_TMP; 461 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 462 return (NULL); 463 tempfiles[n - 1] = tempfile; 464 465 signal(SIGHUP, quit); 466 signal(SIGINT, quit); 467 signal(SIGPIPE, quit); 468 signal(SIGTERM, quit); 469 ofd = mkstemp(tempfile); 470 if (ofd < 0) 471 return (NULL); 472 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 473 if (write(ofd, buf, i) != i) 474 return (NULL); 475 } 476 close(ifd); 477 close(ofd); 478 return (tempfile); 479 } 480 481 char * 482 splice(char *dir, char *file) 483 { 484 char *tail, *buf; 485 size_t len; 486 487 tail = strrchr(file, '/'); 488 if (tail == NULL) 489 tail = file; 490 else 491 tail++; 492 len = strlen(dir) + 1 + strlen(tail) + 1; 493 buf = emalloc(len); 494 snprintf(buf, len, "%s/%s", dir, tail); 495 return (buf); 496 } 497 498 static void 499 prepare(int i, FILE *fd) 500 { 501 struct line *p; 502 int j, h; 503 504 rewind(fd); 505 p = emalloc(3 * sizeof(struct line)); 506 for (j = 0; (h = readhash(fd));) { 507 p = erealloc(p, (++j + 3) * sizeof(struct line)); 508 p[j].value = h; 509 } 510 len[i] = j; 511 file[i] = p; 512 } 513 514 static void 515 prune(void) 516 { 517 int i, j; 518 519 for (pref = 0; pref < len[0] && pref < len[1] && 520 file[0][pref + 1].value == file[1][pref + 1].value; 521 pref++) 522 ; 523 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 524 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 525 suff++) 526 ; 527 for (j = 0; j < 2; j++) { 528 sfile[j] = file[j] + pref; 529 slen[j] = len[j] - pref - suff; 530 for (i = 0; i <= slen[j]; i++) 531 sfile[j][i].serial = i; 532 } 533 } 534 535 static void 536 equiv(struct line *a, int n, struct line *b, int m, int *c) 537 { 538 int i, j; 539 540 i = j = 1; 541 while (i <= n && j <= m) { 542 if (a[i].value < b[j].value) 543 a[i++].value = 0; 544 else if (a[i].value == b[j].value) 545 a[i++].value = j; 546 else 547 j++; 548 } 549 while (i <= n) 550 a[i++].value = 0; 551 b[m + 1].value = 0; 552 j = 0; 553 while (++j <= m) { 554 c[j] = -b[j].serial; 555 while (b[j + 1].value == b[j].value) { 556 j++; 557 c[j] = b[j].serial; 558 } 559 } 560 c[j] = -1; 561 } 562 563 static int 564 stone(int *a, int n, int *b, int *c) 565 { 566 int i, k, y, j, l; 567 int oldc, tc, oldl; 568 569 k = 0; 570 c[0] = newcand(0, 0, 0); 571 for (i = 1; i <= n; i++) { 572 j = a[i]; 573 if (j == 0) 574 continue; 575 y = -b[j]; 576 oldl = 0; 577 oldc = c[0]; 578 do { 579 if (y <= clist[oldc].y) 580 continue; 581 l = search(c, k, y); 582 if (l != oldl + 1) 583 oldc = c[l - 1]; 584 if (l <= k) { 585 if (clist[c[l]].y <= y) 586 continue; 587 tc = c[l]; 588 c[l] = newcand(i, y, oldc); 589 oldc = tc; 590 oldl = l; 591 } else { 592 c[l] = newcand(i, y, oldc); 593 k++; 594 break; 595 } 596 } while ((y = b[++j]) > 0); 597 } 598 return (k); 599 } 600 601 static int 602 newcand(int x, int y, int pred) 603 { 604 struct cand *q; 605 606 clist = erealloc(clist, ++clen * sizeof(cand)); 607 q = clist + clen - 1; 608 q->x = x; 609 q->y = y; 610 q->pred = pred; 611 return (clen - 1); 612 } 613 614 static int 615 search(int *c, int k, int y) 616 { 617 int i, j, l, t; 618 619 if (clist[c[k]].y < y) /* quick look for typical case */ 620 return (k + 1); 621 i = 0; 622 j = k + 1; 623 while (1) { 624 l = i + j; 625 if ((l >>= 1) <= i) 626 break; 627 t = clist[c[l]].y; 628 if (t > y) 629 j = l; 630 else if (t < y) 631 i = l; 632 else 633 return (l); 634 } 635 return (l + 1); 636 } 637 638 static void 639 unravel(int p) 640 { 641 struct cand *q; 642 int i; 643 644 for (i = 0; i <= len[0]; i++) 645 J[i] = i <= pref ? i : 646 i > len[0] - suff ? i + len[1] - len[0] : 0; 647 for (q = clist + p; q->y != 0; q = clist + q->pred) 648 J[q->x + pref] = q->y + pref; 649 } 650 651 /* 652 * Check does double duty: 653 * 1. ferret out any fortuitous correspondences due 654 * to confounding by hashing (which result in "jackpot") 655 * 2. collect random access indexes to the two files 656 */ 657 static void 658 check(char *file1, FILE *f1, char *file2, FILE *f2) 659 { 660 int i, j, jackpot, c, d; 661 long ctold, ctnew; 662 663 rewind(f1); 664 rewind(f2); 665 j = 1; 666 ixold[0] = ixnew[0] = 0; 667 jackpot = 0; 668 ctold = ctnew = 0; 669 for (i = 1; i <= len[0]; i++) { 670 if (J[i] == 0) { 671 ixold[i] = ctold += skipline(f1); 672 continue; 673 } 674 while (j < J[i]) { 675 ixnew[j] = ctnew += skipline(f2); 676 j++; 677 } 678 if (bflag || wflag || iflag) { 679 for (;;) { 680 c = getc(f1); 681 d = getc(f2); 682 ctold++; 683 ctnew++; 684 if (bflag && isspace(c) && isspace(d)) { 685 do { 686 if (c == '\n') 687 break; 688 ctold++; 689 } while (isspace(c = getc(f1))); 690 do { 691 if (d == '\n') 692 break; 693 ctnew++; 694 } while (isspace(d = getc(f2))); 695 } else if (wflag) { 696 while (isspace(c) && c != '\n') { 697 c = getc(f1); 698 ctold++; 699 } 700 while (isspace(d) && d != '\n') { 701 d = getc(f2); 702 ctnew++; 703 } 704 } 705 if (chrtran[c] != chrtran[d]) { 706 jackpot++; 707 J[i] = 0; 708 if (c != '\n') 709 ctold += skipline(f1); 710 if (d != '\n') 711 ctnew += skipline(f2); 712 break; 713 } 714 if (c == '\n') 715 break; 716 } 717 } else { 718 for (;;) { 719 ctold++; 720 ctnew++; 721 if ((c = getc(f1)) != (d = getc(f2))) { 722 /* jackpot++; */ 723 J[i] = 0; 724 if (c != '\n') 725 ctold += skipline(f1); 726 if (d != '\n') 727 ctnew += skipline(f2); 728 break; 729 } 730 if (c == '\n') 731 break; 732 } 733 } 734 ixold[i] = ctold; 735 ixnew[j] = ctnew; 736 j++; 737 } 738 for (; j <= len[1]; j++) 739 ixnew[j] = ctnew += skipline(f2); 740 /* 741 * if (jackpot) 742 * fprintf(stderr, "jackpot\n"); 743 */ 744 } 745 746 /* shellsort CACM #201 */ 747 static void 748 sort(struct line *a, int n) 749 { 750 struct line *ai, *aim, w; 751 int j, m = 0, k; 752 753 if (n == 0) 754 return; 755 for (j = 1; j <= n; j *= 2) 756 m = 2 * j - 1; 757 for (m /= 2; m != 0; m /= 2) { 758 k = n - m; 759 for (j = 1; j <= k; j++) { 760 for (ai = &a[j]; ai > a; ai -= m) { 761 aim = &ai[m]; 762 if (aim < ai) 763 break; /* wraparound */ 764 if (aim->value > ai[0].value || 765 (aim->value == ai[0].value && 766 aim->serial > ai[0].serial)) 767 break; 768 w.value = ai[0].value; 769 ai[0].value = aim->value; 770 aim->value = w.value; 771 w.serial = ai[0].serial; 772 ai[0].serial = aim->serial; 773 aim->serial = w.serial; 774 } 775 } 776 } 777 } 778 779 static void 780 unsort(struct line *f, int l, int *b) 781 { 782 int *a, i; 783 784 a = emalloc((l + 1) * sizeof(int)); 785 for (i = 1; i <= l; i++) 786 a[f[i].serial] = f[i].value; 787 for (i = 1; i <= l; i++) 788 b[i] = a[i]; 789 free(a); 790 } 791 792 static int 793 skipline(FILE *f) 794 { 795 int i, c; 796 797 for (i = 1; (c = getc(f)) != '\n'; i++) 798 if (c < 0) 799 return (i); 800 return (i); 801 } 802 803 static void 804 output(char *file1, FILE *f1, char *file2, FILE *f2) 805 { 806 int m, i0, i1, j0, j1; 807 808 rewind(f1); 809 rewind(f2); 810 m = len[0]; 811 J[0] = 0; 812 J[m + 1] = len[1] + 1; 813 if (format != D_EDIT) { 814 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 815 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 816 i0++; 817 j0 = J[i0 - 1] + 1; 818 i1 = i0 - 1; 819 while (i1 < m && J[i1 + 1] == 0) 820 i1++; 821 j1 = J[i1 + 1] - 1; 822 J[i1] = j1; 823 change(file1, f1, file2, f2, i0, i1, j0, j1); 824 } 825 } else { 826 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 827 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 828 i0--; 829 j0 = J[i0 + 1] - 1; 830 i1 = i0 + 1; 831 while (i1 > 1 && J[i1 - 1] == 0) 832 i1--; 833 j1 = J[i1 - 1] + 1; 834 J[i1] = j1; 835 change(file1, f1, file2, f2, i1, i0, j1, j0); 836 } 837 } 838 if (m == 0) 839 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 840 if (format == D_IFDEF) { 841 for (;;) { 842 #define c i0 843 c = getc(f1); 844 if (c < 0) 845 return; 846 putchar(c); 847 } 848 #undef c 849 } 850 if (anychange != 0) { 851 if (format == D_CONTEXT) 852 dump_context_vec(f1, f2); 853 else if (format == D_UNIFIED) 854 dump_unified_vec(f1, f2); 855 } 856 } 857 858 /* 859 * The following struct is used to record change information when 860 * doing a "context" or "unified" diff. (see routine "change" to 861 * understand the highly mnemonic field names) 862 */ 863 struct context_vec { 864 int a; /* start line in old file */ 865 int b; /* end line in old file */ 866 int c; /* start line in new file */ 867 int d; /* end line in new file */ 868 }; 869 870 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 871 872 #define MAX_CONTEXT 128 873 874 /* 875 * Indicate that there is a difference between lines a and b of the from file 876 * to get to lines c to d of the to file. If a is greater then b then there 877 * are no lines in the from file involved and this means that there were 878 * lines appended (beginning at b). If c is greater than d then there are 879 * lines missing from the to file. 880 */ 881 static void 882 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) 883 { 884 if (format != D_IFDEF && a > b && c > d) 885 return; 886 if (anychange == 0) { 887 anychange = 1; 888 if (format == D_CONTEXT || format == D_UNIFIED) { 889 printf("%s %s %s", format == D_CONTEXT ? "***" : "---", 890 file1, ctime(&stb1.st_mtime)); 891 printf("%s %s %s", format == D_CONTEXT ? "---" : "+++", 892 file2, ctime(&stb2.st_mtime)); 893 if (context_vec_start == NULL) 894 context_vec_start = emalloc(MAX_CONTEXT * 895 sizeof(struct context_vec)); 896 context_vec_end = context_vec_start + MAX_CONTEXT; 897 context_vec_ptr = context_vec_start - 1; 898 } 899 } 900 if (format == D_CONTEXT || format == D_UNIFIED) { 901 /* 902 * If this new change is within 'context' lines of 903 * the previous change, just add it to the change 904 * record. If the record is full or if this 905 * change is more than 'context' lines from the previous 906 * change, dump the record, reset it & add the new change. 907 */ 908 if (context_vec_ptr >= context_vec_end || 909 (context_vec_ptr >= context_vec_start && 910 a > (context_vec_ptr->b + 2 * context) && 911 c > (context_vec_ptr->d + 2 * context))) { 912 if (format == D_CONTEXT) 913 dump_context_vec(f1, f2); 914 else 915 dump_unified_vec(f1, f2); 916 } 917 context_vec_ptr++; 918 context_vec_ptr->a = a; 919 context_vec_ptr->b = b; 920 context_vec_ptr->c = c; 921 context_vec_ptr->d = d; 922 return; 923 } 924 switch (format) { 925 926 case D_NORMAL: 927 case D_EDIT: 928 range(a, b, ","); 929 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 930 if (format == D_NORMAL) 931 range(c, d, ","); 932 putchar('\n'); 933 break; 934 case D_REVERSE: 935 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 936 range(a, b, " "); 937 putchar('\n'); 938 break; 939 case D_NREVERSE: 940 if (a > b) 941 printf("a%d %d\n", b, d - c + 1); 942 else { 943 printf("d%d %d\n", a, b - a + 1); 944 if (!(c > d)) 945 /* add changed lines */ 946 printf("a%d %d\n", b, d - c + 1); 947 } 948 break; 949 } 950 if (format == D_NORMAL || format == D_IFDEF) { 951 fetch(ixold, a, b, f1, "< ", 1); 952 if (a <= b && c <= d && format == D_NORMAL) 953 puts("---"); 954 } 955 fetch(ixnew, c, d, f2, format == D_NORMAL ? "> " : "", 0); 956 if ((format == D_EDIT || format == D_REVERSE) && c <= d) 957 puts("."); 958 if (inifdef) { 959 fprintf(stdout, "#endif /* %s */\n", ifdefname); 960 inifdef = 0; 961 } 962 } 963 964 static void 965 range(int a, int b, char *separator) 966 { 967 printf("%d", a > b ? b : a); 968 if (a < b) 969 printf("%s%d", separator, b); 970 } 971 972 static void 973 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 974 { 975 int i, j, c, col, nc; 976 977 /* 978 * When doing #ifdef's, copy down to current line 979 * if this is the first file, so that stuff makes it to output. 980 */ 981 if (format == D_IFDEF && oldfile) { 982 long curpos = ftell(lb); 983 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 984 nc = f[a > b ? b : a - 1] - curpos; 985 for (i = 0; i < nc; i++) 986 putchar(getc(lb)); 987 } 988 if (a > b) 989 return; 990 if (format == D_IFDEF) { 991 if (inifdef) { 992 fprintf(stdout, "#else /* %s%s */\n", 993 oldfile == 1 ? "!" : "", ifdefname); 994 } else { 995 if (oldfile) 996 fprintf(stdout, "#ifndef %s\n", ifdefname); 997 else 998 fprintf(stdout, "#ifdef %s\n", ifdefname); 999 } 1000 inifdef = 1 + oldfile; 1001 } 1002 for (i = a; i <= b; i++) { 1003 fseek(lb, f[i - 1], SEEK_SET); 1004 nc = f[i] - f[i - 1]; 1005 if (format != D_IFDEF) 1006 fputs(s, stdout); 1007 col = 0; 1008 for (j = 0; j < nc; j++) { 1009 c = getc(lb); 1010 if (c == '\t' && tflag) 1011 do 1012 putchar(' '); 1013 while (++col & 7); 1014 else { 1015 putchar(c); 1016 col++; 1017 } 1018 } 1019 } 1020 } 1021 1022 #define POW2 /* define only if HALFLONG is 2**n */ 1023 #define HALFLONG 16 1024 #define low(x) (x&((1L<<HALFLONG)-1)) 1025 #define high(x) (x>>HALFLONG) 1026 1027 /* 1028 * hashing has the effect of 1029 * arranging line in 7-bit bytes and then 1030 * summing 1-s complement in 16-bit hunks 1031 */ 1032 static int 1033 readhash(FILE *f) 1034 { 1035 unsigned int shift; 1036 int t, space; 1037 long sum; 1038 1039 sum = 1; 1040 space = 0; 1041 if (!bflag && !wflag) { 1042 if (iflag) 1043 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1044 if (t == -1) 1045 return (0); 1046 sum += (long)chrtran[t] << (shift 1047 #ifdef POW2 1048 &= HALFLONG - 1); 1049 #else 1050 %= HALFLONG); 1051 #endif 1052 } 1053 else 1054 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1055 if (t == -1) 1056 return (0); 1057 sum += (long)t << (shift 1058 #ifdef POW2 1059 &= HALFLONG - 1); 1060 #else 1061 %= HALFLONG); 1062 #endif 1063 } 1064 } else { 1065 for (shift = 0;;) { 1066 switch (t = getc(f)) { 1067 case -1: 1068 return (0); 1069 case '\t': 1070 case ' ': 1071 space++; 1072 continue; 1073 default: 1074 if (space && !wflag) { 1075 shift += 7; 1076 space = 0; 1077 } 1078 sum += (long)chrtran[t] << (shift 1079 #ifdef POW2 1080 &= HALFLONG - 1); 1081 #else 1082 %= HALFLONG); 1083 #endif 1084 shift += 7; 1085 continue; 1086 case '\n': 1087 break; 1088 } 1089 break; 1090 } 1091 } 1092 sum = low(sum) + high(sum); 1093 return ((short) low(sum) + (short) high(sum)); 1094 } 1095 1096 int 1097 asciifile(FILE *f) 1098 { 1099 char buf[BUFSIZ], *cp; 1100 int cnt; 1101 1102 if (aflag || f == NULL) 1103 return (1); 1104 1105 rewind(f); 1106 cnt = fread(buf, 1, sizeof(buf), f); 1107 cp = buf; 1108 while (--cnt >= 0) 1109 if (*cp++ & 0200) 1110 return (0); 1111 return (1); 1112 } 1113 1114 static __inline int min(int a, int b) 1115 { 1116 return (a < b ? a : b); 1117 } 1118 1119 static __inline int max(int a, int b) 1120 { 1121 return (a > b ? a : b); 1122 } 1123 1124 /* dump accumulated "context" diff changes */ 1125 static void 1126 dump_context_vec(FILE *f1, FILE *f2) 1127 { 1128 struct context_vec *cvp = context_vec_start; 1129 int lowa, upb, lowc, upd, do_output; 1130 int a, b, c, d; 1131 char ch; 1132 1133 if (context_vec_start > context_vec_ptr) 1134 return; 1135 1136 b = d = 0; /* gcc */ 1137 lowa = max(1, cvp->a - context); 1138 upb = min(len[0], context_vec_ptr->b + context); 1139 lowc = max(1, cvp->c - context); 1140 upd = min(len[1], context_vec_ptr->d + context); 1141 1142 printf("***************\n*** "); 1143 range(lowa, upb, ","); 1144 printf(" ****\n"); 1145 1146 /* 1147 * Output changes to the "old" file. The first loop suppresses 1148 * output if there were no changes to the "old" file (we'll see 1149 * the "old" lines as context in the "new" list). 1150 */ 1151 do_output = 0; 1152 for (; cvp <= context_vec_ptr; cvp++) 1153 if (cvp->a <= cvp->b) { 1154 cvp = context_vec_start; 1155 do_output++; 1156 break; 1157 } 1158 if (do_output) { 1159 while (cvp <= context_vec_ptr) { 1160 a = cvp->a; 1161 b = cvp->b; 1162 c = cvp->c; 1163 d = cvp->d; 1164 1165 if (a <= b && c <= d) 1166 ch = 'c'; 1167 else 1168 ch = (a <= b) ? 'd' : 'a'; 1169 1170 if (ch == 'a') 1171 fetch(ixold, lowa, b, f1, " ", 0); 1172 else { 1173 fetch(ixold, lowa, a - 1, f1, " ", 0); 1174 fetch(ixold, a, b, f1, 1175 ch == 'c' ? "! " : "- ", 0); 1176 } 1177 lowa = b + 1; 1178 cvp++; 1179 } 1180 fetch(ixold, b + 1, upb, f1, " ", 0); 1181 } 1182 /* output changes to the "new" file */ 1183 printf("--- "); 1184 range(lowc, upd, ","); 1185 printf(" ----\n"); 1186 1187 do_output = 0; 1188 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1189 if (cvp->c <= cvp->d) { 1190 cvp = context_vec_start; 1191 do_output++; 1192 break; 1193 } 1194 if (do_output) { 1195 while (cvp <= context_vec_ptr) { 1196 a = cvp->a; 1197 b = cvp->b; 1198 c = cvp->c; 1199 d = cvp->d; 1200 1201 if (a <= b && c <= d) 1202 ch = 'c'; 1203 else 1204 ch = (a <= b) ? 'd' : 'a'; 1205 1206 if (ch == 'd') 1207 fetch(ixnew, lowc, d, f2, " ", 0); 1208 else { 1209 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1210 fetch(ixnew, c, d, f2, 1211 ch == 'c' ? "! " : "+ ", 0); 1212 } 1213 lowc = d + 1; 1214 cvp++; 1215 } 1216 fetch(ixnew, d + 1, upd, f2, " ", 0); 1217 } 1218 context_vec_ptr = context_vec_start - 1; 1219 } 1220 1221 /* dump accumulated "unified" diff changes */ 1222 static void 1223 dump_unified_vec(FILE *f1, FILE *f2) 1224 { 1225 struct context_vec *cvp = context_vec_start; 1226 int lowa, upb, lowc, upd; 1227 int a, b, c, d; 1228 char ch; 1229 1230 if (context_vec_start > context_vec_ptr) 1231 return; 1232 1233 b = d = 0; /* gcc */ 1234 lowa = max(1, cvp->a - context); 1235 upb = min(len[0], context_vec_ptr->b + context); 1236 lowc = max(1, cvp->c - context); 1237 upd = min(len[1], context_vec_ptr->d + context); 1238 1239 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1240 lowc, upd - lowc + 1); 1241 1242 /* 1243 * Output changes in "unified" diff format--the old and new lines 1244 * are printed together. 1245 */ 1246 for (; cvp <= context_vec_ptr; cvp++) { 1247 a = cvp->a; 1248 b = cvp->b; 1249 c = cvp->c; 1250 d = cvp->d; 1251 1252 /* 1253 * c: both new and old changes 1254 * d: only changes in the old file 1255 * a: only changes in the new file 1256 */ 1257 if (a <= b && c <= d) 1258 ch = 'c'; 1259 else 1260 ch = (a <= b) ? 'd' : 'a'; 1261 1262 switch (ch) { 1263 case 'c': 1264 fetch(ixold, lowa, a - 1, f1, " ", 0); 1265 fetch(ixold, a, b, f1, "-", 0); 1266 fetch(ixnew, c, d, f2, "+", 0); 1267 break; 1268 case 'd': 1269 fetch(ixold, lowa, a - 1, f1, " ", 0); 1270 fetch(ixold, a, b, f1, "-", 0); 1271 break; 1272 case 'a': 1273 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1274 fetch(ixnew, c, d, f2, "+", 0); 1275 break; 1276 } 1277 lowa = b + 1; 1278 lowc = d + 1; 1279 } 1280 fetch(ixnew, d + 1, upd, f2, " ", 0); 1281 1282 context_vec_ptr = context_vec_start - 1; 1283 } 1284