1 /* $OpenBSD: diffreg.c,v 1.27 2003/07/06 20:48:59 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #ifndef lint 68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.27 2003/07/06 20:48:59 millert Exp $"; 69 #endif /* not lint */ 70 71 #include <sys/types.h> 72 #include <sys/stat.h> 73 74 #include <ctype.h> 75 #include <err.h> 76 #include <fcntl.h> 77 #include <libgen.h> 78 #include <paths.h> 79 #include <signal.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 85 #include "diff.h" 86 87 /* 88 * diff - compare two files. 89 */ 90 91 /* 92 * Uses an algorithm due to Harold Stone, which finds 93 * a pair of longest identical subsequences in the two 94 * files. 95 * 96 * The major goal is to generate the match vector J. 97 * J[i] is the index of the line in file1 corresponding 98 * to line i file0. J[i] = 0 if there is no 99 * such line in file1. 100 * 101 * Lines are hashed so as to work in core. All potential 102 * matches are located by sorting the lines of each file 103 * on the hash (called ``value''). In particular, this 104 * collects the equivalence classes in file1 together. 105 * Subroutine equiv replaces the value of each line in 106 * file0 by the index of the first element of its 107 * matching equivalence in (the reordered) file1. 108 * To save space equiv squeezes file1 into a single 109 * array member in which the equivalence classes 110 * are simply concatenated, except that their first 111 * members are flagged by changing sign. 112 * 113 * Next the indices that point into member are unsorted into 114 * array class according to the original order of file0. 115 * 116 * The cleverness lies in routine stone. This marches 117 * through the lines of file0, developing a vector klist 118 * of "k-candidates". At step i a k-candidate is a matched 119 * pair of lines x,y (x in file0 y in file1) such that 120 * there is a common subsequence of length k 121 * between the first i lines of file0 and the first y 122 * lines of file1, but there is no such subsequence for 123 * any smaller y. x is the earliest possible mate to y 124 * that occurs in such a subsequence. 125 * 126 * Whenever any of the members of the equivalence class of 127 * lines in file1 matable to a line in file0 has serial number 128 * less than the y of some k-candidate, that k-candidate 129 * with the smallest such y is replaced. The new 130 * k-candidate is chained (via pred) to the current 131 * k-1 candidate so that the actual subsequence can 132 * be recovered. When a member has serial number greater 133 * that the y of all k-candidates, the klist is extended. 134 * At the end, the longest subsequence is pulled out 135 * and placed in the array J by unravel 136 * 137 * With J in hand, the matches there recorded are 138 * check'ed against reality to assure that no spurious 139 * matches have crept in due to hashing. If they have, 140 * they are broken, and "jackpot" is recorded--a harmless 141 * matter except that a true match for a spuriously 142 * mated line may now be unnecessarily reported as a change. 143 * 144 * Much of the complexity of the program comes simply 145 * from trying to minimize core utilization and 146 * maximize the range of doable problems by dynamically 147 * allocating what is needed and reusing what is not. 148 * The core requirements for problems larger than somewhat 149 * are (in words) 2*length(file0) + length(file1) + 150 * 3*(number of k-candidates installed), typically about 151 * 6n words for files of length n. 152 */ 153 154 struct cand { 155 int x; 156 int y; 157 int pred; 158 } cand; 159 160 struct line { 161 int serial; 162 int value; 163 } *file[2]; 164 165 static int *J; /* will be overlaid on class */ 166 static int *class; /* will be overlaid on file[0] */ 167 static int *klist; /* will be overlaid on file[0] after class */ 168 static int *member; /* will be overlaid on file[1] */ 169 static int clen; 170 static int inifdef; /* whether or not we are in a #ifdef block */ 171 static int len[2]; 172 static int pref, suff; /* length of prefix and suffix */ 173 static int slen[2]; 174 static int anychange; 175 static long *ixnew; /* will be overlaid on file[1] */ 176 static long *ixold; /* will be overlaid on klist */ 177 static struct cand *clist; /* merely a free storage pot for candidates */ 178 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 179 static u_char *chrtran; /* translation table for case-folding */ 180 181 static void fetch(long *, int, int, FILE *, char *, int); 182 static void output(char *, FILE *, char *, FILE *); 183 static void check(char *, FILE *, char *, FILE *); 184 static void range(int, int, char *); 185 static void dump_context_vec(FILE *, FILE *); 186 static void dump_unified_vec(FILE *, FILE *); 187 static void prepare(int, FILE *); 188 static void prune(void); 189 static void equiv(struct line *, int, struct line *, int, int *); 190 static void unravel(int); 191 static void unsort(struct line *, int, int *); 192 static void change(char *, FILE *, char *, FILE *, int, int, int, int); 193 static void sort(struct line *, int); 194 static int asciifile(FILE *); 195 static int newcand(int, int, int); 196 static int search(int *, int, int); 197 static int skipline(FILE *); 198 static int stone(int *, int, int *, int *); 199 static int readhash(FILE *); 200 static int files_differ(FILE *, FILE *, int); 201 202 /* 203 * chrtran points to one of 2 translation tables: cup2low if folding upper to 204 * lower case clow2low if not folding case 205 */ 206 u_char clow2low[256] = { 207 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 208 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 209 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 210 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 211 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 212 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 213 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 214 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 215 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 216 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 217 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 218 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 219 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 220 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 221 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 222 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 223 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 224 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 225 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 226 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 227 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 228 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 229 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 230 0xfd, 0xfe, 0xff 231 }; 232 233 u_char cup2low[256] = { 234 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 235 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 236 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 237 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 238 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 239 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 240 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 241 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 242 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 243 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 244 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 245 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 246 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 247 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 248 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 249 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 250 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 251 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 252 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 253 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 254 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 255 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 256 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 257 0xfd, 0xfe, 0xff 258 }; 259 260 void 261 diffreg(char *ofile1, char *ofile2, int flags) 262 { 263 char *file1 = ofile1; 264 char *file2 = ofile2; 265 FILE *f1 = NULL; 266 FILE *f2 = NULL; 267 int i; 268 269 anychange = 0; 270 chrtran = (iflag ? cup2low : clow2low); 271 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 272 goto notsame; 273 274 /* XXX - only make temp file for stdin if not seekable? (millert) */ 275 if (flags & D_EMPTY1) 276 f1 = fopen(_PATH_DEVNULL, "r"); 277 else { 278 if (S_ISDIR(stb1.st_mode)) { 279 file1 = splice(file1, file2); 280 if (stat(file1, &stb1) < 0) { 281 warn("%s", file1); 282 status |= 2; 283 goto closem; 284 } 285 } else if (strcmp(file1, "-") == 0 || !S_ISREG(stb1.st_mode)) { 286 file1 = copytemp(file1, 1); 287 if (file1 == NULL || stat(file1, &stb1) < 0) { 288 warn("%s", file1); 289 status |= 2; 290 goto closem; 291 } 292 } 293 f1 = fopen(file1, "r"); 294 } 295 if (f1 == NULL) { 296 warn("%s", file1); 297 status |= 2; 298 goto closem; 299 } 300 301 if (flags & D_EMPTY2) 302 f2 = fopen(_PATH_DEVNULL, "r"); 303 else { 304 if (S_ISDIR(stb2.st_mode)) { 305 file2 = splice(file2, file1); 306 if (stat(file2, &stb2) < 0) { 307 warn("%s", file2); 308 status |= 2; 309 goto closem; 310 } 311 } else if (strcmp(file2, "-") == 0 || !S_ISREG(stb2.st_mode)) { 312 file2 = copytemp(file2, 2); 313 if (file2 == NULL || stat(file2, &stb2) < 0) { 314 warn("%s", file2); 315 status |= 2; 316 goto closem; 317 } 318 } 319 f2 = fopen(file2, "r"); 320 } 321 if (f2 == NULL) { 322 warn("%s", file2); 323 status |= 2; 324 goto closem; 325 } 326 327 switch (files_differ(f1, f2, flags)) { 328 case 0: 329 goto same; 330 case 1: 331 break; 332 default: 333 /* error */ 334 status |= 2; 335 goto closem; 336 } 337 338 notsame: 339 /* 340 * Files certainly differ at this point; set status accordingly 341 */ 342 status |= 1; 343 if (flags & D_HEADER) { 344 if (format == D_EDIT) 345 printf("ed - %s << '-*-END-*-'\n", basename(file1)); 346 else 347 printf("%s %s %s\n", diffargs, file1, file2); 348 } 349 if (!asciifile(f1) || !asciifile(f2)) { 350 printf("Binary files %s and %s differ\n", file1, file2); 351 goto closem; 352 } 353 prepare(0, f1); 354 prepare(1, f2); 355 prune(); 356 sort(sfile[0], slen[0]); 357 sort(sfile[1], slen[1]); 358 359 member = (int *)file[1]; 360 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 361 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 362 363 class = (int *)file[0]; 364 unsort(sfile[0], slen[0], class); 365 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 366 367 klist = emalloc((slen[0] + 2) * sizeof(int)); 368 clist = emalloc(sizeof(cand)); 369 i = stone(class, slen[0], member, klist); 370 free(member); 371 free(class); 372 373 J = erealloc(J, (len[0] + 2) * sizeof(int)); 374 unravel(klist[i]); 375 free(clist); 376 free(klist); 377 378 ixold = erealloc(ixold, (len[0] + 2) * sizeof(long)); 379 ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long)); 380 check(file1, f1, file2, f2); 381 output(file1, f1, file2, f2); 382 if ((flags & D_HEADER) && format == D_EDIT) 383 printf("w\nq\n-*-END-*-\n"); 384 same: 385 if (anychange == 0 && sflag != 0) 386 printf("Files %s and %s are identical\n", file1, file2); 387 388 closem: 389 if (f1 != NULL) 390 fclose(f1); 391 if (f2 != NULL) 392 fclose(f2); 393 if (tempfiles[0] != NULL) { 394 unlink(tempfiles[0]); 395 free(tempfiles[0]); 396 tempfiles[0] = NULL; 397 } 398 if (tempfiles[1] != NULL) { 399 unlink(tempfiles[1]); 400 free(tempfiles[1]); 401 tempfiles[1] = NULL; 402 } 403 if (file1 != ofile1) 404 free(file1); 405 if (file2 != ofile2) 406 free(file2); 407 } 408 409 /* 410 * Check to see if the given files differ. 411 * Returns 0 if they are the same, 1 if different, and -1 on error. 412 * XXX - could use code from cmp(1) [faster] 413 */ 414 static int 415 files_differ(FILE *f1, FILE *f2, int flags) 416 { 417 char buf1[BUFSIZ], buf2[BUFSIZ]; 418 size_t i, j; 419 420 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 421 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 422 return (1); 423 for (;;) { 424 i = fread(buf1, 1, sizeof(buf1), f1); 425 j = fread(buf2, 1, sizeof(buf2), f2); 426 if (i != j) 427 return (1); 428 if (i == 0 && j == 0) { 429 if (ferror(f1) || ferror(f2)) 430 return (1); 431 return (0); 432 } 433 if (memcmp(buf1, buf2, i) != 0) 434 return (1); 435 } 436 } 437 438 char *tempfiles[2]; 439 440 /* XXX - pass back a FILE * too (millert) */ 441 char * 442 copytemp(const char *file, int n) 443 { 444 char buf[BUFSIZ], *tempdir, *tempfile; 445 int i, ifd, ofd; 446 447 if (n != 1 && n != 2) 448 return (NULL); 449 450 if (strcmp(file, "-") == 0) 451 ifd = STDIN_FILENO; 452 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 453 return (NULL); 454 455 if ((tempdir = getenv("TMPDIR")) == NULL) 456 tempdir = _PATH_TMP; 457 if (asprintf(&tempfile, "%s/diff%d.XXXXXXXX", tempdir, n) == -1) 458 return (NULL); 459 tempfiles[n - 1] = tempfile; 460 461 signal(SIGHUP, quit); 462 signal(SIGINT, quit); 463 signal(SIGPIPE, quit); 464 signal(SIGTERM, quit); 465 ofd = mkstemp(tempfile); 466 if (ofd < 0) 467 return (NULL); 468 while ((i = read(ifd, buf, BUFSIZ)) > 0) { 469 if (write(ofd, buf, i) != i) 470 return (NULL); 471 } 472 close(ifd); 473 close(ofd); 474 return (tempfile); 475 } 476 477 char * 478 splice(char *dir, char *file) 479 { 480 char *tail, *buf; 481 size_t len; 482 483 tail = strrchr(file, '/'); 484 if (tail == NULL) 485 tail = file; 486 else 487 tail++; 488 len = strlen(dir) + 1 + strlen(tail) + 1; 489 buf = emalloc(len); 490 snprintf(buf, len, "%s/%s", dir, tail); 491 return (buf); 492 } 493 494 static void 495 prepare(int i, FILE *fd) 496 { 497 struct line *p; 498 int j, h; 499 500 rewind(fd); 501 p = emalloc(3 * sizeof(struct line)); 502 for (j = 0; (h = readhash(fd));) { 503 p = erealloc(p, (++j + 3) * sizeof(struct line)); 504 p[j].value = h; 505 } 506 len[i] = j; 507 file[i] = p; 508 } 509 510 static void 511 prune(void) 512 { 513 int i, j; 514 515 for (pref = 0; pref < len[0] && pref < len[1] && 516 file[0][pref + 1].value == file[1][pref + 1].value; 517 pref++) 518 ; 519 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 520 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 521 suff++) 522 ; 523 for (j = 0; j < 2; j++) { 524 sfile[j] = file[j] + pref; 525 slen[j] = len[j] - pref - suff; 526 for (i = 0; i <= slen[j]; i++) 527 sfile[j][i].serial = i; 528 } 529 } 530 531 static void 532 equiv(struct line *a, int n, struct line *b, int m, int *c) 533 { 534 int i, j; 535 536 i = j = 1; 537 while (i <= n && j <= m) { 538 if (a[i].value < b[j].value) 539 a[i++].value = 0; 540 else if (a[i].value == b[j].value) 541 a[i++].value = j; 542 else 543 j++; 544 } 545 while (i <= n) 546 a[i++].value = 0; 547 b[m + 1].value = 0; 548 j = 0; 549 while (++j <= m) { 550 c[j] = -b[j].serial; 551 while (b[j + 1].value == b[j].value) { 552 j++; 553 c[j] = b[j].serial; 554 } 555 } 556 c[j] = -1; 557 } 558 559 static int 560 stone(int *a, int n, int *b, int *c) 561 { 562 int i, k, y, j, l; 563 int oldc, tc, oldl; 564 565 k = 0; 566 c[0] = newcand(0, 0, 0); 567 for (i = 1; i <= n; i++) { 568 j = a[i]; 569 if (j == 0) 570 continue; 571 y = -b[j]; 572 oldl = 0; 573 oldc = c[0]; 574 do { 575 if (y <= clist[oldc].y) 576 continue; 577 l = search(c, k, y); 578 if (l != oldl + 1) 579 oldc = c[l - 1]; 580 if (l <= k) { 581 if (clist[c[l]].y <= y) 582 continue; 583 tc = c[l]; 584 c[l] = newcand(i, y, oldc); 585 oldc = tc; 586 oldl = l; 587 } else { 588 c[l] = newcand(i, y, oldc); 589 k++; 590 break; 591 } 592 } while ((y = b[++j]) > 0); 593 } 594 return (k); 595 } 596 597 static int 598 newcand(int x, int y, int pred) 599 { 600 struct cand *q; 601 602 clist = erealloc(clist, ++clen * sizeof(cand)); 603 q = clist + clen - 1; 604 q->x = x; 605 q->y = y; 606 q->pred = pred; 607 return (clen - 1); 608 } 609 610 static int 611 search(int *c, int k, int y) 612 { 613 int i, j, l, t; 614 615 if (clist[c[k]].y < y) /* quick look for typical case */ 616 return (k + 1); 617 i = 0; 618 j = k + 1; 619 while (1) { 620 l = i + j; 621 if ((l >>= 1) <= i) 622 break; 623 t = clist[c[l]].y; 624 if (t > y) 625 j = l; 626 else if (t < y) 627 i = l; 628 else 629 return (l); 630 } 631 return (l + 1); 632 } 633 634 static void 635 unravel(int p) 636 { 637 struct cand *q; 638 int i; 639 640 for (i = 0; i <= len[0]; i++) 641 J[i] = i <= pref ? i : 642 i > len[0] - suff ? i + len[1] - len[0] : 0; 643 for (q = clist + p; q->y != 0; q = clist + q->pred) 644 J[q->x + pref] = q->y + pref; 645 } 646 647 /* 648 * Check does double duty: 649 * 1. ferret out any fortuitous correspondences due 650 * to confounding by hashing (which result in "jackpot") 651 * 2. collect random access indexes to the two files 652 */ 653 static void 654 check(char *file1, FILE *f1, char *file2, FILE *f2) 655 { 656 int i, j, jackpot, c, d; 657 long ctold, ctnew; 658 659 rewind(f1); 660 rewind(f2); 661 j = 1; 662 ixold[0] = ixnew[0] = 0; 663 jackpot = 0; 664 ctold = ctnew = 0; 665 for (i = 1; i <= len[0]; i++) { 666 if (J[i] == 0) { 667 ixold[i] = ctold += skipline(f1); 668 continue; 669 } 670 while (j < J[i]) { 671 ixnew[j] = ctnew += skipline(f2); 672 j++; 673 } 674 if (bflag || wflag || iflag) { 675 for (;;) { 676 c = getc(f1); 677 d = getc(f2); 678 ctold++; 679 ctnew++; 680 if (bflag && isspace(c) && isspace(d)) { 681 do { 682 if (c == '\n') 683 break; 684 ctold++; 685 } while (isspace(c = getc(f1))); 686 do { 687 if (d == '\n') 688 break; 689 ctnew++; 690 } while (isspace(d = getc(f2))); 691 } else if (wflag) { 692 while (isspace(c) && c != '\n') { 693 c = getc(f1); 694 ctold++; 695 } 696 while (isspace(d) && d != '\n') { 697 d = getc(f2); 698 ctnew++; 699 } 700 } 701 if (chrtran[c] != chrtran[d]) { 702 jackpot++; 703 J[i] = 0; 704 if (c != '\n') 705 ctold += skipline(f1); 706 if (d != '\n') 707 ctnew += skipline(f2); 708 break; 709 } 710 if (c == '\n') 711 break; 712 } 713 } else { 714 for (;;) { 715 ctold++; 716 ctnew++; 717 if ((c = getc(f1)) != (d = getc(f2))) { 718 /* jackpot++; */ 719 J[i] = 0; 720 if (c != '\n') 721 ctold += skipline(f1); 722 if (d != '\n') 723 ctnew += skipline(f2); 724 break; 725 } 726 if (c == '\n') 727 break; 728 } 729 } 730 ixold[i] = ctold; 731 ixnew[j] = ctnew; 732 j++; 733 } 734 for (; j <= len[1]; j++) 735 ixnew[j] = ctnew += skipline(f2); 736 /* 737 * if (jackpot) 738 * fprintf(stderr, "jackpot\n"); 739 */ 740 } 741 742 /* shellsort CACM #201 */ 743 static void 744 sort(struct line *a, int n) 745 { 746 struct line *ai, *aim, w; 747 int j, m = 0, k; 748 749 if (n == 0) 750 return; 751 for (j = 1; j <= n; j *= 2) 752 m = 2 * j - 1; 753 for (m /= 2; m != 0; m /= 2) { 754 k = n - m; 755 for (j = 1; j <= k; j++) { 756 for (ai = &a[j]; ai > a; ai -= m) { 757 aim = &ai[m]; 758 if (aim < ai) 759 break; /* wraparound */ 760 if (aim->value > ai[0].value || 761 (aim->value == ai[0].value && 762 aim->serial > ai[0].serial)) 763 break; 764 w.value = ai[0].value; 765 ai[0].value = aim->value; 766 aim->value = w.value; 767 w.serial = ai[0].serial; 768 ai[0].serial = aim->serial; 769 aim->serial = w.serial; 770 } 771 } 772 } 773 } 774 775 static void 776 unsort(struct line *f, int l, int *b) 777 { 778 int *a, i; 779 780 a = emalloc((l + 1) * sizeof(int)); 781 for (i = 1; i <= l; i++) 782 a[f[i].serial] = f[i].value; 783 for (i = 1; i <= l; i++) 784 b[i] = a[i]; 785 free(a); 786 } 787 788 static int 789 skipline(FILE *f) 790 { 791 int i, c; 792 793 for (i = 1; (c = getc(f)) != '\n'; i++) 794 if (c < 0) 795 return (i); 796 return (i); 797 } 798 799 static void 800 output(char *file1, FILE *f1, char *file2, FILE *f2) 801 { 802 int m, i0, i1, j0, j1; 803 804 rewind(f1); 805 rewind(f2); 806 m = len[0]; 807 J[0] = 0; 808 J[m + 1] = len[1] + 1; 809 if (format != D_EDIT) { 810 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 811 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 812 i0++; 813 j0 = J[i0 - 1] + 1; 814 i1 = i0 - 1; 815 while (i1 < m && J[i1 + 1] == 0) 816 i1++; 817 j1 = J[i1 + 1] - 1; 818 J[i1] = j1; 819 change(file1, f1, file2, f2, i0, i1, j0, j1); 820 } 821 } else { 822 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 823 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 824 i0--; 825 j0 = J[i0 + 1] - 1; 826 i1 = i0 + 1; 827 while (i1 > 1 && J[i1 - 1] == 0) 828 i1--; 829 j1 = J[i1 - 1] + 1; 830 J[i1] = j1; 831 change(file1, f1, file2, f2, i1, i0, j1, j0); 832 } 833 } 834 if (m == 0) 835 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 836 if (format == D_IFDEF) { 837 for (;;) { 838 #define c i0 839 c = getc(f1); 840 if (c < 0) 841 return; 842 putchar(c); 843 } 844 #undef c 845 } 846 if (anychange != 0) { 847 if (format == D_CONTEXT) 848 dump_context_vec(f1, f2); 849 else if (format == D_UNIFIED) 850 dump_unified_vec(f1, f2); 851 } 852 } 853 854 /* 855 * The following struct is used to record change information when 856 * doing a "context" or "unified" diff. (see routine "change" to 857 * understand the highly mnemonic field names) 858 */ 859 struct context_vec { 860 int a; /* start line in old file */ 861 int b; /* end line in old file */ 862 int c; /* start line in new file */ 863 int d; /* end line in new file */ 864 }; 865 866 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 867 868 #define MAX_CONTEXT 128 869 870 /* 871 * Indicate that there is a difference between lines a and b of the from file 872 * to get to lines c to d of the to file. If a is greater then b then there 873 * are no lines in the from file involved and this means that there were 874 * lines appended (beginning at b). If c is greater than d then there are 875 * lines missing from the to file. 876 */ 877 static void 878 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) 879 { 880 if (format != D_IFDEF && a > b && c > d) 881 return; 882 if (anychange == 0) { 883 anychange = 1; 884 if (format == D_CONTEXT || format == D_UNIFIED) { 885 printf("%s %s %s", format == D_CONTEXT ? "***" : "---", 886 file1, ctime(&stb1.st_mtime)); 887 printf("%s %s %s", format == D_CONTEXT ? "---" : "+++", 888 file2, ctime(&stb2.st_mtime)); 889 if (context_vec_start == NULL) 890 context_vec_start = emalloc(MAX_CONTEXT * 891 sizeof(struct context_vec)); 892 context_vec_end = context_vec_start + MAX_CONTEXT; 893 context_vec_ptr = context_vec_start - 1; 894 } 895 } 896 if (format == D_CONTEXT || format == D_UNIFIED) { 897 /* 898 * If this new change is within 'context' lines of 899 * the previous change, just add it to the change 900 * record. If the record is full or if this 901 * change is more than 'context' lines from the previous 902 * change, dump the record, reset it & add the new change. 903 */ 904 if (context_vec_ptr >= context_vec_end || 905 (context_vec_ptr >= context_vec_start && 906 a > (context_vec_ptr->b + 2 * context) && 907 c > (context_vec_ptr->d + 2 * context))) { 908 if (format == D_CONTEXT) 909 dump_context_vec(f1, f2); 910 else 911 dump_unified_vec(f1, f2); 912 } 913 context_vec_ptr++; 914 context_vec_ptr->a = a; 915 context_vec_ptr->b = b; 916 context_vec_ptr->c = c; 917 context_vec_ptr->d = d; 918 return; 919 } 920 switch (format) { 921 922 case D_NORMAL: 923 case D_EDIT: 924 range(a, b, ","); 925 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 926 if (format == D_NORMAL) 927 range(c, d, ","); 928 putchar('\n'); 929 break; 930 case D_REVERSE: 931 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 932 range(a, b, " "); 933 putchar('\n'); 934 break; 935 case D_NREVERSE: 936 if (a > b) 937 printf("a%d %d\n", b, d - c + 1); 938 else { 939 printf("d%d %d\n", a, b - a + 1); 940 if (!(c > d)) 941 /* add changed lines */ 942 printf("a%d %d\n", b, d - c + 1); 943 } 944 break; 945 } 946 if (format == D_NORMAL || format == D_IFDEF) { 947 fetch(ixold, a, b, f1, "< ", 1); 948 if (a <= b && c <= d && format == D_NORMAL) 949 puts("---"); 950 } 951 fetch(ixnew, c, d, f2, format == D_NORMAL ? "> " : "", 0); 952 if ((format == D_EDIT || format == D_REVERSE) && c <= d) 953 puts("."); 954 if (inifdef) { 955 fprintf(stdout, "#endif /* %s */\n", ifdefname); 956 inifdef = 0; 957 } 958 } 959 960 static void 961 range(int a, int b, char *separator) 962 { 963 printf("%d", a > b ? b : a); 964 if (a < b) 965 printf("%s%d", separator, b); 966 } 967 968 static void 969 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 970 { 971 int i, j, c, col, nc; 972 973 /* 974 * When doing #ifdef's, copy down to current line 975 * if this is the first file, so that stuff makes it to output. 976 */ 977 if (format == D_IFDEF && oldfile) { 978 long curpos = ftell(lb); 979 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 980 nc = f[a > b ? b : a - 1] - curpos; 981 for (i = 0; i < nc; i++) 982 putchar(getc(lb)); 983 } 984 if (a > b) 985 return; 986 if (format == D_IFDEF) { 987 if (inifdef) { 988 fprintf(stdout, "#else /* %s%s */\n", 989 oldfile == 1 ? "!" : "", ifdefname); 990 } else { 991 if (oldfile) 992 fprintf(stdout, "#ifndef %s\n", ifdefname); 993 else 994 fprintf(stdout, "#ifdef %s\n", ifdefname); 995 } 996 inifdef = 1 + oldfile; 997 } 998 for (i = a; i <= b; i++) { 999 fseek(lb, f[i - 1], SEEK_SET); 1000 nc = f[i] - f[i - 1]; 1001 if (format != D_IFDEF) 1002 fputs(s, stdout); 1003 col = 0; 1004 for (j = 0; j < nc; j++) { 1005 c = getc(lb); 1006 if (c == '\t' && tflag) 1007 do 1008 putchar(' '); 1009 while (++col & 7); 1010 else { 1011 putchar(c); 1012 col++; 1013 } 1014 } 1015 } 1016 } 1017 1018 #define POW2 /* define only if HALFLONG is 2**n */ 1019 #define HALFLONG 16 1020 #define low(x) (x&((1L<<HALFLONG)-1)) 1021 #define high(x) (x>>HALFLONG) 1022 1023 /* 1024 * hashing has the effect of 1025 * arranging line in 7-bit bytes and then 1026 * summing 1-s complement in 16-bit hunks 1027 */ 1028 static int 1029 readhash(FILE *f) 1030 { 1031 unsigned int shift; 1032 int t, space; 1033 long sum; 1034 1035 sum = 1; 1036 space = 0; 1037 if (!bflag && !wflag) { 1038 if (iflag) 1039 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1040 if (t == -1) 1041 return (0); 1042 sum += (long)chrtran[t] << (shift 1043 #ifdef POW2 1044 &= HALFLONG - 1); 1045 #else 1046 %= HALFLONG); 1047 #endif 1048 } 1049 else 1050 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1051 if (t == -1) 1052 return (0); 1053 sum += (long)t << (shift 1054 #ifdef POW2 1055 &= HALFLONG - 1); 1056 #else 1057 %= HALFLONG); 1058 #endif 1059 } 1060 } else { 1061 for (shift = 0;;) { 1062 switch (t = getc(f)) { 1063 case -1: 1064 return (0); 1065 case '\t': 1066 case ' ': 1067 space++; 1068 continue; 1069 default: 1070 if (space && !wflag) { 1071 shift += 7; 1072 space = 0; 1073 } 1074 sum += (long)chrtran[t] << (shift 1075 #ifdef POW2 1076 &= HALFLONG - 1); 1077 #else 1078 %= HALFLONG); 1079 #endif 1080 shift += 7; 1081 continue; 1082 case '\n': 1083 break; 1084 } 1085 break; 1086 } 1087 } 1088 sum = low(sum) + high(sum); 1089 return ((short) low(sum) + (short) high(sum)); 1090 } 1091 1092 int 1093 asciifile(FILE *f) 1094 { 1095 char buf[BUFSIZ], *cp; 1096 int cnt; 1097 1098 if (aflag || f == NULL) 1099 return (1); 1100 1101 rewind(f); 1102 cnt = fread(buf, 1, sizeof(buf), f); 1103 cp = buf; 1104 while (--cnt >= 0) 1105 if (*cp++ & 0200) 1106 return (0); 1107 return (1); 1108 } 1109 1110 static __inline int min(int a, int b) 1111 { 1112 return (a < b ? a : b); 1113 } 1114 1115 static __inline int max(int a, int b) 1116 { 1117 return (a > b ? a : b); 1118 } 1119 1120 /* dump accumulated "context" diff changes */ 1121 static void 1122 dump_context_vec(FILE *f1, FILE *f2) 1123 { 1124 struct context_vec *cvp = context_vec_start; 1125 int lowa, upb, lowc, upd, do_output; 1126 int a, b, c, d; 1127 char ch; 1128 1129 if (context_vec_start > context_vec_ptr) 1130 return; 1131 1132 b = d = 0; /* gcc */ 1133 lowa = max(1, cvp->a - context); 1134 upb = min(len[0], context_vec_ptr->b + context); 1135 lowc = max(1, cvp->c - context); 1136 upd = min(len[1], context_vec_ptr->d + context); 1137 1138 printf("***************\n*** "); 1139 range(lowa, upb, ","); 1140 printf(" ****\n"); 1141 1142 /* 1143 * Output changes to the "old" file. The first loop suppresses 1144 * output if there were no changes to the "old" file (we'll see 1145 * the "old" lines as context in the "new" list). 1146 */ 1147 do_output = 0; 1148 for (; cvp <= context_vec_ptr; cvp++) 1149 if (cvp->a <= cvp->b) { 1150 cvp = context_vec_start; 1151 do_output++; 1152 break; 1153 } 1154 if (do_output) { 1155 while (cvp <= context_vec_ptr) { 1156 a = cvp->a; 1157 b = cvp->b; 1158 c = cvp->c; 1159 d = cvp->d; 1160 1161 if (a <= b && c <= d) 1162 ch = 'c'; 1163 else 1164 ch = (a <= b) ? 'd' : 'a'; 1165 1166 if (ch == 'a') 1167 fetch(ixold, lowa, b, f1, " ", 0); 1168 else { 1169 fetch(ixold, lowa, a - 1, f1, " ", 0); 1170 fetch(ixold, a, b, f1, 1171 ch == 'c' ? "! " : "- ", 0); 1172 } 1173 lowa = b + 1; 1174 cvp++; 1175 } 1176 fetch(ixold, b + 1, upb, f1, " ", 0); 1177 } 1178 /* output changes to the "new" file */ 1179 printf("--- "); 1180 range(lowc, upd, ","); 1181 printf(" ----\n"); 1182 1183 do_output = 0; 1184 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1185 if (cvp->c <= cvp->d) { 1186 cvp = context_vec_start; 1187 do_output++; 1188 break; 1189 } 1190 if (do_output) { 1191 while (cvp <= context_vec_ptr) { 1192 a = cvp->a; 1193 b = cvp->b; 1194 c = cvp->c; 1195 d = cvp->d; 1196 1197 if (a <= b && c <= d) 1198 ch = 'c'; 1199 else 1200 ch = (a <= b) ? 'd' : 'a'; 1201 1202 if (ch == 'd') 1203 fetch(ixnew, lowc, d, f2, " ", 0); 1204 else { 1205 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1206 fetch(ixnew, c, d, f2, 1207 ch == 'c' ? "! " : "+ ", 0); 1208 } 1209 lowc = d + 1; 1210 cvp++; 1211 } 1212 fetch(ixnew, d + 1, upd, f2, " ", 0); 1213 } 1214 context_vec_ptr = context_vec_start - 1; 1215 } 1216 1217 /* dump accumulated "unified" diff changes */ 1218 static void 1219 dump_unified_vec(FILE *f1, FILE *f2) 1220 { 1221 struct context_vec *cvp = context_vec_start; 1222 int lowa, upb, lowc, upd; 1223 int a, b, c, d; 1224 char ch; 1225 1226 if (context_vec_start > context_vec_ptr) 1227 return; 1228 1229 b = d = 0; /* gcc */ 1230 lowa = max(1, cvp->a - context); 1231 upb = min(len[0], context_vec_ptr->b + context); 1232 lowc = max(1, cvp->c - context); 1233 upd = min(len[1], context_vec_ptr->d + context); 1234 1235 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1236 lowc, upd - lowc + 1); 1237 1238 /* 1239 * Output changes in "unified" diff format--the old and new lines 1240 * are printed together. 1241 */ 1242 for (; cvp <= context_vec_ptr; cvp++) { 1243 a = cvp->a; 1244 b = cvp->b; 1245 c = cvp->c; 1246 d = cvp->d; 1247 1248 /* 1249 * c: both new and old changes 1250 * d: only changes in the old file 1251 * a: only changes in the new file 1252 */ 1253 if (a <= b && c <= d) 1254 ch = 'c'; 1255 else 1256 ch = (a <= b) ? 'd' : 'a'; 1257 1258 switch (ch) { 1259 case 'c': 1260 fetch(ixold, lowa, a - 1, f1, " ", 0); 1261 fetch(ixold, a, b, f1, "-", 0); 1262 fetch(ixnew, c, d, f2, "+", 0); 1263 break; 1264 case 'd': 1265 fetch(ixold, lowa, a - 1, f1, " ", 0); 1266 fetch(ixold, a, b, f1, "-", 0); 1267 break; 1268 case 'a': 1269 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1270 fetch(ixnew, c, d, f2, "+", 0); 1271 break; 1272 } 1273 lowa = b + 1; 1274 lowc = d + 1; 1275 } 1276 fetch(ixnew, d + 1, upd, f2, " ", 0); 1277 1278 context_vec_ptr = context_vec_start - 1; 1279 } 1280