1 /* $OpenBSD: diffreg.c,v 1.32 2003/07/09 00:39:26 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #ifndef lint 68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.32 2003/07/09 00:39:26 millert Exp $"; 69 #endif /* not lint */ 70 71 #include <sys/param.h> 72 #include <sys/stat.h> 73 #include <sys/wait.h> 74 75 #include <ctype.h> 76 #include <err.h> 77 #include <errno.h> 78 #include <fcntl.h> 79 #include <libgen.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 85 #include "diff.h" 86 #include "pathnames.h" 87 88 /* 89 * diff - compare two files. 90 */ 91 92 /* 93 * Uses an algorithm due to Harold Stone, which finds 94 * a pair of longest identical subsequences in the two 95 * files. 96 * 97 * The major goal is to generate the match vector J. 98 * J[i] is the index of the line in file1 corresponding 99 * to line i file0. J[i] = 0 if there is no 100 * such line in file1. 101 * 102 * Lines are hashed so as to work in core. All potential 103 * matches are located by sorting the lines of each file 104 * on the hash (called ``value''). In particular, this 105 * collects the equivalence classes in file1 together. 106 * Subroutine equiv replaces the value of each line in 107 * file0 by the index of the first element of its 108 * matching equivalence in (the reordered) file1. 109 * To save space equiv squeezes file1 into a single 110 * array member in which the equivalence classes 111 * are simply concatenated, except that their first 112 * members are flagged by changing sign. 113 * 114 * Next the indices that point into member are unsorted into 115 * array class according to the original order of file0. 116 * 117 * The cleverness lies in routine stone. This marches 118 * through the lines of file0, developing a vector klist 119 * of "k-candidates". At step i a k-candidate is a matched 120 * pair of lines x,y (x in file0 y in file1) such that 121 * there is a common subsequence of length k 122 * between the first i lines of file0 and the first y 123 * lines of file1, but there is no such subsequence for 124 * any smaller y. x is the earliest possible mate to y 125 * that occurs in such a subsequence. 126 * 127 * Whenever any of the members of the equivalence class of 128 * lines in file1 matable to a line in file0 has serial number 129 * less than the y of some k-candidate, that k-candidate 130 * with the smallest such y is replaced. The new 131 * k-candidate is chained (via pred) to the current 132 * k-1 candidate so that the actual subsequence can 133 * be recovered. When a member has serial number greater 134 * that the y of all k-candidates, the klist is extended. 135 * At the end, the longest subsequence is pulled out 136 * and placed in the array J by unravel 137 * 138 * With J in hand, the matches there recorded are 139 * check'ed against reality to assure that no spurious 140 * matches have crept in due to hashing. If they have, 141 * they are broken, and "jackpot" is recorded--a harmless 142 * matter except that a true match for a spuriously 143 * mated line may now be unnecessarily reported as a change. 144 * 145 * Much of the complexity of the program comes simply 146 * from trying to minimize core utilization and 147 * maximize the range of doable problems by dynamically 148 * allocating what is needed and reusing what is not. 149 * The core requirements for problems larger than somewhat 150 * are (in words) 2*length(file0) + length(file1) + 151 * 3*(number of k-candidates installed), typically about 152 * 6n words for files of length n. 153 */ 154 155 struct cand { 156 int x; 157 int y; 158 int pred; 159 } cand; 160 161 struct line { 162 int serial; 163 int value; 164 } *file[2]; 165 166 static int *J; /* will be overlaid on class */ 167 static int *class; /* will be overlaid on file[0] */ 168 static int *klist; /* will be overlaid on file[0] after class */ 169 static int *member; /* will be overlaid on file[1] */ 170 static int clen; 171 static int inifdef; /* whether or not we are in a #ifdef block */ 172 static int len[2]; 173 static int pref, suff; /* length of prefix and suffix */ 174 static int slen[2]; 175 static int anychange; 176 static long *ixnew; /* will be overlaid on file[1] */ 177 static long *ixold; /* will be overlaid on klist */ 178 static struct cand *clist; /* merely a free storage pot for candidates */ 179 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 180 static u_char *chrtran; /* translation table for case-folding */ 181 182 static FILE *opentemp(const char *); 183 static void fetch(long *, int, int, FILE *, char *, int); 184 static void output(char *, FILE *, char *, FILE *); 185 static void check(char *, FILE *, char *, FILE *); 186 static void range(int, int, char *); 187 static void dump_context_vec(FILE *, FILE *); 188 static void dump_unified_vec(FILE *, FILE *); 189 static void prepare(int, FILE *); 190 static void prune(void); 191 static void equiv(struct line *, int, struct line *, int, int *); 192 static void unravel(int); 193 static void unsort(struct line *, int, int *); 194 static void change(char *, FILE *, char *, FILE *, int, int, int, int); 195 static void sort(struct line *, int); 196 static int asciifile(FILE *); 197 static int newcand(int, int, int); 198 static int search(int *, int, int); 199 static int skipline(FILE *); 200 static int stone(int *, int, int *, int *); 201 static int readhash(FILE *); 202 static int files_differ(FILE *, FILE *, int); 203 204 /* 205 * chrtran points to one of 2 translation tables: cup2low if folding upper to 206 * lower case clow2low if not folding case 207 */ 208 u_char clow2low[256] = { 209 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 210 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 211 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 212 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 213 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 214 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 215 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 216 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 217 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 218 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 219 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 220 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 221 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 222 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 223 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 224 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 225 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 226 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 227 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 228 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 229 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 230 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 231 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 232 0xfd, 0xfe, 0xff 233 }; 234 235 u_char cup2low[256] = { 236 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 237 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 238 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 239 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 240 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 241 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 242 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 243 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 244 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 245 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 246 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 247 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 248 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 249 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 250 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 251 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 252 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 253 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 254 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 255 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 256 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 257 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 258 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 259 0xfd, 0xfe, 0xff 260 }; 261 262 int 263 diffreg(char *ofile1, char *ofile2, int flags) 264 { 265 char *file1 = ofile1; 266 char *file2 = ofile2; 267 FILE *f1 = NULL; 268 FILE *f2 = NULL; 269 int rval = D_SAME; 270 int i, ostdout = -1; 271 pid_t pid = -1; 272 273 anychange = 0; 274 chrtran = (iflag ? cup2low : clow2low); 275 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 276 return (D_MISMATCH); 277 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 278 goto notsame; 279 280 if (flags & D_EMPTY1) 281 f1 = fopen(_PATH_DEVNULL, "r"); 282 else { 283 if (!S_ISREG(stb1.st_mode)) { 284 if ((f1 = opentemp(file1)) == NULL || 285 fstat(fileno(f1), &stb1) < 0) { 286 warn("%s", file1); 287 status |= 2; 288 goto closem; 289 } 290 } else if (strcmp(file1, "-") == 0) 291 f1 = stdin; 292 else 293 f1 = fopen(file1, "r"); 294 } 295 if (f1 == NULL) { 296 warn("%s", file1); 297 status |= 2; 298 goto closem; 299 } 300 301 if (flags & D_EMPTY2) 302 f2 = fopen(_PATH_DEVNULL, "r"); 303 else { 304 if (!S_ISREG(stb2.st_mode)) { 305 if ((f2 = opentemp(file2)) == NULL || 306 fstat(fileno(f2), &stb2) < 0) { 307 warn("%s", file2); 308 status |= 2; 309 goto closem; 310 } 311 } else if (strcmp(file2, "-") == 0) 312 f2 = stdin; 313 else 314 f2 = fopen(file2, "r"); 315 } 316 if (f2 == NULL) { 317 warn("%s", file2); 318 status |= 2; 319 goto closem; 320 } 321 322 switch (files_differ(f1, f2, flags)) { 323 case 0: 324 goto closem; 325 case 1: 326 break; 327 default: 328 /* error */ 329 status |= 2; 330 goto closem; 331 } 332 333 notsame: 334 /* 335 * Files certainly differ at this point; set status accordingly 336 */ 337 status |= 1; 338 rval = D_DIFFER; 339 if (!asciifile(f1) || !asciifile(f2)) { 340 rval = D_BINARY; 341 goto closem; 342 } 343 if (format == D_BRIEF) 344 goto closem; 345 if (lflag) { 346 /* redirect stdout to pr */ 347 int pfd[2]; 348 char *header; 349 char *prargv[] = { "pr", "-h", NULL, "-f", NULL }; 350 351 easprintf(&header, "%s %s %s", diffargs, file1, file2); 352 prargv[2] = header; 353 fflush(stdout); 354 rewind(stdout); 355 pipe(pfd); 356 switch ((pid = fork())) { 357 case -1: 358 warnx("No more processes"); 359 status |= 2; 360 free(header); 361 return (D_ERROR); 362 case 0: 363 /* child */ 364 if (pfd[0] != STDIN_FILENO) { 365 dup2(pfd[0], STDIN_FILENO); 366 close(pfd[0]); 367 } 368 close(pfd[1]); 369 execv(_PATH_PR, prargv); 370 _exit(127); 371 default: 372 /* parent */ 373 if (pfd[1] != STDOUT_FILENO) { 374 ostdout = dup(STDOUT_FILENO); 375 dup2(pfd[1], STDOUT_FILENO); 376 close(pfd[1]); 377 } 378 close(pfd[0]); 379 rewind(stdout); 380 free(header); 381 } 382 } else { 383 if (flags & D_HEADER) { 384 if (format == D_EDIT) 385 printf("ed - %s << '-*-END-*-'\n", 386 basename(file1)); 387 else 388 printf("%s %s %s\n", diffargs, file1, file2); 389 } 390 } 391 prepare(0, f1); 392 prepare(1, f2); 393 prune(); 394 sort(sfile[0], slen[0]); 395 sort(sfile[1], slen[1]); 396 397 member = (int *)file[1]; 398 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 399 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 400 401 class = (int *)file[0]; 402 unsort(sfile[0], slen[0], class); 403 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 404 405 klist = emalloc((slen[0] + 2) * sizeof(int)); 406 clist = emalloc(sizeof(cand)); 407 i = stone(class, slen[0], member, klist); 408 free(member); 409 free(class); 410 411 J = erealloc(J, (len[0] + 2) * sizeof(int)); 412 unravel(klist[i]); 413 free(clist); 414 free(klist); 415 416 ixold = erealloc(ixold, (len[0] + 2) * sizeof(long)); 417 ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long)); 418 check(file1, f1, file2, f2); 419 output(file1, f1, file2, f2); 420 if (ostdout != -1) { 421 int wstatus; 422 423 /* close the pipe to pr and restore stdout */ 424 fflush(stdout); 425 rewind(stdout); 426 if (ostdout != STDOUT_FILENO) { 427 close(STDOUT_FILENO); 428 dup2(ostdout, STDOUT_FILENO); 429 close(ostdout); 430 } 431 waitpid(pid, &wstatus, 0); 432 } else if ((flags & D_HEADER) && format == D_EDIT) 433 printf("w\nq\n-*-END-*-\n"); 434 closem: 435 if (f1 != NULL) 436 fclose(f1); 437 if (f2 != NULL) 438 fclose(f2); 439 if (file1 != ofile1) 440 free(file1); 441 if (file2 != ofile2) 442 free(file2); 443 return (rval); 444 } 445 446 /* 447 * Check to see if the given files differ. 448 * Returns 0 if they are the same, 1 if different, and -1 on error. 449 * XXX - could use code from cmp(1) [faster] 450 */ 451 static int 452 files_differ(FILE *f1, FILE *f2, int flags) 453 { 454 char buf1[BUFSIZ], buf2[BUFSIZ]; 455 size_t i, j; 456 457 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 458 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 459 return (1); 460 for (;;) { 461 i = fread(buf1, 1, sizeof(buf1), f1); 462 j = fread(buf2, 1, sizeof(buf2), f2); 463 if (i != j) 464 return (1); 465 if (i == 0 && j == 0) { 466 if (ferror(f1) || ferror(f2)) 467 return (1); 468 return (0); 469 } 470 if (memcmp(buf1, buf2, i) != 0) 471 return (1); 472 } 473 } 474 475 static FILE * 476 opentemp(const char *file) 477 { 478 char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN]; 479 ssize_t nread; 480 int ifd, ofd; 481 482 if (strcmp(file, "-") == 0) 483 ifd = STDIN_FILENO; 484 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 485 return (NULL); 486 487 if ((tempdir = getenv("TMPDIR")) == NULL) 488 tempdir = _PATH_TMP; 489 if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX", 490 tempdir) >= sizeof(tempfile)) { 491 close(ifd); 492 errno = ENAMETOOLONG; 493 return (NULL); 494 } 495 496 if ((ofd = mkstemp(tempfile)) < 0) 497 return (NULL); 498 unlink(tempfile); 499 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 500 if (write(ofd, buf, nread) != nread) { 501 close(ifd); 502 close(ofd); 503 return (NULL); 504 } 505 } 506 close(ifd); 507 return (fdopen(ofd, "r")); 508 } 509 510 char * 511 splice(char *dir, char *file) 512 { 513 char *tail, *buf; 514 515 if ((tail = strrchr(file, '/')) == NULL) 516 tail = file; 517 else 518 tail++; 519 easprintf(&buf, "%s/%s", dir, tail); 520 return (buf); 521 } 522 523 static void 524 prepare(int i, FILE *fd) 525 { 526 struct line *p; 527 int j, h; 528 529 rewind(fd); 530 p = emalloc(3 * sizeof(struct line)); 531 for (j = 0; (h = readhash(fd));) { 532 p = erealloc(p, (++j + 3) * sizeof(struct line)); 533 p[j].value = h; 534 } 535 len[i] = j; 536 file[i] = p; 537 } 538 539 static void 540 prune(void) 541 { 542 int i, j; 543 544 for (pref = 0; pref < len[0] && pref < len[1] && 545 file[0][pref + 1].value == file[1][pref + 1].value; 546 pref++) 547 ; 548 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 549 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 550 suff++) 551 ; 552 for (j = 0; j < 2; j++) { 553 sfile[j] = file[j] + pref; 554 slen[j] = len[j] - pref - suff; 555 for (i = 0; i <= slen[j]; i++) 556 sfile[j][i].serial = i; 557 } 558 } 559 560 static void 561 equiv(struct line *a, int n, struct line *b, int m, int *c) 562 { 563 int i, j; 564 565 i = j = 1; 566 while (i <= n && j <= m) { 567 if (a[i].value < b[j].value) 568 a[i++].value = 0; 569 else if (a[i].value == b[j].value) 570 a[i++].value = j; 571 else 572 j++; 573 } 574 while (i <= n) 575 a[i++].value = 0; 576 b[m + 1].value = 0; 577 j = 0; 578 while (++j <= m) { 579 c[j] = -b[j].serial; 580 while (b[j + 1].value == b[j].value) { 581 j++; 582 c[j] = b[j].serial; 583 } 584 } 585 c[j] = -1; 586 } 587 588 static int 589 stone(int *a, int n, int *b, int *c) 590 { 591 int i, k, y, j, l; 592 int oldc, tc, oldl; 593 594 k = 0; 595 c[0] = newcand(0, 0, 0); 596 for (i = 1; i <= n; i++) { 597 j = a[i]; 598 if (j == 0) 599 continue; 600 y = -b[j]; 601 oldl = 0; 602 oldc = c[0]; 603 do { 604 if (y <= clist[oldc].y) 605 continue; 606 l = search(c, k, y); 607 if (l != oldl + 1) 608 oldc = c[l - 1]; 609 if (l <= k) { 610 if (clist[c[l]].y <= y) 611 continue; 612 tc = c[l]; 613 c[l] = newcand(i, y, oldc); 614 oldc = tc; 615 oldl = l; 616 } else { 617 c[l] = newcand(i, y, oldc); 618 k++; 619 break; 620 } 621 } while ((y = b[++j]) > 0); 622 } 623 return (k); 624 } 625 626 static int 627 newcand(int x, int y, int pred) 628 { 629 struct cand *q; 630 631 clist = erealloc(clist, ++clen * sizeof(cand)); 632 q = clist + clen - 1; 633 q->x = x; 634 q->y = y; 635 q->pred = pred; 636 return (clen - 1); 637 } 638 639 static int 640 search(int *c, int k, int y) 641 { 642 int i, j, l, t; 643 644 if (clist[c[k]].y < y) /* quick look for typical case */ 645 return (k + 1); 646 i = 0; 647 j = k + 1; 648 while (1) { 649 l = i + j; 650 if ((l >>= 1) <= i) 651 break; 652 t = clist[c[l]].y; 653 if (t > y) 654 j = l; 655 else if (t < y) 656 i = l; 657 else 658 return (l); 659 } 660 return (l + 1); 661 } 662 663 static void 664 unravel(int p) 665 { 666 struct cand *q; 667 int i; 668 669 for (i = 0; i <= len[0]; i++) 670 J[i] = i <= pref ? i : 671 i > len[0] - suff ? i + len[1] - len[0] : 0; 672 for (q = clist + p; q->y != 0; q = clist + q->pred) 673 J[q->x + pref] = q->y + pref; 674 } 675 676 /* 677 * Check does double duty: 678 * 1. ferret out any fortuitous correspondences due 679 * to confounding by hashing (which result in "jackpot") 680 * 2. collect random access indexes to the two files 681 */ 682 static void 683 check(char *file1, FILE *f1, char *file2, FILE *f2) 684 { 685 int i, j, jackpot, c, d; 686 long ctold, ctnew; 687 688 rewind(f1); 689 rewind(f2); 690 j = 1; 691 ixold[0] = ixnew[0] = 0; 692 jackpot = 0; 693 ctold = ctnew = 0; 694 for (i = 1; i <= len[0]; i++) { 695 if (J[i] == 0) { 696 ixold[i] = ctold += skipline(f1); 697 continue; 698 } 699 while (j < J[i]) { 700 ixnew[j] = ctnew += skipline(f2); 701 j++; 702 } 703 if (bflag || wflag || iflag) { 704 for (;;) { 705 c = getc(f1); 706 d = getc(f2); 707 ctold++; 708 ctnew++; 709 if (bflag && isspace(c) && isspace(d)) { 710 do { 711 if (c == '\n') 712 break; 713 ctold++; 714 } while (isspace(c = getc(f1))); 715 do { 716 if (d == '\n') 717 break; 718 ctnew++; 719 } while (isspace(d = getc(f2))); 720 } else if (wflag) { 721 while (isspace(c) && c != '\n') { 722 c = getc(f1); 723 ctold++; 724 } 725 while (isspace(d) && d != '\n') { 726 d = getc(f2); 727 ctnew++; 728 } 729 } 730 if (chrtran[c] != chrtran[d]) { 731 jackpot++; 732 J[i] = 0; 733 if (c != '\n') 734 ctold += skipline(f1); 735 if (d != '\n') 736 ctnew += skipline(f2); 737 break; 738 } 739 if (c == '\n') 740 break; 741 } 742 } else { 743 for (;;) { 744 ctold++; 745 ctnew++; 746 if ((c = getc(f1)) != (d = getc(f2))) { 747 /* jackpot++; */ 748 J[i] = 0; 749 if (c != '\n') 750 ctold += skipline(f1); 751 if (d != '\n') 752 ctnew += skipline(f2); 753 break; 754 } 755 if (c == '\n') 756 break; 757 } 758 } 759 ixold[i] = ctold; 760 ixnew[j] = ctnew; 761 j++; 762 } 763 for (; j <= len[1]; j++) 764 ixnew[j] = ctnew += skipline(f2); 765 /* 766 * if (jackpot) 767 * fprintf(stderr, "jackpot\n"); 768 */ 769 } 770 771 /* shellsort CACM #201 */ 772 static void 773 sort(struct line *a, int n) 774 { 775 struct line *ai, *aim, w; 776 int j, m = 0, k; 777 778 if (n == 0) 779 return; 780 for (j = 1; j <= n; j *= 2) 781 m = 2 * j - 1; 782 for (m /= 2; m != 0; m /= 2) { 783 k = n - m; 784 for (j = 1; j <= k; j++) { 785 for (ai = &a[j]; ai > a; ai -= m) { 786 aim = &ai[m]; 787 if (aim < ai) 788 break; /* wraparound */ 789 if (aim->value > ai[0].value || 790 (aim->value == ai[0].value && 791 aim->serial > ai[0].serial)) 792 break; 793 w.value = ai[0].value; 794 ai[0].value = aim->value; 795 aim->value = w.value; 796 w.serial = ai[0].serial; 797 ai[0].serial = aim->serial; 798 aim->serial = w.serial; 799 } 800 } 801 } 802 } 803 804 static void 805 unsort(struct line *f, int l, int *b) 806 { 807 int *a, i; 808 809 a = emalloc((l + 1) * sizeof(int)); 810 for (i = 1; i <= l; i++) 811 a[f[i].serial] = f[i].value; 812 for (i = 1; i <= l; i++) 813 b[i] = a[i]; 814 free(a); 815 } 816 817 static int 818 skipline(FILE *f) 819 { 820 int i, c; 821 822 for (i = 1; (c = getc(f)) != '\n'; i++) 823 if (c < 0) 824 return (i); 825 return (i); 826 } 827 828 static void 829 output(char *file1, FILE *f1, char *file2, FILE *f2) 830 { 831 int m, i0, i1, j0, j1; 832 833 rewind(f1); 834 rewind(f2); 835 m = len[0]; 836 J[0] = 0; 837 J[m + 1] = len[1] + 1; 838 if (format != D_EDIT) { 839 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 840 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 841 i0++; 842 j0 = J[i0 - 1] + 1; 843 i1 = i0 - 1; 844 while (i1 < m && J[i1 + 1] == 0) 845 i1++; 846 j1 = J[i1 + 1] - 1; 847 J[i1] = j1; 848 change(file1, f1, file2, f2, i0, i1, j0, j1); 849 } 850 } else { 851 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 852 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 853 i0--; 854 j0 = J[i0 + 1] - 1; 855 i1 = i0 + 1; 856 while (i1 > 1 && J[i1 - 1] == 0) 857 i1--; 858 j1 = J[i1 - 1] + 1; 859 J[i1] = j1; 860 change(file1, f1, file2, f2, i1, i0, j1, j0); 861 } 862 } 863 if (m == 0) 864 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 865 if (format == D_IFDEF) { 866 for (;;) { 867 #define c i0 868 c = getc(f1); 869 if (c < 0) 870 return; 871 putchar(c); 872 } 873 #undef c 874 } 875 if (anychange != 0) { 876 if (format == D_CONTEXT) 877 dump_context_vec(f1, f2); 878 else if (format == D_UNIFIED) 879 dump_unified_vec(f1, f2); 880 } 881 } 882 883 /* 884 * The following struct is used to record change information when 885 * doing a "context" or "unified" diff. (see routine "change" to 886 * understand the highly mnemonic field names) 887 */ 888 struct context_vec { 889 int a; /* start line in old file */ 890 int b; /* end line in old file */ 891 int c; /* start line in new file */ 892 int d; /* end line in new file */ 893 }; 894 895 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 896 897 #define MAX_CONTEXT 128 898 899 /* 900 * Indicate that there is a difference between lines a and b of the from file 901 * to get to lines c to d of the to file. If a is greater then b then there 902 * are no lines in the from file involved and this means that there were 903 * lines appended (beginning at b). If c is greater than d then there are 904 * lines missing from the to file. 905 */ 906 static void 907 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) 908 { 909 if (format != D_IFDEF && a > b && c > d) 910 return; 911 if (anychange == 0) { 912 anychange = 1; 913 if (format == D_CONTEXT || format == D_UNIFIED) { 914 printf("%s %s %s", format == D_CONTEXT ? "***" : "---", 915 file1, ctime(&stb1.st_mtime)); 916 printf("%s %s %s", format == D_CONTEXT ? "---" : "+++", 917 file2, ctime(&stb2.st_mtime)); 918 if (context_vec_start == NULL) 919 context_vec_start = emalloc(MAX_CONTEXT * 920 sizeof(struct context_vec)); 921 context_vec_end = context_vec_start + MAX_CONTEXT; 922 context_vec_ptr = context_vec_start - 1; 923 } 924 } 925 if (format == D_CONTEXT || format == D_UNIFIED) { 926 /* 927 * If this new change is within 'context' lines of 928 * the previous change, just add it to the change 929 * record. If the record is full or if this 930 * change is more than 'context' lines from the previous 931 * change, dump the record, reset it & add the new change. 932 */ 933 if (context_vec_ptr >= context_vec_end || 934 (context_vec_ptr >= context_vec_start && 935 a > (context_vec_ptr->b + 2 * context) && 936 c > (context_vec_ptr->d + 2 * context))) { 937 if (format == D_CONTEXT) 938 dump_context_vec(f1, f2); 939 else 940 dump_unified_vec(f1, f2); 941 } 942 context_vec_ptr++; 943 context_vec_ptr->a = a; 944 context_vec_ptr->b = b; 945 context_vec_ptr->c = c; 946 context_vec_ptr->d = d; 947 return; 948 } 949 switch (format) { 950 951 case D_NORMAL: 952 case D_EDIT: 953 range(a, b, ","); 954 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 955 if (format == D_NORMAL) 956 range(c, d, ","); 957 putchar('\n'); 958 break; 959 case D_REVERSE: 960 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 961 range(a, b, " "); 962 putchar('\n'); 963 break; 964 case D_NREVERSE: 965 if (a > b) 966 printf("a%d %d\n", b, d - c + 1); 967 else { 968 printf("d%d %d\n", a, b - a + 1); 969 if (!(c > d)) 970 /* add changed lines */ 971 printf("a%d %d\n", b, d - c + 1); 972 } 973 break; 974 } 975 if (format == D_NORMAL || format == D_IFDEF) { 976 fetch(ixold, a, b, f1, "< ", 1); 977 if (a <= b && c <= d && format == D_NORMAL) 978 puts("---"); 979 } 980 fetch(ixnew, c, d, f2, format == D_NORMAL ? "> " : "", 0); 981 if ((format == D_EDIT || format == D_REVERSE) && c <= d) 982 puts("."); 983 if (inifdef) { 984 printf("#endif /* %s */\n", ifdefname); 985 inifdef = 0; 986 } 987 } 988 989 static void 990 range(int a, int b, char *separator) 991 { 992 printf("%d", a > b ? b : a); 993 if (a < b) 994 printf("%s%d", separator, b); 995 } 996 997 static void 998 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 999 { 1000 int i, j, c, col, nc; 1001 1002 /* 1003 * When doing #ifdef's, copy down to current line 1004 * if this is the first file, so that stuff makes it to output. 1005 */ 1006 if (format == D_IFDEF && oldfile) { 1007 long curpos = ftell(lb); 1008 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1009 nc = f[a > b ? b : a - 1] - curpos; 1010 for (i = 0; i < nc; i++) 1011 putchar(getc(lb)); 1012 } 1013 if (a > b) 1014 return; 1015 if (format == D_IFDEF) { 1016 if (inifdef) { 1017 printf("#else /* %s%s */\n", 1018 oldfile == 1 ? "!" : "", ifdefname); 1019 } else { 1020 if (oldfile) 1021 printf("#ifndef %s\n", ifdefname); 1022 else 1023 printf("#ifdef %s\n", ifdefname); 1024 } 1025 inifdef = 1 + oldfile; 1026 } 1027 for (i = a; i <= b; i++) { 1028 fseek(lb, f[i - 1], SEEK_SET); 1029 nc = f[i] - f[i - 1]; 1030 if (format != D_IFDEF) 1031 fputs(s, stdout); 1032 col = 0; 1033 for (j = 0; j < nc; j++) { 1034 c = getc(lb); 1035 if (c == '\t' && tflag) 1036 do 1037 putchar(' '); 1038 while (++col & 7); 1039 else { 1040 putchar(c); 1041 col++; 1042 } 1043 } 1044 } 1045 } 1046 1047 #define POW2 /* define only if HALFLONG is 2**n */ 1048 #define HALFLONG 16 1049 #define low(x) (x&((1L<<HALFLONG)-1)) 1050 #define high(x) (x>>HALFLONG) 1051 1052 /* 1053 * hashing has the effect of 1054 * arranging line in 7-bit bytes and then 1055 * summing 1-s complement in 16-bit hunks 1056 */ 1057 static int 1058 readhash(FILE *f) 1059 { 1060 unsigned int shift; 1061 int t, space; 1062 long sum; 1063 1064 sum = 1; 1065 space = 0; 1066 if (!bflag && !wflag) { 1067 if (iflag) 1068 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1069 if (t == -1) 1070 return (0); 1071 sum += (long)chrtran[t] << (shift 1072 #ifdef POW2 1073 &= HALFLONG - 1); 1074 #else 1075 %= HALFLONG); 1076 #endif 1077 } 1078 else 1079 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1080 if (t == -1) 1081 return (0); 1082 sum += (long)t << (shift 1083 #ifdef POW2 1084 &= HALFLONG - 1); 1085 #else 1086 %= HALFLONG); 1087 #endif 1088 } 1089 } else { 1090 for (shift = 0;;) { 1091 switch (t = getc(f)) { 1092 case -1: 1093 return (0); 1094 case '\t': 1095 case ' ': 1096 space++; 1097 continue; 1098 default: 1099 if (space && !wflag) { 1100 shift += 7; 1101 space = 0; 1102 } 1103 sum += (long)chrtran[t] << (shift 1104 #ifdef POW2 1105 &= HALFLONG - 1); 1106 #else 1107 %= HALFLONG); 1108 #endif 1109 shift += 7; 1110 continue; 1111 case '\n': 1112 break; 1113 } 1114 break; 1115 } 1116 } 1117 sum = low(sum) + high(sum); 1118 return ((short) low(sum) + (short) high(sum)); 1119 } 1120 1121 int 1122 asciifile(FILE *f) 1123 { 1124 char buf[BUFSIZ], *cp; 1125 int cnt; 1126 1127 if (aflag || f == NULL) 1128 return (1); 1129 1130 rewind(f); 1131 cnt = fread(buf, 1, sizeof(buf), f); 1132 cp = buf; 1133 while (--cnt >= 0) 1134 if (*cp++ & 0200) 1135 return (0); 1136 return (1); 1137 } 1138 1139 static __inline int min(int a, int b) 1140 { 1141 return (a < b ? a : b); 1142 } 1143 1144 static __inline int max(int a, int b) 1145 { 1146 return (a > b ? a : b); 1147 } 1148 1149 /* dump accumulated "context" diff changes */ 1150 static void 1151 dump_context_vec(FILE *f1, FILE *f2) 1152 { 1153 struct context_vec *cvp = context_vec_start; 1154 int lowa, upb, lowc, upd, do_output; 1155 int a, b, c, d; 1156 char ch; 1157 1158 if (context_vec_start > context_vec_ptr) 1159 return; 1160 1161 b = d = 0; /* gcc */ 1162 lowa = max(1, cvp->a - context); 1163 upb = min(len[0], context_vec_ptr->b + context); 1164 lowc = max(1, cvp->c - context); 1165 upd = min(len[1], context_vec_ptr->d + context); 1166 1167 printf("***************\n*** "); 1168 range(lowa, upb, ","); 1169 printf(" ****\n"); 1170 1171 /* 1172 * Output changes to the "old" file. The first loop suppresses 1173 * output if there were no changes to the "old" file (we'll see 1174 * the "old" lines as context in the "new" list). 1175 */ 1176 do_output = 0; 1177 for (; cvp <= context_vec_ptr; cvp++) 1178 if (cvp->a <= cvp->b) { 1179 cvp = context_vec_start; 1180 do_output++; 1181 break; 1182 } 1183 if (do_output) { 1184 while (cvp <= context_vec_ptr) { 1185 a = cvp->a; 1186 b = cvp->b; 1187 c = cvp->c; 1188 d = cvp->d; 1189 1190 if (a <= b && c <= d) 1191 ch = 'c'; 1192 else 1193 ch = (a <= b) ? 'd' : 'a'; 1194 1195 if (ch == 'a') 1196 fetch(ixold, lowa, b, f1, " ", 0); 1197 else { 1198 fetch(ixold, lowa, a - 1, f1, " ", 0); 1199 fetch(ixold, a, b, f1, 1200 ch == 'c' ? "! " : "- ", 0); 1201 } 1202 lowa = b + 1; 1203 cvp++; 1204 } 1205 fetch(ixold, b + 1, upb, f1, " ", 0); 1206 } 1207 /* output changes to the "new" file */ 1208 printf("--- "); 1209 range(lowc, upd, ","); 1210 printf(" ----\n"); 1211 1212 do_output = 0; 1213 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1214 if (cvp->c <= cvp->d) { 1215 cvp = context_vec_start; 1216 do_output++; 1217 break; 1218 } 1219 if (do_output) { 1220 while (cvp <= context_vec_ptr) { 1221 a = cvp->a; 1222 b = cvp->b; 1223 c = cvp->c; 1224 d = cvp->d; 1225 1226 if (a <= b && c <= d) 1227 ch = 'c'; 1228 else 1229 ch = (a <= b) ? 'd' : 'a'; 1230 1231 if (ch == 'd') 1232 fetch(ixnew, lowc, d, f2, " ", 0); 1233 else { 1234 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1235 fetch(ixnew, c, d, f2, 1236 ch == 'c' ? "! " : "+ ", 0); 1237 } 1238 lowc = d + 1; 1239 cvp++; 1240 } 1241 fetch(ixnew, d + 1, upd, f2, " ", 0); 1242 } 1243 context_vec_ptr = context_vec_start - 1; 1244 } 1245 1246 /* dump accumulated "unified" diff changes */ 1247 static void 1248 dump_unified_vec(FILE *f1, FILE *f2) 1249 { 1250 struct context_vec *cvp = context_vec_start; 1251 int lowa, upb, lowc, upd; 1252 int a, b, c, d; 1253 char ch; 1254 1255 if (context_vec_start > context_vec_ptr) 1256 return; 1257 1258 b = d = 0; /* gcc */ 1259 lowa = max(1, cvp->a - context); 1260 upb = min(len[0], context_vec_ptr->b + context); 1261 lowc = max(1, cvp->c - context); 1262 upd = min(len[1], context_vec_ptr->d + context); 1263 1264 printf("@@ -%d,%d +%d,%d @@\n", lowa, upb - lowa + 1, 1265 lowc, upd - lowc + 1); 1266 1267 /* 1268 * Output changes in "unified" diff format--the old and new lines 1269 * are printed together. 1270 */ 1271 for (; cvp <= context_vec_ptr; cvp++) { 1272 a = cvp->a; 1273 b = cvp->b; 1274 c = cvp->c; 1275 d = cvp->d; 1276 1277 /* 1278 * c: both new and old changes 1279 * d: only changes in the old file 1280 * a: only changes in the new file 1281 */ 1282 if (a <= b && c <= d) 1283 ch = 'c'; 1284 else 1285 ch = (a <= b) ? 'd' : 'a'; 1286 1287 switch (ch) { 1288 case 'c': 1289 fetch(ixold, lowa, a - 1, f1, " ", 0); 1290 fetch(ixold, a, b, f1, "-", 0); 1291 fetch(ixnew, c, d, f2, "+", 0); 1292 break; 1293 case 'd': 1294 fetch(ixold, lowa, a - 1, f1, " ", 0); 1295 fetch(ixold, a, b, f1, "-", 0); 1296 break; 1297 case 'a': 1298 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1299 fetch(ixnew, c, d, f2, "+", 0); 1300 break; 1301 } 1302 lowa = b + 1; 1303 lowc = d + 1; 1304 } 1305 fetch(ixnew, d + 1, upd, f2, " ", 0); 1306 1307 context_vec_ptr = context_vec_start - 1; 1308 } 1309