1 /* $OpenBSD: diffreg.c,v 1.33 2003/07/15 23:17:56 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #ifndef lint 68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.33 2003/07/15 23:17:56 millert Exp $"; 69 #endif /* not lint */ 70 71 #include <sys/param.h> 72 #include <sys/stat.h> 73 #include <sys/wait.h> 74 75 #include <ctype.h> 76 #include <err.h> 77 #include <errno.h> 78 #include <fcntl.h> 79 #include <libgen.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 85 #include "diff.h" 86 #include "pathnames.h" 87 88 /* 89 * diff - compare two files. 90 */ 91 92 /* 93 * Uses an algorithm due to Harold Stone, which finds 94 * a pair of longest identical subsequences in the two 95 * files. 96 * 97 * The major goal is to generate the match vector J. 98 * J[i] is the index of the line in file1 corresponding 99 * to line i file0. J[i] = 0 if there is no 100 * such line in file1. 101 * 102 * Lines are hashed so as to work in core. All potential 103 * matches are located by sorting the lines of each file 104 * on the hash (called ``value''). In particular, this 105 * collects the equivalence classes in file1 together. 106 * Subroutine equiv replaces the value of each line in 107 * file0 by the index of the first element of its 108 * matching equivalence in (the reordered) file1. 109 * To save space equiv squeezes file1 into a single 110 * array member in which the equivalence classes 111 * are simply concatenated, except that their first 112 * members are flagged by changing sign. 113 * 114 * Next the indices that point into member are unsorted into 115 * array class according to the original order of file0. 116 * 117 * The cleverness lies in routine stone. This marches 118 * through the lines of file0, developing a vector klist 119 * of "k-candidates". At step i a k-candidate is a matched 120 * pair of lines x,y (x in file0 y in file1) such that 121 * there is a common subsequence of length k 122 * between the first i lines of file0 and the first y 123 * lines of file1, but there is no such subsequence for 124 * any smaller y. x is the earliest possible mate to y 125 * that occurs in such a subsequence. 126 * 127 * Whenever any of the members of the equivalence class of 128 * lines in file1 matable to a line in file0 has serial number 129 * less than the y of some k-candidate, that k-candidate 130 * with the smallest such y is replaced. The new 131 * k-candidate is chained (via pred) to the current 132 * k-1 candidate so that the actual subsequence can 133 * be recovered. When a member has serial number greater 134 * that the y of all k-candidates, the klist is extended. 135 * At the end, the longest subsequence is pulled out 136 * and placed in the array J by unravel 137 * 138 * With J in hand, the matches there recorded are 139 * check'ed against reality to assure that no spurious 140 * matches have crept in due to hashing. If they have, 141 * they are broken, and "jackpot" is recorded--a harmless 142 * matter except that a true match for a spuriously 143 * mated line may now be unnecessarily reported as a change. 144 * 145 * Much of the complexity of the program comes simply 146 * from trying to minimize core utilization and 147 * maximize the range of doable problems by dynamically 148 * allocating what is needed and reusing what is not. 149 * The core requirements for problems larger than somewhat 150 * are (in words) 2*length(file0) + length(file1) + 151 * 3*(number of k-candidates installed), typically about 152 * 6n words for files of length n. 153 */ 154 155 struct cand { 156 int x; 157 int y; 158 int pred; 159 } cand; 160 161 struct line { 162 int serial; 163 int value; 164 } *file[2]; 165 166 static int *J; /* will be overlaid on class */ 167 static int *class; /* will be overlaid on file[0] */ 168 static int *klist; /* will be overlaid on file[0] after class */ 169 static int *member; /* will be overlaid on file[1] */ 170 static int clen; 171 static int inifdef; /* whether or not we are in a #ifdef block */ 172 static int len[2]; 173 static int pref, suff; /* length of prefix and suffix */ 174 static int slen[2]; 175 static int anychange; 176 static long *ixnew; /* will be overlaid on file[1] */ 177 static long *ixold; /* will be overlaid on klist */ 178 static struct cand *clist; /* merely a free storage pot for candidates */ 179 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 180 static u_char *chrtran; /* translation table for case-folding */ 181 182 static FILE *opentemp(const char *); 183 static void fetch(long *, int, int, FILE *, char *, int); 184 static void output(char *, FILE *, char *, FILE *); 185 static void check(char *, FILE *, char *, FILE *); 186 static void range(int, int, char *); 187 static void uni_range(int, int); 188 static void dump_context_vec(FILE *, FILE *); 189 static void dump_unified_vec(FILE *, FILE *); 190 static void prepare(int, FILE *); 191 static void prune(void); 192 static void equiv(struct line *, int, struct line *, int, int *); 193 static void unravel(int); 194 static void unsort(struct line *, int, int *); 195 static void change(char *, FILE *, char *, FILE *, int, int, int, int); 196 static void sort(struct line *, int); 197 static int asciifile(FILE *); 198 static int newcand(int, int, int); 199 static int search(int *, int, int); 200 static int skipline(FILE *); 201 static int stone(int *, int, int *, int *); 202 static int readhash(FILE *); 203 static int files_differ(FILE *, FILE *, int); 204 205 /* 206 * chrtran points to one of 2 translation tables: cup2low if folding upper to 207 * lower case clow2low if not folding case 208 */ 209 u_char clow2low[256] = { 210 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 211 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 212 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 213 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 214 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 215 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 216 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 217 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 218 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 219 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 220 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 221 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 222 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 223 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 224 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 225 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 226 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 227 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 228 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 229 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 230 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 231 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 232 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 233 0xfd, 0xfe, 0xff 234 }; 235 236 u_char cup2low[256] = { 237 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 238 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 239 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 240 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 241 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 242 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 243 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 244 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 245 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 246 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 247 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 248 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 249 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 250 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 251 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 252 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 253 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 254 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 255 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 256 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 257 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 258 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 259 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 260 0xfd, 0xfe, 0xff 261 }; 262 263 int 264 diffreg(char *ofile1, char *ofile2, int flags) 265 { 266 char *file1 = ofile1; 267 char *file2 = ofile2; 268 FILE *f1 = NULL; 269 FILE *f2 = NULL; 270 int rval = D_SAME; 271 int i, ostdout = -1; 272 pid_t pid = -1; 273 274 anychange = 0; 275 chrtran = (iflag ? cup2low : clow2low); 276 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 277 return (D_MISMATCH); 278 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 279 goto notsame; 280 281 if (flags & D_EMPTY1) 282 f1 = fopen(_PATH_DEVNULL, "r"); 283 else { 284 if (!S_ISREG(stb1.st_mode)) { 285 if ((f1 = opentemp(file1)) == NULL || 286 fstat(fileno(f1), &stb1) < 0) { 287 warn("%s", file1); 288 status |= 2; 289 goto closem; 290 } 291 } else if (strcmp(file1, "-") == 0) 292 f1 = stdin; 293 else 294 f1 = fopen(file1, "r"); 295 } 296 if (f1 == NULL) { 297 warn("%s", file1); 298 status |= 2; 299 goto closem; 300 } 301 302 if (flags & D_EMPTY2) 303 f2 = fopen(_PATH_DEVNULL, "r"); 304 else { 305 if (!S_ISREG(stb2.st_mode)) { 306 if ((f2 = opentemp(file2)) == NULL || 307 fstat(fileno(f2), &stb2) < 0) { 308 warn("%s", file2); 309 status |= 2; 310 goto closem; 311 } 312 } else if (strcmp(file2, "-") == 0) 313 f2 = stdin; 314 else 315 f2 = fopen(file2, "r"); 316 } 317 if (f2 == NULL) { 318 warn("%s", file2); 319 status |= 2; 320 goto closem; 321 } 322 323 switch (files_differ(f1, f2, flags)) { 324 case 0: 325 goto closem; 326 case 1: 327 break; 328 default: 329 /* error */ 330 status |= 2; 331 goto closem; 332 } 333 334 notsame: 335 /* 336 * Files certainly differ at this point; set status accordingly 337 */ 338 status |= 1; 339 rval = D_DIFFER; 340 if (!asciifile(f1) || !asciifile(f2)) { 341 rval = D_BINARY; 342 goto closem; 343 } 344 if (format == D_BRIEF) 345 goto closem; 346 if (lflag) { 347 /* redirect stdout to pr */ 348 int pfd[2]; 349 char *header; 350 char *prargv[] = { "pr", "-h", NULL, "-f", NULL }; 351 352 easprintf(&header, "%s %s %s", diffargs, file1, file2); 353 prargv[2] = header; 354 fflush(stdout); 355 rewind(stdout); 356 pipe(pfd); 357 switch ((pid = fork())) { 358 case -1: 359 warnx("No more processes"); 360 status |= 2; 361 free(header); 362 return (D_ERROR); 363 case 0: 364 /* child */ 365 if (pfd[0] != STDIN_FILENO) { 366 dup2(pfd[0], STDIN_FILENO); 367 close(pfd[0]); 368 } 369 close(pfd[1]); 370 execv(_PATH_PR, prargv); 371 _exit(127); 372 default: 373 /* parent */ 374 if (pfd[1] != STDOUT_FILENO) { 375 ostdout = dup(STDOUT_FILENO); 376 dup2(pfd[1], STDOUT_FILENO); 377 close(pfd[1]); 378 } 379 close(pfd[0]); 380 rewind(stdout); 381 free(header); 382 } 383 } else { 384 if (flags & D_HEADER) { 385 if (format == D_EDIT) 386 printf("ed - %s << '-*-END-*-'\n", 387 basename(file1)); 388 else 389 printf("%s %s %s\n", diffargs, file1, file2); 390 } 391 } 392 prepare(0, f1); 393 prepare(1, f2); 394 prune(); 395 sort(sfile[0], slen[0]); 396 sort(sfile[1], slen[1]); 397 398 member = (int *)file[1]; 399 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 400 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 401 402 class = (int *)file[0]; 403 unsort(sfile[0], slen[0], class); 404 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 405 406 klist = emalloc((slen[0] + 2) * sizeof(int)); 407 clist = emalloc(sizeof(cand)); 408 i = stone(class, slen[0], member, klist); 409 free(member); 410 free(class); 411 412 J = erealloc(J, (len[0] + 2) * sizeof(int)); 413 unravel(klist[i]); 414 free(clist); 415 free(klist); 416 417 ixold = erealloc(ixold, (len[0] + 2) * sizeof(long)); 418 ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long)); 419 check(file1, f1, file2, f2); 420 output(file1, f1, file2, f2); 421 if (ostdout != -1) { 422 int wstatus; 423 424 /* close the pipe to pr and restore stdout */ 425 fflush(stdout); 426 rewind(stdout); 427 if (ostdout != STDOUT_FILENO) { 428 close(STDOUT_FILENO); 429 dup2(ostdout, STDOUT_FILENO); 430 close(ostdout); 431 } 432 waitpid(pid, &wstatus, 0); 433 } else if ((flags & D_HEADER) && format == D_EDIT) 434 printf("w\nq\n-*-END-*-\n"); 435 closem: 436 if (f1 != NULL) 437 fclose(f1); 438 if (f2 != NULL) 439 fclose(f2); 440 if (file1 != ofile1) 441 free(file1); 442 if (file2 != ofile2) 443 free(file2); 444 return (rval); 445 } 446 447 /* 448 * Check to see if the given files differ. 449 * Returns 0 if they are the same, 1 if different, and -1 on error. 450 * XXX - could use code from cmp(1) [faster] 451 */ 452 static int 453 files_differ(FILE *f1, FILE *f2, int flags) 454 { 455 char buf1[BUFSIZ], buf2[BUFSIZ]; 456 size_t i, j; 457 458 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 459 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 460 return (1); 461 for (;;) { 462 i = fread(buf1, 1, sizeof(buf1), f1); 463 j = fread(buf2, 1, sizeof(buf2), f2); 464 if (i != j) 465 return (1); 466 if (i == 0 && j == 0) { 467 if (ferror(f1) || ferror(f2)) 468 return (1); 469 return (0); 470 } 471 if (memcmp(buf1, buf2, i) != 0) 472 return (1); 473 } 474 } 475 476 static FILE * 477 opentemp(const char *file) 478 { 479 char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN]; 480 ssize_t nread; 481 int ifd, ofd; 482 483 if (strcmp(file, "-") == 0) 484 ifd = STDIN_FILENO; 485 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 486 return (NULL); 487 488 if ((tempdir = getenv("TMPDIR")) == NULL) 489 tempdir = _PATH_TMP; 490 if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX", 491 tempdir) >= sizeof(tempfile)) { 492 close(ifd); 493 errno = ENAMETOOLONG; 494 return (NULL); 495 } 496 497 if ((ofd = mkstemp(tempfile)) < 0) 498 return (NULL); 499 unlink(tempfile); 500 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 501 if (write(ofd, buf, nread) != nread) { 502 close(ifd); 503 close(ofd); 504 return (NULL); 505 } 506 } 507 close(ifd); 508 return (fdopen(ofd, "r")); 509 } 510 511 char * 512 splice(char *dir, char *file) 513 { 514 char *tail, *buf; 515 516 if ((tail = strrchr(file, '/')) == NULL) 517 tail = file; 518 else 519 tail++; 520 easprintf(&buf, "%s/%s", dir, tail); 521 return (buf); 522 } 523 524 static void 525 prepare(int i, FILE *fd) 526 { 527 struct line *p; 528 int j, h; 529 530 rewind(fd); 531 p = emalloc(3 * sizeof(struct line)); 532 for (j = 0; (h = readhash(fd));) { 533 p = erealloc(p, (++j + 3) * sizeof(struct line)); 534 p[j].value = h; 535 } 536 len[i] = j; 537 file[i] = p; 538 } 539 540 static void 541 prune(void) 542 { 543 int i, j; 544 545 for (pref = 0; pref < len[0] && pref < len[1] && 546 file[0][pref + 1].value == file[1][pref + 1].value; 547 pref++) 548 ; 549 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 550 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 551 suff++) 552 ; 553 for (j = 0; j < 2; j++) { 554 sfile[j] = file[j] + pref; 555 slen[j] = len[j] - pref - suff; 556 for (i = 0; i <= slen[j]; i++) 557 sfile[j][i].serial = i; 558 } 559 } 560 561 static void 562 equiv(struct line *a, int n, struct line *b, int m, int *c) 563 { 564 int i, j; 565 566 i = j = 1; 567 while (i <= n && j <= m) { 568 if (a[i].value < b[j].value) 569 a[i++].value = 0; 570 else if (a[i].value == b[j].value) 571 a[i++].value = j; 572 else 573 j++; 574 } 575 while (i <= n) 576 a[i++].value = 0; 577 b[m + 1].value = 0; 578 j = 0; 579 while (++j <= m) { 580 c[j] = -b[j].serial; 581 while (b[j + 1].value == b[j].value) { 582 j++; 583 c[j] = b[j].serial; 584 } 585 } 586 c[j] = -1; 587 } 588 589 static int 590 stone(int *a, int n, int *b, int *c) 591 { 592 int i, k, y, j, l; 593 int oldc, tc, oldl; 594 595 k = 0; 596 c[0] = newcand(0, 0, 0); 597 for (i = 1; i <= n; i++) { 598 j = a[i]; 599 if (j == 0) 600 continue; 601 y = -b[j]; 602 oldl = 0; 603 oldc = c[0]; 604 do { 605 if (y <= clist[oldc].y) 606 continue; 607 l = search(c, k, y); 608 if (l != oldl + 1) 609 oldc = c[l - 1]; 610 if (l <= k) { 611 if (clist[c[l]].y <= y) 612 continue; 613 tc = c[l]; 614 c[l] = newcand(i, y, oldc); 615 oldc = tc; 616 oldl = l; 617 } else { 618 c[l] = newcand(i, y, oldc); 619 k++; 620 break; 621 } 622 } while ((y = b[++j]) > 0); 623 } 624 return (k); 625 } 626 627 static int 628 newcand(int x, int y, int pred) 629 { 630 struct cand *q; 631 632 clist = erealloc(clist, ++clen * sizeof(cand)); 633 q = clist + clen - 1; 634 q->x = x; 635 q->y = y; 636 q->pred = pred; 637 return (clen - 1); 638 } 639 640 static int 641 search(int *c, int k, int y) 642 { 643 int i, j, l, t; 644 645 if (clist[c[k]].y < y) /* quick look for typical case */ 646 return (k + 1); 647 i = 0; 648 j = k + 1; 649 while (1) { 650 l = i + j; 651 if ((l >>= 1) <= i) 652 break; 653 t = clist[c[l]].y; 654 if (t > y) 655 j = l; 656 else if (t < y) 657 i = l; 658 else 659 return (l); 660 } 661 return (l + 1); 662 } 663 664 static void 665 unravel(int p) 666 { 667 struct cand *q; 668 int i; 669 670 for (i = 0; i <= len[0]; i++) 671 J[i] = i <= pref ? i : 672 i > len[0] - suff ? i + len[1] - len[0] : 0; 673 for (q = clist + p; q->y != 0; q = clist + q->pred) 674 J[q->x + pref] = q->y + pref; 675 } 676 677 /* 678 * Check does double duty: 679 * 1. ferret out any fortuitous correspondences due 680 * to confounding by hashing (which result in "jackpot") 681 * 2. collect random access indexes to the two files 682 */ 683 static void 684 check(char *file1, FILE *f1, char *file2, FILE *f2) 685 { 686 int i, j, jackpot, c, d; 687 long ctold, ctnew; 688 689 rewind(f1); 690 rewind(f2); 691 j = 1; 692 ixold[0] = ixnew[0] = 0; 693 jackpot = 0; 694 ctold = ctnew = 0; 695 for (i = 1; i <= len[0]; i++) { 696 if (J[i] == 0) { 697 ixold[i] = ctold += skipline(f1); 698 continue; 699 } 700 while (j < J[i]) { 701 ixnew[j] = ctnew += skipline(f2); 702 j++; 703 } 704 if (bflag || wflag || iflag) { 705 for (;;) { 706 c = getc(f1); 707 d = getc(f2); 708 ctold++; 709 ctnew++; 710 if (bflag && isspace(c) && isspace(d)) { 711 do { 712 if (c == '\n') 713 break; 714 ctold++; 715 } while (isspace(c = getc(f1))); 716 do { 717 if (d == '\n') 718 break; 719 ctnew++; 720 } while (isspace(d = getc(f2))); 721 } else if (wflag) { 722 while (isspace(c) && c != '\n') { 723 c = getc(f1); 724 ctold++; 725 } 726 while (isspace(d) && d != '\n') { 727 d = getc(f2); 728 ctnew++; 729 } 730 } 731 if (chrtran[c] != chrtran[d]) { 732 jackpot++; 733 J[i] = 0; 734 if (c != '\n') 735 ctold += skipline(f1); 736 if (d != '\n') 737 ctnew += skipline(f2); 738 break; 739 } 740 if (c == '\n') 741 break; 742 } 743 } else { 744 for (;;) { 745 ctold++; 746 ctnew++; 747 if ((c = getc(f1)) != (d = getc(f2))) { 748 /* jackpot++; */ 749 J[i] = 0; 750 if (c != '\n') 751 ctold += skipline(f1); 752 if (d != '\n') 753 ctnew += skipline(f2); 754 break; 755 } 756 if (c == '\n') 757 break; 758 } 759 } 760 ixold[i] = ctold; 761 ixnew[j] = ctnew; 762 j++; 763 } 764 for (; j <= len[1]; j++) 765 ixnew[j] = ctnew += skipline(f2); 766 /* 767 * if (jackpot) 768 * fprintf(stderr, "jackpot\n"); 769 */ 770 } 771 772 /* shellsort CACM #201 */ 773 static void 774 sort(struct line *a, int n) 775 { 776 struct line *ai, *aim, w; 777 int j, m = 0, k; 778 779 if (n == 0) 780 return; 781 for (j = 1; j <= n; j *= 2) 782 m = 2 * j - 1; 783 for (m /= 2; m != 0; m /= 2) { 784 k = n - m; 785 for (j = 1; j <= k; j++) { 786 for (ai = &a[j]; ai > a; ai -= m) { 787 aim = &ai[m]; 788 if (aim < ai) 789 break; /* wraparound */ 790 if (aim->value > ai[0].value || 791 (aim->value == ai[0].value && 792 aim->serial > ai[0].serial)) 793 break; 794 w.value = ai[0].value; 795 ai[0].value = aim->value; 796 aim->value = w.value; 797 w.serial = ai[0].serial; 798 ai[0].serial = aim->serial; 799 aim->serial = w.serial; 800 } 801 } 802 } 803 } 804 805 static void 806 unsort(struct line *f, int l, int *b) 807 { 808 int *a, i; 809 810 a = emalloc((l + 1) * sizeof(int)); 811 for (i = 1; i <= l; i++) 812 a[f[i].serial] = f[i].value; 813 for (i = 1; i <= l; i++) 814 b[i] = a[i]; 815 free(a); 816 } 817 818 static int 819 skipline(FILE *f) 820 { 821 int i, c; 822 823 for (i = 1; (c = getc(f)) != '\n'; i++) 824 if (c < 0) 825 return (i); 826 return (i); 827 } 828 829 static void 830 output(char *file1, FILE *f1, char *file2, FILE *f2) 831 { 832 int m, i0, i1, j0, j1; 833 834 rewind(f1); 835 rewind(f2); 836 m = len[0]; 837 J[0] = 0; 838 J[m + 1] = len[1] + 1; 839 if (format != D_EDIT) { 840 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 841 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 842 i0++; 843 j0 = J[i0 - 1] + 1; 844 i1 = i0 - 1; 845 while (i1 < m && J[i1 + 1] == 0) 846 i1++; 847 j1 = J[i1 + 1] - 1; 848 J[i1] = j1; 849 change(file1, f1, file2, f2, i0, i1, j0, j1); 850 } 851 } else { 852 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 853 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 854 i0--; 855 j0 = J[i0 + 1] - 1; 856 i1 = i0 + 1; 857 while (i1 > 1 && J[i1 - 1] == 0) 858 i1--; 859 j1 = J[i1 - 1] + 1; 860 J[i1] = j1; 861 change(file1, f1, file2, f2, i1, i0, j1, j0); 862 } 863 } 864 if (m == 0) 865 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 866 if (format == D_IFDEF) { 867 for (;;) { 868 #define c i0 869 c = getc(f1); 870 if (c < 0) 871 return; 872 putchar(c); 873 } 874 #undef c 875 } 876 if (anychange != 0) { 877 if (format == D_CONTEXT) 878 dump_context_vec(f1, f2); 879 else if (format == D_UNIFIED) 880 dump_unified_vec(f1, f2); 881 } 882 } 883 884 static __inline void 885 range(int a, int b, char *separator) 886 { 887 printf("%d", a > b ? b : a); 888 if (a < b) 889 printf("%s%d", separator, b); 890 } 891 892 static __inline void 893 uni_range(int a, int b) 894 { 895 if (a < b) 896 printf("%d,%d", a, b - a + 1); 897 else if (a == b) 898 printf("%d", b); 899 else 900 printf("%d,0", b); 901 } 902 903 /* 904 * The following struct is used to record change information when 905 * doing a "context" or "unified" diff. (see routine "change" to 906 * understand the highly mnemonic field names) 907 */ 908 struct context_vec { 909 int a; /* start line in old file */ 910 int b; /* end line in old file */ 911 int c; /* start line in new file */ 912 int d; /* end line in new file */ 913 }; 914 915 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 916 917 #define MAX_CONTEXT 128 918 919 /* 920 * Indicate that there is a difference between lines a and b of the from file 921 * to get to lines c to d of the to file. If a is greater then b then there 922 * are no lines in the from file involved and this means that there were 923 * lines appended (beginning at b). If c is greater than d then there are 924 * lines missing from the to file. 925 */ 926 static void 927 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) 928 { 929 if (format != D_IFDEF && a > b && c > d) 930 return; 931 if (anychange == 0) { 932 anychange = 1; 933 if (format == D_CONTEXT || format == D_UNIFIED) { 934 printf("%s %s %s", format == D_CONTEXT ? "***" : "---", 935 file1, ctime(&stb1.st_mtime)); 936 printf("%s %s %s", format == D_CONTEXT ? "---" : "+++", 937 file2, ctime(&stb2.st_mtime)); 938 if (context_vec_start == NULL) 939 context_vec_start = emalloc(MAX_CONTEXT * 940 sizeof(struct context_vec)); 941 context_vec_end = context_vec_start + MAX_CONTEXT; 942 context_vec_ptr = context_vec_start - 1; 943 } 944 } 945 if (format == D_CONTEXT || format == D_UNIFIED) { 946 /* 947 * If this new change is within 'context' lines of 948 * the previous change, just add it to the change 949 * record. If the record is full or if this 950 * change is more than 'context' lines from the previous 951 * change, dump the record, reset it & add the new change. 952 */ 953 if (context_vec_ptr >= context_vec_end || 954 (context_vec_ptr >= context_vec_start && 955 a > (context_vec_ptr->b + 2 * context) && 956 c > (context_vec_ptr->d + 2 * context))) { 957 if (format == D_CONTEXT) 958 dump_context_vec(f1, f2); 959 else 960 dump_unified_vec(f1, f2); 961 } 962 context_vec_ptr++; 963 context_vec_ptr->a = a; 964 context_vec_ptr->b = b; 965 context_vec_ptr->c = c; 966 context_vec_ptr->d = d; 967 return; 968 } 969 switch (format) { 970 971 case D_NORMAL: 972 case D_EDIT: 973 range(a, b, ","); 974 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 975 if (format == D_NORMAL) 976 range(c, d, ","); 977 putchar('\n'); 978 break; 979 case D_REVERSE: 980 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 981 range(a, b, " "); 982 putchar('\n'); 983 break; 984 case D_NREVERSE: 985 if (a > b) 986 printf("a%d %d\n", b, d - c + 1); 987 else { 988 printf("d%d %d\n", a, b - a + 1); 989 if (!(c > d)) 990 /* add changed lines */ 991 printf("a%d %d\n", b, d - c + 1); 992 } 993 break; 994 } 995 if (format == D_NORMAL || format == D_IFDEF) { 996 fetch(ixold, a, b, f1, "< ", 1); 997 if (a <= b && c <= d && format == D_NORMAL) 998 puts("---"); 999 } 1000 fetch(ixnew, c, d, f2, format == D_NORMAL ? "> " : "", 0); 1001 if ((format == D_EDIT || format == D_REVERSE) && c <= d) 1002 puts("."); 1003 if (inifdef) { 1004 printf("#endif /* %s */\n", ifdefname); 1005 inifdef = 0; 1006 } 1007 } 1008 1009 static void 1010 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 1011 { 1012 int i, j, c, col, nc; 1013 1014 /* 1015 * When doing #ifdef's, copy down to current line 1016 * if this is the first file, so that stuff makes it to output. 1017 */ 1018 if (format == D_IFDEF && oldfile) { 1019 long curpos = ftell(lb); 1020 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1021 nc = f[a > b ? b : a - 1] - curpos; 1022 for (i = 0; i < nc; i++) 1023 putchar(getc(lb)); 1024 } 1025 if (a > b) 1026 return; 1027 if (format == D_IFDEF) { 1028 if (inifdef) { 1029 printf("#else /* %s%s */\n", 1030 oldfile == 1 ? "!" : "", ifdefname); 1031 } else { 1032 if (oldfile) 1033 printf("#ifndef %s\n", ifdefname); 1034 else 1035 printf("#ifdef %s\n", ifdefname); 1036 } 1037 inifdef = 1 + oldfile; 1038 } 1039 for (i = a; i <= b; i++) { 1040 fseek(lb, f[i - 1], SEEK_SET); 1041 nc = f[i] - f[i - 1]; 1042 if (format != D_IFDEF) 1043 fputs(s, stdout); 1044 col = 0; 1045 for (j = 0; j < nc; j++) { 1046 c = getc(lb); 1047 if (c == '\t' && tflag) 1048 do 1049 putchar(' '); 1050 while (++col & 7); 1051 else { 1052 putchar(c); 1053 col++; 1054 } 1055 } 1056 } 1057 } 1058 1059 #define POW2 /* define only if HALFLONG is 2**n */ 1060 #define HALFLONG 16 1061 #define low(x) (x&((1L<<HALFLONG)-1)) 1062 #define high(x) (x>>HALFLONG) 1063 1064 /* 1065 * hashing has the effect of 1066 * arranging line in 7-bit bytes and then 1067 * summing 1-s complement in 16-bit hunks 1068 */ 1069 static int 1070 readhash(FILE *f) 1071 { 1072 unsigned int shift; 1073 int t, space; 1074 long sum; 1075 1076 sum = 1; 1077 space = 0; 1078 if (!bflag && !wflag) { 1079 if (iflag) 1080 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1081 if (t == -1) 1082 return (0); 1083 sum += (long)chrtran[t] << (shift 1084 #ifdef POW2 1085 &= HALFLONG - 1); 1086 #else 1087 %= HALFLONG); 1088 #endif 1089 } 1090 else 1091 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1092 if (t == -1) 1093 return (0); 1094 sum += (long)t << (shift 1095 #ifdef POW2 1096 &= HALFLONG - 1); 1097 #else 1098 %= HALFLONG); 1099 #endif 1100 } 1101 } else { 1102 for (shift = 0;;) { 1103 switch (t = getc(f)) { 1104 case -1: 1105 return (0); 1106 case '\t': 1107 case ' ': 1108 space++; 1109 continue; 1110 default: 1111 if (space && !wflag) { 1112 shift += 7; 1113 space = 0; 1114 } 1115 sum += (long)chrtran[t] << (shift 1116 #ifdef POW2 1117 &= HALFLONG - 1); 1118 #else 1119 %= HALFLONG); 1120 #endif 1121 shift += 7; 1122 continue; 1123 case '\n': 1124 break; 1125 } 1126 break; 1127 } 1128 } 1129 sum = low(sum) + high(sum); 1130 return ((short) low(sum) + (short) high(sum)); 1131 } 1132 1133 int 1134 asciifile(FILE *f) 1135 { 1136 char buf[BUFSIZ], *cp; 1137 int cnt; 1138 1139 if (aflag || f == NULL) 1140 return (1); 1141 1142 rewind(f); 1143 cnt = fread(buf, 1, sizeof(buf), f); 1144 cp = buf; 1145 while (--cnt >= 0) 1146 if (*cp++ & 0200) 1147 return (0); 1148 return (1); 1149 } 1150 1151 static __inline int min(int a, int b) 1152 { 1153 return (a < b ? a : b); 1154 } 1155 1156 static __inline int max(int a, int b) 1157 { 1158 return (a > b ? a : b); 1159 } 1160 1161 /* dump accumulated "context" diff changes */ 1162 static void 1163 dump_context_vec(FILE *f1, FILE *f2) 1164 { 1165 struct context_vec *cvp = context_vec_start; 1166 int lowa, upb, lowc, upd, do_output; 1167 int a, b, c, d; 1168 char ch; 1169 1170 if (context_vec_start > context_vec_ptr) 1171 return; 1172 1173 b = d = 0; /* gcc */ 1174 lowa = max(1, cvp->a - context); 1175 upb = min(len[0], context_vec_ptr->b + context); 1176 lowc = max(1, cvp->c - context); 1177 upd = min(len[1], context_vec_ptr->d + context); 1178 1179 printf("***************\n*** "); 1180 range(lowa, upb, ","); 1181 printf(" ****\n"); 1182 1183 /* 1184 * Output changes to the "old" file. The first loop suppresses 1185 * output if there were no changes to the "old" file (we'll see 1186 * the "old" lines as context in the "new" list). 1187 */ 1188 do_output = 0; 1189 for (; cvp <= context_vec_ptr; cvp++) 1190 if (cvp->a <= cvp->b) { 1191 cvp = context_vec_start; 1192 do_output++; 1193 break; 1194 } 1195 if (do_output) { 1196 while (cvp <= context_vec_ptr) { 1197 a = cvp->a; 1198 b = cvp->b; 1199 c = cvp->c; 1200 d = cvp->d; 1201 1202 if (a <= b && c <= d) 1203 ch = 'c'; 1204 else 1205 ch = (a <= b) ? 'd' : 'a'; 1206 1207 if (ch == 'a') 1208 fetch(ixold, lowa, b, f1, " ", 0); 1209 else { 1210 fetch(ixold, lowa, a - 1, f1, " ", 0); 1211 fetch(ixold, a, b, f1, 1212 ch == 'c' ? "! " : "- ", 0); 1213 } 1214 lowa = b + 1; 1215 cvp++; 1216 } 1217 fetch(ixold, b + 1, upb, f1, " ", 0); 1218 } 1219 /* output changes to the "new" file */ 1220 printf("--- "); 1221 range(lowc, upd, ","); 1222 printf(" ----\n"); 1223 1224 do_output = 0; 1225 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1226 if (cvp->c <= cvp->d) { 1227 cvp = context_vec_start; 1228 do_output++; 1229 break; 1230 } 1231 if (do_output) { 1232 while (cvp <= context_vec_ptr) { 1233 a = cvp->a; 1234 b = cvp->b; 1235 c = cvp->c; 1236 d = cvp->d; 1237 1238 if (a <= b && c <= d) 1239 ch = 'c'; 1240 else 1241 ch = (a <= b) ? 'd' : 'a'; 1242 1243 if (ch == 'd') 1244 fetch(ixnew, lowc, d, f2, " ", 0); 1245 else { 1246 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1247 fetch(ixnew, c, d, f2, 1248 ch == 'c' ? "! " : "+ ", 0); 1249 } 1250 lowc = d + 1; 1251 cvp++; 1252 } 1253 fetch(ixnew, d + 1, upd, f2, " ", 0); 1254 } 1255 context_vec_ptr = context_vec_start - 1; 1256 } 1257 1258 /* dump accumulated "unified" diff changes */ 1259 static void 1260 dump_unified_vec(FILE *f1, FILE *f2) 1261 { 1262 struct context_vec *cvp = context_vec_start; 1263 int lowa, upb, lowc, upd; 1264 int a, b, c, d; 1265 char ch; 1266 1267 if (context_vec_start > context_vec_ptr) 1268 return; 1269 1270 b = d = 0; /* gcc */ 1271 lowa = max(1, cvp->a - context); 1272 upb = min(len[0], context_vec_ptr->b + context); 1273 lowc = max(1, cvp->c - context); 1274 upd = min(len[1], context_vec_ptr->d + context); 1275 1276 fputs("@@ -", stdout); 1277 uni_range(lowa, upb); 1278 fputs(" +", stdout); 1279 uni_range(lowc, upd); 1280 fputs(" @@\n", stdout); 1281 1282 /* 1283 * Output changes in "unified" diff format--the old and new lines 1284 * are printed together. 1285 */ 1286 for (; cvp <= context_vec_ptr; cvp++) { 1287 a = cvp->a; 1288 b = cvp->b; 1289 c = cvp->c; 1290 d = cvp->d; 1291 1292 /* 1293 * c: both new and old changes 1294 * d: only changes in the old file 1295 * a: only changes in the new file 1296 */ 1297 if (a <= b && c <= d) 1298 ch = 'c'; 1299 else 1300 ch = (a <= b) ? 'd' : 'a'; 1301 1302 switch (ch) { 1303 case 'c': 1304 fetch(ixold, lowa, a - 1, f1, " ", 0); 1305 fetch(ixold, a, b, f1, "-", 0); 1306 fetch(ixnew, c, d, f2, "+", 0); 1307 break; 1308 case 'd': 1309 fetch(ixold, lowa, a - 1, f1, " ", 0); 1310 fetch(ixold, a, b, f1, "-", 0); 1311 break; 1312 case 'a': 1313 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1314 fetch(ixnew, c, d, f2, "+", 0); 1315 break; 1316 } 1317 lowa = b + 1; 1318 lowc = d + 1; 1319 } 1320 fetch(ixnew, d + 1, upd, f2, " ", 0); 1321 1322 context_vec_ptr = context_vec_start - 1; 1323 } 1324