1 /* $OpenBSD: diffreg.c,v 1.35 2003/07/17 21:54:28 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #ifndef lint 68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.35 2003/07/17 21:54:28 millert Exp $"; 69 #endif /* not lint */ 70 71 #include <sys/param.h> 72 #include <sys/stat.h> 73 #include <sys/wait.h> 74 75 #include <ctype.h> 76 #include <err.h> 77 #include <errno.h> 78 #include <fcntl.h> 79 #include <libgen.h> 80 #include <stdio.h> 81 #include <stdlib.h> 82 #include <string.h> 83 #include <unistd.h> 84 85 #include "diff.h" 86 #include "pathnames.h" 87 88 /* 89 * diff - compare two files. 90 */ 91 92 /* 93 * Uses an algorithm due to Harold Stone, which finds 94 * a pair of longest identical subsequences in the two 95 * files. 96 * 97 * The major goal is to generate the match vector J. 98 * J[i] is the index of the line in file1 corresponding 99 * to line i file0. J[i] = 0 if there is no 100 * such line in file1. 101 * 102 * Lines are hashed so as to work in core. All potential 103 * matches are located by sorting the lines of each file 104 * on the hash (called ``value''). In particular, this 105 * collects the equivalence classes in file1 together. 106 * Subroutine equiv replaces the value of each line in 107 * file0 by the index of the first element of its 108 * matching equivalence in (the reordered) file1. 109 * To save space equiv squeezes file1 into a single 110 * array member in which the equivalence classes 111 * are simply concatenated, except that their first 112 * members are flagged by changing sign. 113 * 114 * Next the indices that point into member are unsorted into 115 * array class according to the original order of file0. 116 * 117 * The cleverness lies in routine stone. This marches 118 * through the lines of file0, developing a vector klist 119 * of "k-candidates". At step i a k-candidate is a matched 120 * pair of lines x,y (x in file0 y in file1) such that 121 * there is a common subsequence of length k 122 * between the first i lines of file0 and the first y 123 * lines of file1, but there is no such subsequence for 124 * any smaller y. x is the earliest possible mate to y 125 * that occurs in such a subsequence. 126 * 127 * Whenever any of the members of the equivalence class of 128 * lines in file1 matable to a line in file0 has serial number 129 * less than the y of some k-candidate, that k-candidate 130 * with the smallest such y is replaced. The new 131 * k-candidate is chained (via pred) to the current 132 * k-1 candidate so that the actual subsequence can 133 * be recovered. When a member has serial number greater 134 * that the y of all k-candidates, the klist is extended. 135 * At the end, the longest subsequence is pulled out 136 * and placed in the array J by unravel 137 * 138 * With J in hand, the matches there recorded are 139 * check'ed against reality to assure that no spurious 140 * matches have crept in due to hashing. If they have, 141 * they are broken, and "jackpot" is recorded--a harmless 142 * matter except that a true match for a spuriously 143 * mated line may now be unnecessarily reported as a change. 144 * 145 * Much of the complexity of the program comes simply 146 * from trying to minimize core utilization and 147 * maximize the range of doable problems by dynamically 148 * allocating what is needed and reusing what is not. 149 * The core requirements for problems larger than somewhat 150 * are (in words) 2*length(file0) + length(file1) + 151 * 3*(number of k-candidates installed), typically about 152 * 6n words for files of length n. 153 */ 154 155 struct cand { 156 int x; 157 int y; 158 int pred; 159 } cand; 160 161 struct line { 162 int serial; 163 int value; 164 } *file[2]; 165 166 static int *J; /* will be overlaid on class */ 167 static int *class; /* will be overlaid on file[0] */ 168 static int *klist; /* will be overlaid on file[0] after class */ 169 static int *member; /* will be overlaid on file[1] */ 170 static int clen; 171 static int inifdef; /* whether or not we are in a #ifdef block */ 172 static int len[2]; 173 static int pref, suff; /* length of prefix and suffix */ 174 static int slen[2]; 175 static int anychange; 176 static long *ixnew; /* will be overlaid on file[1] */ 177 static long *ixold; /* will be overlaid on klist */ 178 static struct cand *clist; /* merely a free storage pot for candidates */ 179 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 180 static u_char *chrtran; /* translation table for case-folding */ 181 182 static FILE *opentemp(const char *); 183 static void fetch(long *, int, int, FILE *, char *, int); 184 static void output(char *, FILE *, char *, FILE *); 185 static void check(char *, FILE *, char *, FILE *); 186 static void range(int, int, char *); 187 static void uni_range(int, int); 188 static void dump_context_vec(FILE *, FILE *); 189 static void dump_unified_vec(FILE *, FILE *); 190 static void prepare(int, FILE *); 191 static void prune(void); 192 static void equiv(struct line *, int, struct line *, int, int *); 193 static void unravel(int); 194 static void unsort(struct line *, int, int *); 195 static void change(char *, FILE *, char *, FILE *, int, int, int, int); 196 static void sort(struct line *, int); 197 static int asciifile(FILE *); 198 static int newcand(int, int, int); 199 static int search(int *, int, int); 200 static int skipline(FILE *); 201 static int stone(int *, int, int *, int *); 202 static int readhash(FILE *); 203 static int files_differ(FILE *, FILE *, int); 204 205 /* 206 * chrtran points to one of 2 translation tables: cup2low if folding upper to 207 * lower case clow2low if not folding case 208 */ 209 u_char clow2low[256] = { 210 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 211 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 212 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 213 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 214 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 215 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 216 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 217 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 218 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 219 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 220 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 221 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 222 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 223 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 224 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 225 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 226 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 227 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 228 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 229 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 230 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 231 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 232 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 233 0xfd, 0xfe, 0xff 234 }; 235 236 u_char cup2low[256] = { 237 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 238 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 239 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 240 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 241 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 242 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 243 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 244 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 245 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 246 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 247 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 248 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 249 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 250 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 251 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 252 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 253 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 254 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 255 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 256 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 257 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 258 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 259 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 260 0xfd, 0xfe, 0xff 261 }; 262 263 int 264 diffreg(char *ofile1, char *ofile2, int flags) 265 { 266 char *file1 = ofile1; 267 char *file2 = ofile2; 268 FILE *f1 = NULL; 269 FILE *f2 = NULL; 270 int rval = D_SAME; 271 int i, ostdout = -1; 272 pid_t pid = -1; 273 274 anychange = 0; 275 chrtran = (iflag ? cup2low : clow2low); 276 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 277 return (D_MISMATCH); 278 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 279 goto notsame; 280 281 if (flags & D_EMPTY1) 282 f1 = fopen(_PATH_DEVNULL, "r"); 283 else { 284 if (!S_ISREG(stb1.st_mode)) { 285 if ((f1 = opentemp(file1)) == NULL || 286 fstat(fileno(f1), &stb1) < 0) { 287 warn("%s", file1); 288 status |= 2; 289 goto closem; 290 } 291 } else if (strcmp(file1, "-") == 0) 292 f1 = stdin; 293 else 294 f1 = fopen(file1, "r"); 295 } 296 if (f1 == NULL) { 297 warn("%s", file1); 298 status |= 2; 299 goto closem; 300 } 301 302 if (flags & D_EMPTY2) 303 f2 = fopen(_PATH_DEVNULL, "r"); 304 else { 305 if (!S_ISREG(stb2.st_mode)) { 306 if ((f2 = opentemp(file2)) == NULL || 307 fstat(fileno(f2), &stb2) < 0) { 308 warn("%s", file2); 309 status |= 2; 310 goto closem; 311 } 312 } else if (strcmp(file2, "-") == 0) 313 f2 = stdin; 314 else 315 f2 = fopen(file2, "r"); 316 } 317 if (f2 == NULL) { 318 warn("%s", file2); 319 status |= 2; 320 goto closem; 321 } 322 323 switch (files_differ(f1, f2, flags)) { 324 case 0: 325 goto closem; 326 case 1: 327 break; 328 default: 329 /* error */ 330 status |= 2; 331 goto closem; 332 } 333 334 notsame: 335 /* 336 * Files certainly differ at this point; set status accordingly 337 */ 338 status |= 1; 339 rval = D_DIFFER; 340 if (!asciifile(f1) || !asciifile(f2)) { 341 rval = D_BINARY; 342 goto closem; 343 } 344 if (format == D_BRIEF) 345 goto closem; 346 if (lflag) { 347 /* redirect stdout to pr */ 348 int pfd[2]; 349 char *header; 350 char *prargv[] = { "pr", "-h", NULL, "-f", NULL }; 351 352 easprintf(&header, "%s %s %s", diffargs, file1, file2); 353 prargv[2] = header; 354 fflush(stdout); 355 rewind(stdout); 356 pipe(pfd); 357 switch ((pid = fork())) { 358 case -1: 359 warnx("No more processes"); 360 status |= 2; 361 free(header); 362 return (D_ERROR); 363 case 0: 364 /* child */ 365 if (pfd[0] != STDIN_FILENO) { 366 dup2(pfd[0], STDIN_FILENO); 367 close(pfd[0]); 368 } 369 close(pfd[1]); 370 execv(_PATH_PR, prargv); 371 _exit(127); 372 default: 373 /* parent */ 374 if (pfd[1] != STDOUT_FILENO) { 375 ostdout = dup(STDOUT_FILENO); 376 dup2(pfd[1], STDOUT_FILENO); 377 close(pfd[1]); 378 } 379 close(pfd[0]); 380 rewind(stdout); 381 free(header); 382 } 383 } else { 384 if (flags & D_HEADER) { 385 if (format == D_EDIT) 386 printf("ed - %s << '-*-END-*-'\n", 387 basename(file1)); 388 else 389 printf("%s %s %s\n", diffargs, file1, file2); 390 } 391 } 392 prepare(0, f1); 393 prepare(1, f2); 394 prune(); 395 sort(sfile[0], slen[0]); 396 sort(sfile[1], slen[1]); 397 398 member = (int *)file[1]; 399 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 400 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 401 402 class = (int *)file[0]; 403 unsort(sfile[0], slen[0], class); 404 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 405 406 klist = emalloc((slen[0] + 2) * sizeof(int)); 407 clist = emalloc(sizeof(cand)); 408 i = stone(class, slen[0], member, klist); 409 free(member); 410 free(class); 411 412 J = erealloc(J, (len[0] + 2) * sizeof(int)); 413 unravel(klist[i]); 414 free(clist); 415 free(klist); 416 417 ixold = erealloc(ixold, (len[0] + 2) * sizeof(long)); 418 ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long)); 419 check(file1, f1, file2, f2); 420 output(file1, f1, file2, f2); 421 if (ostdout != -1) { 422 int wstatus; 423 424 /* close the pipe to pr and restore stdout */ 425 fflush(stdout); 426 rewind(stdout); 427 if (ostdout != STDOUT_FILENO) { 428 close(STDOUT_FILENO); 429 dup2(ostdout, STDOUT_FILENO); 430 close(ostdout); 431 } 432 waitpid(pid, &wstatus, 0); 433 } else if ((flags & D_HEADER) && format == D_EDIT) 434 printf("w\nq\n-*-END-*-\n"); 435 closem: 436 if (f1 != NULL) 437 fclose(f1); 438 if (f2 != NULL) 439 fclose(f2); 440 if (file1 != ofile1) 441 free(file1); 442 if (file2 != ofile2) 443 free(file2); 444 return (rval); 445 } 446 447 /* 448 * Check to see if the given files differ. 449 * Returns 0 if they are the same, 1 if different, and -1 on error. 450 * XXX - could use code from cmp(1) [faster] 451 */ 452 static int 453 files_differ(FILE *f1, FILE *f2, int flags) 454 { 455 char buf1[BUFSIZ], buf2[BUFSIZ]; 456 size_t i, j; 457 458 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 459 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 460 return (1); 461 for (;;) { 462 i = fread(buf1, 1, sizeof(buf1), f1); 463 j = fread(buf2, 1, sizeof(buf2), f2); 464 if (i != j) 465 return (1); 466 if (i == 0 && j == 0) { 467 if (ferror(f1) || ferror(f2)) 468 return (1); 469 return (0); 470 } 471 if (memcmp(buf1, buf2, i) != 0) 472 return (1); 473 } 474 } 475 476 static FILE * 477 opentemp(const char *file) 478 { 479 char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN]; 480 ssize_t nread; 481 int ifd, ofd; 482 483 if (strcmp(file, "-") == 0) 484 ifd = STDIN_FILENO; 485 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 486 return (NULL); 487 488 if ((tempdir = getenv("TMPDIR")) == NULL) 489 tempdir = _PATH_TMP; 490 if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX", 491 tempdir) >= sizeof(tempfile)) { 492 close(ifd); 493 errno = ENAMETOOLONG; 494 return (NULL); 495 } 496 497 if ((ofd = mkstemp(tempfile)) < 0) 498 return (NULL); 499 unlink(tempfile); 500 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 501 if (write(ofd, buf, nread) != nread) { 502 close(ifd); 503 close(ofd); 504 return (NULL); 505 } 506 } 507 close(ifd); 508 return (fdopen(ofd, "r")); 509 } 510 511 char * 512 splice(char *dir, char *file) 513 { 514 char *tail, *buf; 515 516 if ((tail = strrchr(file, '/')) == NULL) 517 tail = file; 518 else 519 tail++; 520 easprintf(&buf, "%s/%s", dir, tail); 521 return (buf); 522 } 523 524 static void 525 prepare(int i, FILE *fd) 526 { 527 struct line *p; 528 int j, h; 529 530 rewind(fd); 531 p = emalloc(3 * sizeof(struct line)); 532 for (j = 0; (h = readhash(fd));) { 533 p = erealloc(p, (++j + 3) * sizeof(struct line)); 534 p[j].value = h; 535 } 536 len[i] = j; 537 file[i] = p; 538 } 539 540 static void 541 prune(void) 542 { 543 int i, j; 544 545 for (pref = 0; pref < len[0] && pref < len[1] && 546 file[0][pref + 1].value == file[1][pref + 1].value; 547 pref++) 548 ; 549 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 550 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 551 suff++) 552 ; 553 for (j = 0; j < 2; j++) { 554 sfile[j] = file[j] + pref; 555 slen[j] = len[j] - pref - suff; 556 for (i = 0; i <= slen[j]; i++) 557 sfile[j][i].serial = i; 558 } 559 } 560 561 static void 562 equiv(struct line *a, int n, struct line *b, int m, int *c) 563 { 564 int i, j; 565 566 i = j = 1; 567 while (i <= n && j <= m) { 568 if (a[i].value < b[j].value) 569 a[i++].value = 0; 570 else if (a[i].value == b[j].value) 571 a[i++].value = j; 572 else 573 j++; 574 } 575 while (i <= n) 576 a[i++].value = 0; 577 b[m + 1].value = 0; 578 j = 0; 579 while (++j <= m) { 580 c[j] = -b[j].serial; 581 while (b[j + 1].value == b[j].value) { 582 j++; 583 c[j] = b[j].serial; 584 } 585 } 586 c[j] = -1; 587 } 588 589 static int 590 stone(int *a, int n, int *b, int *c) 591 { 592 int i, k, y, j, l; 593 int oldc, tc, oldl; 594 595 k = 0; 596 c[0] = newcand(0, 0, 0); 597 for (i = 1; i <= n; i++) { 598 j = a[i]; 599 if (j == 0) 600 continue; 601 y = -b[j]; 602 oldl = 0; 603 oldc = c[0]; 604 do { 605 if (y <= clist[oldc].y) 606 continue; 607 l = search(c, k, y); 608 if (l != oldl + 1) 609 oldc = c[l - 1]; 610 if (l <= k) { 611 if (clist[c[l]].y <= y) 612 continue; 613 tc = c[l]; 614 c[l] = newcand(i, y, oldc); 615 oldc = tc; 616 oldl = l; 617 } else { 618 c[l] = newcand(i, y, oldc); 619 k++; 620 break; 621 } 622 } while ((y = b[++j]) > 0); 623 } 624 return (k); 625 } 626 627 static int 628 newcand(int x, int y, int pred) 629 { 630 struct cand *q; 631 632 clist = erealloc(clist, ++clen * sizeof(cand)); 633 q = clist + clen - 1; 634 q->x = x; 635 q->y = y; 636 q->pred = pred; 637 return (clen - 1); 638 } 639 640 static int 641 search(int *c, int k, int y) 642 { 643 int i, j, l, t; 644 645 if (clist[c[k]].y < y) /* quick look for typical case */ 646 return (k + 1); 647 i = 0; 648 j = k + 1; 649 while (1) { 650 l = i + j; 651 if ((l >>= 1) <= i) 652 break; 653 t = clist[c[l]].y; 654 if (t > y) 655 j = l; 656 else if (t < y) 657 i = l; 658 else 659 return (l); 660 } 661 return (l + 1); 662 } 663 664 static void 665 unravel(int p) 666 { 667 struct cand *q; 668 int i; 669 670 for (i = 0; i <= len[0]; i++) 671 J[i] = i <= pref ? i : 672 i > len[0] - suff ? i + len[1] - len[0] : 0; 673 for (q = clist + p; q->y != 0; q = clist + q->pred) 674 J[q->x + pref] = q->y + pref; 675 } 676 677 /* 678 * Check does double duty: 679 * 1. ferret out any fortuitous correspondences due 680 * to confounding by hashing (which result in "jackpot") 681 * 2. collect random access indexes to the two files 682 */ 683 static void 684 check(char *file1, FILE *f1, char *file2, FILE *f2) 685 { 686 int i, j, jackpot, c, d; 687 long ctold, ctnew; 688 689 rewind(f1); 690 rewind(f2); 691 j = 1; 692 ixold[0] = ixnew[0] = 0; 693 jackpot = 0; 694 ctold = ctnew = 0; 695 for (i = 1; i <= len[0]; i++) { 696 if (J[i] == 0) { 697 ixold[i] = ctold += skipline(f1); 698 continue; 699 } 700 while (j < J[i]) { 701 ixnew[j] = ctnew += skipline(f2); 702 j++; 703 } 704 if (bflag || wflag || iflag) { 705 for (;;) { 706 c = getc(f1); 707 d = getc(f2); 708 /* 709 * GNU diff ignores a missing newline 710 * in one file if bflag || wflag. 711 */ 712 if ((bflag || wflag) && 713 ((c == EOF && d == '\n') || 714 (c == '\n' && d == EOF))) { 715 break; 716 } 717 ctold++; 718 ctnew++; 719 if (bflag && isspace(c) && isspace(d)) { 720 do { 721 if (c == '\n') 722 break; 723 ctold++; 724 } while (isspace(c = getc(f1))); 725 do { 726 if (d == '\n') 727 break; 728 ctnew++; 729 } while (isspace(d = getc(f2))); 730 } else if (wflag) { 731 while (isspace(c) && c != '\n') { 732 c = getc(f1); 733 ctold++; 734 } 735 while (isspace(d) && d != '\n') { 736 d = getc(f2); 737 ctnew++; 738 } 739 } 740 if (chrtran[c] != chrtran[d]) { 741 jackpot++; 742 J[i] = 0; 743 if (c != '\n' && c != EOF) 744 ctold += skipline(f1); 745 if (d != '\n' && c != EOF) 746 ctnew += skipline(f2); 747 break; 748 } 749 if (c == '\n' || c == EOF) 750 break; 751 } 752 } else { 753 for (;;) { 754 ctold++; 755 ctnew++; 756 if ((c = getc(f1)) != (d = getc(f2))) { 757 /* jackpot++; */ 758 J[i] = 0; 759 if (c != '\n' && c != EOF) 760 ctold += skipline(f1); 761 if (d != '\n' && c != EOF) 762 ctnew += skipline(f2); 763 break; 764 } 765 if (c == '\n' || c == EOF) 766 break; 767 } 768 } 769 ixold[i] = ctold; 770 ixnew[j] = ctnew; 771 j++; 772 } 773 for (; j <= len[1]; j++) 774 ixnew[j] = ctnew += skipline(f2); 775 /* 776 * if (jackpot) 777 * fprintf(stderr, "jackpot\n"); 778 */ 779 } 780 781 /* shellsort CACM #201 */ 782 static void 783 sort(struct line *a, int n) 784 { 785 struct line *ai, *aim, w; 786 int j, m = 0, k; 787 788 if (n == 0) 789 return; 790 for (j = 1; j <= n; j *= 2) 791 m = 2 * j - 1; 792 for (m /= 2; m != 0; m /= 2) { 793 k = n - m; 794 for (j = 1; j <= k; j++) { 795 for (ai = &a[j]; ai > a; ai -= m) { 796 aim = &ai[m]; 797 if (aim < ai) 798 break; /* wraparound */ 799 if (aim->value > ai[0].value || 800 (aim->value == ai[0].value && 801 aim->serial > ai[0].serial)) 802 break; 803 w.value = ai[0].value; 804 ai[0].value = aim->value; 805 aim->value = w.value; 806 w.serial = ai[0].serial; 807 ai[0].serial = aim->serial; 808 aim->serial = w.serial; 809 } 810 } 811 } 812 } 813 814 static void 815 unsort(struct line *f, int l, int *b) 816 { 817 int *a, i; 818 819 a = emalloc((l + 1) * sizeof(int)); 820 for (i = 1; i <= l; i++) 821 a[f[i].serial] = f[i].value; 822 for (i = 1; i <= l; i++) 823 b[i] = a[i]; 824 free(a); 825 } 826 827 static int 828 skipline(FILE *f) 829 { 830 int i, c; 831 832 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 833 continue; 834 return (i); 835 } 836 837 static void 838 output(char *file1, FILE *f1, char *file2, FILE *f2) 839 { 840 int m, i0, i1, j0, j1; 841 842 rewind(f1); 843 rewind(f2); 844 m = len[0]; 845 J[0] = 0; 846 J[m + 1] = len[1] + 1; 847 if (format != D_EDIT) { 848 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 849 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 850 i0++; 851 j0 = J[i0 - 1] + 1; 852 i1 = i0 - 1; 853 while (i1 < m && J[i1 + 1] == 0) 854 i1++; 855 j1 = J[i1 + 1] - 1; 856 J[i1] = j1; 857 change(file1, f1, file2, f2, i0, i1, j0, j1); 858 } 859 } else { 860 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 861 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 862 i0--; 863 j0 = J[i0 + 1] - 1; 864 i1 = i0 + 1; 865 while (i1 > 1 && J[i1 - 1] == 0) 866 i1--; 867 j1 = J[i1 - 1] + 1; 868 J[i1] = j1; 869 change(file1, f1, file2, f2, i1, i0, j1, j0); 870 } 871 } 872 if (m == 0) 873 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 874 if (format == D_IFDEF) { 875 for (;;) { 876 #define c i0 877 if ((c = getc(f1)) == EOF) 878 return; 879 putchar(c); 880 } 881 #undef c 882 } 883 if (anychange != 0) { 884 if (format == D_CONTEXT) 885 dump_context_vec(f1, f2); 886 else if (format == D_UNIFIED) 887 dump_unified_vec(f1, f2); 888 } 889 } 890 891 static __inline void 892 range(int a, int b, char *separator) 893 { 894 printf("%d", a > b ? b : a); 895 if (a < b) 896 printf("%s%d", separator, b); 897 } 898 899 static __inline void 900 uni_range(int a, int b) 901 { 902 if (a < b) 903 printf("%d,%d", a, b - a + 1); 904 else if (a == b) 905 printf("%d", b); 906 else 907 printf("%d,0", b); 908 } 909 910 /* 911 * The following struct is used to record change information when 912 * doing a "context" or "unified" diff. (see routine "change" to 913 * understand the highly mnemonic field names) 914 */ 915 struct context_vec { 916 int a; /* start line in old file */ 917 int b; /* end line in old file */ 918 int c; /* start line in new file */ 919 int d; /* end line in new file */ 920 }; 921 922 struct context_vec *context_vec_start, *context_vec_end, *context_vec_ptr; 923 924 #define MAX_CONTEXT 128 925 926 /* 927 * Indicate that there is a difference between lines a and b of the from file 928 * to get to lines c to d of the to file. If a is greater then b then there 929 * are no lines in the from file involved and this means that there were 930 * lines appended (beginning at b). If c is greater than d then there are 931 * lines missing from the to file. 932 */ 933 static void 934 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) 935 { 936 if (format != D_IFDEF && a > b && c > d) 937 return; 938 if (anychange == 0) { 939 anychange = 1; 940 if (format == D_CONTEXT || format == D_UNIFIED) { 941 printf("%s %s %s", format == D_CONTEXT ? "***" : "---", 942 file1, ctime(&stb1.st_mtime)); 943 printf("%s %s %s", format == D_CONTEXT ? "---" : "+++", 944 file2, ctime(&stb2.st_mtime)); 945 if (context_vec_start == NULL) 946 context_vec_start = emalloc(MAX_CONTEXT * 947 sizeof(struct context_vec)); 948 context_vec_end = context_vec_start + MAX_CONTEXT; 949 context_vec_ptr = context_vec_start - 1; 950 } 951 } 952 if (format == D_CONTEXT || format == D_UNIFIED) { 953 /* 954 * If this new change is within 'context' lines of 955 * the previous change, just add it to the change 956 * record. If the record is full or if this 957 * change is more than 'context' lines from the previous 958 * change, dump the record, reset it & add the new change. 959 */ 960 if (context_vec_ptr >= context_vec_end || 961 (context_vec_ptr >= context_vec_start && 962 a > (context_vec_ptr->b + 2 * context) && 963 c > (context_vec_ptr->d + 2 * context))) { 964 if (format == D_CONTEXT) 965 dump_context_vec(f1, f2); 966 else 967 dump_unified_vec(f1, f2); 968 } 969 context_vec_ptr++; 970 context_vec_ptr->a = a; 971 context_vec_ptr->b = b; 972 context_vec_ptr->c = c; 973 context_vec_ptr->d = d; 974 return; 975 } 976 switch (format) { 977 978 case D_NORMAL: 979 case D_EDIT: 980 range(a, b, ","); 981 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 982 if (format == D_NORMAL) 983 range(c, d, ","); 984 putchar('\n'); 985 break; 986 case D_REVERSE: 987 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 988 range(a, b, " "); 989 putchar('\n'); 990 break; 991 case D_NREVERSE: 992 if (a > b) 993 printf("a%d %d\n", b, d - c + 1); 994 else { 995 printf("d%d %d\n", a, b - a + 1); 996 if (!(c > d)) 997 /* add changed lines */ 998 printf("a%d %d\n", b, d - c + 1); 999 } 1000 break; 1001 } 1002 if (format == D_NORMAL || format == D_IFDEF) { 1003 fetch(ixold, a, b, f1, "< ", 1); 1004 if (a <= b && c <= d && format == D_NORMAL) 1005 puts("---"); 1006 } 1007 fetch(ixnew, c, d, f2, format == D_NORMAL ? "> " : "", 0); 1008 if ((format == D_EDIT || format == D_REVERSE) && c <= d) 1009 puts("."); 1010 if (inifdef) { 1011 printf("#endif /* %s */\n", ifdefname); 1012 inifdef = 0; 1013 } 1014 } 1015 1016 static void 1017 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 1018 { 1019 int i, j, c, col, nc; 1020 1021 /* 1022 * When doing #ifdef's, copy down to current line 1023 * if this is the first file, so that stuff makes it to output. 1024 */ 1025 if (format == D_IFDEF && oldfile) { 1026 long curpos = ftell(lb); 1027 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1028 nc = f[a > b ? b : a - 1] - curpos; 1029 for (i = 0; i < nc; i++) 1030 putchar(getc(lb)); 1031 } 1032 if (a > b) 1033 return; 1034 if (format == D_IFDEF) { 1035 if (inifdef) { 1036 printf("#else /* %s%s */\n", 1037 oldfile == 1 ? "!" : "", ifdefname); 1038 } else { 1039 if (oldfile) 1040 printf("#ifndef %s\n", ifdefname); 1041 else 1042 printf("#ifdef %s\n", ifdefname); 1043 } 1044 inifdef = 1 + oldfile; 1045 } 1046 for (i = a; i <= b; i++) { 1047 fseek(lb, f[i - 1], SEEK_SET); 1048 nc = f[i] - f[i - 1]; 1049 if (format != D_IFDEF) 1050 fputs(s, stdout); 1051 col = 0; 1052 for (j = 0; j < nc; j++) { 1053 if ((c = getc(lb)) == EOF) { 1054 puts("\n\\ No newline at end of file"); 1055 return; 1056 } 1057 if (c == '\t' && tflag) { 1058 do { 1059 putchar(' '); 1060 } while (++col & 7); 1061 } else { 1062 putchar(c); 1063 col++; 1064 } 1065 } 1066 } 1067 } 1068 1069 #define HASHMASK (16 - 1) /* for masking out 16 bytes */ 1070 1071 /* 1072 * hashing has the effect of 1073 * arranging line in 7-bit bytes and then 1074 * summing 1-s complement in 16-bit hunks 1075 */ 1076 static int 1077 readhash(FILE *f) 1078 { 1079 unsigned int shift; 1080 int t, space; 1081 long sum; 1082 1083 sum = 1; 1084 space = 0; 1085 if (!bflag && !wflag) { 1086 if (iflag) 1087 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1088 if (t == EOF) { 1089 if (shift == 0) 1090 return (0); 1091 break; 1092 } 1093 sum += (long)chrtran[t] << (shift &= HASHMASK); 1094 } 1095 else 1096 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1097 if (t == EOF) { 1098 if (shift == 0) 1099 return (0); 1100 break; 1101 } 1102 sum += (long)t << (shift &= HASHMASK); 1103 } 1104 } else { 1105 for (shift = 0;;) { 1106 switch (t = getc(f)) { 1107 case '\t': 1108 case ' ': 1109 space++; 1110 continue; 1111 default: 1112 if (space && !wflag) { 1113 shift += 7; 1114 space = 0; 1115 } 1116 sum += (long)chrtran[t] << (shift &= HASHMASK); 1117 shift += 7; 1118 continue; 1119 case EOF: 1120 if (shift == 0) 1121 return (0); 1122 /* FALLTHROUGH */ 1123 case '\n': 1124 break; 1125 } 1126 break; 1127 } 1128 } 1129 return (sum); 1130 } 1131 1132 int 1133 asciifile(FILE *f) 1134 { 1135 char buf[BUFSIZ], *cp; 1136 int cnt; 1137 1138 if (aflag || f == NULL) 1139 return (1); 1140 1141 rewind(f); 1142 cnt = fread(buf, 1, sizeof(buf), f); 1143 cp = buf; 1144 while (--cnt >= 0) 1145 if (*cp++ & 0200) 1146 return (0); 1147 return (1); 1148 } 1149 1150 static __inline int min(int a, int b) 1151 { 1152 return (a < b ? a : b); 1153 } 1154 1155 static __inline int max(int a, int b) 1156 { 1157 return (a > b ? a : b); 1158 } 1159 1160 /* dump accumulated "context" diff changes */ 1161 static void 1162 dump_context_vec(FILE *f1, FILE *f2) 1163 { 1164 struct context_vec *cvp = context_vec_start; 1165 int lowa, upb, lowc, upd, do_output; 1166 int a, b, c, d; 1167 char ch; 1168 1169 if (context_vec_start > context_vec_ptr) 1170 return; 1171 1172 b = d = 0; /* gcc */ 1173 lowa = max(1, cvp->a - context); 1174 upb = min(len[0], context_vec_ptr->b + context); 1175 lowc = max(1, cvp->c - context); 1176 upd = min(len[1], context_vec_ptr->d + context); 1177 1178 printf("***************\n*** "); 1179 range(lowa, upb, ","); 1180 printf(" ****\n"); 1181 1182 /* 1183 * Output changes to the "old" file. The first loop suppresses 1184 * output if there were no changes to the "old" file (we'll see 1185 * the "old" lines as context in the "new" list). 1186 */ 1187 do_output = 0; 1188 for (; cvp <= context_vec_ptr; cvp++) 1189 if (cvp->a <= cvp->b) { 1190 cvp = context_vec_start; 1191 do_output++; 1192 break; 1193 } 1194 if (do_output) { 1195 while (cvp <= context_vec_ptr) { 1196 a = cvp->a; 1197 b = cvp->b; 1198 c = cvp->c; 1199 d = cvp->d; 1200 1201 if (a <= b && c <= d) 1202 ch = 'c'; 1203 else 1204 ch = (a <= b) ? 'd' : 'a'; 1205 1206 if (ch == 'a') 1207 fetch(ixold, lowa, b, f1, " ", 0); 1208 else { 1209 fetch(ixold, lowa, a - 1, f1, " ", 0); 1210 fetch(ixold, a, b, f1, 1211 ch == 'c' ? "! " : "- ", 0); 1212 } 1213 lowa = b + 1; 1214 cvp++; 1215 } 1216 fetch(ixold, b + 1, upb, f1, " ", 0); 1217 } 1218 /* output changes to the "new" file */ 1219 printf("--- "); 1220 range(lowc, upd, ","); 1221 printf(" ----\n"); 1222 1223 do_output = 0; 1224 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1225 if (cvp->c <= cvp->d) { 1226 cvp = context_vec_start; 1227 do_output++; 1228 break; 1229 } 1230 if (do_output) { 1231 while (cvp <= context_vec_ptr) { 1232 a = cvp->a; 1233 b = cvp->b; 1234 c = cvp->c; 1235 d = cvp->d; 1236 1237 if (a <= b && c <= d) 1238 ch = 'c'; 1239 else 1240 ch = (a <= b) ? 'd' : 'a'; 1241 1242 if (ch == 'd') 1243 fetch(ixnew, lowc, d, f2, " ", 0); 1244 else { 1245 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1246 fetch(ixnew, c, d, f2, 1247 ch == 'c' ? "! " : "+ ", 0); 1248 } 1249 lowc = d + 1; 1250 cvp++; 1251 } 1252 fetch(ixnew, d + 1, upd, f2, " ", 0); 1253 } 1254 context_vec_ptr = context_vec_start - 1; 1255 } 1256 1257 /* dump accumulated "unified" diff changes */ 1258 static void 1259 dump_unified_vec(FILE *f1, FILE *f2) 1260 { 1261 struct context_vec *cvp = context_vec_start; 1262 int lowa, upb, lowc, upd; 1263 int a, b, c, d; 1264 char ch; 1265 1266 if (context_vec_start > context_vec_ptr) 1267 return; 1268 1269 b = d = 0; /* gcc */ 1270 lowa = max(1, cvp->a - context); 1271 upb = min(len[0], context_vec_ptr->b + context); 1272 lowc = max(1, cvp->c - context); 1273 upd = min(len[1], context_vec_ptr->d + context); 1274 1275 fputs("@@ -", stdout); 1276 uni_range(lowa, upb); 1277 fputs(" +", stdout); 1278 uni_range(lowc, upd); 1279 fputs(" @@\n", stdout); 1280 1281 /* 1282 * Output changes in "unified" diff format--the old and new lines 1283 * are printed together. 1284 */ 1285 for (; cvp <= context_vec_ptr; cvp++) { 1286 a = cvp->a; 1287 b = cvp->b; 1288 c = cvp->c; 1289 d = cvp->d; 1290 1291 /* 1292 * c: both new and old changes 1293 * d: only changes in the old file 1294 * a: only changes in the new file 1295 */ 1296 if (a <= b && c <= d) 1297 ch = 'c'; 1298 else 1299 ch = (a <= b) ? 'd' : 'a'; 1300 1301 switch (ch) { 1302 case 'c': 1303 fetch(ixold, lowa, a - 1, f1, " ", 0); 1304 fetch(ixold, a, b, f1, "-", 0); 1305 fetch(ixnew, c, d, f2, "+", 0); 1306 break; 1307 case 'd': 1308 fetch(ixold, lowa, a - 1, f1, " ", 0); 1309 fetch(ixold, a, b, f1, "-", 0); 1310 break; 1311 case 'a': 1312 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1313 fetch(ixnew, c, d, f2, "+", 0); 1314 break; 1315 } 1316 lowa = b + 1; 1317 lowc = d + 1; 1318 } 1319 fetch(ixnew, d + 1, upd, f2, " ", 0); 1320 1321 context_vec_ptr = context_vec_start - 1; 1322 } 1323