1 /* $OpenBSD: diffreg.c,v 1.36 2003/07/21 15:56:48 millert Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #ifndef lint 68 static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.36 2003/07/21 15:56:48 millert Exp $"; 69 #endif /* not lint */ 70 71 #include <sys/param.h> 72 #include <sys/stat.h> 73 #include <sys/wait.h> 74 75 #include <ctype.h> 76 #include <err.h> 77 #include <errno.h> 78 #include <fcntl.h> 79 #include <libgen.h> 80 #include <stddef.h> 81 #include <stdio.h> 82 #include <stdlib.h> 83 #include <string.h> 84 #include <unistd.h> 85 86 #include "diff.h" 87 #include "pathnames.h" 88 89 /* 90 * diff - compare two files. 91 */ 92 93 /* 94 * Uses an algorithm due to Harold Stone, which finds 95 * a pair of longest identical subsequences in the two 96 * files. 97 * 98 * The major goal is to generate the match vector J. 99 * J[i] is the index of the line in file1 corresponding 100 * to line i file0. J[i] = 0 if there is no 101 * such line in file1. 102 * 103 * Lines are hashed so as to work in core. All potential 104 * matches are located by sorting the lines of each file 105 * on the hash (called ``value''). In particular, this 106 * collects the equivalence classes in file1 together. 107 * Subroutine equiv replaces the value of each line in 108 * file0 by the index of the first element of its 109 * matching equivalence in (the reordered) file1. 110 * To save space equiv squeezes file1 into a single 111 * array member in which the equivalence classes 112 * are simply concatenated, except that their first 113 * members are flagged by changing sign. 114 * 115 * Next the indices that point into member are unsorted into 116 * array class according to the original order of file0. 117 * 118 * The cleverness lies in routine stone. This marches 119 * through the lines of file0, developing a vector klist 120 * of "k-candidates". At step i a k-candidate is a matched 121 * pair of lines x,y (x in file0 y in file1) such that 122 * there is a common subsequence of length k 123 * between the first i lines of file0 and the first y 124 * lines of file1, but there is no such subsequence for 125 * any smaller y. x is the earliest possible mate to y 126 * that occurs in such a subsequence. 127 * 128 * Whenever any of the members of the equivalence class of 129 * lines in file1 matable to a line in file0 has serial number 130 * less than the y of some k-candidate, that k-candidate 131 * with the smallest such y is replaced. The new 132 * k-candidate is chained (via pred) to the current 133 * k-1 candidate so that the actual subsequence can 134 * be recovered. When a member has serial number greater 135 * that the y of all k-candidates, the klist is extended. 136 * At the end, the longest subsequence is pulled out 137 * and placed in the array J by unravel 138 * 139 * With J in hand, the matches there recorded are 140 * check'ed against reality to assure that no spurious 141 * matches have crept in due to hashing. If they have, 142 * they are broken, and "jackpot" is recorded--a harmless 143 * matter except that a true match for a spuriously 144 * mated line may now be unnecessarily reported as a change. 145 * 146 * Much of the complexity of the program comes simply 147 * from trying to minimize core utilization and 148 * maximize the range of doable problems by dynamically 149 * allocating what is needed and reusing what is not. 150 * The core requirements for problems larger than somewhat 151 * are (in words) 2*length(file0) + length(file1) + 152 * 3*(number of k-candidates installed), typically about 153 * 6n words for files of length n. 154 */ 155 156 struct cand { 157 int x; 158 int y; 159 int pred; 160 } cand; 161 162 struct line { 163 int serial; 164 int value; 165 } *file[2]; 166 167 /* 168 * The following struct is used to record change information when 169 * doing a "context" or "unified" diff. (see routine "change" to 170 * understand the highly mnemonic field names) 171 */ 172 struct context_vec { 173 int a; /* start line in old file */ 174 int b; /* end line in old file */ 175 int c; /* start line in new file */ 176 int d; /* end line in new file */ 177 }; 178 179 static int *J; /* will be overlaid on class */ 180 static int *class; /* will be overlaid on file[0] */ 181 static int *klist; /* will be overlaid on file[0] after class */ 182 static int *member; /* will be overlaid on file[1] */ 183 static int clen; 184 static int inifdef; /* whether or not we are in a #ifdef block */ 185 static int len[2]; 186 static int pref, suff; /* length of prefix and suffix */ 187 static int slen[2]; 188 static int anychange; 189 static long *ixnew; /* will be overlaid on file[1] */ 190 static long *ixold; /* will be overlaid on klist */ 191 static struct cand *clist; /* merely a free storage pot for candidates */ 192 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 193 static u_char *chrtran; /* translation table for case-folding */ 194 static struct context_vec *context_vec_start; 195 static struct context_vec *context_vec_end; 196 static struct context_vec *context_vec_ptr; 197 198 static FILE *opentemp(const char *); 199 static void fetch(long *, int, int, FILE *, char *, int); 200 static void output(char *, FILE *, char *, FILE *); 201 static void check(char *, FILE *, char *, FILE *); 202 static void range(int, int, char *); 203 static void uni_range(int, int); 204 static void dump_context_vec(FILE *, FILE *); 205 static void dump_unified_vec(FILE *, FILE *); 206 static void prepare(int, FILE *); 207 static void prune(void); 208 static void equiv(struct line *, int, struct line *, int, int *); 209 static void unravel(int); 210 static void unsort(struct line *, int, int *); 211 static void change(char *, FILE *, char *, FILE *, int, int, int, int); 212 static void sort(struct line *, int); 213 static int asciifile(FILE *); 214 static int newcand(int, int, int); 215 static int search(int *, int, int); 216 static int skipline(FILE *); 217 static int stone(int *, int, int *, int *); 218 static int readhash(FILE *); 219 static int files_differ(FILE *, FILE *, int); 220 221 /* 222 * chrtran points to one of 2 translation tables: cup2low if folding upper to 223 * lower case clow2low if not folding case 224 */ 225 u_char clow2low[256] = { 226 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 227 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 228 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 229 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 230 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 231 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 232 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 233 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 234 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 235 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 236 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 237 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 238 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 239 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 240 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 241 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 242 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 243 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 244 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 245 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 246 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 247 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 248 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 249 0xfd, 0xfe, 0xff 250 }; 251 252 u_char cup2low[256] = { 253 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 254 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 255 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 256 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 257 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 258 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 259 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 260 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 261 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 262 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 263 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 264 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 265 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 266 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 267 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 268 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 269 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 270 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 271 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 272 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 273 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 274 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 275 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 276 0xfd, 0xfe, 0xff 277 }; 278 279 int 280 diffreg(char *ofile1, char *ofile2, int flags) 281 { 282 char *file1 = ofile1; 283 char *file2 = ofile2; 284 FILE *f1 = NULL; 285 FILE *f2 = NULL; 286 int rval = D_SAME; 287 int i, ostdout = -1; 288 pid_t pid = -1; 289 290 anychange = 0; 291 context_vec_ptr = context_vec_start - 1; 292 chrtran = (iflag ? cup2low : clow2low); 293 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 294 return (D_MISMATCH); 295 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 296 goto notsame; 297 298 if (flags & D_EMPTY1) 299 f1 = fopen(_PATH_DEVNULL, "r"); 300 else { 301 if (!S_ISREG(stb1.st_mode)) { 302 if ((f1 = opentemp(file1)) == NULL || 303 fstat(fileno(f1), &stb1) < 0) { 304 warn("%s", file1); 305 status |= 2; 306 goto closem; 307 } 308 } else if (strcmp(file1, "-") == 0) 309 f1 = stdin; 310 else 311 f1 = fopen(file1, "r"); 312 } 313 if (f1 == NULL) { 314 warn("%s", file1); 315 status |= 2; 316 goto closem; 317 } 318 319 if (flags & D_EMPTY2) 320 f2 = fopen(_PATH_DEVNULL, "r"); 321 else { 322 if (!S_ISREG(stb2.st_mode)) { 323 if ((f2 = opentemp(file2)) == NULL || 324 fstat(fileno(f2), &stb2) < 0) { 325 warn("%s", file2); 326 status |= 2; 327 goto closem; 328 } 329 } else if (strcmp(file2, "-") == 0) 330 f2 = stdin; 331 else 332 f2 = fopen(file2, "r"); 333 } 334 if (f2 == NULL) { 335 warn("%s", file2); 336 status |= 2; 337 goto closem; 338 } 339 340 switch (files_differ(f1, f2, flags)) { 341 case 0: 342 goto closem; 343 case 1: 344 break; 345 default: 346 /* error */ 347 status |= 2; 348 goto closem; 349 } 350 351 notsame: 352 /* 353 * Files certainly differ at this point; set status accordingly 354 */ 355 status |= 1; 356 rval = D_DIFFER; 357 if (!asciifile(f1) || !asciifile(f2)) { 358 rval = D_BINARY; 359 goto closem; 360 } 361 if (format == D_BRIEF) 362 goto closem; 363 if (lflag) { 364 /* redirect stdout to pr */ 365 int pfd[2]; 366 char *header; 367 char *prargv[] = { "pr", "-h", NULL, "-f", NULL }; 368 369 easprintf(&header, "%s %s %s", diffargs, file1, file2); 370 prargv[2] = header; 371 fflush(stdout); 372 rewind(stdout); 373 pipe(pfd); 374 switch ((pid = fork())) { 375 case -1: 376 warnx("No more processes"); 377 status |= 2; 378 free(header); 379 return (D_ERROR); 380 case 0: 381 /* child */ 382 if (pfd[0] != STDIN_FILENO) { 383 dup2(pfd[0], STDIN_FILENO); 384 close(pfd[0]); 385 } 386 close(pfd[1]); 387 execv(_PATH_PR, prargv); 388 _exit(127); 389 default: 390 /* parent */ 391 if (pfd[1] != STDOUT_FILENO) { 392 ostdout = dup(STDOUT_FILENO); 393 dup2(pfd[1], STDOUT_FILENO); 394 close(pfd[1]); 395 } 396 close(pfd[0]); 397 rewind(stdout); 398 free(header); 399 } 400 } else { 401 if (flags & D_HEADER) { 402 if (format == D_EDIT) 403 printf("ed - %s << '-*-END-*-'\n", 404 basename(file1)); 405 else 406 printf("%s %s %s\n", diffargs, file1, file2); 407 } 408 } 409 prepare(0, f1); 410 prepare(1, f2); 411 prune(); 412 sort(sfile[0], slen[0]); 413 sort(sfile[1], slen[1]); 414 415 member = (int *)file[1]; 416 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 417 member = erealloc(member, (slen[1] + 2) * sizeof(int)); 418 419 class = (int *)file[0]; 420 unsort(sfile[0], slen[0], class); 421 class = erealloc(class, (slen[0] + 2) * sizeof(int)); 422 423 klist = emalloc((slen[0] + 2) * sizeof(int)); 424 clist = emalloc(sizeof(cand)); 425 i = stone(class, slen[0], member, klist); 426 free(member); 427 free(class); 428 429 J = erealloc(J, (len[0] + 2) * sizeof(int)); 430 unravel(klist[i]); 431 free(clist); 432 free(klist); 433 434 ixold = erealloc(ixold, (len[0] + 2) * sizeof(long)); 435 ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long)); 436 check(file1, f1, file2, f2); 437 output(file1, f1, file2, f2); 438 if (ostdout != -1) { 439 int wstatus; 440 441 /* close the pipe to pr and restore stdout */ 442 fflush(stdout); 443 rewind(stdout); 444 if (ostdout != STDOUT_FILENO) { 445 close(STDOUT_FILENO); 446 dup2(ostdout, STDOUT_FILENO); 447 close(ostdout); 448 } 449 waitpid(pid, &wstatus, 0); 450 } else if ((flags & D_HEADER) && format == D_EDIT) 451 printf("w\nq\n-*-END-*-\n"); 452 closem: 453 if (f1 != NULL) 454 fclose(f1); 455 if (f2 != NULL) 456 fclose(f2); 457 if (file1 != ofile1) 458 free(file1); 459 if (file2 != ofile2) 460 free(file2); 461 return (rval); 462 } 463 464 /* 465 * Check to see if the given files differ. 466 * Returns 0 if they are the same, 1 if different, and -1 on error. 467 * XXX - could use code from cmp(1) [faster] 468 */ 469 static int 470 files_differ(FILE *f1, FILE *f2, int flags) 471 { 472 char buf1[BUFSIZ], buf2[BUFSIZ]; 473 size_t i, j; 474 475 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 476 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 477 return (1); 478 for (;;) { 479 i = fread(buf1, 1, sizeof(buf1), f1); 480 j = fread(buf2, 1, sizeof(buf2), f2); 481 if (i != j) 482 return (1); 483 if (i == 0 && j == 0) { 484 if (ferror(f1) || ferror(f2)) 485 return (1); 486 return (0); 487 } 488 if (memcmp(buf1, buf2, i) != 0) 489 return (1); 490 } 491 } 492 493 static FILE * 494 opentemp(const char *file) 495 { 496 char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN]; 497 ssize_t nread; 498 int ifd, ofd; 499 500 if (strcmp(file, "-") == 0) 501 ifd = STDIN_FILENO; 502 else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 503 return (NULL); 504 505 if ((tempdir = getenv("TMPDIR")) == NULL) 506 tempdir = _PATH_TMP; 507 if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX", 508 tempdir) >= sizeof(tempfile)) { 509 close(ifd); 510 errno = ENAMETOOLONG; 511 return (NULL); 512 } 513 514 if ((ofd = mkstemp(tempfile)) < 0) 515 return (NULL); 516 unlink(tempfile); 517 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 518 if (write(ofd, buf, nread) != nread) { 519 close(ifd); 520 close(ofd); 521 return (NULL); 522 } 523 } 524 close(ifd); 525 return (fdopen(ofd, "r")); 526 } 527 528 char * 529 splice(char *dir, char *file) 530 { 531 char *tail, *buf; 532 533 if ((tail = strrchr(file, '/')) == NULL) 534 tail = file; 535 else 536 tail++; 537 easprintf(&buf, "%s/%s", dir, tail); 538 return (buf); 539 } 540 541 static void 542 prepare(int i, FILE *fd) 543 { 544 struct line *p; 545 int j, h; 546 547 rewind(fd); 548 p = emalloc(3 * sizeof(struct line)); 549 for (j = 0; (h = readhash(fd));) { 550 p = erealloc(p, (++j + 3) * sizeof(struct line)); 551 p[j].value = h; 552 } 553 len[i] = j; 554 file[i] = p; 555 } 556 557 static void 558 prune(void) 559 { 560 int i, j; 561 562 for (pref = 0; pref < len[0] && pref < len[1] && 563 file[0][pref + 1].value == file[1][pref + 1].value; 564 pref++) 565 ; 566 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 567 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 568 suff++) 569 ; 570 for (j = 0; j < 2; j++) { 571 sfile[j] = file[j] + pref; 572 slen[j] = len[j] - pref - suff; 573 for (i = 0; i <= slen[j]; i++) 574 sfile[j][i].serial = i; 575 } 576 } 577 578 static void 579 equiv(struct line *a, int n, struct line *b, int m, int *c) 580 { 581 int i, j; 582 583 i = j = 1; 584 while (i <= n && j <= m) { 585 if (a[i].value < b[j].value) 586 a[i++].value = 0; 587 else if (a[i].value == b[j].value) 588 a[i++].value = j; 589 else 590 j++; 591 } 592 while (i <= n) 593 a[i++].value = 0; 594 b[m + 1].value = 0; 595 j = 0; 596 while (++j <= m) { 597 c[j] = -b[j].serial; 598 while (b[j + 1].value == b[j].value) { 599 j++; 600 c[j] = b[j].serial; 601 } 602 } 603 c[j] = -1; 604 } 605 606 static int 607 stone(int *a, int n, int *b, int *c) 608 { 609 int i, k, y, j, l; 610 int oldc, tc, oldl; 611 612 k = 0; 613 c[0] = newcand(0, 0, 0); 614 for (i = 1; i <= n; i++) { 615 j = a[i]; 616 if (j == 0) 617 continue; 618 y = -b[j]; 619 oldl = 0; 620 oldc = c[0]; 621 do { 622 if (y <= clist[oldc].y) 623 continue; 624 l = search(c, k, y); 625 if (l != oldl + 1) 626 oldc = c[l - 1]; 627 if (l <= k) { 628 if (clist[c[l]].y <= y) 629 continue; 630 tc = c[l]; 631 c[l] = newcand(i, y, oldc); 632 oldc = tc; 633 oldl = l; 634 } else { 635 c[l] = newcand(i, y, oldc); 636 k++; 637 break; 638 } 639 } while ((y = b[++j]) > 0); 640 } 641 return (k); 642 } 643 644 static int 645 newcand(int x, int y, int pred) 646 { 647 struct cand *q; 648 649 clist = erealloc(clist, ++clen * sizeof(cand)); 650 q = clist + clen - 1; 651 q->x = x; 652 q->y = y; 653 q->pred = pred; 654 return (clen - 1); 655 } 656 657 static int 658 search(int *c, int k, int y) 659 { 660 int i, j, l, t; 661 662 if (clist[c[k]].y < y) /* quick look for typical case */ 663 return (k + 1); 664 i = 0; 665 j = k + 1; 666 while (1) { 667 l = i + j; 668 if ((l >>= 1) <= i) 669 break; 670 t = clist[c[l]].y; 671 if (t > y) 672 j = l; 673 else if (t < y) 674 i = l; 675 else 676 return (l); 677 } 678 return (l + 1); 679 } 680 681 static void 682 unravel(int p) 683 { 684 struct cand *q; 685 int i; 686 687 for (i = 0; i <= len[0]; i++) 688 J[i] = i <= pref ? i : 689 i > len[0] - suff ? i + len[1] - len[0] : 0; 690 for (q = clist + p; q->y != 0; q = clist + q->pred) 691 J[q->x + pref] = q->y + pref; 692 } 693 694 /* 695 * Check does double duty: 696 * 1. ferret out any fortuitous correspondences due 697 * to confounding by hashing (which result in "jackpot") 698 * 2. collect random access indexes to the two files 699 */ 700 static void 701 check(char *file1, FILE *f1, char *file2, FILE *f2) 702 { 703 int i, j, jackpot, c, d; 704 long ctold, ctnew; 705 706 rewind(f1); 707 rewind(f2); 708 j = 1; 709 ixold[0] = ixnew[0] = 0; 710 jackpot = 0; 711 ctold = ctnew = 0; 712 for (i = 1; i <= len[0]; i++) { 713 if (J[i] == 0) { 714 ixold[i] = ctold += skipline(f1); 715 continue; 716 } 717 while (j < J[i]) { 718 ixnew[j] = ctnew += skipline(f2); 719 j++; 720 } 721 if (bflag || wflag || iflag) { 722 for (;;) { 723 c = getc(f1); 724 d = getc(f2); 725 /* 726 * GNU diff ignores a missing newline 727 * in one file if bflag || wflag. 728 */ 729 if ((bflag || wflag) && 730 ((c == EOF && d == '\n') || 731 (c == '\n' && d == EOF))) { 732 break; 733 } 734 ctold++; 735 ctnew++; 736 if (bflag && isspace(c) && isspace(d)) { 737 do { 738 if (c == '\n') 739 break; 740 ctold++; 741 } while (isspace(c = getc(f1))); 742 do { 743 if (d == '\n') 744 break; 745 ctnew++; 746 } while (isspace(d = getc(f2))); 747 } else if (wflag) { 748 while (isspace(c) && c != '\n') { 749 c = getc(f1); 750 ctold++; 751 } 752 while (isspace(d) && d != '\n') { 753 d = getc(f2); 754 ctnew++; 755 } 756 } 757 if (chrtran[c] != chrtran[d]) { 758 jackpot++; 759 J[i] = 0; 760 if (c != '\n' && c != EOF) 761 ctold += skipline(f1); 762 if (d != '\n' && c != EOF) 763 ctnew += skipline(f2); 764 break; 765 } 766 if (c == '\n' || c == EOF) 767 break; 768 } 769 } else { 770 for (;;) { 771 ctold++; 772 ctnew++; 773 if ((c = getc(f1)) != (d = getc(f2))) { 774 /* jackpot++; */ 775 J[i] = 0; 776 if (c != '\n' && c != EOF) 777 ctold += skipline(f1); 778 if (d != '\n' && c != EOF) 779 ctnew += skipline(f2); 780 break; 781 } 782 if (c == '\n' || c == EOF) 783 break; 784 } 785 } 786 ixold[i] = ctold; 787 ixnew[j] = ctnew; 788 j++; 789 } 790 for (; j <= len[1]; j++) 791 ixnew[j] = ctnew += skipline(f2); 792 /* 793 * if (jackpot) 794 * fprintf(stderr, "jackpot\n"); 795 */ 796 } 797 798 /* shellsort CACM #201 */ 799 static void 800 sort(struct line *a, int n) 801 { 802 struct line *ai, *aim, w; 803 int j, m = 0, k; 804 805 if (n == 0) 806 return; 807 for (j = 1; j <= n; j *= 2) 808 m = 2 * j - 1; 809 for (m /= 2; m != 0; m /= 2) { 810 k = n - m; 811 for (j = 1; j <= k; j++) { 812 for (ai = &a[j]; ai > a; ai -= m) { 813 aim = &ai[m]; 814 if (aim < ai) 815 break; /* wraparound */ 816 if (aim->value > ai[0].value || 817 (aim->value == ai[0].value && 818 aim->serial > ai[0].serial)) 819 break; 820 w.value = ai[0].value; 821 ai[0].value = aim->value; 822 aim->value = w.value; 823 w.serial = ai[0].serial; 824 ai[0].serial = aim->serial; 825 aim->serial = w.serial; 826 } 827 } 828 } 829 } 830 831 static void 832 unsort(struct line *f, int l, int *b) 833 { 834 int *a, i; 835 836 a = emalloc((l + 1) * sizeof(int)); 837 for (i = 1; i <= l; i++) 838 a[f[i].serial] = f[i].value; 839 for (i = 1; i <= l; i++) 840 b[i] = a[i]; 841 free(a); 842 } 843 844 static int 845 skipline(FILE *f) 846 { 847 int i, c; 848 849 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 850 continue; 851 return (i); 852 } 853 854 static void 855 output(char *file1, FILE *f1, char *file2, FILE *f2) 856 { 857 int m, i0, i1, j0, j1; 858 859 rewind(f1); 860 rewind(f2); 861 m = len[0]; 862 J[0] = 0; 863 J[m + 1] = len[1] + 1; 864 if (format != D_EDIT) { 865 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 866 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 867 i0++; 868 j0 = J[i0 - 1] + 1; 869 i1 = i0 - 1; 870 while (i1 < m && J[i1 + 1] == 0) 871 i1++; 872 j1 = J[i1 + 1] - 1; 873 J[i1] = j1; 874 change(file1, f1, file2, f2, i0, i1, j0, j1); 875 } 876 } else { 877 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 878 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 879 i0--; 880 j0 = J[i0 + 1] - 1; 881 i1 = i0 + 1; 882 while (i1 > 1 && J[i1 - 1] == 0) 883 i1--; 884 j1 = J[i1 - 1] + 1; 885 J[i1] = j1; 886 change(file1, f1, file2, f2, i1, i0, j1, j0); 887 } 888 } 889 if (m == 0) 890 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 891 if (format == D_IFDEF) { 892 for (;;) { 893 #define c i0 894 if ((c = getc(f1)) == EOF) 895 return; 896 putchar(c); 897 } 898 #undef c 899 } 900 if (anychange != 0) { 901 if (format == D_CONTEXT) 902 dump_context_vec(f1, f2); 903 else if (format == D_UNIFIED) 904 dump_unified_vec(f1, f2); 905 } 906 } 907 908 static __inline void 909 range(int a, int b, char *separator) 910 { 911 printf("%d", a > b ? b : a); 912 if (a < b) 913 printf("%s%d", separator, b); 914 } 915 916 static __inline void 917 uni_range(int a, int b) 918 { 919 if (a < b) 920 printf("%d,%d", a, b - a + 1); 921 else if (a == b) 922 printf("%d", b); 923 else 924 printf("%d,0", b); 925 } 926 927 /* 928 * Indicate that there is a difference between lines a and b of the from file 929 * to get to lines c to d of the to file. If a is greater then b then there 930 * are no lines in the from file involved and this means that there were 931 * lines appended (beginning at b). If c is greater than d then there are 932 * lines missing from the to file. 933 */ 934 static void 935 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d) 936 { 937 static size_t max_context = 64; 938 939 if (format != D_IFDEF && a > b && c > d) 940 return; 941 if (format == D_CONTEXT || format == D_UNIFIED) { 942 /* 943 * Allocate change records as needed. 944 */ 945 if (context_vec_ptr == context_vec_end - 1) { 946 ptrdiff_t offset = context_vec_ptr - context_vec_start; 947 max_context <<= 1; 948 context_vec_start = erealloc(context_vec_start, 949 max_context * sizeof(struct context_vec)); 950 context_vec_end = context_vec_start + max_context; 951 context_vec_ptr = context_vec_start + offset; 952 } 953 if (anychange == 0) { 954 /* 955 * Print the context/unidiff header first time through. 956 */ 957 printf("%s %s %s", format == D_CONTEXT ? "***" : "---", 958 file1, ctime(&stb1.st_mtime)); 959 printf("%s %s %s", format == D_CONTEXT ? "---" : "+++", 960 file2, ctime(&stb2.st_mtime)); 961 anychange = 1; 962 } else if (a > context_vec_ptr->b + (2 * context) && 963 c > context_vec_ptr->d + (2 * context)) { 964 /* 965 * If this change is more than 'context' lines from the 966 * previous change, dump the record and reset it. 967 */ 968 if (format == D_CONTEXT) 969 dump_context_vec(f1, f2); 970 else 971 dump_unified_vec(f1, f2); 972 } 973 context_vec_ptr++; 974 context_vec_ptr->a = a; 975 context_vec_ptr->b = b; 976 context_vec_ptr->c = c; 977 context_vec_ptr->d = d; 978 return; 979 } 980 if (anychange == 0) 981 anychange = 1; 982 switch (format) { 983 984 case D_NORMAL: 985 case D_EDIT: 986 range(a, b, ","); 987 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 988 if (format == D_NORMAL) 989 range(c, d, ","); 990 putchar('\n'); 991 break; 992 case D_REVERSE: 993 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 994 range(a, b, " "); 995 putchar('\n'); 996 break; 997 case D_NREVERSE: 998 if (a > b) 999 printf("a%d %d\n", b, d - c + 1); 1000 else { 1001 printf("d%d %d\n", a, b - a + 1); 1002 if (!(c > d)) 1003 /* add changed lines */ 1004 printf("a%d %d\n", b, d - c + 1); 1005 } 1006 break; 1007 } 1008 if (format == D_NORMAL || format == D_IFDEF) { 1009 fetch(ixold, a, b, f1, "< ", 1); 1010 if (a <= b && c <= d && format == D_NORMAL) 1011 puts("---"); 1012 } 1013 fetch(ixnew, c, d, f2, format == D_NORMAL ? "> " : "", 0); 1014 if ((format == D_EDIT || format == D_REVERSE) && c <= d) 1015 puts("."); 1016 if (inifdef) { 1017 printf("#endif /* %s */\n", ifdefname); 1018 inifdef = 0; 1019 } 1020 } 1021 1022 static void 1023 fetch(long *f, int a, int b, FILE *lb, char *s, int oldfile) 1024 { 1025 int i, j, c, col, nc; 1026 1027 /* 1028 * When doing #ifdef's, copy down to current line 1029 * if this is the first file, so that stuff makes it to output. 1030 */ 1031 if (format == D_IFDEF && oldfile) { 1032 long curpos = ftell(lb); 1033 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1034 nc = f[a > b ? b : a - 1] - curpos; 1035 for (i = 0; i < nc; i++) 1036 putchar(getc(lb)); 1037 } 1038 if (a > b) 1039 return; 1040 if (format == D_IFDEF) { 1041 if (inifdef) { 1042 printf("#else /* %s%s */\n", 1043 oldfile == 1 ? "!" : "", ifdefname); 1044 } else { 1045 if (oldfile) 1046 printf("#ifndef %s\n", ifdefname); 1047 else 1048 printf("#ifdef %s\n", ifdefname); 1049 } 1050 inifdef = 1 + oldfile; 1051 } 1052 for (i = a; i <= b; i++) { 1053 fseek(lb, f[i - 1], SEEK_SET); 1054 nc = f[i] - f[i - 1]; 1055 if (format != D_IFDEF) 1056 fputs(s, stdout); 1057 col = 0; 1058 for (j = 0; j < nc; j++) { 1059 if ((c = getc(lb)) == EOF) { 1060 puts("\n\\ No newline at end of file"); 1061 return; 1062 } 1063 if (c == '\t' && tflag) { 1064 do { 1065 putchar(' '); 1066 } while (++col & 7); 1067 } else { 1068 putchar(c); 1069 col++; 1070 } 1071 } 1072 } 1073 } 1074 1075 #define HASHMASK (16 - 1) /* for masking out 16 bytes */ 1076 1077 /* 1078 * hashing has the effect of 1079 * arranging line in 7-bit bytes and then 1080 * summing 1-s complement in 16-bit hunks 1081 */ 1082 static int 1083 readhash(FILE *f) 1084 { 1085 unsigned int shift; 1086 int t, space; 1087 long sum; 1088 1089 sum = 1; 1090 space = 0; 1091 if (!bflag && !wflag) { 1092 if (iflag) 1093 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1094 if (t == EOF) { 1095 if (shift == 0) 1096 return (0); 1097 break; 1098 } 1099 sum += (long)chrtran[t] << (shift &= HASHMASK); 1100 } 1101 else 1102 for (shift = 0; (t = getc(f)) != '\n'; shift += 7) { 1103 if (t == EOF) { 1104 if (shift == 0) 1105 return (0); 1106 break; 1107 } 1108 sum += (long)t << (shift &= HASHMASK); 1109 } 1110 } else { 1111 for (shift = 0;;) { 1112 switch (t = getc(f)) { 1113 case '\t': 1114 case ' ': 1115 space++; 1116 continue; 1117 default: 1118 if (space && !wflag) { 1119 shift += 7; 1120 space = 0; 1121 } 1122 sum += (long)chrtran[t] << (shift &= HASHMASK); 1123 shift += 7; 1124 continue; 1125 case EOF: 1126 if (shift == 0) 1127 return (0); 1128 /* FALLTHROUGH */ 1129 case '\n': 1130 break; 1131 } 1132 break; 1133 } 1134 } 1135 return (sum); 1136 } 1137 1138 int 1139 asciifile(FILE *f) 1140 { 1141 char buf[BUFSIZ], *cp; 1142 int cnt; 1143 1144 if (aflag || f == NULL) 1145 return (1); 1146 1147 rewind(f); 1148 cnt = fread(buf, 1, sizeof(buf), f); 1149 cp = buf; 1150 while (--cnt >= 0) 1151 if (*cp++ & 0200) 1152 return (0); 1153 return (1); 1154 } 1155 1156 static __inline int min(int a, int b) 1157 { 1158 return (a < b ? a : b); 1159 } 1160 1161 static __inline int max(int a, int b) 1162 { 1163 return (a > b ? a : b); 1164 } 1165 1166 /* dump accumulated "context" diff changes */ 1167 static void 1168 dump_context_vec(FILE *f1, FILE *f2) 1169 { 1170 struct context_vec *cvp = context_vec_start; 1171 int lowa, upb, lowc, upd, do_output; 1172 int a, b, c, d; 1173 char ch; 1174 1175 if (context_vec_start > context_vec_ptr) 1176 return; 1177 1178 b = d = 0; /* gcc */ 1179 lowa = max(1, cvp->a - context); 1180 upb = min(len[0], context_vec_ptr->b + context); 1181 lowc = max(1, cvp->c - context); 1182 upd = min(len[1], context_vec_ptr->d + context); 1183 1184 printf("***************\n*** "); 1185 range(lowa, upb, ","); 1186 printf(" ****\n"); 1187 1188 /* 1189 * Output changes to the "old" file. The first loop suppresses 1190 * output if there were no changes to the "old" file (we'll see 1191 * the "old" lines as context in the "new" list). 1192 */ 1193 do_output = 0; 1194 for (; cvp <= context_vec_ptr; cvp++) 1195 if (cvp->a <= cvp->b) { 1196 cvp = context_vec_start; 1197 do_output++; 1198 break; 1199 } 1200 if (do_output) { 1201 while (cvp <= context_vec_ptr) { 1202 a = cvp->a; 1203 b = cvp->b; 1204 c = cvp->c; 1205 d = cvp->d; 1206 1207 if (a <= b && c <= d) 1208 ch = 'c'; 1209 else 1210 ch = (a <= b) ? 'd' : 'a'; 1211 1212 if (ch == 'a') 1213 fetch(ixold, lowa, b, f1, " ", 0); 1214 else { 1215 fetch(ixold, lowa, a - 1, f1, " ", 0); 1216 fetch(ixold, a, b, f1, 1217 ch == 'c' ? "! " : "- ", 0); 1218 } 1219 lowa = b + 1; 1220 cvp++; 1221 } 1222 fetch(ixold, b + 1, upb, f1, " ", 0); 1223 } 1224 /* output changes to the "new" file */ 1225 printf("--- "); 1226 range(lowc, upd, ","); 1227 printf(" ----\n"); 1228 1229 do_output = 0; 1230 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1231 if (cvp->c <= cvp->d) { 1232 cvp = context_vec_start; 1233 do_output++; 1234 break; 1235 } 1236 if (do_output) { 1237 while (cvp <= context_vec_ptr) { 1238 a = cvp->a; 1239 b = cvp->b; 1240 c = cvp->c; 1241 d = cvp->d; 1242 1243 if (a <= b && c <= d) 1244 ch = 'c'; 1245 else 1246 ch = (a <= b) ? 'd' : 'a'; 1247 1248 if (ch == 'd') 1249 fetch(ixnew, lowc, d, f2, " ", 0); 1250 else { 1251 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1252 fetch(ixnew, c, d, f2, 1253 ch == 'c' ? "! " : "+ ", 0); 1254 } 1255 lowc = d + 1; 1256 cvp++; 1257 } 1258 fetch(ixnew, d + 1, upd, f2, " ", 0); 1259 } 1260 context_vec_ptr = context_vec_start - 1; 1261 } 1262 1263 /* dump accumulated "unified" diff changes */ 1264 static void 1265 dump_unified_vec(FILE *f1, FILE *f2) 1266 { 1267 struct context_vec *cvp = context_vec_start; 1268 int lowa, upb, lowc, upd; 1269 int a, b, c, d; 1270 char ch; 1271 1272 if (context_vec_start > context_vec_ptr) 1273 return; 1274 1275 b = d = 0; /* gcc */ 1276 lowa = max(1, cvp->a - context); 1277 upb = min(len[0], context_vec_ptr->b + context); 1278 lowc = max(1, cvp->c - context); 1279 upd = min(len[1], context_vec_ptr->d + context); 1280 1281 fputs("@@ -", stdout); 1282 uni_range(lowa, upb); 1283 fputs(" +", stdout); 1284 uni_range(lowc, upd); 1285 fputs(" @@\n", stdout); 1286 1287 /* 1288 * Output changes in "unified" diff format--the old and new lines 1289 * are printed together. 1290 */ 1291 for (; cvp <= context_vec_ptr; cvp++) { 1292 a = cvp->a; 1293 b = cvp->b; 1294 c = cvp->c; 1295 d = cvp->d; 1296 1297 /* 1298 * c: both new and old changes 1299 * d: only changes in the old file 1300 * a: only changes in the new file 1301 */ 1302 if (a <= b && c <= d) 1303 ch = 'c'; 1304 else 1305 ch = (a <= b) ? 'd' : 'a'; 1306 1307 switch (ch) { 1308 case 'c': 1309 fetch(ixold, lowa, a - 1, f1, " ", 0); 1310 fetch(ixold, a, b, f1, "-", 0); 1311 fetch(ixnew, c, d, f2, "+", 0); 1312 break; 1313 case 'd': 1314 fetch(ixold, lowa, a - 1, f1, " ", 0); 1315 fetch(ixold, a, b, f1, "-", 0); 1316 break; 1317 case 'a': 1318 fetch(ixnew, lowc, c - 1, f2, " ", 0); 1319 fetch(ixnew, c, d, f2, "+", 0); 1320 break; 1321 } 1322 lowa = b + 1; 1323 lowc = d + 1; 1324 } 1325 fetch(ixnew, d + 1, upd, f2, " ", 0); 1326 1327 context_vec_ptr = context_vec_start - 1; 1328 } 1329