1 /* $OpenBSD: diff.c,v 1.33 2005/04/25 19:09:15 jfb Exp $ */ 2 /* 3 * Copyright (C) Caldera International Inc. 2001-2002. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code and documentation must retain the above 10 * copyright notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed or owned by Caldera 17 * International, Inc. 18 * 4. Neither the name of Caldera International, Inc. nor the names of other 19 * contributors may be used to endorse or promote products derived from 20 * this software without specific prior written permission. 21 * 22 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 23 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 27 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 /*- 36 * Copyright (c) 1991, 1993 37 * The Regents of the University of California. All rights reserved. 38 * Copyright (c) 2004 Jean-Francois Brousseau. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 /* 67 * Uses an algorithm due to Harold Stone, which finds 68 * a pair of longest identical subsequences in the two 69 * files. 70 * 71 * The major goal is to generate the match vector J. 72 * J[i] is the index of the line in file1 corresponding 73 * to line i file0. J[i] = 0 if there is no 74 * such line in file1. 75 * 76 * Lines are hashed so as to work in core. All potential 77 * matches are located by sorting the lines of each file 78 * on the hash (called ``value''). In particular, this 79 * collects the equivalence classes in file1 together. 80 * Subroutine equiv replaces the value of each line in 81 * file0 by the index of the first element of its 82 * matching equivalence in (the reordered) file1. 83 * To save space equiv squeezes file1 into a single 84 * array member in which the equivalence classes 85 * are simply concatenated, except that their first 86 * members are flagged by changing sign. 87 * 88 * Next the indices that point into member are unsorted into 89 * array class according to the original order of file0. 90 * 91 * The cleverness lies in routine stone. This marches 92 * through the lines of file0, developing a vector klist 93 * of "k-candidates". At step i a k-candidate is a matched 94 * pair of lines x,y (x in file0 y in file1) such that 95 * there is a common subsequence of length k 96 * between the first i lines of file0 and the first y 97 * lines of file1, but there is no such subsequence for 98 * any smaller y. x is the earliest possible mate to y 99 * that occurs in such a subsequence. 100 * 101 * Whenever any of the members of the equivalence class of 102 * lines in file1 matable to a line in file0 has serial number 103 * less than the y of some k-candidate, that k-candidate 104 * with the smallest such y is replaced. The new 105 * k-candidate is chained (via pred) to the current 106 * k-1 candidate so that the actual subsequence can 107 * be recovered. When a member has serial number greater 108 * that the y of all k-candidates, the klist is extended. 109 * At the end, the longest subsequence is pulled out 110 * and placed in the array J by unravel 111 * 112 * With J in hand, the matches there recorded are 113 * check'ed against reality to assure that no spurious 114 * matches have crept in due to hashing. If they have, 115 * they are broken, and "jackpot" is recorded--a harmless 116 * matter except that a true match for a spuriously 117 * mated line may now be unnecessarily reported as a change. 118 * 119 * Much of the complexity of the program comes simply 120 * from trying to minimize core utilization and 121 * maximize the range of doable problems by dynamically 122 * allocating what is needed and reusing what is not. 123 * The core requirements for problems larger than somewhat 124 * are (in words) 2*length(file0) + length(file1) + 125 * 3*(number of k-candidates installed), typically about 126 * 6n words for files of length n. 127 */ 128 129 #include <sys/param.h> 130 #include <sys/stat.h> 131 #include <sys/wait.h> 132 133 #include <err.h> 134 #include <errno.h> 135 #include <ctype.h> 136 #include <stdio.h> 137 #include <fcntl.h> 138 #include <paths.h> 139 #include <regex.h> 140 #include <dirent.h> 141 #include <stdlib.h> 142 #include <stddef.h> 143 #include <unistd.h> 144 #include <string.h> 145 146 #include "cvs.h" 147 #include "log.h" 148 #include "buf.h" 149 #include "proto.h" 150 151 152 #define CVS_DIFF_DEFCTX 3 /* default context length */ 153 154 155 /* 156 * Output format options 157 */ 158 #define D_NORMAL 0 /* Normal output */ 159 #define D_CONTEXT 1 /* Diff with context */ 160 #define D_UNIFIED 2 /* Unified context diff */ 161 #define D_IFDEF 3 /* Diff with merged #ifdef's */ 162 #define D_BRIEF 4 /* Say if the files differ */ 163 #define D_RCSDIFF 5 /* Reverse editor output: RCS format */ 164 165 /* 166 * Status values for print_status() and diffreg() return values 167 */ 168 #define D_SAME 0 /* Files are the same */ 169 #define D_DIFFER 1 /* Files are different */ 170 #define D_BINARY 2 /* Binary files are different */ 171 #define D_COMMON 3 /* Subdirectory common to both dirs */ 172 #define D_ONLY 4 /* Only exists in one directory */ 173 #define D_MISMATCH1 5 /* path1 was a dir, path2 a file */ 174 #define D_MISMATCH2 6 /* path1 was a file, path2 a dir */ 175 #define D_ERROR 7 /* An error occurred */ 176 #define D_SKIPPED1 8 /* path1 was a special file */ 177 #define D_SKIPPED2 9 /* path2 was a special file */ 178 179 struct cand { 180 int x; 181 int y; 182 int pred; 183 } cand; 184 185 struct line { 186 int serial; 187 int value; 188 } *file[2]; 189 190 /* 191 * The following struct is used to record change information when 192 * doing a "context" or "unified" diff. (see routine "change" to 193 * understand the highly mnemonic field names) 194 */ 195 struct context_vec { 196 int a; /* start line in old file */ 197 int b; /* end line in old file */ 198 int c; /* start line in new file */ 199 int d; /* end line in new file */ 200 }; 201 202 struct diff_arg { 203 char *rev1; 204 char *rev2; 205 char *date1; 206 char *date2; 207 }; 208 209 210 struct excludes { 211 char *pattern; 212 struct excludes *next; 213 }; 214 215 216 char *splice(char *, char *); 217 int cvs_diff_options(char *, int, char **, int *); 218 int cvs_diffreg(const char *, const char *); 219 int cvs_diff_file(struct cvs_file *, void *); 220 int cvs_diff_sendflags(struct cvsroot *); 221 int cvs_diff_cleanup(void); 222 223 static void output(const char *, FILE *, const char *, FILE *); 224 static void check(FILE *, FILE *); 225 static void range(int, int, char *); 226 static void uni_range(int, int); 227 static void dump_context_vec(FILE *, FILE *); 228 static void dump_unified_vec(FILE *, FILE *); 229 static int prepare(int, FILE *, off_t); 230 static void prune(void); 231 static void equiv(struct line *, int, struct line *, int, int *); 232 static void unravel(int); 233 static void unsort(struct line *, int, int *); 234 static void change(const char *, FILE *, const char *, FILE *, int, int, int, int); 235 static void sort(struct line *, int); 236 static int ignoreline(char *); 237 static int asciifile(FILE *); 238 static int fetch(long *, int, int, FILE *, int, int); 239 static int newcand(int, int, int); 240 static int search(int *, int, int); 241 static int skipline(FILE *); 242 static int isqrt(int); 243 static int stone(int *, int, int *, int *); 244 static int readhash(FILE *); 245 static int files_differ(FILE *, FILE *); 246 static char *match_function(const long *, int, FILE *); 247 static char *preadline(int, size_t, off_t); 248 249 250 static int aflag, bflag, dflag, iflag, Nflag, pflag, tflag, Tflag, wflag; 251 static int context, status; 252 static int format = D_NORMAL; 253 static struct stat stb1, stb2; 254 static char *ifdefname, *ignore_pats, diffargs[128]; 255 static const char *diff_file; 256 regex_t ignore_re; 257 258 static int *J; /* will be overlaid on class */ 259 static int *class; /* will be overlaid on file[0] */ 260 static int *klist; /* will be overlaid on file[0] after class */ 261 static int *member; /* will be overlaid on file[1] */ 262 static int clen; 263 static int inifdef; /* whether or not we are in a #ifdef block */ 264 static int len[2]; 265 static int pref, suff; /* length of prefix and suffix */ 266 static int slen[2]; 267 static int anychange; 268 static long *ixnew; /* will be overlaid on file[1] */ 269 static long *ixold; /* will be overlaid on klist */ 270 static struct cand *clist; /* merely a free storage pot for candidates */ 271 static int clistlen; /* the length of clist */ 272 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 273 static u_char *chrtran; /* translation table for case-folding */ 274 static struct context_vec *context_vec_start; 275 static struct context_vec *context_vec_end; 276 static struct context_vec *context_vec_ptr; 277 278 #define FUNCTION_CONTEXT_SIZE 41 279 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 280 static int lastline; 281 static int lastmatchline; 282 283 284 /* 285 * chrtran points to one of 2 translation tables: cup2low if folding upper to 286 * lower case clow2low if not folding case 287 */ 288 u_char clow2low[256] = { 289 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 290 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 291 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 292 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 293 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 294 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 295 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 296 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 297 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 298 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 299 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 300 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 301 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 302 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 303 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 304 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 305 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 306 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 307 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 308 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 309 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 310 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 311 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 312 0xfd, 0xfe, 0xff 313 }; 314 315 u_char cup2low[256] = { 316 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 317 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 318 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 319 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 320 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 321 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 322 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 323 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 324 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 325 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 326 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 327 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 328 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 329 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 330 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 331 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 332 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 333 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 334 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 335 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 336 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 337 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 338 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 339 0xfd, 0xfe, 0xff 340 }; 341 342 struct cvs_cmd_info cvs_diff = { 343 cvs_diff_options, 344 cvs_diff_sendflags, 345 cvs_diff_file, 346 cvs_diff_cleanup, 347 NULL, 348 CF_RECURSE | CF_IGNORE | CF_SORT | CF_KNOWN, 349 CVS_REQ_DIFF, 350 CVS_CMD_SENDARGS2 | CVS_CMD_ALLOWSPEC | CVS_CMD_SENDDIR 351 }; 352 353 static struct diff_arg *dap = NULL; 354 static int recurse; 355 356 int 357 cvs_diff_options(char *opt, int argc, char **argv, int *arg) 358 { 359 int ch; 360 361 dap = (struct diff_arg *)malloc(sizeof(*dap)); 362 if (dap == NULL) 363 return (CVS_EX_DATA); 364 dap->date1 = dap->date2 = dap->rev1 = dap->rev2 = NULL; 365 strlcpy(diffargs, argv[0], sizeof(diffargs)); 366 367 while ((ch = getopt(argc, argv, opt)) != -1) { 368 switch (ch) { 369 case 'c': 370 strlcat(diffargs, " -c", sizeof(diffargs)); 371 format = D_CONTEXT; 372 break; 373 case 'D': 374 if (dap->date1 == NULL && dap->rev1 == NULL) { 375 dap->date1 = optarg; 376 } else if (dap->date2 == NULL && dap->rev2 == NULL) { 377 dap->date2 = optarg; 378 } else { 379 cvs_log(LP_ERR, 380 "no more than two revisions/dates can " 381 "be specified"); 382 } 383 break; 384 case 'l': 385 strlcat(diffargs, " -l", sizeof(diffargs)); 386 recurse = 0; 387 cvs_diff.file_flags &= ~CF_RECURSE; 388 break; 389 case 'i': 390 strlcat(diffargs, " -i", sizeof(diffargs)); 391 iflag = 1; 392 break; 393 case 'N': 394 strlcat(diffargs, " -N", sizeof(diffargs)); 395 Nflag = 1; 396 break; 397 case 'n': 398 strlcat(diffargs, " -n", sizeof(diffargs)); 399 format = D_RCSDIFF; 400 break; 401 case 'p': 402 strlcat(diffargs, " -p", sizeof(diffargs)); 403 pflag = 1; 404 break; 405 case 'r': 406 if ((dap->rev1 == NULL) && (dap->date1 == NULL)) { 407 dap->rev1 = optarg; 408 } else if ((dap->rev2 == NULL) && 409 (dap->date2 == NULL)) { 410 dap->rev2 = optarg; 411 } else { 412 cvs_log(LP_ERR, 413 "no more than two revisions/dates can " 414 "be specified"); 415 return (CVS_EX_USAGE); 416 } 417 break; 418 case 'R': 419 cvs_diff.file_flags |= CF_RECURSE; 420 break; 421 case 'u': 422 strlcat(diffargs, " -u", sizeof(diffargs)); 423 format = D_UNIFIED; 424 break; 425 default: 426 return (CVS_EX_USAGE); 427 } 428 } 429 430 *arg = optind; 431 return (0); 432 } 433 434 int 435 cvs_diff_cleanup(void) 436 { 437 if (dap != NULL) 438 free(dap); 439 return (0); 440 } 441 442 /* 443 * cvs_diff_sendflags() 444 * 445 */ 446 int 447 cvs_diff_sendflags(struct cvsroot *root) 448 { 449 /* send the flags */ 450 if (Nflag && (cvs_sendarg(root, "-N", 0) < 0)) 451 return (CVS_EX_PROTO); 452 if (pflag && (cvs_sendarg(root, "-p", 0) < 0)) 453 return (CVS_EX_PROTO); 454 455 if (format == D_CONTEXT) 456 cvs_sendarg(root, "-c", 0); 457 else if (format == D_UNIFIED) 458 cvs_sendarg(root, "-u", 0); 459 460 if (dap->rev1 != NULL) { 461 cvs_sendarg(root, "-r", 0); 462 cvs_sendarg(root, dap->rev1, 0); 463 } else if (dap->date1 != NULL) { 464 cvs_sendarg(root, "-D", 0); 465 cvs_sendarg(root, dap->date1, 0); 466 } 467 if (dap->rev2 != NULL) { 468 cvs_sendarg(root, "-r", 0); 469 cvs_sendarg(root, dap->rev2, 0); 470 } else if (dap->date2 != NULL) { 471 cvs_sendarg(root, "-D", 0); 472 cvs_sendarg(root, dap->date2, 0); 473 } 474 475 return (0); 476 } 477 478 479 /* 480 * cvs_diff_file() 481 * 482 * Diff a single file. 483 */ 484 int 485 cvs_diff_file(struct cvs_file *cfp, void *arg) 486 { 487 int l; 488 char *dir, *repo, buf[64]; 489 char fpath[MAXPATHLEN], dfpath[MAXPATHLEN], rcspath[MAXPATHLEN]; 490 char path_tmp1[MAXPATHLEN], path_tmp2[MAXPATHLEN]; 491 BUF *b1, *b2; 492 RCSNUM *r1, *r2; 493 RCSFILE *rf; 494 struct cvsroot *root; 495 496 if (cfp->cf_type == DT_DIR) { 497 if (cfp->cf_cvstat == CVS_FST_UNKNOWN) { 498 root = cfp->cf_parent->cf_root; 499 cvs_sendreq(root, CVS_REQ_QUESTIONABLE, 500 CVS_FILE_NAME(cfp)); 501 } else { 502 root = cfp->cf_root; 503 #if 0 504 if ((cfp->cf_parent == NULL) || 505 (root != cfp->cf_parent->cf_root)) { 506 cvs_connect(root); 507 cvs_diff_sendflags(root); 508 } 509 #endif 510 511 cvs_senddir(root, cfp); 512 } 513 514 return (0); 515 } 516 517 if (cfp->cf_cvstat == CVS_FST_LOST) { 518 cvs_log(LP_WARN, "cannot find file %s", CVS_FILE_NAME(cfp)); 519 return (0); 520 } 521 522 rf = NULL; 523 diff_file = cvs_file_getpath(cfp, fpath, sizeof(fpath)); 524 525 if (cfp->cf_parent != NULL) { 526 dir = cvs_file_getpath(cfp->cf_parent, dfpath, sizeof(dfpath)); 527 root = cfp->cf_parent->cf_root; 528 repo = cfp->cf_parent->cf_repo; 529 } else { 530 dir = "."; 531 root = NULL; 532 repo = NULL; 533 } 534 535 if (cfp->cf_cvstat == CVS_FST_UNKNOWN) { 536 if (root->cr_method == CVS_METHOD_LOCAL) 537 cvs_log(LP_WARN, "I know nothing about %s", diff_file); 538 else 539 cvs_sendreq(root, CVS_REQ_QUESTIONABLE, 540 CVS_FILE_NAME(cfp)); 541 return (0); 542 } 543 544 if (root->cr_method != CVS_METHOD_LOCAL) { 545 if (cvs_sendentry(root, cfp) < 0) { 546 return (CVS_EX_PROTO); 547 } 548 } 549 550 if (cfp->cf_cvstat == CVS_FST_UPTODATE) { 551 if (root->cr_method != CVS_METHOD_LOCAL) 552 cvs_sendreq(root, CVS_REQ_UNCHANGED, 553 CVS_FILE_NAME(cfp)); 554 return (0); 555 } 556 557 /* at this point, the file is modified */ 558 if (root->cr_method != CVS_METHOD_LOCAL) { 559 cvs_sendreq(root, CVS_REQ_MODIFIED, CVS_FILE_NAME(cfp)); 560 cvs_sendfile(root, diff_file); 561 } else { 562 l = snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s", 563 root->cr_dir, repo, diff_file, RCS_FILE_EXT); 564 if (l == -1 || l >= (int)sizeof(rcspath)) { 565 errno = ENAMETOOLONG; 566 cvs_log(LP_ERRNO, "%s", rcspath); 567 return (-1); 568 } 569 570 rf = rcs_open(rcspath, RCS_READ); 571 if (rf == NULL) { 572 return (CVS_EX_DATA); 573 } 574 575 cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file, 576 RCS_DIFF_DIV, rcspath); 577 578 if (dap->rev1 == NULL) 579 r1 = cfp->cf_lrev; 580 else { 581 if ((r1 = rcsnum_parse(dap->rev1)) == NULL) { 582 return (CVS_EX_DATA); 583 } 584 } 585 586 cvs_printf("retrieving revision %s\n", 587 rcsnum_tostr(r1, buf, sizeof(buf))); 588 b1 = rcs_getrev(rf, r1); 589 590 if (r1 != cfp->cf_lrev) 591 rcsnum_free(r1); 592 593 if (dap->rev2 != NULL) { 594 cvs_printf("retrieving revision %s\n", dap->rev2); 595 if ((r2 = rcsnum_parse(dap->rev2)) == NULL) { 596 return (CVS_EX_DATA); 597 } 598 b2 = rcs_getrev(rf, r2); 599 rcsnum_free(r2); 600 } else { 601 b2 = cvs_buf_load(diff_file, BUF_AUTOEXT); 602 } 603 604 rcs_close(rf); 605 606 printf("%s", diffargs); 607 printf(" -r%s", buf); 608 if (dap->rev2 != NULL) 609 printf(" -r%s", dap->rev2); 610 printf(" %s\n", diff_file); 611 strlcpy(path_tmp1, "/tmp/diff1.XXXXXXXXXX", sizeof(path_tmp1)); 612 if (cvs_buf_write_stmp(b1, path_tmp1, 0600) == -1) { 613 cvs_buf_free(b1); 614 cvs_buf_free(b2); 615 return (CVS_EX_DATA); 616 } 617 cvs_buf_free(b1); 618 619 strlcpy(path_tmp2, "/tmp/diff2.XXXXXXXXXX", sizeof(path_tmp2)); 620 if (cvs_buf_write_stmp(b2, path_tmp2, 0600) == -1) { 621 cvs_buf_free(b2); 622 (void)unlink(path_tmp1); 623 return (CVS_EX_DATA); 624 } 625 cvs_buf_free(b2); 626 627 cvs_diffreg(path_tmp1, path_tmp2); 628 (void)unlink(path_tmp1); 629 (void)unlink(path_tmp2); 630 } 631 632 return (0); 633 } 634 635 636 int 637 cvs_diffreg(const char *file1, const char *file2) 638 { 639 FILE *f1, *f2; 640 int i, rval; 641 void *tmp; 642 643 f1 = f2 = NULL; 644 rval = D_SAME; 645 anychange = 0; 646 lastline = 0; 647 lastmatchline = 0; 648 context_vec_ptr = context_vec_start - 1; 649 chrtran = (iflag ? cup2low : clow2low); 650 651 f1 = fopen(file1, "r"); 652 if (f1 == NULL) { 653 cvs_log(LP_ERRNO, "%s", file1); 654 status |= 2; 655 goto closem; 656 } 657 658 f2 = fopen(file2, "r"); 659 if (f2 == NULL) { 660 cvs_log(LP_ERRNO, "%s", file2); 661 status |= 2; 662 goto closem; 663 } 664 665 switch (files_differ(f1, f2)) { 666 case 0: 667 goto closem; 668 case 1: 669 break; 670 default: 671 /* error */ 672 status |= 2; 673 goto closem; 674 } 675 676 if (!asciifile(f1) || !asciifile(f2)) { 677 rval = D_BINARY; 678 status |= 1; 679 goto closem; 680 } 681 if ((prepare(0, f1, stb1.st_size) < 0) || 682 (prepare(1, f2, stb2.st_size) < 0)) { 683 status |= 2; 684 goto closem; 685 } 686 prune(); 687 sort(sfile[0], slen[0]); 688 sort(sfile[1], slen[1]); 689 690 member = (int *)file[1]; 691 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 692 if ((tmp = realloc(member, (slen[1] + 2) * sizeof(int))) == NULL) { 693 status |= 2; 694 goto closem; 695 } 696 member = (int *)tmp; 697 698 class = (int *)file[0]; 699 unsort(sfile[0], slen[0], class); 700 if ((tmp = realloc(class, (slen[0] + 2) * sizeof(int))) == NULL) { 701 status |= 2; 702 goto closem; 703 } 704 class = (int *)tmp; 705 706 if ((klist = malloc((slen[0] + 2) * sizeof(int))) == NULL) { 707 cvs_log(LP_ERRNO, "failed to allocate klist"); 708 status |= 2; 709 goto closem; 710 } 711 clen = 0; 712 clistlen = 100; 713 if ((clist = malloc(clistlen * sizeof(cand))) == NULL) { 714 cvs_log(LP_ERRNO, "failed to allocate clist"); 715 status |= 2; 716 goto closem; 717 } 718 i = stone(class, slen[0], member, klist); 719 free(member); 720 free(class); 721 722 J = realloc(J, (len[0] + 2) * sizeof(int)); 723 unravel(klist[i]); 724 free(clist); 725 free(klist); 726 727 ixold = realloc(ixold, (len[0] + 2) * sizeof(long)); 728 ixnew = realloc(ixnew, (len[1] + 2) * sizeof(long)); 729 check(f1, f2); 730 output(file1, f1, file2, f2); 731 732 closem: 733 if (anychange) { 734 status |= 1; 735 if (rval == D_SAME) 736 rval = D_DIFFER; 737 } 738 if (f1 != NULL) 739 fclose(f1); 740 if (f2 != NULL) 741 fclose(f2); 742 743 return (rval); 744 } 745 746 /* 747 * Check to see if the given files differ. 748 * Returns 0 if they are the same, 1 if different, and -1 on error. 749 * XXX - could use code from cmp(1) [faster] 750 */ 751 static int 752 files_differ(FILE *f1, FILE *f2) 753 { 754 char buf1[BUFSIZ], buf2[BUFSIZ]; 755 size_t i, j; 756 757 if (stb1.st_size != stb2.st_size) 758 return (1); 759 for (;;) { 760 i = fread(buf1, 1, sizeof(buf1), f1); 761 j = fread(buf2, 1, sizeof(buf2), f2); 762 if (i != j) 763 return (1); 764 if (i == 0 && j == 0) { 765 if (ferror(f1) || ferror(f2)) 766 return (1); 767 return (0); 768 } 769 if (memcmp(buf1, buf2, i) != 0) 770 return (1); 771 } 772 } 773 774 775 char * 776 splice(char *dir, char *filename) 777 { 778 char *tail, *buf; 779 780 if ((tail = strrchr(filename, '/')) == NULL) 781 tail = filename; 782 else 783 tail++; 784 asprintf(&buf, "%s/%s", dir, tail); 785 return (buf); 786 } 787 788 static int 789 prepare(int i, FILE *fd, off_t filesize) 790 { 791 void *tmp; 792 struct line *p; 793 int j, h; 794 size_t sz; 795 796 rewind(fd); 797 798 sz = ((size_t)filesize <= SIZE_MAX ? (size_t)filesize : SIZE_MAX) / 25; 799 if (sz < 100) 800 sz = 100; 801 802 p = (struct line *)malloc((sz + 3) * sizeof(struct line)); 803 if (p == NULL) { 804 cvs_log(LP_ERRNO, "failed to prepare line array"); 805 return (-1); 806 } 807 for (j = 0; (h = readhash(fd));) { 808 if (j == (int)sz) { 809 sz = sz * 3 / 2; 810 tmp = realloc(p, (sz + 3) * sizeof(struct line)); 811 if (tmp == NULL) { 812 cvs_log(LP_ERRNO, "failed to grow line array"); 813 free(p); 814 return (-1); 815 } 816 p = (struct line *)tmp; 817 } 818 p[++j].value = h; 819 } 820 len[i] = j; 821 file[i] = p; 822 823 return (0); 824 } 825 826 static void 827 prune(void) 828 { 829 int i, j; 830 831 for (pref = 0; pref < len[0] && pref < len[1] && 832 file[0][pref + 1].value == file[1][pref + 1].value; 833 pref++) 834 ; 835 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 836 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 837 suff++) 838 ; 839 for (j = 0; j < 2; j++) { 840 sfile[j] = file[j] + pref; 841 slen[j] = len[j] - pref - suff; 842 for (i = 0; i <= slen[j]; i++) 843 sfile[j][i].serial = i; 844 } 845 } 846 847 static void 848 equiv(struct line *a, int n, struct line *b, int m, int *c) 849 { 850 int i, j; 851 852 i = j = 1; 853 while (i <= n && j <= m) { 854 if (a[i].value < b[j].value) 855 a[i++].value = 0; 856 else if (a[i].value == b[j].value) 857 a[i++].value = j; 858 else 859 j++; 860 } 861 while (i <= n) 862 a[i++].value = 0; 863 b[m + 1].value = 0; 864 j = 0; 865 while (++j <= m) { 866 c[j] = -b[j].serial; 867 while (b[j + 1].value == b[j].value) { 868 j++; 869 c[j] = b[j].serial; 870 } 871 } 872 c[j] = -1; 873 } 874 875 /* Code taken from ping.c */ 876 static int 877 isqrt(int n) 878 { 879 int y, x = 1; 880 881 if (n == 0) 882 return (0); 883 884 do { /* newton was a stinker */ 885 y = x; 886 x = n / x; 887 x += y; 888 x /= 2; 889 } while ((x - y) > 1 || (x - y) < -1); 890 891 return (x); 892 } 893 894 static int 895 stone(int *a, int n, int *b, int *c) 896 { 897 int i, k, y, j, l; 898 int oldc, tc, oldl; 899 u_int numtries; 900 901 /* XXX move the isqrt() out of the macro to avoid multiple calls */ 902 const u_int bound = dflag ? UINT_MAX : MAX(256, (u_int)isqrt(n)); 903 904 k = 0; 905 c[0] = newcand(0, 0, 0); 906 for (i = 1; i <= n; i++) { 907 j = a[i]; 908 if (j == 0) 909 continue; 910 y = -b[j]; 911 oldl = 0; 912 oldc = c[0]; 913 numtries = 0; 914 do { 915 if (y <= clist[oldc].y) 916 continue; 917 l = search(c, k, y); 918 if (l != oldl + 1) 919 oldc = c[l - 1]; 920 if (l <= k) { 921 if (clist[c[l]].y <= y) 922 continue; 923 tc = c[l]; 924 c[l] = newcand(i, y, oldc); 925 oldc = tc; 926 oldl = l; 927 numtries++; 928 } else { 929 c[l] = newcand(i, y, oldc); 930 k++; 931 break; 932 } 933 } while ((y = b[++j]) > 0 && numtries < bound); 934 } 935 return (k); 936 } 937 938 static int 939 newcand(int x, int y, int pred) 940 { 941 struct cand *q; 942 943 if (clen == clistlen) { 944 clistlen = clistlen * 11 / 10; 945 clist = realloc(clist, clistlen * sizeof(cand)); 946 if (clist == NULL) { 947 cvs_log(LP_ERRNO, "failed to resize clist"); 948 return (-1); 949 } 950 } 951 q = clist + clen; 952 q->x = x; 953 q->y = y; 954 q->pred = pred; 955 return (clen++); 956 } 957 958 static int 959 search(int *c, int k, int y) 960 { 961 int i, j, l, t; 962 963 if (clist[c[k]].y < y) /* quick look for typical case */ 964 return (k + 1); 965 i = 0; 966 j = k + 1; 967 while (1) { 968 l = i + j; 969 if ((l >>= 1) <= i) 970 break; 971 t = clist[c[l]].y; 972 if (t > y) 973 j = l; 974 else if (t < y) 975 i = l; 976 else 977 return (l); 978 } 979 return (l + 1); 980 } 981 982 static void 983 unravel(int p) 984 { 985 struct cand *q; 986 int i; 987 988 for (i = 0; i <= len[0]; i++) 989 J[i] = i <= pref ? i : 990 i > len[0] - suff ? i + len[1] - len[0] : 0; 991 for (q = clist + p; q->y != 0; q = clist + q->pred) 992 J[q->x + pref] = q->y + pref; 993 } 994 995 /* 996 * Check does double duty: 997 * 1. ferret out any fortuitous correspondences due 998 * to confounding by hashing (which result in "jackpot") 999 * 2. collect random access indexes to the two files 1000 */ 1001 static void 1002 check(FILE *f1, FILE *f2) 1003 { 1004 int i, j, jackpot, c, d; 1005 long ctold, ctnew; 1006 1007 rewind(f1); 1008 rewind(f2); 1009 j = 1; 1010 ixold[0] = ixnew[0] = 0; 1011 jackpot = 0; 1012 ctold = ctnew = 0; 1013 for (i = 1; i <= len[0]; i++) { 1014 if (J[i] == 0) { 1015 ixold[i] = ctold += skipline(f1); 1016 continue; 1017 } 1018 while (j < J[i]) { 1019 ixnew[j] = ctnew += skipline(f2); 1020 j++; 1021 } 1022 if (bflag || wflag || iflag) { 1023 for (;;) { 1024 c = getc(f1); 1025 d = getc(f2); 1026 /* 1027 * GNU diff ignores a missing newline 1028 * in one file if bflag || wflag. 1029 */ 1030 if ((bflag || wflag) && 1031 ((c == EOF && d == '\n') || 1032 (c == '\n' && d == EOF))) { 1033 break; 1034 } 1035 ctold++; 1036 ctnew++; 1037 if (bflag && isspace(c) && isspace(d)) { 1038 do { 1039 if (c == '\n') 1040 break; 1041 ctold++; 1042 } while (isspace(c = getc(f1))); 1043 do { 1044 if (d == '\n') 1045 break; 1046 ctnew++; 1047 } while (isspace(d = getc(f2))); 1048 } else if (wflag) { 1049 while (isspace(c) && c != '\n') { 1050 c = getc(f1); 1051 ctold++; 1052 } 1053 while (isspace(d) && d != '\n') { 1054 d = getc(f2); 1055 ctnew++; 1056 } 1057 } 1058 if (chrtran[c] != chrtran[d]) { 1059 jackpot++; 1060 J[i] = 0; 1061 if (c != '\n' && c != EOF) 1062 ctold += skipline(f1); 1063 if (d != '\n' && c != EOF) 1064 ctnew += skipline(f2); 1065 break; 1066 } 1067 if (c == '\n' || c == EOF) 1068 break; 1069 } 1070 } else { 1071 for (;;) { 1072 ctold++; 1073 ctnew++; 1074 if ((c = getc(f1)) != (d = getc(f2))) { 1075 /* jackpot++; */ 1076 J[i] = 0; 1077 if (c != '\n' && c != EOF) 1078 ctold += skipline(f1); 1079 if (d != '\n' && c != EOF) 1080 ctnew += skipline(f2); 1081 break; 1082 } 1083 if (c == '\n' || c == EOF) 1084 break; 1085 } 1086 } 1087 ixold[i] = ctold; 1088 ixnew[j] = ctnew; 1089 j++; 1090 } 1091 for (; j <= len[1]; j++) 1092 ixnew[j] = ctnew += skipline(f2); 1093 /* 1094 * if (jackpot) 1095 * fprintf(stderr, "jackpot\n"); 1096 */ 1097 } 1098 1099 /* shellsort CACM #201 */ 1100 static void 1101 sort(struct line *a, int n) 1102 { 1103 struct line *ai, *aim, w; 1104 int j, m = 0, k; 1105 1106 if (n == 0) 1107 return; 1108 for (j = 1; j <= n; j *= 2) 1109 m = 2 * j - 1; 1110 for (m /= 2; m != 0; m /= 2) { 1111 k = n - m; 1112 for (j = 1; j <= k; j++) { 1113 for (ai = &a[j]; ai > a; ai -= m) { 1114 aim = &ai[m]; 1115 if (aim < ai) 1116 break; /* wraparound */ 1117 if (aim->value > ai[0].value || 1118 (aim->value == ai[0].value && 1119 aim->serial > ai[0].serial)) 1120 break; 1121 w.value = ai[0].value; 1122 ai[0].value = aim->value; 1123 aim->value = w.value; 1124 w.serial = ai[0].serial; 1125 ai[0].serial = aim->serial; 1126 aim->serial = w.serial; 1127 } 1128 } 1129 } 1130 } 1131 1132 static void 1133 unsort(struct line *f, int l, int *b) 1134 { 1135 int *a, i; 1136 1137 if ((a = (int *)malloc((l + 1) * sizeof(int))) == NULL) { 1138 cvs_log(LP_ERRNO, "failed to allocate sort array"); 1139 return; 1140 } 1141 for (i = 1; i <= l; i++) 1142 a[f[i].serial] = f[i].value; 1143 for (i = 1; i <= l; i++) 1144 b[i] = a[i]; 1145 free(a); 1146 } 1147 1148 static int 1149 skipline(FILE *f) 1150 { 1151 int i, c; 1152 1153 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 1154 continue; 1155 return (i); 1156 } 1157 1158 static void 1159 output(const char *file1, FILE *f1, const char *file2, FILE *f2) 1160 { 1161 int m, i0, i1, j0, j1; 1162 1163 rewind(f1); 1164 rewind(f2); 1165 m = len[0]; 1166 J[0] = 0; 1167 J[m + 1] = len[1] + 1; 1168 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 1169 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 1170 i0++; 1171 j0 = J[i0 - 1] + 1; 1172 i1 = i0 - 1; 1173 while (i1 < m && J[i1 + 1] == 0) 1174 i1++; 1175 j1 = J[i1 + 1] - 1; 1176 J[i1] = j1; 1177 change(file1, f1, file2, f2, i0, i1, j0, j1); 1178 } 1179 if (m == 0) 1180 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 1181 if (format == D_IFDEF) { 1182 for (;;) { 1183 #define c i0 1184 if ((c = getc(f1)) == EOF) 1185 return; 1186 putchar(c); 1187 } 1188 #undef c 1189 } 1190 if (anychange != 0) { 1191 if (format == D_CONTEXT) 1192 dump_context_vec(f1, f2); 1193 else if (format == D_UNIFIED) 1194 dump_unified_vec(f1, f2); 1195 } 1196 } 1197 1198 static __inline void 1199 range(int a, int b, char *separator) 1200 { 1201 printf("%d", a > b ? b : a); 1202 if (a < b) 1203 printf("%s%d", separator, b); 1204 } 1205 1206 static __inline void 1207 uni_range(int a, int b) 1208 { 1209 if (a < b) 1210 printf("%d,%d", a, b - a + 1); 1211 else if (a == b) 1212 printf("%d", b); 1213 else 1214 printf("%d,0", b); 1215 } 1216 1217 static char * 1218 preadline(int fd, size_t rlen, off_t off) 1219 { 1220 char *line; 1221 ssize_t nr; 1222 1223 line = malloc(rlen + 1); 1224 if (line == NULL) { 1225 cvs_log(LP_ERRNO, "failed to allocate line"); 1226 return (NULL); 1227 } 1228 if ((nr = pread(fd, line, rlen, off)) < 0) { 1229 cvs_log(LP_ERRNO, "preadline failed"); 1230 return (NULL); 1231 } 1232 line[nr] = '\0'; 1233 return (line); 1234 } 1235 1236 static int 1237 ignoreline(char *line) 1238 { 1239 int ret; 1240 1241 ret = regexec(&ignore_re, line, 0, NULL, 0); 1242 free(line); 1243 return (ret == 0); /* if it matched, it should be ignored. */ 1244 } 1245 1246 /* 1247 * Indicate that there is a difference between lines a and b of the from file 1248 * to get to lines c to d of the to file. If a is greater then b then there 1249 * are no lines in the from file involved and this means that there were 1250 * lines appended (beginning at b). If c is greater than d then there are 1251 * lines missing from the to file. 1252 */ 1253 static void 1254 change(const char *file1, FILE *f1, const char *file2, FILE *f2, 1255 int a, int b, int c, int d) 1256 { 1257 static size_t max_context = 64; 1258 int i; 1259 1260 if (format != D_IFDEF && a > b && c > d) 1261 return; 1262 if (ignore_pats != NULL) { 1263 char *line; 1264 /* 1265 * All lines in the change, insert, or delete must 1266 * match an ignore pattern for the change to be 1267 * ignored. 1268 */ 1269 if (a <= b) { /* Changes and deletes. */ 1270 for (i = a; i <= b; i++) { 1271 line = preadline(fileno(f1), 1272 ixold[i] - ixold[i - 1], ixold[i - 1]); 1273 if (!ignoreline(line)) 1274 goto proceed; 1275 } 1276 } 1277 if (a > b || c <= d) { /* Changes and inserts. */ 1278 for (i = c; i <= d; i++) { 1279 line = preadline(fileno(f2), 1280 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1281 if (!ignoreline(line)) 1282 goto proceed; 1283 } 1284 } 1285 return; 1286 } 1287 proceed: 1288 if (format == D_CONTEXT || format == D_UNIFIED) { 1289 /* 1290 * Allocate change records as needed. 1291 */ 1292 if (context_vec_ptr == context_vec_end - 1) { 1293 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1294 max_context <<= 1; 1295 context_vec_start = realloc(context_vec_start, 1296 max_context * sizeof(struct context_vec)); 1297 context_vec_end = context_vec_start + max_context; 1298 context_vec_ptr = context_vec_start + offset; 1299 } 1300 if (anychange == 0) { 1301 /* 1302 * Print the context/unidiff header first time through. 1303 */ 1304 printf("%s %s %s", 1305 format == D_CONTEXT ? "***" : "---", diff_file, 1306 ctime(&stb1.st_mtime)); 1307 printf("%s %s %s", 1308 format == D_CONTEXT ? "---" : "+++", diff_file, 1309 ctime(&stb2.st_mtime)); 1310 anychange = 1; 1311 } else if (a > context_vec_ptr->b + (2 * context) + 1 && 1312 c > context_vec_ptr->d + (2 * context) + 1) { 1313 /* 1314 * If this change is more than 'context' lines from the 1315 * previous change, dump the record and reset it. 1316 */ 1317 if (format == D_CONTEXT) 1318 dump_context_vec(f1, f2); 1319 else 1320 dump_unified_vec(f1, f2); 1321 } 1322 context_vec_ptr++; 1323 context_vec_ptr->a = a; 1324 context_vec_ptr->b = b; 1325 context_vec_ptr->c = c; 1326 context_vec_ptr->d = d; 1327 return; 1328 } 1329 if (anychange == 0) 1330 anychange = 1; 1331 switch (format) { 1332 case D_BRIEF: 1333 return; 1334 case D_NORMAL: 1335 range(a, b, ","); 1336 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 1337 if (format == D_NORMAL) 1338 range(c, d, ","); 1339 putchar('\n'); 1340 break; 1341 case D_RCSDIFF: 1342 if (a > b) 1343 printf("a%d %d\n", b, d - c + 1); 1344 else { 1345 printf("d%d %d\n", a, b - a + 1); 1346 1347 if (!(c > d)) /* add changed lines */ 1348 printf("a%d %d\n", b, d - c + 1); 1349 } 1350 break; 1351 } 1352 if (format == D_NORMAL || format == D_IFDEF) { 1353 fetch(ixold, a, b, f1, '<', 1); 1354 if (a <= b && c <= d && format == D_NORMAL) 1355 puts("---"); 1356 } 1357 i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0); 1358 if (inifdef) { 1359 printf("#endif /* %s */\n", ifdefname); 1360 inifdef = 0; 1361 } 1362 } 1363 1364 static int 1365 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) 1366 { 1367 int i, j, c, lastc, col, nc; 1368 1369 /* 1370 * When doing #ifdef's, copy down to current line 1371 * if this is the first file, so that stuff makes it to output. 1372 */ 1373 if (format == D_IFDEF && oldfile) { 1374 long curpos = ftell(lb); 1375 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1376 nc = f[a > b ? b : a - 1] - curpos; 1377 for (i = 0; i < nc; i++) 1378 putchar(getc(lb)); 1379 } 1380 if (a > b) 1381 return (0); 1382 if (format == D_IFDEF) { 1383 if (inifdef) { 1384 printf("#else /* %s%s */\n", 1385 oldfile == 1 ? "!" : "", ifdefname); 1386 } else { 1387 if (oldfile) 1388 printf("#ifndef %s\n", ifdefname); 1389 else 1390 printf("#ifdef %s\n", ifdefname); 1391 } 1392 inifdef = 1 + oldfile; 1393 } 1394 for (i = a; i <= b; i++) { 1395 fseek(lb, f[i - 1], SEEK_SET); 1396 nc = f[i] - f[i - 1]; 1397 if (format != D_IFDEF && ch != '\0') { 1398 putchar(ch); 1399 if (Tflag && (format == D_NORMAL || format == D_CONTEXT 1400 || format == D_UNIFIED)) 1401 putchar('\t'); 1402 else if (format != D_UNIFIED) 1403 putchar(' '); 1404 } 1405 col = 0; 1406 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1407 if ((c = getc(lb)) == EOF) { 1408 if (format == D_RCSDIFF) 1409 warnx("No newline at end of file"); 1410 else 1411 puts("\n\\ No newline at end of file"); 1412 return (0); 1413 } 1414 if (c == '\t' && tflag) { 1415 do { 1416 putchar(' '); 1417 } while (++col & 7); 1418 } else { 1419 putchar(c); 1420 col++; 1421 } 1422 } 1423 } 1424 return (0); 1425 } 1426 1427 /* 1428 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1429 */ 1430 static int 1431 readhash(FILE *f) 1432 { 1433 int i, t, space; 1434 int sum; 1435 1436 sum = 1; 1437 space = 0; 1438 if (!bflag && !wflag) { 1439 if (iflag) 1440 for (i = 0; (t = getc(f)) != '\n'; i++) { 1441 if (t == EOF) { 1442 if (i == 0) 1443 return (0); 1444 break; 1445 } 1446 sum = sum * 127 + chrtran[t]; 1447 } 1448 else 1449 for (i = 0; (t = getc(f)) != '\n'; i++) { 1450 if (t == EOF) { 1451 if (i == 0) 1452 return (0); 1453 break; 1454 } 1455 sum = sum * 127 + t; 1456 } 1457 } else { 1458 for (i = 0;;) { 1459 switch (t = getc(f)) { 1460 case '\t': 1461 case ' ': 1462 space++; 1463 continue; 1464 default: 1465 if (space && !wflag) { 1466 i++; 1467 space = 0; 1468 } 1469 sum = sum * 127 + chrtran[t]; 1470 i++; 1471 continue; 1472 case EOF: 1473 if (i == 0) 1474 return (0); 1475 /* FALLTHROUGH */ 1476 case '\n': 1477 break; 1478 } 1479 break; 1480 } 1481 } 1482 /* 1483 * There is a remote possibility that we end up with a zero sum. 1484 * Zero is used as an EOF marker, so return 1 instead. 1485 */ 1486 return (sum == 0 ? 1 : sum); 1487 } 1488 1489 static int 1490 asciifile(FILE *f) 1491 { 1492 char buf[BUFSIZ]; 1493 int i, cnt; 1494 1495 if (aflag || f == NULL) 1496 return (1); 1497 1498 rewind(f); 1499 cnt = fread(buf, 1, sizeof(buf), f); 1500 for (i = 0; i < cnt; i++) 1501 if (!isprint(buf[i]) && !isspace(buf[i])) 1502 return (0); 1503 return (1); 1504 } 1505 1506 static char* 1507 match_function(const long *f, int pos, FILE *fp) 1508 { 1509 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1510 size_t nc; 1511 int last = lastline; 1512 char *p; 1513 1514 lastline = pos; 1515 while (pos > last) { 1516 fseek(fp, f[pos - 1], SEEK_SET); 1517 nc = f[pos] - f[pos - 1]; 1518 if (nc >= sizeof(buf)) 1519 nc = sizeof(buf) - 1; 1520 nc = fread(buf, 1, nc, fp); 1521 if (nc > 0) { 1522 buf[nc] = '\0'; 1523 p = strchr(buf, '\n'); 1524 if (p != NULL) 1525 *p = '\0'; 1526 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1527 strlcpy(lastbuf, buf, sizeof lastbuf); 1528 lastmatchline = pos; 1529 return lastbuf; 1530 } 1531 } 1532 pos--; 1533 } 1534 return (lastmatchline > 0) ? lastbuf : NULL; 1535 } 1536 1537 1538 /* dump accumulated "context" diff changes */ 1539 static void 1540 dump_context_vec(FILE *f1, FILE *f2) 1541 { 1542 struct context_vec *cvp = context_vec_start; 1543 int lowa, upb, lowc, upd, do_output; 1544 int a, b, c, d; 1545 char ch, *f; 1546 1547 if (context_vec_start > context_vec_ptr) 1548 return; 1549 1550 b = d = 0; /* gcc */ 1551 lowa = MAX(1, cvp->a - context); 1552 upb = MIN(len[0], context_vec_ptr->b + context); 1553 lowc = MAX(1, cvp->c - context); 1554 upd = MIN(len[1], context_vec_ptr->d + context); 1555 1556 printf("***************"); 1557 if (pflag) { 1558 f = match_function(ixold, lowa - 1, f1); 1559 if (f != NULL) { 1560 putchar(' '); 1561 fputs(f, stdout); 1562 } 1563 } 1564 printf("\n*** "); 1565 range(lowa, upb, ","); 1566 printf(" ****\n"); 1567 1568 /* 1569 * Output changes to the "old" file. The first loop suppresses 1570 * output if there were no changes to the "old" file (we'll see 1571 * the "old" lines as context in the "new" list). 1572 */ 1573 do_output = 0; 1574 for (; cvp <= context_vec_ptr; cvp++) 1575 if (cvp->a <= cvp->b) { 1576 cvp = context_vec_start; 1577 do_output++; 1578 break; 1579 } 1580 if (do_output) { 1581 while (cvp <= context_vec_ptr) { 1582 a = cvp->a; 1583 b = cvp->b; 1584 c = cvp->c; 1585 d = cvp->d; 1586 1587 if (a <= b && c <= d) 1588 ch = 'c'; 1589 else 1590 ch = (a <= b) ? 'd' : 'a'; 1591 1592 if (ch == 'a') 1593 fetch(ixold, lowa, b, f1, ' ', 0); 1594 else { 1595 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1596 fetch(ixold, a, b, f1, 1597 ch == 'c' ? '!' : '-', 0); 1598 } 1599 lowa = b + 1; 1600 cvp++; 1601 } 1602 fetch(ixold, b + 1, upb, f1, ' ', 0); 1603 } 1604 /* output changes to the "new" file */ 1605 printf("--- "); 1606 range(lowc, upd, ","); 1607 printf(" ----\n"); 1608 1609 do_output = 0; 1610 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1611 if (cvp->c <= cvp->d) { 1612 cvp = context_vec_start; 1613 do_output++; 1614 break; 1615 } 1616 if (do_output) { 1617 while (cvp <= context_vec_ptr) { 1618 a = cvp->a; 1619 b = cvp->b; 1620 c = cvp->c; 1621 d = cvp->d; 1622 1623 if (a <= b && c <= d) 1624 ch = 'c'; 1625 else 1626 ch = (a <= b) ? 'd' : 'a'; 1627 1628 if (ch == 'd') 1629 fetch(ixnew, lowc, d, f2, ' ', 0); 1630 else { 1631 fetch(ixnew, lowc, c - 1, f2, ' ', 0); 1632 fetch(ixnew, c, d, f2, 1633 ch == 'c' ? '!' : '+', 0); 1634 } 1635 lowc = d + 1; 1636 cvp++; 1637 } 1638 fetch(ixnew, d + 1, upd, f2, ' ', 0); 1639 } 1640 context_vec_ptr = context_vec_start - 1; 1641 } 1642 1643 /* dump accumulated "unified" diff changes */ 1644 static void 1645 dump_unified_vec(FILE *f1, FILE *f2) 1646 { 1647 struct context_vec *cvp = context_vec_start; 1648 int lowa, upb, lowc, upd; 1649 int a, b, c, d; 1650 char ch, *f; 1651 1652 if (context_vec_start > context_vec_ptr) 1653 return; 1654 1655 b = d = 0; /* gcc */ 1656 lowa = MAX(1, cvp->a - context); 1657 upb = MIN(len[0], context_vec_ptr->b + context); 1658 lowc = MAX(1, cvp->c - context); 1659 upd = MIN(len[1], context_vec_ptr->d + context); 1660 1661 fputs("@@ -", stdout); 1662 uni_range(lowa, upb); 1663 fputs(" +", stdout); 1664 uni_range(lowc, upd); 1665 fputs(" @@", stdout); 1666 if (pflag) { 1667 f = match_function(ixold, lowa - 1, f1); 1668 if (f != NULL) { 1669 putchar(' '); 1670 fputs(f, stdout); 1671 } 1672 } 1673 putchar('\n'); 1674 1675 /* 1676 * Output changes in "unified" diff format--the old and new lines 1677 * are printed together. 1678 */ 1679 for (; cvp <= context_vec_ptr; cvp++) { 1680 a = cvp->a; 1681 b = cvp->b; 1682 c = cvp->c; 1683 d = cvp->d; 1684 1685 /* 1686 * c: both new and old changes 1687 * d: only changes in the old file 1688 * a: only changes in the new file 1689 */ 1690 if (a <= b && c <= d) 1691 ch = 'c'; 1692 else 1693 ch = (a <= b) ? 'd' : 'a'; 1694 1695 switch (ch) { 1696 case 'c': 1697 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1698 fetch(ixold, a, b, f1, '-', 0); 1699 fetch(ixnew, c, d, f2, '+', 0); 1700 break; 1701 case 'd': 1702 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1703 fetch(ixold, a, b, f1, '-', 0); 1704 break; 1705 case 'a': 1706 fetch(ixnew, lowc, c - 1, f2, ' ', 0); 1707 fetch(ixnew, c, d, f2, '+', 0); 1708 break; 1709 } 1710 lowa = b + 1; 1711 lowc = d + 1; 1712 } 1713 fetch(ixnew, d + 1, upd, f2, ' ', 0); 1714 1715 context_vec_ptr = context_vec_start - 1; 1716 } 1717