1 /* $OpenBSD: diff.c,v 1.54 2005/07/27 16:42:19 xsa Exp $ */ 2 /* 3 * Copyright (C) Caldera International Inc. 2001-2002. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code and documentation must retain the above 10 * copyright notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed or owned by Caldera 17 * International, Inc. 18 * 4. Neither the name of Caldera International, Inc. nor the names of other 19 * contributors may be used to endorse or promote products derived from 20 * this software without specific prior written permission. 21 * 22 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 23 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 27 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 /*- 36 * Copyright (c) 1991, 1993 37 * The Regents of the University of California. All rights reserved. 38 * Copyright (c) 2004 Jean-Francois Brousseau. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 /* 67 * Uses an algorithm due to Harold Stone, which finds 68 * a pair of longest identical subsequences in the two 69 * files. 70 * 71 * The major goal is to generate the match vector J. 72 * J[i] is the index of the line in file1 corresponding 73 * to line i file0. J[i] = 0 if there is no 74 * such line in file1. 75 * 76 * Lines are hashed so as to work in core. All potential 77 * matches are located by sorting the lines of each file 78 * on the hash (called ``value''). In particular, this 79 * collects the equivalence classes in file1 together. 80 * Subroutine equiv replaces the value of each line in 81 * file0 by the index of the first element of its 82 * matching equivalence in (the reordered) file1. 83 * To save space equiv squeezes file1 into a single 84 * array member in which the equivalence classes 85 * are simply concatenated, except that their first 86 * members are flagged by changing sign. 87 * 88 * Next the indices that point into member are unsorted into 89 * array class according to the original order of file0. 90 * 91 * The cleverness lies in routine stone. This marches 92 * through the lines of file0, developing a vector klist 93 * of "k-candidates". At step i a k-candidate is a matched 94 * pair of lines x,y (x in file0 y in file1) such that 95 * there is a common subsequence of length k 96 * between the first i lines of file0 and the first y 97 * lines of file1, but there is no such subsequence for 98 * any smaller y. x is the earliest possible mate to y 99 * that occurs in such a subsequence. 100 * 101 * Whenever any of the members of the equivalence class of 102 * lines in file1 matable to a line in file0 has serial number 103 * less than the y of some k-candidate, that k-candidate 104 * with the smallest such y is replaced. The new 105 * k-candidate is chained (via pred) to the current 106 * k-1 candidate so that the actual subsequence can 107 * be recovered. When a member has serial number greater 108 * that the y of all k-candidates, the klist is extended. 109 * At the end, the longest subsequence is pulled out 110 * and placed in the array J by unravel 111 * 112 * With J in hand, the matches there recorded are 113 * check'ed against reality to assure that no spurious 114 * matches have crept in due to hashing. If they have, 115 * they are broken, and "jackpot" is recorded--a harmless 116 * matter except that a true match for a spuriously 117 * mated line may now be unnecessarily reported as a change. 118 * 119 * Much of the complexity of the program comes simply 120 * from trying to minimize core utilization and 121 * maximize the range of doable problems by dynamically 122 * allocating what is needed and reusing what is not. 123 * The core requirements for problems larger than somewhat 124 * are (in words) 2*length(file0) + length(file1) + 125 * 3*(number of k-candidates installed), typically about 126 * 6n words for files of length n. 127 */ 128 129 #include <sys/stat.h> 130 131 #include <ctype.h> 132 #include <dirent.h> 133 #include <err.h> 134 #include <errno.h> 135 #include <fcntl.h> 136 #include <paths.h> 137 #include <regex.h> 138 #include <stddef.h> 139 #include <stdio.h> 140 #include <stdlib.h> 141 #include <string.h> 142 #include <unistd.h> 143 144 #include "buf.h" 145 #include "cvs.h" 146 #include "log.h" 147 #include "proto.h" 148 149 150 #define CVS_DIFF_DEFCTX 3 /* default context length */ 151 152 153 /* 154 * Output format options 155 */ 156 #define D_NORMAL 0 /* Normal output */ 157 #define D_CONTEXT 1 /* Diff with context */ 158 #define D_UNIFIED 2 /* Unified context diff */ 159 #define D_IFDEF 3 /* Diff with merged #ifdef's */ 160 #define D_BRIEF 4 /* Say if the files differ */ 161 #define D_RCSDIFF 5 /* Reverse editor output: RCS format */ 162 163 /* 164 * Status values for print_status() and diffreg() return values 165 */ 166 #define D_SAME 0 /* Files are the same */ 167 #define D_DIFFER 1 /* Files are different */ 168 #define D_BINARY 2 /* Binary files are different */ 169 #define D_COMMON 3 /* Subdirectory common to both dirs */ 170 #define D_ONLY 4 /* Only exists in one directory */ 171 #define D_MISMATCH1 5 /* path1 was a dir, path2 a file */ 172 #define D_MISMATCH2 6 /* path1 was a file, path2 a dir */ 173 #define D_ERROR 7 /* An error occurred */ 174 #define D_SKIPPED1 8 /* path1 was a special file */ 175 #define D_SKIPPED2 9 /* path2 was a special file */ 176 177 struct cand { 178 int x; 179 int y; 180 int pred; 181 } cand; 182 183 struct line { 184 int serial; 185 int value; 186 } *file[2]; 187 188 /* 189 * The following struct is used to record change information when 190 * doing a "context" or "unified" diff. (see routine "change" to 191 * understand the highly mnemonic field names) 192 */ 193 struct context_vec { 194 int a; /* start line in old file */ 195 int b; /* end line in old file */ 196 int c; /* start line in new file */ 197 int d; /* end line in new file */ 198 }; 199 200 struct diff_arg { 201 char *rev1; 202 char *rev2; 203 char *date1; 204 char *date2; 205 }; 206 207 208 static int cvs_diff_init(struct cvs_cmd *, int, char **, int *); 209 static int cvs_diff_remote(CVSFILE *, void *); 210 static int cvs_diff_local(CVSFILE *, void *); 211 static int cvs_diff_pre_exec(struct cvsroot *); 212 static int cvs_diff_cleanup(void); 213 int cvs_diffreg(const char *, const char *); 214 215 static void output(const char *, FILE *, const char *, FILE *); 216 static void check(FILE *, FILE *); 217 static void range(int, int, char *); 218 static void uni_range(int, int); 219 static void dump_context_vec(FILE *, FILE *); 220 static void dump_unified_vec(FILE *, FILE *); 221 static int prepare(int, FILE *, off_t); 222 static void prune(void); 223 static void equiv(struct line *, int, struct line *, int, int *); 224 static void unravel(int); 225 static void unsort(struct line *, int, int *); 226 static void change(const char *, FILE *, const char *, FILE *, int, 227 int, int, int); 228 static void sort(struct line *, int); 229 static int ignoreline(char *); 230 static int asciifile(FILE *); 231 static int fetch(long *, int, int, FILE *, int, int); 232 static int newcand(int, int, int); 233 static int search(int *, int, int); 234 static int skipline(FILE *); 235 static int isqrt(int); 236 static int stone(int *, int, int *, int *); 237 static int readhash(FILE *); 238 static int files_differ(FILE *, FILE *); 239 static char *match_function(const long *, int, FILE *); 240 static char *preadline(int, size_t, off_t); 241 242 243 static int aflag, bflag, dflag, iflag, Nflag, pflag, tflag, Tflag, wflag; 244 static int context; 245 static int format = D_NORMAL; 246 static struct stat stb1, stb2; 247 static char *ifdefname, *ignore_pats, diffargs[128]; 248 static const char *diff_file; 249 regex_t ignore_re; 250 251 static int *J; /* will be overlaid on class */ 252 static int *class; /* will be overlaid on file[0] */ 253 static int *klist; /* will be overlaid on file[0] after class */ 254 static int *member; /* will be overlaid on file[1] */ 255 static int clen; 256 static int inifdef; /* whether or not we are in a #ifdef block */ 257 static int diff_len[2]; 258 static int pref, suff; /* length of prefix and suffix */ 259 static int slen[2]; 260 static int anychange; 261 static long *ixnew; /* will be overlaid on file[1] */ 262 static long *ixold; /* will be overlaid on klist */ 263 static struct cand *clist; /* merely a free storage pot for candidates */ 264 static int clistlen; /* the length of clist */ 265 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 266 static u_char *chrtran; /* translation table for case-folding */ 267 static struct context_vec *context_vec_start; 268 static struct context_vec *context_vec_end; 269 static struct context_vec *context_vec_ptr; 270 271 #define FUNCTION_CONTEXT_SIZE 41 272 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 273 static int lastline; 274 static int lastmatchline; 275 276 277 /* 278 * chrtran points to one of 2 translation tables: cup2low if folding upper to 279 * lower case clow2low if not folding case 280 */ 281 u_char clow2low[256] = { 282 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 283 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 284 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 285 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 286 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 287 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 288 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 289 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 290 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 291 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 292 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 293 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 294 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 295 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 296 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 297 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 298 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 299 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 300 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 301 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 302 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 303 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 304 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 305 0xfd, 0xfe, 0xff 306 }; 307 308 u_char cup2low[256] = { 309 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 310 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 311 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 312 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 313 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 314 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 315 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 316 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 317 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 318 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 319 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 320 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 321 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 322 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 323 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 324 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 325 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 326 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 327 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 328 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 329 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 330 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 331 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 332 0xfd, 0xfe, 0xff 333 }; 334 335 336 struct cvs_cmd cvs_cmd_diff = { 337 CVS_OP_DIFF, CVS_REQ_DIFF, "diff", 338 { "di", "dif" }, 339 "Show differences between revisions", 340 "[-cilNnpu] [[-D date] [-r rev] [-D date2 | -r rev2]] " 341 "[-k mode] [file ...]", 342 "cD:iklNnpr:Ru", 343 NULL, 344 CF_RECURSE | CF_IGNORE | CF_SORT | CF_KNOWN, 345 cvs_diff_init, 346 cvs_diff_pre_exec, 347 cvs_diff_remote, 348 cvs_diff_local, 349 NULL, 350 cvs_diff_cleanup, 351 CVS_CMD_SENDARGS2 | CVS_CMD_ALLOWSPEC | CVS_CMD_SENDDIR 352 }; 353 354 355 struct cvs_cmd cvs_cmd_rdiff = { 356 CVS_OP_RDIFF, CVS_REQ_DIFF, "rdiff", 357 { "pa", "patch" }, 358 "Create 'patch' format diffs between releases", 359 "[-flR] [-c | -u] [-s | -t] [-V ver] -D date | -r rev " 360 "[-D date2 | -rev2] module ...", 361 "cD:flRr:stuV:", 362 NULL, 363 CF_RECURSE | CF_IGNORE | CF_SORT | CF_KNOWN, 364 cvs_diff_init, 365 cvs_diff_pre_exec, 366 cvs_diff_remote, 367 cvs_diff_local, 368 NULL, 369 cvs_diff_cleanup, 370 CVS_CMD_SENDARGS2 | CVS_CMD_ALLOWSPEC | CVS_CMD_SENDDIR 371 }; 372 373 static struct diff_arg *dap = NULL; 374 static int recurse; 375 376 static int 377 cvs_diff_init(struct cvs_cmd *cmd, int argc, char **argv, int *arg) 378 { 379 int ch; 380 381 dap = (struct diff_arg *)malloc(sizeof(*dap)); 382 if (dap == NULL) 383 return (CVS_EX_DATA); 384 dap->date1 = dap->date2 = dap->rev1 = dap->rev2 = NULL; 385 strlcpy(diffargs, argv[0], sizeof(diffargs)); 386 387 while ((ch = getopt(argc, argv, cmd->cmd_opts)) != -1) { 388 switch (ch) { 389 case 'c': 390 strlcat(diffargs, " -c", sizeof(diffargs)); 391 format = D_CONTEXT; 392 break; 393 case 'D': 394 if (dap->date1 == NULL && dap->rev1 == NULL) { 395 dap->date1 = optarg; 396 } else if (dap->date2 == NULL && dap->rev2 == NULL) { 397 dap->date2 = optarg; 398 } else { 399 cvs_log(LP_ERR, 400 "no more than two revisions/dates can " 401 "be specified"); 402 } 403 break; 404 case 'l': 405 strlcat(diffargs, " -l", sizeof(diffargs)); 406 recurse = 0; 407 cvs_cmd_diff.file_flags &= ~CF_RECURSE; 408 break; 409 case 'i': 410 strlcat(diffargs, " -i", sizeof(diffargs)); 411 iflag = 1; 412 break; 413 case 'N': 414 strlcat(diffargs, " -N", sizeof(diffargs)); 415 Nflag = 1; 416 break; 417 case 'n': 418 strlcat(diffargs, " -n", sizeof(diffargs)); 419 format = D_RCSDIFF; 420 break; 421 case 'p': 422 strlcat(diffargs, " -p", sizeof(diffargs)); 423 pflag = 1; 424 break; 425 case 'r': 426 if ((dap->rev1 == NULL) && (dap->date1 == NULL)) { 427 dap->rev1 = optarg; 428 } else if ((dap->rev2 == NULL) && 429 (dap->date2 == NULL)) { 430 dap->rev2 = optarg; 431 } else { 432 cvs_log(LP_ERR, 433 "no more than two revisions/dates can " 434 "be specified"); 435 return (CVS_EX_USAGE); 436 } 437 break; 438 case 'R': 439 cvs_cmd_diff.file_flags |= CF_RECURSE; 440 break; 441 case 'u': 442 strlcat(diffargs, " -u", sizeof(diffargs)); 443 format = D_UNIFIED; 444 break; 445 default: 446 return (CVS_EX_USAGE); 447 } 448 } 449 450 *arg = optind; 451 return (0); 452 } 453 454 int 455 cvs_diff_cleanup(void) 456 { 457 if (dap != NULL) { 458 free(dap); 459 dap = NULL; 460 } 461 return (0); 462 } 463 464 /* 465 * cvs_diff_pre_exec() 466 * 467 */ 468 int 469 cvs_diff_pre_exec(struct cvsroot *root) 470 { 471 if (root->cr_method != CVS_METHOD_LOCAL) { 472 /* send the flags */ 473 if (Nflag && (cvs_sendarg(root, "-N", 0) < 0)) 474 return (CVS_EX_PROTO); 475 if (pflag && (cvs_sendarg(root, "-p", 0) < 0)) 476 return (CVS_EX_PROTO); 477 478 if (format == D_CONTEXT) { 479 if (cvs_sendarg(root, "-c", 0) < 0) 480 return (CVS_EX_PROTO); 481 } else if (format == D_UNIFIED) { 482 if (cvs_sendarg(root, "-u", 0) < 0) 483 return (CVS_EX_PROTO); 484 } 485 486 if (dap->rev1 != NULL) { 487 if ((cvs_sendarg(root, "-r", 0) < 0) || 488 (cvs_sendarg(root, dap->rev1, 0) < 0)) 489 return (CVS_EX_PROTO); 490 } else if (dap->date1 != NULL) { 491 if ((cvs_sendarg(root, "-D", 0) < 0) || 492 (cvs_sendarg(root, dap->date1, 0) < 0)) 493 return (CVS_EX_PROTO); 494 } 495 if (dap->rev2 != NULL) { 496 if ((cvs_sendarg(root, "-r", 0) < 0) || 497 (cvs_sendarg(root, dap->rev2, 0) < 0)) 498 return (CVS_EX_PROTO); 499 } else if (dap->date2 != NULL) { 500 if ((cvs_sendarg(root, "-D", 0) < 0) || 501 (cvs_sendarg(root, dap->date2, 0) < 0)) 502 return (CVS_EX_PROTO); 503 } 504 } 505 506 return (0); 507 } 508 509 510 /* 511 * cvs_diff_file() 512 * 513 * Diff a single file. 514 */ 515 static int 516 cvs_diff_remote(struct cvs_file *cfp, void *arg) 517 { 518 char *dir, *repo; 519 char fpath[MAXPATHLEN], dfpath[MAXPATHLEN]; 520 struct cvsroot *root; 521 522 if (cfp->cf_type == DT_DIR) { 523 if (cfp->cf_cvstat == CVS_FST_UNKNOWN) { 524 root = cfp->cf_parent->cf_root; 525 cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name); 526 } else { 527 root = cfp->cf_root; 528 #if 0 529 if ((cfp->cf_parent == NULL) || 530 (root != cfp->cf_parent->cf_root)) { 531 cvs_connect(root); 532 cvs_diff_pre_exec(root); 533 } 534 #endif 535 536 cvs_senddir(root, cfp); 537 } 538 539 return (0); 540 } 541 542 if (cfp->cf_cvstat == CVS_FST_LOST) { 543 cvs_log(LP_WARN, "cannot find file %s", cfp->cf_name); 544 return (0); 545 } 546 547 diff_file = cvs_file_getpath(cfp, fpath, sizeof(fpath)); 548 549 if (cfp->cf_parent != NULL) { 550 dir = cvs_file_getpath(cfp->cf_parent, dfpath, sizeof(dfpath)); 551 root = cfp->cf_parent->cf_root; 552 repo = cfp->cf_parent->cf_repo; 553 } else { 554 dir = "."; 555 root = NULL; 556 repo = NULL; 557 } 558 559 if (cfp->cf_cvstat == CVS_FST_UNKNOWN) { 560 cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name); 561 return (0); 562 } 563 564 if (cvs_sendentry(root, cfp) < 0) 565 return (CVS_EX_PROTO); 566 567 if (cfp->cf_cvstat == CVS_FST_UPTODATE) { 568 cvs_sendreq(root, CVS_REQ_UNCHANGED, cfp->cf_name); 569 return (0); 570 } 571 572 /* at this point, the file is modified */ 573 if ((cvs_sendreq(root, CVS_REQ_MODIFIED, cfp->cf_name) < 0) || 574 (cvs_sendfile(root, diff_file) < 0)) 575 return (CVS_EX_PROTO); 576 577 return (0); 578 } 579 580 static int 581 cvs_diff_local(CVSFILE *cf, void *arg) 582 { 583 int len; 584 char *repo, buf[64]; 585 char fpath[MAXPATHLEN], rcspath[MAXPATHLEN]; 586 char path_tmp1[MAXPATHLEN], path_tmp2[MAXPATHLEN]; 587 BUF *b1, *b2; 588 RCSNUM *r1, *r2; 589 RCSFILE *rf; 590 struct cvsroot *root; 591 592 rf = NULL; 593 root = CVS_DIR_ROOT(cf); 594 repo = CVS_DIR_REPO(cf); 595 diff_file = cvs_file_getpath(cf, fpath, sizeof(fpath)); 596 597 if (cf->cf_type == DT_DIR) { 598 if (verbosity > 1) 599 cvs_log(LP_NOTICE, "Diffing %s", fpath); 600 return (0); 601 } 602 603 if (cf->cf_cvstat == CVS_FST_LOST) { 604 cvs_log(LP_WARN, "cannot find file %s", cf->cf_name); 605 return (0); 606 } 607 608 if (cf->cf_cvstat == CVS_FST_UNKNOWN) { 609 cvs_log(LP_WARN, "I know nothing about %s", diff_file); 610 return (0); 611 } else if (cf->cf_cvstat == CVS_FST_UPTODATE) 612 return (0); 613 614 /* at this point, the file is modified */ 615 len = snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s", 616 root->cr_dir, repo, diff_file, RCS_FILE_EXT); 617 if (len == -1 || len >= (int)sizeof(rcspath)) { 618 errno = ENAMETOOLONG; 619 cvs_log(LP_ERRNO, "%s", rcspath); 620 return (CVS_EX_DATA); 621 } 622 623 rf = rcs_open(rcspath, RCS_READ); 624 if (rf == NULL) { 625 return (CVS_EX_DATA); 626 } 627 628 cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file, 629 RCS_DIFF_DIV, rcspath); 630 631 if (dap->rev1 == NULL) 632 r1 = cf->cf_lrev; 633 else { 634 if ((r1 = rcsnum_parse(dap->rev1)) == NULL) { 635 return (CVS_EX_DATA); 636 } 637 } 638 639 cvs_printf("retrieving revision %s\n", 640 rcsnum_tostr(r1, buf, sizeof(buf))); 641 b1 = rcs_getrev(rf, r1); 642 643 if (b1 == NULL) { 644 cvs_log(LP_ERR, "failed to retrieve revision %s\n", 645 rcsnum_tostr(r1, buf, sizeof(buf))); 646 if (r1 != cf->cf_lrev) 647 rcsnum_free(r1); 648 return (CVS_EX_DATA); 649 } 650 651 if (r1 != cf->cf_lrev) 652 rcsnum_free(r1); 653 654 if (dap->rev2 != NULL) { 655 cvs_printf("retrieving revision %s\n", dap->rev2); 656 if ((r2 = rcsnum_parse(dap->rev2)) == NULL) { 657 return (CVS_EX_DATA); 658 } 659 b2 = rcs_getrev(rf, r2); 660 rcsnum_free(r2); 661 } else { 662 b2 = cvs_buf_load(diff_file, BUF_AUTOEXT); 663 } 664 665 rcs_close(rf); 666 667 if (b2 == NULL) { 668 cvs_log(LP_ERR, "failed to retrieve revision %s\n", 669 dap->rev2); 670 cvs_buf_free(b1); 671 return (CVS_EX_DATA); 672 } 673 674 cvs_printf("%s", diffargs); 675 cvs_printf(" -r%s", buf); 676 if (dap->rev2 != NULL) 677 cvs_printf(" -r%s", dap->rev2); 678 cvs_printf(" %s\n", diff_file); 679 strlcpy(path_tmp1, "/tmp/diff1.XXXXXXXXXX", sizeof(path_tmp1)); 680 if (cvs_buf_write_stmp(b1, path_tmp1, 0600) == -1) { 681 cvs_buf_free(b1); 682 cvs_buf_free(b2); 683 return (CVS_EX_DATA); 684 } 685 cvs_buf_free(b1); 686 687 strlcpy(path_tmp2, "/tmp/diff2.XXXXXXXXXX", sizeof(path_tmp2)); 688 if (cvs_buf_write_stmp(b2, path_tmp2, 0600) == -1) { 689 cvs_buf_free(b2); 690 (void)unlink(path_tmp1); 691 return (CVS_EX_DATA); 692 } 693 cvs_buf_free(b2); 694 695 cvs_diffreg(path_tmp1, path_tmp2); 696 (void)unlink(path_tmp1); 697 (void)unlink(path_tmp2); 698 699 return (0); 700 } 701 702 703 int 704 cvs_diffreg(const char *file1, const char *file2) 705 { 706 FILE *f1, *f2; 707 int i, rval; 708 void *tmp; 709 710 f1 = f2 = NULL; 711 rval = D_SAME; 712 anychange = 0; 713 lastline = 0; 714 lastmatchline = 0; 715 context_vec_ptr = context_vec_start - 1; 716 chrtran = (iflag ? cup2low : clow2low); 717 718 f1 = fopen(file1, "r"); 719 if (f1 == NULL) { 720 cvs_log(LP_ERRNO, "%s", file1); 721 goto closem; 722 } 723 724 f2 = fopen(file2, "r"); 725 if (f2 == NULL) { 726 cvs_log(LP_ERRNO, "%s", file2); 727 goto closem; 728 } 729 730 switch (files_differ(f1, f2)) { 731 case 0: 732 goto closem; 733 case 1: 734 break; 735 default: 736 /* error */ 737 goto closem; 738 } 739 740 if (!asciifile(f1) || !asciifile(f2)) { 741 rval = D_BINARY; 742 goto closem; 743 } 744 if ((prepare(0, f1, stb1.st_size) < 0) || 745 (prepare(1, f2, stb2.st_size) < 0)) { 746 goto closem; 747 } 748 prune(); 749 sort(sfile[0], slen[0]); 750 sort(sfile[1], slen[1]); 751 752 member = (int *)file[1]; 753 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 754 if ((tmp = realloc(member, (slen[1] + 2) * sizeof(int))) == NULL) { 755 free(member); 756 member = NULL; 757 cvs_log(LP_ERRNO, "failed to resize member"); 758 goto closem; 759 } 760 member = (int *)tmp; 761 762 class = (int *)file[0]; 763 unsort(sfile[0], slen[0], class); 764 if ((tmp = realloc(class, (slen[0] + 2) * sizeof(int))) == NULL) { 765 free(class); 766 class = NULL; 767 cvs_log(LP_ERRNO, "failed to resize class"); 768 goto closem; 769 } 770 class = (int *)tmp; 771 772 if ((klist = malloc((slen[0] + 2) * sizeof(int))) == NULL) { 773 cvs_log(LP_ERRNO, "failed to allocate klist"); 774 goto closem; 775 } 776 clen = 0; 777 clistlen = 100; 778 if ((clist = malloc(clistlen * sizeof(cand))) == NULL) { 779 cvs_log(LP_ERRNO, "failed to allocate clist"); 780 goto closem; 781 } 782 783 if ((i = stone(class, slen[0], member, klist)) < 0) 784 goto closem; 785 786 free(member); 787 free(class); 788 789 if ((tmp = realloc(J, (diff_len[0] + 2) * sizeof(int))) == NULL) { 790 free(J); 791 J = NULL; 792 cvs_log(LP_ERRNO, "failed to resize J"); 793 goto closem; 794 } 795 J = (int *)tmp; 796 unravel(klist[i]); 797 free(clist); 798 free(klist); 799 800 if ((tmp = realloc(ixold, (diff_len[0] + 2) * sizeof(long))) == NULL) { 801 free(ixold); 802 ixold = NULL; 803 cvs_log(LP_ERRNO, "failed to resize ixold"); 804 goto closem; 805 } 806 ixold = (long *)tmp; 807 if ((tmp = realloc(ixnew, (diff_len[1] + 2) * sizeof(long))) == NULL) { 808 free(ixnew); 809 ixnew = NULL; 810 cvs_log(LP_ERRNO, "failed to resize ixnew"); 811 goto closem; 812 } 813 ixnew = (long *)tmp; 814 check(f1, f2); 815 output(file1, f1, file2, f2); 816 817 closem: 818 if (anychange) { 819 if (rval == D_SAME) 820 rval = D_DIFFER; 821 } 822 if (f1 != NULL) 823 fclose(f1); 824 if (f2 != NULL) 825 fclose(f2); 826 827 return (rval); 828 } 829 830 /* 831 * Check to see if the given files differ. 832 * Returns 0 if they are the same, 1 if different, and -1 on error. 833 * XXX - could use code from cmp(1) [faster] 834 */ 835 static int 836 files_differ(FILE *f1, FILE *f2) 837 { 838 char buf1[BUFSIZ], buf2[BUFSIZ]; 839 size_t i, j; 840 841 if (stb1.st_size != stb2.st_size) 842 return (1); 843 for (;;) { 844 i = fread(buf1, 1, sizeof(buf1), f1); 845 j = fread(buf2, 1, sizeof(buf2), f2); 846 if (i != j) 847 return (1); 848 if (i == 0 && j == 0) { 849 if (ferror(f1) || ferror(f2)) 850 return (1); 851 return (0); 852 } 853 if (memcmp(buf1, buf2, i) != 0) 854 return (1); 855 } 856 } 857 858 static int 859 prepare(int i, FILE *fd, off_t filesize) 860 { 861 void *tmp; 862 struct line *p; 863 int j, h; 864 size_t sz; 865 866 rewind(fd); 867 868 sz = ((size_t)filesize <= SIZE_MAX ? (size_t)filesize : SIZE_MAX) / 25; 869 if (sz < 100) 870 sz = 100; 871 872 p = (struct line *)malloc((sz + 3) * sizeof(struct line)); 873 if (p == NULL) { 874 cvs_log(LP_ERRNO, "failed to prepare line array"); 875 return (-1); 876 } 877 for (j = 0; (h = readhash(fd));) { 878 if (j == (int)sz) { 879 sz = sz * 3 / 2; 880 tmp = realloc(p, (sz + 3) * sizeof(struct line)); 881 if (tmp == NULL) { 882 cvs_log(LP_ERRNO, "failed to grow line array"); 883 free(p); 884 return (-1); 885 } 886 p = (struct line *)tmp; 887 } 888 p[++j].value = h; 889 } 890 diff_len[i] = j; 891 file[i] = p; 892 893 return (0); 894 } 895 896 static void 897 prune(void) 898 { 899 int i, j; 900 901 for (pref = 0; pref < diff_len[0] && pref < diff_len[1] && 902 file[0][pref + 1].value == file[1][pref + 1].value; 903 pref++) 904 ; 905 for (suff = 0; 906 (suff < diff_len[0] - pref) && (suff < diff_len[1] - pref) && 907 (file[0][diff_len[0] - suff].value == 908 file[1][diff_len[1] - suff].value); 909 suff++) 910 ; 911 for (j = 0; j < 2; j++) { 912 sfile[j] = file[j] + pref; 913 slen[j] = diff_len[j] - pref - suff; 914 for (i = 0; i <= slen[j]; i++) 915 sfile[j][i].serial = i; 916 } 917 } 918 919 static void 920 equiv(struct line *a, int n, struct line *b, int m, int *c) 921 { 922 int i, j; 923 924 i = j = 1; 925 while (i <= n && j <= m) { 926 if (a[i].value < b[j].value) 927 a[i++].value = 0; 928 else if (a[i].value == b[j].value) 929 a[i++].value = j; 930 else 931 j++; 932 } 933 while (i <= n) 934 a[i++].value = 0; 935 b[m + 1].value = 0; 936 j = 0; 937 while (++j <= m) { 938 c[j] = -b[j].serial; 939 while (b[j + 1].value == b[j].value) { 940 j++; 941 c[j] = b[j].serial; 942 } 943 } 944 c[j] = -1; 945 } 946 947 /* Code taken from ping.c */ 948 static int 949 isqrt(int n) 950 { 951 int y, x = 1; 952 953 if (n == 0) 954 return (0); 955 956 do { /* newton was a stinker */ 957 y = x; 958 x = n / x; 959 x += y; 960 x /= 2; 961 } while ((x - y) > 1 || (x - y) < -1); 962 963 return (x); 964 } 965 966 static int 967 stone(int *a, int n, int *b, int *c) 968 { 969 int ret; 970 int i, k, y, j, l; 971 int oldc, tc, oldl; 972 u_int numtries; 973 974 /* XXX move the isqrt() out of the macro to avoid multiple calls */ 975 const u_int bound = dflag ? UINT_MAX : MAX(256, (u_int)isqrt(n)); 976 977 k = 0; 978 if ((ret = newcand(0, 0, 0)) < 0) 979 return (-1); 980 c[0] = ret; 981 for (i = 1; i <= n; i++) { 982 j = a[i]; 983 if (j == 0) 984 continue; 985 y = -b[j]; 986 oldl = 0; 987 oldc = c[0]; 988 numtries = 0; 989 do { 990 if (y <= clist[oldc].y) 991 continue; 992 l = search(c, k, y); 993 if (l != oldl + 1) 994 oldc = c[l - 1]; 995 if (l <= k) { 996 if (clist[c[l]].y <= y) 997 continue; 998 tc = c[l]; 999 if ((ret = newcand(i, y, oldc)) < 0) 1000 return (-1); 1001 c[l] = ret; 1002 oldc = tc; 1003 oldl = l; 1004 numtries++; 1005 } else { 1006 if ((ret = newcand(i, y, oldc)) < 0) 1007 return (-1); 1008 c[l] = ret; 1009 k++; 1010 break; 1011 } 1012 } while ((y = b[++j]) > 0 && numtries < bound); 1013 } 1014 return (k); 1015 } 1016 1017 static int 1018 newcand(int x, int y, int pred) 1019 { 1020 struct cand *q, *tmp; 1021 int newclistlen; 1022 1023 if (clen == clistlen) { 1024 newclistlen = clistlen * 11 / 10; 1025 tmp = realloc(clist, newclistlen * sizeof(cand)); 1026 if (tmp == NULL) { 1027 cvs_log(LP_ERRNO, "failed to resize clist"); 1028 return (-1); 1029 } 1030 clist = tmp; 1031 clistlen = newclistlen; 1032 } 1033 q = clist + clen; 1034 q->x = x; 1035 q->y = y; 1036 q->pred = pred; 1037 return (clen++); 1038 } 1039 1040 static int 1041 search(int *c, int k, int y) 1042 { 1043 int i, j, l, t; 1044 1045 if (clist[c[k]].y < y) /* quick look for typical case */ 1046 return (k + 1); 1047 i = 0; 1048 j = k + 1; 1049 while (1) { 1050 l = i + j; 1051 if ((l >>= 1) <= i) 1052 break; 1053 t = clist[c[l]].y; 1054 if (t > y) 1055 j = l; 1056 else if (t < y) 1057 i = l; 1058 else 1059 return (l); 1060 } 1061 return (l + 1); 1062 } 1063 1064 static void 1065 unravel(int p) 1066 { 1067 struct cand *q; 1068 int i; 1069 1070 for (i = 0; i <= diff_len[0]; i++) 1071 J[i] = i <= pref ? i : 1072 i > diff_len[0] - suff ? i + diff_len[1] - diff_len[0] : 0; 1073 for (q = clist + p; q->y != 0; q = clist + q->pred) 1074 J[q->x + pref] = q->y + pref; 1075 } 1076 1077 /* 1078 * Check does double duty: 1079 * 1. ferret out any fortuitous correspondences due 1080 * to confounding by hashing (which result in "jackpot") 1081 * 2. collect random access indexes to the two files 1082 */ 1083 static void 1084 check(FILE *f1, FILE *f2) 1085 { 1086 int i, j, jackpot, c, d; 1087 long ctold, ctnew; 1088 1089 rewind(f1); 1090 rewind(f2); 1091 j = 1; 1092 ixold[0] = ixnew[0] = 0; 1093 jackpot = 0; 1094 ctold = ctnew = 0; 1095 for (i = 1; i <= diff_len[0]; i++) { 1096 if (J[i] == 0) { 1097 ixold[i] = ctold += skipline(f1); 1098 continue; 1099 } 1100 while (j < J[i]) { 1101 ixnew[j] = ctnew += skipline(f2); 1102 j++; 1103 } 1104 if (bflag || wflag || iflag) { 1105 for (;;) { 1106 c = getc(f1); 1107 d = getc(f2); 1108 /* 1109 * GNU diff ignores a missing newline 1110 * in one file if bflag || wflag. 1111 */ 1112 if ((bflag || wflag) && 1113 ((c == EOF && d == '\n') || 1114 (c == '\n' && d == EOF))) { 1115 break; 1116 } 1117 ctold++; 1118 ctnew++; 1119 if (bflag && isspace(c) && isspace(d)) { 1120 do { 1121 if (c == '\n') 1122 break; 1123 ctold++; 1124 } while (isspace(c = getc(f1))); 1125 do { 1126 if (d == '\n') 1127 break; 1128 ctnew++; 1129 } while (isspace(d = getc(f2))); 1130 } else if (wflag) { 1131 while (isspace(c) && c != '\n') { 1132 c = getc(f1); 1133 ctold++; 1134 } 1135 while (isspace(d) && d != '\n') { 1136 d = getc(f2); 1137 ctnew++; 1138 } 1139 } 1140 if (chrtran[c] != chrtran[d]) { 1141 jackpot++; 1142 J[i] = 0; 1143 if (c != '\n' && c != EOF) 1144 ctold += skipline(f1); 1145 if (d != '\n' && c != EOF) 1146 ctnew += skipline(f2); 1147 break; 1148 } 1149 if (c == '\n' || c == EOF) 1150 break; 1151 } 1152 } else { 1153 for (;;) { 1154 ctold++; 1155 ctnew++; 1156 if ((c = getc(f1)) != (d = getc(f2))) { 1157 /* jackpot++; */ 1158 J[i] = 0; 1159 if (c != '\n' && c != EOF) 1160 ctold += skipline(f1); 1161 if (d != '\n' && c != EOF) 1162 ctnew += skipline(f2); 1163 break; 1164 } 1165 if (c == '\n' || c == EOF) 1166 break; 1167 } 1168 } 1169 ixold[i] = ctold; 1170 ixnew[j] = ctnew; 1171 j++; 1172 } 1173 for (; j <= diff_len[1]; j++) 1174 ixnew[j] = ctnew += skipline(f2); 1175 /* 1176 * if (jackpot) 1177 * cvs_printf("jackpot\n"); 1178 */ 1179 } 1180 1181 /* shellsort CACM #201 */ 1182 static void 1183 sort(struct line *a, int n) 1184 { 1185 struct line *ai, *aim, w; 1186 int j, m = 0, k; 1187 1188 if (n == 0) 1189 return; 1190 for (j = 1; j <= n; j *= 2) 1191 m = 2 * j - 1; 1192 for (m /= 2; m != 0; m /= 2) { 1193 k = n - m; 1194 for (j = 1; j <= k; j++) { 1195 for (ai = &a[j]; ai > a; ai -= m) { 1196 aim = &ai[m]; 1197 if (aim < ai) 1198 break; /* wraparound */ 1199 if (aim->value > ai[0].value || 1200 (aim->value == ai[0].value && 1201 aim->serial > ai[0].serial)) 1202 break; 1203 w.value = ai[0].value; 1204 ai[0].value = aim->value; 1205 aim->value = w.value; 1206 w.serial = ai[0].serial; 1207 ai[0].serial = aim->serial; 1208 aim->serial = w.serial; 1209 } 1210 } 1211 } 1212 } 1213 1214 static void 1215 unsort(struct line *f, int l, int *b) 1216 { 1217 int *a, i; 1218 1219 if ((a = (int *)malloc((l + 1) * sizeof(int))) == NULL) { 1220 cvs_log(LP_ERRNO, "failed to allocate sort array"); 1221 return; 1222 } 1223 for (i = 1; i <= l; i++) 1224 a[f[i].serial] = f[i].value; 1225 for (i = 1; i <= l; i++) 1226 b[i] = a[i]; 1227 free(a); 1228 } 1229 1230 static int 1231 skipline(FILE *f) 1232 { 1233 int i, c; 1234 1235 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 1236 continue; 1237 return (i); 1238 } 1239 1240 static void 1241 output(const char *file1, FILE *f1, const char *file2, FILE *f2) 1242 { 1243 int m, i0, i1, j0, j1; 1244 1245 rewind(f1); 1246 rewind(f2); 1247 m = diff_len[0]; 1248 J[0] = 0; 1249 J[m + 1] = diff_len[1] + 1; 1250 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 1251 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 1252 i0++; 1253 j0 = J[i0 - 1] + 1; 1254 i1 = i0 - 1; 1255 while (i1 < m && J[i1 + 1] == 0) 1256 i1++; 1257 j1 = J[i1 + 1] - 1; 1258 J[i1] = j1; 1259 change(file1, f1, file2, f2, i0, i1, j0, j1); 1260 } 1261 if (m == 0) 1262 change(file1, f1, file2, f2, 1, 0, 1, diff_len[1]); 1263 if (format == D_IFDEF) { 1264 for (;;) { 1265 #define c i0 1266 if ((c = getc(f1)) == EOF) 1267 return; 1268 cvs_putchar(c); 1269 } 1270 #undef c 1271 } 1272 if (anychange != 0) { 1273 if (format == D_CONTEXT) 1274 dump_context_vec(f1, f2); 1275 else if (format == D_UNIFIED) 1276 dump_unified_vec(f1, f2); 1277 } 1278 } 1279 1280 static __inline void 1281 range(int a, int b, char *separator) 1282 { 1283 cvs_printf("%d", a > b ? b : a); 1284 if (a < b) 1285 cvs_printf("%s%d", separator, b); 1286 } 1287 1288 static __inline void 1289 uni_range(int a, int b) 1290 { 1291 if (a < b) 1292 cvs_printf("%d,%d", a, b - a + 1); 1293 else if (a == b) 1294 cvs_printf("%d", b); 1295 else 1296 cvs_printf("%d,0", b); 1297 } 1298 1299 static char * 1300 preadline(int fd, size_t rlen, off_t off) 1301 { 1302 char *line; 1303 ssize_t nr; 1304 1305 line = malloc(rlen + 1); 1306 if (line == NULL) { 1307 cvs_log(LP_ERRNO, "failed to allocate line"); 1308 return (NULL); 1309 } 1310 if ((nr = pread(fd, line, rlen, off)) < 0) { 1311 cvs_log(LP_ERRNO, "preadline failed"); 1312 return (NULL); 1313 } 1314 line[nr] = '\0'; 1315 return (line); 1316 } 1317 1318 static int 1319 ignoreline(char *line) 1320 { 1321 int ret; 1322 1323 ret = regexec(&ignore_re, line, 0, NULL, 0); 1324 free(line); 1325 return (ret == 0); /* if it matched, it should be ignored. */ 1326 } 1327 1328 /* 1329 * Indicate that there is a difference between lines a and b of the from file 1330 * to get to lines c to d of the to file. If a is greater then b then there 1331 * are no lines in the from file involved and this means that there were 1332 * lines appended (beginning at b). If c is greater than d then there are 1333 * lines missing from the to file. 1334 */ 1335 static void 1336 change(const char *file1, FILE *f1, const char *file2, FILE *f2, 1337 int a, int b, int c, int d) 1338 { 1339 static size_t max_context = 64; 1340 int i; 1341 1342 if (format != D_IFDEF && a > b && c > d) 1343 return; 1344 if (ignore_pats != NULL) { 1345 char *line; 1346 /* 1347 * All lines in the change, insert, or delete must 1348 * match an ignore pattern for the change to be 1349 * ignored. 1350 */ 1351 if (a <= b) { /* Changes and deletes. */ 1352 for (i = a; i <= b; i++) { 1353 line = preadline(fileno(f1), 1354 ixold[i] - ixold[i - 1], ixold[i - 1]); 1355 if (!ignoreline(line)) 1356 goto proceed; 1357 } 1358 } 1359 if (a > b || c <= d) { /* Changes and inserts. */ 1360 for (i = c; i <= d; i++) { 1361 line = preadline(fileno(f2), 1362 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1363 if (!ignoreline(line)) 1364 goto proceed; 1365 } 1366 } 1367 return; 1368 } 1369 proceed: 1370 if (format == D_CONTEXT || format == D_UNIFIED) { 1371 /* 1372 * Allocate change records as needed. 1373 */ 1374 if (context_vec_ptr == context_vec_end - 1) { 1375 struct context_vec *tmp; 1376 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1377 max_context <<= 1; 1378 if ((tmp = realloc(context_vec_start, 1379 max_context * sizeof(struct context_vec))) == NULL) { 1380 free(context_vec_start); 1381 context_vec_start = NULL; 1382 cvs_log(LP_ERRNO, 1383 "failed to resize context_vec_start"); 1384 return; 1385 } 1386 context_vec_start = tmp; 1387 context_vec_end = context_vec_start + max_context; 1388 context_vec_ptr = context_vec_start + offset; 1389 } 1390 if (anychange == 0) { 1391 /* 1392 * Print the context/unidiff header first time through. 1393 */ 1394 cvs_printf("%s %s %s", 1395 format == D_CONTEXT ? "***" : "---", diff_file, 1396 ctime(&stb1.st_mtime)); 1397 cvs_printf("%s %s %s", 1398 format == D_CONTEXT ? "---" : "+++", diff_file, 1399 ctime(&stb2.st_mtime)); 1400 anychange = 1; 1401 } else if (a > context_vec_ptr->b + (2 * context) + 1 && 1402 c > context_vec_ptr->d + (2 * context) + 1) { 1403 /* 1404 * If this change is more than 'context' lines from the 1405 * previous change, dump the record and reset it. 1406 */ 1407 if (format == D_CONTEXT) 1408 dump_context_vec(f1, f2); 1409 else 1410 dump_unified_vec(f1, f2); 1411 } 1412 context_vec_ptr++; 1413 context_vec_ptr->a = a; 1414 context_vec_ptr->b = b; 1415 context_vec_ptr->c = c; 1416 context_vec_ptr->d = d; 1417 return; 1418 } 1419 if (anychange == 0) 1420 anychange = 1; 1421 switch (format) { 1422 case D_BRIEF: 1423 return; 1424 case D_NORMAL: 1425 range(a, b, ","); 1426 cvs_putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 1427 if (format == D_NORMAL) 1428 range(c, d, ","); 1429 cvs_putchar('\n'); 1430 break; 1431 case D_RCSDIFF: 1432 if (a > b) 1433 cvs_printf("a%d %d\n", b, d - c + 1); 1434 else { 1435 cvs_printf("d%d %d\n", a, b - a + 1); 1436 1437 if (!(c > d)) /* add changed lines */ 1438 cvs_printf("a%d %d\n", b, d - c + 1); 1439 } 1440 break; 1441 } 1442 if (format == D_NORMAL || format == D_IFDEF) { 1443 fetch(ixold, a, b, f1, '<', 1); 1444 if (a <= b && c <= d && format == D_NORMAL) 1445 puts("---"); 1446 } 1447 i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0); 1448 if (inifdef) { 1449 cvs_printf("#endif /* %s */\n", ifdefname); 1450 inifdef = 0; 1451 } 1452 } 1453 1454 static int 1455 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) 1456 { 1457 int i, j, c, lastc, col, nc; 1458 1459 /* 1460 * When doing #ifdef's, copy down to current line 1461 * if this is the first file, so that stuff makes it to output. 1462 */ 1463 if (format == D_IFDEF && oldfile) { 1464 long curpos = ftell(lb); 1465 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1466 nc = f[a > b ? b : a - 1] - curpos; 1467 for (i = 0; i < nc; i++) 1468 cvs_putchar(getc(lb)); 1469 } 1470 if (a > b) 1471 return (0); 1472 if (format == D_IFDEF) { 1473 if (inifdef) { 1474 cvs_printf("#else /* %s%s */\n", 1475 oldfile == 1 ? "!" : "", ifdefname); 1476 } else { 1477 if (oldfile) 1478 cvs_printf("#ifndef %s\n", ifdefname); 1479 else 1480 cvs_printf("#ifdef %s\n", ifdefname); 1481 } 1482 inifdef = 1 + oldfile; 1483 } 1484 for (i = a; i <= b; i++) { 1485 fseek(lb, f[i - 1], SEEK_SET); 1486 nc = f[i] - f[i - 1]; 1487 if (format != D_IFDEF && ch != '\0') { 1488 cvs_putchar(ch); 1489 if (Tflag && (format == D_NORMAL || format == D_CONTEXT 1490 || format == D_UNIFIED)) 1491 cvs_putchar('\t'); 1492 else if (format != D_UNIFIED) 1493 cvs_putchar(' '); 1494 } 1495 col = 0; 1496 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1497 if ((c = getc(lb)) == EOF) { 1498 if (format == D_RCSDIFF) 1499 warnx("No newline at end of file"); 1500 else 1501 puts("\n\\ No newline at end of file"); 1502 return (0); 1503 } 1504 if (c == '\t' && tflag) { 1505 do { 1506 cvs_putchar(' '); 1507 } while (++col & 7); 1508 } else { 1509 cvs_putchar(c); 1510 col++; 1511 } 1512 } 1513 } 1514 return (0); 1515 } 1516 1517 /* 1518 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1519 */ 1520 static int 1521 readhash(FILE *f) 1522 { 1523 int i, t, space; 1524 int sum; 1525 1526 sum = 1; 1527 space = 0; 1528 if (!bflag && !wflag) { 1529 if (iflag) 1530 for (i = 0; (t = getc(f)) != '\n'; i++) { 1531 if (t == EOF) { 1532 if (i == 0) 1533 return (0); 1534 break; 1535 } 1536 sum = sum * 127 + chrtran[t]; 1537 } 1538 else 1539 for (i = 0; (t = getc(f)) != '\n'; i++) { 1540 if (t == EOF) { 1541 if (i == 0) 1542 return (0); 1543 break; 1544 } 1545 sum = sum * 127 + t; 1546 } 1547 } else { 1548 for (i = 0;;) { 1549 switch (t = getc(f)) { 1550 case '\t': 1551 case ' ': 1552 space++; 1553 continue; 1554 default: 1555 if (space && !wflag) { 1556 i++; 1557 space = 0; 1558 } 1559 sum = sum * 127 + chrtran[t]; 1560 i++; 1561 continue; 1562 case EOF: 1563 if (i == 0) 1564 return (0); 1565 /* FALLTHROUGH */ 1566 case '\n': 1567 break; 1568 } 1569 break; 1570 } 1571 } 1572 /* 1573 * There is a remote possibility that we end up with a zero sum. 1574 * Zero is used as an EOF marker, so return 1 instead. 1575 */ 1576 return (sum == 0 ? 1 : sum); 1577 } 1578 1579 static int 1580 asciifile(FILE *f) 1581 { 1582 char buf[BUFSIZ]; 1583 int i, cnt; 1584 1585 if (aflag || f == NULL) 1586 return (1); 1587 1588 rewind(f); 1589 cnt = fread(buf, 1, sizeof(buf), f); 1590 for (i = 0; i < cnt; i++) 1591 if (!isprint(buf[i]) && !isspace(buf[i])) 1592 return (0); 1593 return (1); 1594 } 1595 1596 static char* 1597 match_function(const long *f, int pos, FILE *fp) 1598 { 1599 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1600 size_t nc; 1601 int last = lastline; 1602 char *p; 1603 1604 lastline = pos; 1605 while (pos > last) { 1606 fseek(fp, f[pos - 1], SEEK_SET); 1607 nc = f[pos] - f[pos - 1]; 1608 if (nc >= sizeof(buf)) 1609 nc = sizeof(buf) - 1; 1610 nc = fread(buf, 1, nc, fp); 1611 if (nc > 0) { 1612 buf[nc] = '\0'; 1613 p = strchr((const char *)buf, '\n'); 1614 if (p != NULL) 1615 *p = '\0'; 1616 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1617 strlcpy(lastbuf, (const char *)buf, sizeof lastbuf); 1618 lastmatchline = pos; 1619 return lastbuf; 1620 } 1621 } 1622 pos--; 1623 } 1624 return (lastmatchline > 0) ? lastbuf : NULL; 1625 } 1626 1627 1628 /* dump accumulated "context" diff changes */ 1629 static void 1630 dump_context_vec(FILE *f1, FILE *f2) 1631 { 1632 struct context_vec *cvp = context_vec_start; 1633 int lowa, upb, lowc, upd, do_output; 1634 int a, b, c, d; 1635 char ch, *f; 1636 1637 if (context_vec_start > context_vec_ptr) 1638 return; 1639 1640 b = d = 0; /* gcc */ 1641 lowa = MAX(1, cvp->a - context); 1642 upb = MIN(diff_len[0], context_vec_ptr->b + context); 1643 lowc = MAX(1, cvp->c - context); 1644 upd = MIN(diff_len[1], context_vec_ptr->d + context); 1645 1646 cvs_printf("***************"); 1647 if (pflag) { 1648 f = match_function(ixold, lowa - 1, f1); 1649 if (f != NULL) { 1650 cvs_putchar(' '); 1651 cvs_printf("%s", f); 1652 } 1653 } 1654 cvs_printf("\n*** "); 1655 range(lowa, upb, ","); 1656 cvs_printf(" ****\n"); 1657 1658 /* 1659 * Output changes to the "old" file. The first loop suppresses 1660 * output if there were no changes to the "old" file (we'll see 1661 * the "old" lines as context in the "new" list). 1662 */ 1663 do_output = 0; 1664 for (; cvp <= context_vec_ptr; cvp++) 1665 if (cvp->a <= cvp->b) { 1666 cvp = context_vec_start; 1667 do_output++; 1668 break; 1669 } 1670 if (do_output) { 1671 while (cvp <= context_vec_ptr) { 1672 a = cvp->a; 1673 b = cvp->b; 1674 c = cvp->c; 1675 d = cvp->d; 1676 1677 if (a <= b && c <= d) 1678 ch = 'c'; 1679 else 1680 ch = (a <= b) ? 'd' : 'a'; 1681 1682 if (ch == 'a') 1683 fetch(ixold, lowa, b, f1, ' ', 0); 1684 else { 1685 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1686 fetch(ixold, a, b, f1, 1687 ch == 'c' ? '!' : '-', 0); 1688 } 1689 lowa = b + 1; 1690 cvp++; 1691 } 1692 fetch(ixold, b + 1, upb, f1, ' ', 0); 1693 } 1694 /* output changes to the "new" file */ 1695 cvs_printf("--- "); 1696 range(lowc, upd, ","); 1697 cvs_printf(" ----\n"); 1698 1699 do_output = 0; 1700 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1701 if (cvp->c <= cvp->d) { 1702 cvp = context_vec_start; 1703 do_output++; 1704 break; 1705 } 1706 if (do_output) { 1707 while (cvp <= context_vec_ptr) { 1708 a = cvp->a; 1709 b = cvp->b; 1710 c = cvp->c; 1711 d = cvp->d; 1712 1713 if (a <= b && c <= d) 1714 ch = 'c'; 1715 else 1716 ch = (a <= b) ? 'd' : 'a'; 1717 1718 if (ch == 'd') 1719 fetch(ixnew, lowc, d, f2, ' ', 0); 1720 else { 1721 fetch(ixnew, lowc, c - 1, f2, ' ', 0); 1722 fetch(ixnew, c, d, f2, 1723 ch == 'c' ? '!' : '+', 0); 1724 } 1725 lowc = d + 1; 1726 cvp++; 1727 } 1728 fetch(ixnew, d + 1, upd, f2, ' ', 0); 1729 } 1730 context_vec_ptr = context_vec_start - 1; 1731 } 1732 1733 /* dump accumulated "unified" diff changes */ 1734 static void 1735 dump_unified_vec(FILE *f1, FILE *f2) 1736 { 1737 struct context_vec *cvp = context_vec_start; 1738 int lowa, upb, lowc, upd; 1739 int a, b, c, d; 1740 char ch, *f; 1741 1742 if (context_vec_start > context_vec_ptr) 1743 return; 1744 1745 b = d = 0; /* gcc */ 1746 lowa = MAX(1, cvp->a - context); 1747 upb = MIN(diff_len[0], context_vec_ptr->b + context); 1748 lowc = MAX(1, cvp->c - context); 1749 upd = MIN(diff_len[1], context_vec_ptr->d + context); 1750 1751 cvs_printf("@@ -"); 1752 uni_range(lowa, upb); 1753 cvs_printf(" +"); 1754 uni_range(lowc, upd); 1755 cvs_printf(" @@"); 1756 if (pflag) { 1757 f = match_function(ixold, lowa - 1, f1); 1758 if (f != NULL) { 1759 cvs_putchar(' '); 1760 cvs_printf("%s", f); 1761 } 1762 } 1763 cvs_putchar('\n'); 1764 1765 /* 1766 * Output changes in "unified" diff format--the old and new lines 1767 * are printed together. 1768 */ 1769 for (; cvp <= context_vec_ptr; cvp++) { 1770 a = cvp->a; 1771 b = cvp->b; 1772 c = cvp->c; 1773 d = cvp->d; 1774 1775 /* 1776 * c: both new and old changes 1777 * d: only changes in the old file 1778 * a: only changes in the new file 1779 */ 1780 if (a <= b && c <= d) 1781 ch = 'c'; 1782 else 1783 ch = (a <= b) ? 'd' : 'a'; 1784 1785 switch (ch) { 1786 case 'c': 1787 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1788 fetch(ixold, a, b, f1, '-', 0); 1789 fetch(ixnew, c, d, f2, '+', 0); 1790 break; 1791 case 'd': 1792 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1793 fetch(ixold, a, b, f1, '-', 0); 1794 break; 1795 case 'a': 1796 fetch(ixnew, lowc, c - 1, f2, ' ', 0); 1797 fetch(ixnew, c, d, f2, '+', 0); 1798 break; 1799 } 1800 lowa = b + 1; 1801 lowc = d + 1; 1802 } 1803 fetch(ixnew, d + 1, upd, f2, ' ', 0); 1804 1805 context_vec_ptr = context_vec_start - 1; 1806 } 1807