1 /* $OpenBSD: diff.c,v 1.6 2004/08/06 13:08:39 jfb Exp $ */ 2 /* 3 * Copyright (C) Caldera International Inc. 2001-2002. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code and documentation must retain the above 10 * copyright notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed or owned by Caldera 17 * International, Inc. 18 * 4. Neither the name of Caldera International, Inc. nor the names of other 19 * contributors may be used to endorse or promote products derived from 20 * this software without specific prior written permission. 21 * 22 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 23 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 27 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 28 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 29 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 31 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 32 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 /*- 36 * Copyright (c) 1991, 1993 37 * The Regents of the University of California. All rights reserved. 38 * Copyright (c) 2004 Jean-Francois Brousseau. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 /* 67 * Uses an algorithm due to Harold Stone, which finds 68 * a pair of longest identical subsequences in the two 69 * files. 70 * 71 * The major goal is to generate the match vector J. 72 * J[i] is the index of the line in file1 corresponding 73 * to line i file0. J[i] = 0 if there is no 74 * such line in file1. 75 * 76 * Lines are hashed so as to work in core. All potential 77 * matches are located by sorting the lines of each file 78 * on the hash (called ``value''). In particular, this 79 * collects the equivalence classes in file1 together. 80 * Subroutine equiv replaces the value of each line in 81 * file0 by the index of the first element of its 82 * matching equivalence in (the reordered) file1. 83 * To save space equiv squeezes file1 into a single 84 * array member in which the equivalence classes 85 * are simply concatenated, except that their first 86 * members are flagged by changing sign. 87 * 88 * Next the indices that point into member are unsorted into 89 * array class according to the original order of file0. 90 * 91 * The cleverness lies in routine stone. This marches 92 * through the lines of file0, developing a vector klist 93 * of "k-candidates". At step i a k-candidate is a matched 94 * pair of lines x,y (x in file0 y in file1) such that 95 * there is a common subsequence of length k 96 * between the first i lines of file0 and the first y 97 * lines of file1, but there is no such subsequence for 98 * any smaller y. x is the earliest possible mate to y 99 * that occurs in such a subsequence. 100 * 101 * Whenever any of the members of the equivalence class of 102 * lines in file1 matable to a line in file0 has serial number 103 * less than the y of some k-candidate, that k-candidate 104 * with the smallest such y is replaced. The new 105 * k-candidate is chained (via pred) to the current 106 * k-1 candidate so that the actual subsequence can 107 * be recovered. When a member has serial number greater 108 * that the y of all k-candidates, the klist is extended. 109 * At the end, the longest subsequence is pulled out 110 * and placed in the array J by unravel 111 * 112 * With J in hand, the matches there recorded are 113 * check'ed against reality to assure that no spurious 114 * matches have crept in due to hashing. If they have, 115 * they are broken, and "jackpot" is recorded--a harmless 116 * matter except that a true match for a spuriously 117 * mated line may now be unnecessarily reported as a change. 118 * 119 * Much of the complexity of the program comes simply 120 * from trying to minimize core utilization and 121 * maximize the range of doable problems by dynamically 122 * allocating what is needed and reusing what is not. 123 * The core requirements for problems larger than somewhat 124 * are (in words) 2*length(file0) + length(file1) + 125 * 3*(number of k-candidates installed), typically about 126 * 6n words for files of length n. 127 */ 128 129 #include <sys/param.h> 130 #include <sys/stat.h> 131 #include <sys/wait.h> 132 133 #include <errno.h> 134 #include <ctype.h> 135 #include <stdio.h> 136 #include <fcntl.h> 137 #include <paths.h> 138 #include <regex.h> 139 #include <dirent.h> 140 #include <stdlib.h> 141 #include <stddef.h> 142 #include <unistd.h> 143 #include <string.h> 144 #include <sysexits.h> 145 146 #include "cvs.h" 147 #include "log.h" 148 #include "buf.h" 149 #include "proto.h" 150 151 152 #define CVS_DIFF_DEFCTX 3 /* default context length */ 153 154 155 /* 156 * Output format options 157 */ 158 #define D_NORMAL 0 /* Normal output */ 159 #define D_CONTEXT 1 /* Diff with context */ 160 #define D_UNIFIED 2 /* Unified context diff */ 161 #define D_IFDEF 3 /* Diff with merged #ifdef's */ 162 #define D_BRIEF 4 /* Say if the files differ */ 163 164 /* 165 * Status values for print_status() and diffreg() return values 166 */ 167 #define D_SAME 0 /* Files are the same */ 168 #define D_DIFFER 1 /* Files are different */ 169 #define D_BINARY 2 /* Binary files are different */ 170 #define D_COMMON 3 /* Subdirectory common to both dirs */ 171 #define D_ONLY 4 /* Only exists in one directory */ 172 #define D_MISMATCH1 5 /* path1 was a dir, path2 a file */ 173 #define D_MISMATCH2 6 /* path1 was a file, path2 a dir */ 174 #define D_ERROR 7 /* An error occurred */ 175 #define D_SKIPPED1 8 /* path1 was a special file */ 176 #define D_SKIPPED2 9 /* path2 was a special file */ 177 178 struct cand { 179 int x; 180 int y; 181 int pred; 182 } cand; 183 184 struct line { 185 int serial; 186 int value; 187 } *file[2]; 188 189 /* 190 * The following struct is used to record change information when 191 * doing a "context" or "unified" diff. (see routine "change" to 192 * understand the highly mnemonic field names) 193 */ 194 struct context_vec { 195 int a; /* start line in old file */ 196 int b; /* end line in old file */ 197 int c; /* start line in new file */ 198 int d; /* end line in new file */ 199 }; 200 201 struct diff_arg { 202 char *rev1; 203 char *rev2; 204 char *date1; 205 char *date2; 206 }; 207 208 209 struct excludes { 210 char *pattern; 211 struct excludes *next; 212 }; 213 214 215 char *splice(char *, char *); 216 int cvs_diffreg(const char *, const char *); 217 int cvs_diff_file (struct cvs_file *, void *); 218 static void output(const char *, FILE *, const char *, FILE *); 219 static void check(FILE *, FILE *); 220 static void range(int, int, char *); 221 static void uni_range(int, int); 222 static void dump_context_vec(FILE *, FILE *); 223 static void dump_unified_vec(FILE *, FILE *); 224 static void prepare(int, FILE *, off_t); 225 static void prune(void); 226 static void equiv(struct line *, int, struct line *, int, int *); 227 static void unravel(int); 228 static void unsort(struct line *, int, int *); 229 static void change(const char *, FILE *, const char *, FILE *, int, int, int, int); 230 static void sort(struct line *, int); 231 static int ignoreline(char *); 232 static int asciifile(FILE *); 233 static int fetch(long *, int, int, FILE *, int, int); 234 static int newcand(int, int, int); 235 static int search(int *, int, int); 236 static int skipline(FILE *); 237 static int isqrt(int); 238 static int stone(int *, int, int *, int *); 239 static int readhash(FILE *); 240 static int files_differ(FILE *, FILE *); 241 static char *preadline(int, size_t, off_t); 242 243 244 245 extern int cvs_client; 246 247 static int aflag, bflag, dflag, iflag, tflag, Tflag, wflag; 248 static int context, status; 249 static int format = D_NORMAL; 250 static struct stat stb1, stb2; 251 static char *ifdefname, *ignore_pats, diffargs[128]; 252 static const char *diff_file; 253 regex_t ignore_re; 254 255 static int *J; /* will be overlaid on class */ 256 static int *class; /* will be overlaid on file[0] */ 257 static int *klist; /* will be overlaid on file[0] after class */ 258 static int *member; /* will be overlaid on file[1] */ 259 static int clen; 260 static int inifdef; /* whether or not we are in a #ifdef block */ 261 static int len[2]; 262 static int pref, suff; /* length of prefix and suffix */ 263 static int slen[2]; 264 static int anychange; 265 static long *ixnew; /* will be overlaid on file[1] */ 266 static long *ixold; /* will be overlaid on klist */ 267 static struct cand *clist; /* merely a free storage pot for candidates */ 268 static int clistlen; /* the length of clist */ 269 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 270 static u_char *chrtran; /* translation table for case-folding */ 271 static struct context_vec *context_vec_start; 272 static struct context_vec *context_vec_end; 273 static struct context_vec *context_vec_ptr; 274 275 #define FUNCTION_CONTEXT_SIZE 41 276 static int lastline; 277 static int lastmatchline; 278 279 280 281 282 /* 283 * chrtran points to one of 2 translation tables: cup2low if folding upper to 284 * lower case clow2low if not folding case 285 */ 286 u_char clow2low[256] = { 287 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 288 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 289 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 290 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 291 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 292 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 293 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 294 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 295 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 296 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 297 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 298 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 299 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 300 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 301 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 302 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 303 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 304 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 305 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 306 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 307 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 308 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 309 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 310 0xfd, 0xfe, 0xff 311 }; 312 313 u_char cup2low[256] = { 314 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 315 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 316 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 317 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 318 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 319 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 320 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 321 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 322 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 323 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 324 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 325 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 326 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 327 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 328 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 329 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 330 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 331 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 332 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 333 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 334 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 335 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 336 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 337 0xfd, 0xfe, 0xff 338 }; 339 340 341 342 /* 343 * cvs_diff() 344 * 345 * Handler for the `cvs diff' command. 346 * 347 * SYNOPSIS: cvs [args] diff [-clipu] [-D date] [-r rev] 348 */ 349 350 int 351 cvs_diff(int argc, char **argv) 352 { 353 int ch, recurse, flags; 354 struct diff_arg darg; 355 struct cvsroot *root; 356 357 context = CVS_DIFF_DEFCTX; 358 flags = CF_RECURSE|CF_IGNORE|CF_SORT|CF_KNOWN; 359 recurse = 1; 360 361 memset(&darg, 0, sizeof(darg)); 362 strlcpy(diffargs, argv[0], sizeof(diffargs)); 363 364 while ((ch = getopt(argc, argv, "cD:lir:u")) != -1) { 365 switch (ch) { 366 case 'c': 367 strlcat(diffargs, " -c", sizeof(diffargs)); 368 format = D_CONTEXT; 369 break; 370 case 'D': 371 if (darg.date1 == NULL && darg.rev1 == NULL) 372 darg.date1 = optarg; 373 else if (darg.date2 == NULL && darg.rev2 == NULL) 374 darg.date2 = optarg; 375 else { 376 cvs_log(LP_ERR, 377 "no more than two revisions/dates can " 378 "be specified"); 379 } 380 break; 381 case 'l': 382 strlcat(diffargs, " -l", sizeof(diffargs)); 383 recurse = 0; 384 flags &= ~CF_RECURSE; 385 break; 386 case 'i': 387 strlcat(diffargs, " -i", sizeof(diffargs)); 388 iflag = 1; 389 break; 390 case 'r': 391 if ((darg.rev1 == NULL) && (darg.date1 == NULL)) 392 darg.rev1 = optarg; 393 else if ((darg.rev2 == NULL) && (darg.date2 == NULL)) 394 darg.rev2 = optarg; 395 else { 396 cvs_log(LP_ERR, 397 "no more than two revisions/dates can " 398 "be specified"); 399 return (EX_USAGE); 400 } 401 break; 402 case 'u': 403 strlcat(diffargs, " -u", sizeof(diffargs)); 404 format = D_UNIFIED; 405 break; 406 default: 407 return (EX_USAGE); 408 } 409 } 410 411 argc -= optind; 412 argv += optind; 413 414 if (argc == 0) { 415 cvs_files = cvs_file_get(".", flags); 416 } 417 else 418 cvs_files = cvs_file_getspec(argv, argc, 0); 419 420 cvs_file_examine(cvs_files, cvs_diff_file, &darg); 421 422 root = cvs_files->cf_ddat->cd_root; 423 if (root->cr_method != CVS_METHOD_LOCAL) { 424 cvs_senddir(root, cvs_files); 425 cvs_sendreq(root, CVS_REQ_DIFF, NULL); 426 } 427 428 return (0); 429 } 430 431 432 /* 433 * cvs_diff_sendflags() 434 * 435 */ 436 437 int 438 cvs_diff_sendflags(struct cvsroot *root, struct diff_arg *dap) 439 { 440 /* send the flags */ 441 if (format == D_CONTEXT) 442 cvs_sendarg(root, "-c", 0); 443 else if (format == D_UNIFIED) 444 cvs_sendarg(root, "-u", 0); 445 446 if (dap->rev1 != NULL) { 447 cvs_sendarg(root, "-r", 0); 448 cvs_sendarg(root, dap->rev1, 1); 449 } 450 else if (dap->date1 != NULL) { 451 cvs_sendarg(root, "-D", 0); 452 cvs_sendarg(root, dap->date1, 1); 453 } 454 if (dap->rev2 != NULL) { 455 cvs_sendarg(root, "-r", 0); 456 cvs_sendarg(root, dap->rev2, 1); 457 } 458 else if (dap->date2 != NULL) { 459 cvs_sendarg(root, "-D", 0); 460 cvs_sendarg(root, dap->date2, 1); 461 } 462 463 return (0); 464 } 465 466 467 /* 468 * cvs_diff_file() 469 * 470 * Diff a single file. 471 */ 472 473 int 474 cvs_diff_file(struct cvs_file *cfp, void *arg) 475 { 476 char *dir, *repo, rcspath[MAXPATHLEN], buf[64]; 477 BUF *b1, *b2; 478 RCSNUM *r1, *r2; 479 RCSFILE *rf; 480 struct diff_arg *dap; 481 struct cvs_ent *entp; 482 struct cvsroot *root; 483 484 dap = (struct diff_arg *)arg; 485 486 if (cfp->cf_type == DT_DIR) { 487 root = cfp->cf_ddat->cd_root; 488 if ((cfp->cf_parent == NULL) || 489 (root != cfp->cf_parent->cf_ddat->cd_root)) { 490 cvs_connect(root); 491 cvs_diff_sendflags(root, dap); 492 } 493 494 cvs_senddir(root, cfp); 495 return (0); 496 } 497 else /* take the root of parent directory */ 498 root = cfp->cf_parent->cf_ddat->cd_root; 499 500 rf = NULL; 501 diff_file = cfp->cf_path; 502 if (cfp->cf_parent != NULL) { 503 dir = cfp->cf_parent->cf_path; 504 repo = cfp->cf_parent->cf_ddat->cd_repo; 505 } 506 else { 507 dir = "."; 508 repo = NULL; 509 } 510 511 if (cfp->cf_cvstat == CVS_FST_UNKNOWN) { 512 if (root->cr_method == CVS_METHOD_LOCAL) 513 cvs_log(LP_WARN, "I know nothing about %s", diff_file); 514 else 515 cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name); 516 return (0); 517 } 518 519 entp = cvs_ent_getent(diff_file); 520 if (entp == NULL) 521 return (-1); 522 523 if (root->cr_method != CVS_METHOD_LOCAL) { 524 if (cvs_sendentry(root, entp) < 0) { 525 cvs_ent_free(entp); 526 return (-1); 527 } 528 } 529 530 if (cfp->cf_cvstat == CVS_FST_UPTODATE) { 531 if (root->cr_method != CVS_METHOD_LOCAL) 532 cvs_sendreq(root, CVS_REQ_UNCHANGED, cfp->cf_name); 533 cvs_ent_free(entp); 534 return (0); 535 } 536 537 /* at this point, the file is modified */ 538 if (root->cr_method != CVS_METHOD_LOCAL) { 539 cvs_sendreq(root, CVS_REQ_MODIFIED, cfp->cf_name); 540 cvs_sendfile(root, diff_file); 541 } 542 else { 543 snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s", 544 root->cr_dir, repo, diff_file, RCS_FILE_EXT); 545 546 rf = rcs_open(rcspath, RCS_MODE_READ); 547 if (rf == NULL) { 548 cvs_ent_free(entp); 549 return (-1); 550 } 551 552 cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file, 553 RCS_DIFF_DIV, rcspath); 554 555 if (dap->rev1 == NULL) 556 r1 = entp->ce_rev; 557 else { 558 r1 = rcsnum_alloc(); 559 rcsnum_aton(dap->rev1, NULL, r1); 560 } 561 562 cvs_printf("retrieving revision %s\n", 563 rcsnum_tostr(r1, buf, sizeof(buf))); 564 b1 = rcs_getrev(rf, r1); 565 566 if (dap->rev2 != NULL) { 567 cvs_printf("retrieving revision %s\n", dap->rev2); 568 r2 = rcsnum_alloc(); 569 rcsnum_aton(dap->rev2, NULL, r2); 570 b2 = rcs_getrev(rf, r2); 571 } 572 else { 573 b2 = cvs_buf_load(diff_file, BUF_AUTOEXT); 574 } 575 576 rcs_close(rf); 577 578 printf("%s", diffargs); 579 printf(" -r%s", buf); 580 if (dap->rev2 != NULL) 581 printf(" -r%s", dap->rev2); 582 printf(" %s\n", diff_file); 583 cvs_buf_write(b1, "/tmp/diff1", 0600); 584 cvs_buf_write(b2, "/tmp/diff2", 0600); 585 cvs_diffreg("/tmp/diff1", "/tmp/diff2"); 586 } 587 588 cvs_ent_free(entp); 589 return (0); 590 } 591 592 593 int 594 cvs_diffreg(const char *file1, const char *file2) 595 { 596 FILE *f1, *f2; 597 int i, rval; 598 599 f1 = f2 = NULL; 600 rval = D_SAME; 601 anychange = 0; 602 lastline = 0; 603 lastmatchline = 0; 604 context_vec_ptr = context_vec_start - 1; 605 chrtran = (iflag ? cup2low : clow2low); 606 607 f1 = fopen(file1, "r"); 608 if (f1 == NULL) { 609 cvs_log(LP_ERRNO, "%s", file1); 610 status |= 2; 611 goto closem; 612 } 613 614 f2 = fopen(file2, "r"); 615 if (f2 == NULL) { 616 cvs_log(LP_ERRNO, "%s", file2); 617 status |= 2; 618 goto closem; 619 } 620 621 switch (files_differ(f1, f2)) { 622 case 0: 623 goto closem; 624 case 1: 625 break; 626 default: 627 /* error */ 628 status |= 2; 629 goto closem; 630 } 631 632 if (!asciifile(f1) || !asciifile(f2)) { 633 rval = D_BINARY; 634 status |= 1; 635 goto closem; 636 } 637 prepare(0, f1, stb1.st_size); 638 prepare(1, f2, stb2.st_size); 639 prune(); 640 sort(sfile[0], slen[0]); 641 sort(sfile[1], slen[1]); 642 643 member = (int *)file[1]; 644 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 645 member = realloc(member, (slen[1] + 2) * sizeof(int)); 646 647 class = (int *)file[0]; 648 unsort(sfile[0], slen[0], class); 649 class = realloc(class, (slen[0] + 2) * sizeof(int)); 650 651 klist = malloc((slen[0] + 2) * sizeof(int)); 652 clen = 0; 653 clistlen = 100; 654 clist = malloc(clistlen * sizeof(cand)); 655 i = stone(class, slen[0], member, klist); 656 free(member); 657 free(class); 658 659 J = realloc(J, (len[0] + 2) * sizeof(int)); 660 unravel(klist[i]); 661 free(clist); 662 free(klist); 663 664 ixold = realloc(ixold, (len[0] + 2) * sizeof(long)); 665 ixnew = realloc(ixnew, (len[1] + 2) * sizeof(long)); 666 check(f1, f2); 667 output(file1, f1, file2, f2); 668 669 closem: 670 if (anychange) { 671 status |= 1; 672 if (rval == D_SAME) 673 rval = D_DIFFER; 674 } 675 if (f1 != NULL) 676 fclose(f1); 677 if (f2 != NULL) 678 fclose(f2); 679 return (rval); 680 } 681 682 /* 683 * Check to see if the given files differ. 684 * Returns 0 if they are the same, 1 if different, and -1 on error. 685 * XXX - could use code from cmp(1) [faster] 686 */ 687 static int 688 files_differ(FILE *f1, FILE *f2) 689 { 690 char buf1[BUFSIZ], buf2[BUFSIZ]; 691 size_t i, j; 692 693 if (stb1.st_size != stb2.st_size) 694 return (1); 695 for (;;) { 696 i = fread(buf1, 1, sizeof(buf1), f1); 697 j = fread(buf2, 1, sizeof(buf2), f2); 698 if (i != j) 699 return (1); 700 if (i == 0 && j == 0) { 701 if (ferror(f1) || ferror(f2)) 702 return (1); 703 return (0); 704 } 705 if (memcmp(buf1, buf2, i) != 0) 706 return (1); 707 } 708 } 709 710 711 char * 712 splice(char *dir, char *file) 713 { 714 char *tail, *buf; 715 716 if ((tail = strrchr(file, '/')) == NULL) 717 tail = file; 718 else 719 tail++; 720 asprintf(&buf, "%s/%s", dir, tail); 721 return (buf); 722 } 723 724 static void 725 prepare(int i, FILE *fd, off_t filesize) 726 { 727 struct line *p; 728 int j, h; 729 size_t sz; 730 731 rewind(fd); 732 733 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; 734 if (sz < 100) 735 sz = 100; 736 737 p = malloc((sz + 3) * sizeof(struct line)); 738 for (j = 0; (h = readhash(fd));) { 739 if (j == (int)sz) { 740 sz = sz * 3 / 2; 741 p = realloc(p, (sz + 3) * sizeof(struct line)); 742 } 743 p[++j].value = h; 744 } 745 len[i] = j; 746 file[i] = p; 747 } 748 749 static void 750 prune(void) 751 { 752 int i, j; 753 754 for (pref = 0; pref < len[0] && pref < len[1] && 755 file[0][pref + 1].value == file[1][pref + 1].value; 756 pref++) 757 ; 758 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 759 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 760 suff++) 761 ; 762 for (j = 0; j < 2; j++) { 763 sfile[j] = file[j] + pref; 764 slen[j] = len[j] - pref - suff; 765 for (i = 0; i <= slen[j]; i++) 766 sfile[j][i].serial = i; 767 } 768 } 769 770 static void 771 equiv(struct line *a, int n, struct line *b, int m, int *c) 772 { 773 int i, j; 774 775 i = j = 1; 776 while (i <= n && j <= m) { 777 if (a[i].value < b[j].value) 778 a[i++].value = 0; 779 else if (a[i].value == b[j].value) 780 a[i++].value = j; 781 else 782 j++; 783 } 784 while (i <= n) 785 a[i++].value = 0; 786 b[m + 1].value = 0; 787 j = 0; 788 while (++j <= m) { 789 c[j] = -b[j].serial; 790 while (b[j + 1].value == b[j].value) { 791 j++; 792 c[j] = b[j].serial; 793 } 794 } 795 c[j] = -1; 796 } 797 798 /* Code taken from ping.c */ 799 static int 800 isqrt(int n) 801 { 802 int y, x = 1; 803 804 if (n == 0) 805 return(0); 806 807 do { /* newton was a stinker */ 808 y = x; 809 x = n / x; 810 x += y; 811 x /= 2; 812 } while ((x - y) > 1 || (x - y) < -1); 813 814 return (x); 815 } 816 817 static int 818 stone(int *a, int n, int *b, int *c) 819 { 820 int i, k, y, j, l; 821 int oldc, tc, oldl; 822 u_int numtries; 823 824 const u_int bound = dflag ? UINT_MAX : MAX(256, isqrt(n)); 825 826 k = 0; 827 c[0] = newcand(0, 0, 0); 828 for (i = 1; i <= n; i++) { 829 j = a[i]; 830 if (j == 0) 831 continue; 832 y = -b[j]; 833 oldl = 0; 834 oldc = c[0]; 835 numtries = 0; 836 do { 837 if (y <= clist[oldc].y) 838 continue; 839 l = search(c, k, y); 840 if (l != oldl + 1) 841 oldc = c[l - 1]; 842 if (l <= k) { 843 if (clist[c[l]].y <= y) 844 continue; 845 tc = c[l]; 846 c[l] = newcand(i, y, oldc); 847 oldc = tc; 848 oldl = l; 849 numtries++; 850 } else { 851 c[l] = newcand(i, y, oldc); 852 k++; 853 break; 854 } 855 } while ((y = b[++j]) > 0 && numtries < bound); 856 } 857 return (k); 858 } 859 860 static int 861 newcand(int x, int y, int pred) 862 { 863 struct cand *q; 864 865 if (clen == clistlen) { 866 clistlen = clistlen * 11 / 10; 867 clist = realloc(clist, clistlen * sizeof(cand)); 868 } 869 q = clist + clen; 870 q->x = x; 871 q->y = y; 872 q->pred = pred; 873 return (clen++); 874 } 875 876 static int 877 search(int *c, int k, int y) 878 { 879 int i, j, l, t; 880 881 if (clist[c[k]].y < y) /* quick look for typical case */ 882 return (k + 1); 883 i = 0; 884 j = k + 1; 885 while (1) { 886 l = i + j; 887 if ((l >>= 1) <= i) 888 break; 889 t = clist[c[l]].y; 890 if (t > y) 891 j = l; 892 else if (t < y) 893 i = l; 894 else 895 return (l); 896 } 897 return (l + 1); 898 } 899 900 static void 901 unravel(int p) 902 { 903 struct cand *q; 904 int i; 905 906 for (i = 0; i <= len[0]; i++) 907 J[i] = i <= pref ? i : 908 i > len[0] - suff ? i + len[1] - len[0] : 0; 909 for (q = clist + p; q->y != 0; q = clist + q->pred) 910 J[q->x + pref] = q->y + pref; 911 } 912 913 /* 914 * Check does double duty: 915 * 1. ferret out any fortuitous correspondences due 916 * to confounding by hashing (which result in "jackpot") 917 * 2. collect random access indexes to the two files 918 */ 919 static void 920 check(FILE *f1, FILE *f2) 921 { 922 int i, j, jackpot, c, d; 923 long ctold, ctnew; 924 925 rewind(f1); 926 rewind(f2); 927 j = 1; 928 ixold[0] = ixnew[0] = 0; 929 jackpot = 0; 930 ctold = ctnew = 0; 931 for (i = 1; i <= len[0]; i++) { 932 if (J[i] == 0) { 933 ixold[i] = ctold += skipline(f1); 934 continue; 935 } 936 while (j < J[i]) { 937 ixnew[j] = ctnew += skipline(f2); 938 j++; 939 } 940 if (bflag || wflag || iflag) { 941 for (;;) { 942 c = getc(f1); 943 d = getc(f2); 944 /* 945 * GNU diff ignores a missing newline 946 * in one file if bflag || wflag. 947 */ 948 if ((bflag || wflag) && 949 ((c == EOF && d == '\n') || 950 (c == '\n' && d == EOF))) { 951 break; 952 } 953 ctold++; 954 ctnew++; 955 if (bflag && isspace(c) && isspace(d)) { 956 do { 957 if (c == '\n') 958 break; 959 ctold++; 960 } while (isspace(c = getc(f1))); 961 do { 962 if (d == '\n') 963 break; 964 ctnew++; 965 } while (isspace(d = getc(f2))); 966 } else if (wflag) { 967 while (isspace(c) && c != '\n') { 968 c = getc(f1); 969 ctold++; 970 } 971 while (isspace(d) && d != '\n') { 972 d = getc(f2); 973 ctnew++; 974 } 975 } 976 if (chrtran[c] != chrtran[d]) { 977 jackpot++; 978 J[i] = 0; 979 if (c != '\n' && c != EOF) 980 ctold += skipline(f1); 981 if (d != '\n' && c != EOF) 982 ctnew += skipline(f2); 983 break; 984 } 985 if (c == '\n' || c == EOF) 986 break; 987 } 988 } else { 989 for (;;) { 990 ctold++; 991 ctnew++; 992 if ((c = getc(f1)) != (d = getc(f2))) { 993 /* jackpot++; */ 994 J[i] = 0; 995 if (c != '\n' && c != EOF) 996 ctold += skipline(f1); 997 if (d != '\n' && c != EOF) 998 ctnew += skipline(f2); 999 break; 1000 } 1001 if (c == '\n' || c == EOF) 1002 break; 1003 } 1004 } 1005 ixold[i] = ctold; 1006 ixnew[j] = ctnew; 1007 j++; 1008 } 1009 for (; j <= len[1]; j++) 1010 ixnew[j] = ctnew += skipline(f2); 1011 /* 1012 * if (jackpot) 1013 * fprintf(stderr, "jackpot\n"); 1014 */ 1015 } 1016 1017 /* shellsort CACM #201 */ 1018 static void 1019 sort(struct line *a, int n) 1020 { 1021 struct line *ai, *aim, w; 1022 int j, m = 0, k; 1023 1024 if (n == 0) 1025 return; 1026 for (j = 1; j <= n; j *= 2) 1027 m = 2 * j - 1; 1028 for (m /= 2; m != 0; m /= 2) { 1029 k = n - m; 1030 for (j = 1; j <= k; j++) { 1031 for (ai = &a[j]; ai > a; ai -= m) { 1032 aim = &ai[m]; 1033 if (aim < ai) 1034 break; /* wraparound */ 1035 if (aim->value > ai[0].value || 1036 (aim->value == ai[0].value && 1037 aim->serial > ai[0].serial)) 1038 break; 1039 w.value = ai[0].value; 1040 ai[0].value = aim->value; 1041 aim->value = w.value; 1042 w.serial = ai[0].serial; 1043 ai[0].serial = aim->serial; 1044 aim->serial = w.serial; 1045 } 1046 } 1047 } 1048 } 1049 1050 static void 1051 unsort(struct line *f, int l, int *b) 1052 { 1053 int *a, i; 1054 1055 a = malloc((l + 1) * sizeof(int)); 1056 for (i = 1; i <= l; i++) 1057 a[f[i].serial] = f[i].value; 1058 for (i = 1; i <= l; i++) 1059 b[i] = a[i]; 1060 free(a); 1061 } 1062 1063 static int 1064 skipline(FILE *f) 1065 { 1066 int i, c; 1067 1068 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 1069 continue; 1070 return (i); 1071 } 1072 1073 static void 1074 output(const char *file1, FILE *f1, const char *file2, FILE *f2) 1075 { 1076 int m, i0, i1, j0, j1; 1077 1078 rewind(f1); 1079 rewind(f2); 1080 m = len[0]; 1081 J[0] = 0; 1082 J[m + 1] = len[1] + 1; 1083 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 1084 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 1085 i0++; 1086 j0 = J[i0 - 1] + 1; 1087 i1 = i0 - 1; 1088 while (i1 < m && J[i1 + 1] == 0) 1089 i1++; 1090 j1 = J[i1 + 1] - 1; 1091 J[i1] = j1; 1092 change(file1, f1, file2, f2, i0, i1, j0, j1); 1093 } 1094 if (m == 0) 1095 change(file1, f1, file2, f2, 1, 0, 1, len[1]); 1096 if (format == D_IFDEF) { 1097 for (;;) { 1098 #define c i0 1099 if ((c = getc(f1)) == EOF) 1100 return; 1101 putchar(c); 1102 } 1103 #undef c 1104 } 1105 if (anychange != 0) { 1106 if (format == D_CONTEXT) 1107 dump_context_vec(f1, f2); 1108 else if (format == D_UNIFIED) 1109 dump_unified_vec(f1, f2); 1110 } 1111 } 1112 1113 static __inline void 1114 range(int a, int b, char *separator) 1115 { 1116 printf("%d", a > b ? b : a); 1117 if (a < b) 1118 printf("%s%d", separator, b); 1119 } 1120 1121 static __inline void 1122 uni_range(int a, int b) 1123 { 1124 if (a < b) 1125 printf("%d,%d", a, b - a + 1); 1126 else if (a == b) 1127 printf("%d", b); 1128 else 1129 printf("%d,0", b); 1130 } 1131 1132 static char * 1133 preadline(int fd, size_t len, off_t off) 1134 { 1135 char *line; 1136 ssize_t nr; 1137 1138 line = malloc(len + 1); 1139 if (line == NULL) { 1140 cvs_log(LP_ERRNO, "failed to allocate line"); 1141 return (NULL); 1142 } 1143 if ((nr = pread(fd, line, len, off)) < 0) { 1144 cvs_log(LP_ERRNO, "preadline failed"); 1145 return (NULL); 1146 } 1147 line[nr] = '\0'; 1148 return (line); 1149 } 1150 1151 static int 1152 ignoreline(char *line) 1153 { 1154 int ret; 1155 1156 ret = regexec(&ignore_re, line, 0, NULL, 0); 1157 free(line); 1158 return (ret == 0); /* if it matched, it should be ignored. */ 1159 } 1160 1161 /* 1162 * Indicate that there is a difference between lines a and b of the from file 1163 * to get to lines c to d of the to file. If a is greater then b then there 1164 * are no lines in the from file involved and this means that there were 1165 * lines appended (beginning at b). If c is greater than d then there are 1166 * lines missing from the to file. 1167 */ 1168 static void 1169 change(const char *file1, FILE *f1, const char *file2, FILE *f2, 1170 int a, int b, int c, int d) 1171 { 1172 static size_t max_context = 64; 1173 int i; 1174 1175 if (format != D_IFDEF && a > b && c > d) 1176 return; 1177 if (ignore_pats != NULL) { 1178 char *line; 1179 /* 1180 * All lines in the change, insert, or delete must 1181 * match an ignore pattern for the change to be 1182 * ignored. 1183 */ 1184 if (a <= b) { /* Changes and deletes. */ 1185 for (i = a; i <= b; i++) { 1186 line = preadline(fileno(f1), 1187 ixold[i] - ixold[i - 1], ixold[i - 1]); 1188 if (!ignoreline(line)) 1189 goto proceed; 1190 } 1191 } 1192 if (a > b || c <= d) { /* Changes and inserts. */ 1193 for (i = c; i <= d; i++) { 1194 line = preadline(fileno(f2), 1195 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1196 if (!ignoreline(line)) 1197 goto proceed; 1198 } 1199 } 1200 return; 1201 } 1202 proceed: 1203 if (format == D_CONTEXT || format == D_UNIFIED) { 1204 /* 1205 * Allocate change records as needed. 1206 */ 1207 if (context_vec_ptr == context_vec_end - 1) { 1208 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1209 max_context <<= 1; 1210 context_vec_start = realloc(context_vec_start, 1211 max_context * sizeof(struct context_vec)); 1212 context_vec_end = context_vec_start + max_context; 1213 context_vec_ptr = context_vec_start + offset; 1214 } 1215 if (anychange == 0) { 1216 /* 1217 * Print the context/unidiff header first time through. 1218 */ 1219 printf("%s %s %s", 1220 format == D_CONTEXT ? "***" : "---", diff_file, 1221 ctime(&stb1.st_mtime)); 1222 printf("%s %s %s", 1223 format == D_CONTEXT ? "---" : "+++", diff_file, 1224 ctime(&stb2.st_mtime)); 1225 anychange = 1; 1226 } else if (a > context_vec_ptr->b + (2 * context) + 1 && 1227 c > context_vec_ptr->d + (2 * context) + 1) { 1228 /* 1229 * If this change is more than 'context' lines from the 1230 * previous change, dump the record and reset it. 1231 */ 1232 if (format == D_CONTEXT) 1233 dump_context_vec(f1, f2); 1234 else 1235 dump_unified_vec(f1, f2); 1236 } 1237 context_vec_ptr++; 1238 context_vec_ptr->a = a; 1239 context_vec_ptr->b = b; 1240 context_vec_ptr->c = c; 1241 context_vec_ptr->d = d; 1242 return; 1243 } 1244 if (anychange == 0) 1245 anychange = 1; 1246 switch (format) { 1247 case D_BRIEF: 1248 return; 1249 case D_NORMAL: 1250 range(a, b, ","); 1251 putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 1252 if (format == D_NORMAL) 1253 range(c, d, ","); 1254 putchar('\n'); 1255 break; 1256 } 1257 if (format == D_NORMAL || format == D_IFDEF) { 1258 fetch(ixold, a, b, f1, '<', 1); 1259 if (a <= b && c <= d && format == D_NORMAL) 1260 puts("---"); 1261 } 1262 i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0); 1263 if (inifdef) { 1264 printf("#endif /* %s */\n", ifdefname); 1265 inifdef = 0; 1266 } 1267 } 1268 1269 static int 1270 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile) 1271 { 1272 int i, j, c, lastc, col, nc; 1273 1274 /* 1275 * When doing #ifdef's, copy down to current line 1276 * if this is the first file, so that stuff makes it to output. 1277 */ 1278 if (format == D_IFDEF && oldfile) { 1279 long curpos = ftell(lb); 1280 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1281 nc = f[a > b ? b : a - 1] - curpos; 1282 for (i = 0; i < nc; i++) 1283 putchar(getc(lb)); 1284 } 1285 if (a > b) 1286 return (0); 1287 if (format == D_IFDEF) { 1288 if (inifdef) { 1289 printf("#else /* %s%s */\n", 1290 oldfile == 1 ? "!" : "", ifdefname); 1291 } else { 1292 if (oldfile) 1293 printf("#ifndef %s\n", ifdefname); 1294 else 1295 printf("#ifdef %s\n", ifdefname); 1296 } 1297 inifdef = 1 + oldfile; 1298 } 1299 for (i = a; i <= b; i++) { 1300 fseek(lb, f[i - 1], SEEK_SET); 1301 nc = f[i] - f[i - 1]; 1302 if (format != D_IFDEF && ch != '\0') { 1303 putchar(ch); 1304 if (Tflag && (format == D_NORMAL || format == D_CONTEXT 1305 || format == D_UNIFIED)) 1306 putchar('\t'); 1307 else if (format != D_UNIFIED) 1308 putchar(' '); 1309 } 1310 col = 0; 1311 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1312 if ((c = getc(lb)) == EOF) { 1313 puts("\n\\ No newline at end of file"); 1314 return (0); 1315 } 1316 if (c == '\t' && tflag) { 1317 do { 1318 putchar(' '); 1319 } while (++col & 7); 1320 } else { 1321 putchar(c); 1322 col++; 1323 } 1324 } 1325 } 1326 return (0); 1327 } 1328 1329 /* 1330 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1331 */ 1332 static int 1333 readhash(FILE *f) 1334 { 1335 int i, t, space; 1336 int sum; 1337 1338 sum = 1; 1339 space = 0; 1340 if (!bflag && !wflag) { 1341 if (iflag) 1342 for (i = 0; (t = getc(f)) != '\n'; i++) { 1343 if (t == EOF) { 1344 if (i == 0) 1345 return (0); 1346 break; 1347 } 1348 sum = sum * 127 + chrtran[t]; 1349 } 1350 else 1351 for (i = 0; (t = getc(f)) != '\n'; i++) { 1352 if (t == EOF) { 1353 if (i == 0) 1354 return (0); 1355 break; 1356 } 1357 sum = sum * 127 + t; 1358 } 1359 } else { 1360 for (i = 0;;) { 1361 switch (t = getc(f)) { 1362 case '\t': 1363 case ' ': 1364 space++; 1365 continue; 1366 default: 1367 if (space && !wflag) { 1368 i++; 1369 space = 0; 1370 } 1371 sum = sum * 127 + chrtran[t]; 1372 i++; 1373 continue; 1374 case EOF: 1375 if (i == 0) 1376 return (0); 1377 /* FALLTHROUGH */ 1378 case '\n': 1379 break; 1380 } 1381 break; 1382 } 1383 } 1384 /* 1385 * There is a remote possibility that we end up with a zero sum. 1386 * Zero is used as an EOF marker, so return 1 instead. 1387 */ 1388 return (sum == 0 ? 1 : sum); 1389 } 1390 1391 static int 1392 asciifile(FILE *f) 1393 { 1394 char buf[BUFSIZ]; 1395 int i, cnt; 1396 1397 if (aflag || f == NULL) 1398 return (1); 1399 1400 rewind(f); 1401 cnt = fread(buf, 1, sizeof(buf), f); 1402 for (i = 0; i < cnt; i++) 1403 if (!isprint(buf[i]) && !isspace(buf[i])) 1404 return (0); 1405 return (1); 1406 } 1407 1408 1409 /* dump accumulated "context" diff changes */ 1410 static void 1411 dump_context_vec(FILE *f1, FILE *f2) 1412 { 1413 struct context_vec *cvp = context_vec_start; 1414 int lowa, upb, lowc, upd, do_output; 1415 int a, b, c, d; 1416 char ch; 1417 1418 if (context_vec_start > context_vec_ptr) 1419 return; 1420 1421 b = d = 0; /* gcc */ 1422 lowa = MAX(1, cvp->a - context); 1423 upb = MIN(len[0], context_vec_ptr->b + context); 1424 lowc = MAX(1, cvp->c - context); 1425 upd = MIN(len[1], context_vec_ptr->d + context); 1426 1427 printf("***************"); 1428 printf("\n*** "); 1429 range(lowa, upb, ","); 1430 printf(" ****\n"); 1431 1432 /* 1433 * Output changes to the "old" file. The first loop suppresses 1434 * output if there were no changes to the "old" file (we'll see 1435 * the "old" lines as context in the "new" list). 1436 */ 1437 do_output = 0; 1438 for (; cvp <= context_vec_ptr; cvp++) 1439 if (cvp->a <= cvp->b) { 1440 cvp = context_vec_start; 1441 do_output++; 1442 break; 1443 } 1444 if (do_output) { 1445 while (cvp <= context_vec_ptr) { 1446 a = cvp->a; 1447 b = cvp->b; 1448 c = cvp->c; 1449 d = cvp->d; 1450 1451 if (a <= b && c <= d) 1452 ch = 'c'; 1453 else 1454 ch = (a <= b) ? 'd' : 'a'; 1455 1456 if (ch == 'a') 1457 fetch(ixold, lowa, b, f1, ' ', 0); 1458 else { 1459 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1460 fetch(ixold, a, b, f1, 1461 ch == 'c' ? '!' : '-', 0); 1462 } 1463 lowa = b + 1; 1464 cvp++; 1465 } 1466 fetch(ixold, b + 1, upb, f1, ' ', 0); 1467 } 1468 /* output changes to the "new" file */ 1469 printf("--- "); 1470 range(lowc, upd, ","); 1471 printf(" ----\n"); 1472 1473 do_output = 0; 1474 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1475 if (cvp->c <= cvp->d) { 1476 cvp = context_vec_start; 1477 do_output++; 1478 break; 1479 } 1480 if (do_output) { 1481 while (cvp <= context_vec_ptr) { 1482 a = cvp->a; 1483 b = cvp->b; 1484 c = cvp->c; 1485 d = cvp->d; 1486 1487 if (a <= b && c <= d) 1488 ch = 'c'; 1489 else 1490 ch = (a <= b) ? 'd' : 'a'; 1491 1492 if (ch == 'd') 1493 fetch(ixnew, lowc, d, f2, ' ', 0); 1494 else { 1495 fetch(ixnew, lowc, c - 1, f2, ' ', 0); 1496 fetch(ixnew, c, d, f2, 1497 ch == 'c' ? '!' : '+', 0); 1498 } 1499 lowc = d + 1; 1500 cvp++; 1501 } 1502 fetch(ixnew, d + 1, upd, f2, ' ', 0); 1503 } 1504 context_vec_ptr = context_vec_start - 1; 1505 } 1506 1507 /* dump accumulated "unified" diff changes */ 1508 static void 1509 dump_unified_vec(FILE *f1, FILE *f2) 1510 { 1511 struct context_vec *cvp = context_vec_start; 1512 int lowa, upb, lowc, upd; 1513 int a, b, c, d; 1514 char ch; 1515 1516 if (context_vec_start > context_vec_ptr) 1517 return; 1518 1519 b = d = 0; /* gcc */ 1520 lowa = MAX(1, cvp->a - context); 1521 upb = MIN(len[0], context_vec_ptr->b + context); 1522 lowc = MAX(1, cvp->c - context); 1523 upd = MIN(len[1], context_vec_ptr->d + context); 1524 1525 fputs("@@ -", stdout); 1526 uni_range(lowa, upb); 1527 fputs(" +", stdout); 1528 uni_range(lowc, upd); 1529 fputs(" @@", stdout); 1530 putchar('\n'); 1531 1532 /* 1533 * Output changes in "unified" diff format--the old and new lines 1534 * are printed together. 1535 */ 1536 for (; cvp <= context_vec_ptr; cvp++) { 1537 a = cvp->a; 1538 b = cvp->b; 1539 c = cvp->c; 1540 d = cvp->d; 1541 1542 /* 1543 * c: both new and old changes 1544 * d: only changes in the old file 1545 * a: only changes in the new file 1546 */ 1547 if (a <= b && c <= d) 1548 ch = 'c'; 1549 else 1550 ch = (a <= b) ? 'd' : 'a'; 1551 1552 switch (ch) { 1553 case 'c': 1554 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1555 fetch(ixold, a, b, f1, '-', 0); 1556 fetch(ixnew, c, d, f2, '+', 0); 1557 break; 1558 case 'd': 1559 fetch(ixold, lowa, a - 1, f1, ' ', 0); 1560 fetch(ixold, a, b, f1, '-', 0); 1561 break; 1562 case 'a': 1563 fetch(ixnew, lowc, c - 1, f2, ' ', 0); 1564 fetch(ixnew, c, d, f2, '+', 0); 1565 break; 1566 } 1567 lowa = b + 1; 1568 lowc = d + 1; 1569 } 1570 fetch(ixnew, d + 1, upd, f2, ' ', 0); 1571 1572 context_vec_ptr = context_vec_start - 1; 1573 } 1574