xref: /openbsd-src/usr.bin/cvs/diff.c (revision 11efff7f3ac2b3cfeff0c0cddc14294d9b3aca4f)
1 /*	$OpenBSD: diff.c,v 1.13 2004/12/14 21:40:39 jfb Exp $	*/
2 /*
3  * Copyright (C) Caldera International Inc.  2001-2002.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code and documentation must retain the above
10  *    copyright notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed or owned by Caldera
17  *	International, Inc.
18  * 4. Neither the name of Caldera International, Inc. nor the names of other
19  *    contributors may be used to endorse or promote products derived from
20  *    this software without specific prior written permission.
21  *
22  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
23  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
27  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 /*-
36  * Copyright (c) 1991, 1993
37  *	The Regents of the University of California.  All rights reserved.
38  * Copyright (c) 2004 Jean-Francois Brousseau.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 /*
67  *	Uses an algorithm due to Harold Stone, which finds
68  *	a pair of longest identical subsequences in the two
69  *	files.
70  *
71  *	The major goal is to generate the match vector J.
72  *	J[i] is the index of the line in file1 corresponding
73  *	to line i file0. J[i] = 0 if there is no
74  *	such line in file1.
75  *
76  *	Lines are hashed so as to work in core. All potential
77  *	matches are located by sorting the lines of each file
78  *	on the hash (called ``value''). In particular, this
79  *	collects the equivalence classes in file1 together.
80  *	Subroutine equiv replaces the value of each line in
81  *	file0 by the index of the first element of its
82  *	matching equivalence in (the reordered) file1.
83  *	To save space equiv squeezes file1 into a single
84  *	array member in which the equivalence classes
85  *	are simply concatenated, except that their first
86  *	members are flagged by changing sign.
87  *
88  *	Next the indices that point into member are unsorted into
89  *	array class according to the original order of file0.
90  *
91  *	The cleverness lies in routine stone. This marches
92  *	through the lines of file0, developing a vector klist
93  *	of "k-candidates". At step i a k-candidate is a matched
94  *	pair of lines x,y (x in file0 y in file1) such that
95  *	there is a common subsequence of length k
96  *	between the first i lines of file0 and the first y
97  *	lines of file1, but there is no such subsequence for
98  *	any smaller y. x is the earliest possible mate to y
99  *	that occurs in such a subsequence.
100  *
101  *	Whenever any of the members of the equivalence class of
102  *	lines in file1 matable to a line in file0 has serial number
103  *	less than the y of some k-candidate, that k-candidate
104  *	with the smallest such y is replaced. The new
105  *	k-candidate is chained (via pred) to the current
106  *	k-1 candidate so that the actual subsequence can
107  *	be recovered. When a member has serial number greater
108  *	that the y of all k-candidates, the klist is extended.
109  *	At the end, the longest subsequence is pulled out
110  *	and placed in the array J by unravel
111  *
112  *	With J in hand, the matches there recorded are
113  *	check'ed against reality to assure that no spurious
114  *	matches have crept in due to hashing. If they have,
115  *	they are broken, and "jackpot" is recorded--a harmless
116  *	matter except that a true match for a spuriously
117  *	mated line may now be unnecessarily reported as a change.
118  *
119  *	Much of the complexity of the program comes simply
120  *	from trying to minimize core utilization and
121  *	maximize the range of doable problems by dynamically
122  *	allocating what is needed and reusing what is not.
123  *	The core requirements for problems larger than somewhat
124  *	are (in words) 2*length(file0) + length(file1) +
125  *	3*(number of k-candidates installed),  typically about
126  *	6n words for files of length n.
127  */
128 
129 #include <sys/param.h>
130 #include <sys/stat.h>
131 #include <sys/wait.h>
132 
133 #include <errno.h>
134 #include <ctype.h>
135 #include <stdio.h>
136 #include <fcntl.h>
137 #include <paths.h>
138 #include <regex.h>
139 #include <dirent.h>
140 #include <stdlib.h>
141 #include <stddef.h>
142 #include <unistd.h>
143 #include <string.h>
144 #include <sysexits.h>
145 
146 #include "cvs.h"
147 #include "log.h"
148 #include "buf.h"
149 #include "proto.h"
150 
151 
152 #define CVS_DIFF_DEFCTX    3   /* default context length */
153 
154 
155 /*
156  * Output format options
157  */
158 #define	D_NORMAL	0	/* Normal output */
159 #define	D_CONTEXT	1	/* Diff with context */
160 #define	D_UNIFIED	2	/* Unified context diff */
161 #define	D_IFDEF		3	/* Diff with merged #ifdef's */
162 #define	D_BRIEF		4	/* Say if the files differ */
163 
164 /*
165  * Status values for print_status() and diffreg() return values
166  */
167 #define	D_SAME		0	/* Files are the same */
168 #define	D_DIFFER	1	/* Files are different */
169 #define	D_BINARY	2	/* Binary files are different */
170 #define	D_COMMON	3	/* Subdirectory common to both dirs */
171 #define	D_ONLY		4	/* Only exists in one directory */
172 #define	D_MISMATCH1	5	/* path1 was a dir, path2 a file */
173 #define	D_MISMATCH2	6	/* path1 was a file, path2 a dir */
174 #define	D_ERROR		7	/* An error occurred */
175 #define	D_SKIPPED1	8	/* path1 was a special file */
176 #define	D_SKIPPED2	9	/* path2 was a special file */
177 
178 struct cand {
179 	int x;
180 	int y;
181 	int pred;
182 } cand;
183 
184 struct line {
185 	int serial;
186 	int value;
187 } *file[2];
188 
189 /*
190  * The following struct is used to record change information when
191  * doing a "context" or "unified" diff.  (see routine "change" to
192  * understand the highly mnemonic field names)
193  */
194 struct context_vec {
195 	int a;			/* start line in old file */
196 	int b;			/* end line in old file */
197 	int c;			/* start line in new file */
198 	int d;			/* end line in new file */
199 };
200 
201 struct diff_arg {
202 	char  *rev1;
203 	char  *rev2;
204 	char  *date1;
205 	char  *date2;
206 };
207 
208 
209 struct excludes {
210 	char *pattern;
211 	struct excludes *next;
212 };
213 
214 
215 char	*splice(char *, char *);
216 int  cvs_diffreg(const char *, const char *);
217 int  cvs_diff_file(struct cvs_file *, void *);
218 int  cvs_diff_sendflags(struct cvsroot *, struct diff_arg *);
219 static void output(const char *, FILE *, const char *, FILE *);
220 static void check(FILE *, FILE *);
221 static void range(int, int, char *);
222 static void uni_range(int, int);
223 static void dump_context_vec(FILE *, FILE *);
224 static void dump_unified_vec(FILE *, FILE *);
225 static void prepare(int, FILE *, off_t);
226 static void prune(void);
227 static void equiv(struct line *, int, struct line *, int, int *);
228 static void unravel(int);
229 static void unsort(struct line *, int, int *);
230 static void change(const char *, FILE *, const char *, FILE *, int, int, int, int);
231 static void sort(struct line *, int);
232 static int  ignoreline(char *);
233 static int  asciifile(FILE *);
234 static int  fetch(long *, int, int, FILE *, int, int);
235 static int  newcand(int, int, int);
236 static int  search(int *, int, int);
237 static int  skipline(FILE *);
238 static int  isqrt(int);
239 static int  stone(int *, int, int *, int *);
240 static int  readhash(FILE *);
241 static int  files_differ(FILE *, FILE *);
242 static char *preadline(int, size_t, off_t);
243 
244 
245 extern int cvs_client;
246 
247 static int aflag, bflag, dflag, iflag, Nflag, tflag, Tflag, wflag;
248 static int context, status;
249 static int format = D_NORMAL;
250 static struct stat stb1, stb2;
251 static char *ifdefname, *ignore_pats, diffargs[128];
252 static const char *diff_file;
253 regex_t ignore_re;
254 
255 static int  *J;			/* will be overlaid on class */
256 static int  *class;		/* will be overlaid on file[0] */
257 static int  *klist;		/* will be overlaid on file[0] after class */
258 static int  *member;		/* will be overlaid on file[1] */
259 static int   clen;
260 static int   inifdef;		/* whether or not we are in a #ifdef block */
261 static int   len[2];
262 static int   pref, suff;	/* length of prefix and suffix */
263 static int   slen[2];
264 static int   anychange;
265 static long *ixnew;		/* will be overlaid on file[1] */
266 static long *ixold;		/* will be overlaid on klist */
267 static struct cand *clist;	/* merely a free storage pot for candidates */
268 static int   clistlen;		/* the length of clist */
269 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
270 static u_char *chrtran;		/* translation table for case-folding */
271 static struct context_vec *context_vec_start;
272 static struct context_vec *context_vec_end;
273 static struct context_vec *context_vec_ptr;
274 
275 #define FUNCTION_CONTEXT_SIZE	41
276 static int lastline;
277 static int lastmatchline;
278 
279 
280 /*
281  * chrtran points to one of 2 translation tables: cup2low if folding upper to
282  * lower case clow2low if not folding case
283  */
284 u_char clow2low[256] = {
285 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
286 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
287 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
288 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
289 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
290 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
291 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
292 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
293 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
294 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
295 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
296 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
297 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
298 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
299 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
300 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
301 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
302 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
303 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
304 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
305 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
306 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
307 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
308 	0xfd, 0xfe, 0xff
309 };
310 
311 u_char cup2low[256] = {
312 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
313 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
314 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
315 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
316 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
317 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
318 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
319 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
320 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
321 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
322 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
323 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
324 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
325 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
326 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
327 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
328 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
329 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
330 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
331 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
332 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
333 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
334 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
335 	0xfd, 0xfe, 0xff
336 };
337 
338 
339 /*
340  * cvs_diff()
341  *
342  * Handler for the `cvs diff' command.
343  *
344  * SYNOPSIS: cvs [args] diff [-clipu] [-D date] [-r rev]
345  */
346 int
347 cvs_diff(int argc, char **argv)
348 {
349 	int ch, recurse, flags;
350 	struct diff_arg darg;
351 	struct cvsroot *root;
352 
353 	context = CVS_DIFF_DEFCTX;
354 	flags = CF_RECURSE|CF_IGNORE|CF_SORT|CF_KNOWN;
355 	recurse = 1;
356 
357 	memset(&darg, 0, sizeof(darg));
358 	strlcpy(diffargs, argv[0], sizeof(diffargs));
359 
360 	while ((ch = getopt(argc, argv, "cD:liN:r:u")) != -1) {
361 		switch (ch) {
362 		case 'c':
363 			strlcat(diffargs, " -c", sizeof(diffargs));
364 			format = D_CONTEXT;
365 			break;
366 		case 'D':
367 			if (darg.date1 == NULL && darg.rev1 == NULL)
368 				darg.date1 = optarg;
369 			else if (darg.date2 == NULL && darg.rev2 == NULL)
370 				darg.date2 = optarg;
371 			else {
372 				cvs_log(LP_ERR,
373 				    "no more than two revisions/dates can "
374 				    "be specified");
375 			}
376 			break;
377 		case 'l':
378 			strlcat(diffargs, " -l", sizeof(diffargs));
379 			recurse = 0;
380 			flags &= ~CF_RECURSE;
381 			break;
382 		case 'i':
383 			strlcat(diffargs, " -i", sizeof(diffargs));
384 			iflag = 1;
385 			break;
386 		case 'N':
387 			strlcat(diffargs, " -N", sizeof(diffargs));
388 			Nflag = 1;
389 			break;
390 		case 'r':
391 			if ((darg.rev1 == NULL) && (darg.date1 == NULL))
392 				darg.rev1 = optarg;
393 			else if ((darg.rev2 == NULL) && (darg.date2 == NULL))
394 				darg.rev2 = optarg;
395 			else {
396 				cvs_log(LP_ERR,
397 				    "no more than two revisions/dates can "
398 				    "be specified");
399 				return (EX_USAGE);
400 			}
401 			break;
402 		case 'u':
403 			strlcat(diffargs, " -u", sizeof(diffargs));
404 			format = D_UNIFIED;
405 			break;
406 		default:
407 			return (EX_USAGE);
408 		}
409 	}
410 
411 	argc -= optind;
412 	argv += optind;
413 
414 	if (argc == 0) {
415 		cvs_files = cvs_file_get(".", flags);
416 	} else
417 		cvs_files = cvs_file_getspec(argv, argc, 0);
418 	if (cvs_files == NULL)
419 		return (EX_DATAERR);
420 
421 	cvs_file_examine(cvs_files, cvs_diff_file, &darg);
422 
423 	root = cvs_files->cf_ddat->cd_root;
424 	if (root->cr_method != CVS_METHOD_LOCAL) {
425 		cvs_senddir(root, cvs_files);
426 		cvs_sendreq(root, CVS_REQ_DIFF, NULL);
427 	}
428 
429 	return (0);
430 }
431 
432 
433 /*
434  * cvs_diff_sendflags()
435  *
436  */
437 int
438 cvs_diff_sendflags(struct cvsroot *root, struct diff_arg *dap)
439 {
440 	/* send the flags */
441 	if (format == D_CONTEXT)
442 		cvs_sendarg(root, "-c", 0);
443 	else if (format == D_UNIFIED)
444 		cvs_sendarg(root, "-u", 0);
445 
446 	if (dap->rev1 != NULL) {
447 		cvs_sendarg(root, "-r", 0);
448 		cvs_sendarg(root, dap->rev1, 1);
449 	} else if (dap->date1 != NULL) {
450 		cvs_sendarg(root, "-D", 0);
451 		cvs_sendarg(root, dap->date1, 1);
452 	}
453 	if (dap->rev2 != NULL) {
454 		cvs_sendarg(root, "-r", 0);
455 		cvs_sendarg(root, dap->rev2, 1);
456 	} else if (dap->date2 != NULL) {
457 		cvs_sendarg(root, "-D", 0);
458 		cvs_sendarg(root, dap->date2, 1);
459 	}
460 
461 	return (0);
462 }
463 
464 
465 /*
466  * cvs_diff_file()
467  *
468  * Diff a single file.
469  */
470 int
471 cvs_diff_file(struct cvs_file *cfp, void *arg)
472 {
473 	char *dir, *repo, buf[64];
474 	char fpath[MAXPATHLEN], dfpath[MAXPATHLEN], rcspath[MAXPATHLEN];
475 	char path_tmp1[MAXPATHLEN], path_tmp2[MAXPATHLEN];
476 	BUF *b1, *b2;
477 	RCSNUM *r1, *r2;
478 	RCSFILE *rf;
479 	struct diff_arg *dap;
480 	struct cvs_ent *entp;
481 	struct cvsroot *root;
482 
483 	dap = (struct diff_arg *)arg;
484 
485 	if (cfp->cf_type == DT_DIR) {
486 		if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
487 			root = cfp->cf_parent->cf_ddat->cd_root;
488 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE,
489 			    CVS_FILE_NAME(cfp));
490 		} else {
491 			root = cfp->cf_ddat->cd_root;
492 			if ((cfp->cf_parent == NULL) ||
493 			    (root != cfp->cf_parent->cf_ddat->cd_root)) {
494 				cvs_connect(root);
495 				cvs_diff_sendflags(root, dap);
496 			}
497 
498 			cvs_senddir(root, cfp);
499 		}
500 
501 		return (0);
502 	}
503 
504 	if (cfp->cf_cvstat == CVS_FST_LOST) {
505 		cvs_log(LP_WARN, "cannot find file %s", CVS_FILE_NAME(cfp));
506 		return (0);
507 	}
508 
509 	rf = NULL;
510 	diff_file = cvs_file_getpath(cfp, fpath, sizeof(fpath));
511 
512 	if (cfp->cf_parent != NULL) {
513 		dir = cvs_file_getpath(cfp->cf_parent, dfpath, sizeof(dfpath));
514 		root = cfp->cf_parent->cf_ddat->cd_root;
515 		repo = cfp->cf_parent->cf_ddat->cd_repo;
516 	} else {
517 		dir = ".";
518 		root = NULL;
519 		repo = NULL;
520 	}
521 
522 	if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
523 		if (root->cr_method == CVS_METHOD_LOCAL)
524 			cvs_log(LP_WARN, "I know nothing about %s", diff_file);
525 		else
526 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE,
527 			    CVS_FILE_NAME(cfp));
528 		return (0);
529 	}
530 
531 	entp = cvs_ent_getent(diff_file);
532 	if (entp == NULL)
533 		return (-1);
534 
535 	if (root->cr_method != CVS_METHOD_LOCAL) {
536 		if (cvs_sendentry(root, entp) < 0) {
537 			cvs_ent_free(entp);
538 			return (-1);
539 		}
540 	}
541 
542 	if (cfp->cf_cvstat == CVS_FST_UPTODATE) {
543 		if (root->cr_method != CVS_METHOD_LOCAL)
544 			cvs_sendreq(root, CVS_REQ_UNCHANGED,
545 			    CVS_FILE_NAME(cfp));
546 		cvs_ent_free(entp);
547 		return (0);
548 	}
549 
550 	/* at this point, the file is modified */
551 	if (root->cr_method != CVS_METHOD_LOCAL) {
552 		cvs_sendreq(root, CVS_REQ_MODIFIED, CVS_FILE_NAME(cfp));
553 		cvs_sendfile(root, diff_file);
554 	} else {
555 		snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s",
556 		    root->cr_dir, repo, diff_file, RCS_FILE_EXT);
557 
558 		rf = rcs_open(rcspath, RCS_MODE_READ);
559 		if (rf == NULL) {
560 			cvs_ent_free(entp);
561 			return (-1);
562 		}
563 
564 		cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file,
565 		    RCS_DIFF_DIV, rcspath);
566 
567 		if (dap->rev1 == NULL)
568 			r1 = entp->ce_rev;
569 		else {
570 			r1 = rcsnum_alloc();
571 			rcsnum_aton(dap->rev1, NULL, r1);
572 		}
573 
574 		cvs_printf("retrieving revision %s\n",
575 		    rcsnum_tostr(r1, buf, sizeof(buf)));
576 		b1 = rcs_getrev(rf, r1);
577 
578 		if (dap->rev2 != NULL) {
579 			cvs_printf("retrieving revision %s\n", dap->rev2);
580 			r2 = rcsnum_alloc();
581 			rcsnum_aton(dap->rev2, NULL, r2);
582 			b2 = rcs_getrev(rf, r2);
583 		} else {
584 			b2 = cvs_buf_load(diff_file, BUF_AUTOEXT);
585 		}
586 
587 		rcs_close(rf);
588 
589 		printf("%s", diffargs);
590 		printf(" -r%s", buf);
591 		if (dap->rev2 != NULL)
592 			printf(" -r%s", dap->rev2);
593 		printf(" %s\n", diff_file);
594 		strlcpy(path_tmp1, "/tmp/diff1.XXXXXXXXXX", sizeof(path_tmp1));
595 		if (cvs_buf_write_stmp(b1, path_tmp1, 0600) == -1)
596 			return (-1);
597 		strlcpy(path_tmp2, "/tmp/diff2.XXXXXXXXXX", sizeof(path_tmp1));
598 		if (cvs_buf_write_stmp(b2, path_tmp2, 0600) == -1) {
599 			(void)unlink(path_tmp1);
600 			return (-1);
601 		}
602 		cvs_diffreg(path_tmp1, path_tmp2);
603 		(void)unlink(path_tmp1);
604 		(void)unlink(path_tmp2);
605 	}
606 
607 	cvs_ent_free(entp);
608 	return (0);
609 }
610 
611 
612 int
613 cvs_diffreg(const char *file1, const char *file2)
614 {
615 	FILE *f1, *f2;
616 	int i, rval;
617 
618 	f1 = f2 = NULL;
619 	rval = D_SAME;
620 	anychange = 0;
621 	lastline = 0;
622 	lastmatchline = 0;
623 	context_vec_ptr = context_vec_start - 1;
624 	chrtran = (iflag ? cup2low : clow2low);
625 
626 	f1 = fopen(file1, "r");
627 	if (f1 == NULL) {
628 		cvs_log(LP_ERRNO, "%s", file1);
629 		status |= 2;
630 		goto closem;
631 	}
632 
633 	f2 = fopen(file2, "r");
634 	if (f2 == NULL) {
635 		cvs_log(LP_ERRNO, "%s", file2);
636 		status |= 2;
637 		goto closem;
638 	}
639 
640 	switch (files_differ(f1, f2)) {
641 	case 0:
642 		goto closem;
643 	case 1:
644 		break;
645 	default:
646 		/* error */
647 		status |= 2;
648 		goto closem;
649 	}
650 
651 	if (!asciifile(f1) || !asciifile(f2)) {
652 		rval = D_BINARY;
653 		status |= 1;
654 		goto closem;
655 	}
656 	prepare(0, f1, stb1.st_size);
657 	prepare(1, f2, stb2.st_size);
658 	prune();
659 	sort(sfile[0], slen[0]);
660 	sort(sfile[1], slen[1]);
661 
662 	member = (int *)file[1];
663 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
664 	member = realloc(member, (slen[1] + 2) * sizeof(int));
665 
666 	class = (int *)file[0];
667 	unsort(sfile[0], slen[0], class);
668 	class = realloc(class, (slen[0] + 2) * sizeof(int));
669 
670 	klist = malloc((slen[0] + 2) * sizeof(int));
671 	clen = 0;
672 	clistlen = 100;
673 	clist = malloc(clistlen * sizeof(cand));
674 	i = stone(class, slen[0], member, klist);
675 	free(member);
676 	free(class);
677 
678 	J = realloc(J, (len[0] + 2) * sizeof(int));
679 	unravel(klist[i]);
680 	free(clist);
681 	free(klist);
682 
683 	ixold = realloc(ixold, (len[0] + 2) * sizeof(long));
684 	ixnew = realloc(ixnew, (len[1] + 2) * sizeof(long));
685 	check(f1, f2);
686 	output(file1, f1, file2, f2);
687 
688 closem:
689 	if (anychange) {
690 		status |= 1;
691 		if (rval == D_SAME)
692 			rval = D_DIFFER;
693 	}
694 	if (f1 != NULL)
695 		fclose(f1);
696 	if (f2 != NULL)
697 		fclose(f2);
698 	return (rval);
699 }
700 
701 /*
702  * Check to see if the given files differ.
703  * Returns 0 if they are the same, 1 if different, and -1 on error.
704  * XXX - could use code from cmp(1) [faster]
705  */
706 static int
707 files_differ(FILE *f1, FILE *f2)
708 {
709 	char buf1[BUFSIZ], buf2[BUFSIZ];
710 	size_t i, j;
711 
712 	if (stb1.st_size != stb2.st_size)
713 		return (1);
714 	for (;;) {
715 		i = fread(buf1, 1, sizeof(buf1), f1);
716 		j = fread(buf2, 1, sizeof(buf2), f2);
717 		if (i != j)
718 			return (1);
719 		if (i == 0 && j == 0) {
720 			if (ferror(f1) || ferror(f2))
721 				return (1);
722 			return (0);
723 		}
724 		if (memcmp(buf1, buf2, i) != 0)
725 			return (1);
726 	}
727 }
728 
729 
730 char *
731 splice(char *dir, char *file)
732 {
733 	char *tail, *buf;
734 
735 	if ((tail = strrchr(file, '/')) == NULL)
736 		tail = file;
737 	else
738 		tail++;
739 	asprintf(&buf, "%s/%s", dir, tail);
740 	return (buf);
741 }
742 
743 static void
744 prepare(int i, FILE *fd, off_t filesize)
745 {
746 	struct line *p;
747 	int j, h;
748 	size_t sz;
749 
750 	rewind(fd);
751 
752 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
753 	if (sz < 100)
754 		sz = 100;
755 
756 	p = malloc((sz + 3) * sizeof(struct line));
757 	for (j = 0; (h = readhash(fd));) {
758 		if (j == (int)sz) {
759 			sz = sz * 3 / 2;
760 			p = realloc(p, (sz + 3) * sizeof(struct line));
761 		}
762 		p[++j].value = h;
763 	}
764 	len[i] = j;
765 	file[i] = p;
766 }
767 
768 static void
769 prune(void)
770 {
771 	int i, j;
772 
773 	for (pref = 0; pref < len[0] && pref < len[1] &&
774 	    file[0][pref + 1].value == file[1][pref + 1].value;
775 	    pref++)
776 		;
777 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
778 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
779 	    suff++)
780 		;
781 	for (j = 0; j < 2; j++) {
782 		sfile[j] = file[j] + pref;
783 		slen[j] = len[j] - pref - suff;
784 		for (i = 0; i <= slen[j]; i++)
785 			sfile[j][i].serial = i;
786 	}
787 }
788 
789 static void
790 equiv(struct line *a, int n, struct line *b, int m, int *c)
791 {
792 	int i, j;
793 
794 	i = j = 1;
795 	while (i <= n && j <= m) {
796 		if (a[i].value < b[j].value)
797 			a[i++].value = 0;
798 		else if (a[i].value == b[j].value)
799 			a[i++].value = j;
800 		else
801 			j++;
802 	}
803 	while (i <= n)
804 		a[i++].value = 0;
805 	b[m + 1].value = 0;
806 	j = 0;
807 	while (++j <= m) {
808 		c[j] = -b[j].serial;
809 		while (b[j + 1].value == b[j].value) {
810 			j++;
811 			c[j] = b[j].serial;
812 		}
813 	}
814 	c[j] = -1;
815 }
816 
817 /* Code taken from ping.c */
818 static int
819 isqrt(int n)
820 {
821 	int y, x = 1;
822 
823 	if (n == 0)
824 		return(0);
825 
826 	do { /* newton was a stinker */
827 		y = x;
828 		x = n / x;
829 		x += y;
830 		x /= 2;
831 	} while ((x - y) > 1 || (x - y) < -1);
832 
833 	return (x);
834 }
835 
836 static int
837 stone(int *a, int n, int *b, int *c)
838 {
839 	int i, k, y, j, l;
840 	int oldc, tc, oldl;
841 	u_int numtries;
842 
843 	const u_int bound = dflag ? UINT_MAX : MAX(256, isqrt(n));
844 
845 	k = 0;
846 	c[0] = newcand(0, 0, 0);
847 	for (i = 1; i <= n; i++) {
848 		j = a[i];
849 		if (j == 0)
850 			continue;
851 		y = -b[j];
852 		oldl = 0;
853 		oldc = c[0];
854 		numtries = 0;
855 		do {
856 			if (y <= clist[oldc].y)
857 				continue;
858 			l = search(c, k, y);
859 			if (l != oldl + 1)
860 				oldc = c[l - 1];
861 			if (l <= k) {
862 				if (clist[c[l]].y <= y)
863 					continue;
864 				tc = c[l];
865 				c[l] = newcand(i, y, oldc);
866 				oldc = tc;
867 				oldl = l;
868 				numtries++;
869 			} else {
870 				c[l] = newcand(i, y, oldc);
871 				k++;
872 				break;
873 			}
874 		} while ((y = b[++j]) > 0 && numtries < bound);
875 	}
876 	return (k);
877 }
878 
879 static int
880 newcand(int x, int y, int pred)
881 {
882 	struct cand *q;
883 
884 	if (clen == clistlen) {
885 		clistlen = clistlen * 11 / 10;
886 		clist = realloc(clist, clistlen * sizeof(cand));
887 	}
888 	q = clist + clen;
889 	q->x = x;
890 	q->y = y;
891 	q->pred = pred;
892 	return (clen++);
893 }
894 
895 static int
896 search(int *c, int k, int y)
897 {
898 	int i, j, l, t;
899 
900 	if (clist[c[k]].y < y)	/* quick look for typical case */
901 		return (k + 1);
902 	i = 0;
903 	j = k + 1;
904 	while (1) {
905 		l = i + j;
906 		if ((l >>= 1) <= i)
907 			break;
908 		t = clist[c[l]].y;
909 		if (t > y)
910 			j = l;
911 		else if (t < y)
912 			i = l;
913 		else
914 			return (l);
915 	}
916 	return (l + 1);
917 }
918 
919 static void
920 unravel(int p)
921 {
922 	struct cand *q;
923 	int i;
924 
925 	for (i = 0; i <= len[0]; i++)
926 		J[i] = i <= pref ? i :
927 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
928 	for (q = clist + p; q->y != 0; q = clist + q->pred)
929 		J[q->x + pref] = q->y + pref;
930 }
931 
932 /*
933  * Check does double duty:
934  *  1.	ferret out any fortuitous correspondences due
935  *	to confounding by hashing (which result in "jackpot")
936  *  2.  collect random access indexes to the two files
937  */
938 static void
939 check(FILE *f1, FILE *f2)
940 {
941 	int i, j, jackpot, c, d;
942 	long ctold, ctnew;
943 
944 	rewind(f1);
945 	rewind(f2);
946 	j = 1;
947 	ixold[0] = ixnew[0] = 0;
948 	jackpot = 0;
949 	ctold = ctnew = 0;
950 	for (i = 1; i <= len[0]; i++) {
951 		if (J[i] == 0) {
952 			ixold[i] = ctold += skipline(f1);
953 			continue;
954 		}
955 		while (j < J[i]) {
956 			ixnew[j] = ctnew += skipline(f2);
957 			j++;
958 		}
959 		if (bflag || wflag || iflag) {
960 			for (;;) {
961 				c = getc(f1);
962 				d = getc(f2);
963 				/*
964 				 * GNU diff ignores a missing newline
965 				 * in one file if bflag || wflag.
966 				 */
967 				if ((bflag || wflag) &&
968 				    ((c == EOF && d == '\n') ||
969 				    (c == '\n' && d == EOF))) {
970 					break;
971 				}
972 				ctold++;
973 				ctnew++;
974 				if (bflag && isspace(c) && isspace(d)) {
975 					do {
976 						if (c == '\n')
977 							break;
978 						ctold++;
979 					} while (isspace(c = getc(f1)));
980 					do {
981 						if (d == '\n')
982 							break;
983 						ctnew++;
984 					} while (isspace(d = getc(f2)));
985 				} else if (wflag) {
986 					while (isspace(c) && c != '\n') {
987 						c = getc(f1);
988 						ctold++;
989 					}
990 					while (isspace(d) && d != '\n') {
991 						d = getc(f2);
992 						ctnew++;
993 					}
994 				}
995 				if (chrtran[c] != chrtran[d]) {
996 					jackpot++;
997 					J[i] = 0;
998 					if (c != '\n' && c != EOF)
999 						ctold += skipline(f1);
1000 					if (d != '\n' && c != EOF)
1001 						ctnew += skipline(f2);
1002 					break;
1003 				}
1004 				if (c == '\n' || c == EOF)
1005 					break;
1006 			}
1007 		} else {
1008 			for (;;) {
1009 				ctold++;
1010 				ctnew++;
1011 				if ((c = getc(f1)) != (d = getc(f2))) {
1012 					/* jackpot++; */
1013 					J[i] = 0;
1014 					if (c != '\n' && c != EOF)
1015 						ctold += skipline(f1);
1016 					if (d != '\n' && c != EOF)
1017 						ctnew += skipline(f2);
1018 					break;
1019 				}
1020 				if (c == '\n' || c == EOF)
1021 					break;
1022 			}
1023 		}
1024 		ixold[i] = ctold;
1025 		ixnew[j] = ctnew;
1026 		j++;
1027 	}
1028 	for (; j <= len[1]; j++)
1029 		ixnew[j] = ctnew += skipline(f2);
1030 	/*
1031 	 * if (jackpot)
1032 	 *	fprintf(stderr, "jackpot\n");
1033 	 */
1034 }
1035 
1036 /* shellsort CACM #201 */
1037 static void
1038 sort(struct line *a, int n)
1039 {
1040 	struct line *ai, *aim, w;
1041 	int j, m = 0, k;
1042 
1043 	if (n == 0)
1044 		return;
1045 	for (j = 1; j <= n; j *= 2)
1046 		m = 2 * j - 1;
1047 	for (m /= 2; m != 0; m /= 2) {
1048 		k = n - m;
1049 		for (j = 1; j <= k; j++) {
1050 			for (ai = &a[j]; ai > a; ai -= m) {
1051 				aim = &ai[m];
1052 				if (aim < ai)
1053 					break;	/* wraparound */
1054 				if (aim->value > ai[0].value ||
1055 				    (aim->value == ai[0].value &&
1056 					aim->serial > ai[0].serial))
1057 					break;
1058 				w.value = ai[0].value;
1059 				ai[0].value = aim->value;
1060 				aim->value = w.value;
1061 				w.serial = ai[0].serial;
1062 				ai[0].serial = aim->serial;
1063 				aim->serial = w.serial;
1064 			}
1065 		}
1066 	}
1067 }
1068 
1069 static void
1070 unsort(struct line *f, int l, int *b)
1071 {
1072 	int *a, i;
1073 
1074 	a = malloc((l + 1) * sizeof(int));
1075 	for (i = 1; i <= l; i++)
1076 		a[f[i].serial] = f[i].value;
1077 	for (i = 1; i <= l; i++)
1078 		b[i] = a[i];
1079 	free(a);
1080 }
1081 
1082 static int
1083 skipline(FILE *f)
1084 {
1085 	int i, c;
1086 
1087 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
1088 		continue;
1089 	return (i);
1090 }
1091 
1092 static void
1093 output(const char *file1, FILE *f1, const char *file2, FILE *f2)
1094 {
1095 	int m, i0, i1, j0, j1;
1096 
1097 	rewind(f1);
1098 	rewind(f2);
1099 	m = len[0];
1100 	J[0] = 0;
1101 	J[m + 1] = len[1] + 1;
1102 	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
1103 		while (i0 <= m && J[i0] == J[i0 - 1] + 1)
1104 			i0++;
1105 		j0 = J[i0 - 1] + 1;
1106 		i1 = i0 - 1;
1107 		while (i1 < m && J[i1 + 1] == 0)
1108 			i1++;
1109 		j1 = J[i1 + 1] - 1;
1110 		J[i1] = j1;
1111 		change(file1, f1, file2, f2, i0, i1, j0, j1);
1112 	}
1113 	if (m == 0)
1114 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
1115 	if (format == D_IFDEF) {
1116 		for (;;) {
1117 #define	c i0
1118 			if ((c = getc(f1)) == EOF)
1119 				return;
1120 			putchar(c);
1121 		}
1122 #undef c
1123 	}
1124 	if (anychange != 0) {
1125 		if (format == D_CONTEXT)
1126 			dump_context_vec(f1, f2);
1127 		else if (format == D_UNIFIED)
1128 			dump_unified_vec(f1, f2);
1129 	}
1130 }
1131 
1132 static __inline void
1133 range(int a, int b, char *separator)
1134 {
1135 	printf("%d", a > b ? b : a);
1136 	if (a < b)
1137 		printf("%s%d", separator, b);
1138 }
1139 
1140 static __inline void
1141 uni_range(int a, int b)
1142 {
1143 	if (a < b)
1144 		printf("%d,%d", a, b - a + 1);
1145 	else if (a == b)
1146 		printf("%d", b);
1147 	else
1148 		printf("%d,0", b);
1149 }
1150 
1151 static char *
1152 preadline(int fd, size_t len, off_t off)
1153 {
1154 	char *line;
1155 	ssize_t nr;
1156 
1157 	line = malloc(len + 1);
1158 	if (line == NULL) {
1159 		cvs_log(LP_ERRNO, "failed to allocate line");
1160 		return (NULL);
1161 	}
1162 	if ((nr = pread(fd, line, len, off)) < 0) {
1163 		cvs_log(LP_ERRNO, "preadline failed");
1164 		return (NULL);
1165 	}
1166 	line[nr] = '\0';
1167 	return (line);
1168 }
1169 
1170 static int
1171 ignoreline(char *line)
1172 {
1173 	int ret;
1174 
1175 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1176 	free(line);
1177 	return (ret == 0);	/* if it matched, it should be ignored. */
1178 }
1179 
1180 /*
1181  * Indicate that there is a difference between lines a and b of the from file
1182  * to get to lines c to d of the to file.  If a is greater then b then there
1183  * are no lines in the from file involved and this means that there were
1184  * lines appended (beginning at b).  If c is greater than d then there are
1185  * lines missing from the to file.
1186  */
1187 static void
1188 change(const char *file1, FILE *f1, const char *file2, FILE *f2,
1189 	int a, int b, int c, int d)
1190 {
1191 	static size_t max_context = 64;
1192 	int i;
1193 
1194 	if (format != D_IFDEF && a > b && c > d)
1195 		return;
1196 	if (ignore_pats != NULL) {
1197 		char *line;
1198 		/*
1199 		 * All lines in the change, insert, or delete must
1200 		 * match an ignore pattern for the change to be
1201 		 * ignored.
1202 		 */
1203 		if (a <= b) {		/* Changes and deletes. */
1204 			for (i = a; i <= b; i++) {
1205 				line = preadline(fileno(f1),
1206 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1207 				if (!ignoreline(line))
1208 					goto proceed;
1209 			}
1210 		}
1211 		if (a > b || c <= d) {	/* Changes and inserts. */
1212 			for (i = c; i <= d; i++) {
1213 				line = preadline(fileno(f2),
1214 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1215 				if (!ignoreline(line))
1216 					goto proceed;
1217 			}
1218 		}
1219 		return;
1220 	}
1221 proceed:
1222 	if (format == D_CONTEXT || format == D_UNIFIED) {
1223 		/*
1224 		 * Allocate change records as needed.
1225 		 */
1226 		if (context_vec_ptr == context_vec_end - 1) {
1227 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1228 			max_context <<= 1;
1229 			context_vec_start = realloc(context_vec_start,
1230 			    max_context * sizeof(struct context_vec));
1231 			context_vec_end = context_vec_start + max_context;
1232 			context_vec_ptr = context_vec_start + offset;
1233 		}
1234 		if (anychange == 0) {
1235 			/*
1236 			 * Print the context/unidiff header first time through.
1237 			 */
1238 			printf("%s %s	%s",
1239 			    format == D_CONTEXT ? "***" : "---", diff_file,
1240 			    ctime(&stb1.st_mtime));
1241 			printf("%s %s	%s",
1242 			    format == D_CONTEXT ? "---" : "+++", diff_file,
1243 			    ctime(&stb2.st_mtime));
1244 			anychange = 1;
1245 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1246 		    c > context_vec_ptr->d + (2 * context) + 1) {
1247 			/*
1248 			 * If this change is more than 'context' lines from the
1249 			 * previous change, dump the record and reset it.
1250 			 */
1251 			if (format == D_CONTEXT)
1252 				dump_context_vec(f1, f2);
1253 			else
1254 				dump_unified_vec(f1, f2);
1255 		}
1256 		context_vec_ptr++;
1257 		context_vec_ptr->a = a;
1258 		context_vec_ptr->b = b;
1259 		context_vec_ptr->c = c;
1260 		context_vec_ptr->d = d;
1261 		return;
1262 	}
1263 	if (anychange == 0)
1264 		anychange = 1;
1265 	switch (format) {
1266 	case D_BRIEF:
1267 		return;
1268 	case D_NORMAL:
1269 		range(a, b, ",");
1270 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1271 		if (format == D_NORMAL)
1272 			range(c, d, ",");
1273 		putchar('\n');
1274 		break;
1275 	}
1276 	if (format == D_NORMAL || format == D_IFDEF) {
1277 		fetch(ixold, a, b, f1, '<', 1);
1278 		if (a <= b && c <= d && format == D_NORMAL)
1279 			puts("---");
1280 	}
1281 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1282 	if (inifdef) {
1283 		printf("#endif /* %s */\n", ifdefname);
1284 		inifdef = 0;
1285 	}
1286 }
1287 
1288 static int
1289 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1290 {
1291 	int i, j, c, lastc, col, nc;
1292 
1293 	/*
1294 	 * When doing #ifdef's, copy down to current line
1295 	 * if this is the first file, so that stuff makes it to output.
1296 	 */
1297 	if (format == D_IFDEF && oldfile) {
1298 		long curpos = ftell(lb);
1299 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1300 		nc = f[a > b ? b : a - 1] - curpos;
1301 		for (i = 0; i < nc; i++)
1302 			putchar(getc(lb));
1303 	}
1304 	if (a > b)
1305 		return (0);
1306 	if (format == D_IFDEF) {
1307 		if (inifdef) {
1308 			printf("#else /* %s%s */\n",
1309 			    oldfile == 1 ? "!" : "", ifdefname);
1310 		} else {
1311 			if (oldfile)
1312 				printf("#ifndef %s\n", ifdefname);
1313 			else
1314 				printf("#ifdef %s\n", ifdefname);
1315 		}
1316 		inifdef = 1 + oldfile;
1317 	}
1318 	for (i = a; i <= b; i++) {
1319 		fseek(lb, f[i - 1], SEEK_SET);
1320 		nc = f[i] - f[i - 1];
1321 		if (format != D_IFDEF && ch != '\0') {
1322 			putchar(ch);
1323 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1324 			    || format == D_UNIFIED))
1325 				putchar('\t');
1326 			else if (format != D_UNIFIED)
1327 				putchar(' ');
1328 		}
1329 		col = 0;
1330 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1331 			if ((c = getc(lb)) == EOF) {
1332 				puts("\n\\ No newline at end of file");
1333 				return (0);
1334 			}
1335 			if (c == '\t' && tflag) {
1336 				do {
1337 					putchar(' ');
1338 				} while (++col & 7);
1339 			} else {
1340 				putchar(c);
1341 				col++;
1342 			}
1343 		}
1344 	}
1345 	return (0);
1346 }
1347 
1348 /*
1349  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1350  */
1351 static int
1352 readhash(FILE *f)
1353 {
1354 	int i, t, space;
1355 	int sum;
1356 
1357 	sum = 1;
1358 	space = 0;
1359 	if (!bflag && !wflag) {
1360 		if (iflag)
1361 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1362 				if (t == EOF) {
1363 					if (i == 0)
1364 						return (0);
1365 					break;
1366 				}
1367 				sum = sum * 127 + chrtran[t];
1368 			}
1369 		else
1370 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1371 				if (t == EOF) {
1372 					if (i == 0)
1373 						return (0);
1374 					break;
1375 				}
1376 				sum = sum * 127 + t;
1377 			}
1378 	} else {
1379 		for (i = 0;;) {
1380 			switch (t = getc(f)) {
1381 			case '\t':
1382 			case ' ':
1383 				space++;
1384 				continue;
1385 			default:
1386 				if (space && !wflag) {
1387 					i++;
1388 					space = 0;
1389 				}
1390 				sum = sum * 127 + chrtran[t];
1391 				i++;
1392 				continue;
1393 			case EOF:
1394 				if (i == 0)
1395 					return (0);
1396 				/* FALLTHROUGH */
1397 			case '\n':
1398 				break;
1399 			}
1400 			break;
1401 		}
1402 	}
1403 	/*
1404 	 * There is a remote possibility that we end up with a zero sum.
1405 	 * Zero is used as an EOF marker, so return 1 instead.
1406 	 */
1407 	return (sum == 0 ? 1 : sum);
1408 }
1409 
1410 static int
1411 asciifile(FILE *f)
1412 {
1413 	char buf[BUFSIZ];
1414 	int i, cnt;
1415 
1416 	if (aflag || f == NULL)
1417 		return (1);
1418 
1419 	rewind(f);
1420 	cnt = fread(buf, 1, sizeof(buf), f);
1421 	for (i = 0; i < cnt; i++)
1422 		if (!isprint(buf[i]) && !isspace(buf[i]))
1423 			return (0);
1424 	return (1);
1425 }
1426 
1427 
1428 /* dump accumulated "context" diff changes */
1429 static void
1430 dump_context_vec(FILE *f1, FILE *f2)
1431 {
1432 	struct context_vec *cvp = context_vec_start;
1433 	int lowa, upb, lowc, upd, do_output;
1434 	int a, b, c, d;
1435 	char ch;
1436 
1437 	if (context_vec_start > context_vec_ptr)
1438 		return;
1439 
1440 	b = d = 0;		/* gcc */
1441 	lowa = MAX(1, cvp->a - context);
1442 	upb = MIN(len[0], context_vec_ptr->b + context);
1443 	lowc = MAX(1, cvp->c - context);
1444 	upd = MIN(len[1], context_vec_ptr->d + context);
1445 
1446 	printf("***************");
1447 	printf("\n*** ");
1448 	range(lowa, upb, ",");
1449 	printf(" ****\n");
1450 
1451 	/*
1452 	 * Output changes to the "old" file.  The first loop suppresses
1453 	 * output if there were no changes to the "old" file (we'll see
1454 	 * the "old" lines as context in the "new" list).
1455 	 */
1456 	do_output = 0;
1457 	for (; cvp <= context_vec_ptr; cvp++)
1458 		if (cvp->a <= cvp->b) {
1459 			cvp = context_vec_start;
1460 			do_output++;
1461 			break;
1462 		}
1463 	if (do_output) {
1464 		while (cvp <= context_vec_ptr) {
1465 			a = cvp->a;
1466 			b = cvp->b;
1467 			c = cvp->c;
1468 			d = cvp->d;
1469 
1470 			if (a <= b && c <= d)
1471 				ch = 'c';
1472 			else
1473 				ch = (a <= b) ? 'd' : 'a';
1474 
1475 			if (ch == 'a')
1476 				fetch(ixold, lowa, b, f1, ' ', 0);
1477 			else {
1478 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1479 				fetch(ixold, a, b, f1,
1480 				    ch == 'c' ? '!' : '-', 0);
1481 			}
1482 			lowa = b + 1;
1483 			cvp++;
1484 		}
1485 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1486 	}
1487 	/* output changes to the "new" file */
1488 	printf("--- ");
1489 	range(lowc, upd, ",");
1490 	printf(" ----\n");
1491 
1492 	do_output = 0;
1493 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1494 		if (cvp->c <= cvp->d) {
1495 			cvp = context_vec_start;
1496 			do_output++;
1497 			break;
1498 		}
1499 	if (do_output) {
1500 		while (cvp <= context_vec_ptr) {
1501 			a = cvp->a;
1502 			b = cvp->b;
1503 			c = cvp->c;
1504 			d = cvp->d;
1505 
1506 			if (a <= b && c <= d)
1507 				ch = 'c';
1508 			else
1509 				ch = (a <= b) ? 'd' : 'a';
1510 
1511 			if (ch == 'd')
1512 				fetch(ixnew, lowc, d, f2, ' ', 0);
1513 			else {
1514 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1515 				fetch(ixnew, c, d, f2,
1516 				    ch == 'c' ? '!' : '+', 0);
1517 			}
1518 			lowc = d + 1;
1519 			cvp++;
1520 		}
1521 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1522 	}
1523 	context_vec_ptr = context_vec_start - 1;
1524 }
1525 
1526 /* dump accumulated "unified" diff changes */
1527 static void
1528 dump_unified_vec(FILE *f1, FILE *f2)
1529 {
1530 	struct context_vec *cvp = context_vec_start;
1531 	int lowa, upb, lowc, upd;
1532 	int a, b, c, d;
1533 	char ch;
1534 
1535 	if (context_vec_start > context_vec_ptr)
1536 		return;
1537 
1538 	b = d = 0;		/* gcc */
1539 	lowa = MAX(1, cvp->a - context);
1540 	upb = MIN(len[0], context_vec_ptr->b + context);
1541 	lowc = MAX(1, cvp->c - context);
1542 	upd = MIN(len[1], context_vec_ptr->d + context);
1543 
1544 	fputs("@@ -", stdout);
1545 	uni_range(lowa, upb);
1546 	fputs(" +", stdout);
1547 	uni_range(lowc, upd);
1548 	fputs(" @@", stdout);
1549 	putchar('\n');
1550 
1551 	/*
1552 	 * Output changes in "unified" diff format--the old and new lines
1553 	 * are printed together.
1554 	 */
1555 	for (; cvp <= context_vec_ptr; cvp++) {
1556 		a = cvp->a;
1557 		b = cvp->b;
1558 		c = cvp->c;
1559 		d = cvp->d;
1560 
1561 		/*
1562 		 * c: both new and old changes
1563 		 * d: only changes in the old file
1564 		 * a: only changes in the new file
1565 		 */
1566 		if (a <= b && c <= d)
1567 			ch = 'c';
1568 		else
1569 			ch = (a <= b) ? 'd' : 'a';
1570 
1571 		switch (ch) {
1572 		case 'c':
1573 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1574 			fetch(ixold, a, b, f1, '-', 0);
1575 			fetch(ixnew, c, d, f2, '+', 0);
1576 			break;
1577 		case 'd':
1578 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1579 			fetch(ixold, a, b, f1, '-', 0);
1580 			break;
1581 		case 'a':
1582 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1583 			fetch(ixnew, c, d, f2, '+', 0);
1584 			break;
1585 		}
1586 		lowa = b + 1;
1587 		lowc = d + 1;
1588 	}
1589 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1590 
1591 	context_vec_ptr = context_vec_start - 1;
1592 }
1593