xref: /openbsd-src/usr.bin/cvs/diff.c (revision 4deeb87832e5d00dbfea5b08ed9c463296b435ef)
1 /*	$OpenBSD: diff.c,v 1.7 2004/08/12 18:37:27 jfb Exp $	*/
2 /*
3  * Copyright (C) Caldera International Inc.  2001-2002.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code and documentation must retain the above
10  *    copyright notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed or owned by Caldera
17  *	International, Inc.
18  * 4. Neither the name of Caldera International, Inc. nor the names of other
19  *    contributors may be used to endorse or promote products derived from
20  *    this software without specific prior written permission.
21  *
22  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
23  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
27  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 /*-
36  * Copyright (c) 1991, 1993
37  *	The Regents of the University of California.  All rights reserved.
38  * Copyright (c) 2004 Jean-Francois Brousseau.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 /*
67  *	Uses an algorithm due to Harold Stone, which finds
68  *	a pair of longest identical subsequences in the two
69  *	files.
70  *
71  *	The major goal is to generate the match vector J.
72  *	J[i] is the index of the line in file1 corresponding
73  *	to line i file0. J[i] = 0 if there is no
74  *	such line in file1.
75  *
76  *	Lines are hashed so as to work in core. All potential
77  *	matches are located by sorting the lines of each file
78  *	on the hash (called ``value''). In particular, this
79  *	collects the equivalence classes in file1 together.
80  *	Subroutine equiv replaces the value of each line in
81  *	file0 by the index of the first element of its
82  *	matching equivalence in (the reordered) file1.
83  *	To save space equiv squeezes file1 into a single
84  *	array member in which the equivalence classes
85  *	are simply concatenated, except that their first
86  *	members are flagged by changing sign.
87  *
88  *	Next the indices that point into member are unsorted into
89  *	array class according to the original order of file0.
90  *
91  *	The cleverness lies in routine stone. This marches
92  *	through the lines of file0, developing a vector klist
93  *	of "k-candidates". At step i a k-candidate is a matched
94  *	pair of lines x,y (x in file0 y in file1) such that
95  *	there is a common subsequence of length k
96  *	between the first i lines of file0 and the first y
97  *	lines of file1, but there is no such subsequence for
98  *	any smaller y. x is the earliest possible mate to y
99  *	that occurs in such a subsequence.
100  *
101  *	Whenever any of the members of the equivalence class of
102  *	lines in file1 matable to a line in file0 has serial number
103  *	less than the y of some k-candidate, that k-candidate
104  *	with the smallest such y is replaced. The new
105  *	k-candidate is chained (via pred) to the current
106  *	k-1 candidate so that the actual subsequence can
107  *	be recovered. When a member has serial number greater
108  *	that the y of all k-candidates, the klist is extended.
109  *	At the end, the longest subsequence is pulled out
110  *	and placed in the array J by unravel
111  *
112  *	With J in hand, the matches there recorded are
113  *	check'ed against reality to assure that no spurious
114  *	matches have crept in due to hashing. If they have,
115  *	they are broken, and "jackpot" is recorded--a harmless
116  *	matter except that a true match for a spuriously
117  *	mated line may now be unnecessarily reported as a change.
118  *
119  *	Much of the complexity of the program comes simply
120  *	from trying to minimize core utilization and
121  *	maximize the range of doable problems by dynamically
122  *	allocating what is needed and reusing what is not.
123  *	The core requirements for problems larger than somewhat
124  *	are (in words) 2*length(file0) + length(file1) +
125  *	3*(number of k-candidates installed),  typically about
126  *	6n words for files of length n.
127  */
128 
129 #include <sys/param.h>
130 #include <sys/stat.h>
131 #include <sys/wait.h>
132 
133 #include <errno.h>
134 #include <ctype.h>
135 #include <stdio.h>
136 #include <fcntl.h>
137 #include <paths.h>
138 #include <regex.h>
139 #include <dirent.h>
140 #include <stdlib.h>
141 #include <stddef.h>
142 #include <unistd.h>
143 #include <string.h>
144 #include <sysexits.h>
145 
146 #include "cvs.h"
147 #include "log.h"
148 #include "buf.h"
149 #include "proto.h"
150 
151 
152 #define CVS_DIFF_DEFCTX    3   /* default context length */
153 
154 
155 /*
156  * Output format options
157  */
158 #define	D_NORMAL	0	/* Normal output */
159 #define	D_CONTEXT	1	/* Diff with context */
160 #define	D_UNIFIED	2	/* Unified context diff */
161 #define	D_IFDEF		3	/* Diff with merged #ifdef's */
162 #define	D_BRIEF		4	/* Say if the files differ */
163 
164 /*
165  * Status values for print_status() and diffreg() return values
166  */
167 #define	D_SAME		0	/* Files are the same */
168 #define	D_DIFFER	1	/* Files are different */
169 #define	D_BINARY	2	/* Binary files are different */
170 #define	D_COMMON	3	/* Subdirectory common to both dirs */
171 #define	D_ONLY		4	/* Only exists in one directory */
172 #define	D_MISMATCH1	5	/* path1 was a dir, path2 a file */
173 #define	D_MISMATCH2	6	/* path1 was a file, path2 a dir */
174 #define	D_ERROR		7	/* An error occurred */
175 #define	D_SKIPPED1	8	/* path1 was a special file */
176 #define	D_SKIPPED2	9	/* path2 was a special file */
177 
178 struct cand {
179 	int x;
180 	int y;
181 	int pred;
182 } cand;
183 
184 struct line {
185 	int serial;
186 	int value;
187 } *file[2];
188 
189 /*
190  * The following struct is used to record change information when
191  * doing a "context" or "unified" diff.  (see routine "change" to
192  * understand the highly mnemonic field names)
193  */
194 struct context_vec {
195 	int a;			/* start line in old file */
196 	int b;			/* end line in old file */
197 	int c;			/* start line in new file */
198 	int d;			/* end line in new file */
199 };
200 
201 struct diff_arg {
202 	char  *rev1;
203 	char  *rev2;
204 	char  *date1;
205 	char  *date2;
206 };
207 
208 
209 struct excludes {
210 	char *pattern;
211 	struct excludes *next;
212 };
213 
214 
215 char	*splice(char *, char *);
216 int  cvs_diffreg(const char *, const char *);
217 int  cvs_diff_file  (struct cvs_file *, void *);
218 static void output(const char *, FILE *, const char *, FILE *);
219 static void check(FILE *, FILE *);
220 static void range(int, int, char *);
221 static void uni_range(int, int);
222 static void dump_context_vec(FILE *, FILE *);
223 static void dump_unified_vec(FILE *, FILE *);
224 static void prepare(int, FILE *, off_t);
225 static void prune(void);
226 static void equiv(struct line *, int, struct line *, int, int *);
227 static void unravel(int);
228 static void unsort(struct line *, int, int *);
229 static void change(const char *, FILE *, const char *, FILE *, int, int, int, int);
230 static void sort(struct line *, int);
231 static int  ignoreline(char *);
232 static int  asciifile(FILE *);
233 static int  fetch(long *, int, int, FILE *, int, int);
234 static int  newcand(int, int, int);
235 static int  search(int *, int, int);
236 static int  skipline(FILE *);
237 static int  isqrt(int);
238 static int  stone(int *, int, int *, int *);
239 static int  readhash(FILE *);
240 static int  files_differ(FILE *, FILE *);
241 static char *preadline(int, size_t, off_t);
242 
243 
244 
245 extern int cvs_client;
246 
247 static int aflag, bflag, dflag, iflag, tflag, Tflag, wflag;
248 static int context, status;
249 static int format = D_NORMAL;
250 static struct stat stb1, stb2;
251 static char *ifdefname, *ignore_pats, diffargs[128];
252 static const char *diff_file;
253 regex_t ignore_re;
254 
255 static int  *J;			/* will be overlaid on class */
256 static int  *class;		/* will be overlaid on file[0] */
257 static int  *klist;		/* will be overlaid on file[0] after class */
258 static int  *member;		/* will be overlaid on file[1] */
259 static int   clen;
260 static int   inifdef;		/* whether or not we are in a #ifdef block */
261 static int   len[2];
262 static int   pref, suff;	/* length of prefix and suffix */
263 static int   slen[2];
264 static int   anychange;
265 static long *ixnew;		/* will be overlaid on file[1] */
266 static long *ixold;		/* will be overlaid on klist */
267 static struct cand *clist;	/* merely a free storage pot for candidates */
268 static int   clistlen;		/* the length of clist */
269 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
270 static u_char *chrtran;		/* translation table for case-folding */
271 static struct context_vec *context_vec_start;
272 static struct context_vec *context_vec_end;
273 static struct context_vec *context_vec_ptr;
274 
275 #define FUNCTION_CONTEXT_SIZE	41
276 static int lastline;
277 static int lastmatchline;
278 
279 
280 
281 
282 /*
283  * chrtran points to one of 2 translation tables: cup2low if folding upper to
284  * lower case clow2low if not folding case
285  */
286 u_char clow2low[256] = {
287 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
288 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
289 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
290 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
291 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
292 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
293 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
294 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
295 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
296 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
297 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
298 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
299 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
300 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
301 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
302 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
303 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
304 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
305 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
306 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
307 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
308 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
309 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
310 	0xfd, 0xfe, 0xff
311 };
312 
313 u_char cup2low[256] = {
314 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
315 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
316 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
317 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
318 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
319 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
320 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
321 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
322 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
323 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
324 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
325 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
326 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
327 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
328 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
329 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
330 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
331 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
332 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
333 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
334 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
335 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
336 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
337 	0xfd, 0xfe, 0xff
338 };
339 
340 
341 
342 /*
343  * cvs_diff()
344  *
345  * Handler for the `cvs diff' command.
346  *
347  * SYNOPSIS: cvs [args] diff [-clipu] [-D date] [-r rev]
348  */
349 
350 int
351 cvs_diff(int argc, char **argv)
352 {
353 	int ch, recurse, flags;
354 	struct diff_arg darg;
355 	struct cvsroot *root;
356 
357 	context = CVS_DIFF_DEFCTX;
358 	flags = CF_RECURSE|CF_IGNORE|CF_SORT|CF_KNOWN;
359 	recurse = 1;
360 
361 	memset(&darg, 0, sizeof(darg));
362 	strlcpy(diffargs, argv[0], sizeof(diffargs));
363 
364 	while ((ch = getopt(argc, argv, "cD:lir:u")) != -1) {
365 		switch (ch) {
366 		case 'c':
367 			strlcat(diffargs, " -c", sizeof(diffargs));
368 			format = D_CONTEXT;
369 			break;
370 		case 'D':
371 			if (darg.date1 == NULL && darg.rev1 == NULL)
372 				darg.date1 = optarg;
373 			else if (darg.date2 == NULL && darg.rev2 == NULL)
374 				darg.date2 = optarg;
375 			else {
376 				cvs_log(LP_ERR,
377 				    "no more than two revisions/dates can "
378 				    "be specified");
379 			}
380 			break;
381 		case 'l':
382 			strlcat(diffargs, " -l", sizeof(diffargs));
383 			recurse = 0;
384 			flags &= ~CF_RECURSE;
385 			break;
386 		case 'i':
387 			strlcat(diffargs, " -i", sizeof(diffargs));
388 			iflag = 1;
389 			break;
390 		case 'r':
391 			if ((darg.rev1 == NULL) && (darg.date1 == NULL))
392 				darg.rev1 = optarg;
393 			else if ((darg.rev2 == NULL) && (darg.date2 == NULL))
394 				darg.rev2 = optarg;
395 			else {
396 				cvs_log(LP_ERR,
397 				    "no more than two revisions/dates can "
398 				    "be specified");
399 				return (EX_USAGE);
400 			}
401 			break;
402 		case 'u':
403 			strlcat(diffargs, " -u", sizeof(diffargs));
404 			format = D_UNIFIED;
405 			break;
406 		default:
407 			return (EX_USAGE);
408 		}
409 	}
410 
411 	argc -= optind;
412 	argv += optind;
413 
414 	if (argc == 0) {
415 		cvs_files = cvs_file_get(".", flags);
416 	}
417 	else
418 		cvs_files = cvs_file_getspec(argv, argc, 0);
419 	if (cvs_files == NULL)
420 		return (EX_DATAERR);
421 
422 	cvs_file_examine(cvs_files, cvs_diff_file, &darg);
423 
424 	root = cvs_files->cf_ddat->cd_root;
425 	if (root->cr_method != CVS_METHOD_LOCAL) {
426 		cvs_senddir(root, cvs_files);
427 		cvs_sendreq(root, CVS_REQ_DIFF, NULL);
428 	}
429 
430 	return (0);
431 }
432 
433 
434 /*
435  * cvs_diff_sendflags()
436  *
437  */
438 
439 int
440 cvs_diff_sendflags(struct cvsroot *root, struct diff_arg *dap)
441 {
442 	/* send the flags */
443 	if (format == D_CONTEXT)
444 		cvs_sendarg(root, "-c", 0);
445 	else if (format == D_UNIFIED)
446 		cvs_sendarg(root, "-u", 0);
447 
448 	if (dap->rev1 != NULL) {
449 		cvs_sendarg(root, "-r", 0);
450 		cvs_sendarg(root, dap->rev1, 1);
451 	}
452 	else if (dap->date1 != NULL) {
453 		cvs_sendarg(root, "-D", 0);
454 		cvs_sendarg(root, dap->date1, 1);
455 	}
456 	if (dap->rev2 != NULL) {
457 		cvs_sendarg(root, "-r", 0);
458 		cvs_sendarg(root, dap->rev2, 1);
459 	}
460 	else if (dap->date2 != NULL) {
461 		cvs_sendarg(root, "-D", 0);
462 		cvs_sendarg(root, dap->date2, 1);
463 	}
464 
465 	return (0);
466 }
467 
468 
469 /*
470  * cvs_diff_file()
471  *
472  * Diff a single file.
473  */
474 
475 int
476 cvs_diff_file(struct cvs_file *cfp, void *arg)
477 {
478 	char *dir, *repo, rcspath[MAXPATHLEN], buf[64];
479 	BUF *b1, *b2;
480 	RCSNUM *r1, *r2;
481 	RCSFILE *rf;
482 	struct diff_arg *dap;
483 	struct cvs_ent *entp;
484 	struct cvsroot *root;
485 
486 	dap = (struct diff_arg *)arg;
487 
488 	if (cfp->cf_type == DT_DIR) {
489 		if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
490 			root = cfp->cf_parent->cf_ddat->cd_root;
491 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name);
492 		}
493 		else {
494 			root = cfp->cf_ddat->cd_root;
495 			if ((cfp->cf_parent == NULL) ||
496 			    (root != cfp->cf_parent->cf_ddat->cd_root)) {
497 				cvs_connect(root);
498 				cvs_diff_sendflags(root, dap);
499 			}
500 
501 			cvs_senddir(root, cfp);
502 		}
503 
504 		return (0);
505 	}
506 
507 	rf = NULL;
508 	diff_file = cfp->cf_path;
509 
510 	if (cfp->cf_parent != NULL) {
511 		dir = cfp->cf_parent->cf_path;
512 		root = cfp->cf_parent->cf_ddat->cd_root;
513 		repo = cfp->cf_parent->cf_ddat->cd_repo;
514 	}
515 	else {
516 		dir = ".";
517 		root = NULL;
518 		repo = NULL;
519 	}
520 
521 	if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
522 		if (root->cr_method == CVS_METHOD_LOCAL)
523 			cvs_log(LP_WARN, "I know nothing about %s", diff_file);
524 		else
525 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name);
526 		return (0);
527 	}
528 
529 	entp = cvs_ent_getent(diff_file);
530 	if (entp == NULL)
531 		return (-1);
532 
533 	if (root->cr_method != CVS_METHOD_LOCAL) {
534 		if (cvs_sendentry(root, entp) < 0) {
535 			cvs_ent_free(entp);
536 			return (-1);
537 		}
538 	}
539 
540 	if (cfp->cf_cvstat == CVS_FST_UPTODATE) {
541 		if (root->cr_method != CVS_METHOD_LOCAL)
542 			cvs_sendreq(root, CVS_REQ_UNCHANGED, cfp->cf_name);
543 		cvs_ent_free(entp);
544 		return (0);
545 	}
546 
547 	/* at this point, the file is modified */
548 	if (root->cr_method != CVS_METHOD_LOCAL) {
549 		cvs_sendreq(root, CVS_REQ_MODIFIED, cfp->cf_name);
550 		cvs_sendfile(root, diff_file);
551 	}
552 	else {
553 		snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s",
554 		    root->cr_dir, repo, diff_file, RCS_FILE_EXT);
555 
556 		rf = rcs_open(rcspath, RCS_MODE_READ);
557 		if (rf == NULL) {
558 			cvs_ent_free(entp);
559 			return (-1);
560 		}
561 
562 		cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file,
563 		    RCS_DIFF_DIV, rcspath);
564 
565 		if (dap->rev1 == NULL)
566 			r1 = entp->ce_rev;
567 		else {
568 			r1 = rcsnum_alloc();
569 			rcsnum_aton(dap->rev1, NULL, r1);
570 		}
571 
572 		cvs_printf("retrieving revision %s\n",
573 		    rcsnum_tostr(r1, buf, sizeof(buf)));
574 		b1 = rcs_getrev(rf, r1);
575 
576 		if (dap->rev2 != NULL) {
577 			cvs_printf("retrieving revision %s\n", dap->rev2);
578 			r2 = rcsnum_alloc();
579 			rcsnum_aton(dap->rev2, NULL, r2);
580 			b2 = rcs_getrev(rf, r2);
581 		}
582 		else {
583 			b2 = cvs_buf_load(diff_file, BUF_AUTOEXT);
584 		}
585 
586 		rcs_close(rf);
587 
588 		printf("%s", diffargs);
589 		printf(" -r%s", buf);
590 		if (dap->rev2 != NULL)
591 			printf(" -r%s", dap->rev2);
592 		printf(" %s\n", diff_file);
593 		cvs_buf_write(b1, "/tmp/diff1", 0600);
594 		cvs_buf_write(b2, "/tmp/diff2", 0600);
595 		cvs_diffreg("/tmp/diff1", "/tmp/diff2");
596 	}
597 
598 	cvs_ent_free(entp);
599 	return (0);
600 }
601 
602 
603 int
604 cvs_diffreg(const char *file1, const char *file2)
605 {
606 	FILE *f1, *f2;
607 	int i, rval;
608 
609 	f1 = f2 = NULL;
610 	rval = D_SAME;
611 	anychange = 0;
612 	lastline = 0;
613 	lastmatchline = 0;
614 	context_vec_ptr = context_vec_start - 1;
615 	chrtran = (iflag ? cup2low : clow2low);
616 
617 	f1 = fopen(file1, "r");
618 	if (f1 == NULL) {
619 		cvs_log(LP_ERRNO, "%s", file1);
620 		status |= 2;
621 		goto closem;
622 	}
623 
624 	f2 = fopen(file2, "r");
625 	if (f2 == NULL) {
626 		cvs_log(LP_ERRNO, "%s", file2);
627 		status |= 2;
628 		goto closem;
629 	}
630 
631 	switch (files_differ(f1, f2)) {
632 	case 0:
633 		goto closem;
634 	case 1:
635 		break;
636 	default:
637 		/* error */
638 		status |= 2;
639 		goto closem;
640 	}
641 
642 	if (!asciifile(f1) || !asciifile(f2)) {
643 		rval = D_BINARY;
644 		status |= 1;
645 		goto closem;
646 	}
647 	prepare(0, f1, stb1.st_size);
648 	prepare(1, f2, stb2.st_size);
649 	prune();
650 	sort(sfile[0], slen[0]);
651 	sort(sfile[1], slen[1]);
652 
653 	member = (int *)file[1];
654 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
655 	member = realloc(member, (slen[1] + 2) * sizeof(int));
656 
657 	class = (int *)file[0];
658 	unsort(sfile[0], slen[0], class);
659 	class = realloc(class, (slen[0] + 2) * sizeof(int));
660 
661 	klist = malloc((slen[0] + 2) * sizeof(int));
662 	clen = 0;
663 	clistlen = 100;
664 	clist = malloc(clistlen * sizeof(cand));
665 	i = stone(class, slen[0], member, klist);
666 	free(member);
667 	free(class);
668 
669 	J = realloc(J, (len[0] + 2) * sizeof(int));
670 	unravel(klist[i]);
671 	free(clist);
672 	free(klist);
673 
674 	ixold = realloc(ixold, (len[0] + 2) * sizeof(long));
675 	ixnew = realloc(ixnew, (len[1] + 2) * sizeof(long));
676 	check(f1, f2);
677 	output(file1, f1, file2, f2);
678 
679 closem:
680 	if (anychange) {
681 		status |= 1;
682 		if (rval == D_SAME)
683 			rval = D_DIFFER;
684 	}
685 	if (f1 != NULL)
686 		fclose(f1);
687 	if (f2 != NULL)
688 		fclose(f2);
689 	return (rval);
690 }
691 
692 /*
693  * Check to see if the given files differ.
694  * Returns 0 if they are the same, 1 if different, and -1 on error.
695  * XXX - could use code from cmp(1) [faster]
696  */
697 static int
698 files_differ(FILE *f1, FILE *f2)
699 {
700 	char buf1[BUFSIZ], buf2[BUFSIZ];
701 	size_t i, j;
702 
703 	if (stb1.st_size != stb2.st_size)
704 		return (1);
705 	for (;;) {
706 		i = fread(buf1, 1, sizeof(buf1), f1);
707 		j = fread(buf2, 1, sizeof(buf2), f2);
708 		if (i != j)
709 			return (1);
710 		if (i == 0 && j == 0) {
711 			if (ferror(f1) || ferror(f2))
712 				return (1);
713 			return (0);
714 		}
715 		if (memcmp(buf1, buf2, i) != 0)
716 			return (1);
717 	}
718 }
719 
720 
721 char *
722 splice(char *dir, char *file)
723 {
724 	char *tail, *buf;
725 
726 	if ((tail = strrchr(file, '/')) == NULL)
727 		tail = file;
728 	else
729 		tail++;
730 	asprintf(&buf, "%s/%s", dir, tail);
731 	return (buf);
732 }
733 
734 static void
735 prepare(int i, FILE *fd, off_t filesize)
736 {
737 	struct line *p;
738 	int j, h;
739 	size_t sz;
740 
741 	rewind(fd);
742 
743 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
744 	if (sz < 100)
745 		sz = 100;
746 
747 	p = malloc((sz + 3) * sizeof(struct line));
748 	for (j = 0; (h = readhash(fd));) {
749 		if (j == (int)sz) {
750 			sz = sz * 3 / 2;
751 			p = realloc(p, (sz + 3) * sizeof(struct line));
752 		}
753 		p[++j].value = h;
754 	}
755 	len[i] = j;
756 	file[i] = p;
757 }
758 
759 static void
760 prune(void)
761 {
762 	int i, j;
763 
764 	for (pref = 0; pref < len[0] && pref < len[1] &&
765 	    file[0][pref + 1].value == file[1][pref + 1].value;
766 	    pref++)
767 		;
768 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
769 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
770 	    suff++)
771 		;
772 	for (j = 0; j < 2; j++) {
773 		sfile[j] = file[j] + pref;
774 		slen[j] = len[j] - pref - suff;
775 		for (i = 0; i <= slen[j]; i++)
776 			sfile[j][i].serial = i;
777 	}
778 }
779 
780 static void
781 equiv(struct line *a, int n, struct line *b, int m, int *c)
782 {
783 	int i, j;
784 
785 	i = j = 1;
786 	while (i <= n && j <= m) {
787 		if (a[i].value < b[j].value)
788 			a[i++].value = 0;
789 		else if (a[i].value == b[j].value)
790 			a[i++].value = j;
791 		else
792 			j++;
793 	}
794 	while (i <= n)
795 		a[i++].value = 0;
796 	b[m + 1].value = 0;
797 	j = 0;
798 	while (++j <= m) {
799 		c[j] = -b[j].serial;
800 		while (b[j + 1].value == b[j].value) {
801 			j++;
802 			c[j] = b[j].serial;
803 		}
804 	}
805 	c[j] = -1;
806 }
807 
808 /* Code taken from ping.c */
809 static int
810 isqrt(int n)
811 {
812 	int y, x = 1;
813 
814 	if (n == 0)
815 		return(0);
816 
817 	do { /* newton was a stinker */
818 		y = x;
819 		x = n / x;
820 		x += y;
821 		x /= 2;
822 	} while ((x - y) > 1 || (x - y) < -1);
823 
824 	return (x);
825 }
826 
827 static int
828 stone(int *a, int n, int *b, int *c)
829 {
830 	int i, k, y, j, l;
831 	int oldc, tc, oldl;
832 	u_int numtries;
833 
834 	const u_int bound = dflag ? UINT_MAX : MAX(256, isqrt(n));
835 
836 	k = 0;
837 	c[0] = newcand(0, 0, 0);
838 	for (i = 1; i <= n; i++) {
839 		j = a[i];
840 		if (j == 0)
841 			continue;
842 		y = -b[j];
843 		oldl = 0;
844 		oldc = c[0];
845 		numtries = 0;
846 		do {
847 			if (y <= clist[oldc].y)
848 				continue;
849 			l = search(c, k, y);
850 			if (l != oldl + 1)
851 				oldc = c[l - 1];
852 			if (l <= k) {
853 				if (clist[c[l]].y <= y)
854 					continue;
855 				tc = c[l];
856 				c[l] = newcand(i, y, oldc);
857 				oldc = tc;
858 				oldl = l;
859 				numtries++;
860 			} else {
861 				c[l] = newcand(i, y, oldc);
862 				k++;
863 				break;
864 			}
865 		} while ((y = b[++j]) > 0 && numtries < bound);
866 	}
867 	return (k);
868 }
869 
870 static int
871 newcand(int x, int y, int pred)
872 {
873 	struct cand *q;
874 
875 	if (clen == clistlen) {
876 		clistlen = clistlen * 11 / 10;
877 		clist = realloc(clist, clistlen * sizeof(cand));
878 	}
879 	q = clist + clen;
880 	q->x = x;
881 	q->y = y;
882 	q->pred = pred;
883 	return (clen++);
884 }
885 
886 static int
887 search(int *c, int k, int y)
888 {
889 	int i, j, l, t;
890 
891 	if (clist[c[k]].y < y)	/* quick look for typical case */
892 		return (k + 1);
893 	i = 0;
894 	j = k + 1;
895 	while (1) {
896 		l = i + j;
897 		if ((l >>= 1) <= i)
898 			break;
899 		t = clist[c[l]].y;
900 		if (t > y)
901 			j = l;
902 		else if (t < y)
903 			i = l;
904 		else
905 			return (l);
906 	}
907 	return (l + 1);
908 }
909 
910 static void
911 unravel(int p)
912 {
913 	struct cand *q;
914 	int i;
915 
916 	for (i = 0; i <= len[0]; i++)
917 		J[i] = i <= pref ? i :
918 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
919 	for (q = clist + p; q->y != 0; q = clist + q->pred)
920 		J[q->x + pref] = q->y + pref;
921 }
922 
923 /*
924  * Check does double duty:
925  *  1.	ferret out any fortuitous correspondences due
926  *	to confounding by hashing (which result in "jackpot")
927  *  2.  collect random access indexes to the two files
928  */
929 static void
930 check(FILE *f1, FILE *f2)
931 {
932 	int i, j, jackpot, c, d;
933 	long ctold, ctnew;
934 
935 	rewind(f1);
936 	rewind(f2);
937 	j = 1;
938 	ixold[0] = ixnew[0] = 0;
939 	jackpot = 0;
940 	ctold = ctnew = 0;
941 	for (i = 1; i <= len[0]; i++) {
942 		if (J[i] == 0) {
943 			ixold[i] = ctold += skipline(f1);
944 			continue;
945 		}
946 		while (j < J[i]) {
947 			ixnew[j] = ctnew += skipline(f2);
948 			j++;
949 		}
950 		if (bflag || wflag || iflag) {
951 			for (;;) {
952 				c = getc(f1);
953 				d = getc(f2);
954 				/*
955 				 * GNU diff ignores a missing newline
956 				 * in one file if bflag || wflag.
957 				 */
958 				if ((bflag || wflag) &&
959 				    ((c == EOF && d == '\n') ||
960 				    (c == '\n' && d == EOF))) {
961 					break;
962 				}
963 				ctold++;
964 				ctnew++;
965 				if (bflag && isspace(c) && isspace(d)) {
966 					do {
967 						if (c == '\n')
968 							break;
969 						ctold++;
970 					} while (isspace(c = getc(f1)));
971 					do {
972 						if (d == '\n')
973 							break;
974 						ctnew++;
975 					} while (isspace(d = getc(f2)));
976 				} else if (wflag) {
977 					while (isspace(c) && c != '\n') {
978 						c = getc(f1);
979 						ctold++;
980 					}
981 					while (isspace(d) && d != '\n') {
982 						d = getc(f2);
983 						ctnew++;
984 					}
985 				}
986 				if (chrtran[c] != chrtran[d]) {
987 					jackpot++;
988 					J[i] = 0;
989 					if (c != '\n' && c != EOF)
990 						ctold += skipline(f1);
991 					if (d != '\n' && c != EOF)
992 						ctnew += skipline(f2);
993 					break;
994 				}
995 				if (c == '\n' || c == EOF)
996 					break;
997 			}
998 		} else {
999 			for (;;) {
1000 				ctold++;
1001 				ctnew++;
1002 				if ((c = getc(f1)) != (d = getc(f2))) {
1003 					/* jackpot++; */
1004 					J[i] = 0;
1005 					if (c != '\n' && c != EOF)
1006 						ctold += skipline(f1);
1007 					if (d != '\n' && c != EOF)
1008 						ctnew += skipline(f2);
1009 					break;
1010 				}
1011 				if (c == '\n' || c == EOF)
1012 					break;
1013 			}
1014 		}
1015 		ixold[i] = ctold;
1016 		ixnew[j] = ctnew;
1017 		j++;
1018 	}
1019 	for (; j <= len[1]; j++)
1020 		ixnew[j] = ctnew += skipline(f2);
1021 	/*
1022 	 * if (jackpot)
1023 	 *	fprintf(stderr, "jackpot\n");
1024 	 */
1025 }
1026 
1027 /* shellsort CACM #201 */
1028 static void
1029 sort(struct line *a, int n)
1030 {
1031 	struct line *ai, *aim, w;
1032 	int j, m = 0, k;
1033 
1034 	if (n == 0)
1035 		return;
1036 	for (j = 1; j <= n; j *= 2)
1037 		m = 2 * j - 1;
1038 	for (m /= 2; m != 0; m /= 2) {
1039 		k = n - m;
1040 		for (j = 1; j <= k; j++) {
1041 			for (ai = &a[j]; ai > a; ai -= m) {
1042 				aim = &ai[m];
1043 				if (aim < ai)
1044 					break;	/* wraparound */
1045 				if (aim->value > ai[0].value ||
1046 				    (aim->value == ai[0].value &&
1047 					aim->serial > ai[0].serial))
1048 					break;
1049 				w.value = ai[0].value;
1050 				ai[0].value = aim->value;
1051 				aim->value = w.value;
1052 				w.serial = ai[0].serial;
1053 				ai[0].serial = aim->serial;
1054 				aim->serial = w.serial;
1055 			}
1056 		}
1057 	}
1058 }
1059 
1060 static void
1061 unsort(struct line *f, int l, int *b)
1062 {
1063 	int *a, i;
1064 
1065 	a = malloc((l + 1) * sizeof(int));
1066 	for (i = 1; i <= l; i++)
1067 		a[f[i].serial] = f[i].value;
1068 	for (i = 1; i <= l; i++)
1069 		b[i] = a[i];
1070 	free(a);
1071 }
1072 
1073 static int
1074 skipline(FILE *f)
1075 {
1076 	int i, c;
1077 
1078 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
1079 		continue;
1080 	return (i);
1081 }
1082 
1083 static void
1084 output(const char *file1, FILE *f1, const char *file2, FILE *f2)
1085 {
1086 	int m, i0, i1, j0, j1;
1087 
1088 	rewind(f1);
1089 	rewind(f2);
1090 	m = len[0];
1091 	J[0] = 0;
1092 	J[m + 1] = len[1] + 1;
1093 	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
1094 		while (i0 <= m && J[i0] == J[i0 - 1] + 1)
1095 			i0++;
1096 		j0 = J[i0 - 1] + 1;
1097 		i1 = i0 - 1;
1098 		while (i1 < m && J[i1 + 1] == 0)
1099 			i1++;
1100 		j1 = J[i1 + 1] - 1;
1101 		J[i1] = j1;
1102 		change(file1, f1, file2, f2, i0, i1, j0, j1);
1103 	}
1104 	if (m == 0)
1105 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
1106 	if (format == D_IFDEF) {
1107 		for (;;) {
1108 #define	c i0
1109 			if ((c = getc(f1)) == EOF)
1110 				return;
1111 			putchar(c);
1112 		}
1113 #undef c
1114 	}
1115 	if (anychange != 0) {
1116 		if (format == D_CONTEXT)
1117 			dump_context_vec(f1, f2);
1118 		else if (format == D_UNIFIED)
1119 			dump_unified_vec(f1, f2);
1120 	}
1121 }
1122 
1123 static __inline void
1124 range(int a, int b, char *separator)
1125 {
1126 	printf("%d", a > b ? b : a);
1127 	if (a < b)
1128 		printf("%s%d", separator, b);
1129 }
1130 
1131 static __inline void
1132 uni_range(int a, int b)
1133 {
1134 	if (a < b)
1135 		printf("%d,%d", a, b - a + 1);
1136 	else if (a == b)
1137 		printf("%d", b);
1138 	else
1139 		printf("%d,0", b);
1140 }
1141 
1142 static char *
1143 preadline(int fd, size_t len, off_t off)
1144 {
1145 	char *line;
1146 	ssize_t nr;
1147 
1148 	line = malloc(len + 1);
1149 	if (line == NULL) {
1150 		cvs_log(LP_ERRNO, "failed to allocate line");
1151 		return (NULL);
1152 	}
1153 	if ((nr = pread(fd, line, len, off)) < 0) {
1154 		cvs_log(LP_ERRNO, "preadline failed");
1155 		return (NULL);
1156 	}
1157 	line[nr] = '\0';
1158 	return (line);
1159 }
1160 
1161 static int
1162 ignoreline(char *line)
1163 {
1164 	int ret;
1165 
1166 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1167 	free(line);
1168 	return (ret == 0);	/* if it matched, it should be ignored. */
1169 }
1170 
1171 /*
1172  * Indicate that there is a difference between lines a and b of the from file
1173  * to get to lines c to d of the to file.  If a is greater then b then there
1174  * are no lines in the from file involved and this means that there were
1175  * lines appended (beginning at b).  If c is greater than d then there are
1176  * lines missing from the to file.
1177  */
1178 static void
1179 change(const char *file1, FILE *f1, const char *file2, FILE *f2,
1180 	int a, int b, int c, int d)
1181 {
1182 	static size_t max_context = 64;
1183 	int i;
1184 
1185 	if (format != D_IFDEF && a > b && c > d)
1186 		return;
1187 	if (ignore_pats != NULL) {
1188 		char *line;
1189 		/*
1190 		 * All lines in the change, insert, or delete must
1191 		 * match an ignore pattern for the change to be
1192 		 * ignored.
1193 		 */
1194 		if (a <= b) {		/* Changes and deletes. */
1195 			for (i = a; i <= b; i++) {
1196 				line = preadline(fileno(f1),
1197 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1198 				if (!ignoreline(line))
1199 					goto proceed;
1200 			}
1201 		}
1202 		if (a > b || c <= d) {	/* Changes and inserts. */
1203 			for (i = c; i <= d; i++) {
1204 				line = preadline(fileno(f2),
1205 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1206 				if (!ignoreline(line))
1207 					goto proceed;
1208 			}
1209 		}
1210 		return;
1211 	}
1212 proceed:
1213 	if (format == D_CONTEXT || format == D_UNIFIED) {
1214 		/*
1215 		 * Allocate change records as needed.
1216 		 */
1217 		if (context_vec_ptr == context_vec_end - 1) {
1218 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1219 			max_context <<= 1;
1220 			context_vec_start = realloc(context_vec_start,
1221 			    max_context * sizeof(struct context_vec));
1222 			context_vec_end = context_vec_start + max_context;
1223 			context_vec_ptr = context_vec_start + offset;
1224 		}
1225 		if (anychange == 0) {
1226 			/*
1227 			 * Print the context/unidiff header first time through.
1228 			 */
1229 			printf("%s %s	%s",
1230 			    format == D_CONTEXT ? "***" : "---", diff_file,
1231 			    ctime(&stb1.st_mtime));
1232 			printf("%s %s	%s",
1233 			    format == D_CONTEXT ? "---" : "+++", diff_file,
1234 			    ctime(&stb2.st_mtime));
1235 			anychange = 1;
1236 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1237 		    c > context_vec_ptr->d + (2 * context) + 1) {
1238 			/*
1239 			 * If this change is more than 'context' lines from the
1240 			 * previous change, dump the record and reset it.
1241 			 */
1242 			if (format == D_CONTEXT)
1243 				dump_context_vec(f1, f2);
1244 			else
1245 				dump_unified_vec(f1, f2);
1246 		}
1247 		context_vec_ptr++;
1248 		context_vec_ptr->a = a;
1249 		context_vec_ptr->b = b;
1250 		context_vec_ptr->c = c;
1251 		context_vec_ptr->d = d;
1252 		return;
1253 	}
1254 	if (anychange == 0)
1255 		anychange = 1;
1256 	switch (format) {
1257 	case D_BRIEF:
1258 		return;
1259 	case D_NORMAL:
1260 		range(a, b, ",");
1261 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1262 		if (format == D_NORMAL)
1263 			range(c, d, ",");
1264 		putchar('\n');
1265 		break;
1266 	}
1267 	if (format == D_NORMAL || format == D_IFDEF) {
1268 		fetch(ixold, a, b, f1, '<', 1);
1269 		if (a <= b && c <= d && format == D_NORMAL)
1270 			puts("---");
1271 	}
1272 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1273 	if (inifdef) {
1274 		printf("#endif /* %s */\n", ifdefname);
1275 		inifdef = 0;
1276 	}
1277 }
1278 
1279 static int
1280 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1281 {
1282 	int i, j, c, lastc, col, nc;
1283 
1284 	/*
1285 	 * When doing #ifdef's, copy down to current line
1286 	 * if this is the first file, so that stuff makes it to output.
1287 	 */
1288 	if (format == D_IFDEF && oldfile) {
1289 		long curpos = ftell(lb);
1290 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1291 		nc = f[a > b ? b : a - 1] - curpos;
1292 		for (i = 0; i < nc; i++)
1293 			putchar(getc(lb));
1294 	}
1295 	if (a > b)
1296 		return (0);
1297 	if (format == D_IFDEF) {
1298 		if (inifdef) {
1299 			printf("#else /* %s%s */\n",
1300 			    oldfile == 1 ? "!" : "", ifdefname);
1301 		} else {
1302 			if (oldfile)
1303 				printf("#ifndef %s\n", ifdefname);
1304 			else
1305 				printf("#ifdef %s\n", ifdefname);
1306 		}
1307 		inifdef = 1 + oldfile;
1308 	}
1309 	for (i = a; i <= b; i++) {
1310 		fseek(lb, f[i - 1], SEEK_SET);
1311 		nc = f[i] - f[i - 1];
1312 		if (format != D_IFDEF && ch != '\0') {
1313 			putchar(ch);
1314 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1315 			    || format == D_UNIFIED))
1316 				putchar('\t');
1317 			else if (format != D_UNIFIED)
1318 				putchar(' ');
1319 		}
1320 		col = 0;
1321 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1322 			if ((c = getc(lb)) == EOF) {
1323 				puts("\n\\ No newline at end of file");
1324 				return (0);
1325 			}
1326 			if (c == '\t' && tflag) {
1327 				do {
1328 					putchar(' ');
1329 				} while (++col & 7);
1330 			} else {
1331 				putchar(c);
1332 				col++;
1333 			}
1334 		}
1335 	}
1336 	return (0);
1337 }
1338 
1339 /*
1340  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1341  */
1342 static int
1343 readhash(FILE *f)
1344 {
1345 	int i, t, space;
1346 	int sum;
1347 
1348 	sum = 1;
1349 	space = 0;
1350 	if (!bflag && !wflag) {
1351 		if (iflag)
1352 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1353 				if (t == EOF) {
1354 					if (i == 0)
1355 						return (0);
1356 					break;
1357 				}
1358 				sum = sum * 127 + chrtran[t];
1359 			}
1360 		else
1361 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1362 				if (t == EOF) {
1363 					if (i == 0)
1364 						return (0);
1365 					break;
1366 				}
1367 				sum = sum * 127 + t;
1368 			}
1369 	} else {
1370 		for (i = 0;;) {
1371 			switch (t = getc(f)) {
1372 			case '\t':
1373 			case ' ':
1374 				space++;
1375 				continue;
1376 			default:
1377 				if (space && !wflag) {
1378 					i++;
1379 					space = 0;
1380 				}
1381 				sum = sum * 127 + chrtran[t];
1382 				i++;
1383 				continue;
1384 			case EOF:
1385 				if (i == 0)
1386 					return (0);
1387 				/* FALLTHROUGH */
1388 			case '\n':
1389 				break;
1390 			}
1391 			break;
1392 		}
1393 	}
1394 	/*
1395 	 * There is a remote possibility that we end up with a zero sum.
1396 	 * Zero is used as an EOF marker, so return 1 instead.
1397 	 */
1398 	return (sum == 0 ? 1 : sum);
1399 }
1400 
1401 static int
1402 asciifile(FILE *f)
1403 {
1404 	char buf[BUFSIZ];
1405 	int i, cnt;
1406 
1407 	if (aflag || f == NULL)
1408 		return (1);
1409 
1410 	rewind(f);
1411 	cnt = fread(buf, 1, sizeof(buf), f);
1412 	for (i = 0; i < cnt; i++)
1413 		if (!isprint(buf[i]) && !isspace(buf[i]))
1414 			return (0);
1415 	return (1);
1416 }
1417 
1418 
1419 /* dump accumulated "context" diff changes */
1420 static void
1421 dump_context_vec(FILE *f1, FILE *f2)
1422 {
1423 	struct context_vec *cvp = context_vec_start;
1424 	int lowa, upb, lowc, upd, do_output;
1425 	int a, b, c, d;
1426 	char ch;
1427 
1428 	if (context_vec_start > context_vec_ptr)
1429 		return;
1430 
1431 	b = d = 0;		/* gcc */
1432 	lowa = MAX(1, cvp->a - context);
1433 	upb = MIN(len[0], context_vec_ptr->b + context);
1434 	lowc = MAX(1, cvp->c - context);
1435 	upd = MIN(len[1], context_vec_ptr->d + context);
1436 
1437 	printf("***************");
1438 	printf("\n*** ");
1439 	range(lowa, upb, ",");
1440 	printf(" ****\n");
1441 
1442 	/*
1443 	 * Output changes to the "old" file.  The first loop suppresses
1444 	 * output if there were no changes to the "old" file (we'll see
1445 	 * the "old" lines as context in the "new" list).
1446 	 */
1447 	do_output = 0;
1448 	for (; cvp <= context_vec_ptr; cvp++)
1449 		if (cvp->a <= cvp->b) {
1450 			cvp = context_vec_start;
1451 			do_output++;
1452 			break;
1453 		}
1454 	if (do_output) {
1455 		while (cvp <= context_vec_ptr) {
1456 			a = cvp->a;
1457 			b = cvp->b;
1458 			c = cvp->c;
1459 			d = cvp->d;
1460 
1461 			if (a <= b && c <= d)
1462 				ch = 'c';
1463 			else
1464 				ch = (a <= b) ? 'd' : 'a';
1465 
1466 			if (ch == 'a')
1467 				fetch(ixold, lowa, b, f1, ' ', 0);
1468 			else {
1469 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1470 				fetch(ixold, a, b, f1,
1471 				    ch == 'c' ? '!' : '-', 0);
1472 			}
1473 			lowa = b + 1;
1474 			cvp++;
1475 		}
1476 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1477 	}
1478 	/* output changes to the "new" file */
1479 	printf("--- ");
1480 	range(lowc, upd, ",");
1481 	printf(" ----\n");
1482 
1483 	do_output = 0;
1484 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1485 		if (cvp->c <= cvp->d) {
1486 			cvp = context_vec_start;
1487 			do_output++;
1488 			break;
1489 		}
1490 	if (do_output) {
1491 		while (cvp <= context_vec_ptr) {
1492 			a = cvp->a;
1493 			b = cvp->b;
1494 			c = cvp->c;
1495 			d = cvp->d;
1496 
1497 			if (a <= b && c <= d)
1498 				ch = 'c';
1499 			else
1500 				ch = (a <= b) ? 'd' : 'a';
1501 
1502 			if (ch == 'd')
1503 				fetch(ixnew, lowc, d, f2, ' ', 0);
1504 			else {
1505 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1506 				fetch(ixnew, c, d, f2,
1507 				    ch == 'c' ? '!' : '+', 0);
1508 			}
1509 			lowc = d + 1;
1510 			cvp++;
1511 		}
1512 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1513 	}
1514 	context_vec_ptr = context_vec_start - 1;
1515 }
1516 
1517 /* dump accumulated "unified" diff changes */
1518 static void
1519 dump_unified_vec(FILE *f1, FILE *f2)
1520 {
1521 	struct context_vec *cvp = context_vec_start;
1522 	int lowa, upb, lowc, upd;
1523 	int a, b, c, d;
1524 	char ch;
1525 
1526 	if (context_vec_start > context_vec_ptr)
1527 		return;
1528 
1529 	b = d = 0;		/* gcc */
1530 	lowa = MAX(1, cvp->a - context);
1531 	upb = MIN(len[0], context_vec_ptr->b + context);
1532 	lowc = MAX(1, cvp->c - context);
1533 	upd = MIN(len[1], context_vec_ptr->d + context);
1534 
1535 	fputs("@@ -", stdout);
1536 	uni_range(lowa, upb);
1537 	fputs(" +", stdout);
1538 	uni_range(lowc, upd);
1539 	fputs(" @@", stdout);
1540 	putchar('\n');
1541 
1542 	/*
1543 	 * Output changes in "unified" diff format--the old and new lines
1544 	 * are printed together.
1545 	 */
1546 	for (; cvp <= context_vec_ptr; cvp++) {
1547 		a = cvp->a;
1548 		b = cvp->b;
1549 		c = cvp->c;
1550 		d = cvp->d;
1551 
1552 		/*
1553 		 * c: both new and old changes
1554 		 * d: only changes in the old file
1555 		 * a: only changes in the new file
1556 		 */
1557 		if (a <= b && c <= d)
1558 			ch = 'c';
1559 		else
1560 			ch = (a <= b) ? 'd' : 'a';
1561 
1562 		switch (ch) {
1563 		case 'c':
1564 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1565 			fetch(ixold, a, b, f1, '-', 0);
1566 			fetch(ixnew, c, d, f2, '+', 0);
1567 			break;
1568 		case 'd':
1569 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1570 			fetch(ixold, a, b, f1, '-', 0);
1571 			break;
1572 		case 'a':
1573 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1574 			fetch(ixnew, c, d, f2, '+', 0);
1575 			break;
1576 		}
1577 		lowa = b + 1;
1578 		lowc = d + 1;
1579 	}
1580 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1581 
1582 	context_vec_ptr = context_vec_start - 1;
1583 }
1584