xref: /openbsd-src/usr.bin/cvs/diff.c (revision 0eea0d082377cb9c3ec583313dc4d52b7b6a4d6d)
1 /*	$OpenBSD: diff.c,v 1.6 2004/08/06 13:08:39 jfb Exp $	*/
2 /*
3  * Copyright (C) Caldera International Inc.  2001-2002.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code and documentation must retain the above
10  *    copyright notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed or owned by Caldera
17  *	International, Inc.
18  * 4. Neither the name of Caldera International, Inc. nor the names of other
19  *    contributors may be used to endorse or promote products derived from
20  *    this software without specific prior written permission.
21  *
22  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
23  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
27  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 /*-
36  * Copyright (c) 1991, 1993
37  *	The Regents of the University of California.  All rights reserved.
38  * Copyright (c) 2004 Jean-Francois Brousseau.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 /*
67  *	Uses an algorithm due to Harold Stone, which finds
68  *	a pair of longest identical subsequences in the two
69  *	files.
70  *
71  *	The major goal is to generate the match vector J.
72  *	J[i] is the index of the line in file1 corresponding
73  *	to line i file0. J[i] = 0 if there is no
74  *	such line in file1.
75  *
76  *	Lines are hashed so as to work in core. All potential
77  *	matches are located by sorting the lines of each file
78  *	on the hash (called ``value''). In particular, this
79  *	collects the equivalence classes in file1 together.
80  *	Subroutine equiv replaces the value of each line in
81  *	file0 by the index of the first element of its
82  *	matching equivalence in (the reordered) file1.
83  *	To save space equiv squeezes file1 into a single
84  *	array member in which the equivalence classes
85  *	are simply concatenated, except that their first
86  *	members are flagged by changing sign.
87  *
88  *	Next the indices that point into member are unsorted into
89  *	array class according to the original order of file0.
90  *
91  *	The cleverness lies in routine stone. This marches
92  *	through the lines of file0, developing a vector klist
93  *	of "k-candidates". At step i a k-candidate is a matched
94  *	pair of lines x,y (x in file0 y in file1) such that
95  *	there is a common subsequence of length k
96  *	between the first i lines of file0 and the first y
97  *	lines of file1, but there is no such subsequence for
98  *	any smaller y. x is the earliest possible mate to y
99  *	that occurs in such a subsequence.
100  *
101  *	Whenever any of the members of the equivalence class of
102  *	lines in file1 matable to a line in file0 has serial number
103  *	less than the y of some k-candidate, that k-candidate
104  *	with the smallest such y is replaced. The new
105  *	k-candidate is chained (via pred) to the current
106  *	k-1 candidate so that the actual subsequence can
107  *	be recovered. When a member has serial number greater
108  *	that the y of all k-candidates, the klist is extended.
109  *	At the end, the longest subsequence is pulled out
110  *	and placed in the array J by unravel
111  *
112  *	With J in hand, the matches there recorded are
113  *	check'ed against reality to assure that no spurious
114  *	matches have crept in due to hashing. If they have,
115  *	they are broken, and "jackpot" is recorded--a harmless
116  *	matter except that a true match for a spuriously
117  *	mated line may now be unnecessarily reported as a change.
118  *
119  *	Much of the complexity of the program comes simply
120  *	from trying to minimize core utilization and
121  *	maximize the range of doable problems by dynamically
122  *	allocating what is needed and reusing what is not.
123  *	The core requirements for problems larger than somewhat
124  *	are (in words) 2*length(file0) + length(file1) +
125  *	3*(number of k-candidates installed),  typically about
126  *	6n words for files of length n.
127  */
128 
129 #include <sys/param.h>
130 #include <sys/stat.h>
131 #include <sys/wait.h>
132 
133 #include <errno.h>
134 #include <ctype.h>
135 #include <stdio.h>
136 #include <fcntl.h>
137 #include <paths.h>
138 #include <regex.h>
139 #include <dirent.h>
140 #include <stdlib.h>
141 #include <stddef.h>
142 #include <unistd.h>
143 #include <string.h>
144 #include <sysexits.h>
145 
146 #include "cvs.h"
147 #include "log.h"
148 #include "buf.h"
149 #include "proto.h"
150 
151 
152 #define CVS_DIFF_DEFCTX    3   /* default context length */
153 
154 
155 /*
156  * Output format options
157  */
158 #define	D_NORMAL	0	/* Normal output */
159 #define	D_CONTEXT	1	/* Diff with context */
160 #define	D_UNIFIED	2	/* Unified context diff */
161 #define	D_IFDEF		3	/* Diff with merged #ifdef's */
162 #define	D_BRIEF		4	/* Say if the files differ */
163 
164 /*
165  * Status values for print_status() and diffreg() return values
166  */
167 #define	D_SAME		0	/* Files are the same */
168 #define	D_DIFFER	1	/* Files are different */
169 #define	D_BINARY	2	/* Binary files are different */
170 #define	D_COMMON	3	/* Subdirectory common to both dirs */
171 #define	D_ONLY		4	/* Only exists in one directory */
172 #define	D_MISMATCH1	5	/* path1 was a dir, path2 a file */
173 #define	D_MISMATCH2	6	/* path1 was a file, path2 a dir */
174 #define	D_ERROR		7	/* An error occurred */
175 #define	D_SKIPPED1	8	/* path1 was a special file */
176 #define	D_SKIPPED2	9	/* path2 was a special file */
177 
178 struct cand {
179 	int x;
180 	int y;
181 	int pred;
182 } cand;
183 
184 struct line {
185 	int serial;
186 	int value;
187 } *file[2];
188 
189 /*
190  * The following struct is used to record change information when
191  * doing a "context" or "unified" diff.  (see routine "change" to
192  * understand the highly mnemonic field names)
193  */
194 struct context_vec {
195 	int a;			/* start line in old file */
196 	int b;			/* end line in old file */
197 	int c;			/* start line in new file */
198 	int d;			/* end line in new file */
199 };
200 
201 struct diff_arg {
202 	char  *rev1;
203 	char  *rev2;
204 	char  *date1;
205 	char  *date2;
206 };
207 
208 
209 struct excludes {
210 	char *pattern;
211 	struct excludes *next;
212 };
213 
214 
215 char	*splice(char *, char *);
216 int  cvs_diffreg(const char *, const char *);
217 int  cvs_diff_file  (struct cvs_file *, void *);
218 static void output(const char *, FILE *, const char *, FILE *);
219 static void check(FILE *, FILE *);
220 static void range(int, int, char *);
221 static void uni_range(int, int);
222 static void dump_context_vec(FILE *, FILE *);
223 static void dump_unified_vec(FILE *, FILE *);
224 static void prepare(int, FILE *, off_t);
225 static void prune(void);
226 static void equiv(struct line *, int, struct line *, int, int *);
227 static void unravel(int);
228 static void unsort(struct line *, int, int *);
229 static void change(const char *, FILE *, const char *, FILE *, int, int, int, int);
230 static void sort(struct line *, int);
231 static int  ignoreline(char *);
232 static int  asciifile(FILE *);
233 static int  fetch(long *, int, int, FILE *, int, int);
234 static int  newcand(int, int, int);
235 static int  search(int *, int, int);
236 static int  skipline(FILE *);
237 static int  isqrt(int);
238 static int  stone(int *, int, int *, int *);
239 static int  readhash(FILE *);
240 static int  files_differ(FILE *, FILE *);
241 static char *preadline(int, size_t, off_t);
242 
243 
244 
245 extern int cvs_client;
246 
247 static int aflag, bflag, dflag, iflag, tflag, Tflag, wflag;
248 static int context, status;
249 static int format = D_NORMAL;
250 static struct stat stb1, stb2;
251 static char *ifdefname, *ignore_pats, diffargs[128];
252 static const char *diff_file;
253 regex_t ignore_re;
254 
255 static int  *J;			/* will be overlaid on class */
256 static int  *class;		/* will be overlaid on file[0] */
257 static int  *klist;		/* will be overlaid on file[0] after class */
258 static int  *member;		/* will be overlaid on file[1] */
259 static int   clen;
260 static int   inifdef;		/* whether or not we are in a #ifdef block */
261 static int   len[2];
262 static int   pref, suff;	/* length of prefix and suffix */
263 static int   slen[2];
264 static int   anychange;
265 static long *ixnew;		/* will be overlaid on file[1] */
266 static long *ixold;		/* will be overlaid on klist */
267 static struct cand *clist;	/* merely a free storage pot for candidates */
268 static int   clistlen;		/* the length of clist */
269 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
270 static u_char *chrtran;		/* translation table for case-folding */
271 static struct context_vec *context_vec_start;
272 static struct context_vec *context_vec_end;
273 static struct context_vec *context_vec_ptr;
274 
275 #define FUNCTION_CONTEXT_SIZE	41
276 static int lastline;
277 static int lastmatchline;
278 
279 
280 
281 
282 /*
283  * chrtran points to one of 2 translation tables: cup2low if folding upper to
284  * lower case clow2low if not folding case
285  */
286 u_char clow2low[256] = {
287 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
288 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
289 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
290 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
291 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
292 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
293 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
294 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
295 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
296 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
297 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
298 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
299 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
300 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
301 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
302 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
303 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
304 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
305 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
306 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
307 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
308 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
309 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
310 	0xfd, 0xfe, 0xff
311 };
312 
313 u_char cup2low[256] = {
314 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
315 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
316 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
317 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
318 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
319 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
320 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
321 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
322 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
323 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
324 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
325 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
326 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
327 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
328 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
329 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
330 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
331 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
332 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
333 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
334 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
335 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
336 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
337 	0xfd, 0xfe, 0xff
338 };
339 
340 
341 
342 /*
343  * cvs_diff()
344  *
345  * Handler for the `cvs diff' command.
346  *
347  * SYNOPSIS: cvs [args] diff [-clipu] [-D date] [-r rev]
348  */
349 
350 int
351 cvs_diff(int argc, char **argv)
352 {
353 	int ch, recurse, flags;
354 	struct diff_arg darg;
355 	struct cvsroot *root;
356 
357 	context = CVS_DIFF_DEFCTX;
358 	flags = CF_RECURSE|CF_IGNORE|CF_SORT|CF_KNOWN;
359 	recurse = 1;
360 
361 	memset(&darg, 0, sizeof(darg));
362 	strlcpy(diffargs, argv[0], sizeof(diffargs));
363 
364 	while ((ch = getopt(argc, argv, "cD:lir:u")) != -1) {
365 		switch (ch) {
366 		case 'c':
367 			strlcat(diffargs, " -c", sizeof(diffargs));
368 			format = D_CONTEXT;
369 			break;
370 		case 'D':
371 			if (darg.date1 == NULL && darg.rev1 == NULL)
372 				darg.date1 = optarg;
373 			else if (darg.date2 == NULL && darg.rev2 == NULL)
374 				darg.date2 = optarg;
375 			else {
376 				cvs_log(LP_ERR,
377 				    "no more than two revisions/dates can "
378 				    "be specified");
379 			}
380 			break;
381 		case 'l':
382 			strlcat(diffargs, " -l", sizeof(diffargs));
383 			recurse = 0;
384 			flags &= ~CF_RECURSE;
385 			break;
386 		case 'i':
387 			strlcat(diffargs, " -i", sizeof(diffargs));
388 			iflag = 1;
389 			break;
390 		case 'r':
391 			if ((darg.rev1 == NULL) && (darg.date1 == NULL))
392 				darg.rev1 = optarg;
393 			else if ((darg.rev2 == NULL) && (darg.date2 == NULL))
394 				darg.rev2 = optarg;
395 			else {
396 				cvs_log(LP_ERR,
397 				    "no more than two revisions/dates can "
398 				    "be specified");
399 				return (EX_USAGE);
400 			}
401 			break;
402 		case 'u':
403 			strlcat(diffargs, " -u", sizeof(diffargs));
404 			format = D_UNIFIED;
405 			break;
406 		default:
407 			return (EX_USAGE);
408 		}
409 	}
410 
411 	argc -= optind;
412 	argv += optind;
413 
414 	if (argc == 0) {
415 		cvs_files = cvs_file_get(".", flags);
416 	}
417 	else
418 		cvs_files = cvs_file_getspec(argv, argc, 0);
419 
420 	cvs_file_examine(cvs_files, cvs_diff_file, &darg);
421 
422 	root = cvs_files->cf_ddat->cd_root;
423 	if (root->cr_method != CVS_METHOD_LOCAL) {
424 		cvs_senddir(root, cvs_files);
425 		cvs_sendreq(root, CVS_REQ_DIFF, NULL);
426 	}
427 
428 	return (0);
429 }
430 
431 
432 /*
433  * cvs_diff_sendflags()
434  *
435  */
436 
437 int
438 cvs_diff_sendflags(struct cvsroot *root, struct diff_arg *dap)
439 {
440 	/* send the flags */
441 	if (format == D_CONTEXT)
442 		cvs_sendarg(root, "-c", 0);
443 	else if (format == D_UNIFIED)
444 		cvs_sendarg(root, "-u", 0);
445 
446 	if (dap->rev1 != NULL) {
447 		cvs_sendarg(root, "-r", 0);
448 		cvs_sendarg(root, dap->rev1, 1);
449 	}
450 	else if (dap->date1 != NULL) {
451 		cvs_sendarg(root, "-D", 0);
452 		cvs_sendarg(root, dap->date1, 1);
453 	}
454 	if (dap->rev2 != NULL) {
455 		cvs_sendarg(root, "-r", 0);
456 		cvs_sendarg(root, dap->rev2, 1);
457 	}
458 	else if (dap->date2 != NULL) {
459 		cvs_sendarg(root, "-D", 0);
460 		cvs_sendarg(root, dap->date2, 1);
461 	}
462 
463 	return (0);
464 }
465 
466 
467 /*
468  * cvs_diff_file()
469  *
470  * Diff a single file.
471  */
472 
473 int
474 cvs_diff_file(struct cvs_file *cfp, void *arg)
475 {
476 	char *dir, *repo, rcspath[MAXPATHLEN], buf[64];
477 	BUF *b1, *b2;
478 	RCSNUM *r1, *r2;
479 	RCSFILE *rf;
480 	struct diff_arg *dap;
481 	struct cvs_ent *entp;
482 	struct cvsroot *root;
483 
484 	dap = (struct diff_arg *)arg;
485 
486 	if (cfp->cf_type == DT_DIR) {
487 		root = cfp->cf_ddat->cd_root;
488 		if ((cfp->cf_parent == NULL) ||
489 		    (root != cfp->cf_parent->cf_ddat->cd_root)) {
490 			cvs_connect(root);
491 			cvs_diff_sendflags(root, dap);
492 		}
493 
494 		cvs_senddir(root, cfp);
495 		return (0);
496 	}
497 	else	/* take the root of parent directory */
498 		root = cfp->cf_parent->cf_ddat->cd_root;
499 
500 	rf = NULL;
501 	diff_file = cfp->cf_path;
502 	if (cfp->cf_parent != NULL) {
503 		dir = cfp->cf_parent->cf_path;
504 		repo = cfp->cf_parent->cf_ddat->cd_repo;
505 	}
506 	else {
507 		dir = ".";
508 		repo = NULL;
509 	}
510 
511 	if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
512 		if (root->cr_method == CVS_METHOD_LOCAL)
513 			cvs_log(LP_WARN, "I know nothing about %s", diff_file);
514 		else
515 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name);
516 		return (0);
517 	}
518 
519 	entp = cvs_ent_getent(diff_file);
520 	if (entp == NULL)
521 		return (-1);
522 
523 	if (root->cr_method != CVS_METHOD_LOCAL) {
524 		if (cvs_sendentry(root, entp) < 0) {
525 			cvs_ent_free(entp);
526 			return (-1);
527 		}
528 	}
529 
530 	if (cfp->cf_cvstat == CVS_FST_UPTODATE) {
531 		if (root->cr_method != CVS_METHOD_LOCAL)
532 			cvs_sendreq(root, CVS_REQ_UNCHANGED, cfp->cf_name);
533 		cvs_ent_free(entp);
534 		return (0);
535 	}
536 
537 	/* at this point, the file is modified */
538 	if (root->cr_method != CVS_METHOD_LOCAL) {
539 		cvs_sendreq(root, CVS_REQ_MODIFIED, cfp->cf_name);
540 		cvs_sendfile(root, diff_file);
541 	}
542 	else {
543 		snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s",
544 		    root->cr_dir, repo, diff_file, RCS_FILE_EXT);
545 
546 		rf = rcs_open(rcspath, RCS_MODE_READ);
547 		if (rf == NULL) {
548 			cvs_ent_free(entp);
549 			return (-1);
550 		}
551 
552 		cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file,
553 		    RCS_DIFF_DIV, rcspath);
554 
555 		if (dap->rev1 == NULL)
556 			r1 = entp->ce_rev;
557 		else {
558 			r1 = rcsnum_alloc();
559 			rcsnum_aton(dap->rev1, NULL, r1);
560 		}
561 
562 		cvs_printf("retrieving revision %s\n",
563 		    rcsnum_tostr(r1, buf, sizeof(buf)));
564 		b1 = rcs_getrev(rf, r1);
565 
566 		if (dap->rev2 != NULL) {
567 			cvs_printf("retrieving revision %s\n", dap->rev2);
568 			r2 = rcsnum_alloc();
569 			rcsnum_aton(dap->rev2, NULL, r2);
570 			b2 = rcs_getrev(rf, r2);
571 		}
572 		else {
573 			b2 = cvs_buf_load(diff_file, BUF_AUTOEXT);
574 		}
575 
576 		rcs_close(rf);
577 
578 		printf("%s", diffargs);
579 		printf(" -r%s", buf);
580 		if (dap->rev2 != NULL)
581 			printf(" -r%s", dap->rev2);
582 		printf(" %s\n", diff_file);
583 		cvs_buf_write(b1, "/tmp/diff1", 0600);
584 		cvs_buf_write(b2, "/tmp/diff2", 0600);
585 		cvs_diffreg("/tmp/diff1", "/tmp/diff2");
586 	}
587 
588 	cvs_ent_free(entp);
589 	return (0);
590 }
591 
592 
593 int
594 cvs_diffreg(const char *file1, const char *file2)
595 {
596 	FILE *f1, *f2;
597 	int i, rval;
598 
599 	f1 = f2 = NULL;
600 	rval = D_SAME;
601 	anychange = 0;
602 	lastline = 0;
603 	lastmatchline = 0;
604 	context_vec_ptr = context_vec_start - 1;
605 	chrtran = (iflag ? cup2low : clow2low);
606 
607 	f1 = fopen(file1, "r");
608 	if (f1 == NULL) {
609 		cvs_log(LP_ERRNO, "%s", file1);
610 		status |= 2;
611 		goto closem;
612 	}
613 
614 	f2 = fopen(file2, "r");
615 	if (f2 == NULL) {
616 		cvs_log(LP_ERRNO, "%s", file2);
617 		status |= 2;
618 		goto closem;
619 	}
620 
621 	switch (files_differ(f1, f2)) {
622 	case 0:
623 		goto closem;
624 	case 1:
625 		break;
626 	default:
627 		/* error */
628 		status |= 2;
629 		goto closem;
630 	}
631 
632 	if (!asciifile(f1) || !asciifile(f2)) {
633 		rval = D_BINARY;
634 		status |= 1;
635 		goto closem;
636 	}
637 	prepare(0, f1, stb1.st_size);
638 	prepare(1, f2, stb2.st_size);
639 	prune();
640 	sort(sfile[0], slen[0]);
641 	sort(sfile[1], slen[1]);
642 
643 	member = (int *)file[1];
644 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
645 	member = realloc(member, (slen[1] + 2) * sizeof(int));
646 
647 	class = (int *)file[0];
648 	unsort(sfile[0], slen[0], class);
649 	class = realloc(class, (slen[0] + 2) * sizeof(int));
650 
651 	klist = malloc((slen[0] + 2) * sizeof(int));
652 	clen = 0;
653 	clistlen = 100;
654 	clist = malloc(clistlen * sizeof(cand));
655 	i = stone(class, slen[0], member, klist);
656 	free(member);
657 	free(class);
658 
659 	J = realloc(J, (len[0] + 2) * sizeof(int));
660 	unravel(klist[i]);
661 	free(clist);
662 	free(klist);
663 
664 	ixold = realloc(ixold, (len[0] + 2) * sizeof(long));
665 	ixnew = realloc(ixnew, (len[1] + 2) * sizeof(long));
666 	check(f1, f2);
667 	output(file1, f1, file2, f2);
668 
669 closem:
670 	if (anychange) {
671 		status |= 1;
672 		if (rval == D_SAME)
673 			rval = D_DIFFER;
674 	}
675 	if (f1 != NULL)
676 		fclose(f1);
677 	if (f2 != NULL)
678 		fclose(f2);
679 	return (rval);
680 }
681 
682 /*
683  * Check to see if the given files differ.
684  * Returns 0 if they are the same, 1 if different, and -1 on error.
685  * XXX - could use code from cmp(1) [faster]
686  */
687 static int
688 files_differ(FILE *f1, FILE *f2)
689 {
690 	char buf1[BUFSIZ], buf2[BUFSIZ];
691 	size_t i, j;
692 
693 	if (stb1.st_size != stb2.st_size)
694 		return (1);
695 	for (;;) {
696 		i = fread(buf1, 1, sizeof(buf1), f1);
697 		j = fread(buf2, 1, sizeof(buf2), f2);
698 		if (i != j)
699 			return (1);
700 		if (i == 0 && j == 0) {
701 			if (ferror(f1) || ferror(f2))
702 				return (1);
703 			return (0);
704 		}
705 		if (memcmp(buf1, buf2, i) != 0)
706 			return (1);
707 	}
708 }
709 
710 
711 char *
712 splice(char *dir, char *file)
713 {
714 	char *tail, *buf;
715 
716 	if ((tail = strrchr(file, '/')) == NULL)
717 		tail = file;
718 	else
719 		tail++;
720 	asprintf(&buf, "%s/%s", dir, tail);
721 	return (buf);
722 }
723 
724 static void
725 prepare(int i, FILE *fd, off_t filesize)
726 {
727 	struct line *p;
728 	int j, h;
729 	size_t sz;
730 
731 	rewind(fd);
732 
733 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
734 	if (sz < 100)
735 		sz = 100;
736 
737 	p = malloc((sz + 3) * sizeof(struct line));
738 	for (j = 0; (h = readhash(fd));) {
739 		if (j == (int)sz) {
740 			sz = sz * 3 / 2;
741 			p = realloc(p, (sz + 3) * sizeof(struct line));
742 		}
743 		p[++j].value = h;
744 	}
745 	len[i] = j;
746 	file[i] = p;
747 }
748 
749 static void
750 prune(void)
751 {
752 	int i, j;
753 
754 	for (pref = 0; pref < len[0] && pref < len[1] &&
755 	    file[0][pref + 1].value == file[1][pref + 1].value;
756 	    pref++)
757 		;
758 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
759 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
760 	    suff++)
761 		;
762 	for (j = 0; j < 2; j++) {
763 		sfile[j] = file[j] + pref;
764 		slen[j] = len[j] - pref - suff;
765 		for (i = 0; i <= slen[j]; i++)
766 			sfile[j][i].serial = i;
767 	}
768 }
769 
770 static void
771 equiv(struct line *a, int n, struct line *b, int m, int *c)
772 {
773 	int i, j;
774 
775 	i = j = 1;
776 	while (i <= n && j <= m) {
777 		if (a[i].value < b[j].value)
778 			a[i++].value = 0;
779 		else if (a[i].value == b[j].value)
780 			a[i++].value = j;
781 		else
782 			j++;
783 	}
784 	while (i <= n)
785 		a[i++].value = 0;
786 	b[m + 1].value = 0;
787 	j = 0;
788 	while (++j <= m) {
789 		c[j] = -b[j].serial;
790 		while (b[j + 1].value == b[j].value) {
791 			j++;
792 			c[j] = b[j].serial;
793 		}
794 	}
795 	c[j] = -1;
796 }
797 
798 /* Code taken from ping.c */
799 static int
800 isqrt(int n)
801 {
802 	int y, x = 1;
803 
804 	if (n == 0)
805 		return(0);
806 
807 	do { /* newton was a stinker */
808 		y = x;
809 		x = n / x;
810 		x += y;
811 		x /= 2;
812 	} while ((x - y) > 1 || (x - y) < -1);
813 
814 	return (x);
815 }
816 
817 static int
818 stone(int *a, int n, int *b, int *c)
819 {
820 	int i, k, y, j, l;
821 	int oldc, tc, oldl;
822 	u_int numtries;
823 
824 	const u_int bound = dflag ? UINT_MAX : MAX(256, isqrt(n));
825 
826 	k = 0;
827 	c[0] = newcand(0, 0, 0);
828 	for (i = 1; i <= n; i++) {
829 		j = a[i];
830 		if (j == 0)
831 			continue;
832 		y = -b[j];
833 		oldl = 0;
834 		oldc = c[0];
835 		numtries = 0;
836 		do {
837 			if (y <= clist[oldc].y)
838 				continue;
839 			l = search(c, k, y);
840 			if (l != oldl + 1)
841 				oldc = c[l - 1];
842 			if (l <= k) {
843 				if (clist[c[l]].y <= y)
844 					continue;
845 				tc = c[l];
846 				c[l] = newcand(i, y, oldc);
847 				oldc = tc;
848 				oldl = l;
849 				numtries++;
850 			} else {
851 				c[l] = newcand(i, y, oldc);
852 				k++;
853 				break;
854 			}
855 		} while ((y = b[++j]) > 0 && numtries < bound);
856 	}
857 	return (k);
858 }
859 
860 static int
861 newcand(int x, int y, int pred)
862 {
863 	struct cand *q;
864 
865 	if (clen == clistlen) {
866 		clistlen = clistlen * 11 / 10;
867 		clist = realloc(clist, clistlen * sizeof(cand));
868 	}
869 	q = clist + clen;
870 	q->x = x;
871 	q->y = y;
872 	q->pred = pred;
873 	return (clen++);
874 }
875 
876 static int
877 search(int *c, int k, int y)
878 {
879 	int i, j, l, t;
880 
881 	if (clist[c[k]].y < y)	/* quick look for typical case */
882 		return (k + 1);
883 	i = 0;
884 	j = k + 1;
885 	while (1) {
886 		l = i + j;
887 		if ((l >>= 1) <= i)
888 			break;
889 		t = clist[c[l]].y;
890 		if (t > y)
891 			j = l;
892 		else if (t < y)
893 			i = l;
894 		else
895 			return (l);
896 	}
897 	return (l + 1);
898 }
899 
900 static void
901 unravel(int p)
902 {
903 	struct cand *q;
904 	int i;
905 
906 	for (i = 0; i <= len[0]; i++)
907 		J[i] = i <= pref ? i :
908 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
909 	for (q = clist + p; q->y != 0; q = clist + q->pred)
910 		J[q->x + pref] = q->y + pref;
911 }
912 
913 /*
914  * Check does double duty:
915  *  1.	ferret out any fortuitous correspondences due
916  *	to confounding by hashing (which result in "jackpot")
917  *  2.  collect random access indexes to the two files
918  */
919 static void
920 check(FILE *f1, FILE *f2)
921 {
922 	int i, j, jackpot, c, d;
923 	long ctold, ctnew;
924 
925 	rewind(f1);
926 	rewind(f2);
927 	j = 1;
928 	ixold[0] = ixnew[0] = 0;
929 	jackpot = 0;
930 	ctold = ctnew = 0;
931 	for (i = 1; i <= len[0]; i++) {
932 		if (J[i] == 0) {
933 			ixold[i] = ctold += skipline(f1);
934 			continue;
935 		}
936 		while (j < J[i]) {
937 			ixnew[j] = ctnew += skipline(f2);
938 			j++;
939 		}
940 		if (bflag || wflag || iflag) {
941 			for (;;) {
942 				c = getc(f1);
943 				d = getc(f2);
944 				/*
945 				 * GNU diff ignores a missing newline
946 				 * in one file if bflag || wflag.
947 				 */
948 				if ((bflag || wflag) &&
949 				    ((c == EOF && d == '\n') ||
950 				    (c == '\n' && d == EOF))) {
951 					break;
952 				}
953 				ctold++;
954 				ctnew++;
955 				if (bflag && isspace(c) && isspace(d)) {
956 					do {
957 						if (c == '\n')
958 							break;
959 						ctold++;
960 					} while (isspace(c = getc(f1)));
961 					do {
962 						if (d == '\n')
963 							break;
964 						ctnew++;
965 					} while (isspace(d = getc(f2)));
966 				} else if (wflag) {
967 					while (isspace(c) && c != '\n') {
968 						c = getc(f1);
969 						ctold++;
970 					}
971 					while (isspace(d) && d != '\n') {
972 						d = getc(f2);
973 						ctnew++;
974 					}
975 				}
976 				if (chrtran[c] != chrtran[d]) {
977 					jackpot++;
978 					J[i] = 0;
979 					if (c != '\n' && c != EOF)
980 						ctold += skipline(f1);
981 					if (d != '\n' && c != EOF)
982 						ctnew += skipline(f2);
983 					break;
984 				}
985 				if (c == '\n' || c == EOF)
986 					break;
987 			}
988 		} else {
989 			for (;;) {
990 				ctold++;
991 				ctnew++;
992 				if ((c = getc(f1)) != (d = getc(f2))) {
993 					/* jackpot++; */
994 					J[i] = 0;
995 					if (c != '\n' && c != EOF)
996 						ctold += skipline(f1);
997 					if (d != '\n' && c != EOF)
998 						ctnew += skipline(f2);
999 					break;
1000 				}
1001 				if (c == '\n' || c == EOF)
1002 					break;
1003 			}
1004 		}
1005 		ixold[i] = ctold;
1006 		ixnew[j] = ctnew;
1007 		j++;
1008 	}
1009 	for (; j <= len[1]; j++)
1010 		ixnew[j] = ctnew += skipline(f2);
1011 	/*
1012 	 * if (jackpot)
1013 	 *	fprintf(stderr, "jackpot\n");
1014 	 */
1015 }
1016 
1017 /* shellsort CACM #201 */
1018 static void
1019 sort(struct line *a, int n)
1020 {
1021 	struct line *ai, *aim, w;
1022 	int j, m = 0, k;
1023 
1024 	if (n == 0)
1025 		return;
1026 	for (j = 1; j <= n; j *= 2)
1027 		m = 2 * j - 1;
1028 	for (m /= 2; m != 0; m /= 2) {
1029 		k = n - m;
1030 		for (j = 1; j <= k; j++) {
1031 			for (ai = &a[j]; ai > a; ai -= m) {
1032 				aim = &ai[m];
1033 				if (aim < ai)
1034 					break;	/* wraparound */
1035 				if (aim->value > ai[0].value ||
1036 				    (aim->value == ai[0].value &&
1037 					aim->serial > ai[0].serial))
1038 					break;
1039 				w.value = ai[0].value;
1040 				ai[0].value = aim->value;
1041 				aim->value = w.value;
1042 				w.serial = ai[0].serial;
1043 				ai[0].serial = aim->serial;
1044 				aim->serial = w.serial;
1045 			}
1046 		}
1047 	}
1048 }
1049 
1050 static void
1051 unsort(struct line *f, int l, int *b)
1052 {
1053 	int *a, i;
1054 
1055 	a = malloc((l + 1) * sizeof(int));
1056 	for (i = 1; i <= l; i++)
1057 		a[f[i].serial] = f[i].value;
1058 	for (i = 1; i <= l; i++)
1059 		b[i] = a[i];
1060 	free(a);
1061 }
1062 
1063 static int
1064 skipline(FILE *f)
1065 {
1066 	int i, c;
1067 
1068 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
1069 		continue;
1070 	return (i);
1071 }
1072 
1073 static void
1074 output(const char *file1, FILE *f1, const char *file2, FILE *f2)
1075 {
1076 	int m, i0, i1, j0, j1;
1077 
1078 	rewind(f1);
1079 	rewind(f2);
1080 	m = len[0];
1081 	J[0] = 0;
1082 	J[m + 1] = len[1] + 1;
1083 	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
1084 		while (i0 <= m && J[i0] == J[i0 - 1] + 1)
1085 			i0++;
1086 		j0 = J[i0 - 1] + 1;
1087 		i1 = i0 - 1;
1088 		while (i1 < m && J[i1 + 1] == 0)
1089 			i1++;
1090 		j1 = J[i1 + 1] - 1;
1091 		J[i1] = j1;
1092 		change(file1, f1, file2, f2, i0, i1, j0, j1);
1093 	}
1094 	if (m == 0)
1095 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
1096 	if (format == D_IFDEF) {
1097 		for (;;) {
1098 #define	c i0
1099 			if ((c = getc(f1)) == EOF)
1100 				return;
1101 			putchar(c);
1102 		}
1103 #undef c
1104 	}
1105 	if (anychange != 0) {
1106 		if (format == D_CONTEXT)
1107 			dump_context_vec(f1, f2);
1108 		else if (format == D_UNIFIED)
1109 			dump_unified_vec(f1, f2);
1110 	}
1111 }
1112 
1113 static __inline void
1114 range(int a, int b, char *separator)
1115 {
1116 	printf("%d", a > b ? b : a);
1117 	if (a < b)
1118 		printf("%s%d", separator, b);
1119 }
1120 
1121 static __inline void
1122 uni_range(int a, int b)
1123 {
1124 	if (a < b)
1125 		printf("%d,%d", a, b - a + 1);
1126 	else if (a == b)
1127 		printf("%d", b);
1128 	else
1129 		printf("%d,0", b);
1130 }
1131 
1132 static char *
1133 preadline(int fd, size_t len, off_t off)
1134 {
1135 	char *line;
1136 	ssize_t nr;
1137 
1138 	line = malloc(len + 1);
1139 	if (line == NULL) {
1140 		cvs_log(LP_ERRNO, "failed to allocate line");
1141 		return (NULL);
1142 	}
1143 	if ((nr = pread(fd, line, len, off)) < 0) {
1144 		cvs_log(LP_ERRNO, "preadline failed");
1145 		return (NULL);
1146 	}
1147 	line[nr] = '\0';
1148 	return (line);
1149 }
1150 
1151 static int
1152 ignoreline(char *line)
1153 {
1154 	int ret;
1155 
1156 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1157 	free(line);
1158 	return (ret == 0);	/* if it matched, it should be ignored. */
1159 }
1160 
1161 /*
1162  * Indicate that there is a difference between lines a and b of the from file
1163  * to get to lines c to d of the to file.  If a is greater then b then there
1164  * are no lines in the from file involved and this means that there were
1165  * lines appended (beginning at b).  If c is greater than d then there are
1166  * lines missing from the to file.
1167  */
1168 static void
1169 change(const char *file1, FILE *f1, const char *file2, FILE *f2,
1170 	int a, int b, int c, int d)
1171 {
1172 	static size_t max_context = 64;
1173 	int i;
1174 
1175 	if (format != D_IFDEF && a > b && c > d)
1176 		return;
1177 	if (ignore_pats != NULL) {
1178 		char *line;
1179 		/*
1180 		 * All lines in the change, insert, or delete must
1181 		 * match an ignore pattern for the change to be
1182 		 * ignored.
1183 		 */
1184 		if (a <= b) {		/* Changes and deletes. */
1185 			for (i = a; i <= b; i++) {
1186 				line = preadline(fileno(f1),
1187 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1188 				if (!ignoreline(line))
1189 					goto proceed;
1190 			}
1191 		}
1192 		if (a > b || c <= d) {	/* Changes and inserts. */
1193 			for (i = c; i <= d; i++) {
1194 				line = preadline(fileno(f2),
1195 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1196 				if (!ignoreline(line))
1197 					goto proceed;
1198 			}
1199 		}
1200 		return;
1201 	}
1202 proceed:
1203 	if (format == D_CONTEXT || format == D_UNIFIED) {
1204 		/*
1205 		 * Allocate change records as needed.
1206 		 */
1207 		if (context_vec_ptr == context_vec_end - 1) {
1208 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1209 			max_context <<= 1;
1210 			context_vec_start = realloc(context_vec_start,
1211 			    max_context * sizeof(struct context_vec));
1212 			context_vec_end = context_vec_start + max_context;
1213 			context_vec_ptr = context_vec_start + offset;
1214 		}
1215 		if (anychange == 0) {
1216 			/*
1217 			 * Print the context/unidiff header first time through.
1218 			 */
1219 			printf("%s %s	%s",
1220 			    format == D_CONTEXT ? "***" : "---", diff_file,
1221 			    ctime(&stb1.st_mtime));
1222 			printf("%s %s	%s",
1223 			    format == D_CONTEXT ? "---" : "+++", diff_file,
1224 			    ctime(&stb2.st_mtime));
1225 			anychange = 1;
1226 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1227 		    c > context_vec_ptr->d + (2 * context) + 1) {
1228 			/*
1229 			 * If this change is more than 'context' lines from the
1230 			 * previous change, dump the record and reset it.
1231 			 */
1232 			if (format == D_CONTEXT)
1233 				dump_context_vec(f1, f2);
1234 			else
1235 				dump_unified_vec(f1, f2);
1236 		}
1237 		context_vec_ptr++;
1238 		context_vec_ptr->a = a;
1239 		context_vec_ptr->b = b;
1240 		context_vec_ptr->c = c;
1241 		context_vec_ptr->d = d;
1242 		return;
1243 	}
1244 	if (anychange == 0)
1245 		anychange = 1;
1246 	switch (format) {
1247 	case D_BRIEF:
1248 		return;
1249 	case D_NORMAL:
1250 		range(a, b, ",");
1251 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1252 		if (format == D_NORMAL)
1253 			range(c, d, ",");
1254 		putchar('\n');
1255 		break;
1256 	}
1257 	if (format == D_NORMAL || format == D_IFDEF) {
1258 		fetch(ixold, a, b, f1, '<', 1);
1259 		if (a <= b && c <= d && format == D_NORMAL)
1260 			puts("---");
1261 	}
1262 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1263 	if (inifdef) {
1264 		printf("#endif /* %s */\n", ifdefname);
1265 		inifdef = 0;
1266 	}
1267 }
1268 
1269 static int
1270 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1271 {
1272 	int i, j, c, lastc, col, nc;
1273 
1274 	/*
1275 	 * When doing #ifdef's, copy down to current line
1276 	 * if this is the first file, so that stuff makes it to output.
1277 	 */
1278 	if (format == D_IFDEF && oldfile) {
1279 		long curpos = ftell(lb);
1280 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1281 		nc = f[a > b ? b : a - 1] - curpos;
1282 		for (i = 0; i < nc; i++)
1283 			putchar(getc(lb));
1284 	}
1285 	if (a > b)
1286 		return (0);
1287 	if (format == D_IFDEF) {
1288 		if (inifdef) {
1289 			printf("#else /* %s%s */\n",
1290 			    oldfile == 1 ? "!" : "", ifdefname);
1291 		} else {
1292 			if (oldfile)
1293 				printf("#ifndef %s\n", ifdefname);
1294 			else
1295 				printf("#ifdef %s\n", ifdefname);
1296 		}
1297 		inifdef = 1 + oldfile;
1298 	}
1299 	for (i = a; i <= b; i++) {
1300 		fseek(lb, f[i - 1], SEEK_SET);
1301 		nc = f[i] - f[i - 1];
1302 		if (format != D_IFDEF && ch != '\0') {
1303 			putchar(ch);
1304 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1305 			    || format == D_UNIFIED))
1306 				putchar('\t');
1307 			else if (format != D_UNIFIED)
1308 				putchar(' ');
1309 		}
1310 		col = 0;
1311 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1312 			if ((c = getc(lb)) == EOF) {
1313 				puts("\n\\ No newline at end of file");
1314 				return (0);
1315 			}
1316 			if (c == '\t' && tflag) {
1317 				do {
1318 					putchar(' ');
1319 				} while (++col & 7);
1320 			} else {
1321 				putchar(c);
1322 				col++;
1323 			}
1324 		}
1325 	}
1326 	return (0);
1327 }
1328 
1329 /*
1330  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1331  */
1332 static int
1333 readhash(FILE *f)
1334 {
1335 	int i, t, space;
1336 	int sum;
1337 
1338 	sum = 1;
1339 	space = 0;
1340 	if (!bflag && !wflag) {
1341 		if (iflag)
1342 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1343 				if (t == EOF) {
1344 					if (i == 0)
1345 						return (0);
1346 					break;
1347 				}
1348 				sum = sum * 127 + chrtran[t];
1349 			}
1350 		else
1351 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1352 				if (t == EOF) {
1353 					if (i == 0)
1354 						return (0);
1355 					break;
1356 				}
1357 				sum = sum * 127 + t;
1358 			}
1359 	} else {
1360 		for (i = 0;;) {
1361 			switch (t = getc(f)) {
1362 			case '\t':
1363 			case ' ':
1364 				space++;
1365 				continue;
1366 			default:
1367 				if (space && !wflag) {
1368 					i++;
1369 					space = 0;
1370 				}
1371 				sum = sum * 127 + chrtran[t];
1372 				i++;
1373 				continue;
1374 			case EOF:
1375 				if (i == 0)
1376 					return (0);
1377 				/* FALLTHROUGH */
1378 			case '\n':
1379 				break;
1380 			}
1381 			break;
1382 		}
1383 	}
1384 	/*
1385 	 * There is a remote possibility that we end up with a zero sum.
1386 	 * Zero is used as an EOF marker, so return 1 instead.
1387 	 */
1388 	return (sum == 0 ? 1 : sum);
1389 }
1390 
1391 static int
1392 asciifile(FILE *f)
1393 {
1394 	char buf[BUFSIZ];
1395 	int i, cnt;
1396 
1397 	if (aflag || f == NULL)
1398 		return (1);
1399 
1400 	rewind(f);
1401 	cnt = fread(buf, 1, sizeof(buf), f);
1402 	for (i = 0; i < cnt; i++)
1403 		if (!isprint(buf[i]) && !isspace(buf[i]))
1404 			return (0);
1405 	return (1);
1406 }
1407 
1408 
1409 /* dump accumulated "context" diff changes */
1410 static void
1411 dump_context_vec(FILE *f1, FILE *f2)
1412 {
1413 	struct context_vec *cvp = context_vec_start;
1414 	int lowa, upb, lowc, upd, do_output;
1415 	int a, b, c, d;
1416 	char ch;
1417 
1418 	if (context_vec_start > context_vec_ptr)
1419 		return;
1420 
1421 	b = d = 0;		/* gcc */
1422 	lowa = MAX(1, cvp->a - context);
1423 	upb = MIN(len[0], context_vec_ptr->b + context);
1424 	lowc = MAX(1, cvp->c - context);
1425 	upd = MIN(len[1], context_vec_ptr->d + context);
1426 
1427 	printf("***************");
1428 	printf("\n*** ");
1429 	range(lowa, upb, ",");
1430 	printf(" ****\n");
1431 
1432 	/*
1433 	 * Output changes to the "old" file.  The first loop suppresses
1434 	 * output if there were no changes to the "old" file (we'll see
1435 	 * the "old" lines as context in the "new" list).
1436 	 */
1437 	do_output = 0;
1438 	for (; cvp <= context_vec_ptr; cvp++)
1439 		if (cvp->a <= cvp->b) {
1440 			cvp = context_vec_start;
1441 			do_output++;
1442 			break;
1443 		}
1444 	if (do_output) {
1445 		while (cvp <= context_vec_ptr) {
1446 			a = cvp->a;
1447 			b = cvp->b;
1448 			c = cvp->c;
1449 			d = cvp->d;
1450 
1451 			if (a <= b && c <= d)
1452 				ch = 'c';
1453 			else
1454 				ch = (a <= b) ? 'd' : 'a';
1455 
1456 			if (ch == 'a')
1457 				fetch(ixold, lowa, b, f1, ' ', 0);
1458 			else {
1459 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1460 				fetch(ixold, a, b, f1,
1461 				    ch == 'c' ? '!' : '-', 0);
1462 			}
1463 			lowa = b + 1;
1464 			cvp++;
1465 		}
1466 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1467 	}
1468 	/* output changes to the "new" file */
1469 	printf("--- ");
1470 	range(lowc, upd, ",");
1471 	printf(" ----\n");
1472 
1473 	do_output = 0;
1474 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1475 		if (cvp->c <= cvp->d) {
1476 			cvp = context_vec_start;
1477 			do_output++;
1478 			break;
1479 		}
1480 	if (do_output) {
1481 		while (cvp <= context_vec_ptr) {
1482 			a = cvp->a;
1483 			b = cvp->b;
1484 			c = cvp->c;
1485 			d = cvp->d;
1486 
1487 			if (a <= b && c <= d)
1488 				ch = 'c';
1489 			else
1490 				ch = (a <= b) ? 'd' : 'a';
1491 
1492 			if (ch == 'd')
1493 				fetch(ixnew, lowc, d, f2, ' ', 0);
1494 			else {
1495 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1496 				fetch(ixnew, c, d, f2,
1497 				    ch == 'c' ? '!' : '+', 0);
1498 			}
1499 			lowc = d + 1;
1500 			cvp++;
1501 		}
1502 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1503 	}
1504 	context_vec_ptr = context_vec_start - 1;
1505 }
1506 
1507 /* dump accumulated "unified" diff changes */
1508 static void
1509 dump_unified_vec(FILE *f1, FILE *f2)
1510 {
1511 	struct context_vec *cvp = context_vec_start;
1512 	int lowa, upb, lowc, upd;
1513 	int a, b, c, d;
1514 	char ch;
1515 
1516 	if (context_vec_start > context_vec_ptr)
1517 		return;
1518 
1519 	b = d = 0;		/* gcc */
1520 	lowa = MAX(1, cvp->a - context);
1521 	upb = MIN(len[0], context_vec_ptr->b + context);
1522 	lowc = MAX(1, cvp->c - context);
1523 	upd = MIN(len[1], context_vec_ptr->d + context);
1524 
1525 	fputs("@@ -", stdout);
1526 	uni_range(lowa, upb);
1527 	fputs(" +", stdout);
1528 	uni_range(lowc, upd);
1529 	fputs(" @@", stdout);
1530 	putchar('\n');
1531 
1532 	/*
1533 	 * Output changes in "unified" diff format--the old and new lines
1534 	 * are printed together.
1535 	 */
1536 	for (; cvp <= context_vec_ptr; cvp++) {
1537 		a = cvp->a;
1538 		b = cvp->b;
1539 		c = cvp->c;
1540 		d = cvp->d;
1541 
1542 		/*
1543 		 * c: both new and old changes
1544 		 * d: only changes in the old file
1545 		 * a: only changes in the new file
1546 		 */
1547 		if (a <= b && c <= d)
1548 			ch = 'c';
1549 		else
1550 			ch = (a <= b) ? 'd' : 'a';
1551 
1552 		switch (ch) {
1553 		case 'c':
1554 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1555 			fetch(ixold, a, b, f1, '-', 0);
1556 			fetch(ixnew, c, d, f2, '+', 0);
1557 			break;
1558 		case 'd':
1559 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1560 			fetch(ixold, a, b, f1, '-', 0);
1561 			break;
1562 		case 'a':
1563 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1564 			fetch(ixnew, c, d, f2, '+', 0);
1565 			break;
1566 		}
1567 		lowa = b + 1;
1568 		lowc = d + 1;
1569 	}
1570 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1571 
1572 	context_vec_ptr = context_vec_start - 1;
1573 }
1574