xref: /openbsd-src/usr.bin/cvs/diff.c (revision c5f4fad510dd427c0c20c0f4d164f60ce24651b6)
1 /*	$OpenBSD: diff.c,v 1.33 2005/04/25 19:09:15 jfb Exp $	*/
2 /*
3  * Copyright (C) Caldera International Inc.  2001-2002.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code and documentation must retain the above
10  *    copyright notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed or owned by Caldera
17  *	International, Inc.
18  * 4. Neither the name of Caldera International, Inc. nor the names of other
19  *    contributors may be used to endorse or promote products derived from
20  *    this software without specific prior written permission.
21  *
22  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
23  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
27  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 /*-
36  * Copyright (c) 1991, 1993
37  *	The Regents of the University of California.  All rights reserved.
38  * Copyright (c) 2004 Jean-Francois Brousseau.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 /*
67  *	Uses an algorithm due to Harold Stone, which finds
68  *	a pair of longest identical subsequences in the two
69  *	files.
70  *
71  *	The major goal is to generate the match vector J.
72  *	J[i] is the index of the line in file1 corresponding
73  *	to line i file0. J[i] = 0 if there is no
74  *	such line in file1.
75  *
76  *	Lines are hashed so as to work in core. All potential
77  *	matches are located by sorting the lines of each file
78  *	on the hash (called ``value''). In particular, this
79  *	collects the equivalence classes in file1 together.
80  *	Subroutine equiv replaces the value of each line in
81  *	file0 by the index of the first element of its
82  *	matching equivalence in (the reordered) file1.
83  *	To save space equiv squeezes file1 into a single
84  *	array member in which the equivalence classes
85  *	are simply concatenated, except that their first
86  *	members are flagged by changing sign.
87  *
88  *	Next the indices that point into member are unsorted into
89  *	array class according to the original order of file0.
90  *
91  *	The cleverness lies in routine stone. This marches
92  *	through the lines of file0, developing a vector klist
93  *	of "k-candidates". At step i a k-candidate is a matched
94  *	pair of lines x,y (x in file0 y in file1) such that
95  *	there is a common subsequence of length k
96  *	between the first i lines of file0 and the first y
97  *	lines of file1, but there is no such subsequence for
98  *	any smaller y. x is the earliest possible mate to y
99  *	that occurs in such a subsequence.
100  *
101  *	Whenever any of the members of the equivalence class of
102  *	lines in file1 matable to a line in file0 has serial number
103  *	less than the y of some k-candidate, that k-candidate
104  *	with the smallest such y is replaced. The new
105  *	k-candidate is chained (via pred) to the current
106  *	k-1 candidate so that the actual subsequence can
107  *	be recovered. When a member has serial number greater
108  *	that the y of all k-candidates, the klist is extended.
109  *	At the end, the longest subsequence is pulled out
110  *	and placed in the array J by unravel
111  *
112  *	With J in hand, the matches there recorded are
113  *	check'ed against reality to assure that no spurious
114  *	matches have crept in due to hashing. If they have,
115  *	they are broken, and "jackpot" is recorded--a harmless
116  *	matter except that a true match for a spuriously
117  *	mated line may now be unnecessarily reported as a change.
118  *
119  *	Much of the complexity of the program comes simply
120  *	from trying to minimize core utilization and
121  *	maximize the range of doable problems by dynamically
122  *	allocating what is needed and reusing what is not.
123  *	The core requirements for problems larger than somewhat
124  *	are (in words) 2*length(file0) + length(file1) +
125  *	3*(number of k-candidates installed),  typically about
126  *	6n words for files of length n.
127  */
128 
129 #include <sys/param.h>
130 #include <sys/stat.h>
131 #include <sys/wait.h>
132 
133 #include <err.h>
134 #include <errno.h>
135 #include <ctype.h>
136 #include <stdio.h>
137 #include <fcntl.h>
138 #include <paths.h>
139 #include <regex.h>
140 #include <dirent.h>
141 #include <stdlib.h>
142 #include <stddef.h>
143 #include <unistd.h>
144 #include <string.h>
145 
146 #include "cvs.h"
147 #include "log.h"
148 #include "buf.h"
149 #include "proto.h"
150 
151 
152 #define CVS_DIFF_DEFCTX    3   /* default context length */
153 
154 
155 /*
156  * Output format options
157  */
158 #define	D_NORMAL	0	/* Normal output */
159 #define	D_CONTEXT	1	/* Diff with context */
160 #define	D_UNIFIED	2	/* Unified context diff */
161 #define	D_IFDEF		3	/* Diff with merged #ifdef's */
162 #define	D_BRIEF		4	/* Say if the files differ */
163 #define	D_RCSDIFF	5       /* Reverse editor output: RCS format */
164 
165 /*
166  * Status values for print_status() and diffreg() return values
167  */
168 #define	D_SAME		0	/* Files are the same */
169 #define	D_DIFFER	1	/* Files are different */
170 #define	D_BINARY	2	/* Binary files are different */
171 #define	D_COMMON	3	/* Subdirectory common to both dirs */
172 #define	D_ONLY		4	/* Only exists in one directory */
173 #define	D_MISMATCH1	5	/* path1 was a dir, path2 a file */
174 #define	D_MISMATCH2	6	/* path1 was a file, path2 a dir */
175 #define	D_ERROR		7	/* An error occurred */
176 #define	D_SKIPPED1	8	/* path1 was a special file */
177 #define	D_SKIPPED2	9	/* path2 was a special file */
178 
179 struct cand {
180 	int x;
181 	int y;
182 	int pred;
183 } cand;
184 
185 struct line {
186 	int serial;
187 	int value;
188 } *file[2];
189 
190 /*
191  * The following struct is used to record change information when
192  * doing a "context" or "unified" diff.  (see routine "change" to
193  * understand the highly mnemonic field names)
194  */
195 struct context_vec {
196 	int a;			/* start line in old file */
197 	int b;			/* end line in old file */
198 	int c;			/* start line in new file */
199 	int d;			/* end line in new file */
200 };
201 
202 struct diff_arg {
203 	char  *rev1;
204 	char  *rev2;
205 	char  *date1;
206 	char  *date2;
207 };
208 
209 
210 struct excludes {
211 	char *pattern;
212 	struct excludes *next;
213 };
214 
215 
216 char	*splice(char *, char *);
217 int  cvs_diff_options(char *, int, char **, int *);
218 int  cvs_diffreg(const char *, const char *);
219 int  cvs_diff_file(struct cvs_file *, void *);
220 int  cvs_diff_sendflags(struct cvsroot *);
221 int  cvs_diff_cleanup(void);
222 
223 static void output(const char *, FILE *, const char *, FILE *);
224 static void check(FILE *, FILE *);
225 static void range(int, int, char *);
226 static void uni_range(int, int);
227 static void dump_context_vec(FILE *, FILE *);
228 static void dump_unified_vec(FILE *, FILE *);
229 static int  prepare(int, FILE *, off_t);
230 static void prune(void);
231 static void equiv(struct line *, int, struct line *, int, int *);
232 static void unravel(int);
233 static void unsort(struct line *, int, int *);
234 static void change(const char *, FILE *, const char *, FILE *, int, int, int, int);
235 static void sort(struct line *, int);
236 static int  ignoreline(char *);
237 static int  asciifile(FILE *);
238 static int  fetch(long *, int, int, FILE *, int, int);
239 static int  newcand(int, int, int);
240 static int  search(int *, int, int);
241 static int  skipline(FILE *);
242 static int  isqrt(int);
243 static int  stone(int *, int, int *, int *);
244 static int  readhash(FILE *);
245 static int  files_differ(FILE *, FILE *);
246 static char *match_function(const long *, int, FILE *);
247 static char *preadline(int, size_t, off_t);
248 
249 
250 static int aflag, bflag, dflag, iflag, Nflag, pflag, tflag, Tflag, wflag;
251 static int context, status;
252 static int format = D_NORMAL;
253 static struct stat stb1, stb2;
254 static char *ifdefname, *ignore_pats, diffargs[128];
255 static const char *diff_file;
256 regex_t ignore_re;
257 
258 static int  *J;			/* will be overlaid on class */
259 static int  *class;		/* will be overlaid on file[0] */
260 static int  *klist;		/* will be overlaid on file[0] after class */
261 static int  *member;		/* will be overlaid on file[1] */
262 static int   clen;
263 static int   inifdef;		/* whether or not we are in a #ifdef block */
264 static int   len[2];
265 static int   pref, suff;	/* length of prefix and suffix */
266 static int   slen[2];
267 static int   anychange;
268 static long *ixnew;		/* will be overlaid on file[1] */
269 static long *ixold;		/* will be overlaid on klist */
270 static struct cand *clist;	/* merely a free storage pot for candidates */
271 static int   clistlen;		/* the length of clist */
272 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
273 static u_char *chrtran;		/* translation table for case-folding */
274 static struct context_vec *context_vec_start;
275 static struct context_vec *context_vec_end;
276 static struct context_vec *context_vec_ptr;
277 
278 #define FUNCTION_CONTEXT_SIZE	41
279 static char lastbuf[FUNCTION_CONTEXT_SIZE];
280 static int  lastline;
281 static int  lastmatchline;
282 
283 
284 /*
285  * chrtran points to one of 2 translation tables: cup2low if folding upper to
286  * lower case clow2low if not folding case
287  */
288 u_char clow2low[256] = {
289 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
290 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
291 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
292 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
293 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
294 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
295 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
296 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
297 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
298 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
299 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
300 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
301 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
302 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
303 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
304 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
305 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
306 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
307 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
308 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
309 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
310 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
311 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
312 	0xfd, 0xfe, 0xff
313 };
314 
315 u_char cup2low[256] = {
316 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
317 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
318 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
319 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
320 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
321 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
322 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
323 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
324 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
325 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
326 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
327 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
328 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
329 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
330 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
331 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
332 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
333 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
334 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
335 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
336 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
337 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
338 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
339 	0xfd, 0xfe, 0xff
340 };
341 
342 struct cvs_cmd_info cvs_diff = {
343 	cvs_diff_options,
344 	cvs_diff_sendflags,
345 	cvs_diff_file,
346 	cvs_diff_cleanup,
347 	NULL,
348 	CF_RECURSE | CF_IGNORE | CF_SORT | CF_KNOWN,
349 	CVS_REQ_DIFF,
350 	CVS_CMD_SENDARGS2 | CVS_CMD_ALLOWSPEC | CVS_CMD_SENDDIR
351 };
352 
353 static struct diff_arg *dap = NULL;
354 static int recurse;
355 
356 int
357 cvs_diff_options(char *opt, int argc, char **argv, int *arg)
358 {
359 	int ch;
360 
361 	dap = (struct diff_arg *)malloc(sizeof(*dap));
362 	if (dap == NULL)
363 		return (CVS_EX_DATA);
364 	dap->date1 = dap->date2 = dap->rev1 = dap->rev2 = NULL;
365 	strlcpy(diffargs, argv[0], sizeof(diffargs));
366 
367 	while ((ch = getopt(argc, argv, opt)) != -1) {
368 		switch (ch) {
369 		case 'c':
370 			strlcat(diffargs, " -c", sizeof(diffargs));
371 			format = D_CONTEXT;
372 			break;
373 		case 'D':
374 			if (dap->date1 == NULL && dap->rev1 == NULL) {
375 				dap->date1 = optarg;
376 			} else if (dap->date2 == NULL && dap->rev2 == NULL) {
377 				dap->date2 = optarg;
378 			} else {
379 				cvs_log(LP_ERR,
380 				    "no more than two revisions/dates can "
381 				    "be specified");
382 			}
383 			break;
384 		case 'l':
385 			strlcat(diffargs, " -l", sizeof(diffargs));
386 			recurse = 0;
387 			cvs_diff.file_flags &= ~CF_RECURSE;
388 			break;
389 		case 'i':
390 			strlcat(diffargs, " -i", sizeof(diffargs));
391 			iflag = 1;
392 			break;
393 		case 'N':
394 			strlcat(diffargs, " -N", sizeof(diffargs));
395 			Nflag = 1;
396 			break;
397 		case 'n':
398 			strlcat(diffargs, " -n", sizeof(diffargs));
399 			format = D_RCSDIFF;
400 			break;
401 		case 'p':
402 			strlcat(diffargs, " -p", sizeof(diffargs));
403 			pflag = 1;
404 			break;
405 		case 'r':
406 			if ((dap->rev1 == NULL) && (dap->date1 == NULL)) {
407 				dap->rev1 = optarg;
408 			} else if ((dap->rev2 == NULL) &&
409 			    (dap->date2 == NULL)) {
410 				dap->rev2 = optarg;
411 			} else {
412 				cvs_log(LP_ERR,
413 				    "no more than two revisions/dates can "
414 				    "be specified");
415 				return (CVS_EX_USAGE);
416 			}
417 			break;
418 		case 'R':
419 			cvs_diff.file_flags |= CF_RECURSE;
420 			break;
421 		case 'u':
422 			strlcat(diffargs, " -u", sizeof(diffargs));
423 			format = D_UNIFIED;
424 			break;
425 		default:
426 			return (CVS_EX_USAGE);
427 		}
428 	}
429 
430 	*arg = optind;
431 	return (0);
432 }
433 
434 int
435 cvs_diff_cleanup(void)
436 {
437 	if (dap != NULL)
438 		free(dap);
439 	return (0);
440 }
441 
442 /*
443  * cvs_diff_sendflags()
444  *
445  */
446 int
447 cvs_diff_sendflags(struct cvsroot *root)
448 {
449 	/* send the flags */
450 	if (Nflag && (cvs_sendarg(root, "-N", 0) < 0))
451 		return (CVS_EX_PROTO);
452 	if (pflag && (cvs_sendarg(root, "-p", 0) < 0))
453 		return (CVS_EX_PROTO);
454 
455 	if (format == D_CONTEXT)
456 		cvs_sendarg(root, "-c", 0);
457 	else if (format == D_UNIFIED)
458 		cvs_sendarg(root, "-u", 0);
459 
460 	if (dap->rev1 != NULL) {
461 		cvs_sendarg(root, "-r", 0);
462 		cvs_sendarg(root, dap->rev1, 0);
463 	} else if (dap->date1 != NULL) {
464 		cvs_sendarg(root, "-D", 0);
465 		cvs_sendarg(root, dap->date1, 0);
466 	}
467 	if (dap->rev2 != NULL) {
468 		cvs_sendarg(root, "-r", 0);
469 		cvs_sendarg(root, dap->rev2, 0);
470 	} else if (dap->date2 != NULL) {
471 		cvs_sendarg(root, "-D", 0);
472 		cvs_sendarg(root, dap->date2, 0);
473 	}
474 
475 	return (0);
476 }
477 
478 
479 /*
480  * cvs_diff_file()
481  *
482  * Diff a single file.
483  */
484 int
485 cvs_diff_file(struct cvs_file *cfp, void *arg)
486 {
487 	int l;
488 	char *dir, *repo, buf[64];
489 	char fpath[MAXPATHLEN], dfpath[MAXPATHLEN], rcspath[MAXPATHLEN];
490 	char path_tmp1[MAXPATHLEN], path_tmp2[MAXPATHLEN];
491 	BUF *b1, *b2;
492 	RCSNUM *r1, *r2;
493 	RCSFILE *rf;
494 	struct cvsroot *root;
495 
496 	if (cfp->cf_type == DT_DIR) {
497 		if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
498 			root = cfp->cf_parent->cf_root;
499 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE,
500 			    CVS_FILE_NAME(cfp));
501 		} else {
502 			root = cfp->cf_root;
503 #if 0
504 			if ((cfp->cf_parent == NULL) ||
505 			    (root != cfp->cf_parent->cf_root)) {
506 				cvs_connect(root);
507 				cvs_diff_sendflags(root);
508 			}
509 #endif
510 
511 			cvs_senddir(root, cfp);
512 		}
513 
514 		return (0);
515 	}
516 
517 	if (cfp->cf_cvstat == CVS_FST_LOST) {
518 		cvs_log(LP_WARN, "cannot find file %s", CVS_FILE_NAME(cfp));
519 		return (0);
520 	}
521 
522 	rf = NULL;
523 	diff_file = cvs_file_getpath(cfp, fpath, sizeof(fpath));
524 
525 	if (cfp->cf_parent != NULL) {
526 		dir = cvs_file_getpath(cfp->cf_parent, dfpath, sizeof(dfpath));
527 		root = cfp->cf_parent->cf_root;
528 		repo = cfp->cf_parent->cf_repo;
529 	} else {
530 		dir = ".";
531 		root = NULL;
532 		repo = NULL;
533 	}
534 
535 	if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
536 		if (root->cr_method == CVS_METHOD_LOCAL)
537 			cvs_log(LP_WARN, "I know nothing about %s", diff_file);
538 		else
539 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE,
540 			    CVS_FILE_NAME(cfp));
541 		return (0);
542 	}
543 
544 	if (root->cr_method != CVS_METHOD_LOCAL) {
545 		if (cvs_sendentry(root, cfp) < 0) {
546 			return (CVS_EX_PROTO);
547 		}
548 	}
549 
550 	if (cfp->cf_cvstat == CVS_FST_UPTODATE) {
551 		if (root->cr_method != CVS_METHOD_LOCAL)
552 			cvs_sendreq(root, CVS_REQ_UNCHANGED,
553 			    CVS_FILE_NAME(cfp));
554 		return (0);
555 	}
556 
557 	/* at this point, the file is modified */
558 	if (root->cr_method != CVS_METHOD_LOCAL) {
559 		cvs_sendreq(root, CVS_REQ_MODIFIED, CVS_FILE_NAME(cfp));
560 		cvs_sendfile(root, diff_file);
561 	} else {
562 		l = snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s",
563 		    root->cr_dir, repo, diff_file, RCS_FILE_EXT);
564 		if (l == -1 || l >= (int)sizeof(rcspath)) {
565 			errno = ENAMETOOLONG;
566 			cvs_log(LP_ERRNO, "%s", rcspath);
567 			return (-1);
568 		}
569 
570 		rf = rcs_open(rcspath, RCS_READ);
571 		if (rf == NULL) {
572 			return (CVS_EX_DATA);
573 		}
574 
575 		cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file,
576 		    RCS_DIFF_DIV, rcspath);
577 
578 		if (dap->rev1 == NULL)
579 			r1 = cfp->cf_lrev;
580 		else {
581 			if ((r1 = rcsnum_parse(dap->rev1)) == NULL) {
582 				return (CVS_EX_DATA);
583 			}
584 		}
585 
586 		cvs_printf("retrieving revision %s\n",
587 		    rcsnum_tostr(r1, buf, sizeof(buf)));
588 		b1 = rcs_getrev(rf, r1);
589 
590 		if (r1 != cfp->cf_lrev)
591 			rcsnum_free(r1);
592 
593 		if (dap->rev2 != NULL) {
594 			cvs_printf("retrieving revision %s\n", dap->rev2);
595 			if ((r2 = rcsnum_parse(dap->rev2)) == NULL) {
596 				return (CVS_EX_DATA);
597 			}
598 			b2 = rcs_getrev(rf, r2);
599 			rcsnum_free(r2);
600 		} else {
601 			b2 = cvs_buf_load(diff_file, BUF_AUTOEXT);
602 		}
603 
604 		rcs_close(rf);
605 
606 		printf("%s", diffargs);
607 		printf(" -r%s", buf);
608 		if (dap->rev2 != NULL)
609 			printf(" -r%s", dap->rev2);
610 		printf(" %s\n", diff_file);
611 		strlcpy(path_tmp1, "/tmp/diff1.XXXXXXXXXX", sizeof(path_tmp1));
612 		if (cvs_buf_write_stmp(b1, path_tmp1, 0600) == -1) {
613 			cvs_buf_free(b1);
614 			cvs_buf_free(b2);
615 			return (CVS_EX_DATA);
616 		}
617 		cvs_buf_free(b1);
618 
619 		strlcpy(path_tmp2, "/tmp/diff2.XXXXXXXXXX", sizeof(path_tmp2));
620 		if (cvs_buf_write_stmp(b2, path_tmp2, 0600) == -1) {
621 			cvs_buf_free(b2);
622 			(void)unlink(path_tmp1);
623 			return (CVS_EX_DATA);
624 		}
625 		cvs_buf_free(b2);
626 
627 		cvs_diffreg(path_tmp1, path_tmp2);
628 		(void)unlink(path_tmp1);
629 		(void)unlink(path_tmp2);
630 	}
631 
632 	return (0);
633 }
634 
635 
636 int
637 cvs_diffreg(const char *file1, const char *file2)
638 {
639 	FILE *f1, *f2;
640 	int i, rval;
641 	void *tmp;
642 
643 	f1 = f2 = NULL;
644 	rval = D_SAME;
645 	anychange = 0;
646 	lastline = 0;
647 	lastmatchline = 0;
648 	context_vec_ptr = context_vec_start - 1;
649 	chrtran = (iflag ? cup2low : clow2low);
650 
651 	f1 = fopen(file1, "r");
652 	if (f1 == NULL) {
653 		cvs_log(LP_ERRNO, "%s", file1);
654 		status |= 2;
655 		goto closem;
656 	}
657 
658 	f2 = fopen(file2, "r");
659 	if (f2 == NULL) {
660 		cvs_log(LP_ERRNO, "%s", file2);
661 		status |= 2;
662 		goto closem;
663 	}
664 
665 	switch (files_differ(f1, f2)) {
666 	case 0:
667 		goto closem;
668 	case 1:
669 		break;
670 	default:
671 		/* error */
672 		status |= 2;
673 		goto closem;
674 	}
675 
676 	if (!asciifile(f1) || !asciifile(f2)) {
677 		rval = D_BINARY;
678 		status |= 1;
679 		goto closem;
680 	}
681 	if ((prepare(0, f1, stb1.st_size) < 0) ||
682 	    (prepare(1, f2, stb2.st_size) < 0)) {
683 		status |= 2;
684 		goto closem;
685 	}
686 	prune();
687 	sort(sfile[0], slen[0]);
688 	sort(sfile[1], slen[1]);
689 
690 	member = (int *)file[1];
691 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
692 	if ((tmp = realloc(member, (slen[1] + 2) * sizeof(int))) == NULL) {
693 		status |= 2;
694 		goto closem;
695 	}
696 	member = (int *)tmp;
697 
698 	class = (int *)file[0];
699 	unsort(sfile[0], slen[0], class);
700 	if ((tmp = realloc(class, (slen[0] + 2) * sizeof(int))) == NULL) {
701 		status |= 2;
702 		goto closem;
703 	}
704 	class = (int *)tmp;
705 
706 	if ((klist = malloc((slen[0] + 2) * sizeof(int))) == NULL) {
707 		cvs_log(LP_ERRNO, "failed to allocate klist");
708 		status |= 2;
709 		goto closem;
710 	}
711 	clen = 0;
712 	clistlen = 100;
713 	if ((clist = malloc(clistlen * sizeof(cand))) == NULL) {
714 		cvs_log(LP_ERRNO, "failed to allocate clist");
715 		status |= 2;
716 		goto closem;
717 	}
718 	i = stone(class, slen[0], member, klist);
719 	free(member);
720 	free(class);
721 
722 	J = realloc(J, (len[0] + 2) * sizeof(int));
723 	unravel(klist[i]);
724 	free(clist);
725 	free(klist);
726 
727 	ixold = realloc(ixold, (len[0] + 2) * sizeof(long));
728 	ixnew = realloc(ixnew, (len[1] + 2) * sizeof(long));
729 	check(f1, f2);
730 	output(file1, f1, file2, f2);
731 
732 closem:
733 	if (anychange) {
734 		status |= 1;
735 		if (rval == D_SAME)
736 			rval = D_DIFFER;
737 	}
738 	if (f1 != NULL)
739 		fclose(f1);
740 	if (f2 != NULL)
741 		fclose(f2);
742 
743 	return (rval);
744 }
745 
746 /*
747  * Check to see if the given files differ.
748  * Returns 0 if they are the same, 1 if different, and -1 on error.
749  * XXX - could use code from cmp(1) [faster]
750  */
751 static int
752 files_differ(FILE *f1, FILE *f2)
753 {
754 	char buf1[BUFSIZ], buf2[BUFSIZ];
755 	size_t i, j;
756 
757 	if (stb1.st_size != stb2.st_size)
758 		return (1);
759 	for (;;) {
760 		i = fread(buf1, 1, sizeof(buf1), f1);
761 		j = fread(buf2, 1, sizeof(buf2), f2);
762 		if (i != j)
763 			return (1);
764 		if (i == 0 && j == 0) {
765 			if (ferror(f1) || ferror(f2))
766 				return (1);
767 			return (0);
768 		}
769 		if (memcmp(buf1, buf2, i) != 0)
770 			return (1);
771 	}
772 }
773 
774 
775 char *
776 splice(char *dir, char *filename)
777 {
778 	char *tail, *buf;
779 
780 	if ((tail = strrchr(filename, '/')) == NULL)
781 		tail = filename;
782 	else
783 		tail++;
784 	asprintf(&buf, "%s/%s", dir, tail);
785 	return (buf);
786 }
787 
788 static int
789 prepare(int i, FILE *fd, off_t filesize)
790 {
791 	void *tmp;
792 	struct line *p;
793 	int j, h;
794 	size_t sz;
795 
796 	rewind(fd);
797 
798 	sz = ((size_t)filesize <= SIZE_MAX ? (size_t)filesize : SIZE_MAX) / 25;
799 	if (sz < 100)
800 		sz = 100;
801 
802 	p = (struct line *)malloc((sz + 3) * sizeof(struct line));
803 	if (p == NULL) {
804 		cvs_log(LP_ERRNO, "failed to prepare line array");
805 		return (-1);
806 	}
807 	for (j = 0; (h = readhash(fd));) {
808 		if (j == (int)sz) {
809 			sz = sz * 3 / 2;
810 			tmp = realloc(p, (sz + 3) * sizeof(struct line));
811 			if (tmp == NULL) {
812 				cvs_log(LP_ERRNO, "failed to grow line array");
813 				free(p);
814 				return (-1);
815 			}
816 			p = (struct line *)tmp;
817 		}
818 		p[++j].value = h;
819 	}
820 	len[i] = j;
821 	file[i] = p;
822 
823 	return (0);
824 }
825 
826 static void
827 prune(void)
828 {
829 	int i, j;
830 
831 	for (pref = 0; pref < len[0] && pref < len[1] &&
832 	    file[0][pref + 1].value == file[1][pref + 1].value;
833 	    pref++)
834 		;
835 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
836 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
837 	    suff++)
838 		;
839 	for (j = 0; j < 2; j++) {
840 		sfile[j] = file[j] + pref;
841 		slen[j] = len[j] - pref - suff;
842 		for (i = 0; i <= slen[j]; i++)
843 			sfile[j][i].serial = i;
844 	}
845 }
846 
847 static void
848 equiv(struct line *a, int n, struct line *b, int m, int *c)
849 {
850 	int i, j;
851 
852 	i = j = 1;
853 	while (i <= n && j <= m) {
854 		if (a[i].value < b[j].value)
855 			a[i++].value = 0;
856 		else if (a[i].value == b[j].value)
857 			a[i++].value = j;
858 		else
859 			j++;
860 	}
861 	while (i <= n)
862 		a[i++].value = 0;
863 	b[m + 1].value = 0;
864 	j = 0;
865 	while (++j <= m) {
866 		c[j] = -b[j].serial;
867 		while (b[j + 1].value == b[j].value) {
868 			j++;
869 			c[j] = b[j].serial;
870 		}
871 	}
872 	c[j] = -1;
873 }
874 
875 /* Code taken from ping.c */
876 static int
877 isqrt(int n)
878 {
879 	int y, x = 1;
880 
881 	if (n == 0)
882 		return (0);
883 
884 	do { /* newton was a stinker */
885 		y = x;
886 		x = n / x;
887 		x += y;
888 		x /= 2;
889 	} while ((x - y) > 1 || (x - y) < -1);
890 
891 	return (x);
892 }
893 
894 static int
895 stone(int *a, int n, int *b, int *c)
896 {
897 	int i, k, y, j, l;
898 	int oldc, tc, oldl;
899 	u_int numtries;
900 
901 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
902 	const u_int bound = dflag ? UINT_MAX : MAX(256, (u_int)isqrt(n));
903 
904 	k = 0;
905 	c[0] = newcand(0, 0, 0);
906 	for (i = 1; i <= n; i++) {
907 		j = a[i];
908 		if (j == 0)
909 			continue;
910 		y = -b[j];
911 		oldl = 0;
912 		oldc = c[0];
913 		numtries = 0;
914 		do {
915 			if (y <= clist[oldc].y)
916 				continue;
917 			l = search(c, k, y);
918 			if (l != oldl + 1)
919 				oldc = c[l - 1];
920 			if (l <= k) {
921 				if (clist[c[l]].y <= y)
922 					continue;
923 				tc = c[l];
924 				c[l] = newcand(i, y, oldc);
925 				oldc = tc;
926 				oldl = l;
927 				numtries++;
928 			} else {
929 				c[l] = newcand(i, y, oldc);
930 				k++;
931 				break;
932 			}
933 		} while ((y = b[++j]) > 0 && numtries < bound);
934 	}
935 	return (k);
936 }
937 
938 static int
939 newcand(int x, int y, int pred)
940 {
941 	struct cand *q;
942 
943 	if (clen == clistlen) {
944 		clistlen = clistlen * 11 / 10;
945 		clist = realloc(clist, clistlen * sizeof(cand));
946 		if (clist == NULL) {
947 			cvs_log(LP_ERRNO, "failed to resize clist");
948 			return (-1);
949 		}
950 	}
951 	q = clist + clen;
952 	q->x = x;
953 	q->y = y;
954 	q->pred = pred;
955 	return (clen++);
956 }
957 
958 static int
959 search(int *c, int k, int y)
960 {
961 	int i, j, l, t;
962 
963 	if (clist[c[k]].y < y)	/* quick look for typical case */
964 		return (k + 1);
965 	i = 0;
966 	j = k + 1;
967 	while (1) {
968 		l = i + j;
969 		if ((l >>= 1) <= i)
970 			break;
971 		t = clist[c[l]].y;
972 		if (t > y)
973 			j = l;
974 		else if (t < y)
975 			i = l;
976 		else
977 			return (l);
978 	}
979 	return (l + 1);
980 }
981 
982 static void
983 unravel(int p)
984 {
985 	struct cand *q;
986 	int i;
987 
988 	for (i = 0; i <= len[0]; i++)
989 		J[i] = i <= pref ? i :
990 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
991 	for (q = clist + p; q->y != 0; q = clist + q->pred)
992 		J[q->x + pref] = q->y + pref;
993 }
994 
995 /*
996  * Check does double duty:
997  *  1.	ferret out any fortuitous correspondences due
998  *	to confounding by hashing (which result in "jackpot")
999  *  2.  collect random access indexes to the two files
1000  */
1001 static void
1002 check(FILE *f1, FILE *f2)
1003 {
1004 	int i, j, jackpot, c, d;
1005 	long ctold, ctnew;
1006 
1007 	rewind(f1);
1008 	rewind(f2);
1009 	j = 1;
1010 	ixold[0] = ixnew[0] = 0;
1011 	jackpot = 0;
1012 	ctold = ctnew = 0;
1013 	for (i = 1; i <= len[0]; i++) {
1014 		if (J[i] == 0) {
1015 			ixold[i] = ctold += skipline(f1);
1016 			continue;
1017 		}
1018 		while (j < J[i]) {
1019 			ixnew[j] = ctnew += skipline(f2);
1020 			j++;
1021 		}
1022 		if (bflag || wflag || iflag) {
1023 			for (;;) {
1024 				c = getc(f1);
1025 				d = getc(f2);
1026 				/*
1027 				 * GNU diff ignores a missing newline
1028 				 * in one file if bflag || wflag.
1029 				 */
1030 				if ((bflag || wflag) &&
1031 				    ((c == EOF && d == '\n') ||
1032 				    (c == '\n' && d == EOF))) {
1033 					break;
1034 				}
1035 				ctold++;
1036 				ctnew++;
1037 				if (bflag && isspace(c) && isspace(d)) {
1038 					do {
1039 						if (c == '\n')
1040 							break;
1041 						ctold++;
1042 					} while (isspace(c = getc(f1)));
1043 					do {
1044 						if (d == '\n')
1045 							break;
1046 						ctnew++;
1047 					} while (isspace(d = getc(f2)));
1048 				} else if (wflag) {
1049 					while (isspace(c) && c != '\n') {
1050 						c = getc(f1);
1051 						ctold++;
1052 					}
1053 					while (isspace(d) && d != '\n') {
1054 						d = getc(f2);
1055 						ctnew++;
1056 					}
1057 				}
1058 				if (chrtran[c] != chrtran[d]) {
1059 					jackpot++;
1060 					J[i] = 0;
1061 					if (c != '\n' && c != EOF)
1062 						ctold += skipline(f1);
1063 					if (d != '\n' && c != EOF)
1064 						ctnew += skipline(f2);
1065 					break;
1066 				}
1067 				if (c == '\n' || c == EOF)
1068 					break;
1069 			}
1070 		} else {
1071 			for (;;) {
1072 				ctold++;
1073 				ctnew++;
1074 				if ((c = getc(f1)) != (d = getc(f2))) {
1075 					/* jackpot++; */
1076 					J[i] = 0;
1077 					if (c != '\n' && c != EOF)
1078 						ctold += skipline(f1);
1079 					if (d != '\n' && c != EOF)
1080 						ctnew += skipline(f2);
1081 					break;
1082 				}
1083 				if (c == '\n' || c == EOF)
1084 					break;
1085 			}
1086 		}
1087 		ixold[i] = ctold;
1088 		ixnew[j] = ctnew;
1089 		j++;
1090 	}
1091 	for (; j <= len[1]; j++)
1092 		ixnew[j] = ctnew += skipline(f2);
1093 	/*
1094 	 * if (jackpot)
1095 	 *	fprintf(stderr, "jackpot\n");
1096 	 */
1097 }
1098 
1099 /* shellsort CACM #201 */
1100 static void
1101 sort(struct line *a, int n)
1102 {
1103 	struct line *ai, *aim, w;
1104 	int j, m = 0, k;
1105 
1106 	if (n == 0)
1107 		return;
1108 	for (j = 1; j <= n; j *= 2)
1109 		m = 2 * j - 1;
1110 	for (m /= 2; m != 0; m /= 2) {
1111 		k = n - m;
1112 		for (j = 1; j <= k; j++) {
1113 			for (ai = &a[j]; ai > a; ai -= m) {
1114 				aim = &ai[m];
1115 				if (aim < ai)
1116 					break;	/* wraparound */
1117 				if (aim->value > ai[0].value ||
1118 				    (aim->value == ai[0].value &&
1119 					aim->serial > ai[0].serial))
1120 					break;
1121 				w.value = ai[0].value;
1122 				ai[0].value = aim->value;
1123 				aim->value = w.value;
1124 				w.serial = ai[0].serial;
1125 				ai[0].serial = aim->serial;
1126 				aim->serial = w.serial;
1127 			}
1128 		}
1129 	}
1130 }
1131 
1132 static void
1133 unsort(struct line *f, int l, int *b)
1134 {
1135 	int *a, i;
1136 
1137 	if ((a = (int *)malloc((l + 1) * sizeof(int))) == NULL) {
1138 		cvs_log(LP_ERRNO, "failed to allocate sort array");
1139 		return;
1140 	}
1141 	for (i = 1; i <= l; i++)
1142 		a[f[i].serial] = f[i].value;
1143 	for (i = 1; i <= l; i++)
1144 		b[i] = a[i];
1145 	free(a);
1146 }
1147 
1148 static int
1149 skipline(FILE *f)
1150 {
1151 	int i, c;
1152 
1153 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
1154 		continue;
1155 	return (i);
1156 }
1157 
1158 static void
1159 output(const char *file1, FILE *f1, const char *file2, FILE *f2)
1160 {
1161 	int m, i0, i1, j0, j1;
1162 
1163 	rewind(f1);
1164 	rewind(f2);
1165 	m = len[0];
1166 	J[0] = 0;
1167 	J[m + 1] = len[1] + 1;
1168 	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
1169 		while (i0 <= m && J[i0] == J[i0 - 1] + 1)
1170 			i0++;
1171 		j0 = J[i0 - 1] + 1;
1172 		i1 = i0 - 1;
1173 		while (i1 < m && J[i1 + 1] == 0)
1174 			i1++;
1175 		j1 = J[i1 + 1] - 1;
1176 		J[i1] = j1;
1177 		change(file1, f1, file2, f2, i0, i1, j0, j1);
1178 	}
1179 	if (m == 0)
1180 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
1181 	if (format == D_IFDEF) {
1182 		for (;;) {
1183 #define	c i0
1184 			if ((c = getc(f1)) == EOF)
1185 				return;
1186 			putchar(c);
1187 		}
1188 #undef c
1189 	}
1190 	if (anychange != 0) {
1191 		if (format == D_CONTEXT)
1192 			dump_context_vec(f1, f2);
1193 		else if (format == D_UNIFIED)
1194 			dump_unified_vec(f1, f2);
1195 	}
1196 }
1197 
1198 static __inline void
1199 range(int a, int b, char *separator)
1200 {
1201 	printf("%d", a > b ? b : a);
1202 	if (a < b)
1203 		printf("%s%d", separator, b);
1204 }
1205 
1206 static __inline void
1207 uni_range(int a, int b)
1208 {
1209 	if (a < b)
1210 		printf("%d,%d", a, b - a + 1);
1211 	else if (a == b)
1212 		printf("%d", b);
1213 	else
1214 		printf("%d,0", b);
1215 }
1216 
1217 static char *
1218 preadline(int fd, size_t rlen, off_t off)
1219 {
1220 	char *line;
1221 	ssize_t nr;
1222 
1223 	line = malloc(rlen + 1);
1224 	if (line == NULL) {
1225 		cvs_log(LP_ERRNO, "failed to allocate line");
1226 		return (NULL);
1227 	}
1228 	if ((nr = pread(fd, line, rlen, off)) < 0) {
1229 		cvs_log(LP_ERRNO, "preadline failed");
1230 		return (NULL);
1231 	}
1232 	line[nr] = '\0';
1233 	return (line);
1234 }
1235 
1236 static int
1237 ignoreline(char *line)
1238 {
1239 	int ret;
1240 
1241 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1242 	free(line);
1243 	return (ret == 0);	/* if it matched, it should be ignored. */
1244 }
1245 
1246 /*
1247  * Indicate that there is a difference between lines a and b of the from file
1248  * to get to lines c to d of the to file.  If a is greater then b then there
1249  * are no lines in the from file involved and this means that there were
1250  * lines appended (beginning at b).  If c is greater than d then there are
1251  * lines missing from the to file.
1252  */
1253 static void
1254 change(const char *file1, FILE *f1, const char *file2, FILE *f2,
1255 	int a, int b, int c, int d)
1256 {
1257 	static size_t max_context = 64;
1258 	int i;
1259 
1260 	if (format != D_IFDEF && a > b && c > d)
1261 		return;
1262 	if (ignore_pats != NULL) {
1263 		char *line;
1264 		/*
1265 		 * All lines in the change, insert, or delete must
1266 		 * match an ignore pattern for the change to be
1267 		 * ignored.
1268 		 */
1269 		if (a <= b) {		/* Changes and deletes. */
1270 			for (i = a; i <= b; i++) {
1271 				line = preadline(fileno(f1),
1272 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1273 				if (!ignoreline(line))
1274 					goto proceed;
1275 			}
1276 		}
1277 		if (a > b || c <= d) {	/* Changes and inserts. */
1278 			for (i = c; i <= d; i++) {
1279 				line = preadline(fileno(f2),
1280 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1281 				if (!ignoreline(line))
1282 					goto proceed;
1283 			}
1284 		}
1285 		return;
1286 	}
1287 proceed:
1288 	if (format == D_CONTEXT || format == D_UNIFIED) {
1289 		/*
1290 		 * Allocate change records as needed.
1291 		 */
1292 		if (context_vec_ptr == context_vec_end - 1) {
1293 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1294 			max_context <<= 1;
1295 			context_vec_start = realloc(context_vec_start,
1296 			    max_context * sizeof(struct context_vec));
1297 			context_vec_end = context_vec_start + max_context;
1298 			context_vec_ptr = context_vec_start + offset;
1299 		}
1300 		if (anychange == 0) {
1301 			/*
1302 			 * Print the context/unidiff header first time through.
1303 			 */
1304 			printf("%s %s	%s",
1305 			    format == D_CONTEXT ? "***" : "---", diff_file,
1306 			    ctime(&stb1.st_mtime));
1307 			printf("%s %s	%s",
1308 			    format == D_CONTEXT ? "---" : "+++", diff_file,
1309 			    ctime(&stb2.st_mtime));
1310 			anychange = 1;
1311 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1312 		    c > context_vec_ptr->d + (2 * context) + 1) {
1313 			/*
1314 			 * If this change is more than 'context' lines from the
1315 			 * previous change, dump the record and reset it.
1316 			 */
1317 			if (format == D_CONTEXT)
1318 				dump_context_vec(f1, f2);
1319 			else
1320 				dump_unified_vec(f1, f2);
1321 		}
1322 		context_vec_ptr++;
1323 		context_vec_ptr->a = a;
1324 		context_vec_ptr->b = b;
1325 		context_vec_ptr->c = c;
1326 		context_vec_ptr->d = d;
1327 		return;
1328 	}
1329 	if (anychange == 0)
1330 		anychange = 1;
1331 	switch (format) {
1332 	case D_BRIEF:
1333 		return;
1334 	case D_NORMAL:
1335 		range(a, b, ",");
1336 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1337 		if (format == D_NORMAL)
1338 			range(c, d, ",");
1339 		putchar('\n');
1340 		break;
1341 	case D_RCSDIFF:
1342 		if (a > b)
1343 			printf("a%d %d\n", b, d - c + 1);
1344 		else {
1345 			printf("d%d %d\n", a, b - a + 1);
1346 
1347 			if (!(c > d))	/* add changed lines */
1348 				printf("a%d %d\n", b, d - c + 1);
1349 		}
1350 		break;
1351 	}
1352 	if (format == D_NORMAL || format == D_IFDEF) {
1353 		fetch(ixold, a, b, f1, '<', 1);
1354 		if (a <= b && c <= d && format == D_NORMAL)
1355 			puts("---");
1356 	}
1357 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1358 	if (inifdef) {
1359 		printf("#endif /* %s */\n", ifdefname);
1360 		inifdef = 0;
1361 	}
1362 }
1363 
1364 static int
1365 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1366 {
1367 	int i, j, c, lastc, col, nc;
1368 
1369 	/*
1370 	 * When doing #ifdef's, copy down to current line
1371 	 * if this is the first file, so that stuff makes it to output.
1372 	 */
1373 	if (format == D_IFDEF && oldfile) {
1374 		long curpos = ftell(lb);
1375 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1376 		nc = f[a > b ? b : a - 1] - curpos;
1377 		for (i = 0; i < nc; i++)
1378 			putchar(getc(lb));
1379 	}
1380 	if (a > b)
1381 		return (0);
1382 	if (format == D_IFDEF) {
1383 		if (inifdef) {
1384 			printf("#else /* %s%s */\n",
1385 			    oldfile == 1 ? "!" : "", ifdefname);
1386 		} else {
1387 			if (oldfile)
1388 				printf("#ifndef %s\n", ifdefname);
1389 			else
1390 				printf("#ifdef %s\n", ifdefname);
1391 		}
1392 		inifdef = 1 + oldfile;
1393 	}
1394 	for (i = a; i <= b; i++) {
1395 		fseek(lb, f[i - 1], SEEK_SET);
1396 		nc = f[i] - f[i - 1];
1397 		if (format != D_IFDEF && ch != '\0') {
1398 			putchar(ch);
1399 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1400 			    || format == D_UNIFIED))
1401 				putchar('\t');
1402 			else if (format != D_UNIFIED)
1403 				putchar(' ');
1404 		}
1405 		col = 0;
1406 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1407 			if ((c = getc(lb)) == EOF) {
1408 				if (format == D_RCSDIFF)
1409 					warnx("No newline at end of file");
1410 				else
1411 					puts("\n\\ No newline at end of file");
1412 				return (0);
1413 			}
1414 			if (c == '\t' && tflag) {
1415 				do {
1416 					putchar(' ');
1417 				} while (++col & 7);
1418 			} else {
1419 				putchar(c);
1420 				col++;
1421 			}
1422 		}
1423 	}
1424 	return (0);
1425 }
1426 
1427 /*
1428  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1429  */
1430 static int
1431 readhash(FILE *f)
1432 {
1433 	int i, t, space;
1434 	int sum;
1435 
1436 	sum = 1;
1437 	space = 0;
1438 	if (!bflag && !wflag) {
1439 		if (iflag)
1440 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1441 				if (t == EOF) {
1442 					if (i == 0)
1443 						return (0);
1444 					break;
1445 				}
1446 				sum = sum * 127 + chrtran[t];
1447 			}
1448 		else
1449 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1450 				if (t == EOF) {
1451 					if (i == 0)
1452 						return (0);
1453 					break;
1454 				}
1455 				sum = sum * 127 + t;
1456 			}
1457 	} else {
1458 		for (i = 0;;) {
1459 			switch (t = getc(f)) {
1460 			case '\t':
1461 			case ' ':
1462 				space++;
1463 				continue;
1464 			default:
1465 				if (space && !wflag) {
1466 					i++;
1467 					space = 0;
1468 				}
1469 				sum = sum * 127 + chrtran[t];
1470 				i++;
1471 				continue;
1472 			case EOF:
1473 				if (i == 0)
1474 					return (0);
1475 				/* FALLTHROUGH */
1476 			case '\n':
1477 				break;
1478 			}
1479 			break;
1480 		}
1481 	}
1482 	/*
1483 	 * There is a remote possibility that we end up with a zero sum.
1484 	 * Zero is used as an EOF marker, so return 1 instead.
1485 	 */
1486 	return (sum == 0 ? 1 : sum);
1487 }
1488 
1489 static int
1490 asciifile(FILE *f)
1491 {
1492 	char buf[BUFSIZ];
1493 	int i, cnt;
1494 
1495 	if (aflag || f == NULL)
1496 		return (1);
1497 
1498 	rewind(f);
1499 	cnt = fread(buf, 1, sizeof(buf), f);
1500 	for (i = 0; i < cnt; i++)
1501 		if (!isprint(buf[i]) && !isspace(buf[i]))
1502 			return (0);
1503 	return (1);
1504 }
1505 
1506 static char*
1507 match_function(const long *f, int pos, FILE *fp)
1508 {
1509 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1510 	size_t nc;
1511 	int last = lastline;
1512 	char *p;
1513 
1514 	lastline = pos;
1515 	while (pos > last) {
1516 		fseek(fp, f[pos - 1], SEEK_SET);
1517 		nc = f[pos] - f[pos - 1];
1518 		if (nc >= sizeof(buf))
1519 			nc = sizeof(buf) - 1;
1520 		nc = fread(buf, 1, nc, fp);
1521 		if (nc > 0) {
1522 			buf[nc] = '\0';
1523 			p = strchr(buf, '\n');
1524 			if (p != NULL)
1525 				*p = '\0';
1526 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1527 				strlcpy(lastbuf, buf, sizeof lastbuf);
1528 				lastmatchline = pos;
1529 				return lastbuf;
1530 			}
1531 		}
1532 		pos--;
1533 	}
1534 	return (lastmatchline > 0) ? lastbuf : NULL;
1535 }
1536 
1537 
1538 /* dump accumulated "context" diff changes */
1539 static void
1540 dump_context_vec(FILE *f1, FILE *f2)
1541 {
1542 	struct context_vec *cvp = context_vec_start;
1543 	int lowa, upb, lowc, upd, do_output;
1544 	int a, b, c, d;
1545 	char ch, *f;
1546 
1547 	if (context_vec_start > context_vec_ptr)
1548 		return;
1549 
1550 	b = d = 0;		/* gcc */
1551 	lowa = MAX(1, cvp->a - context);
1552 	upb = MIN(len[0], context_vec_ptr->b + context);
1553 	lowc = MAX(1, cvp->c - context);
1554 	upd = MIN(len[1], context_vec_ptr->d + context);
1555 
1556 	printf("***************");
1557 	if (pflag) {
1558 		f = match_function(ixold, lowa - 1, f1);
1559 		if (f != NULL) {
1560 			putchar(' ');
1561 			fputs(f, stdout);
1562 		}
1563 	}
1564 	printf("\n*** ");
1565 	range(lowa, upb, ",");
1566 	printf(" ****\n");
1567 
1568 	/*
1569 	 * Output changes to the "old" file.  The first loop suppresses
1570 	 * output if there were no changes to the "old" file (we'll see
1571 	 * the "old" lines as context in the "new" list).
1572 	 */
1573 	do_output = 0;
1574 	for (; cvp <= context_vec_ptr; cvp++)
1575 		if (cvp->a <= cvp->b) {
1576 			cvp = context_vec_start;
1577 			do_output++;
1578 			break;
1579 		}
1580 	if (do_output) {
1581 		while (cvp <= context_vec_ptr) {
1582 			a = cvp->a;
1583 			b = cvp->b;
1584 			c = cvp->c;
1585 			d = cvp->d;
1586 
1587 			if (a <= b && c <= d)
1588 				ch = 'c';
1589 			else
1590 				ch = (a <= b) ? 'd' : 'a';
1591 
1592 			if (ch == 'a')
1593 				fetch(ixold, lowa, b, f1, ' ', 0);
1594 			else {
1595 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1596 				fetch(ixold, a, b, f1,
1597 				    ch == 'c' ? '!' : '-', 0);
1598 			}
1599 			lowa = b + 1;
1600 			cvp++;
1601 		}
1602 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1603 	}
1604 	/* output changes to the "new" file */
1605 	printf("--- ");
1606 	range(lowc, upd, ",");
1607 	printf(" ----\n");
1608 
1609 	do_output = 0;
1610 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1611 		if (cvp->c <= cvp->d) {
1612 			cvp = context_vec_start;
1613 			do_output++;
1614 			break;
1615 		}
1616 	if (do_output) {
1617 		while (cvp <= context_vec_ptr) {
1618 			a = cvp->a;
1619 			b = cvp->b;
1620 			c = cvp->c;
1621 			d = cvp->d;
1622 
1623 			if (a <= b && c <= d)
1624 				ch = 'c';
1625 			else
1626 				ch = (a <= b) ? 'd' : 'a';
1627 
1628 			if (ch == 'd')
1629 				fetch(ixnew, lowc, d, f2, ' ', 0);
1630 			else {
1631 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1632 				fetch(ixnew, c, d, f2,
1633 				    ch == 'c' ? '!' : '+', 0);
1634 			}
1635 			lowc = d + 1;
1636 			cvp++;
1637 		}
1638 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1639 	}
1640 	context_vec_ptr = context_vec_start - 1;
1641 }
1642 
1643 /* dump accumulated "unified" diff changes */
1644 static void
1645 dump_unified_vec(FILE *f1, FILE *f2)
1646 {
1647 	struct context_vec *cvp = context_vec_start;
1648 	int lowa, upb, lowc, upd;
1649 	int a, b, c, d;
1650 	char ch, *f;
1651 
1652 	if (context_vec_start > context_vec_ptr)
1653 		return;
1654 
1655 	b = d = 0;		/* gcc */
1656 	lowa = MAX(1, cvp->a - context);
1657 	upb = MIN(len[0], context_vec_ptr->b + context);
1658 	lowc = MAX(1, cvp->c - context);
1659 	upd = MIN(len[1], context_vec_ptr->d + context);
1660 
1661 	fputs("@@ -", stdout);
1662 	uni_range(lowa, upb);
1663 	fputs(" +", stdout);
1664 	uni_range(lowc, upd);
1665 	fputs(" @@", stdout);
1666 	if (pflag) {
1667 		f = match_function(ixold, lowa - 1, f1);
1668 		if (f != NULL) {
1669 			putchar(' ');
1670 			fputs(f, stdout);
1671 		}
1672 	}
1673 	putchar('\n');
1674 
1675 	/*
1676 	 * Output changes in "unified" diff format--the old and new lines
1677 	 * are printed together.
1678 	 */
1679 	for (; cvp <= context_vec_ptr; cvp++) {
1680 		a = cvp->a;
1681 		b = cvp->b;
1682 		c = cvp->c;
1683 		d = cvp->d;
1684 
1685 		/*
1686 		 * c: both new and old changes
1687 		 * d: only changes in the old file
1688 		 * a: only changes in the new file
1689 		 */
1690 		if (a <= b && c <= d)
1691 			ch = 'c';
1692 		else
1693 			ch = (a <= b) ? 'd' : 'a';
1694 
1695 		switch (ch) {
1696 		case 'c':
1697 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1698 			fetch(ixold, a, b, f1, '-', 0);
1699 			fetch(ixnew, c, d, f2, '+', 0);
1700 			break;
1701 		case 'd':
1702 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1703 			fetch(ixold, a, b, f1, '-', 0);
1704 			break;
1705 		case 'a':
1706 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1707 			fetch(ixnew, c, d, f2, '+', 0);
1708 			break;
1709 		}
1710 		lowa = b + 1;
1711 		lowc = d + 1;
1712 	}
1713 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1714 
1715 	context_vec_ptr = context_vec_start - 1;
1716 }
1717