xref: /openbsd-src/usr.bin/cvs/diff.c (revision 298116df5b000b61a69743d21c92035418df8900)
1 /*	$OpenBSD: diff.c,v 1.54 2005/07/27 16:42:19 xsa Exp $	*/
2 /*
3  * Copyright (C) Caldera International Inc.  2001-2002.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code and documentation must retain the above
10  *    copyright notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *	This product includes software developed or owned by Caldera
17  *	International, Inc.
18  * 4. Neither the name of Caldera International, Inc. nor the names of other
19  *    contributors may be used to endorse or promote products derived from
20  *    this software without specific prior written permission.
21  *
22  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
23  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
27  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
28  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
29  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
31  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
32  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 /*-
36  * Copyright (c) 1991, 1993
37  *	The Regents of the University of California.  All rights reserved.
38  * Copyright (c) 2004 Jean-Francois Brousseau.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 /*
67  *	Uses an algorithm due to Harold Stone, which finds
68  *	a pair of longest identical subsequences in the two
69  *	files.
70  *
71  *	The major goal is to generate the match vector J.
72  *	J[i] is the index of the line in file1 corresponding
73  *	to line i file0. J[i] = 0 if there is no
74  *	such line in file1.
75  *
76  *	Lines are hashed so as to work in core. All potential
77  *	matches are located by sorting the lines of each file
78  *	on the hash (called ``value''). In particular, this
79  *	collects the equivalence classes in file1 together.
80  *	Subroutine equiv replaces the value of each line in
81  *	file0 by the index of the first element of its
82  *	matching equivalence in (the reordered) file1.
83  *	To save space equiv squeezes file1 into a single
84  *	array member in which the equivalence classes
85  *	are simply concatenated, except that their first
86  *	members are flagged by changing sign.
87  *
88  *	Next the indices that point into member are unsorted into
89  *	array class according to the original order of file0.
90  *
91  *	The cleverness lies in routine stone. This marches
92  *	through the lines of file0, developing a vector klist
93  *	of "k-candidates". At step i a k-candidate is a matched
94  *	pair of lines x,y (x in file0 y in file1) such that
95  *	there is a common subsequence of length k
96  *	between the first i lines of file0 and the first y
97  *	lines of file1, but there is no such subsequence for
98  *	any smaller y. x is the earliest possible mate to y
99  *	that occurs in such a subsequence.
100  *
101  *	Whenever any of the members of the equivalence class of
102  *	lines in file1 matable to a line in file0 has serial number
103  *	less than the y of some k-candidate, that k-candidate
104  *	with the smallest such y is replaced. The new
105  *	k-candidate is chained (via pred) to the current
106  *	k-1 candidate so that the actual subsequence can
107  *	be recovered. When a member has serial number greater
108  *	that the y of all k-candidates, the klist is extended.
109  *	At the end, the longest subsequence is pulled out
110  *	and placed in the array J by unravel
111  *
112  *	With J in hand, the matches there recorded are
113  *	check'ed against reality to assure that no spurious
114  *	matches have crept in due to hashing. If they have,
115  *	they are broken, and "jackpot" is recorded--a harmless
116  *	matter except that a true match for a spuriously
117  *	mated line may now be unnecessarily reported as a change.
118  *
119  *	Much of the complexity of the program comes simply
120  *	from trying to minimize core utilization and
121  *	maximize the range of doable problems by dynamically
122  *	allocating what is needed and reusing what is not.
123  *	The core requirements for problems larger than somewhat
124  *	are (in words) 2*length(file0) + length(file1) +
125  *	3*(number of k-candidates installed),  typically about
126  *	6n words for files of length n.
127  */
128 
129 #include <sys/stat.h>
130 
131 #include <ctype.h>
132 #include <dirent.h>
133 #include <err.h>
134 #include <errno.h>
135 #include <fcntl.h>
136 #include <paths.h>
137 #include <regex.h>
138 #include <stddef.h>
139 #include <stdio.h>
140 #include <stdlib.h>
141 #include <string.h>
142 #include <unistd.h>
143 
144 #include "buf.h"
145 #include "cvs.h"
146 #include "log.h"
147 #include "proto.h"
148 
149 
150 #define CVS_DIFF_DEFCTX	3	/* default context length */
151 
152 
153 /*
154  * Output format options
155  */
156 #define	D_NORMAL	0	/* Normal output */
157 #define	D_CONTEXT	1	/* Diff with context */
158 #define	D_UNIFIED	2	/* Unified context diff */
159 #define	D_IFDEF		3	/* Diff with merged #ifdef's */
160 #define	D_BRIEF		4	/* Say if the files differ */
161 #define	D_RCSDIFF	5       /* Reverse editor output: RCS format */
162 
163 /*
164  * Status values for print_status() and diffreg() return values
165  */
166 #define	D_SAME		0	/* Files are the same */
167 #define	D_DIFFER	1	/* Files are different */
168 #define	D_BINARY	2	/* Binary files are different */
169 #define	D_COMMON	3	/* Subdirectory common to both dirs */
170 #define	D_ONLY		4	/* Only exists in one directory */
171 #define	D_MISMATCH1	5	/* path1 was a dir, path2 a file */
172 #define	D_MISMATCH2	6	/* path1 was a file, path2 a dir */
173 #define	D_ERROR		7	/* An error occurred */
174 #define	D_SKIPPED1	8	/* path1 was a special file */
175 #define	D_SKIPPED2	9	/* path2 was a special file */
176 
177 struct cand {
178 	int	x;
179 	int	y;
180 	int	pred;
181 } cand;
182 
183 struct line {
184 	int	serial;
185 	int	value;
186 } *file[2];
187 
188 /*
189  * The following struct is used to record change information when
190  * doing a "context" or "unified" diff.  (see routine "change" to
191  * understand the highly mnemonic field names)
192  */
193 struct context_vec {
194 	int	a;	/* start line in old file */
195 	int	b;	/* end line in old file */
196 	int	c;	/* start line in new file */
197 	int	d;	/* end line in new file */
198 };
199 
200 struct diff_arg {
201 	char	*rev1;
202 	char	*rev2;
203 	char	*date1;
204 	char	*date2;
205 };
206 
207 
208 static int	cvs_diff_init(struct cvs_cmd *, int, char **, int *);
209 static int	cvs_diff_remote(CVSFILE *, void *);
210 static int	cvs_diff_local(CVSFILE *, void *);
211 static int	cvs_diff_pre_exec(struct cvsroot *);
212 static int	cvs_diff_cleanup(void);
213 int		cvs_diffreg(const char *, const char *);
214 
215 static void	 output(const char *, FILE *, const char *, FILE *);
216 static void	 check(FILE *, FILE *);
217 static void	 range(int, int, char *);
218 static void	 uni_range(int, int);
219 static void	 dump_context_vec(FILE *, FILE *);
220 static void	 dump_unified_vec(FILE *, FILE *);
221 static int	 prepare(int, FILE *, off_t);
222 static void	 prune(void);
223 static void	 equiv(struct line *, int, struct line *, int, int *);
224 static void	 unravel(int);
225 static void	 unsort(struct line *, int, int *);
226 static void	 change(const char *, FILE *, const char *, FILE *, int,
227 		    int, int, int);
228 static void	 sort(struct line *, int);
229 static int	 ignoreline(char *);
230 static int	 asciifile(FILE *);
231 static int	 fetch(long *, int, int, FILE *, int, int);
232 static int	 newcand(int, int, int);
233 static int	 search(int *, int, int);
234 static int	 skipline(FILE *);
235 static int	 isqrt(int);
236 static int	 stone(int *, int, int *, int *);
237 static int	 readhash(FILE *);
238 static int	 files_differ(FILE *, FILE *);
239 static char	*match_function(const long *, int, FILE *);
240 static char	*preadline(int, size_t, off_t);
241 
242 
243 static int aflag, bflag, dflag, iflag, Nflag, pflag, tflag, Tflag, wflag;
244 static int context;
245 static int format = D_NORMAL;
246 static struct stat stb1, stb2;
247 static char *ifdefname, *ignore_pats, diffargs[128];
248 static const char *diff_file;
249 regex_t ignore_re;
250 
251 static int  *J;			/* will be overlaid on class */
252 static int  *class;		/* will be overlaid on file[0] */
253 static int  *klist;		/* will be overlaid on file[0] after class */
254 static int  *member;		/* will be overlaid on file[1] */
255 static int   clen;
256 static int   inifdef;		/* whether or not we are in a #ifdef block */
257 static int   diff_len[2];
258 static int   pref, suff;	/* length of prefix and suffix */
259 static int   slen[2];
260 static int   anychange;
261 static long *ixnew;		/* will be overlaid on file[1] */
262 static long *ixold;		/* will be overlaid on klist */
263 static struct cand *clist;	/* merely a free storage pot for candidates */
264 static int   clistlen;		/* the length of clist */
265 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
266 static u_char *chrtran;		/* translation table for case-folding */
267 static struct context_vec *context_vec_start;
268 static struct context_vec *context_vec_end;
269 static struct context_vec *context_vec_ptr;
270 
271 #define FUNCTION_CONTEXT_SIZE	41
272 static char lastbuf[FUNCTION_CONTEXT_SIZE];
273 static int  lastline;
274 static int  lastmatchline;
275 
276 
277 /*
278  * chrtran points to one of 2 translation tables: cup2low if folding upper to
279  * lower case clow2low if not folding case
280  */
281 u_char clow2low[256] = {
282 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
283 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
284 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
285 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
286 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
287 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
288 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
289 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
290 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
291 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
292 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
293 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
294 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
295 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
296 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
297 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
298 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
299 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
300 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
301 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
302 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
303 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
304 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
305 	0xfd, 0xfe, 0xff
306 };
307 
308 u_char cup2low[256] = {
309 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
310 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
311 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
312 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
313 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
314 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
315 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
316 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
317 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
318 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
319 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
320 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
321 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
322 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
323 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
324 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
325 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
326 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
327 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
328 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
329 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
330 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
331 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
332 	0xfd, 0xfe, 0xff
333 };
334 
335 
336 struct cvs_cmd cvs_cmd_diff = {
337 	CVS_OP_DIFF, CVS_REQ_DIFF, "diff",
338 	{ "di", "dif" },
339 	"Show differences between revisions",
340 	"[-cilNnpu] [[-D date] [-r rev] [-D date2 | -r rev2]] "
341 	"[-k mode] [file ...]",
342 	"cD:iklNnpr:Ru",
343 	NULL,
344 	CF_RECURSE | CF_IGNORE | CF_SORT | CF_KNOWN,
345 	cvs_diff_init,
346 	cvs_diff_pre_exec,
347 	cvs_diff_remote,
348 	cvs_diff_local,
349 	NULL,
350 	cvs_diff_cleanup,
351 	CVS_CMD_SENDARGS2 | CVS_CMD_ALLOWSPEC | CVS_CMD_SENDDIR
352 };
353 
354 
355 struct cvs_cmd cvs_cmd_rdiff = {
356 	CVS_OP_RDIFF, CVS_REQ_DIFF, "rdiff",
357 	{ "pa", "patch" },
358 	"Create 'patch' format diffs between releases",
359 	"[-flR] [-c | -u] [-s | -t] [-V ver] -D date | -r rev "
360 	"[-D date2 | -rev2] module ...",
361 	"cD:flRr:stuV:",
362 	NULL,
363 	CF_RECURSE | CF_IGNORE | CF_SORT | CF_KNOWN,
364 	cvs_diff_init,
365 	cvs_diff_pre_exec,
366 	cvs_diff_remote,
367 	cvs_diff_local,
368 	NULL,
369 	cvs_diff_cleanup,
370 	CVS_CMD_SENDARGS2 | CVS_CMD_ALLOWSPEC | CVS_CMD_SENDDIR
371 };
372 
373 static struct diff_arg *dap = NULL;
374 static int recurse;
375 
376 static int
377 cvs_diff_init(struct cvs_cmd *cmd, int argc, char **argv, int *arg)
378 {
379 	int ch;
380 
381 	dap = (struct diff_arg *)malloc(sizeof(*dap));
382 	if (dap == NULL)
383 		return (CVS_EX_DATA);
384 	dap->date1 = dap->date2 = dap->rev1 = dap->rev2 = NULL;
385 	strlcpy(diffargs, argv[0], sizeof(diffargs));
386 
387 	while ((ch = getopt(argc, argv, cmd->cmd_opts)) != -1) {
388 		switch (ch) {
389 		case 'c':
390 			strlcat(diffargs, " -c", sizeof(diffargs));
391 			format = D_CONTEXT;
392 			break;
393 		case 'D':
394 			if (dap->date1 == NULL && dap->rev1 == NULL) {
395 				dap->date1 = optarg;
396 			} else if (dap->date2 == NULL && dap->rev2 == NULL) {
397 				dap->date2 = optarg;
398 			} else {
399 				cvs_log(LP_ERR,
400 				    "no more than two revisions/dates can "
401 				    "be specified");
402 			}
403 			break;
404 		case 'l':
405 			strlcat(diffargs, " -l", sizeof(diffargs));
406 			recurse = 0;
407 			cvs_cmd_diff.file_flags &= ~CF_RECURSE;
408 			break;
409 		case 'i':
410 			strlcat(diffargs, " -i", sizeof(diffargs));
411 			iflag = 1;
412 			break;
413 		case 'N':
414 			strlcat(diffargs, " -N", sizeof(diffargs));
415 			Nflag = 1;
416 			break;
417 		case 'n':
418 			strlcat(diffargs, " -n", sizeof(diffargs));
419 			format = D_RCSDIFF;
420 			break;
421 		case 'p':
422 			strlcat(diffargs, " -p", sizeof(diffargs));
423 			pflag = 1;
424 			break;
425 		case 'r':
426 			if ((dap->rev1 == NULL) && (dap->date1 == NULL)) {
427 				dap->rev1 = optarg;
428 			} else if ((dap->rev2 == NULL) &&
429 			    (dap->date2 == NULL)) {
430 				dap->rev2 = optarg;
431 			} else {
432 				cvs_log(LP_ERR,
433 				    "no more than two revisions/dates can "
434 				    "be specified");
435 				return (CVS_EX_USAGE);
436 			}
437 			break;
438 		case 'R':
439 			cvs_cmd_diff.file_flags |= CF_RECURSE;
440 			break;
441 		case 'u':
442 			strlcat(diffargs, " -u", sizeof(diffargs));
443 			format = D_UNIFIED;
444 			break;
445 		default:
446 			return (CVS_EX_USAGE);
447 		}
448 	}
449 
450 	*arg = optind;
451 	return (0);
452 }
453 
454 int
455 cvs_diff_cleanup(void)
456 {
457 	if (dap != NULL) {
458 		free(dap);
459 		dap = NULL;
460 	}
461 	return (0);
462 }
463 
464 /*
465  * cvs_diff_pre_exec()
466  *
467  */
468 int
469 cvs_diff_pre_exec(struct cvsroot *root)
470 {
471 	if (root->cr_method != CVS_METHOD_LOCAL) {
472 		/* send the flags */
473 		if (Nflag && (cvs_sendarg(root, "-N", 0) < 0))
474 			return (CVS_EX_PROTO);
475 		if (pflag && (cvs_sendarg(root, "-p", 0) < 0))
476 			return (CVS_EX_PROTO);
477 
478 		if (format == D_CONTEXT) {
479 			if (cvs_sendarg(root, "-c", 0) < 0)
480 				return (CVS_EX_PROTO);
481 		} else if (format == D_UNIFIED) {
482 			if (cvs_sendarg(root, "-u", 0) < 0)
483 				return (CVS_EX_PROTO);
484 		}
485 
486 		if (dap->rev1 != NULL) {
487 			if ((cvs_sendarg(root, "-r", 0) < 0) ||
488 			    (cvs_sendarg(root, dap->rev1, 0) < 0))
489 				return (CVS_EX_PROTO);
490 		} else if (dap->date1 != NULL) {
491 			if ((cvs_sendarg(root, "-D", 0) < 0) ||
492 			    (cvs_sendarg(root, dap->date1, 0) < 0))
493 				return (CVS_EX_PROTO);
494 		}
495 		if (dap->rev2 != NULL) {
496 			if ((cvs_sendarg(root, "-r", 0) < 0) ||
497 			    (cvs_sendarg(root, dap->rev2, 0) < 0))
498 				return (CVS_EX_PROTO);
499 		} else if (dap->date2 != NULL) {
500 			if ((cvs_sendarg(root, "-D", 0) < 0) ||
501 			    (cvs_sendarg(root, dap->date2, 0) < 0))
502 				return  (CVS_EX_PROTO);
503 		}
504 	}
505 
506 	return (0);
507 }
508 
509 
510 /*
511  * cvs_diff_file()
512  *
513  * Diff a single file.
514  */
515 static int
516 cvs_diff_remote(struct cvs_file *cfp, void *arg)
517 {
518 	char *dir, *repo;
519 	char fpath[MAXPATHLEN], dfpath[MAXPATHLEN];
520 	struct cvsroot *root;
521 
522 	if (cfp->cf_type == DT_DIR) {
523 		if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
524 			root = cfp->cf_parent->cf_root;
525 			cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name);
526 		} else {
527 			root = cfp->cf_root;
528 #if 0
529 			if ((cfp->cf_parent == NULL) ||
530 			    (root != cfp->cf_parent->cf_root)) {
531 				cvs_connect(root);
532 				cvs_diff_pre_exec(root);
533 			}
534 #endif
535 
536 			cvs_senddir(root, cfp);
537 		}
538 
539 		return (0);
540 	}
541 
542 	if (cfp->cf_cvstat == CVS_FST_LOST) {
543 		cvs_log(LP_WARN, "cannot find file %s", cfp->cf_name);
544 		return (0);
545 	}
546 
547 	diff_file = cvs_file_getpath(cfp, fpath, sizeof(fpath));
548 
549 	if (cfp->cf_parent != NULL) {
550 		dir = cvs_file_getpath(cfp->cf_parent, dfpath, sizeof(dfpath));
551 		root = cfp->cf_parent->cf_root;
552 		repo = cfp->cf_parent->cf_repo;
553 	} else {
554 		dir = ".";
555 		root = NULL;
556 		repo = NULL;
557 	}
558 
559 	if (cfp->cf_cvstat == CVS_FST_UNKNOWN) {
560 		cvs_sendreq(root, CVS_REQ_QUESTIONABLE, cfp->cf_name);
561 		return (0);
562 	}
563 
564 	if (cvs_sendentry(root, cfp) < 0)
565 		return (CVS_EX_PROTO);
566 
567 	if (cfp->cf_cvstat == CVS_FST_UPTODATE) {
568 		cvs_sendreq(root, CVS_REQ_UNCHANGED, cfp->cf_name);
569 		return (0);
570 	}
571 
572 	/* at this point, the file is modified */
573 	if ((cvs_sendreq(root, CVS_REQ_MODIFIED, cfp->cf_name) < 0) ||
574 	    (cvs_sendfile(root, diff_file) < 0))
575 		return (CVS_EX_PROTO);
576 
577 	return (0);
578 }
579 
580 static int
581 cvs_diff_local(CVSFILE *cf, void *arg)
582 {
583 	int len;
584 	char *repo, buf[64];
585 	char fpath[MAXPATHLEN], rcspath[MAXPATHLEN];
586 	char path_tmp1[MAXPATHLEN], path_tmp2[MAXPATHLEN];
587 	BUF *b1, *b2;
588 	RCSNUM *r1, *r2;
589 	RCSFILE *rf;
590 	struct cvsroot *root;
591 
592 	rf = NULL;
593 	root = CVS_DIR_ROOT(cf);
594 	repo = CVS_DIR_REPO(cf);
595 	diff_file = cvs_file_getpath(cf, fpath, sizeof(fpath));
596 
597 	if (cf->cf_type == DT_DIR) {
598 		if (verbosity > 1)
599 			cvs_log(LP_NOTICE, "Diffing %s", fpath);
600 		return (0);
601 	}
602 
603 	if (cf->cf_cvstat == CVS_FST_LOST) {
604 		cvs_log(LP_WARN, "cannot find file %s", cf->cf_name);
605 		return (0);
606 	}
607 
608 	if (cf->cf_cvstat == CVS_FST_UNKNOWN) {
609 		cvs_log(LP_WARN, "I know nothing about %s", diff_file);
610 		return (0);
611 	} else if (cf->cf_cvstat == CVS_FST_UPTODATE)
612 		return (0);
613 
614 	/* at this point, the file is modified */
615 	len = snprintf(rcspath, sizeof(rcspath), "%s/%s/%s%s",
616 	    root->cr_dir, repo, diff_file, RCS_FILE_EXT);
617 	if (len == -1 || len >= (int)sizeof(rcspath)) {
618 		errno = ENAMETOOLONG;
619 		cvs_log(LP_ERRNO, "%s", rcspath);
620 		return (CVS_EX_DATA);
621 	}
622 
623 	rf = rcs_open(rcspath, RCS_READ);
624 	if (rf == NULL) {
625 		return (CVS_EX_DATA);
626 	}
627 
628 	cvs_printf("Index: %s\n%s\nRCS file: %s\n", diff_file,
629 	    RCS_DIFF_DIV, rcspath);
630 
631 	if (dap->rev1 == NULL)
632 		r1 = cf->cf_lrev;
633 	else {
634 		if ((r1 = rcsnum_parse(dap->rev1)) == NULL) {
635 			return (CVS_EX_DATA);
636 		}
637 	}
638 
639 	cvs_printf("retrieving revision %s\n",
640 	    rcsnum_tostr(r1, buf, sizeof(buf)));
641 	b1 = rcs_getrev(rf, r1);
642 
643 	if (b1 == NULL) {
644 		cvs_log(LP_ERR, "failed to retrieve revision %s\n",
645 		    rcsnum_tostr(r1, buf, sizeof(buf)));
646 		if (r1 != cf->cf_lrev)
647 			rcsnum_free(r1);
648 		return (CVS_EX_DATA);
649 	}
650 
651 	if (r1 != cf->cf_lrev)
652 		rcsnum_free(r1);
653 
654 	if (dap->rev2 != NULL) {
655 		cvs_printf("retrieving revision %s\n", dap->rev2);
656 		if ((r2 = rcsnum_parse(dap->rev2)) == NULL) {
657 			return (CVS_EX_DATA);
658 		}
659 		b2 = rcs_getrev(rf, r2);
660 		rcsnum_free(r2);
661 	} else {
662 		b2 = cvs_buf_load(diff_file, BUF_AUTOEXT);
663 	}
664 
665 	rcs_close(rf);
666 
667 	if (b2 == NULL) {
668 		cvs_log(LP_ERR, "failed to retrieve revision %s\n",
669 		    dap->rev2);
670 		cvs_buf_free(b1);
671 		return (CVS_EX_DATA);
672 	}
673 
674 	cvs_printf("%s", diffargs);
675 	cvs_printf(" -r%s", buf);
676 	if (dap->rev2 != NULL)
677 		cvs_printf(" -r%s", dap->rev2);
678 	cvs_printf(" %s\n", diff_file);
679 	strlcpy(path_tmp1, "/tmp/diff1.XXXXXXXXXX", sizeof(path_tmp1));
680 	if (cvs_buf_write_stmp(b1, path_tmp1, 0600) == -1) {
681 		cvs_buf_free(b1);
682 		cvs_buf_free(b2);
683 		return (CVS_EX_DATA);
684 	}
685 	cvs_buf_free(b1);
686 
687 	strlcpy(path_tmp2, "/tmp/diff2.XXXXXXXXXX", sizeof(path_tmp2));
688 	if (cvs_buf_write_stmp(b2, path_tmp2, 0600) == -1) {
689 		cvs_buf_free(b2);
690 		(void)unlink(path_tmp1);
691 		return (CVS_EX_DATA);
692 	}
693 	cvs_buf_free(b2);
694 
695 	cvs_diffreg(path_tmp1, path_tmp2);
696 	(void)unlink(path_tmp1);
697 	(void)unlink(path_tmp2);
698 
699 	return (0);
700 }
701 
702 
703 int
704 cvs_diffreg(const char *file1, const char *file2)
705 {
706 	FILE *f1, *f2;
707 	int i, rval;
708 	void *tmp;
709 
710 	f1 = f2 = NULL;
711 	rval = D_SAME;
712 	anychange = 0;
713 	lastline = 0;
714 	lastmatchline = 0;
715 	context_vec_ptr = context_vec_start - 1;
716 	chrtran = (iflag ? cup2low : clow2low);
717 
718 	f1 = fopen(file1, "r");
719 	if (f1 == NULL) {
720 		cvs_log(LP_ERRNO, "%s", file1);
721 		goto closem;
722 	}
723 
724 	f2 = fopen(file2, "r");
725 	if (f2 == NULL) {
726 		cvs_log(LP_ERRNO, "%s", file2);
727 		goto closem;
728 	}
729 
730 	switch (files_differ(f1, f2)) {
731 	case 0:
732 		goto closem;
733 	case 1:
734 		break;
735 	default:
736 		/* error */
737 		goto closem;
738 	}
739 
740 	if (!asciifile(f1) || !asciifile(f2)) {
741 		rval = D_BINARY;
742 		goto closem;
743 	}
744 	if ((prepare(0, f1, stb1.st_size) < 0) ||
745 	    (prepare(1, f2, stb2.st_size) < 0)) {
746 		goto closem;
747 	}
748 	prune();
749 	sort(sfile[0], slen[0]);
750 	sort(sfile[1], slen[1]);
751 
752 	member = (int *)file[1];
753 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
754 	if ((tmp = realloc(member, (slen[1] + 2) * sizeof(int))) == NULL) {
755 		free(member);
756 		member = NULL;
757 		cvs_log(LP_ERRNO, "failed to resize member");
758 		goto closem;
759 	}
760 	member = (int *)tmp;
761 
762 	class = (int *)file[0];
763 	unsort(sfile[0], slen[0], class);
764 	if ((tmp = realloc(class, (slen[0] + 2) * sizeof(int))) == NULL) {
765 		free(class);
766 		class = NULL;
767 		cvs_log(LP_ERRNO, "failed to resize class");
768 		goto closem;
769 	}
770 	class = (int *)tmp;
771 
772 	if ((klist = malloc((slen[0] + 2) * sizeof(int))) == NULL) {
773 		cvs_log(LP_ERRNO, "failed to allocate klist");
774 		goto closem;
775 	}
776 	clen = 0;
777 	clistlen = 100;
778 	if ((clist = malloc(clistlen * sizeof(cand))) == NULL) {
779 		cvs_log(LP_ERRNO, "failed to allocate clist");
780 		goto closem;
781 	}
782 
783 	if ((i = stone(class, slen[0], member, klist)) < 0)
784 		goto closem;
785 
786 	free(member);
787 	free(class);
788 
789 	if ((tmp = realloc(J, (diff_len[0] + 2) * sizeof(int))) == NULL) {
790 		free(J);
791 		J = NULL;
792 		cvs_log(LP_ERRNO, "failed to resize J");
793 		goto closem;
794 	}
795 	J = (int *)tmp;
796 	unravel(klist[i]);
797 	free(clist);
798 	free(klist);
799 
800 	if ((tmp = realloc(ixold, (diff_len[0] + 2) * sizeof(long))) == NULL) {
801 		free(ixold);
802 		ixold = NULL;
803 		cvs_log(LP_ERRNO, "failed to resize ixold");
804 		goto closem;
805 	}
806 	ixold = (long *)tmp;
807 	if ((tmp = realloc(ixnew, (diff_len[1] + 2) * sizeof(long))) == NULL) {
808 		free(ixnew);
809 		ixnew = NULL;
810 		cvs_log(LP_ERRNO, "failed to resize ixnew");
811 		goto closem;
812 	}
813 	ixnew = (long *)tmp;
814 	check(f1, f2);
815 	output(file1, f1, file2, f2);
816 
817 closem:
818 	if (anychange) {
819 		if (rval == D_SAME)
820 			rval = D_DIFFER;
821 	}
822 	if (f1 != NULL)
823 		fclose(f1);
824 	if (f2 != NULL)
825 		fclose(f2);
826 
827 	return (rval);
828 }
829 
830 /*
831  * Check to see if the given files differ.
832  * Returns 0 if they are the same, 1 if different, and -1 on error.
833  * XXX - could use code from cmp(1) [faster]
834  */
835 static int
836 files_differ(FILE *f1, FILE *f2)
837 {
838 	char buf1[BUFSIZ], buf2[BUFSIZ];
839 	size_t i, j;
840 
841 	if (stb1.st_size != stb2.st_size)
842 		return (1);
843 	for (;;) {
844 		i = fread(buf1, 1, sizeof(buf1), f1);
845 		j = fread(buf2, 1, sizeof(buf2), f2);
846 		if (i != j)
847 			return (1);
848 		if (i == 0 && j == 0) {
849 			if (ferror(f1) || ferror(f2))
850 				return (1);
851 			return (0);
852 		}
853 		if (memcmp(buf1, buf2, i) != 0)
854 			return (1);
855 	}
856 }
857 
858 static int
859 prepare(int i, FILE *fd, off_t filesize)
860 {
861 	void *tmp;
862 	struct line *p;
863 	int j, h;
864 	size_t sz;
865 
866 	rewind(fd);
867 
868 	sz = ((size_t)filesize <= SIZE_MAX ? (size_t)filesize : SIZE_MAX) / 25;
869 	if (sz < 100)
870 		sz = 100;
871 
872 	p = (struct line *)malloc((sz + 3) * sizeof(struct line));
873 	if (p == NULL) {
874 		cvs_log(LP_ERRNO, "failed to prepare line array");
875 		return (-1);
876 	}
877 	for (j = 0; (h = readhash(fd));) {
878 		if (j == (int)sz) {
879 			sz = sz * 3 / 2;
880 			tmp = realloc(p, (sz + 3) * sizeof(struct line));
881 			if (tmp == NULL) {
882 				cvs_log(LP_ERRNO, "failed to grow line array");
883 				free(p);
884 				return (-1);
885 			}
886 			p = (struct line *)tmp;
887 		}
888 		p[++j].value = h;
889 	}
890 	diff_len[i] = j;
891 	file[i] = p;
892 
893 	return (0);
894 }
895 
896 static void
897 prune(void)
898 {
899 	int i, j;
900 
901 	for (pref = 0; pref < diff_len[0] && pref < diff_len[1] &&
902 	    file[0][pref + 1].value == file[1][pref + 1].value;
903 	    pref++)
904 		;
905 	for (suff = 0;
906 	    (suff < diff_len[0] - pref) && (suff < diff_len[1] - pref) &&
907 	    (file[0][diff_len[0] - suff].value ==
908 	    file[1][diff_len[1] - suff].value);
909 	    suff++)
910 		;
911 	for (j = 0; j < 2; j++) {
912 		sfile[j] = file[j] + pref;
913 		slen[j] = diff_len[j] - pref - suff;
914 		for (i = 0; i <= slen[j]; i++)
915 			sfile[j][i].serial = i;
916 	}
917 }
918 
919 static void
920 equiv(struct line *a, int n, struct line *b, int m, int *c)
921 {
922 	int i, j;
923 
924 	i = j = 1;
925 	while (i <= n && j <= m) {
926 		if (a[i].value < b[j].value)
927 			a[i++].value = 0;
928 		else if (a[i].value == b[j].value)
929 			a[i++].value = j;
930 		else
931 			j++;
932 	}
933 	while (i <= n)
934 		a[i++].value = 0;
935 	b[m + 1].value = 0;
936 	j = 0;
937 	while (++j <= m) {
938 		c[j] = -b[j].serial;
939 		while (b[j + 1].value == b[j].value) {
940 			j++;
941 			c[j] = b[j].serial;
942 		}
943 	}
944 	c[j] = -1;
945 }
946 
947 /* Code taken from ping.c */
948 static int
949 isqrt(int n)
950 {
951 	int y, x = 1;
952 
953 	if (n == 0)
954 		return (0);
955 
956 	do { /* newton was a stinker */
957 		y = x;
958 		x = n / x;
959 		x += y;
960 		x /= 2;
961 	} while ((x - y) > 1 || (x - y) < -1);
962 
963 	return (x);
964 }
965 
966 static int
967 stone(int *a, int n, int *b, int *c)
968 {
969 	int ret;
970 	int i, k, y, j, l;
971 	int oldc, tc, oldl;
972 	u_int numtries;
973 
974 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
975 	const u_int bound = dflag ? UINT_MAX : MAX(256, (u_int)isqrt(n));
976 
977 	k = 0;
978 	if ((ret = newcand(0, 0, 0)) < 0)
979 		return (-1);
980 	c[0] = ret;
981 	for (i = 1; i <= n; i++) {
982 		j = a[i];
983 		if (j == 0)
984 			continue;
985 		y = -b[j];
986 		oldl = 0;
987 		oldc = c[0];
988 		numtries = 0;
989 		do {
990 			if (y <= clist[oldc].y)
991 				continue;
992 			l = search(c, k, y);
993 			if (l != oldl + 1)
994 				oldc = c[l - 1];
995 			if (l <= k) {
996 				if (clist[c[l]].y <= y)
997 					continue;
998 				tc = c[l];
999 				if ((ret = newcand(i, y, oldc)) < 0)
1000 					return (-1);
1001 				c[l] = ret;
1002 				oldc = tc;
1003 				oldl = l;
1004 				numtries++;
1005 			} else {
1006 				if ((ret = newcand(i, y, oldc)) < 0)
1007 					return (-1);
1008 				c[l] = ret;
1009 				k++;
1010 				break;
1011 			}
1012 		} while ((y = b[++j]) > 0 && numtries < bound);
1013 	}
1014 	return (k);
1015 }
1016 
1017 static int
1018 newcand(int x, int y, int pred)
1019 {
1020 	struct cand *q, *tmp;
1021 	int newclistlen;
1022 
1023 	if (clen == clistlen) {
1024 		newclistlen = clistlen * 11 / 10;
1025 		tmp = realloc(clist, newclistlen * sizeof(cand));
1026 		if (tmp == NULL) {
1027 			cvs_log(LP_ERRNO, "failed to resize clist");
1028 			return (-1);
1029 		}
1030 		clist = tmp;
1031 		clistlen = newclistlen;
1032 	}
1033 	q = clist + clen;
1034 	q->x = x;
1035 	q->y = y;
1036 	q->pred = pred;
1037 	return (clen++);
1038 }
1039 
1040 static int
1041 search(int *c, int k, int y)
1042 {
1043 	int i, j, l, t;
1044 
1045 	if (clist[c[k]].y < y)	/* quick look for typical case */
1046 		return (k + 1);
1047 	i = 0;
1048 	j = k + 1;
1049 	while (1) {
1050 		l = i + j;
1051 		if ((l >>= 1) <= i)
1052 			break;
1053 		t = clist[c[l]].y;
1054 		if (t > y)
1055 			j = l;
1056 		else if (t < y)
1057 			i = l;
1058 		else
1059 			return (l);
1060 	}
1061 	return (l + 1);
1062 }
1063 
1064 static void
1065 unravel(int p)
1066 {
1067 	struct cand *q;
1068 	int i;
1069 
1070 	for (i = 0; i <= diff_len[0]; i++)
1071 		J[i] = i <= pref ? i :
1072 		    i > diff_len[0] - suff ? i + diff_len[1] - diff_len[0] : 0;
1073 	for (q = clist + p; q->y != 0; q = clist + q->pred)
1074 		J[q->x + pref] = q->y + pref;
1075 }
1076 
1077 /*
1078  * Check does double duty:
1079  *  1.	ferret out any fortuitous correspondences due
1080  *	to confounding by hashing (which result in "jackpot")
1081  *  2.  collect random access indexes to the two files
1082  */
1083 static void
1084 check(FILE *f1, FILE *f2)
1085 {
1086 	int i, j, jackpot, c, d;
1087 	long ctold, ctnew;
1088 
1089 	rewind(f1);
1090 	rewind(f2);
1091 	j = 1;
1092 	ixold[0] = ixnew[0] = 0;
1093 	jackpot = 0;
1094 	ctold = ctnew = 0;
1095 	for (i = 1; i <= diff_len[0]; i++) {
1096 		if (J[i] == 0) {
1097 			ixold[i] = ctold += skipline(f1);
1098 			continue;
1099 		}
1100 		while (j < J[i]) {
1101 			ixnew[j] = ctnew += skipline(f2);
1102 			j++;
1103 		}
1104 		if (bflag || wflag || iflag) {
1105 			for (;;) {
1106 				c = getc(f1);
1107 				d = getc(f2);
1108 				/*
1109 				 * GNU diff ignores a missing newline
1110 				 * in one file if bflag || wflag.
1111 				 */
1112 				if ((bflag || wflag) &&
1113 				    ((c == EOF && d == '\n') ||
1114 				    (c == '\n' && d == EOF))) {
1115 					break;
1116 				}
1117 				ctold++;
1118 				ctnew++;
1119 				if (bflag && isspace(c) && isspace(d)) {
1120 					do {
1121 						if (c == '\n')
1122 							break;
1123 						ctold++;
1124 					} while (isspace(c = getc(f1)));
1125 					do {
1126 						if (d == '\n')
1127 							break;
1128 						ctnew++;
1129 					} while (isspace(d = getc(f2)));
1130 				} else if (wflag) {
1131 					while (isspace(c) && c != '\n') {
1132 						c = getc(f1);
1133 						ctold++;
1134 					}
1135 					while (isspace(d) && d != '\n') {
1136 						d = getc(f2);
1137 						ctnew++;
1138 					}
1139 				}
1140 				if (chrtran[c] != chrtran[d]) {
1141 					jackpot++;
1142 					J[i] = 0;
1143 					if (c != '\n' && c != EOF)
1144 						ctold += skipline(f1);
1145 					if (d != '\n' && c != EOF)
1146 						ctnew += skipline(f2);
1147 					break;
1148 				}
1149 				if (c == '\n' || c == EOF)
1150 					break;
1151 			}
1152 		} else {
1153 			for (;;) {
1154 				ctold++;
1155 				ctnew++;
1156 				if ((c = getc(f1)) != (d = getc(f2))) {
1157 					/* jackpot++; */
1158 					J[i] = 0;
1159 					if (c != '\n' && c != EOF)
1160 						ctold += skipline(f1);
1161 					if (d != '\n' && c != EOF)
1162 						ctnew += skipline(f2);
1163 					break;
1164 				}
1165 				if (c == '\n' || c == EOF)
1166 					break;
1167 			}
1168 		}
1169 		ixold[i] = ctold;
1170 		ixnew[j] = ctnew;
1171 		j++;
1172 	}
1173 	for (; j <= diff_len[1]; j++)
1174 		ixnew[j] = ctnew += skipline(f2);
1175 	/*
1176 	 * if (jackpot)
1177 	 *	cvs_printf("jackpot\n");
1178 	 */
1179 }
1180 
1181 /* shellsort CACM #201 */
1182 static void
1183 sort(struct line *a, int n)
1184 {
1185 	struct line *ai, *aim, w;
1186 	int j, m = 0, k;
1187 
1188 	if (n == 0)
1189 		return;
1190 	for (j = 1; j <= n; j *= 2)
1191 		m = 2 * j - 1;
1192 	for (m /= 2; m != 0; m /= 2) {
1193 		k = n - m;
1194 		for (j = 1; j <= k; j++) {
1195 			for (ai = &a[j]; ai > a; ai -= m) {
1196 				aim = &ai[m];
1197 				if (aim < ai)
1198 					break;	/* wraparound */
1199 				if (aim->value > ai[0].value ||
1200 				    (aim->value == ai[0].value &&
1201 					aim->serial > ai[0].serial))
1202 					break;
1203 				w.value = ai[0].value;
1204 				ai[0].value = aim->value;
1205 				aim->value = w.value;
1206 				w.serial = ai[0].serial;
1207 				ai[0].serial = aim->serial;
1208 				aim->serial = w.serial;
1209 			}
1210 		}
1211 	}
1212 }
1213 
1214 static void
1215 unsort(struct line *f, int l, int *b)
1216 {
1217 	int *a, i;
1218 
1219 	if ((a = (int *)malloc((l + 1) * sizeof(int))) == NULL) {
1220 		cvs_log(LP_ERRNO, "failed to allocate sort array");
1221 		return;
1222 	}
1223 	for (i = 1; i <= l; i++)
1224 		a[f[i].serial] = f[i].value;
1225 	for (i = 1; i <= l; i++)
1226 		b[i] = a[i];
1227 	free(a);
1228 }
1229 
1230 static int
1231 skipline(FILE *f)
1232 {
1233 	int i, c;
1234 
1235 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
1236 		continue;
1237 	return (i);
1238 }
1239 
1240 static void
1241 output(const char *file1, FILE *f1, const char *file2, FILE *f2)
1242 {
1243 	int m, i0, i1, j0, j1;
1244 
1245 	rewind(f1);
1246 	rewind(f2);
1247 	m = diff_len[0];
1248 	J[0] = 0;
1249 	J[m + 1] = diff_len[1] + 1;
1250 	for (i0 = 1; i0 <= m; i0 = i1 + 1) {
1251 		while (i0 <= m && J[i0] == J[i0 - 1] + 1)
1252 			i0++;
1253 		j0 = J[i0 - 1] + 1;
1254 		i1 = i0 - 1;
1255 		while (i1 < m && J[i1 + 1] == 0)
1256 			i1++;
1257 		j1 = J[i1 + 1] - 1;
1258 		J[i1] = j1;
1259 		change(file1, f1, file2, f2, i0, i1, j0, j1);
1260 	}
1261 	if (m == 0)
1262 		change(file1, f1, file2, f2, 1, 0, 1, diff_len[1]);
1263 	if (format == D_IFDEF) {
1264 		for (;;) {
1265 #define	c i0
1266 			if ((c = getc(f1)) == EOF)
1267 				return;
1268 			cvs_putchar(c);
1269 		}
1270 #undef c
1271 	}
1272 	if (anychange != 0) {
1273 		if (format == D_CONTEXT)
1274 			dump_context_vec(f1, f2);
1275 		else if (format == D_UNIFIED)
1276 			dump_unified_vec(f1, f2);
1277 	}
1278 }
1279 
1280 static __inline void
1281 range(int a, int b, char *separator)
1282 {
1283 	cvs_printf("%d", a > b ? b : a);
1284 	if (a < b)
1285 		cvs_printf("%s%d", separator, b);
1286 }
1287 
1288 static __inline void
1289 uni_range(int a, int b)
1290 {
1291 	if (a < b)
1292 		cvs_printf("%d,%d", a, b - a + 1);
1293 	else if (a == b)
1294 		cvs_printf("%d", b);
1295 	else
1296 		cvs_printf("%d,0", b);
1297 }
1298 
1299 static char *
1300 preadline(int fd, size_t rlen, off_t off)
1301 {
1302 	char *line;
1303 	ssize_t nr;
1304 
1305 	line = malloc(rlen + 1);
1306 	if (line == NULL) {
1307 		cvs_log(LP_ERRNO, "failed to allocate line");
1308 		return (NULL);
1309 	}
1310 	if ((nr = pread(fd, line, rlen, off)) < 0) {
1311 		cvs_log(LP_ERRNO, "preadline failed");
1312 		return (NULL);
1313 	}
1314 	line[nr] = '\0';
1315 	return (line);
1316 }
1317 
1318 static int
1319 ignoreline(char *line)
1320 {
1321 	int ret;
1322 
1323 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1324 	free(line);
1325 	return (ret == 0);	/* if it matched, it should be ignored. */
1326 }
1327 
1328 /*
1329  * Indicate that there is a difference between lines a and b of the from file
1330  * to get to lines c to d of the to file.  If a is greater then b then there
1331  * are no lines in the from file involved and this means that there were
1332  * lines appended (beginning at b).  If c is greater than d then there are
1333  * lines missing from the to file.
1334  */
1335 static void
1336 change(const char *file1, FILE *f1, const char *file2, FILE *f2,
1337 	int a, int b, int c, int d)
1338 {
1339 	static size_t max_context = 64;
1340 	int i;
1341 
1342 	if (format != D_IFDEF && a > b && c > d)
1343 		return;
1344 	if (ignore_pats != NULL) {
1345 		char *line;
1346 		/*
1347 		 * All lines in the change, insert, or delete must
1348 		 * match an ignore pattern for the change to be
1349 		 * ignored.
1350 		 */
1351 		if (a <= b) {		/* Changes and deletes. */
1352 			for (i = a; i <= b; i++) {
1353 				line = preadline(fileno(f1),
1354 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1355 				if (!ignoreline(line))
1356 					goto proceed;
1357 			}
1358 		}
1359 		if (a > b || c <= d) {	/* Changes and inserts. */
1360 			for (i = c; i <= d; i++) {
1361 				line = preadline(fileno(f2),
1362 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1363 				if (!ignoreline(line))
1364 					goto proceed;
1365 			}
1366 		}
1367 		return;
1368 	}
1369 proceed:
1370 	if (format == D_CONTEXT || format == D_UNIFIED) {
1371 		/*
1372 		 * Allocate change records as needed.
1373 		 */
1374 		if (context_vec_ptr == context_vec_end - 1) {
1375 			struct context_vec *tmp;
1376 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1377 			max_context <<= 1;
1378 			if ((tmp = realloc(context_vec_start,
1379 			    max_context * sizeof(struct context_vec))) == NULL) {
1380 				free(context_vec_start);
1381 				context_vec_start = NULL;
1382 				cvs_log(LP_ERRNO,
1383 				    "failed to resize context_vec_start");
1384 				return;
1385 			}
1386 			context_vec_start = tmp;
1387 			context_vec_end = context_vec_start + max_context;
1388 			context_vec_ptr = context_vec_start + offset;
1389 		}
1390 		if (anychange == 0) {
1391 			/*
1392 			 * Print the context/unidiff header first time through.
1393 			 */
1394 			cvs_printf("%s %s	%s",
1395 			    format == D_CONTEXT ? "***" : "---", diff_file,
1396 			    ctime(&stb1.st_mtime));
1397 			cvs_printf("%s %s	%s",
1398 			    format == D_CONTEXT ? "---" : "+++", diff_file,
1399 			    ctime(&stb2.st_mtime));
1400 			anychange = 1;
1401 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1402 		    c > context_vec_ptr->d + (2 * context) + 1) {
1403 			/*
1404 			 * If this change is more than 'context' lines from the
1405 			 * previous change, dump the record and reset it.
1406 			 */
1407 			if (format == D_CONTEXT)
1408 				dump_context_vec(f1, f2);
1409 			else
1410 				dump_unified_vec(f1, f2);
1411 		}
1412 		context_vec_ptr++;
1413 		context_vec_ptr->a = a;
1414 		context_vec_ptr->b = b;
1415 		context_vec_ptr->c = c;
1416 		context_vec_ptr->d = d;
1417 		return;
1418 	}
1419 	if (anychange == 0)
1420 		anychange = 1;
1421 	switch (format) {
1422 	case D_BRIEF:
1423 		return;
1424 	case D_NORMAL:
1425 		range(a, b, ",");
1426 		cvs_putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1427 		if (format == D_NORMAL)
1428 			range(c, d, ",");
1429 		cvs_putchar('\n');
1430 		break;
1431 	case D_RCSDIFF:
1432 		if (a > b)
1433 			cvs_printf("a%d %d\n", b, d - c + 1);
1434 		else {
1435 			cvs_printf("d%d %d\n", a, b - a + 1);
1436 
1437 			if (!(c > d))	/* add changed lines */
1438 				cvs_printf("a%d %d\n", b, d - c + 1);
1439 		}
1440 		break;
1441 	}
1442 	if (format == D_NORMAL || format == D_IFDEF) {
1443 		fetch(ixold, a, b, f1, '<', 1);
1444 		if (a <= b && c <= d && format == D_NORMAL)
1445 			puts("---");
1446 	}
1447 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1448 	if (inifdef) {
1449 		cvs_printf("#endif /* %s */\n", ifdefname);
1450 		inifdef = 0;
1451 	}
1452 }
1453 
1454 static int
1455 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1456 {
1457 	int i, j, c, lastc, col, nc;
1458 
1459 	/*
1460 	 * When doing #ifdef's, copy down to current line
1461 	 * if this is the first file, so that stuff makes it to output.
1462 	 */
1463 	if (format == D_IFDEF && oldfile) {
1464 		long curpos = ftell(lb);
1465 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1466 		nc = f[a > b ? b : a - 1] - curpos;
1467 		for (i = 0; i < nc; i++)
1468 			cvs_putchar(getc(lb));
1469 	}
1470 	if (a > b)
1471 		return (0);
1472 	if (format == D_IFDEF) {
1473 		if (inifdef) {
1474 			cvs_printf("#else /* %s%s */\n",
1475 			    oldfile == 1 ? "!" : "", ifdefname);
1476 		} else {
1477 			if (oldfile)
1478 				cvs_printf("#ifndef %s\n", ifdefname);
1479 			else
1480 				cvs_printf("#ifdef %s\n", ifdefname);
1481 		}
1482 		inifdef = 1 + oldfile;
1483 	}
1484 	for (i = a; i <= b; i++) {
1485 		fseek(lb, f[i - 1], SEEK_SET);
1486 		nc = f[i] - f[i - 1];
1487 		if (format != D_IFDEF && ch != '\0') {
1488 			cvs_putchar(ch);
1489 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
1490 			    || format == D_UNIFIED))
1491 				cvs_putchar('\t');
1492 			else if (format != D_UNIFIED)
1493 				cvs_putchar(' ');
1494 		}
1495 		col = 0;
1496 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1497 			if ((c = getc(lb)) == EOF) {
1498 				if (format == D_RCSDIFF)
1499 					warnx("No newline at end of file");
1500 				else
1501 					puts("\n\\ No newline at end of file");
1502 				return (0);
1503 			}
1504 			if (c == '\t' && tflag) {
1505 				do {
1506 					cvs_putchar(' ');
1507 				} while (++col & 7);
1508 			} else {
1509 				cvs_putchar(c);
1510 				col++;
1511 			}
1512 		}
1513 	}
1514 	return (0);
1515 }
1516 
1517 /*
1518  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1519  */
1520 static int
1521 readhash(FILE *f)
1522 {
1523 	int i, t, space;
1524 	int sum;
1525 
1526 	sum = 1;
1527 	space = 0;
1528 	if (!bflag && !wflag) {
1529 		if (iflag)
1530 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1531 				if (t == EOF) {
1532 					if (i == 0)
1533 						return (0);
1534 					break;
1535 				}
1536 				sum = sum * 127 + chrtran[t];
1537 			}
1538 		else
1539 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1540 				if (t == EOF) {
1541 					if (i == 0)
1542 						return (0);
1543 					break;
1544 				}
1545 				sum = sum * 127 + t;
1546 			}
1547 	} else {
1548 		for (i = 0;;) {
1549 			switch (t = getc(f)) {
1550 			case '\t':
1551 			case ' ':
1552 				space++;
1553 				continue;
1554 			default:
1555 				if (space && !wflag) {
1556 					i++;
1557 					space = 0;
1558 				}
1559 				sum = sum * 127 + chrtran[t];
1560 				i++;
1561 				continue;
1562 			case EOF:
1563 				if (i == 0)
1564 					return (0);
1565 				/* FALLTHROUGH */
1566 			case '\n':
1567 				break;
1568 			}
1569 			break;
1570 		}
1571 	}
1572 	/*
1573 	 * There is a remote possibility that we end up with a zero sum.
1574 	 * Zero is used as an EOF marker, so return 1 instead.
1575 	 */
1576 	return (sum == 0 ? 1 : sum);
1577 }
1578 
1579 static int
1580 asciifile(FILE *f)
1581 {
1582 	char buf[BUFSIZ];
1583 	int i, cnt;
1584 
1585 	if (aflag || f == NULL)
1586 		return (1);
1587 
1588 	rewind(f);
1589 	cnt = fread(buf, 1, sizeof(buf), f);
1590 	for (i = 0; i < cnt; i++)
1591 		if (!isprint(buf[i]) && !isspace(buf[i]))
1592 			return (0);
1593 	return (1);
1594 }
1595 
1596 static char*
1597 match_function(const long *f, int pos, FILE *fp)
1598 {
1599 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1600 	size_t nc;
1601 	int last = lastline;
1602 	char *p;
1603 
1604 	lastline = pos;
1605 	while (pos > last) {
1606 		fseek(fp, f[pos - 1], SEEK_SET);
1607 		nc = f[pos] - f[pos - 1];
1608 		if (nc >= sizeof(buf))
1609 			nc = sizeof(buf) - 1;
1610 		nc = fread(buf, 1, nc, fp);
1611 		if (nc > 0) {
1612 			buf[nc] = '\0';
1613 			p = strchr((const char *)buf, '\n');
1614 			if (p != NULL)
1615 				*p = '\0';
1616 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1617 				strlcpy(lastbuf, (const char *)buf, sizeof lastbuf);
1618 				lastmatchline = pos;
1619 				return lastbuf;
1620 			}
1621 		}
1622 		pos--;
1623 	}
1624 	return (lastmatchline > 0) ? lastbuf : NULL;
1625 }
1626 
1627 
1628 /* dump accumulated "context" diff changes */
1629 static void
1630 dump_context_vec(FILE *f1, FILE *f2)
1631 {
1632 	struct context_vec *cvp = context_vec_start;
1633 	int lowa, upb, lowc, upd, do_output;
1634 	int a, b, c, d;
1635 	char ch, *f;
1636 
1637 	if (context_vec_start > context_vec_ptr)
1638 		return;
1639 
1640 	b = d = 0;		/* gcc */
1641 	lowa = MAX(1, cvp->a - context);
1642 	upb = MIN(diff_len[0], context_vec_ptr->b + context);
1643 	lowc = MAX(1, cvp->c - context);
1644 	upd = MIN(diff_len[1], context_vec_ptr->d + context);
1645 
1646 	cvs_printf("***************");
1647 	if (pflag) {
1648 		f = match_function(ixold, lowa - 1, f1);
1649 		if (f != NULL) {
1650 			cvs_putchar(' ');
1651 			cvs_printf("%s", f);
1652 		}
1653 	}
1654 	cvs_printf("\n*** ");
1655 	range(lowa, upb, ",");
1656 	cvs_printf(" ****\n");
1657 
1658 	/*
1659 	 * Output changes to the "old" file.  The first loop suppresses
1660 	 * output if there were no changes to the "old" file (we'll see
1661 	 * the "old" lines as context in the "new" list).
1662 	 */
1663 	do_output = 0;
1664 	for (; cvp <= context_vec_ptr; cvp++)
1665 		if (cvp->a <= cvp->b) {
1666 			cvp = context_vec_start;
1667 			do_output++;
1668 			break;
1669 		}
1670 	if (do_output) {
1671 		while (cvp <= context_vec_ptr) {
1672 			a = cvp->a;
1673 			b = cvp->b;
1674 			c = cvp->c;
1675 			d = cvp->d;
1676 
1677 			if (a <= b && c <= d)
1678 				ch = 'c';
1679 			else
1680 				ch = (a <= b) ? 'd' : 'a';
1681 
1682 			if (ch == 'a')
1683 				fetch(ixold, lowa, b, f1, ' ', 0);
1684 			else {
1685 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
1686 				fetch(ixold, a, b, f1,
1687 				    ch == 'c' ? '!' : '-', 0);
1688 			}
1689 			lowa = b + 1;
1690 			cvp++;
1691 		}
1692 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1693 	}
1694 	/* output changes to the "new" file */
1695 	cvs_printf("--- ");
1696 	range(lowc, upd, ",");
1697 	cvs_printf(" ----\n");
1698 
1699 	do_output = 0;
1700 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1701 		if (cvp->c <= cvp->d) {
1702 			cvp = context_vec_start;
1703 			do_output++;
1704 			break;
1705 		}
1706 	if (do_output) {
1707 		while (cvp <= context_vec_ptr) {
1708 			a = cvp->a;
1709 			b = cvp->b;
1710 			c = cvp->c;
1711 			d = cvp->d;
1712 
1713 			if (a <= b && c <= d)
1714 				ch = 'c';
1715 			else
1716 				ch = (a <= b) ? 'd' : 'a';
1717 
1718 			if (ch == 'd')
1719 				fetch(ixnew, lowc, d, f2, ' ', 0);
1720 			else {
1721 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1722 				fetch(ixnew, c, d, f2,
1723 				    ch == 'c' ? '!' : '+', 0);
1724 			}
1725 			lowc = d + 1;
1726 			cvp++;
1727 		}
1728 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1729 	}
1730 	context_vec_ptr = context_vec_start - 1;
1731 }
1732 
1733 /* dump accumulated "unified" diff changes */
1734 static void
1735 dump_unified_vec(FILE *f1, FILE *f2)
1736 {
1737 	struct context_vec *cvp = context_vec_start;
1738 	int lowa, upb, lowc, upd;
1739 	int a, b, c, d;
1740 	char ch, *f;
1741 
1742 	if (context_vec_start > context_vec_ptr)
1743 		return;
1744 
1745 	b = d = 0;		/* gcc */
1746 	lowa = MAX(1, cvp->a - context);
1747 	upb = MIN(diff_len[0], context_vec_ptr->b + context);
1748 	lowc = MAX(1, cvp->c - context);
1749 	upd = MIN(diff_len[1], context_vec_ptr->d + context);
1750 
1751 	cvs_printf("@@ -");
1752 	uni_range(lowa, upb);
1753 	cvs_printf(" +");
1754 	uni_range(lowc, upd);
1755 	cvs_printf(" @@");
1756 	if (pflag) {
1757 		f = match_function(ixold, lowa - 1, f1);
1758 		if (f != NULL) {
1759 			cvs_putchar(' ');
1760 			cvs_printf("%s", f);
1761 		}
1762 	}
1763 	cvs_putchar('\n');
1764 
1765 	/*
1766 	 * Output changes in "unified" diff format--the old and new lines
1767 	 * are printed together.
1768 	 */
1769 	for (; cvp <= context_vec_ptr; cvp++) {
1770 		a = cvp->a;
1771 		b = cvp->b;
1772 		c = cvp->c;
1773 		d = cvp->d;
1774 
1775 		/*
1776 		 * c: both new and old changes
1777 		 * d: only changes in the old file
1778 		 * a: only changes in the new file
1779 		 */
1780 		if (a <= b && c <= d)
1781 			ch = 'c';
1782 		else
1783 			ch = (a <= b) ? 'd' : 'a';
1784 
1785 		switch (ch) {
1786 		case 'c':
1787 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1788 			fetch(ixold, a, b, f1, '-', 0);
1789 			fetch(ixnew, c, d, f2, '+', 0);
1790 			break;
1791 		case 'd':
1792 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
1793 			fetch(ixold, a, b, f1, '-', 0);
1794 			break;
1795 		case 'a':
1796 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
1797 			fetch(ixnew, c, d, f2, '+', 0);
1798 			break;
1799 		}
1800 		lowa = b + 1;
1801 		lowc = d + 1;
1802 	}
1803 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
1804 
1805 	context_vec_ptr = context_vec_start - 1;
1806 }
1807