xref: /dflybsd-src/usr.bin/diff/diffreg.c (revision c9733229451fac5faa53b1a016b01866eae75a1c)
1 /*	$OpenBSD: diffreg.c,v 1.95 2021/10/24 21:24:16 deraadt Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/stat.h>
68 #include <sys/wait.h>
69 
70 #include <ctype.h>
71 #include <err.h>
72 #include <errno.h>
73 #include <fcntl.h>
74 #include <paths.h>
75 #include <stddef.h>
76 #include <stdint.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81 #include <limits.h>
82 
83 #include "diff.h"
84 #include "xmalloc.h"
85 
86 #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87 #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88 
89 /*
90  * diff - compare two files.
91  */
92 
93 /*
94  *	Uses an algorithm due to Harold Stone, which finds
95  *	a pair of longest identical subsequences in the two
96  *	files.
97  *
98  *	The major goal is to generate the match vector J.
99  *	J[i] is the index of the line in file1 corresponding
100  *	to line i file0. J[i] = 0 if there is no
101  *	such line in file1.
102  *
103  *	Lines are hashed so as to work in core. All potential
104  *	matches are located by sorting the lines of each file
105  *	on the hash (called ``value''). In particular, this
106  *	collects the equivalence classes in file1 together.
107  *	Subroutine equiv replaces the value of each line in
108  *	file0 by the index of the first element of its
109  *	matching equivalence in (the reordered) file1.
110  *	To save space equiv squeezes file1 into a single
111  *	array member in which the equivalence classes
112  *	are simply concatenated, except that their first
113  *	members are flagged by changing sign.
114  *
115  *	Next the indices that point into member are unsorted into
116  *	array class according to the original order of file0.
117  *
118  *	The cleverness lies in routine stone. This marches
119  *	through the lines of file0, developing a vector klist
120  *	of "k-candidates". At step i a k-candidate is a matched
121  *	pair of lines x,y (x in file0 y in file1) such that
122  *	there is a common subsequence of length k
123  *	between the first i lines of file0 and the first y
124  *	lines of file1, but there is no such subsequence for
125  *	any smaller y. x is the earliest possible mate to y
126  *	that occurs in such a subsequence.
127  *
128  *	Whenever any of the members of the equivalence class of
129  *	lines in file1 matable to a line in file0 has serial number
130  *	less than the y of some k-candidate, that k-candidate
131  *	with the smallest such y is replaced. The new
132  *	k-candidate is chained (via pred) to the current
133  *	k-1 candidate so that the actual subsequence can
134  *	be recovered. When a member has serial number greater
135  *	that the y of all k-candidates, the klist is extended.
136  *	At the end, the longest subsequence is pulled out
137  *	and placed in the array J by unravel
138  *
139  *	With J in hand, the matches there recorded are
140  *	check'ed against reality to assure that no spurious
141  *	matches have crept in due to hashing. If they have,
142  *	they are broken, and "jackpot" is recorded--a harmless
143  *	matter except that a true match for a spuriously
144  *	mated line may now be unnecessarily reported as a change.
145  *
146  *	Much of the complexity of the program comes simply
147  *	from trying to minimize core utilization and
148  *	maximize the range of doable problems by dynamically
149  *	allocating what is needed and reusing what is not.
150  *	The core requirements for problems larger than somewhat
151  *	are (in words) 2*length(file0) + length(file1) +
152  *	3*(number of k-candidates installed),  typically about
153  *	6n words for files of length n.
154  */
155 
156 struct cand {
157 	int	x;
158 	int	y;
159 	int	pred;
160 };
161 
162 struct line {
163 	int	serial;
164 	int	value;
165 } *file[2];
166 
167 /*
168  * The following struct is used to record change information when
169  * doing a "context" or "unified" diff.  (see routine "change" to
170  * understand the highly mnemonic field names)
171  */
172 struct context_vec {
173 	int	a;		/* start line in old file */
174 	int	b;		/* end line in old file */
175 	int	c;		/* start line in new file */
176 	int	d;		/* end line in new file */
177 };
178 
179 #define	diff_output	printf
180 static FILE	*opentemp(const char *);
181 static void	 output(char *, FILE *, char *, FILE *, int);
182 static void	 check(FILE *, FILE *, int);
183 static void	 range(int, int, const char *);
184 static void	 uni_range(int, int);
185 static void	 dump_context_vec(FILE *, FILE *, int);
186 static void	 dump_unified_vec(FILE *, FILE *, int);
187 static void	 prepare(int, FILE *, size_t, int);
188 static void	 prune(void);
189 static void	 equiv(struct line *, int, struct line *, int, int *);
190 static void	 unravel(int);
191 static void	 unsort(struct line *, int, int *);
192 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
193 static void	 sort(struct line *, int);
194 static void	 print_header(const char *, const char *);
195 static int	 ignoreline(char *);
196 static int	 asciifile(FILE *);
197 static int	 fetch(long *, int, int, FILE *, int, int, int);
198 static int	 newcand(int, int, int);
199 static int	 search(int *, int, int);
200 static int	 skipline(FILE *);
201 static int	 isqrt(int);
202 static int	 stone(int *, int, int *, int *, int);
203 static int	 readhash(FILE *, int);
204 static int	 files_differ(FILE *, FILE *, int);
205 static char	*match_function(const long *, int, FILE *);
206 static char	*preadline(int, size_t, off_t);
207 
208 static int  *J;			/* will be overlaid on class */
209 static int  *class;		/* will be overlaid on file[0] */
210 static int  *klist;		/* will be overlaid on file[0] after class */
211 static int  *member;		/* will be overlaid on file[1] */
212 static int   clen;
213 static int   inifdef;		/* whether or not we are in a #ifdef block */
214 static int   len[2];
215 static int   pref, suff;	/* length of prefix and suffix */
216 static int   slen[2];
217 static int   anychange;
218 static long *ixnew;		/* will be overlaid on file[1] */
219 static long *ixold;		/* will be overlaid on klist */
220 static struct cand *clist;	/* merely a free storage pot for candidates */
221 static int   clistlen;		/* the length of clist */
222 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223 static u_char *chrtran;		/* translation table for case-folding */
224 static struct context_vec *context_vec_start;
225 static struct context_vec *context_vec_end;
226 static struct context_vec *context_vec_ptr;
227 
228 #define FUNCTION_CONTEXT_SIZE	55
229 static char lastbuf[FUNCTION_CONTEXT_SIZE];
230 static int lastline;
231 static int lastmatchline;
232 
233 
234 /*
235  * chrtran points to one of 2 translation tables: cup2low if folding upper to
236  * lower case clow2low if not folding case
237  */
238 u_char clow2low[256] = {
239 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
240 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
241 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
242 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
243 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
244 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
245 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
246 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
247 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
248 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
249 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
250 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
251 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
252 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
253 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
254 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
255 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
256 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
257 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
258 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
259 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
260 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
261 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
262 	0xfd, 0xfe, 0xff
263 };
264 
265 u_char cup2low[256] = {
266 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
267 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
268 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
269 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
270 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
271 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
272 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
273 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
274 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
275 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
276 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
277 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
278 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
279 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
280 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
281 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
282 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
283 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
284 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
285 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
286 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
287 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
288 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
289 	0xfd, 0xfe, 0xff
290 };
291 
292 int
diffreg(char * file1,char * file2,int flags)293 diffreg(char *file1, char *file2, int flags)
294 {
295 	FILE *f1, *f2;
296 	int i, rval;
297 
298 	f1 = f2 = NULL;
299 	rval = D_SAME;
300 	anychange = 0;
301 	lastline = 0;
302 	lastmatchline = 0;
303 	context_vec_ptr = context_vec_start - 1;
304 	if (flags & D_IGNORECASE)
305 		chrtran = cup2low;
306 	else
307 		chrtran = clow2low;
308 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
310 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311 		goto closem;
312 
313 	if (flags & D_EMPTY1)
314 		f1 = fopen(_PATH_DEVNULL, "r");
315 	else {
316 		if (!S_ISREG(stb1.st_mode)) {
317 			if ((f1 = opentemp(file1)) == NULL ||
318 			    fstat(fileno(f1), &stb1) == -1) {
319 				warn("%s", file1);
320 				status |= 2;
321 				goto closem;
322 			}
323 		} else if (strcmp(file1, "-") == 0)
324 			f1 = stdin;
325 		else
326 			f1 = fopen(file1, "r");
327 	}
328 	if (f1 == NULL) {
329 		warn("%s", file1);
330 		status |= 2;
331 		goto closem;
332 	}
333 
334 	if (flags & D_EMPTY2)
335 		f2 = fopen(_PATH_DEVNULL, "r");
336 	else {
337 		if (!S_ISREG(stb2.st_mode)) {
338 			if ((f2 = opentemp(file2)) == NULL ||
339 			    fstat(fileno(f2), &stb2) == -1) {
340 				warn("%s", file2);
341 				status |= 2;
342 				goto closem;
343 			}
344 		} else if (strcmp(file2, "-") == 0)
345 			f2 = stdin;
346 		else
347 			f2 = fopen(file2, "r");
348 	}
349 	if (f2 == NULL) {
350 		warn("%s", file2);
351 		status |= 2;
352 		goto closem;
353 	}
354 
355 	switch (files_differ(f1, f2, flags)) {
356 	case 0:
357 		goto closem;
358 	case 1:
359 		break;
360 	default:
361 		/* error */
362 		status |= 2;
363 		goto closem;
364 	}
365 
366 	if ((flags & D_FORCEASCII) == 0 &&
367 	    (!asciifile(f1) || !asciifile(f2))) {
368 		rval = D_BINARY;
369 		status |= 1;
370 		goto closem;
371 	}
372 	prepare(0, f1, stb1.st_size, flags);
373 	prepare(1, f2, stb2.st_size, flags);
374 
375 	prune();
376 	sort(sfile[0], slen[0]);
377 	sort(sfile[1], slen[1]);
378 
379 	member = (int *)file[1];
380 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382 
383 	class = (int *)file[0];
384 	unsort(sfile[0], slen[0], class);
385 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386 
387 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
388 	clen = 0;
389 	clistlen = 100;
390 	clist = xcalloc(clistlen, sizeof(*clist));
391 	i = stone(class, slen[0], member, klist, flags);
392 	free(member);
393 	free(class);
394 
395 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
396 	unravel(klist[i]);
397 	free(clist);
398 	free(klist);
399 
400 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
402 	check(f1, f2, flags);
403 	output(file1, f1, file2, f2, flags);
404 closem:
405 	if (anychange) {
406 		status |= 1;
407 		if (rval == D_SAME)
408 			rval = D_DIFFER;
409 	}
410 	if (f1 != NULL)
411 		fclose(f1);
412 	if (f2 != NULL)
413 		fclose(f2);
414 
415 	return (rval);
416 }
417 
418 /*
419  * Check to see if the given files differ.
420  * Returns 0 if they are the same, 1 if different, and -1 on error.
421  * XXX - could use code from cmp(1) [faster]
422  */
423 static int
files_differ(FILE * f1,FILE * f2,int flags)424 files_differ(FILE *f1, FILE *f2, int flags)
425 {
426 	char buf1[BUFSIZ], buf2[BUFSIZ];
427 	size_t i, j;
428 
429 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
430 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
431 		return (1);
432 
433 	if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
434 		return (0);
435 
436 	for (;;) {
437 		i = fread(buf1, 1, sizeof(buf1), f1);
438 		j = fread(buf2, 1, sizeof(buf2), f2);
439 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
440 			return (-1);
441 		if (i != j)
442 			return (1);
443 		if (i == 0)
444 			return (0);
445 		if (memcmp(buf1, buf2, i) != 0)
446 			return (1);
447 	}
448 }
449 
450 static FILE *
opentemp(const char * tmp_file)451 opentemp(const char *tmp_file)
452 {
453 	char buf[BUFSIZ], tempfile[PATH_MAX];
454 	ssize_t nread;
455 	int ifd, ofd;
456 
457 	if (strcmp(tmp_file, "-") == 0)
458 		ifd = STDIN_FILENO;
459 	else if ((ifd = open(tmp_file, O_RDONLY)) == -1)
460 		return (NULL);
461 
462 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
463 
464 	if ((ofd = mkstemp(tempfile)) == -1) {
465 		close(ifd);
466 		return (NULL);
467 	}
468 	unlink(tempfile);
469 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
470 		if (write(ofd, buf, nread) != nread) {
471 			close(ifd);
472 			close(ofd);
473 			return (NULL);
474 		}
475 	}
476 	close(ifd);
477 	lseek(ofd, (off_t)0, SEEK_SET);
478 	return (fdopen(ofd, "r"));
479 }
480 
481 char *
splice(char * dir,char * splice_file)482 splice(char *dir, char *splice_file)
483 {
484 	char *tail, *buf;
485 	size_t dirlen;
486 
487 	dirlen = strlen(dir);
488 	while (dirlen != 0 && dir[dirlen - 1] == '/')
489 	    dirlen--;
490 	if ((tail = strrchr(splice_file, '/')) == NULL)
491 		tail = splice_file;
492 	else
493 		tail++;
494 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
495 	return (buf);
496 }
497 
498 static void
prepare(int i,FILE * fd,size_t filesize,int flags)499 prepare(int i, FILE *fd, size_t filesize, int flags)
500 {
501 	struct line *p;
502 	int h;
503 	size_t sz, j;
504 
505 	rewind(fd);
506 
507 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
508 	if (sz < 100)
509 		sz = 100;
510 
511 	p = xcalloc(sz + 3, sizeof(*p));
512 	for (j = 0; (h = readhash(fd, flags));) {
513 		if (j == sz) {
514 			sz = sz * 3 / 2;
515 			p = xreallocarray(p, sz + 3, sizeof(*p));
516 		}
517 		p[++j].value = h;
518 	}
519 	len[i] = j;
520 	file[i] = p;
521 }
522 
523 static void
prune(void)524 prune(void)
525 {
526 	int i, j;
527 
528 	for (pref = 0; pref < len[0] && pref < len[1] &&
529 	    file[0][pref + 1].value == file[1][pref + 1].value;
530 	    pref++)
531 		;
532 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
533 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
534 	    suff++)
535 		;
536 	for (j = 0; j < 2; j++) {
537 		sfile[j] = file[j] + pref;
538 		slen[j] = len[j] - pref - suff;
539 		for (i = 0; i <= slen[j]; i++)
540 			sfile[j][i].serial = i;
541 	}
542 }
543 
544 static void
equiv(struct line * a,int n,struct line * b,int m,int * c)545 equiv(struct line *a, int n, struct line *b, int m, int *c)
546 {
547 	int i, j;
548 
549 	i = j = 1;
550 	while (i <= n && j <= m) {
551 		if (a[i].value < b[j].value)
552 			a[i++].value = 0;
553 		else if (a[i].value == b[j].value)
554 			a[i++].value = j;
555 		else
556 			j++;
557 	}
558 	while (i <= n)
559 		a[i++].value = 0;
560 	b[m + 1].value = 0;
561 	j = 0;
562 	while (++j <= m) {
563 		c[j] = -b[j].serial;
564 		while (b[j + 1].value == b[j].value) {
565 			j++;
566 			c[j] = b[j].serial;
567 		}
568 	}
569 	c[j] = -1;
570 }
571 
572 /* Code taken from ping.c */
573 static int
isqrt(int n)574 isqrt(int n)
575 {
576 	int y, x = 1;
577 
578 	if (n == 0)
579 		return (0);
580 
581 	do { /* newton was a stinker */
582 		y = x;
583 		x = n / x;
584 		x += y;
585 		x /= 2;
586 	} while ((x - y) > 1 || (x - y) < -1);
587 
588 	return (x);
589 }
590 
591 static int
stone(int * a,int n,int * b,int * c,int flags)592 stone(int *a, int n, int *b, int *c, int flags)
593 {
594 	int i, k, y, j, l;
595 	int oldc, tc, oldl, sq;
596 	u_int numtries, bound;
597 
598 	if (flags & D_MINIMAL)
599 		bound = UINT_MAX;
600 	else {
601 		sq = isqrt(n);
602 		bound = MAXIMUM(256, sq);
603 	}
604 
605 	k = 0;
606 	c[0] = newcand(0, 0, 0);
607 	for (i = 1; i <= n; i++) {
608 		j = a[i];
609 		if (j == 0)
610 			continue;
611 		y = -b[j];
612 		oldl = 0;
613 		oldc = c[0];
614 		numtries = 0;
615 		do {
616 			if (y <= clist[oldc].y)
617 				continue;
618 			l = search(c, k, y);
619 			if (l != oldl + 1)
620 				oldc = c[l - 1];
621 			if (l <= k) {
622 				if (clist[c[l]].y <= y)
623 					continue;
624 				tc = c[l];
625 				c[l] = newcand(i, y, oldc);
626 				oldc = tc;
627 				oldl = l;
628 				numtries++;
629 			} else {
630 				c[l] = newcand(i, y, oldc);
631 				k++;
632 				break;
633 			}
634 		} while ((y = b[++j]) > 0 && numtries < bound);
635 	}
636 	return (k);
637 }
638 
639 static int
newcand(int x,int y,int pred)640 newcand(int x, int y, int pred)
641 {
642 	struct cand *q;
643 
644 	if (clen == clistlen) {
645 		clistlen = clistlen * 11 / 10;
646 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
647 	}
648 	q = clist + clen;
649 	q->x = x;
650 	q->y = y;
651 	q->pred = pred;
652 	return (clen++);
653 }
654 
655 static int
search(int * c,int k,int y)656 search(int *c, int k, int y)
657 {
658 	int i, j, l, t;
659 
660 	if (clist[c[k]].y < y)	/* quick look for typical case */
661 		return (k + 1);
662 	i = 0;
663 	j = k + 1;
664 	for (;;) {
665 		l = (i + j) / 2;
666 		if (l <= i)
667 			break;
668 		t = clist[c[l]].y;
669 		if (t > y)
670 			j = l;
671 		else if (t < y)
672 			i = l;
673 		else
674 			return (l);
675 	}
676 	return (l + 1);
677 }
678 
679 static void
unravel(int p)680 unravel(int p)
681 {
682 	struct cand *q;
683 	int i;
684 
685 	for (i = 0; i <= len[0]; i++)
686 		J[i] = i <= pref ? i :
687 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
688 	for (q = clist + p; q->y != 0; q = clist + q->pred)
689 		J[q->x + pref] = q->y + pref;
690 }
691 
692 /*
693  * Check does double duty:
694  *  1.	ferret out any fortuitous correspondences due
695  *	to confounding by hashing (which result in "jackpot")
696  *  2.  collect random access indexes to the two files
697  */
698 static void
check(FILE * f1,FILE * f2,int flags)699 check(FILE *f1, FILE *f2, int flags)
700 {
701 	int i, j, jackpot, c, d;
702 	long ctold, ctnew;
703 
704 	rewind(f1);
705 	rewind(f2);
706 	j = 1;
707 	ixold[0] = ixnew[0] = 0;
708 	jackpot = 0;
709 	ctold = ctnew = 0;
710 	for (i = 1; i <= len[0]; i++) {
711 		if (J[i] == 0) {
712 			ixold[i] = ctold += skipline(f1);
713 			continue;
714 		}
715 		while (j < J[i]) {
716 			ixnew[j] = ctnew += skipline(f2);
717 			j++;
718 		}
719 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
720 			for (;;) {
721 				c = getc(f1);
722 				d = getc(f2);
723 				/*
724 				 * GNU diff ignores a missing newline
725 				 * in one file for -b or -w.
726 				 */
727 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
728 					if (c == EOF && d == '\n') {
729 						ctnew++;
730 						break;
731 					} else if (c == '\n' && d == EOF) {
732 						ctold++;
733 						break;
734 					}
735 				}
736 				ctold++;
737 				ctnew++;
738 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
739 				    isspace(d)) {
740 					do {
741 						if (c == '\n')
742 							break;
743 						ctold++;
744 					} while (isspace(c = getc(f1)));
745 					do {
746 						if (d == '\n')
747 							break;
748 						ctnew++;
749 					} while (isspace(d = getc(f2)));
750 				} else if ((flags & D_IGNOREBLANKS)) {
751 					while (isspace(c) && c != '\n') {
752 						c = getc(f1);
753 						ctold++;
754 					}
755 					while (isspace(d) && d != '\n') {
756 						d = getc(f2);
757 						ctnew++;
758 					}
759 				}
760 				if (chrtran[c] != chrtran[d]) {
761 					jackpot++;
762 					J[i] = 0;
763 					if (c != '\n' && c != EOF)
764 						ctold += skipline(f1);
765 					if (d != '\n' && c != EOF)
766 						ctnew += skipline(f2);
767 					break;
768 				}
769 				if (c == '\n' || c == EOF)
770 					break;
771 			}
772 		} else {
773 			for (;;) {
774 				ctold++;
775 				ctnew++;
776 				if ((c = getc(f1)) != (d = getc(f2))) {
777 					/* jackpot++; */
778 					J[i] = 0;
779 					if (c != '\n' && c != EOF)
780 						ctold += skipline(f1);
781 					if (d != '\n' && c != EOF)
782 						ctnew += skipline(f2);
783 					break;
784 				}
785 				if (c == '\n' || c == EOF)
786 					break;
787 			}
788 		}
789 		ixold[i] = ctold;
790 		ixnew[j] = ctnew;
791 		j++;
792 	}
793 	for (; j <= len[1]; j++)
794 		ixnew[j] = ctnew += skipline(f2);
795 	/*
796 	 * if (jackpot)
797 	 *	fprintf(stderr, "jackpot\n");
798 	 */
799 }
800 
801 /* shellsort CACM #201 */
802 static void
sort(struct line * a,int n)803 sort(struct line *a, int n)
804 {
805 	struct line *ai, *aim, w;
806 	int j, m = 0, k;
807 
808 	if (n == 0)
809 		return;
810 	for (j = 1; j <= n; j *= 2)
811 		m = 2 * j - 1;
812 	for (m /= 2; m != 0; m /= 2) {
813 		k = n - m;
814 		for (j = 1; j <= k; j++) {
815 			for (ai = &a[j]; ai > a; ai -= m) {
816 				aim = &ai[m];
817 				if (aim < ai)
818 					break;	/* wraparound */
819 				if (aim->value > ai[0].value ||
820 				    (aim->value == ai[0].value &&
821 					aim->serial > ai[0].serial))
822 					break;
823 				w.value = ai[0].value;
824 				ai[0].value = aim->value;
825 				aim->value = w.value;
826 				w.serial = ai[0].serial;
827 				ai[0].serial = aim->serial;
828 				aim->serial = w.serial;
829 			}
830 		}
831 	}
832 }
833 
834 static void
unsort(struct line * f,int l,int * b)835 unsort(struct line *f, int l, int *b)
836 {
837 	int *a, i;
838 
839 	a = xcalloc(l + 1, sizeof(*a));
840 	for (i = 1; i <= l; i++)
841 		a[f[i].serial] = f[i].value;
842 	for (i = 1; i <= l; i++)
843 		b[i] = a[i];
844 	free(a);
845 }
846 
847 static int
skipline(FILE * f)848 skipline(FILE *f)
849 {
850 	int i, c;
851 
852 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
853 		continue;
854 	return (i);
855 }
856 
857 static void
output(char * file1,FILE * f1,char * file2,FILE * f2,int flags)858 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
859 {
860 	int m, i0, i1, j0, j1;
861 
862 	rewind(f1);
863 	rewind(f2);
864 	m = len[0];
865 	J[0] = 0;
866 	J[m + 1] = len[1] + 1;
867 	if (diff_format != D_EDIT) {
868 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
869 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
870 				i0++;
871 			j0 = J[i0 - 1] + 1;
872 			i1 = i0 - 1;
873 			while (i1 < m && J[i1 + 1] == 0)
874 				i1++;
875 			j1 = J[i1 + 1] - 1;
876 			J[i1] = j1;
877 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
878 		}
879 	} else {
880 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
881 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
882 				i0--;
883 			j0 = J[i0 + 1] - 1;
884 			i1 = i0 + 1;
885 			while (i1 > 1 && J[i1 - 1] == 0)
886 				i1--;
887 			j1 = J[i1 - 1] + 1;
888 			J[i1] = j1;
889 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
890 		}
891 	}
892 	if (m == 0)
893 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
894 	if (diff_format == D_IFDEF) {
895 		for (;;) {
896 #define	c i0
897 			if ((c = getc(f1)) == EOF)
898 				return;
899 			diff_output("%c", c);
900 		}
901 #undef c
902 	}
903 	if (anychange != 0) {
904 		if (diff_format == D_CONTEXT)
905 			dump_context_vec(f1, f2, flags);
906 		else if (diff_format == D_UNIFIED)
907 			dump_unified_vec(f1, f2, flags);
908 	}
909 }
910 
911 static void
range(int a,int b,const char * separator)912 range(int a, int b, const char *separator)
913 {
914 	diff_output("%d", a > b ? b : a);
915 	if (a < b)
916 		diff_output("%s%d", separator, b);
917 }
918 
919 static void
uni_range(int a,int b)920 uni_range(int a, int b)
921 {
922 	if (a < b)
923 		diff_output("%d,%d", a, b - a + 1);
924 	else if (a == b)
925 		diff_output("%d", b);
926 	else
927 		diff_output("%d,0", b);
928 }
929 
930 static char *
preadline(int fd,size_t rlen,off_t off)931 preadline(int fd, size_t rlen, off_t off)
932 {
933 	char *line;
934 	ssize_t nr;
935 
936 	line = xmalloc(rlen + 1);
937 	if ((nr = pread(fd, line, rlen, off)) == -1)
938 		err(2, "preadline");
939 	if (nr > 0 && line[nr-1] == '\n')
940 		nr--;
941 	line[nr] = '\0';
942 	return (line);
943 }
944 
945 static int
ignoreline(char * line)946 ignoreline(char *line)
947 {
948 	int ret;
949 
950 	ret = regexec(&ignore_re, line, 0, NULL, 0);
951 	free(line);
952 	return (ret == 0);	/* if it matched, it should be ignored. */
953 }
954 
955 /*
956  * Indicate that there is a difference between lines a and b of the from file
957  * to get to lines c to d of the to file.  If a is greater then b then there
958  * are no lines in the from file involved and this means that there were
959  * lines appended (beginning at b).  If c is greater than d then there are
960  * lines missing from the to file.
961  */
962 static void
change(char * file1,FILE * f1,char * file2,FILE * f2,int a,int b,int c,int d,int * pflags)963 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
964     int *pflags)
965 {
966 	static size_t max_context = 64;
967 	int i;
968 
969 restart:
970 	if (diff_format != D_IFDEF && a > b && c > d)
971 		return;
972 	if (ignore_pats != NULL) {
973 		char *line;
974 		/*
975 		 * All lines in the change, insert, or delete must
976 		 * match an ignore pattern for the change to be
977 		 * ignored.
978 		 */
979 		if (a <= b) {		/* Changes and deletes. */
980 			for (i = a; i <= b; i++) {
981 				line = preadline(fileno(f1),
982 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
983 				if (!ignoreline(line))
984 					goto proceed;
985 			}
986 		}
987 		if (a > b || c <= d) {	/* Changes and inserts. */
988 			for (i = c; i <= d; i++) {
989 				line = preadline(fileno(f2),
990 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
991 				if (!ignoreline(line))
992 					goto proceed;
993 			}
994 		}
995 		return;
996 	}
997 proceed:
998 	if (*pflags & D_HEADER) {
999 		diff_output("%s %s %s\n", diffargs, file1, file2);
1000 		*pflags &= ~D_HEADER;
1001 	}
1002 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1003 		/*
1004 		 * Allocate change records as needed.
1005 		 */
1006 		if (context_vec_ptr == context_vec_end - 1) {
1007 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1008 			max_context <<= 1;
1009 			context_vec_start = xreallocarray(context_vec_start,
1010 			    max_context, sizeof(*context_vec_start));
1011 			context_vec_end = context_vec_start + max_context;
1012 			context_vec_ptr = context_vec_start + offset;
1013 		}
1014 		if (anychange == 0) {
1015 			/*
1016 			 * Print the context/unidiff header first time through.
1017 			 */
1018 			print_header(file1, file2);
1019 			anychange = 1;
1020 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1021 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1022 			/*
1023 			 * If this change is more than 'diff_context' lines from the
1024 			 * previous change, dump the record and reset it.
1025 			 */
1026 			if (diff_format == D_CONTEXT)
1027 				dump_context_vec(f1, f2, *pflags);
1028 			else
1029 				dump_unified_vec(f1, f2, *pflags);
1030 		}
1031 		context_vec_ptr++;
1032 		context_vec_ptr->a = a;
1033 		context_vec_ptr->b = b;
1034 		context_vec_ptr->c = c;
1035 		context_vec_ptr->d = d;
1036 		return;
1037 	}
1038 	if (anychange == 0)
1039 		anychange = 1;
1040 	switch (diff_format) {
1041 	case D_BRIEF:
1042 		return;
1043 	case D_NORMAL:
1044 	case D_EDIT:
1045 		range(a, b, ",");
1046 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1047 		if (diff_format == D_NORMAL)
1048 			range(c, d, ",");
1049 		diff_output("\n");
1050 		break;
1051 	case D_REVERSE:
1052 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1053 		range(a, b, " ");
1054 		diff_output("\n");
1055 		break;
1056 	case D_NREVERSE:
1057 		if (a > b)
1058 			diff_output("a%d %d\n", b, d - c + 1);
1059 		else {
1060 			diff_output("d%d %d\n", a, b - a + 1);
1061 			if (!(c > d))
1062 				/* add changed lines */
1063 				diff_output("a%d %d\n", b, d - c + 1);
1064 		}
1065 		break;
1066 	}
1067 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1068 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1069 		if (a <= b && c <= d && diff_format == D_NORMAL)
1070 			diff_output("---\n");
1071 	}
1072 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1073 	if (i != 0 && diff_format == D_EDIT) {
1074 		/*
1075 		 * A non-zero return value for D_EDIT indicates that the
1076 		 * last line printed was a bare dot (".") that has been
1077 		 * escaped as ".." to prevent ed(1) from misinterpreting
1078 		 * it.  We have to add a substitute command to change this
1079 		 * back and restart where we left off.
1080 		 */
1081 		diff_output(".\n");
1082 		diff_output("%ds/.//\n", a + i - 1);
1083 		b = a + i - 1;
1084 		a = b + 1;
1085 		c += i;
1086 		goto restart;
1087 	}
1088 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1089 		diff_output(".\n");
1090 	if (inifdef) {
1091 		diff_output("#endif /* %s */\n", ifdefname);
1092 		inifdef = 0;
1093 	}
1094 }
1095 
1096 static int
fetch(long * f,int a,int b,FILE * lb,int ch,int oldfile,int flags)1097 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1098 {
1099 	int i, j, c, lastc, col, nc;
1100 
1101 	/*
1102 	 * When doing #ifdef's, copy down to current line
1103 	 * if this is the first file, so that stuff makes it to output.
1104 	 */
1105 	if (diff_format == D_IFDEF && oldfile) {
1106 		long curpos = ftell(lb);
1107 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1108 		nc = f[a > b ? b : a - 1] - curpos;
1109 		for (i = 0; i < nc; i++)
1110 			diff_output("%c", getc(lb));
1111 	}
1112 	if (a > b)
1113 		return (0);
1114 	if (diff_format == D_IFDEF) {
1115 		if (inifdef) {
1116 			diff_output("#else /* %s%s */\n",
1117 			    oldfile == 1 ? "!" : "", ifdefname);
1118 		} else {
1119 			if (oldfile)
1120 				diff_output("#ifndef %s\n", ifdefname);
1121 			else
1122 				diff_output("#ifdef %s\n", ifdefname);
1123 		}
1124 		inifdef = 1 + oldfile;
1125 	}
1126 	for (i = a; i <= b; i++) {
1127 		fseek(lb, f[i - 1], SEEK_SET);
1128 		nc = f[i] - f[i - 1];
1129 		if (diff_format != D_IFDEF && ch != '\0') {
1130 			diff_output("%c", ch);
1131 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1132 			    || diff_format == D_UNIFIED))
1133 				diff_output("\t");
1134 			else if (diff_format != D_UNIFIED)
1135 				diff_output(" ");
1136 		}
1137 		col = 0;
1138 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1139 			if ((c = getc(lb)) == EOF) {
1140 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1141 				    diff_format == D_NREVERSE)
1142 					warnx("No newline at end of file");
1143 				else
1144 					diff_output("\n\\ No newline at end of "
1145 					    "file\n");
1146 				return (0);
1147 			}
1148 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1149 				do {
1150 					diff_output(" ");
1151 				} while (++col & 7);
1152 			} else {
1153 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1154 				    && lastc == '.') {
1155 					/*
1156 					 * Don't print a bare "." line
1157 					 * since that will confuse ed(1).
1158 					 * Print ".." instead and return,
1159 					 * giving the caller an offset
1160 					 * from which to restart.
1161 					 */
1162 					diff_output(".\n");
1163 					return (i - a + 1);
1164 				}
1165 				diff_output("%c", c);
1166 				col++;
1167 			}
1168 		}
1169 	}
1170 	return (0);
1171 }
1172 
1173 /*
1174  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1175  */
1176 static int
readhash(FILE * f,int flags)1177 readhash(FILE *f, int flags)
1178 {
1179 	int i, t, space;
1180 	int sum;
1181 
1182 	sum = 1;
1183 	space = 0;
1184 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1185 		if (flags & D_IGNORECASE)
1186 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1187 				if (t == EOF) {
1188 					if (i == 0)
1189 						return (0);
1190 					break;
1191 				}
1192 				sum = sum * 127 + chrtran[t];
1193 			}
1194 		else
1195 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1196 				if (t == EOF) {
1197 					if (i == 0)
1198 						return (0);
1199 					break;
1200 				}
1201 				sum = sum * 127 + t;
1202 			}
1203 	} else {
1204 		for (i = 0;;) {
1205 			switch (t = getc(f)) {
1206 			case '\t':
1207 			case '\r':
1208 			case '\v':
1209 			case '\f':
1210 			case ' ':
1211 				space++;
1212 				continue;
1213 			default:
1214 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1215 					i++;
1216 					space = 0;
1217 				}
1218 				sum = sum * 127 + chrtran[t];
1219 				i++;
1220 				continue;
1221 			case EOF:
1222 				if (i == 0)
1223 					return (0);
1224 				/* FALLTHROUGH */
1225 			case '\n':
1226 				break;
1227 			}
1228 			break;
1229 		}
1230 	}
1231 	/*
1232 	 * There is a remote possibility that we end up with a zero sum.
1233 	 * Zero is used as an EOF marker, so return 1 instead.
1234 	 */
1235 	return (sum == 0 ? 1 : sum);
1236 }
1237 
1238 static int
asciifile(FILE * f)1239 asciifile(FILE *f)
1240 {
1241 	unsigned char buf[BUFSIZ];
1242 	size_t cnt;
1243 
1244 	if (f == NULL)
1245 		return (1);
1246 
1247 	rewind(f);
1248 	cnt = fread(buf, 1, sizeof(buf), f);
1249 	return (memchr(buf, '\0', cnt) == NULL);
1250 }
1251 
1252 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1253 
1254 static char *
match_function(const long * f,int pos,FILE * fp)1255 match_function(const long *f, int pos, FILE *fp)
1256 {
1257 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1258 	size_t nc;
1259 	int last = lastline;
1260 	const char *state = NULL;
1261 
1262 	lastline = pos;
1263 	while (pos > last) {
1264 		fseek(fp, f[pos - 1], SEEK_SET);
1265 		nc = f[pos] - f[pos - 1];
1266 		if (nc >= sizeof(buf))
1267 			nc = sizeof(buf) - 1;
1268 		nc = fread(buf, 1, nc, fp);
1269 		if (nc > 0) {
1270 			buf[nc] = '\0';
1271 			buf[strcspn(buf, "\n")] = '\0';
1272 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1273 				if (begins_with(buf, "private:")) {
1274 					if (!state)
1275 						state = " (private)";
1276 				} else if (begins_with(buf, "protected:")) {
1277 					if (!state)
1278 						state = " (protected)";
1279 				} else if (begins_with(buf, "public:")) {
1280 					if (!state)
1281 						state = " (public)";
1282 				} else {
1283 					strlcpy(lastbuf, buf, sizeof lastbuf);
1284 					if (state)
1285 						strlcat(lastbuf, state,
1286 						    sizeof lastbuf);
1287 					lastmatchline = pos;
1288 					return lastbuf;
1289 				}
1290 			}
1291 		}
1292 		pos--;
1293 	}
1294 	return lastmatchline > 0 ? lastbuf : NULL;
1295 }
1296 
1297 /* dump accumulated "context" diff changes */
1298 static void
dump_context_vec(FILE * f1,FILE * f2,int flags)1299 dump_context_vec(FILE *f1, FILE *f2, int flags)
1300 {
1301 	struct context_vec *cvp = context_vec_start;
1302 	int lowa, upb, lowc, upd, do_output;
1303 	int a, b, c, d;
1304 	char ch, *f;
1305 
1306 	if (context_vec_start > context_vec_ptr)
1307 		return;
1308 
1309 	b = d = 0;		/* gcc */
1310 	lowa = MAXIMUM(1, cvp->a - diff_context);
1311 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1312 	lowc = MAXIMUM(1, cvp->c - diff_context);
1313 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1314 
1315 	diff_output("***************");
1316 	if ((flags & D_PROTOTYPE)) {
1317 		f = match_function(ixold, lowa-1, f1);
1318 		if (f != NULL)
1319 			diff_output(" %s", f);
1320 	}
1321 	diff_output("\n*** ");
1322 	range(lowa, upb, ",");
1323 	diff_output(" ****\n");
1324 
1325 	/*
1326 	 * Output changes to the "old" file.  The first loop suppresses
1327 	 * output if there were no changes to the "old" file (we'll see
1328 	 * the "old" lines as context in the "new" list).
1329 	 */
1330 	do_output = 0;
1331 	for (; cvp <= context_vec_ptr; cvp++)
1332 		if (cvp->a <= cvp->b) {
1333 			cvp = context_vec_start;
1334 			do_output++;
1335 			break;
1336 		}
1337 	if (do_output) {
1338 		while (cvp <= context_vec_ptr) {
1339 			a = cvp->a;
1340 			b = cvp->b;
1341 			c = cvp->c;
1342 			d = cvp->d;
1343 
1344 			if (a <= b && c <= d)
1345 				ch = 'c';
1346 			else
1347 				ch = (a <= b) ? 'd' : 'a';
1348 
1349 			if (ch == 'a')
1350 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1351 			else {
1352 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1353 				fetch(ixold, a, b, f1,
1354 				    ch == 'c' ? '!' : '-', 0, flags);
1355 			}
1356 			lowa = b + 1;
1357 			cvp++;
1358 		}
1359 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1360 	}
1361 	/* output changes to the "new" file */
1362 	diff_output("--- ");
1363 	range(lowc, upd, ",");
1364 	diff_output(" ----\n");
1365 
1366 	do_output = 0;
1367 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1368 		if (cvp->c <= cvp->d) {
1369 			cvp = context_vec_start;
1370 			do_output++;
1371 			break;
1372 		}
1373 	if (do_output) {
1374 		while (cvp <= context_vec_ptr) {
1375 			a = cvp->a;
1376 			b = cvp->b;
1377 			c = cvp->c;
1378 			d = cvp->d;
1379 
1380 			if (a <= b && c <= d)
1381 				ch = 'c';
1382 			else
1383 				ch = (a <= b) ? 'd' : 'a';
1384 
1385 			if (ch == 'd')
1386 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1387 			else {
1388 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1389 				fetch(ixnew, c, d, f2,
1390 				    ch == 'c' ? '!' : '+', 0, flags);
1391 			}
1392 			lowc = d + 1;
1393 			cvp++;
1394 		}
1395 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1396 	}
1397 	context_vec_ptr = context_vec_start - 1;
1398 }
1399 
1400 /* dump accumulated "unified" diff changes */
1401 static void
dump_unified_vec(FILE * f1,FILE * f2,int flags)1402 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1403 {
1404 	struct context_vec *cvp = context_vec_start;
1405 	int lowa, upb, lowc, upd;
1406 	int a, b, c, d;
1407 	char ch, *f;
1408 
1409 	if (context_vec_start > context_vec_ptr)
1410 		return;
1411 
1412 	b = d = 0;		/* gcc */
1413 	lowa = MAXIMUM(1, cvp->a - diff_context);
1414 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1415 	lowc = MAXIMUM(1, cvp->c - diff_context);
1416 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1417 
1418 	diff_output("@@ -");
1419 	uni_range(lowa, upb);
1420 	diff_output(" +");
1421 	uni_range(lowc, upd);
1422 	diff_output(" @@");
1423 	if ((flags & D_PROTOTYPE)) {
1424 		f = match_function(ixold, lowa-1, f1);
1425 		if (f != NULL)
1426 			diff_output(" %s", f);
1427 	}
1428 	diff_output("\n");
1429 
1430 	/*
1431 	 * Output changes in "unified" diff format--the old and new lines
1432 	 * are printed together.
1433 	 */
1434 	for (; cvp <= context_vec_ptr; cvp++) {
1435 		a = cvp->a;
1436 		b = cvp->b;
1437 		c = cvp->c;
1438 		d = cvp->d;
1439 
1440 		/*
1441 		 * c: both new and old changes
1442 		 * d: only changes in the old file
1443 		 * a: only changes in the new file
1444 		 */
1445 		if (a <= b && c <= d)
1446 			ch = 'c';
1447 		else
1448 			ch = (a <= b) ? 'd' : 'a';
1449 
1450 		switch (ch) {
1451 		case 'c':
1452 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1453 			fetch(ixold, a, b, f1, '-', 0, flags);
1454 			fetch(ixnew, c, d, f2, '+', 0, flags);
1455 			break;
1456 		case 'd':
1457 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1458 			fetch(ixold, a, b, f1, '-', 0, flags);
1459 			break;
1460 		case 'a':
1461 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1462 			fetch(ixnew, c, d, f2, '+', 0, flags);
1463 			break;
1464 		}
1465 		lowa = b + 1;
1466 		lowc = d + 1;
1467 	}
1468 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1469 
1470 	context_vec_ptr = context_vec_start - 1;
1471 }
1472 
1473 static void
print_header(const char * file1,const char * file2)1474 print_header(const char *file1, const char *file2)
1475 {
1476 	if (label[0] != NULL)
1477 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1478 		    label[0]);
1479 	else
1480 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1481 		    file1, ctime(&stb1.st_mtime));
1482 	if (label[1] != NULL)
1483 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1484 		    label[1]);
1485 	else
1486 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1487 		    file2, ctime(&stb2.st_mtime));
1488 }
1489