xref: /openbsd-src/usr.bin/diff/diffreg.c (revision b7041c0781c8668129da8084451ded41b0c43954)
1*b7041c07Sderaadt /*	$OpenBSD: diffreg.c,v 1.95 2021/10/24 21:24:16 deraadt Exp $	*/
2d0c3f575Sderaadt 
3d0c3f575Sderaadt /*
4d0c3f575Sderaadt  * Copyright (C) Caldera International Inc.  2001-2002.
5d0c3f575Sderaadt  * All rights reserved.
6d0c3f575Sderaadt  *
7d0c3f575Sderaadt  * Redistribution and use in source and binary forms, with or without
8d0c3f575Sderaadt  * modification, are permitted provided that the following conditions
9d0c3f575Sderaadt  * are met:
10d0c3f575Sderaadt  * 1. Redistributions of source code and documentation must retain the above
11d0c3f575Sderaadt  *    copyright notice, this list of conditions and the following disclaimer.
12d0c3f575Sderaadt  * 2. Redistributions in binary form must reproduce the above copyright
13d0c3f575Sderaadt  *    notice, this list of conditions and the following disclaimer in the
14d0c3f575Sderaadt  *    documentation and/or other materials provided with the distribution.
15d0c3f575Sderaadt  * 3. All advertising materials mentioning features or use of this software
16d0c3f575Sderaadt  *    must display the following acknowledgement:
17d0c3f575Sderaadt  *	This product includes software developed or owned by Caldera
18d0c3f575Sderaadt  *	International, Inc.
19d0c3f575Sderaadt  * 4. Neither the name of Caldera International, Inc. nor the names of other
20d0c3f575Sderaadt  *    contributors may be used to endorse or promote products derived from
21d0c3f575Sderaadt  *    this software without specific prior written permission.
22d0c3f575Sderaadt  *
23d0c3f575Sderaadt  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24d0c3f575Sderaadt  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25d0c3f575Sderaadt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26d0c3f575Sderaadt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27d0c3f575Sderaadt  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28d0c3f575Sderaadt  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29d0c3f575Sderaadt  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30d0c3f575Sderaadt  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31d0c3f575Sderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32d0c3f575Sderaadt  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33d0c3f575Sderaadt  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34d0c3f575Sderaadt  * POSSIBILITY OF SUCH DAMAGE.
35d0c3f575Sderaadt  */
364ec4b3d5Smillert /*-
374ec4b3d5Smillert  * Copyright (c) 1991, 1993
384ec4b3d5Smillert  *	The Regents of the University of California.  All rights reserved.
394ec4b3d5Smillert  *
404ec4b3d5Smillert  * Redistribution and use in source and binary forms, with or without
414ec4b3d5Smillert  * modification, are permitted provided that the following conditions
424ec4b3d5Smillert  * are met:
434ec4b3d5Smillert  * 1. Redistributions of source code must retain the above copyright
444ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer.
454ec4b3d5Smillert  * 2. Redistributions in binary form must reproduce the above copyright
464ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer in the
474ec4b3d5Smillert  *    documentation and/or other materials provided with the distribution.
484ec4b3d5Smillert  * 3. Neither the name of the University nor the names of its contributors
494ec4b3d5Smillert  *    may be used to endorse or promote products derived from this software
504ec4b3d5Smillert  *    without specific prior written permission.
514ec4b3d5Smillert  *
524ec4b3d5Smillert  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
534ec4b3d5Smillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
544ec4b3d5Smillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
554ec4b3d5Smillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
564ec4b3d5Smillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
574ec4b3d5Smillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
584ec4b3d5Smillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
594ec4b3d5Smillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
604ec4b3d5Smillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
614ec4b3d5Smillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
624ec4b3d5Smillert  * SUCH DAMAGE.
634ec4b3d5Smillert  *
644ec4b3d5Smillert  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
654ec4b3d5Smillert  */
664ec4b3d5Smillert 
674ec4b3d5Smillert #include <sys/stat.h>
68b4bca33fSmillert #include <sys/wait.h>
6926da422aStedu 
704ec4b3d5Smillert #include <ctype.h>
714ec4b3d5Smillert #include <err.h>
727b6ec9e4Smillert #include <errno.h>
7326da422aStedu #include <fcntl.h>
7440e7295bSmillert #include <paths.h>
750efcb26eSmillert #include <stddef.h>
761357284aSmillert #include <stdint.h>
774ec4b3d5Smillert #include <stdio.h>
784ec4b3d5Smillert #include <stdlib.h>
7926da422aStedu #include <string.h>
804ec4b3d5Smillert #include <unistd.h>
81b9fc9a72Sderaadt #include <limits.h>
82ae8d569bSderaadt 
83ae8d569bSderaadt #include "diff.h"
844a034c3aSray #include "xmalloc.h"
8526da422aStedu 
86b9fc9a72Sderaadt #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
87b9fc9a72Sderaadt #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
88b9fc9a72Sderaadt 
89ae8d569bSderaadt /*
90ae8d569bSderaadt  * diff - compare two files.
91ae8d569bSderaadt  */
92ae8d569bSderaadt 
93ae8d569bSderaadt /*
94ae8d569bSderaadt  *	Uses an algorithm due to Harold Stone, which finds
95ae8d569bSderaadt  *	a pair of longest identical subsequences in the two
96ae8d569bSderaadt  *	files.
97ae8d569bSderaadt  *
98ae8d569bSderaadt  *	The major goal is to generate the match vector J.
99ae8d569bSderaadt  *	J[i] is the index of the line in file1 corresponding
100ae8d569bSderaadt  *	to line i file0. J[i] = 0 if there is no
101ae8d569bSderaadt  *	such line in file1.
102ae8d569bSderaadt  *
103ae8d569bSderaadt  *	Lines are hashed so as to work in core. All potential
104ae8d569bSderaadt  *	matches are located by sorting the lines of each file
105ae8d569bSderaadt  *	on the hash (called ``value''). In particular, this
106ae8d569bSderaadt  *	collects the equivalence classes in file1 together.
107ae8d569bSderaadt  *	Subroutine equiv replaces the value of each line in
108ae8d569bSderaadt  *	file0 by the index of the first element of its
109ae8d569bSderaadt  *	matching equivalence in (the reordered) file1.
110ae8d569bSderaadt  *	To save space equiv squeezes file1 into a single
111ae8d569bSderaadt  *	array member in which the equivalence classes
112ae8d569bSderaadt  *	are simply concatenated, except that their first
113ae8d569bSderaadt  *	members are flagged by changing sign.
114ae8d569bSderaadt  *
115ae8d569bSderaadt  *	Next the indices that point into member are unsorted into
116ae8d569bSderaadt  *	array class according to the original order of file0.
117ae8d569bSderaadt  *
118ae8d569bSderaadt  *	The cleverness lies in routine stone. This marches
119ae8d569bSderaadt  *	through the lines of file0, developing a vector klist
120ae8d569bSderaadt  *	of "k-candidates". At step i a k-candidate is a matched
121ae8d569bSderaadt  *	pair of lines x,y (x in file0 y in file1) such that
122ae8d569bSderaadt  *	there is a common subsequence of length k
123ae8d569bSderaadt  *	between the first i lines of file0 and the first y
124ae8d569bSderaadt  *	lines of file1, but there is no such subsequence for
125ae8d569bSderaadt  *	any smaller y. x is the earliest possible mate to y
126ae8d569bSderaadt  *	that occurs in such a subsequence.
127ae8d569bSderaadt  *
128ae8d569bSderaadt  *	Whenever any of the members of the equivalence class of
129ae8d569bSderaadt  *	lines in file1 matable to a line in file0 has serial number
130ae8d569bSderaadt  *	less than the y of some k-candidate, that k-candidate
131ae8d569bSderaadt  *	with the smallest such y is replaced. The new
132ae8d569bSderaadt  *	k-candidate is chained (via pred) to the current
133ae8d569bSderaadt  *	k-1 candidate so that the actual subsequence can
134ae8d569bSderaadt  *	be recovered. When a member has serial number greater
135ae8d569bSderaadt  *	that the y of all k-candidates, the klist is extended.
136ae8d569bSderaadt  *	At the end, the longest subsequence is pulled out
137ae8d569bSderaadt  *	and placed in the array J by unravel
138ae8d569bSderaadt  *
139ae8d569bSderaadt  *	With J in hand, the matches there recorded are
140ae8d569bSderaadt  *	check'ed against reality to assure that no spurious
141ae8d569bSderaadt  *	matches have crept in due to hashing. If they have,
142ae8d569bSderaadt  *	they are broken, and "jackpot" is recorded--a harmless
143ae8d569bSderaadt  *	matter except that a true match for a spuriously
144ae8d569bSderaadt  *	mated line may now be unnecessarily reported as a change.
145ae8d569bSderaadt  *
146ae8d569bSderaadt  *	Much of the complexity of the program comes simply
147ae8d569bSderaadt  *	from trying to minimize core utilization and
148ae8d569bSderaadt  *	maximize the range of doable problems by dynamically
149ae8d569bSderaadt  *	allocating what is needed and reusing what is not.
150ae8d569bSderaadt  *	The core requirements for problems larger than somewhat
151ae8d569bSderaadt  *	are (in words) 2*length(file0) + length(file1) +
152ae8d569bSderaadt  *	3*(number of k-candidates installed),  typically about
153ae8d569bSderaadt  *	6n words for files of length n.
154ae8d569bSderaadt  */
155ae8d569bSderaadt 
156ae8d569bSderaadt struct cand {
157ae8d569bSderaadt 	int	x;
158ae8d569bSderaadt 	int	y;
159ae8d569bSderaadt 	int	pred;
16060b9d8fdSderaadt };
16126da422aStedu 
162ae8d569bSderaadt struct line {
163ae8d569bSderaadt 	int	serial;
164ae8d569bSderaadt 	int	value;
16592af4c2dStedu } *file[2];
16626da422aStedu 
1670efcb26eSmillert /*
1680efcb26eSmillert  * The following struct is used to record change information when
1690efcb26eSmillert  * doing a "context" or "unified" diff.  (see routine "change" to
1700efcb26eSmillert  * understand the highly mnemonic field names)
1710efcb26eSmillert  */
1720efcb26eSmillert struct context_vec {
1730efcb26eSmillert 	int	a;		/* start line in old file */
1740efcb26eSmillert 	int	b;		/* end line in old file */
1750efcb26eSmillert 	int	c;		/* start line in new file */
1760efcb26eSmillert 	int	d;		/* end line in new file */
1770efcb26eSmillert };
1780efcb26eSmillert 
1795e07282dSray #define	diff_output	printf
1807b6ec9e4Smillert static FILE	*opentemp(const char *);
181ac73e8e6Smillert static void	 output(char *, FILE *, char *, FILE *, int);
1826fc40daeSray static void	 check(FILE *, FILE *, int);
18326da422aStedu static void	 range(int, int, char *);
184c72ea322Smillert static void	 uni_range(int, int);
185dba1d6eaSray static void	 dump_context_vec(FILE *, FILE *, int);
186dba1d6eaSray static void	 dump_unified_vec(FILE *, FILE *, int);
187dba1d6eaSray static void	 prepare(int, FILE *, off_t, int);
18826da422aStedu static void	 prune(void);
18926da422aStedu static void	 equiv(struct line *, int, struct line *, int, int *);
19026da422aStedu static void	 unravel(int);
19126da422aStedu static void	 unsort(struct line *, int, int *);
19261783bcdSespie static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
19326da422aStedu static void	 sort(struct line *, int);
1947bdb251cSmillert static void	 print_header(const char *, const char *);
195ccd55a2cSotto static int	 ignoreline(char *);
1964ec4b3d5Smillert static int	 asciifile(FILE *);
197dba1d6eaSray static int	 fetch(long *, int, int, FILE *, int, int, int);
19826da422aStedu static int	 newcand(int, int, int);
19926da422aStedu static int	 search(int *, int, int);
2004ec4b3d5Smillert static int	 skipline(FILE *);
2016e18f850Sotto static int	 isqrt(int);
202dba1d6eaSray static int	 stone(int *, int, int *, int *, int);
203dba1d6eaSray static int	 readhash(FILE *, int);
2044ec4b3d5Smillert static int	 files_differ(FILE *, FILE *, int);
20596e45528Sotto static char	*match_function(const long *, int, FILE *);
206ccd55a2cSotto static char	*preadline(int, size_t, off_t);
2076e18f850Sotto 
208aa215433Sray static int  *J;			/* will be overlaid on class */
209aa215433Sray static int  *class;		/* will be overlaid on file[0] */
210aa215433Sray static int  *klist;		/* will be overlaid on file[0] after class */
211aa215433Sray static int  *member;		/* will be overlaid on file[1] */
212aa215433Sray static int   clen;
213aa215433Sray static int   inifdef;		/* whether or not we are in a #ifdef block */
214aa215433Sray static int   len[2];
215aa215433Sray static int   pref, suff;	/* length of prefix and suffix */
216aa215433Sray static int   slen[2];
217aa215433Sray static int   anychange;
218aa215433Sray static long *ixnew;		/* will be overlaid on file[1] */
219aa215433Sray static long *ixold;		/* will be overlaid on klist */
220aa215433Sray static struct cand *clist;	/* merely a free storage pot for candidates */
221aa215433Sray static int   clistlen;		/* the length of clist */
222aa215433Sray static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
223aa215433Sray static u_char *chrtran;		/* translation table for case-folding */
224aa215433Sray static struct context_vec *context_vec_start;
225aa215433Sray static struct context_vec *context_vec_end;
226aa215433Sray static struct context_vec *context_vec_ptr;
227aa215433Sray 
228aa215433Sray #define FUNCTION_CONTEXT_SIZE	55
229aa215433Sray static char lastbuf[FUNCTION_CONTEXT_SIZE];
230aa215433Sray static int lastline;
231aa215433Sray static int lastmatchline;
232aa215433Sray 
23326da422aStedu 
23426da422aStedu /*
23526da422aStedu  * chrtran points to one of 2 translation tables: cup2low if folding upper to
23626da422aStedu  * lower case clow2low if not folding case
237ae8d569bSderaadt  */
2388a15d8deSderaadt u_char clow2low[256] = {
23926da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
24026da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
24126da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
24226da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
24326da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
24426da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
24526da422aStedu 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
24626da422aStedu 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
24726da422aStedu 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
24826da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
24926da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
25026da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
25126da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
25226da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
25326da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
25426da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
25526da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
25626da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
25726da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
25826da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
25926da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
26026da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
26126da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
26226da422aStedu 	0xfd, 0xfe, 0xff
263ae8d569bSderaadt };
264ae8d569bSderaadt 
2658a15d8deSderaadt u_char cup2low[256] = {
26626da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
26726da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
26826da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
26926da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
27026da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
27126da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
27226da422aStedu 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
27326da422aStedu 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
27426da422aStedu 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
27526da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
27626da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
27726da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
27826da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
27926da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
28026da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
28126da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
28226da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
28326da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
28426da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
28526da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
28626da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
28726da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
28826da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
28926da422aStedu 	0xfd, 0xfe, 0xff
290ae8d569bSderaadt };
291ae8d569bSderaadt 
292b4bca33fSmillert int
diffreg(char * file1,char * file2,int flags)29357003866Sray diffreg(char *file1, char *file2, int flags)
294ae8d569bSderaadt {
295dba1d6eaSray 	FILE *f1, *f2;
29640e7295bSmillert 	int i, rval;
297ae8d569bSderaadt 
298dba1d6eaSray 	f1 = f2 = NULL;
299dba1d6eaSray 	rval = D_SAME;
3004ec4b3d5Smillert 	anychange = 0;
30196e45528Sotto 	lastline = 0;
30296e45528Sotto 	lastmatchline = 0;
3030efcb26eSmillert 	context_vec_ptr = context_vec_start - 1;
304dba1d6eaSray 	if (flags & D_IGNORECASE)
305dba1d6eaSray 		chrtran = cup2low;
306dba1d6eaSray 	else
307dba1d6eaSray 		chrtran = clow2low;
3087b6ec9e4Smillert 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309fed3a06dSmillert 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
31066e5764eSmillert 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311f28259e9Smillert 		goto closem;
3124ec4b3d5Smillert 
3134ec4b3d5Smillert 	if (flags & D_EMPTY1)
3144ec4b3d5Smillert 		f1 = fopen(_PATH_DEVNULL, "r");
3154ec4b3d5Smillert 	else {
3167b6ec9e4Smillert 		if (!S_ISREG(stb1.st_mode)) {
3177b6ec9e4Smillert 			if ((f1 = opentemp(file1)) == NULL ||
3183aaa63ebSderaadt 			    fstat(fileno(f1), &stb1) == -1) {
3194ec4b3d5Smillert 				warn("%s", file1);
3204ec4b3d5Smillert 				status |= 2;
3214ec4b3d5Smillert 				goto closem;
32248b947b7Smillert 			}
3237b6ec9e4Smillert 		} else if (strcmp(file1, "-") == 0)
324b1a26502Smillert 			f1 = stdin;
325b1a26502Smillert 		else
3264ec4b3d5Smillert 			f1 = fopen(file1, "r");
3274ec4b3d5Smillert 	}
3284ec4b3d5Smillert 	if (f1 == NULL) {
3294ec4b3d5Smillert 		warn("%s", file1);
3304ec4b3d5Smillert 		status |= 2;
3314ec4b3d5Smillert 		goto closem;
3324ec4b3d5Smillert 	}
3334ec4b3d5Smillert 
3344ec4b3d5Smillert 	if (flags & D_EMPTY2)
3354ec4b3d5Smillert 		f2 = fopen(_PATH_DEVNULL, "r");
3364ec4b3d5Smillert 	else {
3377b6ec9e4Smillert 		if (!S_ISREG(stb2.st_mode)) {
3387b6ec9e4Smillert 			if ((f2 = opentemp(file2)) == NULL ||
3393aaa63ebSderaadt 			    fstat(fileno(f2), &stb2) == -1) {
3404ec4b3d5Smillert 				warn("%s", file2);
3414ec4b3d5Smillert 				status |= 2;
3424ec4b3d5Smillert 				goto closem;
3434ec4b3d5Smillert 			}
3447b6ec9e4Smillert 		} else if (strcmp(file2, "-") == 0)
345b1a26502Smillert 			f2 = stdin;
346b1a26502Smillert 		else
3474ec4b3d5Smillert 			f2 = fopen(file2, "r");
3484ec4b3d5Smillert 	}
3494ec4b3d5Smillert 	if (f2 == NULL) {
3504ec4b3d5Smillert 		warn("%s", file2);
3514ec4b3d5Smillert 		status |= 2;
3524ec4b3d5Smillert 		goto closem;
3534ec4b3d5Smillert 	}
3544ec4b3d5Smillert 
3554ec4b3d5Smillert 	switch (files_differ(f1, f2, flags)) {
3564ec4b3d5Smillert 	case 0:
357b4bca33fSmillert 		goto closem;
3584ec4b3d5Smillert 	case 1:
3594ec4b3d5Smillert 		break;
3604ec4b3d5Smillert 	default:
3614ec4b3d5Smillert 		/* error */
3624ec4b3d5Smillert 		status |= 2;
3634ec4b3d5Smillert 		goto closem;
364ae8d569bSderaadt 	}
3654ec4b3d5Smillert 
366dba1d6eaSray 	if ((flags & D_FORCEASCII) == 0 &&
367dba1d6eaSray 	    (!asciifile(f1) || !asciifile(f2))) {
368b4bca33fSmillert 		rval = D_BINARY;
3695f9fc8aaSmillert 		status |= 1;
370cab5d83cSmillert 		goto closem;
371cab5d83cSmillert 	}
372dba1d6eaSray 	prepare(0, f1, stb1.st_size, flags);
373dba1d6eaSray 	prepare(1, f2, stb2.st_size, flags);
37457003866Sray 
375ae8d569bSderaadt 	prune();
376ae8d569bSderaadt 	sort(sfile[0], slen[0]);
377ae8d569bSderaadt 	sort(sfile[1], slen[1]);
378ae8d569bSderaadt 
379ae8d569bSderaadt 	member = (int *)file[1];
380ae8d569bSderaadt 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381371275caSderaadt 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382ae8d569bSderaadt 
383ae8d569bSderaadt 	class = (int *)file[0];
384ae8d569bSderaadt 	unsort(sfile[0], slen[0], class);
385371275caSderaadt 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386ae8d569bSderaadt 
38757003866Sray 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
388058e94f4Smillert 	clen = 0;
3896e18f850Sotto 	clistlen = 100;
39057003866Sray 	clist = xcalloc(clistlen, sizeof(*clist));
391dba1d6eaSray 	i = stone(class, slen[0], member, klist, flags);
392a79a2617Stedu 	free(member);
393a79a2617Stedu 	free(class);
394ae8d569bSderaadt 
395371275caSderaadt 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
396ae8d569bSderaadt 	unravel(klist[i]);
397a79a2617Stedu 	free(clist);
398a79a2617Stedu 	free(klist);
399ae8d569bSderaadt 
400371275caSderaadt 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401371275caSderaadt 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
4026fc40daeSray 	check(f1, f2, flags);
403dba1d6eaSray 	output(file1, f1, file2, f2, flags);
4044ec4b3d5Smillert closem:
4055afc3be2Smillert 	if (anychange) {
4065afc3be2Smillert 		status |= 1;
4075afc3be2Smillert 		if (rval == D_SAME)
4085afc3be2Smillert 			rval = D_DIFFER;
4095afc3be2Smillert 	}
4104ec4b3d5Smillert 	if (f1 != NULL)
4114ec4b3d5Smillert 		fclose(f1);
4124ec4b3d5Smillert 	if (f2 != NULL)
4134ec4b3d5Smillert 		fclose(f2);
414dba1d6eaSray 
415b4bca33fSmillert 	return (rval);
4164ec4b3d5Smillert }
4174ec4b3d5Smillert 
4184ec4b3d5Smillert /*
4194ec4b3d5Smillert  * Check to see if the given files differ.
4204ec4b3d5Smillert  * Returns 0 if they are the same, 1 if different, and -1 on error.
4214ec4b3d5Smillert  * XXX - could use code from cmp(1) [faster]
4224ec4b3d5Smillert  */
4234ec4b3d5Smillert static int
files_differ(FILE * f1,FILE * f2,int flags)4244ec4b3d5Smillert files_differ(FILE *f1, FILE *f2, int flags)
4254ec4b3d5Smillert {
4264ec4b3d5Smillert 	char buf1[BUFSIZ], buf2[BUFSIZ];
4274ec4b3d5Smillert 	size_t i, j;
4284ec4b3d5Smillert 
4294ec4b3d5Smillert 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
4304ec4b3d5Smillert 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
4314ec4b3d5Smillert 		return (1);
432a05fc07dShalex 
433a05fc07dShalex 	if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
434a05fc07dShalex 		return (0);
435a05fc07dShalex 
4364ec4b3d5Smillert 	for (;;) {
4374ec4b3d5Smillert 		i = fread(buf1, 1, sizeof(buf1), f1);
4384ec4b3d5Smillert 		j = fread(buf2, 1, sizeof(buf2), f2);
439bdcce04dSray 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
440bdcce04dSray 			return (-1);
4414ec4b3d5Smillert 		if (i != j)
4424ec4b3d5Smillert 			return (1);
443bdcce04dSray 		if (i == 0)
4444ec4b3d5Smillert 			return (0);
4454ec4b3d5Smillert 		if (memcmp(buf1, buf2, i) != 0)
4464ec4b3d5Smillert 			return (1);
4474ec4b3d5Smillert 	}
448ae8d569bSderaadt }
449ae8d569bSderaadt 
4507b6ec9e4Smillert static FILE *
opentemp(const char * file)4517b6ec9e4Smillert opentemp(const char *file)
452ae8d569bSderaadt {
4534c413bf6Stedu 	char buf[BUFSIZ], tempfile[PATH_MAX];
4547b6ec9e4Smillert 	ssize_t nread;
4557b6ec9e4Smillert 	int ifd, ofd;
45648b947b7Smillert 
45748b947b7Smillert 	if (strcmp(file, "-") == 0)
45848b947b7Smillert 		ifd = STDIN_FILENO;
459*b7041c07Sderaadt 	else if ((ifd = open(file, O_RDONLY)) == -1)
4604ec4b3d5Smillert 		return (NULL);
46148b947b7Smillert 
4624c413bf6Stedu 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
4637b6ec9e4Smillert 
464c2d43ecaSderaadt 	if ((ofd = mkstemp(tempfile)) == -1) {
465b5188ac1Sschwarze 		close(ifd);
4667b6ec9e4Smillert 		return (NULL);
467b5188ac1Sschwarze 	}
4687b6ec9e4Smillert 	unlink(tempfile);
4697b6ec9e4Smillert 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
4707b6ec9e4Smillert 		if (write(ofd, buf, nread) != nread) {
47148b947b7Smillert 			close(ifd);
47248b947b7Smillert 			close(ofd);
4737b6ec9e4Smillert 			return (NULL);
4747b6ec9e4Smillert 		}
4757b6ec9e4Smillert 	}
4767b6ec9e4Smillert 	close(ifd);
477f28259e9Smillert 	lseek(ofd, (off_t)0, SEEK_SET);
4787b6ec9e4Smillert 	return (fdopen(ofd, "r"));
479ae8d569bSderaadt }
480ae8d569bSderaadt 
481ae8d569bSderaadt char *
splice(char * dir,char * file)48226da422aStedu splice(char *dir, char *file)
483ae8d569bSderaadt {
48449dffe13Smillert 	char *tail, *buf;
48578245330Smillert 	size_t dirlen;
486ae8d569bSderaadt 
48778245330Smillert 	dirlen = strlen(dir);
48878245330Smillert 	while (dirlen != 0 && dir[dirlen - 1] == '/')
48978245330Smillert 	    dirlen--;
4907b6ec9e4Smillert 	if ((tail = strrchr(file, '/')) == NULL)
491ae8d569bSderaadt 		tail = file;
492ae8d569bSderaadt 	else
493ae8d569bSderaadt 		tail++;
49478245330Smillert 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
49549dffe13Smillert 	return (buf);
496ae8d569bSderaadt }
497ae8d569bSderaadt 
49826da422aStedu static void
prepare(int i,FILE * fd,off_t filesize,int flags)499dba1d6eaSray prepare(int i, FILE *fd, off_t filesize, int flags)
500ae8d569bSderaadt {
50126da422aStedu 	struct line *p;
50226da422aStedu 	int j, h;
503739e7267Sotto 	size_t sz;
504ae8d569bSderaadt 
5054ec4b3d5Smillert 	rewind(fd);
506739e7267Sotto 
507739e7267Sotto 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
508739e7267Sotto 	if (sz < 100)
509a8013e93Sotto 		sz = 100;
510739e7267Sotto 
51157003866Sray 	p = xcalloc(sz + 3, sizeof(*p));
512dba1d6eaSray 	for (j = 0; (h = readhash(fd, flags));) {
513a8013e93Sotto 		if (j == sz) {
514a8013e93Sotto 			sz = sz * 3 / 2;
515371275caSderaadt 			p = xreallocarray(p, sz + 3, sizeof(*p));
516a8013e93Sotto 		}
517a8013e93Sotto 		p[++j].value = h;
518ae8d569bSderaadt 	}
519ae8d569bSderaadt 	len[i] = j;
520ae8d569bSderaadt 	file[i] = p;
521ae8d569bSderaadt }
522ae8d569bSderaadt 
52326da422aStedu static void
prune(void)52426da422aStedu prune(void)
525ae8d569bSderaadt {
52626da422aStedu 	int i, j;
52748b8c3e3Sderaadt 
528ae8d569bSderaadt 	for (pref = 0; pref < len[0] && pref < len[1] &&
529ae8d569bSderaadt 	    file[0][pref + 1].value == file[1][pref + 1].value;
5307d2b2b91Sderaadt 	    pref++)
5317d2b2b91Sderaadt 		;
532ae8d569bSderaadt 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
533ae8d569bSderaadt 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
5347d2b2b91Sderaadt 	    suff++)
5357d2b2b91Sderaadt 		;
536ae8d569bSderaadt 	for (j = 0; j < 2; j++) {
537ae8d569bSderaadt 		sfile[j] = file[j] + pref;
538ae8d569bSderaadt 		slen[j] = len[j] - pref - suff;
539ae8d569bSderaadt 		for (i = 0; i <= slen[j]; i++)
540ae8d569bSderaadt 			sfile[j][i].serial = i;
541ae8d569bSderaadt 	}
542ae8d569bSderaadt }
543ae8d569bSderaadt 
54426da422aStedu static void
equiv(struct line * a,int n,struct line * b,int m,int * c)54526da422aStedu equiv(struct line *a, int n, struct line *b, int m, int *c)
546ae8d569bSderaadt {
54726da422aStedu 	int i, j;
54848b8c3e3Sderaadt 
549ae8d569bSderaadt 	i = j = 1;
550ae8d569bSderaadt 	while (i <= n && j <= m) {
551ae8d569bSderaadt 		if (a[i].value < b[j].value)
552ae8d569bSderaadt 			a[i++].value = 0;
553ae8d569bSderaadt 		else if (a[i].value == b[j].value)
554ae8d569bSderaadt 			a[i++].value = j;
555ae8d569bSderaadt 		else
556ae8d569bSderaadt 			j++;
557ae8d569bSderaadt 	}
558ae8d569bSderaadt 	while (i <= n)
559ae8d569bSderaadt 		a[i++].value = 0;
560ae8d569bSderaadt 	b[m + 1].value = 0;
561ae8d569bSderaadt 	j = 0;
562ae8d569bSderaadt 	while (++j <= m) {
563ae8d569bSderaadt 		c[j] = -b[j].serial;
564ae8d569bSderaadt 		while (b[j + 1].value == b[j].value) {
565ae8d569bSderaadt 			j++;
566ae8d569bSderaadt 			c[j] = b[j].serial;
567ae8d569bSderaadt 		}
568ae8d569bSderaadt 	}
569ae8d569bSderaadt 	c[j] = -1;
570ae8d569bSderaadt }
571ae8d569bSderaadt 
5726e18f850Sotto /* Code taken from ping.c */
5736e18f850Sotto static int
isqrt(int n)5746e18f850Sotto isqrt(int n)
5756e18f850Sotto {
5766e18f850Sotto 	int y, x = 1;
5776e18f850Sotto 
5786e18f850Sotto 	if (n == 0)
5796e18f850Sotto 		return (0);
5806e18f850Sotto 
5816e18f850Sotto 	do { /* newton was a stinker */
5826e18f850Sotto 		y = x;
5836e18f850Sotto 		x = n / x;
5846e18f850Sotto 		x += y;
5856e18f850Sotto 		x /= 2;
5866e18f850Sotto 	} while ((x - y) > 1 || (x - y) < -1);
5876e18f850Sotto 
5886e18f850Sotto 	return (x);
5896e18f850Sotto }
5906e18f850Sotto 
59126da422aStedu static int
stone(int * a,int n,int * b,int * c,int flags)592dba1d6eaSray stone(int *a, int n, int *b, int *c, int flags)
593ae8d569bSderaadt {
59448b8c3e3Sderaadt 	int i, k, y, j, l;
595df890a16Snicm 	int oldc, tc, oldl, sq;
596df890a16Snicm 	u_int numtries, bound;
5976e18f850Sotto 
598df890a16Snicm 	if (flags & D_MINIMAL)
599df890a16Snicm 		bound = UINT_MAX;
600df890a16Snicm 	else {
601df890a16Snicm 		sq = isqrt(n);
602b9fc9a72Sderaadt 		bound = MAXIMUM(256, sq);
603df890a16Snicm 	}
60448b8c3e3Sderaadt 
605ae8d569bSderaadt 	k = 0;
606ae8d569bSderaadt 	c[0] = newcand(0, 0, 0);
607ae8d569bSderaadt 	for (i = 1; i <= n; i++) {
608ae8d569bSderaadt 		j = a[i];
609ae8d569bSderaadt 		if (j == 0)
610ae8d569bSderaadt 			continue;
611ae8d569bSderaadt 		y = -b[j];
612ae8d569bSderaadt 		oldl = 0;
613ae8d569bSderaadt 		oldc = c[0];
6142a89a2f7Smillert 		numtries = 0;
615ae8d569bSderaadt 		do {
616ae8d569bSderaadt 			if (y <= clist[oldc].y)
617ae8d569bSderaadt 				continue;
618ae8d569bSderaadt 			l = search(c, k, y);
619ae8d569bSderaadt 			if (l != oldl + 1)
620ae8d569bSderaadt 				oldc = c[l - 1];
621ae8d569bSderaadt 			if (l <= k) {
622ae8d569bSderaadt 				if (clist[c[l]].y <= y)
623ae8d569bSderaadt 					continue;
624ae8d569bSderaadt 				tc = c[l];
625ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
626ae8d569bSderaadt 				oldc = tc;
627ae8d569bSderaadt 				oldl = l;
6282a89a2f7Smillert 				numtries++;
629ae8d569bSderaadt 			} else {
630ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
631ae8d569bSderaadt 				k++;
632ae8d569bSderaadt 				break;
633ae8d569bSderaadt 			}
6342a89a2f7Smillert 		} while ((y = b[++j]) > 0 && numtries < bound);
635ae8d569bSderaadt 	}
636ae8d569bSderaadt 	return (k);
637ae8d569bSderaadt }
638ae8d569bSderaadt 
63926da422aStedu static int
newcand(int x,int y,int pred)64026da422aStedu newcand(int x, int y, int pred)
641ae8d569bSderaadt {
64226da422aStedu 	struct cand *q;
64326da422aStedu 
6446e18f850Sotto 	if (clen == clistlen) {
6456e18f850Sotto 		clistlen = clistlen * 11 / 10;
646371275caSderaadt 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
6476e18f850Sotto 	}
6486e18f850Sotto 	q = clist + clen;
649ae8d569bSderaadt 	q->x = x;
650ae8d569bSderaadt 	q->y = y;
651ae8d569bSderaadt 	q->pred = pred;
6526e18f850Sotto 	return (clen++);
653ae8d569bSderaadt }
654ae8d569bSderaadt 
65526da422aStedu static int
search(int * c,int k,int y)65626da422aStedu search(int *c, int k, int y)
657ae8d569bSderaadt {
65848b8c3e3Sderaadt 	int i, j, l, t;
65948b8c3e3Sderaadt 
660ae8d569bSderaadt 	if (clist[c[k]].y < y)	/* quick look for typical case */
661ae8d569bSderaadt 		return (k + 1);
662ae8d569bSderaadt 	i = 0;
663ae8d569bSderaadt 	j = k + 1;
664dba1d6eaSray 	for (;;) {
665dba1d6eaSray 		l = (i + j) / 2;
666dba1d6eaSray 		if (l <= i)
667ae8d569bSderaadt 			break;
668ae8d569bSderaadt 		t = clist[c[l]].y;
669ae8d569bSderaadt 		if (t > y)
670ae8d569bSderaadt 			j = l;
671ae8d569bSderaadt 		else if (t < y)
672ae8d569bSderaadt 			i = l;
673ae8d569bSderaadt 		else
674ae8d569bSderaadt 			return (l);
675ae8d569bSderaadt 	}
676ae8d569bSderaadt 	return (l + 1);
677ae8d569bSderaadt }
678ae8d569bSderaadt 
67926da422aStedu static void
unravel(int p)68026da422aStedu unravel(int p)
681ae8d569bSderaadt {
68226da422aStedu 	struct cand *q;
68348b8c3e3Sderaadt 	int i;
68448b8c3e3Sderaadt 
685ae8d569bSderaadt 	for (i = 0; i <= len[0]; i++)
686ae8d569bSderaadt 		J[i] = i <= pref ? i :
6877d2b2b91Sderaadt 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
688ae8d569bSderaadt 	for (q = clist + p; q->y != 0; q = clist + q->pred)
689ae8d569bSderaadt 		J[q->x + pref] = q->y + pref;
690ae8d569bSderaadt }
691ae8d569bSderaadt 
69226da422aStedu /*
69349dffe13Smillert  * Check does double duty:
69449dffe13Smillert  *  1.	ferret out any fortuitous correspondences due
69549dffe13Smillert  *	to confounding by hashing (which result in "jackpot")
69649dffe13Smillert  *  2.  collect random access indexes to the two files
69726da422aStedu  */
69826da422aStedu static void
check(FILE * f1,FILE * f2,int flags)6996fc40daeSray check(FILE *f1, FILE *f2, int flags)
700ae8d569bSderaadt {
70148b8c3e3Sderaadt 	int i, j, jackpot, c, d;
702ae8d569bSderaadt 	long ctold, ctnew;
703ae8d569bSderaadt 
7044ec4b3d5Smillert 	rewind(f1);
7054ec4b3d5Smillert 	rewind(f2);
706ae8d569bSderaadt 	j = 1;
707ae8d569bSderaadt 	ixold[0] = ixnew[0] = 0;
708ae8d569bSderaadt 	jackpot = 0;
709ae8d569bSderaadt 	ctold = ctnew = 0;
710ae8d569bSderaadt 	for (i = 1; i <= len[0]; i++) {
711ae8d569bSderaadt 		if (J[i] == 0) {
7124ec4b3d5Smillert 			ixold[i] = ctold += skipline(f1);
713ae8d569bSderaadt 			continue;
714ae8d569bSderaadt 		}
715ae8d569bSderaadt 		while (j < J[i]) {
7164ec4b3d5Smillert 			ixnew[j] = ctnew += skipline(f2);
717ae8d569bSderaadt 			j++;
718ae8d569bSderaadt 		}
719dba1d6eaSray 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
720ae8d569bSderaadt 			for (;;) {
7214ec4b3d5Smillert 				c = getc(f1);
7224ec4b3d5Smillert 				d = getc(f2);
723bb34d48bSmillert 				/*
724bb34d48bSmillert 				 * GNU diff ignores a missing newline
725dba1d6eaSray 				 * in one file for -b or -w.
726bb34d48bSmillert 				 */
72782328041Skspillner 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
72882328041Skspillner 					if (c == EOF && d == '\n') {
72982328041Skspillner 						ctnew++;
730bb34d48bSmillert 						break;
73182328041Skspillner 					} else if (c == '\n' && d == EOF) {
73282328041Skspillner 						ctold++;
73382328041Skspillner 						break;
73482328041Skspillner 					}
735bb34d48bSmillert 				}
736ae8d569bSderaadt 				ctold++;
737ae8d569bSderaadt 				ctnew++;
738dba1d6eaSray 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
739dba1d6eaSray 				    isspace(d)) {
740ae8d569bSderaadt 					do {
741ae8d569bSderaadt 						if (c == '\n')
742ae8d569bSderaadt 							break;
743ae8d569bSderaadt 						ctold++;
7444ec4b3d5Smillert 					} while (isspace(c = getc(f1)));
745ae8d569bSderaadt 					do {
746ae8d569bSderaadt 						if (d == '\n')
747ae8d569bSderaadt 							break;
748ae8d569bSderaadt 						ctnew++;
7494ec4b3d5Smillert 					} while (isspace(d = getc(f2)));
750dba1d6eaSray 				} else if ((flags & D_IGNOREBLANKS)) {
751ae8d569bSderaadt 					while (isspace(c) && c != '\n') {
7524ec4b3d5Smillert 						c = getc(f1);
753ae8d569bSderaadt 						ctold++;
754ae8d569bSderaadt 					}
755ae8d569bSderaadt 					while (isspace(d) && d != '\n') {
7564ec4b3d5Smillert 						d = getc(f2);
757ae8d569bSderaadt 						ctnew++;
758ae8d569bSderaadt 					}
759ae8d569bSderaadt 				}
760ae8d569bSderaadt 				if (chrtran[c] != chrtran[d]) {
761ae8d569bSderaadt 					jackpot++;
762ae8d569bSderaadt 					J[i] = 0;
763bb34d48bSmillert 					if (c != '\n' && c != EOF)
7644ec4b3d5Smillert 						ctold += skipline(f1);
765bb34d48bSmillert 					if (d != '\n' && c != EOF)
7664ec4b3d5Smillert 						ctnew += skipline(f2);
767ae8d569bSderaadt 					break;
768ae8d569bSderaadt 				}
769bb34d48bSmillert 				if (c == '\n' || c == EOF)
770ae8d569bSderaadt 					break;
771ae8d569bSderaadt 			}
772ae8d569bSderaadt 		} else {
773ae8d569bSderaadt 			for (;;) {
774ae8d569bSderaadt 				ctold++;
775ae8d569bSderaadt 				ctnew++;
7764ec4b3d5Smillert 				if ((c = getc(f1)) != (d = getc(f2))) {
777ae8d569bSderaadt 					/* jackpot++; */
778ae8d569bSderaadt 					J[i] = 0;
779bb34d48bSmillert 					if (c != '\n' && c != EOF)
7804ec4b3d5Smillert 						ctold += skipline(f1);
781bb34d48bSmillert 					if (d != '\n' && c != EOF)
7824ec4b3d5Smillert 						ctnew += skipline(f2);
783ae8d569bSderaadt 					break;
784ae8d569bSderaadt 				}
785bb34d48bSmillert 				if (c == '\n' || c == EOF)
786ae8d569bSderaadt 					break;
787ae8d569bSderaadt 			}
788ae8d569bSderaadt 		}
789ae8d569bSderaadt 		ixold[i] = ctold;
790ae8d569bSderaadt 		ixnew[j] = ctnew;
791ae8d569bSderaadt 		j++;
792ae8d569bSderaadt 	}
7934ec4b3d5Smillert 	for (; j <= len[1]; j++)
7944ec4b3d5Smillert 		ixnew[j] = ctnew += skipline(f2);
795ae8d569bSderaadt 	/*
79648b8c3e3Sderaadt 	 * if (jackpot)
79748b8c3e3Sderaadt 	 *	fprintf(stderr, "jackpot\n");
798ae8d569bSderaadt 	 */
799ae8d569bSderaadt }
800ae8d569bSderaadt 
80148b8c3e3Sderaadt /* shellsort CACM #201 */
80226da422aStedu static void
sort(struct line * a,int n)80326da422aStedu sort(struct line *a, int n)
80448b8c3e3Sderaadt {
80548b8c3e3Sderaadt 	struct line *ai, *aim, w;
80648b8c3e3Sderaadt 	int j, m = 0, k;
807ae8d569bSderaadt 
808ae8d569bSderaadt 	if (n == 0)
809ae8d569bSderaadt 		return;
810ae8d569bSderaadt 	for (j = 1; j <= n; j *= 2)
811ae8d569bSderaadt 		m = 2 * j - 1;
812ae8d569bSderaadt 	for (m /= 2; m != 0; m /= 2) {
813ae8d569bSderaadt 		k = n - m;
814ae8d569bSderaadt 		for (j = 1; j <= k; j++) {
815ae8d569bSderaadt 			for (ai = &a[j]; ai > a; ai -= m) {
816ae8d569bSderaadt 				aim = &ai[m];
817ae8d569bSderaadt 				if (aim < ai)
818ae8d569bSderaadt 					break;	/* wraparound */
819ae8d569bSderaadt 				if (aim->value > ai[0].value ||
82026da422aStedu 				    (aim->value == ai[0].value &&
82126da422aStedu 					aim->serial > ai[0].serial))
822ae8d569bSderaadt 					break;
823ae8d569bSderaadt 				w.value = ai[0].value;
824ae8d569bSderaadt 				ai[0].value = aim->value;
825ae8d569bSderaadt 				aim->value = w.value;
826ae8d569bSderaadt 				w.serial = ai[0].serial;
827ae8d569bSderaadt 				ai[0].serial = aim->serial;
828ae8d569bSderaadt 				aim->serial = w.serial;
829ae8d569bSderaadt 			}
830ae8d569bSderaadt 		}
831ae8d569bSderaadt 	}
832ae8d569bSderaadt }
833ae8d569bSderaadt 
83426da422aStedu static void
unsort(struct line * f,int l,int * b)83526da422aStedu unsort(struct line *f, int l, int *b)
836ae8d569bSderaadt {
83748b8c3e3Sderaadt 	int *a, i;
83826da422aStedu 
83957003866Sray 	a = xcalloc(l + 1, sizeof(*a));
840ae8d569bSderaadt 	for (i = 1; i <= l; i++)
841ae8d569bSderaadt 		a[f[i].serial] = f[i].value;
842ae8d569bSderaadt 	for (i = 1; i <= l; i++)
843ae8d569bSderaadt 		b[i] = a[i];
844a79a2617Stedu 	free(a);
845ae8d569bSderaadt }
846ae8d569bSderaadt 
84726da422aStedu static int
skipline(FILE * f)8484ec4b3d5Smillert skipline(FILE *f)
849ae8d569bSderaadt {
85026da422aStedu 	int i, c;
851ae8d569bSderaadt 
852bb34d48bSmillert 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
853bb34d48bSmillert 		continue;
854ae8d569bSderaadt 	return (i);
855ae8d569bSderaadt }
856ae8d569bSderaadt 
85726da422aStedu static void
output(char * file1,FILE * f1,char * file2,FILE * f2,int flags)858ac73e8e6Smillert output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
859ae8d569bSderaadt {
86048b8c3e3Sderaadt 	int m, i0, i1, j0, j1;
86148b8c3e3Sderaadt 
8624ec4b3d5Smillert 	rewind(f1);
8634ec4b3d5Smillert 	rewind(f2);
864ae8d569bSderaadt 	m = len[0];
865ae8d569bSderaadt 	J[0] = 0;
866ae8d569bSderaadt 	J[m + 1] = len[1] + 1;
86757003866Sray 	if (diff_format != D_EDIT) {
86826da422aStedu 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
86926da422aStedu 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
87026da422aStedu 				i0++;
871ae8d569bSderaadt 			j0 = J[i0 - 1] + 1;
872ae8d569bSderaadt 			i1 = i0 - 1;
87326da422aStedu 			while (i1 < m && J[i1 + 1] == 0)
87426da422aStedu 				i1++;
875ae8d569bSderaadt 			j1 = J[i1 + 1] - 1;
876ae8d569bSderaadt 			J[i1] = j1;
87761783bcdSespie 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
87826da422aStedu 		}
87926da422aStedu 	} else {
88026da422aStedu 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
88126da422aStedu 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
88226da422aStedu 				i0--;
883ae8d569bSderaadt 			j0 = J[i0 + 1] - 1;
884ae8d569bSderaadt 			i1 = i0 + 1;
88526da422aStedu 			while (i1 > 1 && J[i1 - 1] == 0)
88626da422aStedu 				i1--;
887ae8d569bSderaadt 			j1 = J[i1 - 1] + 1;
888ae8d569bSderaadt 			J[i1] = j1;
88961783bcdSespie 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
890ae8d569bSderaadt 		}
89126da422aStedu 	}
892ae8d569bSderaadt 	if (m == 0)
89361783bcdSespie 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
89457003866Sray 	if (diff_format == D_IFDEF) {
895ae8d569bSderaadt 		for (;;) {
896ae8d569bSderaadt #define	c i0
897bb34d48bSmillert 			if ((c = getc(f1)) == EOF)
898ae8d569bSderaadt 				return;
8995e07282dSray 			diff_output("%c", c);
900ae8d569bSderaadt 		}
901ae8d569bSderaadt #undef c
902ae8d569bSderaadt 	}
9039de32c1bSmillert 	if (anychange != 0) {
90457003866Sray 		if (diff_format == D_CONTEXT)
905dba1d6eaSray 			dump_context_vec(f1, f2, flags);
90657003866Sray 		else if (diff_format == D_UNIFIED)
907dba1d6eaSray 			dump_unified_vec(f1, f2, flags);
9089de32c1bSmillert 	}
909ae8d569bSderaadt }
910ae8d569bSderaadt 
9114a034c3aSray static void
range(int a,int b,char * separator)912c72ea322Smillert range(int a, int b, char *separator)
913c72ea322Smillert {
9145e07282dSray 	diff_output("%d", a > b ? b : a);
915c72ea322Smillert 	if (a < b)
9165e07282dSray 		diff_output("%s%d", separator, b);
917c72ea322Smillert }
918c72ea322Smillert 
9194a034c3aSray static void
uni_range(int a,int b)920c72ea322Smillert uni_range(int a, int b)
921c72ea322Smillert {
922c72ea322Smillert 	if (a < b)
9235e07282dSray 		diff_output("%d,%d", a, b - a + 1);
924c72ea322Smillert 	else if (a == b)
9255e07282dSray 		diff_output("%d", b);
926c72ea322Smillert 	else
9275e07282dSray 		diff_output("%d,0", b);
928c72ea322Smillert }
929c72ea322Smillert 
930ccd55a2cSotto static char *
preadline(int fd,size_t rlen,off_t off)931dba1d6eaSray preadline(int fd, size_t rlen, off_t off)
932ccd55a2cSotto {
933ccd55a2cSotto 	char *line;
934ccd55a2cSotto 	ssize_t nr;
935ccd55a2cSotto 
936dba1d6eaSray 	line = xmalloc(rlen + 1);
9373aaa63ebSderaadt 	if ((nr = pread(fd, line, rlen, off)) == -1)
93860b54a0eSray 		err(2, "preadline");
9398d981b00Sotto 	if (nr > 0 && line[nr-1] == '\n')
9408d981b00Sotto 		nr--;
941ccd55a2cSotto 	line[nr] = '\0';
942ccd55a2cSotto 	return (line);
943ccd55a2cSotto }
944ccd55a2cSotto 
945ccd55a2cSotto static int
ignoreline(char * line)946ccd55a2cSotto ignoreline(char *line)
947ccd55a2cSotto {
948ccd55a2cSotto 	int ret;
949ccd55a2cSotto 
950ccd55a2cSotto 	ret = regexec(&ignore_re, line, 0, NULL, 0);
951a79a2617Stedu 	free(line);
952ccd55a2cSotto 	return (ret == 0);	/* if it matched, it should be ignored. */
953ccd55a2cSotto }
954ccd55a2cSotto 
955ae8d569bSderaadt /*
9564ec4b3d5Smillert  * Indicate that there is a difference between lines a and b of the from file
95726da422aStedu  * to get to lines c to d of the to file.  If a is greater then b then there
95826da422aStedu  * are no lines in the from file involved and this means that there were
95926da422aStedu  * lines appended (beginning at b).  If c is greater than d then there are
96026da422aStedu  * lines missing from the to file.
961ae8d569bSderaadt  */
96226da422aStedu static void
change(char * file1,FILE * f1,char * file2,FILE * f2,int a,int b,int c,int d,int * pflags)963ac73e8e6Smillert change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
96461783bcdSespie     int *pflags)
965ae8d569bSderaadt {
9660efcb26eSmillert 	static size_t max_context = 64;
967f8a6bf23Smillert 	int i;
9680efcb26eSmillert 
969f8a6bf23Smillert restart:
97057003866Sray 	if (diff_format != D_IFDEF && a > b && c > d)
971ae8d569bSderaadt 		return;
972ccd55a2cSotto 	if (ignore_pats != NULL) {
973ccd55a2cSotto 		char *line;
974ccd55a2cSotto 		/*
975ccd55a2cSotto 		 * All lines in the change, insert, or delete must
976ccd55a2cSotto 		 * match an ignore pattern for the change to be
977ccd55a2cSotto 		 * ignored.
978ccd55a2cSotto 		 */
979ccd55a2cSotto 		if (a <= b) {		/* Changes and deletes. */
980ccd55a2cSotto 			for (i = a; i <= b; i++) {
981ccd55a2cSotto 				line = preadline(fileno(f1),
982ccd55a2cSotto 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
983ccd55a2cSotto 				if (!ignoreline(line))
984ccd55a2cSotto 					goto proceed;
985ccd55a2cSotto 			}
986ccd55a2cSotto 		}
987ccd55a2cSotto 		if (a > b || c <= d) {	/* Changes and inserts. */
988ccd55a2cSotto 			for (i = c; i <= d; i++) {
989ccd55a2cSotto 				line = preadline(fileno(f2),
990ccd55a2cSotto 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
991ccd55a2cSotto 				if (!ignoreline(line))
992ccd55a2cSotto 					goto proceed;
993ccd55a2cSotto 			}
994ccd55a2cSotto 		}
995ccd55a2cSotto 		return;
996ccd55a2cSotto 	}
997ccd55a2cSotto proceed:
99861783bcdSespie 	if (*pflags & D_HEADER) {
9995e07282dSray 		diff_output("%s %s %s\n", diffargs, file1, file2);
100061783bcdSespie 		*pflags &= ~D_HEADER;
100161783bcdSespie 	}
100257003866Sray 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
10030efcb26eSmillert 		/*
10040efcb26eSmillert 		 * Allocate change records as needed.
10050efcb26eSmillert 		 */
10060efcb26eSmillert 		if (context_vec_ptr == context_vec_end - 1) {
10070efcb26eSmillert 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
10080efcb26eSmillert 			max_context <<= 1;
1009371275caSderaadt 			context_vec_start = xreallocarray(context_vec_start,
1010aa215433Sray 			    max_context, sizeof(*context_vec_start));
10110efcb26eSmillert 			context_vec_end = context_vec_start + max_context;
10120efcb26eSmillert 			context_vec_ptr = context_vec_start + offset;
10130efcb26eSmillert 		}
10140efcb26eSmillert 		if (anychange == 0) {
10150efcb26eSmillert 			/*
10160efcb26eSmillert 			 * Print the context/unidiff header first time through.
10170efcb26eSmillert 			 */
10187bdb251cSmillert 			print_header(file1, file2);
10190efcb26eSmillert 			anychange = 1;
102057003866Sray 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
102157003866Sray 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1022ae8d569bSderaadt 			/*
102357003866Sray 			 * If this change is more than 'diff_context' lines from the
10240efcb26eSmillert 			 * previous change, dump the record and reset it.
1025ae8d569bSderaadt 			 */
102657003866Sray 			if (diff_format == D_CONTEXT)
1027dba1d6eaSray 				dump_context_vec(f1, f2, *pflags);
10289de32c1bSmillert 			else
1029dba1d6eaSray 				dump_unified_vec(f1, f2, *pflags);
10309de32c1bSmillert 		}
1031ae8d569bSderaadt 		context_vec_ptr++;
1032ae8d569bSderaadt 		context_vec_ptr->a = a;
1033ae8d569bSderaadt 		context_vec_ptr->b = b;
1034ae8d569bSderaadt 		context_vec_ptr->c = c;
1035ae8d569bSderaadt 		context_vec_ptr->d = d;
1036ae8d569bSderaadt 		return;
1037ae8d569bSderaadt 	}
10380efcb26eSmillert 	if (anychange == 0)
10390efcb26eSmillert 		anychange = 1;
104057003866Sray 	switch (diff_format) {
10415f9fc8aaSmillert 	case D_BRIEF:
10425f9fc8aaSmillert 		return;
1043ae8d569bSderaadt 	case D_NORMAL:
1044ae8d569bSderaadt 	case D_EDIT:
1045ae8d569bSderaadt 		range(a, b, ",");
10465e07282dSray 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
104757003866Sray 		if (diff_format == D_NORMAL)
1048ae8d569bSderaadt 			range(c, d, ",");
10495e07282dSray 		diff_output("\n");
1050ae8d569bSderaadt 		break;
1051ae8d569bSderaadt 	case D_REVERSE:
10525e07282dSray 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1053ae8d569bSderaadt 		range(a, b, " ");
10545e07282dSray 		diff_output("\n");
1055ae8d569bSderaadt 		break;
1056ae8d569bSderaadt 	case D_NREVERSE:
1057ae8d569bSderaadt 		if (a > b)
10585e07282dSray 			diff_output("a%d %d\n", b, d - c + 1);
1059ae8d569bSderaadt 		else {
10605e07282dSray 			diff_output("d%d %d\n", a, b - a + 1);
1061ae8d569bSderaadt 			if (!(c > d))
1062ae8d569bSderaadt 				/* add changed lines */
10635e07282dSray 				diff_output("a%d %d\n", b, d - c + 1);
1064ae8d569bSderaadt 		}
1065ae8d569bSderaadt 		break;
1066ae8d569bSderaadt 	}
106757003866Sray 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1068dba1d6eaSray 		fetch(ixold, a, b, f1, '<', 1, *pflags);
106957003866Sray 		if (a <= b && c <= d && diff_format == D_NORMAL)
10705e07282dSray 			diff_output("---\n");
1071ae8d569bSderaadt 	}
107257003866Sray 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
107357003866Sray 	if (i != 0 && diff_format == D_EDIT) {
1074f8a6bf23Smillert 		/*
1075f8a6bf23Smillert 		 * A non-zero return value for D_EDIT indicates that the
1076f8a6bf23Smillert 		 * last line printed was a bare dot (".") that has been
1077f8a6bf23Smillert 		 * escaped as ".." to prevent ed(1) from misinterpreting
1078f8a6bf23Smillert 		 * it.  We have to add a substitute command to change this
1079f8a6bf23Smillert 		 * back and restart where we left off.
1080f8a6bf23Smillert 		 */
10815e07282dSray 		diff_output(".\n");
1082d1e24f31Snatano 		diff_output("%ds/.//\n", a + i - 1);
1083d1e24f31Snatano 		b = a + i - 1;
1084d1e24f31Snatano 		a = b + 1;
1085f8a6bf23Smillert 		c += i;
1086f8a6bf23Smillert 		goto restart;
1087f8a6bf23Smillert 	}
108857003866Sray 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
10895e07282dSray 		diff_output(".\n");
1090ae8d569bSderaadt 	if (inifdef) {
10915e07282dSray 		diff_output("#endif /* %s */\n", ifdefname);
1092ae8d569bSderaadt 		inifdef = 0;
1093ae8d569bSderaadt 	}
1094ae8d569bSderaadt }
1095ae8d569bSderaadt 
1096f8a6bf23Smillert static int
fetch(long * f,int a,int b,FILE * lb,int ch,int oldfile,int flags)1097dba1d6eaSray fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1098ae8d569bSderaadt {
1099f8a6bf23Smillert 	int i, j, c, lastc, col, nc;
1100ae8d569bSderaadt 
1101ae8d569bSderaadt 	/*
1102ae8d569bSderaadt 	 * When doing #ifdef's, copy down to current line
1103ae8d569bSderaadt 	 * if this is the first file, so that stuff makes it to output.
1104ae8d569bSderaadt 	 */
110557003866Sray 	if (diff_format == D_IFDEF && oldfile) {
1106ae8d569bSderaadt 		long curpos = ftell(lb);
1107ae8d569bSderaadt 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1108ae8d569bSderaadt 		nc = f[a > b ? b : a - 1] - curpos;
1109ae8d569bSderaadt 		for (i = 0; i < nc; i++)
11105e07282dSray 			diff_output("%c", getc(lb));
1111ae8d569bSderaadt 	}
1112ae8d569bSderaadt 	if (a > b)
1113f8a6bf23Smillert 		return (0);
111457003866Sray 	if (diff_format == D_IFDEF) {
111590f56ad8Smillert 		if (inifdef) {
11165e07282dSray 			diff_output("#else /* %s%s */\n",
111790f56ad8Smillert 			    oldfile == 1 ? "!" : "", ifdefname);
111826da422aStedu 		} else {
111990f56ad8Smillert 			if (oldfile)
11205e07282dSray 				diff_output("#ifndef %s\n", ifdefname);
112190f56ad8Smillert 			else
11225e07282dSray 				diff_output("#ifdef %s\n", ifdefname);
1123ae8d569bSderaadt 		}
1124ae8d569bSderaadt 		inifdef = 1 + oldfile;
1125ae8d569bSderaadt 	}
1126ae8d569bSderaadt 	for (i = a; i <= b; i++) {
112791cf91eeSderaadt 		fseek(lb, f[i - 1], SEEK_SET);
1128ae8d569bSderaadt 		nc = f[i] - f[i - 1];
112957003866Sray 		if (diff_format != D_IFDEF && ch != '\0') {
11305e07282dSray 			diff_output("%c", ch);
113157003866Sray 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
113257003866Sray 			    || diff_format == D_UNIFIED))
11335e07282dSray 				diff_output("\t");
113457003866Sray 			else if (diff_format != D_UNIFIED)
11355e07282dSray 				diff_output(" ");
11361f9aa9e0Smillert 		}
1137ae8d569bSderaadt 		col = 0;
1138f8a6bf23Smillert 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1139bb34d48bSmillert 			if ((c = getc(lb)) == EOF) {
114057003866Sray 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
114157003866Sray 				    diff_format == D_NREVERSE)
1142643dc60cSmillert 					warnx("No newline at end of file");
1143643dc60cSmillert 				else
11445e07282dSray 					diff_output("\n\\ No newline at end of "
11455e07282dSray 					    "file\n");
1146643dc60cSmillert 				return (0);
1147bb34d48bSmillert 			}
1148dba1d6eaSray 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1149bb34d48bSmillert 				do {
11505e07282dSray 					diff_output(" ");
1151bb34d48bSmillert 				} while (++col & 7);
1152bb34d48bSmillert 			} else {
115357003866Sray 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1154f8a6bf23Smillert 				    && lastc == '.') {
1155f8a6bf23Smillert 					/*
1156f8a6bf23Smillert 					 * Don't print a bare "." line
1157f8a6bf23Smillert 					 * since that will confuse ed(1).
1158f8a6bf23Smillert 					 * Print ".." instead and return,
1159f8a6bf23Smillert 					 * giving the caller an offset
1160f8a6bf23Smillert 					 * from which to restart.
1161f8a6bf23Smillert 					 */
11625e07282dSray 					diff_output(".\n");
1163f8a6bf23Smillert 					return (i - a + 1);
1164f8a6bf23Smillert 				}
11655e07282dSray 				diff_output("%c", c);
1166ae8d569bSderaadt 				col++;
1167ae8d569bSderaadt 			}
1168ae8d569bSderaadt 		}
1169ae8d569bSderaadt 	}
1170f8a6bf23Smillert 	return (0);
1171ae8d569bSderaadt }
1172ae8d569bSderaadt 
1173ae8d569bSderaadt /*
1174a8013e93Sotto  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1175ae8d569bSderaadt  */
117626da422aStedu static int
readhash(FILE * f,int flags)1177dba1d6eaSray readhash(FILE *f, int flags)
1178ae8d569bSderaadt {
1179a8013e93Sotto 	int i, t, space;
1180a8013e93Sotto 	int sum;
1181ae8d569bSderaadt 
1182ae8d569bSderaadt 	sum = 1;
1183ae8d569bSderaadt 	space = 0;
1184dba1d6eaSray 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1185dba1d6eaSray 		if (flags & D_IGNORECASE)
1186a8013e93Sotto 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1187bb34d48bSmillert 				if (t == EOF) {
1188a8013e93Sotto 					if (i == 0)
1189ae8d569bSderaadt 						return (0);
1190bb34d48bSmillert 					break;
1191bb34d48bSmillert 				}
1192a8013e93Sotto 				sum = sum * 127 + chrtran[t];
1193ae8d569bSderaadt 			}
1194ae8d569bSderaadt 		else
1195a8013e93Sotto 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1196bb34d48bSmillert 				if (t == EOF) {
1197a8013e93Sotto 					if (i == 0)
1198ae8d569bSderaadt 						return (0);
1199bb34d48bSmillert 					break;
1200bb34d48bSmillert 				}
1201a8013e93Sotto 				sum = sum * 127 + t;
1202ae8d569bSderaadt 			}
1203ae8d569bSderaadt 	} else {
1204a8013e93Sotto 		for (i = 0;;) {
1205ae8d569bSderaadt 			switch (t = getc(f)) {
1206ae8d569bSderaadt 			case '\t':
1207b5b605d5Sotto 			case '\r':
1208b5b605d5Sotto 			case '\v':
1209b5b605d5Sotto 			case '\f':
1210ae8d569bSderaadt 			case ' ':
1211ae8d569bSderaadt 				space++;
1212ae8d569bSderaadt 				continue;
1213ae8d569bSderaadt 			default:
1214dba1d6eaSray 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1215a8013e93Sotto 					i++;
1216ae8d569bSderaadt 					space = 0;
1217ae8d569bSderaadt 				}
1218a8013e93Sotto 				sum = sum * 127 + chrtran[t];
1219a8013e93Sotto 				i++;
1220ae8d569bSderaadt 				continue;
1221bb34d48bSmillert 			case EOF:
1222a8013e93Sotto 				if (i == 0)
1223bb34d48bSmillert 					return (0);
1224bb34d48bSmillert 				/* FALLTHROUGH */
1225ae8d569bSderaadt 			case '\n':
1226ae8d569bSderaadt 				break;
1227ae8d569bSderaadt 			}
1228ae8d569bSderaadt 			break;
1229ae8d569bSderaadt 		}
1230ae8d569bSderaadt 	}
1231a8013e93Sotto 	/*
1232a8013e93Sotto 	 * There is a remote possibility that we end up with a zero sum.
1233a8013e93Sotto 	 * Zero is used as an EOF marker, so return 1 instead.
1234a8013e93Sotto 	 */
1235a8013e93Sotto 	return (sum == 0 ? 1 : sum);
1236ae8d569bSderaadt }
1237ae8d569bSderaadt 
1238774cb253Savsm static int
asciifile(FILE * f)123926da422aStedu asciifile(FILE *f)
1240ae8d569bSderaadt {
1241063293f0Sotto 	unsigned char buf[BUFSIZ];
12420631431fSstsp 	size_t cnt;
1243ae8d569bSderaadt 
1244dba1d6eaSray 	if (f == NULL)
12452a1593dfStedu 		return (1);
12462a1593dfStedu 
12474ec4b3d5Smillert 	rewind(f);
12484ec4b3d5Smillert 	cnt = fread(buf, 1, sizeof(buf), f);
12490631431fSstsp 	return (memchr(buf, '\0', cnt) == NULL);
1250ae8d569bSderaadt }
1251ae8d569bSderaadt 
12525c68ba7eSespie #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
12535c68ba7eSespie 
125496e45528Sotto static char *
match_function(const long * f,int pos,FILE * fp)1255dba1d6eaSray match_function(const long *f, int pos, FILE *fp)
125696e45528Sotto {
1257063293f0Sotto 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
125896e45528Sotto 	size_t nc;
125996e45528Sotto 	int last = lastline;
12605c68ba7eSespie 	char *state = NULL;
126196e45528Sotto 
126296e45528Sotto 	lastline = pos;
126396e45528Sotto 	while (pos > last) {
1264dba1d6eaSray 		fseek(fp, f[pos - 1], SEEK_SET);
126596e45528Sotto 		nc = f[pos] - f[pos - 1];
126696e45528Sotto 		if (nc >= sizeof(buf))
126796e45528Sotto 			nc = sizeof(buf) - 1;
1268dba1d6eaSray 		nc = fread(buf, 1, nc, fp);
126996e45528Sotto 		if (nc > 0) {
127096e45528Sotto 			buf[nc] = '\0';
12714fd6ed32Sgilles 			buf[strcspn(buf, "\n")] = '\0';
127296e45528Sotto 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
12735c68ba7eSespie 				if (begins_with(buf, "private:")) {
12745c68ba7eSespie 					if (!state)
12755c68ba7eSespie 						state = " (private)";
12765c68ba7eSespie 				} else if (begins_with(buf, "protected:")) {
12775c68ba7eSespie 					if (!state)
12785c68ba7eSespie 						state = " (protected)";
12795c68ba7eSespie 				} else if (begins_with(buf, "public:")) {
12805c68ba7eSespie 					if (!state)
12815c68ba7eSespie 						state = " (public)";
12825c68ba7eSespie 				} else {
128396e45528Sotto 					strlcpy(lastbuf, buf, sizeof lastbuf);
12845c68ba7eSespie 					if (state)
12855c68ba7eSespie 						strlcat(lastbuf, state,
12865c68ba7eSespie 						    sizeof lastbuf);
128796e45528Sotto 					lastmatchline = pos;
128896e45528Sotto 					return lastbuf;
128996e45528Sotto 				}
129096e45528Sotto 			}
12915c68ba7eSespie 		}
129296e45528Sotto 		pos--;
129396e45528Sotto 	}
129496e45528Sotto 	return lastmatchline > 0 ? lastbuf : NULL;
129596e45528Sotto }
129696e45528Sotto 
1297ae8d569bSderaadt /* dump accumulated "context" diff changes */
129826da422aStedu static void
dump_context_vec(FILE * f1,FILE * f2,int flags)1299dba1d6eaSray dump_context_vec(FILE *f1, FILE *f2, int flags)
1300ae8d569bSderaadt {
130148b8c3e3Sderaadt 	struct context_vec *cvp = context_vec_start;
130248b8c3e3Sderaadt 	int lowa, upb, lowc, upd, do_output;
130326da422aStedu 	int a, b, c, d;
130496e45528Sotto 	char ch, *f;
1305ae8d569bSderaadt 
130690f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
1307ae8d569bSderaadt 		return;
1308ae8d569bSderaadt 
130926da422aStedu 	b = d = 0;		/* gcc */
1310b9fc9a72Sderaadt 	lowa = MAXIMUM(1, cvp->a - diff_context);
1311b9fc9a72Sderaadt 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1312b9fc9a72Sderaadt 	lowc = MAXIMUM(1, cvp->c - diff_context);
1313b9fc9a72Sderaadt 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1314ae8d569bSderaadt 
13155e07282dSray 	diff_output("***************");
1316dba1d6eaSray 	if ((flags & D_PROTOTYPE)) {
131796e45528Sotto 		f = match_function(ixold, lowa-1, f1);
13185e07282dSray 		if (f != NULL)
13195e07282dSray 			diff_output(" %s", f);
132096e45528Sotto 	}
13215e07282dSray 	diff_output("\n*** ");
1322ae8d569bSderaadt 	range(lowa, upb, ",");
13235e07282dSray 	diff_output(" ****\n");
1324ae8d569bSderaadt 
1325ae8d569bSderaadt 	/*
13264ec4b3d5Smillert 	 * Output changes to the "old" file.  The first loop suppresses
1327ae8d569bSderaadt 	 * output if there were no changes to the "old" file (we'll see
1328ae8d569bSderaadt 	 * the "old" lines as context in the "new" list).
1329ae8d569bSderaadt 	 */
1330ae8d569bSderaadt 	do_output = 0;
1331ae8d569bSderaadt 	for (; cvp <= context_vec_ptr; cvp++)
1332ae8d569bSderaadt 		if (cvp->a <= cvp->b) {
1333ae8d569bSderaadt 			cvp = context_vec_start;
1334ae8d569bSderaadt 			do_output++;
1335ae8d569bSderaadt 			break;
1336ae8d569bSderaadt 		}
1337ae8d569bSderaadt 	if (do_output) {
1338ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
133926da422aStedu 			a = cvp->a;
134026da422aStedu 			b = cvp->b;
134126da422aStedu 			c = cvp->c;
134226da422aStedu 			d = cvp->d;
1343ae8d569bSderaadt 
1344ae8d569bSderaadt 			if (a <= b && c <= d)
1345ae8d569bSderaadt 				ch = 'c';
1346ae8d569bSderaadt 			else
1347ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1348ae8d569bSderaadt 
1349ae8d569bSderaadt 			if (ch == 'a')
1350dba1d6eaSray 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1351ae8d569bSderaadt 			else {
1352dba1d6eaSray 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
13534ec4b3d5Smillert 				fetch(ixold, a, b, f1,
1354dba1d6eaSray 				    ch == 'c' ? '!' : '-', 0, flags);
1355ae8d569bSderaadt 			}
1356ae8d569bSderaadt 			lowa = b + 1;
1357ae8d569bSderaadt 			cvp++;
1358ae8d569bSderaadt 		}
1359dba1d6eaSray 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1360ae8d569bSderaadt 	}
1361ae8d569bSderaadt 	/* output changes to the "new" file */
13625e07282dSray 	diff_output("--- ");
1363ae8d569bSderaadt 	range(lowc, upd, ",");
13645e07282dSray 	diff_output(" ----\n");
1365ae8d569bSderaadt 
1366ae8d569bSderaadt 	do_output = 0;
1367ae8d569bSderaadt 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1368ae8d569bSderaadt 		if (cvp->c <= cvp->d) {
1369ae8d569bSderaadt 			cvp = context_vec_start;
1370ae8d569bSderaadt 			do_output++;
1371ae8d569bSderaadt 			break;
1372ae8d569bSderaadt 		}
1373ae8d569bSderaadt 	if (do_output) {
1374ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
137526da422aStedu 			a = cvp->a;
137626da422aStedu 			b = cvp->b;
137726da422aStedu 			c = cvp->c;
137826da422aStedu 			d = cvp->d;
1379ae8d569bSderaadt 
1380ae8d569bSderaadt 			if (a <= b && c <= d)
1381ae8d569bSderaadt 				ch = 'c';
1382ae8d569bSderaadt 			else
1383ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1384ae8d569bSderaadt 
1385ae8d569bSderaadt 			if (ch == 'd')
1386dba1d6eaSray 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1387ae8d569bSderaadt 			else {
1388dba1d6eaSray 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
13894ec4b3d5Smillert 				fetch(ixnew, c, d, f2,
1390dba1d6eaSray 				    ch == 'c' ? '!' : '+', 0, flags);
1391ae8d569bSderaadt 			}
1392ae8d569bSderaadt 			lowc = d + 1;
1393ae8d569bSderaadt 			cvp++;
1394ae8d569bSderaadt 		}
1395dba1d6eaSray 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1396ae8d569bSderaadt 	}
1397ae8d569bSderaadt 	context_vec_ptr = context_vec_start - 1;
1398ae8d569bSderaadt }
13999de32c1bSmillert 
14009de32c1bSmillert /* dump accumulated "unified" diff changes */
14019de32c1bSmillert static void
dump_unified_vec(FILE * f1,FILE * f2,int flags)1402dba1d6eaSray dump_unified_vec(FILE *f1, FILE *f2, int flags)
14039de32c1bSmillert {
14049de32c1bSmillert 	struct context_vec *cvp = context_vec_start;
14059de32c1bSmillert 	int lowa, upb, lowc, upd;
14069de32c1bSmillert 	int a, b, c, d;
140796e45528Sotto 	char ch, *f;
14089de32c1bSmillert 
140990f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
14109de32c1bSmillert 		return;
14119de32c1bSmillert 
14129de32c1bSmillert 	b = d = 0;		/* gcc */
1413b9fc9a72Sderaadt 	lowa = MAXIMUM(1, cvp->a - diff_context);
1414b9fc9a72Sderaadt 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1415b9fc9a72Sderaadt 	lowc = MAXIMUM(1, cvp->c - diff_context);
1416b9fc9a72Sderaadt 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
14179de32c1bSmillert 
14185e07282dSray 	diff_output("@@ -");
1419c72ea322Smillert 	uni_range(lowa, upb);
14205e07282dSray 	diff_output(" +");
1421c72ea322Smillert 	uni_range(lowc, upd);
14225e07282dSray 	diff_output(" @@");
1423dba1d6eaSray 	if ((flags & D_PROTOTYPE)) {
142496e45528Sotto 		f = match_function(ixold, lowa-1, f1);
14255e07282dSray 		if (f != NULL)
14265e07282dSray 			diff_output(" %s", f);
142796e45528Sotto 	}
14285e07282dSray 	diff_output("\n");
14299de32c1bSmillert 
14309de32c1bSmillert 	/*
14319de32c1bSmillert 	 * Output changes in "unified" diff format--the old and new lines
14329de32c1bSmillert 	 * are printed together.
14339de32c1bSmillert 	 */
14349de32c1bSmillert 	for (; cvp <= context_vec_ptr; cvp++) {
14359de32c1bSmillert 		a = cvp->a;
14369de32c1bSmillert 		b = cvp->b;
14379de32c1bSmillert 		c = cvp->c;
14389de32c1bSmillert 		d = cvp->d;
14399de32c1bSmillert 
14409de32c1bSmillert 		/*
14419de32c1bSmillert 		 * c: both new and old changes
14429de32c1bSmillert 		 * d: only changes in the old file
14439de32c1bSmillert 		 * a: only changes in the new file
14449de32c1bSmillert 		 */
14459de32c1bSmillert 		if (a <= b && c <= d)
14469de32c1bSmillert 			ch = 'c';
14479de32c1bSmillert 		else
14489de32c1bSmillert 			ch = (a <= b) ? 'd' : 'a';
14499de32c1bSmillert 
14509de32c1bSmillert 		switch (ch) {
14519de32c1bSmillert 		case 'c':
1452dba1d6eaSray 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1453dba1d6eaSray 			fetch(ixold, a, b, f1, '-', 0, flags);
1454dba1d6eaSray 			fetch(ixnew, c, d, f2, '+', 0, flags);
14559de32c1bSmillert 			break;
14569de32c1bSmillert 		case 'd':
1457dba1d6eaSray 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1458dba1d6eaSray 			fetch(ixold, a, b, f1, '-', 0, flags);
14599de32c1bSmillert 			break;
14609de32c1bSmillert 		case 'a':
1461dba1d6eaSray 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1462dba1d6eaSray 			fetch(ixnew, c, d, f2, '+', 0, flags);
14639de32c1bSmillert 			break;
14649de32c1bSmillert 		}
14659de32c1bSmillert 		lowa = b + 1;
14669de32c1bSmillert 		lowc = d + 1;
14679de32c1bSmillert 	}
1468dba1d6eaSray 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
14699de32c1bSmillert 
14709de32c1bSmillert 	context_vec_ptr = context_vec_start - 1;
14719de32c1bSmillert }
14727bdb251cSmillert 
14737bdb251cSmillert static void
print_header(const char * file1,const char * file2)14747bdb251cSmillert print_header(const char *file1, const char *file2)
14757bdb251cSmillert {
14767bdb251cSmillert 	if (label[0] != NULL)
14775e07282dSray 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
14787bdb251cSmillert 		    label[0]);
14797bdb251cSmillert 	else
14805e07282dSray 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
14817bdb251cSmillert 		    file1, ctime(&stb1.st_mtime));
14827bdb251cSmillert 	if (label[1] != NULL)
14835e07282dSray 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
14847bdb251cSmillert 		    label[1]);
14857bdb251cSmillert 	else
14865e07282dSray 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
14877bdb251cSmillert 		    file2, ctime(&stb2.st_mtime));
14887bdb251cSmillert }
1489