xref: /openbsd-src/usr.bin/diff/diffreg.c (revision dba1d6ea0e7adc7407ba8b634ca533c869704ccd)
1*dba1d6eaSray /*	$OpenBSD: diffreg.c,v 1.71 2009/06/06 15:00:27 ray Exp $	*/
2d0c3f575Sderaadt 
3d0c3f575Sderaadt /*
4d0c3f575Sderaadt  * Copyright (C) Caldera International Inc.  2001-2002.
5d0c3f575Sderaadt  * All rights reserved.
6d0c3f575Sderaadt  *
7d0c3f575Sderaadt  * Redistribution and use in source and binary forms, with or without
8d0c3f575Sderaadt  * modification, are permitted provided that the following conditions
9d0c3f575Sderaadt  * are met:
10d0c3f575Sderaadt  * 1. Redistributions of source code and documentation must retain the above
11d0c3f575Sderaadt  *    copyright notice, this list of conditions and the following disclaimer.
12d0c3f575Sderaadt  * 2. Redistributions in binary form must reproduce the above copyright
13d0c3f575Sderaadt  *    notice, this list of conditions and the following disclaimer in the
14d0c3f575Sderaadt  *    documentation and/or other materials provided with the distribution.
15d0c3f575Sderaadt  * 3. All advertising materials mentioning features or use of this software
16d0c3f575Sderaadt  *    must display the following acknowledgement:
17d0c3f575Sderaadt  *	This product includes software developed or owned by Caldera
18d0c3f575Sderaadt  *	International, Inc.
19d0c3f575Sderaadt  * 4. Neither the name of Caldera International, Inc. nor the names of other
20d0c3f575Sderaadt  *    contributors may be used to endorse or promote products derived from
21d0c3f575Sderaadt  *    this software without specific prior written permission.
22d0c3f575Sderaadt  *
23d0c3f575Sderaadt  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24d0c3f575Sderaadt  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25d0c3f575Sderaadt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26d0c3f575Sderaadt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27d0c3f575Sderaadt  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28d0c3f575Sderaadt  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29d0c3f575Sderaadt  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30d0c3f575Sderaadt  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31d0c3f575Sderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32d0c3f575Sderaadt  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33d0c3f575Sderaadt  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34d0c3f575Sderaadt  * POSSIBILITY OF SUCH DAMAGE.
35d0c3f575Sderaadt  */
364ec4b3d5Smillert /*-
374ec4b3d5Smillert  * Copyright (c) 1991, 1993
384ec4b3d5Smillert  *	The Regents of the University of California.  All rights reserved.
394ec4b3d5Smillert  *
404ec4b3d5Smillert  * Redistribution and use in source and binary forms, with or without
414ec4b3d5Smillert  * modification, are permitted provided that the following conditions
424ec4b3d5Smillert  * are met:
434ec4b3d5Smillert  * 1. Redistributions of source code must retain the above copyright
444ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer.
454ec4b3d5Smillert  * 2. Redistributions in binary form must reproduce the above copyright
464ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer in the
474ec4b3d5Smillert  *    documentation and/or other materials provided with the distribution.
484ec4b3d5Smillert  * 3. Neither the name of the University nor the names of its contributors
494ec4b3d5Smillert  *    may be used to endorse or promote products derived from this software
504ec4b3d5Smillert  *    without specific prior written permission.
514ec4b3d5Smillert  *
524ec4b3d5Smillert  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
534ec4b3d5Smillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
544ec4b3d5Smillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
554ec4b3d5Smillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
564ec4b3d5Smillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
574ec4b3d5Smillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
584ec4b3d5Smillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
594ec4b3d5Smillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
604ec4b3d5Smillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
614ec4b3d5Smillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
624ec4b3d5Smillert  * SUCH DAMAGE.
634ec4b3d5Smillert  *
644ec4b3d5Smillert  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
654ec4b3d5Smillert  */
664ec4b3d5Smillert 
674ec4b3d5Smillert #ifndef lint
68*dba1d6eaSray static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.71 2009/06/06 15:00:27 ray Exp $";
694ec4b3d5Smillert #endif /* not lint */
70d0c3f575Sderaadt 
717b6ec9e4Smillert #include <sys/param.h>
724ec4b3d5Smillert #include <sys/stat.h>
73b4bca33fSmillert #include <sys/wait.h>
7426da422aStedu 
754ec4b3d5Smillert #include <ctype.h>
764ec4b3d5Smillert #include <err.h>
777b6ec9e4Smillert #include <errno.h>
7826da422aStedu #include <fcntl.h>
790efcb26eSmillert #include <stddef.h>
804ec4b3d5Smillert #include <stdio.h>
814ec4b3d5Smillert #include <stdlib.h>
8226da422aStedu #include <string.h>
834ec4b3d5Smillert #include <unistd.h>
84ae8d569bSderaadt 
85ae8d569bSderaadt #include "diff.h"
86b4bca33fSmillert #include "pathnames.h"
874a034c3aSray #include "xmalloc.h"
8826da422aStedu 
89ae8d569bSderaadt /*
90ae8d569bSderaadt  * diff - compare two files.
91ae8d569bSderaadt  */
92ae8d569bSderaadt 
93ae8d569bSderaadt /*
94ae8d569bSderaadt  *	Uses an algorithm due to Harold Stone, which finds
95ae8d569bSderaadt  *	a pair of longest identical subsequences in the two
96ae8d569bSderaadt  *	files.
97ae8d569bSderaadt  *
98ae8d569bSderaadt  *	The major goal is to generate the match vector J.
99ae8d569bSderaadt  *	J[i] is the index of the line in file1 corresponding
100ae8d569bSderaadt  *	to line i file0. J[i] = 0 if there is no
101ae8d569bSderaadt  *	such line in file1.
102ae8d569bSderaadt  *
103ae8d569bSderaadt  *	Lines are hashed so as to work in core. All potential
104ae8d569bSderaadt  *	matches are located by sorting the lines of each file
105ae8d569bSderaadt  *	on the hash (called ``value''). In particular, this
106ae8d569bSderaadt  *	collects the equivalence classes in file1 together.
107ae8d569bSderaadt  *	Subroutine equiv replaces the value of each line in
108ae8d569bSderaadt  *	file0 by the index of the first element of its
109ae8d569bSderaadt  *	matching equivalence in (the reordered) file1.
110ae8d569bSderaadt  *	To save space equiv squeezes file1 into a single
111ae8d569bSderaadt  *	array member in which the equivalence classes
112ae8d569bSderaadt  *	are simply concatenated, except that their first
113ae8d569bSderaadt  *	members are flagged by changing sign.
114ae8d569bSderaadt  *
115ae8d569bSderaadt  *	Next the indices that point into member are unsorted into
116ae8d569bSderaadt  *	array class according to the original order of file0.
117ae8d569bSderaadt  *
118ae8d569bSderaadt  *	The cleverness lies in routine stone. This marches
119ae8d569bSderaadt  *	through the lines of file0, developing a vector klist
120ae8d569bSderaadt  *	of "k-candidates". At step i a k-candidate is a matched
121ae8d569bSderaadt  *	pair of lines x,y (x in file0 y in file1) such that
122ae8d569bSderaadt  *	there is a common subsequence of length k
123ae8d569bSderaadt  *	between the first i lines of file0 and the first y
124ae8d569bSderaadt  *	lines of file1, but there is no such subsequence for
125ae8d569bSderaadt  *	any smaller y. x is the earliest possible mate to y
126ae8d569bSderaadt  *	that occurs in such a subsequence.
127ae8d569bSderaadt  *
128ae8d569bSderaadt  *	Whenever any of the members of the equivalence class of
129ae8d569bSderaadt  *	lines in file1 matable to a line in file0 has serial number
130ae8d569bSderaadt  *	less than the y of some k-candidate, that k-candidate
131ae8d569bSderaadt  *	with the smallest such y is replaced. The new
132ae8d569bSderaadt  *	k-candidate is chained (via pred) to the current
133ae8d569bSderaadt  *	k-1 candidate so that the actual subsequence can
134ae8d569bSderaadt  *	be recovered. When a member has serial number greater
135ae8d569bSderaadt  *	that the y of all k-candidates, the klist is extended.
136ae8d569bSderaadt  *	At the end, the longest subsequence is pulled out
137ae8d569bSderaadt  *	and placed in the array J by unravel
138ae8d569bSderaadt  *
139ae8d569bSderaadt  *	With J in hand, the matches there recorded are
140ae8d569bSderaadt  *	check'ed against reality to assure that no spurious
141ae8d569bSderaadt  *	matches have crept in due to hashing. If they have,
142ae8d569bSderaadt  *	they are broken, and "jackpot" is recorded--a harmless
143ae8d569bSderaadt  *	matter except that a true match for a spuriously
144ae8d569bSderaadt  *	mated line may now be unnecessarily reported as a change.
145ae8d569bSderaadt  *
146ae8d569bSderaadt  *	Much of the complexity of the program comes simply
147ae8d569bSderaadt  *	from trying to minimize core utilization and
148ae8d569bSderaadt  *	maximize the range of doable problems by dynamically
149ae8d569bSderaadt  *	allocating what is needed and reusing what is not.
150ae8d569bSderaadt  *	The core requirements for problems larger than somewhat
151ae8d569bSderaadt  *	are (in words) 2*length(file0) + length(file1) +
152ae8d569bSderaadt  *	3*(number of k-candidates installed),  typically about
153ae8d569bSderaadt  *	6n words for files of length n.
154ae8d569bSderaadt  */
155ae8d569bSderaadt 
156ae8d569bSderaadt struct cand {
157ae8d569bSderaadt 	int	x;
158ae8d569bSderaadt 	int	y;
159ae8d569bSderaadt 	int	pred;
16060b9d8fdSderaadt };
16126da422aStedu 
162ae8d569bSderaadt struct line {
163ae8d569bSderaadt 	int	serial;
164ae8d569bSderaadt 	int	value;
16592af4c2dStedu } *file[2];
16626da422aStedu 
1670efcb26eSmillert /*
1680efcb26eSmillert  * The following struct is used to record change information when
1690efcb26eSmillert  * doing a "context" or "unified" diff.  (see routine "change" to
1700efcb26eSmillert  * understand the highly mnemonic field names)
1710efcb26eSmillert  */
1720efcb26eSmillert struct context_vec {
1730efcb26eSmillert 	int	a;		/* start line in old file */
1740efcb26eSmillert 	int	b;		/* end line in old file */
1750efcb26eSmillert 	int	c;		/* start line in new file */
1760efcb26eSmillert 	int	d;		/* end line in new file */
1770efcb26eSmillert };
1780efcb26eSmillert 
1797b6ec9e4Smillert static FILE	*opentemp(const char *);
180ac73e8e6Smillert static void	 output(char *, FILE *, char *, FILE *, int);
181*dba1d6eaSray static void	 check(char *, FILE *, char *, FILE *, int);
18226da422aStedu static void	 range(int, int, char *);
183c72ea322Smillert static void	 uni_range(int, int);
184*dba1d6eaSray static void	 dump_context_vec(FILE *, FILE *, int);
185*dba1d6eaSray static void	 dump_unified_vec(FILE *, FILE *, int);
186*dba1d6eaSray static void	 prepare(int, FILE *, off_t, int);
18726da422aStedu static void	 prune(void);
18826da422aStedu static void	 equiv(struct line *, int, struct line *, int, int *);
18926da422aStedu static void	 unravel(int);
19026da422aStedu static void	 unsort(struct line *, int, int *);
19161783bcdSespie static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
19226da422aStedu static void	 sort(struct line *, int);
1937bdb251cSmillert static void	 print_header(const char *, const char *);
194ccd55a2cSotto static int	 ignoreline(char *);
1954ec4b3d5Smillert static int	 asciifile(FILE *);
196*dba1d6eaSray static int	 fetch(long *, int, int, FILE *, int, int, int);
19726da422aStedu static int	 newcand(int, int, int);
19826da422aStedu static int	 search(int *, int, int);
1994ec4b3d5Smillert static int	 skipline(FILE *);
2006e18f850Sotto static int	 isqrt(int);
201*dba1d6eaSray static int	 stone(int *, int, int *, int *, int);
202*dba1d6eaSray static int	 readhash(FILE *, int);
2034ec4b3d5Smillert static int	 files_differ(FILE *, FILE *, int);
20496e45528Sotto static char	*match_function(const long *, int, FILE *);
205ccd55a2cSotto static char	*preadline(int, size_t, off_t);
2066e18f850Sotto 
207aa215433Sray static int  *J;			/* will be overlaid on class */
208aa215433Sray static int  *class;		/* will be overlaid on file[0] */
209aa215433Sray static int  *klist;		/* will be overlaid on file[0] after class */
210aa215433Sray static int  *member;		/* will be overlaid on file[1] */
211aa215433Sray static int   clen;
212aa215433Sray static int   inifdef;		/* whether or not we are in a #ifdef block */
213aa215433Sray static int   len[2];
214aa215433Sray static int   pref, suff;	/* length of prefix and suffix */
215aa215433Sray static int   slen[2];
216aa215433Sray static int   anychange;
217aa215433Sray static long *ixnew;		/* will be overlaid on file[1] */
218aa215433Sray static long *ixold;		/* will be overlaid on klist */
219aa215433Sray static struct cand *clist;	/* merely a free storage pot for candidates */
220aa215433Sray static int   clistlen;		/* the length of clist */
221aa215433Sray static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
222aa215433Sray static u_char *chrtran;		/* translation table for case-folding */
223aa215433Sray static struct context_vec *context_vec_start;
224aa215433Sray static struct context_vec *context_vec_end;
225aa215433Sray static struct context_vec *context_vec_ptr;
226aa215433Sray 
227aa215433Sray #define FUNCTION_CONTEXT_SIZE	55
228aa215433Sray static char lastbuf[FUNCTION_CONTEXT_SIZE];
229aa215433Sray static int lastline;
230aa215433Sray static int lastmatchline;
231aa215433Sray 
23226da422aStedu 
23326da422aStedu /*
23426da422aStedu  * chrtran points to one of 2 translation tables: cup2low if folding upper to
23526da422aStedu  * lower case clow2low if not folding case
236ae8d569bSderaadt  */
2378a15d8deSderaadt u_char clow2low[256] = {
23826da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
23926da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
24026da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
24126da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
24226da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
24326da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
24426da422aStedu 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
24526da422aStedu 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
24626da422aStedu 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
24726da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
24826da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
24926da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
25026da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
25126da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
25226da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
25326da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
25426da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
25526da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
25626da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
25726da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
25826da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
25926da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
26026da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
26126da422aStedu 	0xfd, 0xfe, 0xff
262ae8d569bSderaadt };
263ae8d569bSderaadt 
2648a15d8deSderaadt u_char cup2low[256] = {
26526da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
26626da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
26726da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
26826da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
26926da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
27026da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
27126da422aStedu 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
27226da422aStedu 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
27326da422aStedu 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
27426da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
27526da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
27626da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
27726da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
27826da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
27926da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
28026da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
28126da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
28226da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
28326da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
28426da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
28526da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
28626da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
28726da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
28826da422aStedu 	0xfd, 0xfe, 0xff
289ae8d569bSderaadt };
290ae8d569bSderaadt 
291b4bca33fSmillert int
2924ec4b3d5Smillert diffreg(char *ofile1, char *ofile2, int flags)
293ae8d569bSderaadt {
2944ec4b3d5Smillert 	char *file1 = ofile1;
2954ec4b3d5Smillert 	char *file2 = ofile2;
296*dba1d6eaSray 	FILE *f1, *f2;
297*dba1d6eaSray 	int i, rval, ostdout = -1;
298b4bca33fSmillert 	pid_t pid = -1;
299ae8d569bSderaadt 
300*dba1d6eaSray 	f1 = f2 = NULL;
301*dba1d6eaSray 	rval = D_SAME;
3024ec4b3d5Smillert 	anychange = 0;
30396e45528Sotto 	lastline = 0;
30496e45528Sotto 	lastmatchline = 0;
3050efcb26eSmillert 	context_vec_ptr = context_vec_start - 1;
306*dba1d6eaSray 	if (flags & D_IGNORECASE)
307*dba1d6eaSray 		chrtran = cup2low;
308*dba1d6eaSray 	else
309*dba1d6eaSray 		chrtran = clow2low;
3107b6ec9e4Smillert 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
311fed3a06dSmillert 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
31266e5764eSmillert 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
313f28259e9Smillert 		goto closem;
3144ec4b3d5Smillert 
3154ec4b3d5Smillert 	if (flags & D_EMPTY1)
3164ec4b3d5Smillert 		f1 = fopen(_PATH_DEVNULL, "r");
3174ec4b3d5Smillert 	else {
3187b6ec9e4Smillert 		if (!S_ISREG(stb1.st_mode)) {
3197b6ec9e4Smillert 			if ((f1 = opentemp(file1)) == NULL ||
3207b6ec9e4Smillert 			    fstat(fileno(f1), &stb1) < 0) {
3214ec4b3d5Smillert 				warn("%s", file1);
3224ec4b3d5Smillert 				status |= 2;
3234ec4b3d5Smillert 				goto closem;
32448b947b7Smillert 			}
3257b6ec9e4Smillert 		} else if (strcmp(file1, "-") == 0)
326b1a26502Smillert 			f1 = stdin;
327b1a26502Smillert 		else
3284ec4b3d5Smillert 			f1 = fopen(file1, "r");
3294ec4b3d5Smillert 	}
3304ec4b3d5Smillert 	if (f1 == NULL) {
3314ec4b3d5Smillert 		warn("%s", file1);
3324ec4b3d5Smillert 		status |= 2;
3334ec4b3d5Smillert 		goto closem;
3344ec4b3d5Smillert 	}
3354ec4b3d5Smillert 
3364ec4b3d5Smillert 	if (flags & D_EMPTY2)
3374ec4b3d5Smillert 		f2 = fopen(_PATH_DEVNULL, "r");
3384ec4b3d5Smillert 	else {
3397b6ec9e4Smillert 		if (!S_ISREG(stb2.st_mode)) {
3407b6ec9e4Smillert 			if ((f2 = opentemp(file2)) == NULL ||
3417b6ec9e4Smillert 			    fstat(fileno(f2), &stb2) < 0) {
3424ec4b3d5Smillert 				warn("%s", file2);
3434ec4b3d5Smillert 				status |= 2;
3444ec4b3d5Smillert 				goto closem;
3454ec4b3d5Smillert 			}
3467b6ec9e4Smillert 		} else if (strcmp(file2, "-") == 0)
347b1a26502Smillert 			f2 = stdin;
348b1a26502Smillert 		else
3494ec4b3d5Smillert 			f2 = fopen(file2, "r");
3504ec4b3d5Smillert 	}
3514ec4b3d5Smillert 	if (f2 == NULL) {
3524ec4b3d5Smillert 		warn("%s", file2);
3534ec4b3d5Smillert 		status |= 2;
3544ec4b3d5Smillert 		goto closem;
3554ec4b3d5Smillert 	}
3564ec4b3d5Smillert 
3574ec4b3d5Smillert 	switch (files_differ(f1, f2, flags)) {
3584ec4b3d5Smillert 	case 0:
359b4bca33fSmillert 		goto closem;
3604ec4b3d5Smillert 	case 1:
3614ec4b3d5Smillert 		break;
3624ec4b3d5Smillert 	default:
3634ec4b3d5Smillert 		/* error */
3644ec4b3d5Smillert 		status |= 2;
3654ec4b3d5Smillert 		goto closem;
366ae8d569bSderaadt 	}
3674ec4b3d5Smillert 
368*dba1d6eaSray 	if ((flags & D_FORCEASCII) == 0 &&
369*dba1d6eaSray 	    (!asciifile(f1) || !asciifile(f2))) {
370b4bca33fSmillert 		rval = D_BINARY;
3715f9fc8aaSmillert 		status |= 1;
372cab5d83cSmillert 		goto closem;
373cab5d83cSmillert 	}
374b4bca33fSmillert 	if (lflag) {
375b4bca33fSmillert 		/* redirect stdout to pr */
376b4bca33fSmillert 		int pfd[2];
377b4bca33fSmillert 		char *header;
378b4bca33fSmillert 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
379b4bca33fSmillert 
3804a034c3aSray 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
381b4bca33fSmillert 		prargv[2] = header;
382b4bca33fSmillert 		fflush(stdout);
383b4bca33fSmillert 		rewind(stdout);
384b4bca33fSmillert 		pipe(pfd);
385b4bca33fSmillert 		switch ((pid = fork())) {
386b4bca33fSmillert 		case -1:
387b4bca33fSmillert 			warnx("No more processes");
388b4bca33fSmillert 			status |= 2;
3894a034c3aSray 			xfree(header);
390b4bca33fSmillert 			return (D_ERROR);
391b4bca33fSmillert 		case 0:
392b4bca33fSmillert 			/* child */
393b4bca33fSmillert 			if (pfd[0] != STDIN_FILENO) {
394b4bca33fSmillert 				dup2(pfd[0], STDIN_FILENO);
395b4bca33fSmillert 				close(pfd[0]);
396b4bca33fSmillert 			}
397b4bca33fSmillert 			close(pfd[1]);
398b4bca33fSmillert 			execv(_PATH_PR, prargv);
399b4bca33fSmillert 			_exit(127);
400b4bca33fSmillert 		default:
401b4bca33fSmillert 			/* parent */
402b4bca33fSmillert 			if (pfd[1] != STDOUT_FILENO) {
403b4bca33fSmillert 				ostdout = dup(STDOUT_FILENO);
404b4bca33fSmillert 				dup2(pfd[1], STDOUT_FILENO);
405b4bca33fSmillert 				close(pfd[1]);
406b4bca33fSmillert 			}
407b4bca33fSmillert 			close(pfd[0]);
408b4bca33fSmillert 			rewind(stdout);
4094a034c3aSray 			xfree(header);
410b4bca33fSmillert 		}
411ac73e8e6Smillert 	}
412*dba1d6eaSray 	prepare(0, f1, stb1.st_size, flags);
413*dba1d6eaSray 	prepare(1, f2, stb2.st_size, flags);
414ae8d569bSderaadt 	prune();
415ae8d569bSderaadt 	sort(sfile[0], slen[0]);
416ae8d569bSderaadt 	sort(sfile[1], slen[1]);
417ae8d569bSderaadt 
418ae8d569bSderaadt 	member = (int *)file[1];
419ae8d569bSderaadt 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
420aa215433Sray 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
421ae8d569bSderaadt 
422ae8d569bSderaadt 	class = (int *)file[0];
423ae8d569bSderaadt 	unsort(sfile[0], slen[0], class);
424aa215433Sray 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
425ae8d569bSderaadt 
426aa215433Sray 	klist = xmalloc((slen[0] + 2) * sizeof(*klist));
427058e94f4Smillert 	clen = 0;
4286e18f850Sotto 	clistlen = 100;
429aa215433Sray 	clist = xmalloc(clistlen * sizeof(*clist));
430*dba1d6eaSray 	i = stone(class, slen[0], member, klist, flags);
4314a034c3aSray 	xfree(member);
4324a034c3aSray 	xfree(class);
433ae8d569bSderaadt 
434aa215433Sray 	J = xrealloc(J, len[0] + 2, sizeof(*J));
435ae8d569bSderaadt 	unravel(klist[i]);
4364a034c3aSray 	xfree(clist);
4374a034c3aSray 	xfree(klist);
438ae8d569bSderaadt 
439aa215433Sray 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
440aa215433Sray 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
441*dba1d6eaSray 	check(file1, f1, file2, f2, flags);
442*dba1d6eaSray 	output(file1, f1, file2, f2, flags);
443b4bca33fSmillert 	if (ostdout != -1) {
444b4bca33fSmillert 		int wstatus;
4454ec4b3d5Smillert 
446b4bca33fSmillert 		/* close the pipe to pr and restore stdout */
447b4bca33fSmillert 		fflush(stdout);
448b4bca33fSmillert 		rewind(stdout);
449b4bca33fSmillert 		if (ostdout != STDOUT_FILENO) {
450b4bca33fSmillert 			close(STDOUT_FILENO);
451b4bca33fSmillert 			dup2(ostdout, STDOUT_FILENO);
452b4bca33fSmillert 			close(ostdout);
453b4bca33fSmillert 		}
454b4bca33fSmillert 		waitpid(pid, &wstatus, 0);
45552e5bd6bSmillert 	}
4564ec4b3d5Smillert closem:
4575afc3be2Smillert 	if (anychange) {
4585afc3be2Smillert 		status |= 1;
4595afc3be2Smillert 		if (rval == D_SAME)
4605afc3be2Smillert 			rval = D_DIFFER;
4615afc3be2Smillert 	}
4624ec4b3d5Smillert 	if (f1 != NULL)
4634ec4b3d5Smillert 		fclose(f1);
4644ec4b3d5Smillert 	if (f2 != NULL)
4654ec4b3d5Smillert 		fclose(f2);
4667b6ec9e4Smillert 	if (file1 != ofile1)
4674a034c3aSray 		xfree(file1);
4687b6ec9e4Smillert 	if (file2 != ofile2)
4694a034c3aSray 		xfree(file2);
470*dba1d6eaSray 
471b4bca33fSmillert 	return (rval);
4724ec4b3d5Smillert }
4734ec4b3d5Smillert 
4744ec4b3d5Smillert /*
4754ec4b3d5Smillert  * Check to see if the given files differ.
4764ec4b3d5Smillert  * Returns 0 if they are the same, 1 if different, and -1 on error.
4774ec4b3d5Smillert  * XXX - could use code from cmp(1) [faster]
4784ec4b3d5Smillert  */
4794ec4b3d5Smillert static int
4804ec4b3d5Smillert files_differ(FILE *f1, FILE *f2, int flags)
4814ec4b3d5Smillert {
4824ec4b3d5Smillert 	char buf1[BUFSIZ], buf2[BUFSIZ];
4834ec4b3d5Smillert 	size_t i, j;
4844ec4b3d5Smillert 
4854ec4b3d5Smillert 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
4864ec4b3d5Smillert 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
4874ec4b3d5Smillert 		return (1);
4884ec4b3d5Smillert 	for (;;) {
4894ec4b3d5Smillert 		i = fread(buf1, 1, sizeof(buf1), f1);
4904ec4b3d5Smillert 		j = fread(buf2, 1, sizeof(buf2), f2);
4914ec4b3d5Smillert 		if (i != j)
4924ec4b3d5Smillert 			return (1);
4934ec4b3d5Smillert 		if (i == 0 && j == 0) {
4944ec4b3d5Smillert 			if (ferror(f1) || ferror(f2))
4954ec4b3d5Smillert 				return (1);
4964ec4b3d5Smillert 			return (0);
4974ec4b3d5Smillert 		}
4984ec4b3d5Smillert 		if (memcmp(buf1, buf2, i) != 0)
4994ec4b3d5Smillert 			return (1);
5004ec4b3d5Smillert 	}
501ae8d569bSderaadt }
502ae8d569bSderaadt 
5037b6ec9e4Smillert static FILE *
5047b6ec9e4Smillert opentemp(const char *file)
505ae8d569bSderaadt {
5067b6ec9e4Smillert 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
5077b6ec9e4Smillert 	ssize_t nread;
5087b6ec9e4Smillert 	int ifd, ofd;
50948b947b7Smillert 
51048b947b7Smillert 	if (strcmp(file, "-") == 0)
51148b947b7Smillert 		ifd = STDIN_FILENO;
51266e5764eSmillert 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
5134ec4b3d5Smillert 		return (NULL);
51448b947b7Smillert 
51548b947b7Smillert 	if ((tempdir = getenv("TMPDIR")) == NULL)
51648b947b7Smillert 		tempdir = _PATH_TMP;
51709bddaddSotto 
51809bddaddSotto 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
51909bddaddSotto 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
52009bddaddSotto 	    sizeof(tempfile)) {
5217b6ec9e4Smillert 		close(ifd);
5227b6ec9e4Smillert 		errno = ENAMETOOLONG;
5234ec4b3d5Smillert 		return (NULL);
524ae8d569bSderaadt 	}
5257b6ec9e4Smillert 
5267b6ec9e4Smillert 	if ((ofd = mkstemp(tempfile)) < 0)
5277b6ec9e4Smillert 		return (NULL);
5287b6ec9e4Smillert 	unlink(tempfile);
5297b6ec9e4Smillert 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
5307b6ec9e4Smillert 		if (write(ofd, buf, nread) != nread) {
53148b947b7Smillert 			close(ifd);
53248b947b7Smillert 			close(ofd);
5337b6ec9e4Smillert 			return (NULL);
5347b6ec9e4Smillert 		}
5357b6ec9e4Smillert 	}
5367b6ec9e4Smillert 	close(ifd);
537f28259e9Smillert 	lseek(ofd, (off_t)0, SEEK_SET);
5387b6ec9e4Smillert 	return (fdopen(ofd, "r"));
539ae8d569bSderaadt }
540ae8d569bSderaadt 
541ae8d569bSderaadt char *
54226da422aStedu splice(char *dir, char *file)
543ae8d569bSderaadt {
54449dffe13Smillert 	char *tail, *buf;
545ae8d569bSderaadt 
5467b6ec9e4Smillert 	if ((tail = strrchr(file, '/')) == NULL)
547ae8d569bSderaadt 		tail = file;
548ae8d569bSderaadt 	else
549ae8d569bSderaadt 		tail++;
5504a034c3aSray 	xasprintf(&buf, "%s/%s", dir, tail);
55149dffe13Smillert 	return (buf);
552ae8d569bSderaadt }
553ae8d569bSderaadt 
55426da422aStedu static void
555*dba1d6eaSray prepare(int i, FILE *fd, off_t filesize, int flags)
556ae8d569bSderaadt {
55726da422aStedu 	struct line *p;
55826da422aStedu 	int j, h;
559739e7267Sotto 	size_t sz;
560ae8d569bSderaadt 
5614ec4b3d5Smillert 	rewind(fd);
562739e7267Sotto 
563739e7267Sotto 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
564739e7267Sotto 	if (sz < 100)
565a8013e93Sotto 		sz = 100;
566739e7267Sotto 
567aa215433Sray 	p = xmalloc((sz + 3) * sizeof(*p));
568*dba1d6eaSray 	for (j = 0; (h = readhash(fd, flags));) {
569a8013e93Sotto 		if (j == sz) {
570a8013e93Sotto 			sz = sz * 3 / 2;
571aa215433Sray 			p = xrealloc(p, sz + 3, sizeof(*p));
572a8013e93Sotto 		}
573a8013e93Sotto 		p[++j].value = h;
574ae8d569bSderaadt 	}
575ae8d569bSderaadt 	len[i] = j;
576ae8d569bSderaadt 	file[i] = p;
577ae8d569bSderaadt }
578ae8d569bSderaadt 
57926da422aStedu static void
58026da422aStedu prune(void)
581ae8d569bSderaadt {
58226da422aStedu 	int i, j;
58348b8c3e3Sderaadt 
584ae8d569bSderaadt 	for (pref = 0; pref < len[0] && pref < len[1] &&
585ae8d569bSderaadt 	    file[0][pref + 1].value == file[1][pref + 1].value;
5867d2b2b91Sderaadt 	    pref++)
5877d2b2b91Sderaadt 		;
588ae8d569bSderaadt 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
589ae8d569bSderaadt 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
5907d2b2b91Sderaadt 	    suff++)
5917d2b2b91Sderaadt 		;
592ae8d569bSderaadt 	for (j = 0; j < 2; j++) {
593ae8d569bSderaadt 		sfile[j] = file[j] + pref;
594ae8d569bSderaadt 		slen[j] = len[j] - pref - suff;
595ae8d569bSderaadt 		for (i = 0; i <= slen[j]; i++)
596ae8d569bSderaadt 			sfile[j][i].serial = i;
597ae8d569bSderaadt 	}
598ae8d569bSderaadt }
599ae8d569bSderaadt 
60026da422aStedu static void
60126da422aStedu equiv(struct line *a, int n, struct line *b, int m, int *c)
602ae8d569bSderaadt {
60326da422aStedu 	int i, j;
60448b8c3e3Sderaadt 
605ae8d569bSderaadt 	i = j = 1;
606ae8d569bSderaadt 	while (i <= n && j <= m) {
607ae8d569bSderaadt 		if (a[i].value < b[j].value)
608ae8d569bSderaadt 			a[i++].value = 0;
609ae8d569bSderaadt 		else if (a[i].value == b[j].value)
610ae8d569bSderaadt 			a[i++].value = j;
611ae8d569bSderaadt 		else
612ae8d569bSderaadt 			j++;
613ae8d569bSderaadt 	}
614ae8d569bSderaadt 	while (i <= n)
615ae8d569bSderaadt 		a[i++].value = 0;
616ae8d569bSderaadt 	b[m + 1].value = 0;
617ae8d569bSderaadt 	j = 0;
618ae8d569bSderaadt 	while (++j <= m) {
619ae8d569bSderaadt 		c[j] = -b[j].serial;
620ae8d569bSderaadt 		while (b[j + 1].value == b[j].value) {
621ae8d569bSderaadt 			j++;
622ae8d569bSderaadt 			c[j] = b[j].serial;
623ae8d569bSderaadt 		}
624ae8d569bSderaadt 	}
625ae8d569bSderaadt 	c[j] = -1;
626ae8d569bSderaadt }
627ae8d569bSderaadt 
6286e18f850Sotto /* Code taken from ping.c */
6296e18f850Sotto static int
6306e18f850Sotto isqrt(int n)
6316e18f850Sotto {
6326e18f850Sotto 	int y, x = 1;
6336e18f850Sotto 
6346e18f850Sotto 	if (n == 0)
6356e18f850Sotto 		return (0);
6366e18f850Sotto 
6376e18f850Sotto 	do { /* newton was a stinker */
6386e18f850Sotto 		y = x;
6396e18f850Sotto 		x = n / x;
6406e18f850Sotto 		x += y;
6416e18f850Sotto 		x /= 2;
6426e18f850Sotto 	} while ((x - y) > 1 || (x - y) < -1);
6436e18f850Sotto 
6446e18f850Sotto 	return (x);
6456e18f850Sotto }
6466e18f850Sotto 
64726da422aStedu static int
648*dba1d6eaSray stone(int *a, int n, int *b, int *c, int flags)
649ae8d569bSderaadt {
65048b8c3e3Sderaadt 	int i, k, y, j, l;
65148b8c3e3Sderaadt 	int oldc, tc, oldl;
6522a89a2f7Smillert 	u_int numtries;
6536e18f850Sotto 
654*dba1d6eaSray 	/* XXX move the isqrt() out of the macro to avoid multiple calls */
655*dba1d6eaSray 	const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
656*dba1d6eaSray 	    MAX(256, isqrt(n));
65748b8c3e3Sderaadt 
658ae8d569bSderaadt 	k = 0;
659ae8d569bSderaadt 	c[0] = newcand(0, 0, 0);
660ae8d569bSderaadt 	for (i = 1; i <= n; i++) {
661ae8d569bSderaadt 		j = a[i];
662ae8d569bSderaadt 		if (j == 0)
663ae8d569bSderaadt 			continue;
664ae8d569bSderaadt 		y = -b[j];
665ae8d569bSderaadt 		oldl = 0;
666ae8d569bSderaadt 		oldc = c[0];
6672a89a2f7Smillert 		numtries = 0;
668ae8d569bSderaadt 		do {
669ae8d569bSderaadt 			if (y <= clist[oldc].y)
670ae8d569bSderaadt 				continue;
671ae8d569bSderaadt 			l = search(c, k, y);
672ae8d569bSderaadt 			if (l != oldl + 1)
673ae8d569bSderaadt 				oldc = c[l - 1];
674ae8d569bSderaadt 			if (l <= k) {
675ae8d569bSderaadt 				if (clist[c[l]].y <= y)
676ae8d569bSderaadt 					continue;
677ae8d569bSderaadt 				tc = c[l];
678ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
679ae8d569bSderaadt 				oldc = tc;
680ae8d569bSderaadt 				oldl = l;
6812a89a2f7Smillert 				numtries++;
682ae8d569bSderaadt 			} else {
683ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
684ae8d569bSderaadt 				k++;
685ae8d569bSderaadt 				break;
686ae8d569bSderaadt 			}
6872a89a2f7Smillert 		} while ((y = b[++j]) > 0 && numtries < bound);
688ae8d569bSderaadt 	}
689ae8d569bSderaadt 	return (k);
690ae8d569bSderaadt }
691ae8d569bSderaadt 
69226da422aStedu static int
69326da422aStedu newcand(int x, int y, int pred)
694ae8d569bSderaadt {
69526da422aStedu 	struct cand *q;
69626da422aStedu 
6976e18f850Sotto 	if (clen == clistlen) {
6986e18f850Sotto 		clistlen = clistlen * 11 / 10;
699aa215433Sray 		clist = xrealloc(clist, clistlen, sizeof(*clist));
7006e18f850Sotto 	}
7016e18f850Sotto 	q = clist + clen;
702ae8d569bSderaadt 	q->x = x;
703ae8d569bSderaadt 	q->y = y;
704ae8d569bSderaadt 	q->pred = pred;
7056e18f850Sotto 	return (clen++);
706ae8d569bSderaadt }
707ae8d569bSderaadt 
70826da422aStedu static int
70926da422aStedu search(int *c, int k, int y)
710ae8d569bSderaadt {
71148b8c3e3Sderaadt 	int i, j, l, t;
71248b8c3e3Sderaadt 
713ae8d569bSderaadt 	if (clist[c[k]].y < y)	/* quick look for typical case */
714ae8d569bSderaadt 		return (k + 1);
715ae8d569bSderaadt 	i = 0;
716ae8d569bSderaadt 	j = k + 1;
717*dba1d6eaSray 	for (;;) {
718*dba1d6eaSray 		l = (i + j) / 2;
719*dba1d6eaSray 		if (l <= i)
720ae8d569bSderaadt 			break;
721ae8d569bSderaadt 		t = clist[c[l]].y;
722ae8d569bSderaadt 		if (t > y)
723ae8d569bSderaadt 			j = l;
724ae8d569bSderaadt 		else if (t < y)
725ae8d569bSderaadt 			i = l;
726ae8d569bSderaadt 		else
727ae8d569bSderaadt 			return (l);
728ae8d569bSderaadt 	}
729ae8d569bSderaadt 	return (l + 1);
730ae8d569bSderaadt }
731ae8d569bSderaadt 
73226da422aStedu static void
73326da422aStedu unravel(int p)
734ae8d569bSderaadt {
73526da422aStedu 	struct cand *q;
73648b8c3e3Sderaadt 	int i;
73748b8c3e3Sderaadt 
738ae8d569bSderaadt 	for (i = 0; i <= len[0]; i++)
739ae8d569bSderaadt 		J[i] = i <= pref ? i :
7407d2b2b91Sderaadt 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
741ae8d569bSderaadt 	for (q = clist + p; q->y != 0; q = clist + q->pred)
742ae8d569bSderaadt 		J[q->x + pref] = q->y + pref;
743ae8d569bSderaadt }
744ae8d569bSderaadt 
74526da422aStedu /*
74649dffe13Smillert  * Check does double duty:
74749dffe13Smillert  *  1.	ferret out any fortuitous correspondences due
74849dffe13Smillert  *	to confounding by hashing (which result in "jackpot")
74949dffe13Smillert  *  2.  collect random access indexes to the two files
75026da422aStedu  */
75126da422aStedu static void
752*dba1d6eaSray check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
753ae8d569bSderaadt {
75448b8c3e3Sderaadt 	int i, j, jackpot, c, d;
755ae8d569bSderaadt 	long ctold, ctnew;
756ae8d569bSderaadt 
7574ec4b3d5Smillert 	rewind(f1);
7584ec4b3d5Smillert 	rewind(f2);
759ae8d569bSderaadt 	j = 1;
760ae8d569bSderaadt 	ixold[0] = ixnew[0] = 0;
761ae8d569bSderaadt 	jackpot = 0;
762ae8d569bSderaadt 	ctold = ctnew = 0;
763ae8d569bSderaadt 	for (i = 1; i <= len[0]; i++) {
764ae8d569bSderaadt 		if (J[i] == 0) {
7654ec4b3d5Smillert 			ixold[i] = ctold += skipline(f1);
766ae8d569bSderaadt 			continue;
767ae8d569bSderaadt 		}
768ae8d569bSderaadt 		while (j < J[i]) {
7694ec4b3d5Smillert 			ixnew[j] = ctnew += skipline(f2);
770ae8d569bSderaadt 			j++;
771ae8d569bSderaadt 		}
772*dba1d6eaSray 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
773ae8d569bSderaadt 			for (;;) {
7744ec4b3d5Smillert 				c = getc(f1);
7754ec4b3d5Smillert 				d = getc(f2);
776bb34d48bSmillert 				/*
777bb34d48bSmillert 				 * GNU diff ignores a missing newline
778*dba1d6eaSray 				 * in one file for -b or -w.
779bb34d48bSmillert 				 */
780*dba1d6eaSray 				if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
781bb34d48bSmillert 				    ((c == EOF && d == '\n') ||
782bb34d48bSmillert 				    (c == '\n' && d == EOF))) {
783bb34d48bSmillert 					break;
784bb34d48bSmillert 				}
785ae8d569bSderaadt 				ctold++;
786ae8d569bSderaadt 				ctnew++;
787*dba1d6eaSray 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
788*dba1d6eaSray 				    isspace(d)) {
789ae8d569bSderaadt 					do {
790ae8d569bSderaadt 						if (c == '\n')
791ae8d569bSderaadt 							break;
792ae8d569bSderaadt 						ctold++;
7934ec4b3d5Smillert 					} while (isspace(c = getc(f1)));
794ae8d569bSderaadt 					do {
795ae8d569bSderaadt 						if (d == '\n')
796ae8d569bSderaadt 							break;
797ae8d569bSderaadt 						ctnew++;
7984ec4b3d5Smillert 					} while (isspace(d = getc(f2)));
799*dba1d6eaSray 				} else if ((flags & D_IGNOREBLANKS)) {
800ae8d569bSderaadt 					while (isspace(c) && c != '\n') {
8014ec4b3d5Smillert 						c = getc(f1);
802ae8d569bSderaadt 						ctold++;
803ae8d569bSderaadt 					}
804ae8d569bSderaadt 					while (isspace(d) && d != '\n') {
8054ec4b3d5Smillert 						d = getc(f2);
806ae8d569bSderaadt 						ctnew++;
807ae8d569bSderaadt 					}
808ae8d569bSderaadt 				}
809ae8d569bSderaadt 				if (chrtran[c] != chrtran[d]) {
810ae8d569bSderaadt 					jackpot++;
811ae8d569bSderaadt 					J[i] = 0;
812bb34d48bSmillert 					if (c != '\n' && c != EOF)
8134ec4b3d5Smillert 						ctold += skipline(f1);
814bb34d48bSmillert 					if (d != '\n' && c != EOF)
8154ec4b3d5Smillert 						ctnew += skipline(f2);
816ae8d569bSderaadt 					break;
817ae8d569bSderaadt 				}
818bb34d48bSmillert 				if (c == '\n' || c == EOF)
819ae8d569bSderaadt 					break;
820ae8d569bSderaadt 			}
821ae8d569bSderaadt 		} else {
822ae8d569bSderaadt 			for (;;) {
823ae8d569bSderaadt 				ctold++;
824ae8d569bSderaadt 				ctnew++;
8254ec4b3d5Smillert 				if ((c = getc(f1)) != (d = getc(f2))) {
826ae8d569bSderaadt 					/* jackpot++; */
827ae8d569bSderaadt 					J[i] = 0;
828bb34d48bSmillert 					if (c != '\n' && c != EOF)
8294ec4b3d5Smillert 						ctold += skipline(f1);
830bb34d48bSmillert 					if (d != '\n' && c != EOF)
8314ec4b3d5Smillert 						ctnew += skipline(f2);
832ae8d569bSderaadt 					break;
833ae8d569bSderaadt 				}
834bb34d48bSmillert 				if (c == '\n' || c == EOF)
835ae8d569bSderaadt 					break;
836ae8d569bSderaadt 			}
837ae8d569bSderaadt 		}
838ae8d569bSderaadt 		ixold[i] = ctold;
839ae8d569bSderaadt 		ixnew[j] = ctnew;
840ae8d569bSderaadt 		j++;
841ae8d569bSderaadt 	}
8424ec4b3d5Smillert 	for (; j <= len[1]; j++)
8434ec4b3d5Smillert 		ixnew[j] = ctnew += skipline(f2);
844ae8d569bSderaadt 	/*
84548b8c3e3Sderaadt 	 * if (jackpot)
84648b8c3e3Sderaadt 	 *	fprintf(stderr, "jackpot\n");
847ae8d569bSderaadt 	 */
848ae8d569bSderaadt }
849ae8d569bSderaadt 
85048b8c3e3Sderaadt /* shellsort CACM #201 */
85126da422aStedu static void
85226da422aStedu sort(struct line *a, int n)
85348b8c3e3Sderaadt {
85448b8c3e3Sderaadt 	struct line *ai, *aim, w;
85548b8c3e3Sderaadt 	int j, m = 0, k;
856ae8d569bSderaadt 
857ae8d569bSderaadt 	if (n == 0)
858ae8d569bSderaadt 		return;
859ae8d569bSderaadt 	for (j = 1; j <= n; j *= 2)
860ae8d569bSderaadt 		m = 2 * j - 1;
861ae8d569bSderaadt 	for (m /= 2; m != 0; m /= 2) {
862ae8d569bSderaadt 		k = n - m;
863ae8d569bSderaadt 		for (j = 1; j <= k; j++) {
864ae8d569bSderaadt 			for (ai = &a[j]; ai > a; ai -= m) {
865ae8d569bSderaadt 				aim = &ai[m];
866ae8d569bSderaadt 				if (aim < ai)
867ae8d569bSderaadt 					break;	/* wraparound */
868ae8d569bSderaadt 				if (aim->value > ai[0].value ||
86926da422aStedu 				    (aim->value == ai[0].value &&
87026da422aStedu 					aim->serial > ai[0].serial))
871ae8d569bSderaadt 					break;
872ae8d569bSderaadt 				w.value = ai[0].value;
873ae8d569bSderaadt 				ai[0].value = aim->value;
874ae8d569bSderaadt 				aim->value = w.value;
875ae8d569bSderaadt 				w.serial = ai[0].serial;
876ae8d569bSderaadt 				ai[0].serial = aim->serial;
877ae8d569bSderaadt 				aim->serial = w.serial;
878ae8d569bSderaadt 			}
879ae8d569bSderaadt 		}
880ae8d569bSderaadt 	}
881ae8d569bSderaadt }
882ae8d569bSderaadt 
88326da422aStedu static void
88426da422aStedu unsort(struct line *f, int l, int *b)
885ae8d569bSderaadt {
88648b8c3e3Sderaadt 	int *a, i;
88726da422aStedu 
888aa215433Sray 	a = xmalloc((l + 1) * sizeof(*a));
889ae8d569bSderaadt 	for (i = 1; i <= l; i++)
890ae8d569bSderaadt 		a[f[i].serial] = f[i].value;
891ae8d569bSderaadt 	for (i = 1; i <= l; i++)
892ae8d569bSderaadt 		b[i] = a[i];
8934a034c3aSray 	xfree(a);
894ae8d569bSderaadt }
895ae8d569bSderaadt 
89626da422aStedu static int
8974ec4b3d5Smillert skipline(FILE *f)
898ae8d569bSderaadt {
89926da422aStedu 	int i, c;
900ae8d569bSderaadt 
901bb34d48bSmillert 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
902bb34d48bSmillert 		continue;
903ae8d569bSderaadt 	return (i);
904ae8d569bSderaadt }
905ae8d569bSderaadt 
90626da422aStedu static void
907ac73e8e6Smillert output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
908ae8d569bSderaadt {
90948b8c3e3Sderaadt 	int m, i0, i1, j0, j1;
91048b8c3e3Sderaadt 
9114ec4b3d5Smillert 	rewind(f1);
9124ec4b3d5Smillert 	rewind(f2);
913ae8d569bSderaadt 	m = len[0];
914ae8d569bSderaadt 	J[0] = 0;
915ae8d569bSderaadt 	J[m + 1] = len[1] + 1;
9164ec4b3d5Smillert 	if (format != D_EDIT) {
91726da422aStedu 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
91826da422aStedu 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
91926da422aStedu 				i0++;
920ae8d569bSderaadt 			j0 = J[i0 - 1] + 1;
921ae8d569bSderaadt 			i1 = i0 - 1;
92226da422aStedu 			while (i1 < m && J[i1 + 1] == 0)
92326da422aStedu 				i1++;
924ae8d569bSderaadt 			j1 = J[i1 + 1] - 1;
925ae8d569bSderaadt 			J[i1] = j1;
92661783bcdSespie 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
92726da422aStedu 		}
92826da422aStedu 	} else {
92926da422aStedu 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
93026da422aStedu 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
93126da422aStedu 				i0--;
932ae8d569bSderaadt 			j0 = J[i0 + 1] - 1;
933ae8d569bSderaadt 			i1 = i0 + 1;
93426da422aStedu 			while (i1 > 1 && J[i1 - 1] == 0)
93526da422aStedu 				i1--;
936ae8d569bSderaadt 			j1 = J[i1 - 1] + 1;
937ae8d569bSderaadt 			J[i1] = j1;
93861783bcdSespie 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
939ae8d569bSderaadt 		}
94026da422aStedu 	}
941ae8d569bSderaadt 	if (m == 0)
94261783bcdSespie 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
9434ec4b3d5Smillert 	if (format == D_IFDEF) {
944ae8d569bSderaadt 		for (;;) {
945ae8d569bSderaadt #define	c i0
946bb34d48bSmillert 			if ((c = getc(f1)) == EOF)
947ae8d569bSderaadt 				return;
948ae8d569bSderaadt 			putchar(c);
949ae8d569bSderaadt 		}
950ae8d569bSderaadt #undef c
951ae8d569bSderaadt 	}
9529de32c1bSmillert 	if (anychange != 0) {
9534ec4b3d5Smillert 		if (format == D_CONTEXT)
954*dba1d6eaSray 			dump_context_vec(f1, f2, flags);
9554ec4b3d5Smillert 		else if (format == D_UNIFIED)
956*dba1d6eaSray 			dump_unified_vec(f1, f2, flags);
9579de32c1bSmillert 	}
958ae8d569bSderaadt }
959ae8d569bSderaadt 
9604a034c3aSray static void
961c72ea322Smillert range(int a, int b, char *separator)
962c72ea322Smillert {
963c72ea322Smillert 	printf("%d", a > b ? b : a);
964c72ea322Smillert 	if (a < b)
965c72ea322Smillert 		printf("%s%d", separator, b);
966c72ea322Smillert }
967c72ea322Smillert 
9684a034c3aSray static void
969c72ea322Smillert uni_range(int a, int b)
970c72ea322Smillert {
971c72ea322Smillert 	if (a < b)
972c72ea322Smillert 		printf("%d,%d", a, b - a + 1);
973c72ea322Smillert 	else if (a == b)
974c72ea322Smillert 		printf("%d", b);
975c72ea322Smillert 	else
976c72ea322Smillert 		printf("%d,0", b);
977c72ea322Smillert }
978c72ea322Smillert 
979ccd55a2cSotto static char *
980*dba1d6eaSray preadline(int fd, size_t rlen, off_t off)
981ccd55a2cSotto {
982ccd55a2cSotto 	char *line;
983ccd55a2cSotto 	ssize_t nr;
984ccd55a2cSotto 
985*dba1d6eaSray 	line = xmalloc(rlen + 1);
986*dba1d6eaSray 	if ((nr = pread(fd, line, rlen, off)) < 0)
987ccd55a2cSotto 		err(1, "preadline");
9888d981b00Sotto 	if (nr > 0 && line[nr-1] == '\n')
9898d981b00Sotto 		nr--;
990ccd55a2cSotto 	line[nr] = '\0';
991ccd55a2cSotto 	return (line);
992ccd55a2cSotto }
993ccd55a2cSotto 
994ccd55a2cSotto static int
995ccd55a2cSotto ignoreline(char *line)
996ccd55a2cSotto {
997ccd55a2cSotto 	int ret;
998ccd55a2cSotto 
999ccd55a2cSotto 	ret = regexec(&ignore_re, line, 0, NULL, 0);
10004a034c3aSray 	xfree(line);
1001ccd55a2cSotto 	return (ret == 0);	/* if it matched, it should be ignored. */
1002ccd55a2cSotto }
1003ccd55a2cSotto 
1004ae8d569bSderaadt /*
10054ec4b3d5Smillert  * Indicate that there is a difference between lines a and b of the from file
100626da422aStedu  * to get to lines c to d of the to file.  If a is greater then b then there
100726da422aStedu  * are no lines in the from file involved and this means that there were
100826da422aStedu  * lines appended (beginning at b).  If c is greater than d then there are
100926da422aStedu  * lines missing from the to file.
1010ae8d569bSderaadt  */
101126da422aStedu static void
1012ac73e8e6Smillert change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
101361783bcdSespie     int *pflags)
1014ae8d569bSderaadt {
10150efcb26eSmillert 	static size_t max_context = 64;
1016f8a6bf23Smillert 	int i;
10170efcb26eSmillert 
1018f8a6bf23Smillert restart:
10194ec4b3d5Smillert 	if (format != D_IFDEF && a > b && c > d)
1020ae8d569bSderaadt 		return;
1021ccd55a2cSotto 	if (ignore_pats != NULL) {
1022ccd55a2cSotto 		char *line;
1023ccd55a2cSotto 		/*
1024ccd55a2cSotto 		 * All lines in the change, insert, or delete must
1025ccd55a2cSotto 		 * match an ignore pattern for the change to be
1026ccd55a2cSotto 		 * ignored.
1027ccd55a2cSotto 		 */
1028ccd55a2cSotto 		if (a <= b) {		/* Changes and deletes. */
1029ccd55a2cSotto 			for (i = a; i <= b; i++) {
1030ccd55a2cSotto 				line = preadline(fileno(f1),
1031ccd55a2cSotto 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1032ccd55a2cSotto 				if (!ignoreline(line))
1033ccd55a2cSotto 					goto proceed;
1034ccd55a2cSotto 			}
1035ccd55a2cSotto 		}
1036ccd55a2cSotto 		if (a > b || c <= d) {	/* Changes and inserts. */
1037ccd55a2cSotto 			for (i = c; i <= d; i++) {
1038ccd55a2cSotto 				line = preadline(fileno(f2),
1039ccd55a2cSotto 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1040ccd55a2cSotto 				if (!ignoreline(line))
1041ccd55a2cSotto 					goto proceed;
1042ccd55a2cSotto 			}
1043ccd55a2cSotto 		}
1044ccd55a2cSotto 		return;
1045ccd55a2cSotto 	}
1046ccd55a2cSotto proceed:
104761783bcdSespie 	if (*pflags & D_HEADER) {
1048ac73e8e6Smillert 		printf("%s %s %s\n", diffargs, file1, file2);
104961783bcdSespie 		*pflags &= ~D_HEADER;
105061783bcdSespie 	}
10514ec4b3d5Smillert 	if (format == D_CONTEXT || format == D_UNIFIED) {
10520efcb26eSmillert 		/*
10530efcb26eSmillert 		 * Allocate change records as needed.
10540efcb26eSmillert 		 */
10550efcb26eSmillert 		if (context_vec_ptr == context_vec_end - 1) {
10560efcb26eSmillert 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
10570efcb26eSmillert 			max_context <<= 1;
10584a034c3aSray 			context_vec_start = xrealloc(context_vec_start,
1059aa215433Sray 			    max_context, sizeof(*context_vec_start));
10600efcb26eSmillert 			context_vec_end = context_vec_start + max_context;
10610efcb26eSmillert 			context_vec_ptr = context_vec_start + offset;
10620efcb26eSmillert 		}
10630efcb26eSmillert 		if (anychange == 0) {
10640efcb26eSmillert 			/*
10650efcb26eSmillert 			 * Print the context/unidiff header first time through.
10660efcb26eSmillert 			 */
10677bdb251cSmillert 			print_header(file1, file2);
10680efcb26eSmillert 			anychange = 1;
1069f02e3d86Sotto 		} else if (a > context_vec_ptr->b + (2 * context) + 1 &&
1070f02e3d86Sotto 		    c > context_vec_ptr->d + (2 * context) + 1) {
1071ae8d569bSderaadt 			/*
10720efcb26eSmillert 			 * If this change is more than 'context' lines from the
10730efcb26eSmillert 			 * previous change, dump the record and reset it.
1074ae8d569bSderaadt 			 */
10754ec4b3d5Smillert 			if (format == D_CONTEXT)
1076*dba1d6eaSray 				dump_context_vec(f1, f2, *pflags);
10779de32c1bSmillert 			else
1078*dba1d6eaSray 				dump_unified_vec(f1, f2, *pflags);
10799de32c1bSmillert 		}
1080ae8d569bSderaadt 		context_vec_ptr++;
1081ae8d569bSderaadt 		context_vec_ptr->a = a;
1082ae8d569bSderaadt 		context_vec_ptr->b = b;
1083ae8d569bSderaadt 		context_vec_ptr->c = c;
1084ae8d569bSderaadt 		context_vec_ptr->d = d;
1085ae8d569bSderaadt 		return;
1086ae8d569bSderaadt 	}
10870efcb26eSmillert 	if (anychange == 0)
10880efcb26eSmillert 		anychange = 1;
10894ec4b3d5Smillert 	switch (format) {
1090ae8d569bSderaadt 
10915f9fc8aaSmillert 	case D_BRIEF:
10925f9fc8aaSmillert 		return;
1093ae8d569bSderaadt 	case D_NORMAL:
1094ae8d569bSderaadt 	case D_EDIT:
1095ae8d569bSderaadt 		range(a, b, ",");
1096ae8d569bSderaadt 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
10974ec4b3d5Smillert 		if (format == D_NORMAL)
1098ae8d569bSderaadt 			range(c, d, ",");
1099ae8d569bSderaadt 		putchar('\n');
1100ae8d569bSderaadt 		break;
1101ae8d569bSderaadt 	case D_REVERSE:
1102ae8d569bSderaadt 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1103ae8d569bSderaadt 		range(a, b, " ");
1104ae8d569bSderaadt 		putchar('\n');
1105ae8d569bSderaadt 		break;
1106ae8d569bSderaadt 	case D_NREVERSE:
1107ae8d569bSderaadt 		if (a > b)
1108ae8d569bSderaadt 			printf("a%d %d\n", b, d - c + 1);
1109ae8d569bSderaadt 		else {
1110ae8d569bSderaadt 			printf("d%d %d\n", a, b - a + 1);
1111ae8d569bSderaadt 			if (!(c > d))
1112ae8d569bSderaadt 				/* add changed lines */
1113ae8d569bSderaadt 				printf("a%d %d\n", b, d - c + 1);
1114ae8d569bSderaadt 		}
1115ae8d569bSderaadt 		break;
1116ae8d569bSderaadt 	}
11174ec4b3d5Smillert 	if (format == D_NORMAL || format == D_IFDEF) {
1118*dba1d6eaSray 		fetch(ixold, a, b, f1, '<', 1, *pflags);
11194ec4b3d5Smillert 		if (a <= b && c <= d && format == D_NORMAL)
11204ec4b3d5Smillert 			puts("---");
1121ae8d569bSderaadt 	}
1122*dba1d6eaSray 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0, *pflags);
1123f8a6bf23Smillert 	if (i != 0 && format == D_EDIT) {
1124f8a6bf23Smillert 		/*
1125f8a6bf23Smillert 		 * A non-zero return value for D_EDIT indicates that the
1126f8a6bf23Smillert 		 * last line printed was a bare dot (".") that has been
1127f8a6bf23Smillert 		 * escaped as ".." to prevent ed(1) from misinterpreting
1128f8a6bf23Smillert 		 * it.  We have to add a substitute command to change this
1129f8a6bf23Smillert 		 * back and restart where we left off.
1130f8a6bf23Smillert 		 */
1131f8a6bf23Smillert 		puts(".");
1132f8a6bf23Smillert 		printf("%ds/^\\.\\././\n", a);
1133f8a6bf23Smillert 		a += i;
1134f8a6bf23Smillert 		c += i;
1135f8a6bf23Smillert 		goto restart;
1136f8a6bf23Smillert 	}
11374ec4b3d5Smillert 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
11384ec4b3d5Smillert 		puts(".");
1139ae8d569bSderaadt 	if (inifdef) {
1140b4bca33fSmillert 		printf("#endif /* %s */\n", ifdefname);
1141ae8d569bSderaadt 		inifdef = 0;
1142ae8d569bSderaadt 	}
1143ae8d569bSderaadt }
1144ae8d569bSderaadt 
1145f8a6bf23Smillert static int
1146*dba1d6eaSray fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1147ae8d569bSderaadt {
1148f8a6bf23Smillert 	int i, j, c, lastc, col, nc;
1149ae8d569bSderaadt 
1150ae8d569bSderaadt 	/*
1151ae8d569bSderaadt 	 * When doing #ifdef's, copy down to current line
1152ae8d569bSderaadt 	 * if this is the first file, so that stuff makes it to output.
1153ae8d569bSderaadt 	 */
11544ec4b3d5Smillert 	if (format == D_IFDEF && oldfile) {
1155ae8d569bSderaadt 		long curpos = ftell(lb);
1156ae8d569bSderaadt 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1157ae8d569bSderaadt 		nc = f[a > b ? b : a - 1] - curpos;
1158ae8d569bSderaadt 		for (i = 0; i < nc; i++)
1159ae8d569bSderaadt 			putchar(getc(lb));
1160ae8d569bSderaadt 	}
1161ae8d569bSderaadt 	if (a > b)
1162f8a6bf23Smillert 		return (0);
11634ec4b3d5Smillert 	if (format == D_IFDEF) {
116490f56ad8Smillert 		if (inifdef) {
1165b4bca33fSmillert 			printf("#else /* %s%s */\n",
116690f56ad8Smillert 			    oldfile == 1 ? "!" : "", ifdefname);
116726da422aStedu 		} else {
116890f56ad8Smillert 			if (oldfile)
1169b4bca33fSmillert 				printf("#ifndef %s\n", ifdefname);
117090f56ad8Smillert 			else
1171b4bca33fSmillert 				printf("#ifdef %s\n", ifdefname);
1172ae8d569bSderaadt 		}
1173ae8d569bSderaadt 		inifdef = 1 + oldfile;
1174ae8d569bSderaadt 	}
1175ae8d569bSderaadt 	for (i = a; i <= b; i++) {
117691cf91eeSderaadt 		fseek(lb, f[i - 1], SEEK_SET);
1177ae8d569bSderaadt 		nc = f[i] - f[i - 1];
11781f9aa9e0Smillert 		if (format != D_IFDEF && ch != '\0') {
11791f9aa9e0Smillert 			putchar(ch);
11801f9aa9e0Smillert 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
11811f9aa9e0Smillert 			    || format == D_UNIFIED))
11821f9aa9e0Smillert 				putchar('\t');
11831f9aa9e0Smillert 			else if (format != D_UNIFIED)
11841f9aa9e0Smillert 				putchar(' ');
11851f9aa9e0Smillert 		}
1186ae8d569bSderaadt 		col = 0;
1187f8a6bf23Smillert 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1188bb34d48bSmillert 			if ((c = getc(lb)) == EOF) {
1189643dc60cSmillert 				if (format == D_EDIT || format == D_REVERSE ||
1190643dc60cSmillert 				    format == D_NREVERSE)
1191643dc60cSmillert 					warnx("No newline at end of file");
1192643dc60cSmillert 				else
1193bb34d48bSmillert 					puts("\n\\ No newline at end of file");
1194643dc60cSmillert 				return (0);
1195bb34d48bSmillert 			}
1196*dba1d6eaSray 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1197bb34d48bSmillert 				do {
1198ae8d569bSderaadt 					putchar(' ');
1199bb34d48bSmillert 				} while (++col & 7);
1200bb34d48bSmillert 			} else {
1201f8a6bf23Smillert 				if (format == D_EDIT && j == 1 && c == '\n'
1202f8a6bf23Smillert 				    && lastc == '.') {
1203f8a6bf23Smillert 					/*
1204f8a6bf23Smillert 					 * Don't print a bare "." line
1205f8a6bf23Smillert 					 * since that will confuse ed(1).
1206f8a6bf23Smillert 					 * Print ".." instead and return,
1207f8a6bf23Smillert 					 * giving the caller an offset
1208f8a6bf23Smillert 					 * from which to restart.
1209f8a6bf23Smillert 					 */
1210f8a6bf23Smillert 					puts(".");
1211f8a6bf23Smillert 					return (i - a + 1);
1212f8a6bf23Smillert 				}
1213ae8d569bSderaadt 				putchar(c);
1214ae8d569bSderaadt 				col++;
1215ae8d569bSderaadt 			}
1216ae8d569bSderaadt 		}
1217ae8d569bSderaadt 	}
1218f8a6bf23Smillert 	return (0);
1219ae8d569bSderaadt }
1220ae8d569bSderaadt 
1221ae8d569bSderaadt /*
1222a8013e93Sotto  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1223ae8d569bSderaadt  */
122426da422aStedu static int
1225*dba1d6eaSray readhash(FILE *f, int flags)
1226ae8d569bSderaadt {
1227a8013e93Sotto 	int i, t, space;
1228a8013e93Sotto 	int sum;
1229ae8d569bSderaadt 
1230ae8d569bSderaadt 	sum = 1;
1231ae8d569bSderaadt 	space = 0;
1232*dba1d6eaSray 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1233*dba1d6eaSray 		if (flags & D_IGNORECASE)
1234a8013e93Sotto 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1235bb34d48bSmillert 				if (t == EOF) {
1236a8013e93Sotto 					if (i == 0)
1237ae8d569bSderaadt 						return (0);
1238bb34d48bSmillert 					break;
1239bb34d48bSmillert 				}
1240a8013e93Sotto 				sum = sum * 127 + chrtran[t];
1241ae8d569bSderaadt 			}
1242ae8d569bSderaadt 		else
1243a8013e93Sotto 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1244bb34d48bSmillert 				if (t == EOF) {
1245a8013e93Sotto 					if (i == 0)
1246ae8d569bSderaadt 						return (0);
1247bb34d48bSmillert 					break;
1248bb34d48bSmillert 				}
1249a8013e93Sotto 				sum = sum * 127 + t;
1250ae8d569bSderaadt 			}
1251ae8d569bSderaadt 	} else {
1252a8013e93Sotto 		for (i = 0;;) {
1253ae8d569bSderaadt 			switch (t = getc(f)) {
1254ae8d569bSderaadt 			case '\t':
1255b5b605d5Sotto 			case '\r':
1256b5b605d5Sotto 			case '\v':
1257b5b605d5Sotto 			case '\f':
1258ae8d569bSderaadt 			case ' ':
1259ae8d569bSderaadt 				space++;
1260ae8d569bSderaadt 				continue;
1261ae8d569bSderaadt 			default:
1262*dba1d6eaSray 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1263a8013e93Sotto 					i++;
1264ae8d569bSderaadt 					space = 0;
1265ae8d569bSderaadt 				}
1266a8013e93Sotto 				sum = sum * 127 + chrtran[t];
1267a8013e93Sotto 				i++;
1268ae8d569bSderaadt 				continue;
1269bb34d48bSmillert 			case EOF:
1270a8013e93Sotto 				if (i == 0)
1271bb34d48bSmillert 					return (0);
1272bb34d48bSmillert 				/* FALLTHROUGH */
1273ae8d569bSderaadt 			case '\n':
1274ae8d569bSderaadt 				break;
1275ae8d569bSderaadt 			}
1276ae8d569bSderaadt 			break;
1277ae8d569bSderaadt 		}
1278ae8d569bSderaadt 	}
1279a8013e93Sotto 	/*
1280a8013e93Sotto 	 * There is a remote possibility that we end up with a zero sum.
1281a8013e93Sotto 	 * Zero is used as an EOF marker, so return 1 instead.
1282a8013e93Sotto 	 */
1283a8013e93Sotto 	return (sum == 0 ? 1 : sum);
1284ae8d569bSderaadt }
1285ae8d569bSderaadt 
1286774cb253Savsm static int
128726da422aStedu asciifile(FILE *f)
1288ae8d569bSderaadt {
1289063293f0Sotto 	unsigned char buf[BUFSIZ];
1290*dba1d6eaSray 	size_t i, cnt;
1291ae8d569bSderaadt 
1292*dba1d6eaSray 	if (f == NULL)
12932a1593dfStedu 		return (1);
12942a1593dfStedu 
12954ec4b3d5Smillert 	rewind(f);
12964ec4b3d5Smillert 	cnt = fread(buf, 1, sizeof(buf), f);
12974640f069Stedu 	for (i = 0; i < cnt; i++)
1298ed58cb82Stedu 		if (!isprint(buf[i]) && !isspace(buf[i]))
1299ae8d569bSderaadt 			return (0);
1300ae8d569bSderaadt 	return (1);
1301ae8d569bSderaadt }
1302ae8d569bSderaadt 
13035c68ba7eSespie #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
13045c68ba7eSespie 
130596e45528Sotto static char *
1306*dba1d6eaSray match_function(const long *f, int pos, FILE *fp)
130796e45528Sotto {
1308063293f0Sotto 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
130996e45528Sotto 	size_t nc;
131096e45528Sotto 	int last = lastline;
13115c68ba7eSespie 	char *state = NULL;
131296e45528Sotto 
131396e45528Sotto 	lastline = pos;
131496e45528Sotto 	while (pos > last) {
1315*dba1d6eaSray 		fseek(fp, f[pos - 1], SEEK_SET);
131696e45528Sotto 		nc = f[pos] - f[pos - 1];
131796e45528Sotto 		if (nc >= sizeof(buf))
131896e45528Sotto 			nc = sizeof(buf) - 1;
1319*dba1d6eaSray 		nc = fread(buf, 1, nc, fp);
132096e45528Sotto 		if (nc > 0) {
132196e45528Sotto 			buf[nc] = '\0';
13224fd6ed32Sgilles 			buf[strcspn(buf, "\n")] = '\0';
13234fd6ed32Sgilles 
132496e45528Sotto 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
13255c68ba7eSespie 				if (begins_with(buf, "private:")) {
13265c68ba7eSespie 					if (!state)
13275c68ba7eSespie 						state = " (private)";
13285c68ba7eSespie 				} else if (begins_with(buf, "protected:")) {
13295c68ba7eSespie 					if (!state)
13305c68ba7eSespie 						state = " (protected)";
13315c68ba7eSespie 				} else if (begins_with(buf, "public:")) {
13325c68ba7eSespie 					if (!state)
13335c68ba7eSespie 						state = " (public)";
13345c68ba7eSespie 				} else {
133596e45528Sotto 					strlcpy(lastbuf, buf, sizeof lastbuf);
13365c68ba7eSespie 					if (state)
13375c68ba7eSespie 						strlcat(lastbuf, state,
13385c68ba7eSespie 						    sizeof lastbuf);
133996e45528Sotto 					lastmatchline = pos;
134096e45528Sotto 					return lastbuf;
134196e45528Sotto 				}
134296e45528Sotto 			}
13435c68ba7eSespie 		}
134496e45528Sotto 		pos--;
134596e45528Sotto 	}
134696e45528Sotto 	return lastmatchline > 0 ? lastbuf : NULL;
134796e45528Sotto }
134896e45528Sotto 
1349ae8d569bSderaadt /* dump accumulated "context" diff changes */
135026da422aStedu static void
1351*dba1d6eaSray dump_context_vec(FILE *f1, FILE *f2, int flags)
1352ae8d569bSderaadt {
135348b8c3e3Sderaadt 	struct context_vec *cvp = context_vec_start;
135448b8c3e3Sderaadt 	int lowa, upb, lowc, upd, do_output;
135526da422aStedu 	int a, b, c, d;
135696e45528Sotto 	char ch, *f;
1357ae8d569bSderaadt 
135890f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
1359ae8d569bSderaadt 		return;
1360ae8d569bSderaadt 
136126da422aStedu 	b = d = 0;		/* gcc */
13624a034c3aSray 	lowa = MAX(1, cvp->a - context);
13634a034c3aSray 	upb = MIN(len[0], context_vec_ptr->b + context);
13644a034c3aSray 	lowc = MAX(1, cvp->c - context);
13654a034c3aSray 	upd = MIN(len[1], context_vec_ptr->d + context);
1366ae8d569bSderaadt 
136796e45528Sotto 	printf("***************");
1368*dba1d6eaSray 	if ((flags & D_PROTOTYPE)) {
136996e45528Sotto 		f = match_function(ixold, lowa-1, f1);
137096e45528Sotto 		if (f != NULL) {
137196e45528Sotto 			putchar(' ');
137296e45528Sotto 			fputs(f, stdout);
137396e45528Sotto 		}
137496e45528Sotto 	}
137596e45528Sotto 	printf("\n*** ");
1376ae8d569bSderaadt 	range(lowa, upb, ",");
1377ae8d569bSderaadt 	printf(" ****\n");
1378ae8d569bSderaadt 
1379ae8d569bSderaadt 	/*
13804ec4b3d5Smillert 	 * Output changes to the "old" file.  The first loop suppresses
1381ae8d569bSderaadt 	 * output if there were no changes to the "old" file (we'll see
1382ae8d569bSderaadt 	 * the "old" lines as context in the "new" list).
1383ae8d569bSderaadt 	 */
1384ae8d569bSderaadt 	do_output = 0;
1385ae8d569bSderaadt 	for (; cvp <= context_vec_ptr; cvp++)
1386ae8d569bSderaadt 		if (cvp->a <= cvp->b) {
1387ae8d569bSderaadt 			cvp = context_vec_start;
1388ae8d569bSderaadt 			do_output++;
1389ae8d569bSderaadt 			break;
1390ae8d569bSderaadt 		}
1391ae8d569bSderaadt 	if (do_output) {
1392ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
139326da422aStedu 			a = cvp->a;
139426da422aStedu 			b = cvp->b;
139526da422aStedu 			c = cvp->c;
139626da422aStedu 			d = cvp->d;
1397ae8d569bSderaadt 
1398ae8d569bSderaadt 			if (a <= b && c <= d)
1399ae8d569bSderaadt 				ch = 'c';
1400ae8d569bSderaadt 			else
1401ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1402ae8d569bSderaadt 
1403ae8d569bSderaadt 			if (ch == 'a')
1404*dba1d6eaSray 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1405ae8d569bSderaadt 			else {
1406*dba1d6eaSray 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
14074ec4b3d5Smillert 				fetch(ixold, a, b, f1,
1408*dba1d6eaSray 				    ch == 'c' ? '!' : '-', 0, flags);
1409ae8d569bSderaadt 			}
1410ae8d569bSderaadt 			lowa = b + 1;
1411ae8d569bSderaadt 			cvp++;
1412ae8d569bSderaadt 		}
1413*dba1d6eaSray 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1414ae8d569bSderaadt 	}
1415ae8d569bSderaadt 	/* output changes to the "new" file */
1416ae8d569bSderaadt 	printf("--- ");
1417ae8d569bSderaadt 	range(lowc, upd, ",");
1418ae8d569bSderaadt 	printf(" ----\n");
1419ae8d569bSderaadt 
1420ae8d569bSderaadt 	do_output = 0;
1421ae8d569bSderaadt 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1422ae8d569bSderaadt 		if (cvp->c <= cvp->d) {
1423ae8d569bSderaadt 			cvp = context_vec_start;
1424ae8d569bSderaadt 			do_output++;
1425ae8d569bSderaadt 			break;
1426ae8d569bSderaadt 		}
1427ae8d569bSderaadt 	if (do_output) {
1428ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
142926da422aStedu 			a = cvp->a;
143026da422aStedu 			b = cvp->b;
143126da422aStedu 			c = cvp->c;
143226da422aStedu 			d = cvp->d;
1433ae8d569bSderaadt 
1434ae8d569bSderaadt 			if (a <= b && c <= d)
1435ae8d569bSderaadt 				ch = 'c';
1436ae8d569bSderaadt 			else
1437ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1438ae8d569bSderaadt 
1439ae8d569bSderaadt 			if (ch == 'd')
1440*dba1d6eaSray 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1441ae8d569bSderaadt 			else {
1442*dba1d6eaSray 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
14434ec4b3d5Smillert 				fetch(ixnew, c, d, f2,
1444*dba1d6eaSray 				    ch == 'c' ? '!' : '+', 0, flags);
1445ae8d569bSderaadt 			}
1446ae8d569bSderaadt 			lowc = d + 1;
1447ae8d569bSderaadt 			cvp++;
1448ae8d569bSderaadt 		}
1449*dba1d6eaSray 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1450ae8d569bSderaadt 	}
1451ae8d569bSderaadt 	context_vec_ptr = context_vec_start - 1;
1452ae8d569bSderaadt }
14539de32c1bSmillert 
14549de32c1bSmillert /* dump accumulated "unified" diff changes */
14559de32c1bSmillert static void
1456*dba1d6eaSray dump_unified_vec(FILE *f1, FILE *f2, int flags)
14579de32c1bSmillert {
14589de32c1bSmillert 	struct context_vec *cvp = context_vec_start;
14599de32c1bSmillert 	int lowa, upb, lowc, upd;
14609de32c1bSmillert 	int a, b, c, d;
146196e45528Sotto 	char ch, *f;
14629de32c1bSmillert 
146390f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
14649de32c1bSmillert 		return;
14659de32c1bSmillert 
14669de32c1bSmillert 	b = d = 0;		/* gcc */
14674a034c3aSray 	lowa = MAX(1, cvp->a - context);
14684a034c3aSray 	upb = MIN(len[0], context_vec_ptr->b + context);
14694a034c3aSray 	lowc = MAX(1, cvp->c - context);
14704a034c3aSray 	upd = MIN(len[1], context_vec_ptr->d + context);
14719de32c1bSmillert 
1472c72ea322Smillert 	fputs("@@ -", stdout);
1473c72ea322Smillert 	uni_range(lowa, upb);
1474c72ea322Smillert 	fputs(" +", stdout);
1475c72ea322Smillert 	uni_range(lowc, upd);
147696e45528Sotto 	fputs(" @@", stdout);
1477*dba1d6eaSray 	if ((flags & D_PROTOTYPE)) {
147896e45528Sotto 		f = match_function(ixold, lowa-1, f1);
147996e45528Sotto 		if (f != NULL) {
148096e45528Sotto 			putchar(' ');
148196e45528Sotto 			fputs(f, stdout);
148296e45528Sotto 		}
148396e45528Sotto 	}
148496e45528Sotto 	putchar('\n');
14859de32c1bSmillert 
14869de32c1bSmillert 	/*
14879de32c1bSmillert 	 * Output changes in "unified" diff format--the old and new lines
14889de32c1bSmillert 	 * are printed together.
14899de32c1bSmillert 	 */
14909de32c1bSmillert 	for (; cvp <= context_vec_ptr; cvp++) {
14919de32c1bSmillert 		a = cvp->a;
14929de32c1bSmillert 		b = cvp->b;
14939de32c1bSmillert 		c = cvp->c;
14949de32c1bSmillert 		d = cvp->d;
14959de32c1bSmillert 
14969de32c1bSmillert 		/*
14979de32c1bSmillert 		 * c: both new and old changes
14989de32c1bSmillert 		 * d: only changes in the old file
14999de32c1bSmillert 		 * a: only changes in the new file
15009de32c1bSmillert 		 */
15019de32c1bSmillert 		if (a <= b && c <= d)
15029de32c1bSmillert 			ch = 'c';
15039de32c1bSmillert 		else
15049de32c1bSmillert 			ch = (a <= b) ? 'd' : 'a';
15059de32c1bSmillert 
15069de32c1bSmillert 		switch (ch) {
15079de32c1bSmillert 		case 'c':
1508*dba1d6eaSray 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1509*dba1d6eaSray 			fetch(ixold, a, b, f1, '-', 0, flags);
1510*dba1d6eaSray 			fetch(ixnew, c, d, f2, '+', 0, flags);
15119de32c1bSmillert 			break;
15129de32c1bSmillert 		case 'd':
1513*dba1d6eaSray 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1514*dba1d6eaSray 			fetch(ixold, a, b, f1, '-', 0, flags);
15159de32c1bSmillert 			break;
15169de32c1bSmillert 		case 'a':
1517*dba1d6eaSray 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1518*dba1d6eaSray 			fetch(ixnew, c, d, f2, '+', 0, flags);
15199de32c1bSmillert 			break;
15209de32c1bSmillert 		}
15219de32c1bSmillert 		lowa = b + 1;
15229de32c1bSmillert 		lowc = d + 1;
15239de32c1bSmillert 	}
1524*dba1d6eaSray 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
15259de32c1bSmillert 
15269de32c1bSmillert 	context_vec_ptr = context_vec_start - 1;
15279de32c1bSmillert }
15287bdb251cSmillert 
15297bdb251cSmillert static void
15307bdb251cSmillert print_header(const char *file1, const char *file2)
15317bdb251cSmillert {
15327bdb251cSmillert 	if (label[0] != NULL)
15337bdb251cSmillert 		printf("%s %s\n", format == D_CONTEXT ? "***" : "---",
15347bdb251cSmillert 		    label[0]);
15357bdb251cSmillert 	else
15367bdb251cSmillert 		printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---",
15377bdb251cSmillert 		    file1, ctime(&stb1.st_mtime));
15387bdb251cSmillert 	if (label[1] != NULL)
15397bdb251cSmillert 		printf("%s %s\n", format == D_CONTEXT ? "---" : "+++",
15407bdb251cSmillert 		    label[1]);
15417bdb251cSmillert 	else
15427bdb251cSmillert 		printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++",
15437bdb251cSmillert 		    file2, ctime(&stb2.st_mtime));
15447bdb251cSmillert }
1545