xref: /openbsd-src/usr.bin/diff/diffreg.c (revision b9fc9a728fce9c4289b7e9a992665e28d5629a54)
1*b9fc9a72Sderaadt /*	$OpenBSD: diffreg.c,v 1.84 2015/01/16 06:40:07 deraadt Exp $	*/
2d0c3f575Sderaadt 
3d0c3f575Sderaadt /*
4d0c3f575Sderaadt  * Copyright (C) Caldera International Inc.  2001-2002.
5d0c3f575Sderaadt  * All rights reserved.
6d0c3f575Sderaadt  *
7d0c3f575Sderaadt  * Redistribution and use in source and binary forms, with or without
8d0c3f575Sderaadt  * modification, are permitted provided that the following conditions
9d0c3f575Sderaadt  * are met:
10d0c3f575Sderaadt  * 1. Redistributions of source code and documentation must retain the above
11d0c3f575Sderaadt  *    copyright notice, this list of conditions and the following disclaimer.
12d0c3f575Sderaadt  * 2. Redistributions in binary form must reproduce the above copyright
13d0c3f575Sderaadt  *    notice, this list of conditions and the following disclaimer in the
14d0c3f575Sderaadt  *    documentation and/or other materials provided with the distribution.
15d0c3f575Sderaadt  * 3. All advertising materials mentioning features or use of this software
16d0c3f575Sderaadt  *    must display the following acknowledgement:
17d0c3f575Sderaadt  *	This product includes software developed or owned by Caldera
18d0c3f575Sderaadt  *	International, Inc.
19d0c3f575Sderaadt  * 4. Neither the name of Caldera International, Inc. nor the names of other
20d0c3f575Sderaadt  *    contributors may be used to endorse or promote products derived from
21d0c3f575Sderaadt  *    this software without specific prior written permission.
22d0c3f575Sderaadt  *
23d0c3f575Sderaadt  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24d0c3f575Sderaadt  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25d0c3f575Sderaadt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26d0c3f575Sderaadt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27d0c3f575Sderaadt  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28d0c3f575Sderaadt  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29d0c3f575Sderaadt  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30d0c3f575Sderaadt  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31d0c3f575Sderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32d0c3f575Sderaadt  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33d0c3f575Sderaadt  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34d0c3f575Sderaadt  * POSSIBILITY OF SUCH DAMAGE.
35d0c3f575Sderaadt  */
364ec4b3d5Smillert /*-
374ec4b3d5Smillert  * Copyright (c) 1991, 1993
384ec4b3d5Smillert  *	The Regents of the University of California.  All rights reserved.
394ec4b3d5Smillert  *
404ec4b3d5Smillert  * Redistribution and use in source and binary forms, with or without
414ec4b3d5Smillert  * modification, are permitted provided that the following conditions
424ec4b3d5Smillert  * are met:
434ec4b3d5Smillert  * 1. Redistributions of source code must retain the above copyright
444ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer.
454ec4b3d5Smillert  * 2. Redistributions in binary form must reproduce the above copyright
464ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer in the
474ec4b3d5Smillert  *    documentation and/or other materials provided with the distribution.
484ec4b3d5Smillert  * 3. Neither the name of the University nor the names of its contributors
494ec4b3d5Smillert  *    may be used to endorse or promote products derived from this software
504ec4b3d5Smillert  *    without specific prior written permission.
514ec4b3d5Smillert  *
524ec4b3d5Smillert  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
534ec4b3d5Smillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
544ec4b3d5Smillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
554ec4b3d5Smillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
564ec4b3d5Smillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
574ec4b3d5Smillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
584ec4b3d5Smillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
594ec4b3d5Smillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
604ec4b3d5Smillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
614ec4b3d5Smillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
624ec4b3d5Smillert  * SUCH DAMAGE.
634ec4b3d5Smillert  *
644ec4b3d5Smillert  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
654ec4b3d5Smillert  */
664ec4b3d5Smillert 
674ec4b3d5Smillert #include <sys/stat.h>
68b4bca33fSmillert #include <sys/wait.h>
6926da422aStedu 
704ec4b3d5Smillert #include <ctype.h>
714ec4b3d5Smillert #include <err.h>
727b6ec9e4Smillert #include <errno.h>
7326da422aStedu #include <fcntl.h>
740efcb26eSmillert #include <stddef.h>
754ec4b3d5Smillert #include <stdio.h>
764ec4b3d5Smillert #include <stdlib.h>
7726da422aStedu #include <string.h>
784ec4b3d5Smillert #include <unistd.h>
79*b9fc9a72Sderaadt #include <limits.h>
80ae8d569bSderaadt 
81ae8d569bSderaadt #include "diff.h"
82b4bca33fSmillert #include "pathnames.h"
834a034c3aSray #include "xmalloc.h"
8426da422aStedu 
85*b9fc9a72Sderaadt #define MINIMUM(a, b)	(((a) < (b)) ? (a) : (b))
86*b9fc9a72Sderaadt #define MAXIMUM(a, b)	(((a) > (b)) ? (a) : (b))
87*b9fc9a72Sderaadt 
88ae8d569bSderaadt /*
89ae8d569bSderaadt  * diff - compare two files.
90ae8d569bSderaadt  */
91ae8d569bSderaadt 
92ae8d569bSderaadt /*
93ae8d569bSderaadt  *	Uses an algorithm due to Harold Stone, which finds
94ae8d569bSderaadt  *	a pair of longest identical subsequences in the two
95ae8d569bSderaadt  *	files.
96ae8d569bSderaadt  *
97ae8d569bSderaadt  *	The major goal is to generate the match vector J.
98ae8d569bSderaadt  *	J[i] is the index of the line in file1 corresponding
99ae8d569bSderaadt  *	to line i file0. J[i] = 0 if there is no
100ae8d569bSderaadt  *	such line in file1.
101ae8d569bSderaadt  *
102ae8d569bSderaadt  *	Lines are hashed so as to work in core. All potential
103ae8d569bSderaadt  *	matches are located by sorting the lines of each file
104ae8d569bSderaadt  *	on the hash (called ``value''). In particular, this
105ae8d569bSderaadt  *	collects the equivalence classes in file1 together.
106ae8d569bSderaadt  *	Subroutine equiv replaces the value of each line in
107ae8d569bSderaadt  *	file0 by the index of the first element of its
108ae8d569bSderaadt  *	matching equivalence in (the reordered) file1.
109ae8d569bSderaadt  *	To save space equiv squeezes file1 into a single
110ae8d569bSderaadt  *	array member in which the equivalence classes
111ae8d569bSderaadt  *	are simply concatenated, except that their first
112ae8d569bSderaadt  *	members are flagged by changing sign.
113ae8d569bSderaadt  *
114ae8d569bSderaadt  *	Next the indices that point into member are unsorted into
115ae8d569bSderaadt  *	array class according to the original order of file0.
116ae8d569bSderaadt  *
117ae8d569bSderaadt  *	The cleverness lies in routine stone. This marches
118ae8d569bSderaadt  *	through the lines of file0, developing a vector klist
119ae8d569bSderaadt  *	of "k-candidates". At step i a k-candidate is a matched
120ae8d569bSderaadt  *	pair of lines x,y (x in file0 y in file1) such that
121ae8d569bSderaadt  *	there is a common subsequence of length k
122ae8d569bSderaadt  *	between the first i lines of file0 and the first y
123ae8d569bSderaadt  *	lines of file1, but there is no such subsequence for
124ae8d569bSderaadt  *	any smaller y. x is the earliest possible mate to y
125ae8d569bSderaadt  *	that occurs in such a subsequence.
126ae8d569bSderaadt  *
127ae8d569bSderaadt  *	Whenever any of the members of the equivalence class of
128ae8d569bSderaadt  *	lines in file1 matable to a line in file0 has serial number
129ae8d569bSderaadt  *	less than the y of some k-candidate, that k-candidate
130ae8d569bSderaadt  *	with the smallest such y is replaced. The new
131ae8d569bSderaadt  *	k-candidate is chained (via pred) to the current
132ae8d569bSderaadt  *	k-1 candidate so that the actual subsequence can
133ae8d569bSderaadt  *	be recovered. When a member has serial number greater
134ae8d569bSderaadt  *	that the y of all k-candidates, the klist is extended.
135ae8d569bSderaadt  *	At the end, the longest subsequence is pulled out
136ae8d569bSderaadt  *	and placed in the array J by unravel
137ae8d569bSderaadt  *
138ae8d569bSderaadt  *	With J in hand, the matches there recorded are
139ae8d569bSderaadt  *	check'ed against reality to assure that no spurious
140ae8d569bSderaadt  *	matches have crept in due to hashing. If they have,
141ae8d569bSderaadt  *	they are broken, and "jackpot" is recorded--a harmless
142ae8d569bSderaadt  *	matter except that a true match for a spuriously
143ae8d569bSderaadt  *	mated line may now be unnecessarily reported as a change.
144ae8d569bSderaadt  *
145ae8d569bSderaadt  *	Much of the complexity of the program comes simply
146ae8d569bSderaadt  *	from trying to minimize core utilization and
147ae8d569bSderaadt  *	maximize the range of doable problems by dynamically
148ae8d569bSderaadt  *	allocating what is needed and reusing what is not.
149ae8d569bSderaadt  *	The core requirements for problems larger than somewhat
150ae8d569bSderaadt  *	are (in words) 2*length(file0) + length(file1) +
151ae8d569bSderaadt  *	3*(number of k-candidates installed),  typically about
152ae8d569bSderaadt  *	6n words for files of length n.
153ae8d569bSderaadt  */
154ae8d569bSderaadt 
155ae8d569bSderaadt struct cand {
156ae8d569bSderaadt 	int	x;
157ae8d569bSderaadt 	int	y;
158ae8d569bSderaadt 	int	pred;
15960b9d8fdSderaadt };
16026da422aStedu 
161ae8d569bSderaadt struct line {
162ae8d569bSderaadt 	int	serial;
163ae8d569bSderaadt 	int	value;
16492af4c2dStedu } *file[2];
16526da422aStedu 
1660efcb26eSmillert /*
1670efcb26eSmillert  * The following struct is used to record change information when
1680efcb26eSmillert  * doing a "context" or "unified" diff.  (see routine "change" to
1690efcb26eSmillert  * understand the highly mnemonic field names)
1700efcb26eSmillert  */
1710efcb26eSmillert struct context_vec {
1720efcb26eSmillert 	int	a;		/* start line in old file */
1730efcb26eSmillert 	int	b;		/* end line in old file */
1740efcb26eSmillert 	int	c;		/* start line in new file */
1750efcb26eSmillert 	int	d;		/* end line in new file */
1760efcb26eSmillert };
1770efcb26eSmillert 
1785e07282dSray #define	diff_output	printf
1797b6ec9e4Smillert static FILE	*opentemp(const char *);
180ac73e8e6Smillert static void	 output(char *, FILE *, char *, FILE *, int);
1816fc40daeSray static void	 check(FILE *, FILE *, int);
18226da422aStedu static void	 range(int, int, char *);
183c72ea322Smillert static void	 uni_range(int, int);
184dba1d6eaSray static void	 dump_context_vec(FILE *, FILE *, int);
185dba1d6eaSray static void	 dump_unified_vec(FILE *, FILE *, int);
186dba1d6eaSray static void	 prepare(int, FILE *, off_t, int);
18726da422aStedu static void	 prune(void);
18826da422aStedu static void	 equiv(struct line *, int, struct line *, int, int *);
18926da422aStedu static void	 unravel(int);
19026da422aStedu static void	 unsort(struct line *, int, int *);
19161783bcdSespie static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
19226da422aStedu static void	 sort(struct line *, int);
1937bdb251cSmillert static void	 print_header(const char *, const char *);
194ccd55a2cSotto static int	 ignoreline(char *);
1954ec4b3d5Smillert static int	 asciifile(FILE *);
196dba1d6eaSray static int	 fetch(long *, int, int, FILE *, int, int, int);
19726da422aStedu static int	 newcand(int, int, int);
19826da422aStedu static int	 search(int *, int, int);
1994ec4b3d5Smillert static int	 skipline(FILE *);
2006e18f850Sotto static int	 isqrt(int);
201dba1d6eaSray static int	 stone(int *, int, int *, int *, int);
202dba1d6eaSray static int	 readhash(FILE *, int);
2034ec4b3d5Smillert static int	 files_differ(FILE *, FILE *, int);
20496e45528Sotto static char	*match_function(const long *, int, FILE *);
205ccd55a2cSotto static char	*preadline(int, size_t, off_t);
2066e18f850Sotto 
207aa215433Sray static int  *J;			/* will be overlaid on class */
208aa215433Sray static int  *class;		/* will be overlaid on file[0] */
209aa215433Sray static int  *klist;		/* will be overlaid on file[0] after class */
210aa215433Sray static int  *member;		/* will be overlaid on file[1] */
211aa215433Sray static int   clen;
212aa215433Sray static int   inifdef;		/* whether or not we are in a #ifdef block */
213aa215433Sray static int   len[2];
214aa215433Sray static int   pref, suff;	/* length of prefix and suffix */
215aa215433Sray static int   slen[2];
216aa215433Sray static int   anychange;
217aa215433Sray static long *ixnew;		/* will be overlaid on file[1] */
218aa215433Sray static long *ixold;		/* will be overlaid on klist */
219aa215433Sray static struct cand *clist;	/* merely a free storage pot for candidates */
220aa215433Sray static int   clistlen;		/* the length of clist */
221aa215433Sray static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
222aa215433Sray static u_char *chrtran;		/* translation table for case-folding */
223aa215433Sray static struct context_vec *context_vec_start;
224aa215433Sray static struct context_vec *context_vec_end;
225aa215433Sray static struct context_vec *context_vec_ptr;
226aa215433Sray 
227aa215433Sray #define FUNCTION_CONTEXT_SIZE	55
228aa215433Sray static char lastbuf[FUNCTION_CONTEXT_SIZE];
229aa215433Sray static int lastline;
230aa215433Sray static int lastmatchline;
231aa215433Sray 
23226da422aStedu 
23326da422aStedu /*
23426da422aStedu  * chrtran points to one of 2 translation tables: cup2low if folding upper to
23526da422aStedu  * lower case clow2low if not folding case
236ae8d569bSderaadt  */
2378a15d8deSderaadt u_char clow2low[256] = {
23826da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
23926da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
24026da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
24126da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
24226da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
24326da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
24426da422aStedu 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
24526da422aStedu 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
24626da422aStedu 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
24726da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
24826da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
24926da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
25026da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
25126da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
25226da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
25326da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
25426da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
25526da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
25626da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
25726da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
25826da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
25926da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
26026da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
26126da422aStedu 	0xfd, 0xfe, 0xff
262ae8d569bSderaadt };
263ae8d569bSderaadt 
2648a15d8deSderaadt u_char cup2low[256] = {
26526da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
26626da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
26726da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
26826da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
26926da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
27026da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
27126da422aStedu 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
27226da422aStedu 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
27326da422aStedu 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
27426da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
27526da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
27626da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
27726da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
27826da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
27926da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
28026da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
28126da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
28226da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
28326da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
28426da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
28526da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
28626da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
28726da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
28826da422aStedu 	0xfd, 0xfe, 0xff
289ae8d569bSderaadt };
290ae8d569bSderaadt 
291b4bca33fSmillert int
29257003866Sray diffreg(char *file1, char *file2, int flags)
293ae8d569bSderaadt {
294dba1d6eaSray 	FILE *f1, *f2;
295dba1d6eaSray 	int i, rval, ostdout = -1;
296b4bca33fSmillert 	pid_t pid = -1;
297ae8d569bSderaadt 
298dba1d6eaSray 	f1 = f2 = NULL;
299dba1d6eaSray 	rval = D_SAME;
3004ec4b3d5Smillert 	anychange = 0;
30196e45528Sotto 	lastline = 0;
30296e45528Sotto 	lastmatchline = 0;
3030efcb26eSmillert 	context_vec_ptr = context_vec_start - 1;
304dba1d6eaSray 	if (flags & D_IGNORECASE)
305dba1d6eaSray 		chrtran = cup2low;
306dba1d6eaSray 	else
307dba1d6eaSray 		chrtran = clow2low;
3087b6ec9e4Smillert 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309fed3a06dSmillert 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
31066e5764eSmillert 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311f28259e9Smillert 		goto closem;
3124ec4b3d5Smillert 
3134ec4b3d5Smillert 	if (flags & D_EMPTY1)
3144ec4b3d5Smillert 		f1 = fopen(_PATH_DEVNULL, "r");
3154ec4b3d5Smillert 	else {
3167b6ec9e4Smillert 		if (!S_ISREG(stb1.st_mode)) {
3177b6ec9e4Smillert 			if ((f1 = opentemp(file1)) == NULL ||
3187b6ec9e4Smillert 			    fstat(fileno(f1), &stb1) < 0) {
3194ec4b3d5Smillert 				warn("%s", file1);
3204ec4b3d5Smillert 				status |= 2;
3214ec4b3d5Smillert 				goto closem;
32248b947b7Smillert 			}
3237b6ec9e4Smillert 		} else if (strcmp(file1, "-") == 0)
324b1a26502Smillert 			f1 = stdin;
325b1a26502Smillert 		else
3264ec4b3d5Smillert 			f1 = fopen(file1, "r");
3274ec4b3d5Smillert 	}
3284ec4b3d5Smillert 	if (f1 == NULL) {
3294ec4b3d5Smillert 		warn("%s", file1);
3304ec4b3d5Smillert 		status |= 2;
3314ec4b3d5Smillert 		goto closem;
3324ec4b3d5Smillert 	}
3334ec4b3d5Smillert 
3344ec4b3d5Smillert 	if (flags & D_EMPTY2)
3354ec4b3d5Smillert 		f2 = fopen(_PATH_DEVNULL, "r");
3364ec4b3d5Smillert 	else {
3377b6ec9e4Smillert 		if (!S_ISREG(stb2.st_mode)) {
3387b6ec9e4Smillert 			if ((f2 = opentemp(file2)) == NULL ||
3397b6ec9e4Smillert 			    fstat(fileno(f2), &stb2) < 0) {
3404ec4b3d5Smillert 				warn("%s", file2);
3414ec4b3d5Smillert 				status |= 2;
3424ec4b3d5Smillert 				goto closem;
3434ec4b3d5Smillert 			}
3447b6ec9e4Smillert 		} else if (strcmp(file2, "-") == 0)
345b1a26502Smillert 			f2 = stdin;
346b1a26502Smillert 		else
3474ec4b3d5Smillert 			f2 = fopen(file2, "r");
3484ec4b3d5Smillert 	}
3494ec4b3d5Smillert 	if (f2 == NULL) {
3504ec4b3d5Smillert 		warn("%s", file2);
3514ec4b3d5Smillert 		status |= 2;
3524ec4b3d5Smillert 		goto closem;
3534ec4b3d5Smillert 	}
3544ec4b3d5Smillert 
3554ec4b3d5Smillert 	switch (files_differ(f1, f2, flags)) {
3564ec4b3d5Smillert 	case 0:
357b4bca33fSmillert 		goto closem;
3584ec4b3d5Smillert 	case 1:
3594ec4b3d5Smillert 		break;
3604ec4b3d5Smillert 	default:
3614ec4b3d5Smillert 		/* error */
3624ec4b3d5Smillert 		status |= 2;
3634ec4b3d5Smillert 		goto closem;
364ae8d569bSderaadt 	}
3654ec4b3d5Smillert 
366dba1d6eaSray 	if ((flags & D_FORCEASCII) == 0 &&
367dba1d6eaSray 	    (!asciifile(f1) || !asciifile(f2))) {
368b4bca33fSmillert 		rval = D_BINARY;
3695f9fc8aaSmillert 		status |= 1;
370cab5d83cSmillert 		goto closem;
371cab5d83cSmillert 	}
372b4bca33fSmillert 	if (lflag) {
373b4bca33fSmillert 		/* redirect stdout to pr */
374b4bca33fSmillert 		int pfd[2];
375b4bca33fSmillert 		char *header;
376b4bca33fSmillert 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
377b4bca33fSmillert 
3784a034c3aSray 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
379b4bca33fSmillert 		prargv[2] = header;
380b4bca33fSmillert 		fflush(stdout);
381b4bca33fSmillert 		rewind(stdout);
382b4bca33fSmillert 		pipe(pfd);
383b4bca33fSmillert 		switch ((pid = fork())) {
384b4bca33fSmillert 		case -1:
385b4bca33fSmillert 			warnx("No more processes");
386b4bca33fSmillert 			status |= 2;
3874a034c3aSray 			xfree(header);
388f481a864Sray 			rval = D_ERROR;
389f481a864Sray 			goto closem;
390b4bca33fSmillert 		case 0:
391b4bca33fSmillert 			/* child */
392b4bca33fSmillert 			if (pfd[0] != STDIN_FILENO) {
393b4bca33fSmillert 				dup2(pfd[0], STDIN_FILENO);
394b4bca33fSmillert 				close(pfd[0]);
395b4bca33fSmillert 			}
396b4bca33fSmillert 			close(pfd[1]);
397b4bca33fSmillert 			execv(_PATH_PR, prargv);
398b4bca33fSmillert 			_exit(127);
399b4bca33fSmillert 		default:
400b4bca33fSmillert 			/* parent */
401b4bca33fSmillert 			if (pfd[1] != STDOUT_FILENO) {
402b4bca33fSmillert 				ostdout = dup(STDOUT_FILENO);
403b4bca33fSmillert 				dup2(pfd[1], STDOUT_FILENO);
404b4bca33fSmillert 				close(pfd[1]);
405b4bca33fSmillert 			}
406b4bca33fSmillert 			close(pfd[0]);
407b4bca33fSmillert 			rewind(stdout);
4084a034c3aSray 			xfree(header);
409b4bca33fSmillert 		}
410ac73e8e6Smillert 	}
411dba1d6eaSray 	prepare(0, f1, stb1.st_size, flags);
412dba1d6eaSray 	prepare(1, f2, stb2.st_size, flags);
41357003866Sray 
414ae8d569bSderaadt 	prune();
415ae8d569bSderaadt 	sort(sfile[0], slen[0]);
416ae8d569bSderaadt 	sort(sfile[1], slen[1]);
417ae8d569bSderaadt 
418ae8d569bSderaadt 	member = (int *)file[1];
419ae8d569bSderaadt 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
420aa215433Sray 	member = xrealloc(member, slen[1] + 2, sizeof(*member));
421ae8d569bSderaadt 
422ae8d569bSderaadt 	class = (int *)file[0];
423ae8d569bSderaadt 	unsort(sfile[0], slen[0], class);
424aa215433Sray 	class = xrealloc(class, slen[0] + 2, sizeof(*class));
425ae8d569bSderaadt 
42657003866Sray 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
427058e94f4Smillert 	clen = 0;
4286e18f850Sotto 	clistlen = 100;
42957003866Sray 	clist = xcalloc(clistlen, sizeof(*clist));
430dba1d6eaSray 	i = stone(class, slen[0], member, klist, flags);
4314a034c3aSray 	xfree(member);
4324a034c3aSray 	xfree(class);
433ae8d569bSderaadt 
434aa215433Sray 	J = xrealloc(J, len[0] + 2, sizeof(*J));
435ae8d569bSderaadt 	unravel(klist[i]);
4364a034c3aSray 	xfree(clist);
4374a034c3aSray 	xfree(klist);
438ae8d569bSderaadt 
439aa215433Sray 	ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
440aa215433Sray 	ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
4416fc40daeSray 	check(f1, f2, flags);
442dba1d6eaSray 	output(file1, f1, file2, f2, flags);
443b4bca33fSmillert 	if (ostdout != -1) {
444b4bca33fSmillert 		int wstatus;
4454ec4b3d5Smillert 
446b4bca33fSmillert 		/* close the pipe to pr and restore stdout */
447b4bca33fSmillert 		fflush(stdout);
448b4bca33fSmillert 		rewind(stdout);
449b4bca33fSmillert 		if (ostdout != STDOUT_FILENO) {
450b4bca33fSmillert 			close(STDOUT_FILENO);
451b4bca33fSmillert 			dup2(ostdout, STDOUT_FILENO);
452b4bca33fSmillert 			close(ostdout);
453b4bca33fSmillert 		}
454b4bca33fSmillert 		waitpid(pid, &wstatus, 0);
45552e5bd6bSmillert 	}
4564ec4b3d5Smillert closem:
4575afc3be2Smillert 	if (anychange) {
4585afc3be2Smillert 		status |= 1;
4595afc3be2Smillert 		if (rval == D_SAME)
4605afc3be2Smillert 			rval = D_DIFFER;
4615afc3be2Smillert 	}
4624ec4b3d5Smillert 	if (f1 != NULL)
4634ec4b3d5Smillert 		fclose(f1);
4644ec4b3d5Smillert 	if (f2 != NULL)
4654ec4b3d5Smillert 		fclose(f2);
466dba1d6eaSray 
467b4bca33fSmillert 	return (rval);
4684ec4b3d5Smillert }
4694ec4b3d5Smillert 
4704ec4b3d5Smillert /*
4714ec4b3d5Smillert  * Check to see if the given files differ.
4724ec4b3d5Smillert  * Returns 0 if they are the same, 1 if different, and -1 on error.
4734ec4b3d5Smillert  * XXX - could use code from cmp(1) [faster]
4744ec4b3d5Smillert  */
4754ec4b3d5Smillert static int
4764ec4b3d5Smillert files_differ(FILE *f1, FILE *f2, int flags)
4774ec4b3d5Smillert {
4784ec4b3d5Smillert 	char buf1[BUFSIZ], buf2[BUFSIZ];
4794ec4b3d5Smillert 	size_t i, j;
4804ec4b3d5Smillert 
4814ec4b3d5Smillert 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
4824ec4b3d5Smillert 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
4834ec4b3d5Smillert 		return (1);
4844ec4b3d5Smillert 	for (;;) {
4854ec4b3d5Smillert 		i = fread(buf1, 1, sizeof(buf1), f1);
4864ec4b3d5Smillert 		j = fread(buf2, 1, sizeof(buf2), f2);
487bdcce04dSray 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
488bdcce04dSray 			return (-1);
4894ec4b3d5Smillert 		if (i != j)
4904ec4b3d5Smillert 			return (1);
491bdcce04dSray 		if (i == 0)
4924ec4b3d5Smillert 			return (0);
4934ec4b3d5Smillert 		if (memcmp(buf1, buf2, i) != 0)
4944ec4b3d5Smillert 			return (1);
4954ec4b3d5Smillert 	}
496ae8d569bSderaadt }
497ae8d569bSderaadt 
4987b6ec9e4Smillert static FILE *
4997b6ec9e4Smillert opentemp(const char *file)
500ae8d569bSderaadt {
501*b9fc9a72Sderaadt 	char buf[BUFSIZ], *tempdir, tempfile[PATH_MAX];
5027b6ec9e4Smillert 	ssize_t nread;
5037b6ec9e4Smillert 	int ifd, ofd;
50448b947b7Smillert 
50548b947b7Smillert 	if (strcmp(file, "-") == 0)
50648b947b7Smillert 		ifd = STDIN_FILENO;
50766e5764eSmillert 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
5084ec4b3d5Smillert 		return (NULL);
50948b947b7Smillert 
51048b947b7Smillert 	if ((tempdir = getenv("TMPDIR")) == NULL)
51148b947b7Smillert 		tempdir = _PATH_TMP;
51209bddaddSotto 
51309bddaddSotto 	if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
51409bddaddSotto 	    strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
51509bddaddSotto 	    sizeof(tempfile)) {
5167b6ec9e4Smillert 		close(ifd);
5177b6ec9e4Smillert 		errno = ENAMETOOLONG;
5184ec4b3d5Smillert 		return (NULL);
519ae8d569bSderaadt 	}
5207b6ec9e4Smillert 
521b5188ac1Sschwarze 	if ((ofd = mkstemp(tempfile)) < 0) {
522b5188ac1Sschwarze 		close(ifd);
5237b6ec9e4Smillert 		return (NULL);
524b5188ac1Sschwarze 	}
5257b6ec9e4Smillert 	unlink(tempfile);
5267b6ec9e4Smillert 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
5277b6ec9e4Smillert 		if (write(ofd, buf, nread) != nread) {
52848b947b7Smillert 			close(ifd);
52948b947b7Smillert 			close(ofd);
5307b6ec9e4Smillert 			return (NULL);
5317b6ec9e4Smillert 		}
5327b6ec9e4Smillert 	}
5337b6ec9e4Smillert 	close(ifd);
534f28259e9Smillert 	lseek(ofd, (off_t)0, SEEK_SET);
5357b6ec9e4Smillert 	return (fdopen(ofd, "r"));
536ae8d569bSderaadt }
537ae8d569bSderaadt 
538ae8d569bSderaadt char *
53926da422aStedu splice(char *dir, char *file)
540ae8d569bSderaadt {
54149dffe13Smillert 	char *tail, *buf;
54278245330Smillert 	size_t dirlen;
543ae8d569bSderaadt 
54478245330Smillert 	dirlen = strlen(dir);
54578245330Smillert 	while (dirlen != 0 && dir[dirlen - 1] == '/')
54678245330Smillert 	    dirlen--;
5477b6ec9e4Smillert 	if ((tail = strrchr(file, '/')) == NULL)
548ae8d569bSderaadt 		tail = file;
549ae8d569bSderaadt 	else
550ae8d569bSderaadt 		tail++;
55178245330Smillert 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
55249dffe13Smillert 	return (buf);
553ae8d569bSderaadt }
554ae8d569bSderaadt 
55526da422aStedu static void
556dba1d6eaSray prepare(int i, FILE *fd, off_t filesize, int flags)
557ae8d569bSderaadt {
55826da422aStedu 	struct line *p;
55926da422aStedu 	int j, h;
560739e7267Sotto 	size_t sz;
561ae8d569bSderaadt 
5624ec4b3d5Smillert 	rewind(fd);
563739e7267Sotto 
564739e7267Sotto 	sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
565739e7267Sotto 	if (sz < 100)
566a8013e93Sotto 		sz = 100;
567739e7267Sotto 
56857003866Sray 	p = xcalloc(sz + 3, sizeof(*p));
569dba1d6eaSray 	for (j = 0; (h = readhash(fd, flags));) {
570a8013e93Sotto 		if (j == sz) {
571a8013e93Sotto 			sz = sz * 3 / 2;
572aa215433Sray 			p = xrealloc(p, sz + 3, sizeof(*p));
573a8013e93Sotto 		}
574a8013e93Sotto 		p[++j].value = h;
575ae8d569bSderaadt 	}
576ae8d569bSderaadt 	len[i] = j;
577ae8d569bSderaadt 	file[i] = p;
578ae8d569bSderaadt }
579ae8d569bSderaadt 
58026da422aStedu static void
58126da422aStedu prune(void)
582ae8d569bSderaadt {
58326da422aStedu 	int i, j;
58448b8c3e3Sderaadt 
585ae8d569bSderaadt 	for (pref = 0; pref < len[0] && pref < len[1] &&
586ae8d569bSderaadt 	    file[0][pref + 1].value == file[1][pref + 1].value;
5877d2b2b91Sderaadt 	    pref++)
5887d2b2b91Sderaadt 		;
589ae8d569bSderaadt 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
590ae8d569bSderaadt 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
5917d2b2b91Sderaadt 	    suff++)
5927d2b2b91Sderaadt 		;
593ae8d569bSderaadt 	for (j = 0; j < 2; j++) {
594ae8d569bSderaadt 		sfile[j] = file[j] + pref;
595ae8d569bSderaadt 		slen[j] = len[j] - pref - suff;
596ae8d569bSderaadt 		for (i = 0; i <= slen[j]; i++)
597ae8d569bSderaadt 			sfile[j][i].serial = i;
598ae8d569bSderaadt 	}
599ae8d569bSderaadt }
600ae8d569bSderaadt 
60126da422aStedu static void
60226da422aStedu equiv(struct line *a, int n, struct line *b, int m, int *c)
603ae8d569bSderaadt {
60426da422aStedu 	int i, j;
60548b8c3e3Sderaadt 
606ae8d569bSderaadt 	i = j = 1;
607ae8d569bSderaadt 	while (i <= n && j <= m) {
608ae8d569bSderaadt 		if (a[i].value < b[j].value)
609ae8d569bSderaadt 			a[i++].value = 0;
610ae8d569bSderaadt 		else if (a[i].value == b[j].value)
611ae8d569bSderaadt 			a[i++].value = j;
612ae8d569bSderaadt 		else
613ae8d569bSderaadt 			j++;
614ae8d569bSderaadt 	}
615ae8d569bSderaadt 	while (i <= n)
616ae8d569bSderaadt 		a[i++].value = 0;
617ae8d569bSderaadt 	b[m + 1].value = 0;
618ae8d569bSderaadt 	j = 0;
619ae8d569bSderaadt 	while (++j <= m) {
620ae8d569bSderaadt 		c[j] = -b[j].serial;
621ae8d569bSderaadt 		while (b[j + 1].value == b[j].value) {
622ae8d569bSderaadt 			j++;
623ae8d569bSderaadt 			c[j] = b[j].serial;
624ae8d569bSderaadt 		}
625ae8d569bSderaadt 	}
626ae8d569bSderaadt 	c[j] = -1;
627ae8d569bSderaadt }
628ae8d569bSderaadt 
6296e18f850Sotto /* Code taken from ping.c */
6306e18f850Sotto static int
6316e18f850Sotto isqrt(int n)
6326e18f850Sotto {
6336e18f850Sotto 	int y, x = 1;
6346e18f850Sotto 
6356e18f850Sotto 	if (n == 0)
6366e18f850Sotto 		return (0);
6376e18f850Sotto 
6386e18f850Sotto 	do { /* newton was a stinker */
6396e18f850Sotto 		y = x;
6406e18f850Sotto 		x = n / x;
6416e18f850Sotto 		x += y;
6426e18f850Sotto 		x /= 2;
6436e18f850Sotto 	} while ((x - y) > 1 || (x - y) < -1);
6446e18f850Sotto 
6456e18f850Sotto 	return (x);
6466e18f850Sotto }
6476e18f850Sotto 
64826da422aStedu static int
649dba1d6eaSray stone(int *a, int n, int *b, int *c, int flags)
650ae8d569bSderaadt {
65148b8c3e3Sderaadt 	int i, k, y, j, l;
652df890a16Snicm 	int oldc, tc, oldl, sq;
653df890a16Snicm 	u_int numtries, bound;
6546e18f850Sotto 
655df890a16Snicm 	if (flags & D_MINIMAL)
656df890a16Snicm 		bound = UINT_MAX;
657df890a16Snicm 	else {
658df890a16Snicm 		sq = isqrt(n);
659*b9fc9a72Sderaadt 		bound = MAXIMUM(256, sq);
660df890a16Snicm 	}
66148b8c3e3Sderaadt 
662ae8d569bSderaadt 	k = 0;
663ae8d569bSderaadt 	c[0] = newcand(0, 0, 0);
664ae8d569bSderaadt 	for (i = 1; i <= n; i++) {
665ae8d569bSderaadt 		j = a[i];
666ae8d569bSderaadt 		if (j == 0)
667ae8d569bSderaadt 			continue;
668ae8d569bSderaadt 		y = -b[j];
669ae8d569bSderaadt 		oldl = 0;
670ae8d569bSderaadt 		oldc = c[0];
6712a89a2f7Smillert 		numtries = 0;
672ae8d569bSderaadt 		do {
673ae8d569bSderaadt 			if (y <= clist[oldc].y)
674ae8d569bSderaadt 				continue;
675ae8d569bSderaadt 			l = search(c, k, y);
676ae8d569bSderaadt 			if (l != oldl + 1)
677ae8d569bSderaadt 				oldc = c[l - 1];
678ae8d569bSderaadt 			if (l <= k) {
679ae8d569bSderaadt 				if (clist[c[l]].y <= y)
680ae8d569bSderaadt 					continue;
681ae8d569bSderaadt 				tc = c[l];
682ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
683ae8d569bSderaadt 				oldc = tc;
684ae8d569bSderaadt 				oldl = l;
6852a89a2f7Smillert 				numtries++;
686ae8d569bSderaadt 			} else {
687ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
688ae8d569bSderaadt 				k++;
689ae8d569bSderaadt 				break;
690ae8d569bSderaadt 			}
6912a89a2f7Smillert 		} while ((y = b[++j]) > 0 && numtries < bound);
692ae8d569bSderaadt 	}
693ae8d569bSderaadt 	return (k);
694ae8d569bSderaadt }
695ae8d569bSderaadt 
69626da422aStedu static int
69726da422aStedu newcand(int x, int y, int pred)
698ae8d569bSderaadt {
69926da422aStedu 	struct cand *q;
70026da422aStedu 
7016e18f850Sotto 	if (clen == clistlen) {
7026e18f850Sotto 		clistlen = clistlen * 11 / 10;
703aa215433Sray 		clist = xrealloc(clist, clistlen, sizeof(*clist));
7046e18f850Sotto 	}
7056e18f850Sotto 	q = clist + clen;
706ae8d569bSderaadt 	q->x = x;
707ae8d569bSderaadt 	q->y = y;
708ae8d569bSderaadt 	q->pred = pred;
7096e18f850Sotto 	return (clen++);
710ae8d569bSderaadt }
711ae8d569bSderaadt 
71226da422aStedu static int
71326da422aStedu search(int *c, int k, int y)
714ae8d569bSderaadt {
71548b8c3e3Sderaadt 	int i, j, l, t;
71648b8c3e3Sderaadt 
717ae8d569bSderaadt 	if (clist[c[k]].y < y)	/* quick look for typical case */
718ae8d569bSderaadt 		return (k + 1);
719ae8d569bSderaadt 	i = 0;
720ae8d569bSderaadt 	j = k + 1;
721dba1d6eaSray 	for (;;) {
722dba1d6eaSray 		l = (i + j) / 2;
723dba1d6eaSray 		if (l <= i)
724ae8d569bSderaadt 			break;
725ae8d569bSderaadt 		t = clist[c[l]].y;
726ae8d569bSderaadt 		if (t > y)
727ae8d569bSderaadt 			j = l;
728ae8d569bSderaadt 		else if (t < y)
729ae8d569bSderaadt 			i = l;
730ae8d569bSderaadt 		else
731ae8d569bSderaadt 			return (l);
732ae8d569bSderaadt 	}
733ae8d569bSderaadt 	return (l + 1);
734ae8d569bSderaadt }
735ae8d569bSderaadt 
73626da422aStedu static void
73726da422aStedu unravel(int p)
738ae8d569bSderaadt {
73926da422aStedu 	struct cand *q;
74048b8c3e3Sderaadt 	int i;
74148b8c3e3Sderaadt 
742ae8d569bSderaadt 	for (i = 0; i <= len[0]; i++)
743ae8d569bSderaadt 		J[i] = i <= pref ? i :
7447d2b2b91Sderaadt 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
745ae8d569bSderaadt 	for (q = clist + p; q->y != 0; q = clist + q->pred)
746ae8d569bSderaadt 		J[q->x + pref] = q->y + pref;
747ae8d569bSderaadt }
748ae8d569bSderaadt 
74926da422aStedu /*
75049dffe13Smillert  * Check does double duty:
75149dffe13Smillert  *  1.	ferret out any fortuitous correspondences due
75249dffe13Smillert  *	to confounding by hashing (which result in "jackpot")
75349dffe13Smillert  *  2.  collect random access indexes to the two files
75426da422aStedu  */
75526da422aStedu static void
7566fc40daeSray check(FILE *f1, FILE *f2, int flags)
757ae8d569bSderaadt {
75848b8c3e3Sderaadt 	int i, j, jackpot, c, d;
759ae8d569bSderaadt 	long ctold, ctnew;
760ae8d569bSderaadt 
7614ec4b3d5Smillert 	rewind(f1);
7624ec4b3d5Smillert 	rewind(f2);
763ae8d569bSderaadt 	j = 1;
764ae8d569bSderaadt 	ixold[0] = ixnew[0] = 0;
765ae8d569bSderaadt 	jackpot = 0;
766ae8d569bSderaadt 	ctold = ctnew = 0;
767ae8d569bSderaadt 	for (i = 1; i <= len[0]; i++) {
768ae8d569bSderaadt 		if (J[i] == 0) {
7694ec4b3d5Smillert 			ixold[i] = ctold += skipline(f1);
770ae8d569bSderaadt 			continue;
771ae8d569bSderaadt 		}
772ae8d569bSderaadt 		while (j < J[i]) {
7734ec4b3d5Smillert 			ixnew[j] = ctnew += skipline(f2);
774ae8d569bSderaadt 			j++;
775ae8d569bSderaadt 		}
776dba1d6eaSray 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
777ae8d569bSderaadt 			for (;;) {
7784ec4b3d5Smillert 				c = getc(f1);
7794ec4b3d5Smillert 				d = getc(f2);
780bb34d48bSmillert 				/*
781bb34d48bSmillert 				 * GNU diff ignores a missing newline
782dba1d6eaSray 				 * in one file for -b or -w.
783bb34d48bSmillert 				 */
78482328041Skspillner 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
78582328041Skspillner 					if (c == EOF && d == '\n') {
78682328041Skspillner 						ctnew++;
787bb34d48bSmillert 						break;
78882328041Skspillner 					} else if (c == '\n' && d == EOF) {
78982328041Skspillner 						ctold++;
79082328041Skspillner 						break;
79182328041Skspillner 					}
792bb34d48bSmillert 				}
793ae8d569bSderaadt 				ctold++;
794ae8d569bSderaadt 				ctnew++;
795dba1d6eaSray 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
796dba1d6eaSray 				    isspace(d)) {
797ae8d569bSderaadt 					do {
798ae8d569bSderaadt 						if (c == '\n')
799ae8d569bSderaadt 							break;
800ae8d569bSderaadt 						ctold++;
8014ec4b3d5Smillert 					} while (isspace(c = getc(f1)));
802ae8d569bSderaadt 					do {
803ae8d569bSderaadt 						if (d == '\n')
804ae8d569bSderaadt 							break;
805ae8d569bSderaadt 						ctnew++;
8064ec4b3d5Smillert 					} while (isspace(d = getc(f2)));
807dba1d6eaSray 				} else if ((flags & D_IGNOREBLANKS)) {
808ae8d569bSderaadt 					while (isspace(c) && c != '\n') {
8094ec4b3d5Smillert 						c = getc(f1);
810ae8d569bSderaadt 						ctold++;
811ae8d569bSderaadt 					}
812ae8d569bSderaadt 					while (isspace(d) && d != '\n') {
8134ec4b3d5Smillert 						d = getc(f2);
814ae8d569bSderaadt 						ctnew++;
815ae8d569bSderaadt 					}
816ae8d569bSderaadt 				}
817ae8d569bSderaadt 				if (chrtran[c] != chrtran[d]) {
818ae8d569bSderaadt 					jackpot++;
819ae8d569bSderaadt 					J[i] = 0;
820bb34d48bSmillert 					if (c != '\n' && c != EOF)
8214ec4b3d5Smillert 						ctold += skipline(f1);
822bb34d48bSmillert 					if (d != '\n' && c != EOF)
8234ec4b3d5Smillert 						ctnew += skipline(f2);
824ae8d569bSderaadt 					break;
825ae8d569bSderaadt 				}
826bb34d48bSmillert 				if (c == '\n' || c == EOF)
827ae8d569bSderaadt 					break;
828ae8d569bSderaadt 			}
829ae8d569bSderaadt 		} else {
830ae8d569bSderaadt 			for (;;) {
831ae8d569bSderaadt 				ctold++;
832ae8d569bSderaadt 				ctnew++;
8334ec4b3d5Smillert 				if ((c = getc(f1)) != (d = getc(f2))) {
834ae8d569bSderaadt 					/* jackpot++; */
835ae8d569bSderaadt 					J[i] = 0;
836bb34d48bSmillert 					if (c != '\n' && c != EOF)
8374ec4b3d5Smillert 						ctold += skipline(f1);
838bb34d48bSmillert 					if (d != '\n' && c != EOF)
8394ec4b3d5Smillert 						ctnew += skipline(f2);
840ae8d569bSderaadt 					break;
841ae8d569bSderaadt 				}
842bb34d48bSmillert 				if (c == '\n' || c == EOF)
843ae8d569bSderaadt 					break;
844ae8d569bSderaadt 			}
845ae8d569bSderaadt 		}
846ae8d569bSderaadt 		ixold[i] = ctold;
847ae8d569bSderaadt 		ixnew[j] = ctnew;
848ae8d569bSderaadt 		j++;
849ae8d569bSderaadt 	}
8504ec4b3d5Smillert 	for (; j <= len[1]; j++)
8514ec4b3d5Smillert 		ixnew[j] = ctnew += skipline(f2);
852ae8d569bSderaadt 	/*
85348b8c3e3Sderaadt 	 * if (jackpot)
85448b8c3e3Sderaadt 	 *	fprintf(stderr, "jackpot\n");
855ae8d569bSderaadt 	 */
856ae8d569bSderaadt }
857ae8d569bSderaadt 
85848b8c3e3Sderaadt /* shellsort CACM #201 */
85926da422aStedu static void
86026da422aStedu sort(struct line *a, int n)
86148b8c3e3Sderaadt {
86248b8c3e3Sderaadt 	struct line *ai, *aim, w;
86348b8c3e3Sderaadt 	int j, m = 0, k;
864ae8d569bSderaadt 
865ae8d569bSderaadt 	if (n == 0)
866ae8d569bSderaadt 		return;
867ae8d569bSderaadt 	for (j = 1; j <= n; j *= 2)
868ae8d569bSderaadt 		m = 2 * j - 1;
869ae8d569bSderaadt 	for (m /= 2; m != 0; m /= 2) {
870ae8d569bSderaadt 		k = n - m;
871ae8d569bSderaadt 		for (j = 1; j <= k; j++) {
872ae8d569bSderaadt 			for (ai = &a[j]; ai > a; ai -= m) {
873ae8d569bSderaadt 				aim = &ai[m];
874ae8d569bSderaadt 				if (aim < ai)
875ae8d569bSderaadt 					break;	/* wraparound */
876ae8d569bSderaadt 				if (aim->value > ai[0].value ||
87726da422aStedu 				    (aim->value == ai[0].value &&
87826da422aStedu 					aim->serial > ai[0].serial))
879ae8d569bSderaadt 					break;
880ae8d569bSderaadt 				w.value = ai[0].value;
881ae8d569bSderaadt 				ai[0].value = aim->value;
882ae8d569bSderaadt 				aim->value = w.value;
883ae8d569bSderaadt 				w.serial = ai[0].serial;
884ae8d569bSderaadt 				ai[0].serial = aim->serial;
885ae8d569bSderaadt 				aim->serial = w.serial;
886ae8d569bSderaadt 			}
887ae8d569bSderaadt 		}
888ae8d569bSderaadt 	}
889ae8d569bSderaadt }
890ae8d569bSderaadt 
89126da422aStedu static void
89226da422aStedu unsort(struct line *f, int l, int *b)
893ae8d569bSderaadt {
89448b8c3e3Sderaadt 	int *a, i;
89526da422aStedu 
89657003866Sray 	a = xcalloc(l + 1, sizeof(*a));
897ae8d569bSderaadt 	for (i = 1; i <= l; i++)
898ae8d569bSderaadt 		a[f[i].serial] = f[i].value;
899ae8d569bSderaadt 	for (i = 1; i <= l; i++)
900ae8d569bSderaadt 		b[i] = a[i];
9014a034c3aSray 	xfree(a);
902ae8d569bSderaadt }
903ae8d569bSderaadt 
90426da422aStedu static int
9054ec4b3d5Smillert skipline(FILE *f)
906ae8d569bSderaadt {
90726da422aStedu 	int i, c;
908ae8d569bSderaadt 
909bb34d48bSmillert 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
910bb34d48bSmillert 		continue;
911ae8d569bSderaadt 	return (i);
912ae8d569bSderaadt }
913ae8d569bSderaadt 
91426da422aStedu static void
915ac73e8e6Smillert output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
916ae8d569bSderaadt {
91748b8c3e3Sderaadt 	int m, i0, i1, j0, j1;
91848b8c3e3Sderaadt 
9194ec4b3d5Smillert 	rewind(f1);
9204ec4b3d5Smillert 	rewind(f2);
921ae8d569bSderaadt 	m = len[0];
922ae8d569bSderaadt 	J[0] = 0;
923ae8d569bSderaadt 	J[m + 1] = len[1] + 1;
92457003866Sray 	if (diff_format != D_EDIT) {
92526da422aStedu 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
92626da422aStedu 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
92726da422aStedu 				i0++;
928ae8d569bSderaadt 			j0 = J[i0 - 1] + 1;
929ae8d569bSderaadt 			i1 = i0 - 1;
93026da422aStedu 			while (i1 < m && J[i1 + 1] == 0)
93126da422aStedu 				i1++;
932ae8d569bSderaadt 			j1 = J[i1 + 1] - 1;
933ae8d569bSderaadt 			J[i1] = j1;
93461783bcdSespie 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
93526da422aStedu 		}
93626da422aStedu 	} else {
93726da422aStedu 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
93826da422aStedu 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
93926da422aStedu 				i0--;
940ae8d569bSderaadt 			j0 = J[i0 + 1] - 1;
941ae8d569bSderaadt 			i1 = i0 + 1;
94226da422aStedu 			while (i1 > 1 && J[i1 - 1] == 0)
94326da422aStedu 				i1--;
944ae8d569bSderaadt 			j1 = J[i1 - 1] + 1;
945ae8d569bSderaadt 			J[i1] = j1;
94661783bcdSespie 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
947ae8d569bSderaadt 		}
94826da422aStedu 	}
949ae8d569bSderaadt 	if (m == 0)
95061783bcdSespie 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
95157003866Sray 	if (diff_format == D_IFDEF) {
952ae8d569bSderaadt 		for (;;) {
953ae8d569bSderaadt #define	c i0
954bb34d48bSmillert 			if ((c = getc(f1)) == EOF)
955ae8d569bSderaadt 				return;
9565e07282dSray 			diff_output("%c", c);
957ae8d569bSderaadt 		}
958ae8d569bSderaadt #undef c
959ae8d569bSderaadt 	}
9609de32c1bSmillert 	if (anychange != 0) {
96157003866Sray 		if (diff_format == D_CONTEXT)
962dba1d6eaSray 			dump_context_vec(f1, f2, flags);
96357003866Sray 		else if (diff_format == D_UNIFIED)
964dba1d6eaSray 			dump_unified_vec(f1, f2, flags);
9659de32c1bSmillert 	}
966ae8d569bSderaadt }
967ae8d569bSderaadt 
9684a034c3aSray static void
969c72ea322Smillert range(int a, int b, char *separator)
970c72ea322Smillert {
9715e07282dSray 	diff_output("%d", a > b ? b : a);
972c72ea322Smillert 	if (a < b)
9735e07282dSray 		diff_output("%s%d", separator, b);
974c72ea322Smillert }
975c72ea322Smillert 
9764a034c3aSray static void
977c72ea322Smillert uni_range(int a, int b)
978c72ea322Smillert {
979c72ea322Smillert 	if (a < b)
9805e07282dSray 		diff_output("%d,%d", a, b - a + 1);
981c72ea322Smillert 	else if (a == b)
9825e07282dSray 		diff_output("%d", b);
983c72ea322Smillert 	else
9845e07282dSray 		diff_output("%d,0", b);
985c72ea322Smillert }
986c72ea322Smillert 
987ccd55a2cSotto static char *
988dba1d6eaSray preadline(int fd, size_t rlen, off_t off)
989ccd55a2cSotto {
990ccd55a2cSotto 	char *line;
991ccd55a2cSotto 	ssize_t nr;
992ccd55a2cSotto 
993dba1d6eaSray 	line = xmalloc(rlen + 1);
994dba1d6eaSray 	if ((nr = pread(fd, line, rlen, off)) < 0)
99560b54a0eSray 		err(2, "preadline");
9968d981b00Sotto 	if (nr > 0 && line[nr-1] == '\n')
9978d981b00Sotto 		nr--;
998ccd55a2cSotto 	line[nr] = '\0';
999ccd55a2cSotto 	return (line);
1000ccd55a2cSotto }
1001ccd55a2cSotto 
1002ccd55a2cSotto static int
1003ccd55a2cSotto ignoreline(char *line)
1004ccd55a2cSotto {
1005ccd55a2cSotto 	int ret;
1006ccd55a2cSotto 
1007ccd55a2cSotto 	ret = regexec(&ignore_re, line, 0, NULL, 0);
10084a034c3aSray 	xfree(line);
1009ccd55a2cSotto 	return (ret == 0);	/* if it matched, it should be ignored. */
1010ccd55a2cSotto }
1011ccd55a2cSotto 
1012ae8d569bSderaadt /*
10134ec4b3d5Smillert  * Indicate that there is a difference between lines a and b of the from file
101426da422aStedu  * to get to lines c to d of the to file.  If a is greater then b then there
101526da422aStedu  * are no lines in the from file involved and this means that there were
101626da422aStedu  * lines appended (beginning at b).  If c is greater than d then there are
101726da422aStedu  * lines missing from the to file.
1018ae8d569bSderaadt  */
101926da422aStedu static void
1020ac73e8e6Smillert change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
102161783bcdSespie     int *pflags)
1022ae8d569bSderaadt {
10230efcb26eSmillert 	static size_t max_context = 64;
1024f8a6bf23Smillert 	int i;
10250efcb26eSmillert 
1026f8a6bf23Smillert restart:
102757003866Sray 	if (diff_format != D_IFDEF && a > b && c > d)
1028ae8d569bSderaadt 		return;
1029ccd55a2cSotto 	if (ignore_pats != NULL) {
1030ccd55a2cSotto 		char *line;
1031ccd55a2cSotto 		/*
1032ccd55a2cSotto 		 * All lines in the change, insert, or delete must
1033ccd55a2cSotto 		 * match an ignore pattern for the change to be
1034ccd55a2cSotto 		 * ignored.
1035ccd55a2cSotto 		 */
1036ccd55a2cSotto 		if (a <= b) {		/* Changes and deletes. */
1037ccd55a2cSotto 			for (i = a; i <= b; i++) {
1038ccd55a2cSotto 				line = preadline(fileno(f1),
1039ccd55a2cSotto 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1040ccd55a2cSotto 				if (!ignoreline(line))
1041ccd55a2cSotto 					goto proceed;
1042ccd55a2cSotto 			}
1043ccd55a2cSotto 		}
1044ccd55a2cSotto 		if (a > b || c <= d) {	/* Changes and inserts. */
1045ccd55a2cSotto 			for (i = c; i <= d; i++) {
1046ccd55a2cSotto 				line = preadline(fileno(f2),
1047ccd55a2cSotto 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1048ccd55a2cSotto 				if (!ignoreline(line))
1049ccd55a2cSotto 					goto proceed;
1050ccd55a2cSotto 			}
1051ccd55a2cSotto 		}
1052ccd55a2cSotto 		return;
1053ccd55a2cSotto 	}
1054ccd55a2cSotto proceed:
105561783bcdSespie 	if (*pflags & D_HEADER) {
10565e07282dSray 		diff_output("%s %s %s\n", diffargs, file1, file2);
105761783bcdSespie 		*pflags &= ~D_HEADER;
105861783bcdSespie 	}
105957003866Sray 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
10600efcb26eSmillert 		/*
10610efcb26eSmillert 		 * Allocate change records as needed.
10620efcb26eSmillert 		 */
10630efcb26eSmillert 		if (context_vec_ptr == context_vec_end - 1) {
10640efcb26eSmillert 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
10650efcb26eSmillert 			max_context <<= 1;
10664a034c3aSray 			context_vec_start = xrealloc(context_vec_start,
1067aa215433Sray 			    max_context, sizeof(*context_vec_start));
10680efcb26eSmillert 			context_vec_end = context_vec_start + max_context;
10690efcb26eSmillert 			context_vec_ptr = context_vec_start + offset;
10700efcb26eSmillert 		}
10710efcb26eSmillert 		if (anychange == 0) {
10720efcb26eSmillert 			/*
10730efcb26eSmillert 			 * Print the context/unidiff header first time through.
10740efcb26eSmillert 			 */
10757bdb251cSmillert 			print_header(file1, file2);
10760efcb26eSmillert 			anychange = 1;
107757003866Sray 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
107857003866Sray 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1079ae8d569bSderaadt 			/*
108057003866Sray 			 * If this change is more than 'diff_context' lines from the
10810efcb26eSmillert 			 * previous change, dump the record and reset it.
1082ae8d569bSderaadt 			 */
108357003866Sray 			if (diff_format == D_CONTEXT)
1084dba1d6eaSray 				dump_context_vec(f1, f2, *pflags);
10859de32c1bSmillert 			else
1086dba1d6eaSray 				dump_unified_vec(f1, f2, *pflags);
10879de32c1bSmillert 		}
1088ae8d569bSderaadt 		context_vec_ptr++;
1089ae8d569bSderaadt 		context_vec_ptr->a = a;
1090ae8d569bSderaadt 		context_vec_ptr->b = b;
1091ae8d569bSderaadt 		context_vec_ptr->c = c;
1092ae8d569bSderaadt 		context_vec_ptr->d = d;
1093ae8d569bSderaadt 		return;
1094ae8d569bSderaadt 	}
10950efcb26eSmillert 	if (anychange == 0)
10960efcb26eSmillert 		anychange = 1;
109757003866Sray 	switch (diff_format) {
10985f9fc8aaSmillert 	case D_BRIEF:
10995f9fc8aaSmillert 		return;
1100ae8d569bSderaadt 	case D_NORMAL:
1101ae8d569bSderaadt 	case D_EDIT:
1102ae8d569bSderaadt 		range(a, b, ",");
11035e07282dSray 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
110457003866Sray 		if (diff_format == D_NORMAL)
1105ae8d569bSderaadt 			range(c, d, ",");
11065e07282dSray 		diff_output("\n");
1107ae8d569bSderaadt 		break;
1108ae8d569bSderaadt 	case D_REVERSE:
11095e07282dSray 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1110ae8d569bSderaadt 		range(a, b, " ");
11115e07282dSray 		diff_output("\n");
1112ae8d569bSderaadt 		break;
1113ae8d569bSderaadt 	case D_NREVERSE:
1114ae8d569bSderaadt 		if (a > b)
11155e07282dSray 			diff_output("a%d %d\n", b, d - c + 1);
1116ae8d569bSderaadt 		else {
11175e07282dSray 			diff_output("d%d %d\n", a, b - a + 1);
1118ae8d569bSderaadt 			if (!(c > d))
1119ae8d569bSderaadt 				/* add changed lines */
11205e07282dSray 				diff_output("a%d %d\n", b, d - c + 1);
1121ae8d569bSderaadt 		}
1122ae8d569bSderaadt 		break;
1123ae8d569bSderaadt 	}
112457003866Sray 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1125dba1d6eaSray 		fetch(ixold, a, b, f1, '<', 1, *pflags);
112657003866Sray 		if (a <= b && c <= d && diff_format == D_NORMAL)
11275e07282dSray 			diff_output("---\n");
1128ae8d569bSderaadt 	}
112957003866Sray 	i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
113057003866Sray 	if (i != 0 && diff_format == D_EDIT) {
1131f8a6bf23Smillert 		/*
1132f8a6bf23Smillert 		 * A non-zero return value for D_EDIT indicates that the
1133f8a6bf23Smillert 		 * last line printed was a bare dot (".") that has been
1134f8a6bf23Smillert 		 * escaped as ".." to prevent ed(1) from misinterpreting
1135f8a6bf23Smillert 		 * it.  We have to add a substitute command to change this
1136f8a6bf23Smillert 		 * back and restart where we left off.
1137f8a6bf23Smillert 		 */
11385e07282dSray 		diff_output(".\n");
11395e07282dSray 		diff_output("%ds/^\\.\\././\n", a);
1140f8a6bf23Smillert 		a += i;
1141f8a6bf23Smillert 		c += i;
1142f8a6bf23Smillert 		goto restart;
1143f8a6bf23Smillert 	}
114457003866Sray 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
11455e07282dSray 		diff_output(".\n");
1146ae8d569bSderaadt 	if (inifdef) {
11475e07282dSray 		diff_output("#endif /* %s */\n", ifdefname);
1148ae8d569bSderaadt 		inifdef = 0;
1149ae8d569bSderaadt 	}
1150ae8d569bSderaadt }
1151ae8d569bSderaadt 
1152f8a6bf23Smillert static int
1153dba1d6eaSray fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1154ae8d569bSderaadt {
1155f8a6bf23Smillert 	int i, j, c, lastc, col, nc;
1156ae8d569bSderaadt 
1157ae8d569bSderaadt 	/*
1158ae8d569bSderaadt 	 * When doing #ifdef's, copy down to current line
1159ae8d569bSderaadt 	 * if this is the first file, so that stuff makes it to output.
1160ae8d569bSderaadt 	 */
116157003866Sray 	if (diff_format == D_IFDEF && oldfile) {
1162ae8d569bSderaadt 		long curpos = ftell(lb);
1163ae8d569bSderaadt 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1164ae8d569bSderaadt 		nc = f[a > b ? b : a - 1] - curpos;
1165ae8d569bSderaadt 		for (i = 0; i < nc; i++)
11665e07282dSray 			diff_output("%c", getc(lb));
1167ae8d569bSderaadt 	}
1168ae8d569bSderaadt 	if (a > b)
1169f8a6bf23Smillert 		return (0);
117057003866Sray 	if (diff_format == D_IFDEF) {
117190f56ad8Smillert 		if (inifdef) {
11725e07282dSray 			diff_output("#else /* %s%s */\n",
117390f56ad8Smillert 			    oldfile == 1 ? "!" : "", ifdefname);
117426da422aStedu 		} else {
117590f56ad8Smillert 			if (oldfile)
11765e07282dSray 				diff_output("#ifndef %s\n", ifdefname);
117790f56ad8Smillert 			else
11785e07282dSray 				diff_output("#ifdef %s\n", ifdefname);
1179ae8d569bSderaadt 		}
1180ae8d569bSderaadt 		inifdef = 1 + oldfile;
1181ae8d569bSderaadt 	}
1182ae8d569bSderaadt 	for (i = a; i <= b; i++) {
118391cf91eeSderaadt 		fseek(lb, f[i - 1], SEEK_SET);
1184ae8d569bSderaadt 		nc = f[i] - f[i - 1];
118557003866Sray 		if (diff_format != D_IFDEF && ch != '\0') {
11865e07282dSray 			diff_output("%c", ch);
118757003866Sray 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
118857003866Sray 			    || diff_format == D_UNIFIED))
11895e07282dSray 				diff_output("\t");
119057003866Sray 			else if (diff_format != D_UNIFIED)
11915e07282dSray 				diff_output(" ");
11921f9aa9e0Smillert 		}
1193ae8d569bSderaadt 		col = 0;
1194f8a6bf23Smillert 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1195bb34d48bSmillert 			if ((c = getc(lb)) == EOF) {
119657003866Sray 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
119757003866Sray 				    diff_format == D_NREVERSE)
1198643dc60cSmillert 					warnx("No newline at end of file");
1199643dc60cSmillert 				else
12005e07282dSray 					diff_output("\n\\ No newline at end of "
12015e07282dSray 					    "file\n");
1202643dc60cSmillert 				return (0);
1203bb34d48bSmillert 			}
1204dba1d6eaSray 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1205bb34d48bSmillert 				do {
12065e07282dSray 					diff_output(" ");
1207bb34d48bSmillert 				} while (++col & 7);
1208bb34d48bSmillert 			} else {
120957003866Sray 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1210f8a6bf23Smillert 				    && lastc == '.') {
1211f8a6bf23Smillert 					/*
1212f8a6bf23Smillert 					 * Don't print a bare "." line
1213f8a6bf23Smillert 					 * since that will confuse ed(1).
1214f8a6bf23Smillert 					 * Print ".." instead and return,
1215f8a6bf23Smillert 					 * giving the caller an offset
1216f8a6bf23Smillert 					 * from which to restart.
1217f8a6bf23Smillert 					 */
12185e07282dSray 					diff_output(".\n");
1219f8a6bf23Smillert 					return (i - a + 1);
1220f8a6bf23Smillert 				}
12215e07282dSray 				diff_output("%c", c);
1222ae8d569bSderaadt 				col++;
1223ae8d569bSderaadt 			}
1224ae8d569bSderaadt 		}
1225ae8d569bSderaadt 	}
1226f8a6bf23Smillert 	return (0);
1227ae8d569bSderaadt }
1228ae8d569bSderaadt 
1229ae8d569bSderaadt /*
1230a8013e93Sotto  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1231ae8d569bSderaadt  */
123226da422aStedu static int
1233dba1d6eaSray readhash(FILE *f, int flags)
1234ae8d569bSderaadt {
1235a8013e93Sotto 	int i, t, space;
1236a8013e93Sotto 	int sum;
1237ae8d569bSderaadt 
1238ae8d569bSderaadt 	sum = 1;
1239ae8d569bSderaadt 	space = 0;
1240dba1d6eaSray 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1241dba1d6eaSray 		if (flags & D_IGNORECASE)
1242a8013e93Sotto 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1243bb34d48bSmillert 				if (t == EOF) {
1244a8013e93Sotto 					if (i == 0)
1245ae8d569bSderaadt 						return (0);
1246bb34d48bSmillert 					break;
1247bb34d48bSmillert 				}
1248a8013e93Sotto 				sum = sum * 127 + chrtran[t];
1249ae8d569bSderaadt 			}
1250ae8d569bSderaadt 		else
1251a8013e93Sotto 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1252bb34d48bSmillert 				if (t == EOF) {
1253a8013e93Sotto 					if (i == 0)
1254ae8d569bSderaadt 						return (0);
1255bb34d48bSmillert 					break;
1256bb34d48bSmillert 				}
1257a8013e93Sotto 				sum = sum * 127 + t;
1258ae8d569bSderaadt 			}
1259ae8d569bSderaadt 	} else {
1260a8013e93Sotto 		for (i = 0;;) {
1261ae8d569bSderaadt 			switch (t = getc(f)) {
1262ae8d569bSderaadt 			case '\t':
1263b5b605d5Sotto 			case '\r':
1264b5b605d5Sotto 			case '\v':
1265b5b605d5Sotto 			case '\f':
1266ae8d569bSderaadt 			case ' ':
1267ae8d569bSderaadt 				space++;
1268ae8d569bSderaadt 				continue;
1269ae8d569bSderaadt 			default:
1270dba1d6eaSray 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1271a8013e93Sotto 					i++;
1272ae8d569bSderaadt 					space = 0;
1273ae8d569bSderaadt 				}
1274a8013e93Sotto 				sum = sum * 127 + chrtran[t];
1275a8013e93Sotto 				i++;
1276ae8d569bSderaadt 				continue;
1277bb34d48bSmillert 			case EOF:
1278a8013e93Sotto 				if (i == 0)
1279bb34d48bSmillert 					return (0);
1280bb34d48bSmillert 				/* FALLTHROUGH */
1281ae8d569bSderaadt 			case '\n':
1282ae8d569bSderaadt 				break;
1283ae8d569bSderaadt 			}
1284ae8d569bSderaadt 			break;
1285ae8d569bSderaadt 		}
1286ae8d569bSderaadt 	}
1287a8013e93Sotto 	/*
1288a8013e93Sotto 	 * There is a remote possibility that we end up with a zero sum.
1289a8013e93Sotto 	 * Zero is used as an EOF marker, so return 1 instead.
1290a8013e93Sotto 	 */
1291a8013e93Sotto 	return (sum == 0 ? 1 : sum);
1292ae8d569bSderaadt }
1293ae8d569bSderaadt 
1294774cb253Savsm static int
129526da422aStedu asciifile(FILE *f)
1296ae8d569bSderaadt {
1297063293f0Sotto 	unsigned char buf[BUFSIZ];
12980631431fSstsp 	size_t cnt;
1299ae8d569bSderaadt 
1300dba1d6eaSray 	if (f == NULL)
13012a1593dfStedu 		return (1);
13022a1593dfStedu 
13034ec4b3d5Smillert 	rewind(f);
13044ec4b3d5Smillert 	cnt = fread(buf, 1, sizeof(buf), f);
13050631431fSstsp 	return (memchr(buf, '\0', cnt) == NULL);
1306ae8d569bSderaadt }
1307ae8d569bSderaadt 
13085c68ba7eSespie #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
13095c68ba7eSespie 
131096e45528Sotto static char *
1311dba1d6eaSray match_function(const long *f, int pos, FILE *fp)
131296e45528Sotto {
1313063293f0Sotto 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
131496e45528Sotto 	size_t nc;
131596e45528Sotto 	int last = lastline;
13165c68ba7eSespie 	char *state = NULL;
131796e45528Sotto 
131896e45528Sotto 	lastline = pos;
131996e45528Sotto 	while (pos > last) {
1320dba1d6eaSray 		fseek(fp, f[pos - 1], SEEK_SET);
132196e45528Sotto 		nc = f[pos] - f[pos - 1];
132296e45528Sotto 		if (nc >= sizeof(buf))
132396e45528Sotto 			nc = sizeof(buf) - 1;
1324dba1d6eaSray 		nc = fread(buf, 1, nc, fp);
132596e45528Sotto 		if (nc > 0) {
132696e45528Sotto 			buf[nc] = '\0';
13274fd6ed32Sgilles 			buf[strcspn(buf, "\n")] = '\0';
132896e45528Sotto 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
13295c68ba7eSespie 				if (begins_with(buf, "private:")) {
13305c68ba7eSespie 					if (!state)
13315c68ba7eSespie 						state = " (private)";
13325c68ba7eSespie 				} else if (begins_with(buf, "protected:")) {
13335c68ba7eSespie 					if (!state)
13345c68ba7eSespie 						state = " (protected)";
13355c68ba7eSespie 				} else if (begins_with(buf, "public:")) {
13365c68ba7eSespie 					if (!state)
13375c68ba7eSespie 						state = " (public)";
13385c68ba7eSespie 				} else {
133996e45528Sotto 					strlcpy(lastbuf, buf, sizeof lastbuf);
13405c68ba7eSespie 					if (state)
13415c68ba7eSespie 						strlcat(lastbuf, state,
13425c68ba7eSespie 						    sizeof lastbuf);
134396e45528Sotto 					lastmatchline = pos;
134496e45528Sotto 					return lastbuf;
134596e45528Sotto 				}
134696e45528Sotto 			}
13475c68ba7eSespie 		}
134896e45528Sotto 		pos--;
134996e45528Sotto 	}
135096e45528Sotto 	return lastmatchline > 0 ? lastbuf : NULL;
135196e45528Sotto }
135296e45528Sotto 
1353ae8d569bSderaadt /* dump accumulated "context" diff changes */
135426da422aStedu static void
1355dba1d6eaSray dump_context_vec(FILE *f1, FILE *f2, int flags)
1356ae8d569bSderaadt {
135748b8c3e3Sderaadt 	struct context_vec *cvp = context_vec_start;
135848b8c3e3Sderaadt 	int lowa, upb, lowc, upd, do_output;
135926da422aStedu 	int a, b, c, d;
136096e45528Sotto 	char ch, *f;
1361ae8d569bSderaadt 
136290f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
1363ae8d569bSderaadt 		return;
1364ae8d569bSderaadt 
136526da422aStedu 	b = d = 0;		/* gcc */
1366*b9fc9a72Sderaadt 	lowa = MAXIMUM(1, cvp->a - diff_context);
1367*b9fc9a72Sderaadt 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1368*b9fc9a72Sderaadt 	lowc = MAXIMUM(1, cvp->c - diff_context);
1369*b9fc9a72Sderaadt 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1370ae8d569bSderaadt 
13715e07282dSray 	diff_output("***************");
1372dba1d6eaSray 	if ((flags & D_PROTOTYPE)) {
137396e45528Sotto 		f = match_function(ixold, lowa-1, f1);
13745e07282dSray 		if (f != NULL)
13755e07282dSray 			diff_output(" %s", f);
137696e45528Sotto 	}
13775e07282dSray 	diff_output("\n*** ");
1378ae8d569bSderaadt 	range(lowa, upb, ",");
13795e07282dSray 	diff_output(" ****\n");
1380ae8d569bSderaadt 
1381ae8d569bSderaadt 	/*
13824ec4b3d5Smillert 	 * Output changes to the "old" file.  The first loop suppresses
1383ae8d569bSderaadt 	 * output if there were no changes to the "old" file (we'll see
1384ae8d569bSderaadt 	 * the "old" lines as context in the "new" list).
1385ae8d569bSderaadt 	 */
1386ae8d569bSderaadt 	do_output = 0;
1387ae8d569bSderaadt 	for (; cvp <= context_vec_ptr; cvp++)
1388ae8d569bSderaadt 		if (cvp->a <= cvp->b) {
1389ae8d569bSderaadt 			cvp = context_vec_start;
1390ae8d569bSderaadt 			do_output++;
1391ae8d569bSderaadt 			break;
1392ae8d569bSderaadt 		}
1393ae8d569bSderaadt 	if (do_output) {
1394ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
139526da422aStedu 			a = cvp->a;
139626da422aStedu 			b = cvp->b;
139726da422aStedu 			c = cvp->c;
139826da422aStedu 			d = cvp->d;
1399ae8d569bSderaadt 
1400ae8d569bSderaadt 			if (a <= b && c <= d)
1401ae8d569bSderaadt 				ch = 'c';
1402ae8d569bSderaadt 			else
1403ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1404ae8d569bSderaadt 
1405ae8d569bSderaadt 			if (ch == 'a')
1406dba1d6eaSray 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1407ae8d569bSderaadt 			else {
1408dba1d6eaSray 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
14094ec4b3d5Smillert 				fetch(ixold, a, b, f1,
1410dba1d6eaSray 				    ch == 'c' ? '!' : '-', 0, flags);
1411ae8d569bSderaadt 			}
1412ae8d569bSderaadt 			lowa = b + 1;
1413ae8d569bSderaadt 			cvp++;
1414ae8d569bSderaadt 		}
1415dba1d6eaSray 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1416ae8d569bSderaadt 	}
1417ae8d569bSderaadt 	/* output changes to the "new" file */
14185e07282dSray 	diff_output("--- ");
1419ae8d569bSderaadt 	range(lowc, upd, ",");
14205e07282dSray 	diff_output(" ----\n");
1421ae8d569bSderaadt 
1422ae8d569bSderaadt 	do_output = 0;
1423ae8d569bSderaadt 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1424ae8d569bSderaadt 		if (cvp->c <= cvp->d) {
1425ae8d569bSderaadt 			cvp = context_vec_start;
1426ae8d569bSderaadt 			do_output++;
1427ae8d569bSderaadt 			break;
1428ae8d569bSderaadt 		}
1429ae8d569bSderaadt 	if (do_output) {
1430ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
143126da422aStedu 			a = cvp->a;
143226da422aStedu 			b = cvp->b;
143326da422aStedu 			c = cvp->c;
143426da422aStedu 			d = cvp->d;
1435ae8d569bSderaadt 
1436ae8d569bSderaadt 			if (a <= b && c <= d)
1437ae8d569bSderaadt 				ch = 'c';
1438ae8d569bSderaadt 			else
1439ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1440ae8d569bSderaadt 
1441ae8d569bSderaadt 			if (ch == 'd')
1442dba1d6eaSray 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1443ae8d569bSderaadt 			else {
1444dba1d6eaSray 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
14454ec4b3d5Smillert 				fetch(ixnew, c, d, f2,
1446dba1d6eaSray 				    ch == 'c' ? '!' : '+', 0, flags);
1447ae8d569bSderaadt 			}
1448ae8d569bSderaadt 			lowc = d + 1;
1449ae8d569bSderaadt 			cvp++;
1450ae8d569bSderaadt 		}
1451dba1d6eaSray 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1452ae8d569bSderaadt 	}
1453ae8d569bSderaadt 	context_vec_ptr = context_vec_start - 1;
1454ae8d569bSderaadt }
14559de32c1bSmillert 
14569de32c1bSmillert /* dump accumulated "unified" diff changes */
14579de32c1bSmillert static void
1458dba1d6eaSray dump_unified_vec(FILE *f1, FILE *f2, int flags)
14599de32c1bSmillert {
14609de32c1bSmillert 	struct context_vec *cvp = context_vec_start;
14619de32c1bSmillert 	int lowa, upb, lowc, upd;
14629de32c1bSmillert 	int a, b, c, d;
146396e45528Sotto 	char ch, *f;
14649de32c1bSmillert 
146590f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
14669de32c1bSmillert 		return;
14679de32c1bSmillert 
14689de32c1bSmillert 	b = d = 0;		/* gcc */
1469*b9fc9a72Sderaadt 	lowa = MAXIMUM(1, cvp->a - diff_context);
1470*b9fc9a72Sderaadt 	upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1471*b9fc9a72Sderaadt 	lowc = MAXIMUM(1, cvp->c - diff_context);
1472*b9fc9a72Sderaadt 	upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
14739de32c1bSmillert 
14745e07282dSray 	diff_output("@@ -");
1475c72ea322Smillert 	uni_range(lowa, upb);
14765e07282dSray 	diff_output(" +");
1477c72ea322Smillert 	uni_range(lowc, upd);
14785e07282dSray 	diff_output(" @@");
1479dba1d6eaSray 	if ((flags & D_PROTOTYPE)) {
148096e45528Sotto 		f = match_function(ixold, lowa-1, f1);
14815e07282dSray 		if (f != NULL)
14825e07282dSray 			diff_output(" %s", f);
148396e45528Sotto 	}
14845e07282dSray 	diff_output("\n");
14859de32c1bSmillert 
14869de32c1bSmillert 	/*
14879de32c1bSmillert 	 * Output changes in "unified" diff format--the old and new lines
14889de32c1bSmillert 	 * are printed together.
14899de32c1bSmillert 	 */
14909de32c1bSmillert 	for (; cvp <= context_vec_ptr; cvp++) {
14919de32c1bSmillert 		a = cvp->a;
14929de32c1bSmillert 		b = cvp->b;
14939de32c1bSmillert 		c = cvp->c;
14949de32c1bSmillert 		d = cvp->d;
14959de32c1bSmillert 
14969de32c1bSmillert 		/*
14979de32c1bSmillert 		 * c: both new and old changes
14989de32c1bSmillert 		 * d: only changes in the old file
14999de32c1bSmillert 		 * a: only changes in the new file
15009de32c1bSmillert 		 */
15019de32c1bSmillert 		if (a <= b && c <= d)
15029de32c1bSmillert 			ch = 'c';
15039de32c1bSmillert 		else
15049de32c1bSmillert 			ch = (a <= b) ? 'd' : 'a';
15059de32c1bSmillert 
15069de32c1bSmillert 		switch (ch) {
15079de32c1bSmillert 		case 'c':
1508dba1d6eaSray 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1509dba1d6eaSray 			fetch(ixold, a, b, f1, '-', 0, flags);
1510dba1d6eaSray 			fetch(ixnew, c, d, f2, '+', 0, flags);
15119de32c1bSmillert 			break;
15129de32c1bSmillert 		case 'd':
1513dba1d6eaSray 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1514dba1d6eaSray 			fetch(ixold, a, b, f1, '-', 0, flags);
15159de32c1bSmillert 			break;
15169de32c1bSmillert 		case 'a':
1517dba1d6eaSray 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1518dba1d6eaSray 			fetch(ixnew, c, d, f2, '+', 0, flags);
15199de32c1bSmillert 			break;
15209de32c1bSmillert 		}
15219de32c1bSmillert 		lowa = b + 1;
15229de32c1bSmillert 		lowc = d + 1;
15239de32c1bSmillert 	}
1524dba1d6eaSray 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
15259de32c1bSmillert 
15269de32c1bSmillert 	context_vec_ptr = context_vec_start - 1;
15279de32c1bSmillert }
15287bdb251cSmillert 
15297bdb251cSmillert static void
15307bdb251cSmillert print_header(const char *file1, const char *file2)
15317bdb251cSmillert {
15327bdb251cSmillert 	if (label[0] != NULL)
15335e07282dSray 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
15347bdb251cSmillert 		    label[0]);
15357bdb251cSmillert 	else
15365e07282dSray 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
15377bdb251cSmillert 		    file1, ctime(&stb1.st_mtime));
15387bdb251cSmillert 	if (label[1] != NULL)
15395e07282dSray 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
15407bdb251cSmillert 		    label[1]);
15417bdb251cSmillert 	else
15425e07282dSray 		diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
15437bdb251cSmillert 		    file2, ctime(&stb2.st_mtime));
15447bdb251cSmillert }
1545