xref: /openbsd-src/usr.bin/diff/diffreg.c (revision f28259e9bfdc4a8ed2bc2df0bccf8f9dab5d6a41)
1*f28259e9Smillert /*	$OpenBSD: diffreg.c,v 1.46 2003/07/31 02:53:57 millert Exp $	*/
2d0c3f575Sderaadt 
3d0c3f575Sderaadt /*
4d0c3f575Sderaadt  * Copyright (C) Caldera International Inc.  2001-2002.
5d0c3f575Sderaadt  * All rights reserved.
6d0c3f575Sderaadt  *
7d0c3f575Sderaadt  * Redistribution and use in source and binary forms, with or without
8d0c3f575Sderaadt  * modification, are permitted provided that the following conditions
9d0c3f575Sderaadt  * are met:
10d0c3f575Sderaadt  * 1. Redistributions of source code and documentation must retain the above
11d0c3f575Sderaadt  *    copyright notice, this list of conditions and the following disclaimer.
12d0c3f575Sderaadt  * 2. Redistributions in binary form must reproduce the above copyright
13d0c3f575Sderaadt  *    notice, this list of conditions and the following disclaimer in the
14d0c3f575Sderaadt  *    documentation and/or other materials provided with the distribution.
15d0c3f575Sderaadt  * 3. All advertising materials mentioning features or use of this software
16d0c3f575Sderaadt  *    must display the following acknowledgement:
17d0c3f575Sderaadt  *	This product includes software developed or owned by Caldera
18d0c3f575Sderaadt  *	International, Inc.
19d0c3f575Sderaadt  * 4. Neither the name of Caldera International, Inc. nor the names of other
20d0c3f575Sderaadt  *    contributors may be used to endorse or promote products derived from
21d0c3f575Sderaadt  *    this software without specific prior written permission.
22d0c3f575Sderaadt  *
23d0c3f575Sderaadt  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24d0c3f575Sderaadt  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25d0c3f575Sderaadt  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26d0c3f575Sderaadt  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27d0c3f575Sderaadt  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28d0c3f575Sderaadt  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29d0c3f575Sderaadt  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30d0c3f575Sderaadt  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31d0c3f575Sderaadt  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32d0c3f575Sderaadt  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33d0c3f575Sderaadt  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34d0c3f575Sderaadt  * POSSIBILITY OF SUCH DAMAGE.
35d0c3f575Sderaadt  */
364ec4b3d5Smillert /*-
374ec4b3d5Smillert  * Copyright (c) 1991, 1993
384ec4b3d5Smillert  *	The Regents of the University of California.  All rights reserved.
394ec4b3d5Smillert  *
404ec4b3d5Smillert  * Redistribution and use in source and binary forms, with or without
414ec4b3d5Smillert  * modification, are permitted provided that the following conditions
424ec4b3d5Smillert  * are met:
434ec4b3d5Smillert  * 1. Redistributions of source code must retain the above copyright
444ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer.
454ec4b3d5Smillert  * 2. Redistributions in binary form must reproduce the above copyright
464ec4b3d5Smillert  *    notice, this list of conditions and the following disclaimer in the
474ec4b3d5Smillert  *    documentation and/or other materials provided with the distribution.
484ec4b3d5Smillert  * 3. Neither the name of the University nor the names of its contributors
494ec4b3d5Smillert  *    may be used to endorse or promote products derived from this software
504ec4b3d5Smillert  *    without specific prior written permission.
514ec4b3d5Smillert  *
524ec4b3d5Smillert  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
534ec4b3d5Smillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
544ec4b3d5Smillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
554ec4b3d5Smillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
564ec4b3d5Smillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
574ec4b3d5Smillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
584ec4b3d5Smillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
594ec4b3d5Smillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
604ec4b3d5Smillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
614ec4b3d5Smillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
624ec4b3d5Smillert  * SUCH DAMAGE.
634ec4b3d5Smillert  *
644ec4b3d5Smillert  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
654ec4b3d5Smillert  */
664ec4b3d5Smillert 
674ec4b3d5Smillert #ifndef lint
68*f28259e9Smillert static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.46 2003/07/31 02:53:57 millert Exp $";
694ec4b3d5Smillert #endif /* not lint */
70d0c3f575Sderaadt 
717b6ec9e4Smillert #include <sys/param.h>
724ec4b3d5Smillert #include <sys/stat.h>
73b4bca33fSmillert #include <sys/wait.h>
7426da422aStedu 
754ec4b3d5Smillert #include <ctype.h>
764ec4b3d5Smillert #include <err.h>
777b6ec9e4Smillert #include <errno.h>
7826da422aStedu #include <fcntl.h>
790efcb26eSmillert #include <stddef.h>
804ec4b3d5Smillert #include <stdio.h>
814ec4b3d5Smillert #include <stdlib.h>
8226da422aStedu #include <string.h>
834ec4b3d5Smillert #include <unistd.h>
84ae8d569bSderaadt 
85ae8d569bSderaadt #include "diff.h"
86b4bca33fSmillert #include "pathnames.h"
8726da422aStedu 
88ae8d569bSderaadt /*
89ae8d569bSderaadt  * diff - compare two files.
90ae8d569bSderaadt  */
91ae8d569bSderaadt 
92ae8d569bSderaadt /*
93ae8d569bSderaadt  *	Uses an algorithm due to Harold Stone, which finds
94ae8d569bSderaadt  *	a pair of longest identical subsequences in the two
95ae8d569bSderaadt  *	files.
96ae8d569bSderaadt  *
97ae8d569bSderaadt  *	The major goal is to generate the match vector J.
98ae8d569bSderaadt  *	J[i] is the index of the line in file1 corresponding
99ae8d569bSderaadt  *	to line i file0. J[i] = 0 if there is no
100ae8d569bSderaadt  *	such line in file1.
101ae8d569bSderaadt  *
102ae8d569bSderaadt  *	Lines are hashed so as to work in core. All potential
103ae8d569bSderaadt  *	matches are located by sorting the lines of each file
104ae8d569bSderaadt  *	on the hash (called ``value''). In particular, this
105ae8d569bSderaadt  *	collects the equivalence classes in file1 together.
106ae8d569bSderaadt  *	Subroutine equiv replaces the value of each line in
107ae8d569bSderaadt  *	file0 by the index of the first element of its
108ae8d569bSderaadt  *	matching equivalence in (the reordered) file1.
109ae8d569bSderaadt  *	To save space equiv squeezes file1 into a single
110ae8d569bSderaadt  *	array member in which the equivalence classes
111ae8d569bSderaadt  *	are simply concatenated, except that their first
112ae8d569bSderaadt  *	members are flagged by changing sign.
113ae8d569bSderaadt  *
114ae8d569bSderaadt  *	Next the indices that point into member are unsorted into
115ae8d569bSderaadt  *	array class according to the original order of file0.
116ae8d569bSderaadt  *
117ae8d569bSderaadt  *	The cleverness lies in routine stone. This marches
118ae8d569bSderaadt  *	through the lines of file0, developing a vector klist
119ae8d569bSderaadt  *	of "k-candidates". At step i a k-candidate is a matched
120ae8d569bSderaadt  *	pair of lines x,y (x in file0 y in file1) such that
121ae8d569bSderaadt  *	there is a common subsequence of length k
122ae8d569bSderaadt  *	between the first i lines of file0 and the first y
123ae8d569bSderaadt  *	lines of file1, but there is no such subsequence for
124ae8d569bSderaadt  *	any smaller y. x is the earliest possible mate to y
125ae8d569bSderaadt  *	that occurs in such a subsequence.
126ae8d569bSderaadt  *
127ae8d569bSderaadt  *	Whenever any of the members of the equivalence class of
128ae8d569bSderaadt  *	lines in file1 matable to a line in file0 has serial number
129ae8d569bSderaadt  *	less than the y of some k-candidate, that k-candidate
130ae8d569bSderaadt  *	with the smallest such y is replaced. The new
131ae8d569bSderaadt  *	k-candidate is chained (via pred) to the current
132ae8d569bSderaadt  *	k-1 candidate so that the actual subsequence can
133ae8d569bSderaadt  *	be recovered. When a member has serial number greater
134ae8d569bSderaadt  *	that the y of all k-candidates, the klist is extended.
135ae8d569bSderaadt  *	At the end, the longest subsequence is pulled out
136ae8d569bSderaadt  *	and placed in the array J by unravel
137ae8d569bSderaadt  *
138ae8d569bSderaadt  *	With J in hand, the matches there recorded are
139ae8d569bSderaadt  *	check'ed against reality to assure that no spurious
140ae8d569bSderaadt  *	matches have crept in due to hashing. If they have,
141ae8d569bSderaadt  *	they are broken, and "jackpot" is recorded--a harmless
142ae8d569bSderaadt  *	matter except that a true match for a spuriously
143ae8d569bSderaadt  *	mated line may now be unnecessarily reported as a change.
144ae8d569bSderaadt  *
145ae8d569bSderaadt  *	Much of the complexity of the program comes simply
146ae8d569bSderaadt  *	from trying to minimize core utilization and
147ae8d569bSderaadt  *	maximize the range of doable problems by dynamically
148ae8d569bSderaadt  *	allocating what is needed and reusing what is not.
149ae8d569bSderaadt  *	The core requirements for problems larger than somewhat
150ae8d569bSderaadt  *	are (in words) 2*length(file0) + length(file1) +
151ae8d569bSderaadt  *	3*(number of k-candidates installed),  typically about
152ae8d569bSderaadt  *	6n words for files of length n.
153ae8d569bSderaadt  */
154ae8d569bSderaadt 
155ae8d569bSderaadt struct cand {
156ae8d569bSderaadt 	int x;
157ae8d569bSderaadt 	int y;
158ae8d569bSderaadt 	int pred;
159ae8d569bSderaadt } cand;
16026da422aStedu 
161ae8d569bSderaadt struct line {
162ae8d569bSderaadt 	int serial;
163ae8d569bSderaadt 	int value;
16492af4c2dStedu } *file[2];
16526da422aStedu 
1660efcb26eSmillert /*
1670efcb26eSmillert  * The following struct is used to record change information when
1680efcb26eSmillert  * doing a "context" or "unified" diff.  (see routine "change" to
1690efcb26eSmillert  * understand the highly mnemonic field names)
1700efcb26eSmillert  */
1710efcb26eSmillert struct context_vec {
1720efcb26eSmillert 	int a;			/* start line in old file */
1730efcb26eSmillert 	int b;			/* end line in old file */
1740efcb26eSmillert 	int c;			/* start line in new file */
1750efcb26eSmillert 	int d;			/* end line in new file */
1760efcb26eSmillert };
1770efcb26eSmillert 
1784ec4b3d5Smillert static int  *J;			/* will be overlaid on class */
1794ec4b3d5Smillert static int  *class;		/* will be overlaid on file[0] */
1804ec4b3d5Smillert static int  *klist;		/* will be overlaid on file[0] after class */
1814ec4b3d5Smillert static int  *member;		/* will be overlaid on file[1] */
1824ec4b3d5Smillert static int   clen;
1834ec4b3d5Smillert static int   inifdef;		/* whether or not we are in a #ifdef block */
1844ec4b3d5Smillert static int   len[2];
1854ec4b3d5Smillert static int   pref, suff;	/* length of prefix and suffix */
1864ec4b3d5Smillert static int   slen[2];
1874ec4b3d5Smillert static int   anychange;
1884ec4b3d5Smillert static long *ixnew;		/* will be overlaid on file[1] */
1894ec4b3d5Smillert static long *ixold;		/* will be overlaid on klist */
1904ec4b3d5Smillert static struct cand *clist;	/* merely a free storage pot for candidates */
1916e18f850Sotto static int   clistlen;		/* the length of clist */
1924ec4b3d5Smillert static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
1934ec4b3d5Smillert static u_char *chrtran;		/* translation table for case-folding */
1940efcb26eSmillert static struct context_vec *context_vec_start;
1950efcb26eSmillert static struct context_vec *context_vec_end;
1960efcb26eSmillert static struct context_vec *context_vec_ptr;
197ae8d569bSderaadt 
1987b6ec9e4Smillert static FILE *opentemp(const char *);
1994ec4b3d5Smillert static void output(char *, FILE *, char *, FILE *);
2004ec4b3d5Smillert static void check(char *, FILE *, char *, FILE *);
20126da422aStedu static void range(int, int, char *);
202c72ea322Smillert static void uni_range(int, int);
2034ec4b3d5Smillert static void dump_context_vec(FILE *, FILE *);
2044ec4b3d5Smillert static void dump_unified_vec(FILE *, FILE *);
20526da422aStedu static void prepare(int, FILE *);
20626da422aStedu static void prune(void);
20726da422aStedu static void equiv(struct line *, int, struct line *, int, int *);
20826da422aStedu static void unravel(int);
20926da422aStedu static void unsort(struct line *, int, int *);
2104ec4b3d5Smillert static void change(char *, FILE *, char *, FILE *, int, int, int, int);
21126da422aStedu static void sort(struct line *, int);
2124ec4b3d5Smillert static int  asciifile(FILE *);
2131f9aa9e0Smillert static int  fetch(long *, int, int, FILE *, int, int);
21426da422aStedu static int  newcand(int, int, int);
21526da422aStedu static int  search(int *, int, int);
2164ec4b3d5Smillert static int  skipline(FILE *);
2176e18f850Sotto static int  isqrt(int);
21826da422aStedu static int  stone(int *, int, int *, int *);
21926da422aStedu static int  readhash(FILE *);
2204ec4b3d5Smillert static int  files_differ(FILE *, FILE *, int);
2216e18f850Sotto static __inline int min(int, int);
2226e18f850Sotto static __inline int max(int, int);
2236e18f850Sotto 
22426da422aStedu 
22526da422aStedu /*
22626da422aStedu  * chrtran points to one of 2 translation tables: cup2low if folding upper to
22726da422aStedu  * lower case clow2low if not folding case
228ae8d569bSderaadt  */
2298a15d8deSderaadt u_char clow2low[256] = {
23026da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
23126da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
23226da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
23326da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
23426da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
23526da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
23626da422aStedu 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
23726da422aStedu 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
23826da422aStedu 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
23926da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
24026da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
24126da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
24226da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
24326da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
24426da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
24526da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
24626da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
24726da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
24826da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
24926da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
25026da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
25126da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
25226da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
25326da422aStedu 	0xfd, 0xfe, 0xff
254ae8d569bSderaadt };
255ae8d569bSderaadt 
2568a15d8deSderaadt u_char cup2low[256] = {
25726da422aStedu 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
25826da422aStedu 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
25926da422aStedu 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
26026da422aStedu 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
26126da422aStedu 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
26226da422aStedu 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
26326da422aStedu 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
26426da422aStedu 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
26526da422aStedu 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
26626da422aStedu 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
26726da422aStedu 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
26826da422aStedu 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
26926da422aStedu 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
27026da422aStedu 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
27126da422aStedu 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
27226da422aStedu 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
27326da422aStedu 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
27426da422aStedu 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
27526da422aStedu 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
27626da422aStedu 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
27726da422aStedu 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
27826da422aStedu 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
27926da422aStedu 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
28026da422aStedu 	0xfd, 0xfe, 0xff
281ae8d569bSderaadt };
282ae8d569bSderaadt 
283b4bca33fSmillert int
2844ec4b3d5Smillert diffreg(char *ofile1, char *ofile2, int flags)
285ae8d569bSderaadt {
2864ec4b3d5Smillert 	char *file1 = ofile1;
2874ec4b3d5Smillert 	char *file2 = ofile2;
2884ec4b3d5Smillert 	FILE *f1 = NULL;
2894ec4b3d5Smillert 	FILE *f2 = NULL;
290b4bca33fSmillert 	int rval = D_SAME;
291b4bca33fSmillert 	int i, ostdout = -1;
292b4bca33fSmillert 	pid_t pid = -1;
293ae8d569bSderaadt 
2944ec4b3d5Smillert 	anychange = 0;
2950efcb26eSmillert 	context_vec_ptr = context_vec_start - 1;
296ae8d569bSderaadt 	chrtran = (iflag ? cup2low : clow2low);
2977b6ec9e4Smillert 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
298fed3a06dSmillert 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
29966e5764eSmillert 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
300*f28259e9Smillert 		goto closem;
3014ec4b3d5Smillert 
3024ec4b3d5Smillert 	if (flags & D_EMPTY1)
3034ec4b3d5Smillert 		f1 = fopen(_PATH_DEVNULL, "r");
3044ec4b3d5Smillert 	else {
3057b6ec9e4Smillert 		if (!S_ISREG(stb1.st_mode)) {
3067b6ec9e4Smillert 			if ((f1 = opentemp(file1)) == NULL ||
3077b6ec9e4Smillert 			    fstat(fileno(f1), &stb1) < 0) {
3084ec4b3d5Smillert 				warn("%s", file1);
3094ec4b3d5Smillert 				status |= 2;
3104ec4b3d5Smillert 				goto closem;
31148b947b7Smillert 			}
3127b6ec9e4Smillert 		} else if (strcmp(file1, "-") == 0)
313b1a26502Smillert 			f1 = stdin;
314b1a26502Smillert 		else
3154ec4b3d5Smillert 			f1 = fopen(file1, "r");
3164ec4b3d5Smillert 	}
3174ec4b3d5Smillert 	if (f1 == NULL) {
3184ec4b3d5Smillert 		warn("%s", file1);
3194ec4b3d5Smillert 		status |= 2;
3204ec4b3d5Smillert 		goto closem;
3214ec4b3d5Smillert 	}
3224ec4b3d5Smillert 
3234ec4b3d5Smillert 	if (flags & D_EMPTY2)
3244ec4b3d5Smillert 		f2 = fopen(_PATH_DEVNULL, "r");
3254ec4b3d5Smillert 	else {
3267b6ec9e4Smillert 		if (!S_ISREG(stb2.st_mode)) {
3277b6ec9e4Smillert 			if ((f2 = opentemp(file2)) == NULL ||
3287b6ec9e4Smillert 			    fstat(fileno(f2), &stb2) < 0) {
3294ec4b3d5Smillert 				warn("%s", file2);
3304ec4b3d5Smillert 				status |= 2;
3314ec4b3d5Smillert 				goto closem;
3324ec4b3d5Smillert 			}
3337b6ec9e4Smillert 		} else if (strcmp(file2, "-") == 0)
334b1a26502Smillert 			f2 = stdin;
335b1a26502Smillert 		else
3364ec4b3d5Smillert 			f2 = fopen(file2, "r");
3374ec4b3d5Smillert 	}
3384ec4b3d5Smillert 	if (f2 == NULL) {
3394ec4b3d5Smillert 		warn("%s", file2);
3404ec4b3d5Smillert 		status |= 2;
3414ec4b3d5Smillert 		goto closem;
3424ec4b3d5Smillert 	}
3434ec4b3d5Smillert 
3444ec4b3d5Smillert 	switch (files_differ(f1, f2, flags)) {
3454ec4b3d5Smillert 	case 0:
346b4bca33fSmillert 		goto closem;
3474ec4b3d5Smillert 	case 1:
3484ec4b3d5Smillert 		break;
3494ec4b3d5Smillert 	default:
3504ec4b3d5Smillert 		/* error */
3514ec4b3d5Smillert 		status |= 2;
3524ec4b3d5Smillert 		goto closem;
353ae8d569bSderaadt 	}
3544ec4b3d5Smillert 
355ae8d569bSderaadt 	/*
356ae8d569bSderaadt 	 * Files certainly differ at this point; set status accordingly
357ae8d569bSderaadt 	 */
3584ec4b3d5Smillert 	status |= 1;
359b4bca33fSmillert 	rval = D_DIFFER;
360b4bca33fSmillert 	if (!asciifile(f1) || !asciifile(f2)) {
361b4bca33fSmillert 		rval = D_BINARY;
362cab5d83cSmillert 		goto closem;
363cab5d83cSmillert 	}
364b4bca33fSmillert 	if (format == D_BRIEF)
365b4bca33fSmillert 		goto closem;
366b4bca33fSmillert 	if (lflag) {
367b4bca33fSmillert 		/* redirect stdout to pr */
368b4bca33fSmillert 		int pfd[2];
369b4bca33fSmillert 		char *header;
370b4bca33fSmillert 		char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
371b4bca33fSmillert 
372b4bca33fSmillert 		easprintf(&header, "%s %s %s", diffargs, file1, file2);
373b4bca33fSmillert 		prargv[2] = header;
374b4bca33fSmillert 		fflush(stdout);
375b4bca33fSmillert 		rewind(stdout);
376b4bca33fSmillert 		pipe(pfd);
377b4bca33fSmillert 		switch ((pid = fork())) {
378b4bca33fSmillert 		case -1:
379b4bca33fSmillert 			warnx("No more processes");
380b4bca33fSmillert 			status |= 2;
381b4bca33fSmillert 			free(header);
382b4bca33fSmillert 			return (D_ERROR);
383b4bca33fSmillert 		case 0:
384b4bca33fSmillert 			/* child */
385b4bca33fSmillert 			if (pfd[0] != STDIN_FILENO) {
386b4bca33fSmillert 				dup2(pfd[0], STDIN_FILENO);
387b4bca33fSmillert 				close(pfd[0]);
388b4bca33fSmillert 			}
389b4bca33fSmillert 			close(pfd[1]);
390b4bca33fSmillert 			execv(_PATH_PR, prargv);
391b4bca33fSmillert 			_exit(127);
392b4bca33fSmillert 		default:
393b4bca33fSmillert 			/* parent */
394b4bca33fSmillert 			if (pfd[1] != STDOUT_FILENO) {
395b4bca33fSmillert 				ostdout = dup(STDOUT_FILENO);
396b4bca33fSmillert 				dup2(pfd[1], STDOUT_FILENO);
397b4bca33fSmillert 				close(pfd[1]);
398b4bca33fSmillert 			}
399b4bca33fSmillert 			close(pfd[0]);
400b4bca33fSmillert 			rewind(stdout);
401b4bca33fSmillert 			free(header);
402b4bca33fSmillert 		}
40352e5bd6bSmillert 	} else if (flags & D_HEADER)
4044ec4b3d5Smillert 		printf("%s %s %s\n", diffargs, file1, file2);
405ae8d569bSderaadt 	prepare(0, f1);
406ae8d569bSderaadt 	prepare(1, f2);
407ae8d569bSderaadt 	prune();
408ae8d569bSderaadt 	sort(sfile[0], slen[0]);
409ae8d569bSderaadt 	sort(sfile[1], slen[1]);
410ae8d569bSderaadt 
411ae8d569bSderaadt 	member = (int *)file[1];
412ae8d569bSderaadt 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
41349dffe13Smillert 	member = erealloc(member, (slen[1] + 2) * sizeof(int));
414ae8d569bSderaadt 
415ae8d569bSderaadt 	class = (int *)file[0];
416ae8d569bSderaadt 	unsort(sfile[0], slen[0], class);
41749dffe13Smillert 	class = erealloc(class, (slen[0] + 2) * sizeof(int));
418ae8d569bSderaadt 
41949dffe13Smillert 	klist = emalloc((slen[0] + 2) * sizeof(int));
420058e94f4Smillert 	clen = 0;
4216e18f850Sotto 	clistlen = 100;
4226e18f850Sotto 	clist = emalloc(clistlen * sizeof(cand));
423ae8d569bSderaadt 	i = stone(class, slen[0], member, klist);
42426da422aStedu 	free(member);
42526da422aStedu 	free(class);
426ae8d569bSderaadt 
4274ec4b3d5Smillert 	J = erealloc(J, (len[0] + 2) * sizeof(int));
428ae8d569bSderaadt 	unravel(klist[i]);
42926da422aStedu 	free(clist);
43026da422aStedu 	free(klist);
431ae8d569bSderaadt 
4324ec4b3d5Smillert 	ixold = erealloc(ixold, (len[0] + 2) * sizeof(long));
4334ec4b3d5Smillert 	ixnew = erealloc(ixnew, (len[1] + 2) * sizeof(long));
4344ec4b3d5Smillert 	check(file1, f1, file2, f2);
4354ec4b3d5Smillert 	output(file1, f1, file2, f2);
436b4bca33fSmillert 	if (ostdout != -1) {
437b4bca33fSmillert 		int wstatus;
4384ec4b3d5Smillert 
439b4bca33fSmillert 		/* close the pipe to pr and restore stdout */
440b4bca33fSmillert 		fflush(stdout);
441b4bca33fSmillert 		rewind(stdout);
442b4bca33fSmillert 		if (ostdout != STDOUT_FILENO) {
443b4bca33fSmillert 			close(STDOUT_FILENO);
444b4bca33fSmillert 			dup2(ostdout, STDOUT_FILENO);
445b4bca33fSmillert 			close(ostdout);
446b4bca33fSmillert 		}
447b4bca33fSmillert 		waitpid(pid, &wstatus, 0);
44852e5bd6bSmillert 	}
4494ec4b3d5Smillert closem:
4504ec4b3d5Smillert 	if (f1 != NULL)
4514ec4b3d5Smillert 		fclose(f1);
4524ec4b3d5Smillert 	if (f2 != NULL)
4534ec4b3d5Smillert 		fclose(f2);
4547b6ec9e4Smillert 	if (file1 != ofile1)
455b1a26502Smillert 		free(file1);
4567b6ec9e4Smillert 	if (file2 != ofile2)
4574ec4b3d5Smillert 		free(file2);
458b4bca33fSmillert 	return (rval);
4594ec4b3d5Smillert }
4604ec4b3d5Smillert 
4614ec4b3d5Smillert /*
4624ec4b3d5Smillert  * Check to see if the given files differ.
4634ec4b3d5Smillert  * Returns 0 if they are the same, 1 if different, and -1 on error.
4644ec4b3d5Smillert  * XXX - could use code from cmp(1) [faster]
4654ec4b3d5Smillert  */
4664ec4b3d5Smillert static int
4674ec4b3d5Smillert files_differ(FILE *f1, FILE *f2, int flags)
4684ec4b3d5Smillert {
4694ec4b3d5Smillert 	char buf1[BUFSIZ], buf2[BUFSIZ];
4704ec4b3d5Smillert 	size_t i, j;
4714ec4b3d5Smillert 
4724ec4b3d5Smillert 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
4734ec4b3d5Smillert 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
4744ec4b3d5Smillert 		return (1);
4754ec4b3d5Smillert 	for (;;) {
4764ec4b3d5Smillert 		i = fread(buf1, 1, sizeof(buf1), f1);
4774ec4b3d5Smillert 		j = fread(buf2, 1, sizeof(buf2), f2);
4784ec4b3d5Smillert 		if (i != j)
4794ec4b3d5Smillert 			return (1);
4804ec4b3d5Smillert 		if (i == 0 && j == 0) {
4814ec4b3d5Smillert 			if (ferror(f1) || ferror(f2))
4824ec4b3d5Smillert 				return (1);
4834ec4b3d5Smillert 			return (0);
4844ec4b3d5Smillert 		}
4854ec4b3d5Smillert 		if (memcmp(buf1, buf2, i) != 0)
4864ec4b3d5Smillert 			return (1);
4874ec4b3d5Smillert 	}
488ae8d569bSderaadt }
489ae8d569bSderaadt 
4907b6ec9e4Smillert static FILE *
4917b6ec9e4Smillert opentemp(const char *file)
492ae8d569bSderaadt {
4937b6ec9e4Smillert 	char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
4947b6ec9e4Smillert 	ssize_t nread;
4957b6ec9e4Smillert 	int ifd, ofd;
49648b947b7Smillert 
49748b947b7Smillert 	if (strcmp(file, "-") == 0)
49848b947b7Smillert 		ifd = STDIN_FILENO;
49966e5764eSmillert 	else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
5004ec4b3d5Smillert 		return (NULL);
50148b947b7Smillert 
50248b947b7Smillert 	if ((tempdir = getenv("TMPDIR")) == NULL)
50348b947b7Smillert 		tempdir = _PATH_TMP;
5047b6ec9e4Smillert 	if (snprintf(tempfile, sizeof(tempfile), "%s/diff.XXXXXXXX",
5057b6ec9e4Smillert 	    tempdir) >= sizeof(tempfile)) {
5067b6ec9e4Smillert 		close(ifd);
5077b6ec9e4Smillert 		errno = ENAMETOOLONG;
5084ec4b3d5Smillert 		return (NULL);
509ae8d569bSderaadt 	}
5107b6ec9e4Smillert 
5117b6ec9e4Smillert 	if ((ofd = mkstemp(tempfile)) < 0)
5127b6ec9e4Smillert 		return (NULL);
5137b6ec9e4Smillert 	unlink(tempfile);
5147b6ec9e4Smillert 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
5157b6ec9e4Smillert 		if (write(ofd, buf, nread) != nread) {
51648b947b7Smillert 			close(ifd);
51748b947b7Smillert 			close(ofd);
5187b6ec9e4Smillert 			return (NULL);
5197b6ec9e4Smillert 		}
5207b6ec9e4Smillert 	}
5217b6ec9e4Smillert 	close(ifd);
522*f28259e9Smillert 	lseek(ofd, (off_t)0, SEEK_SET);
5237b6ec9e4Smillert 	return (fdopen(ofd, "r"));
524ae8d569bSderaadt }
525ae8d569bSderaadt 
526ae8d569bSderaadt char *
52726da422aStedu splice(char *dir, char *file)
528ae8d569bSderaadt {
52949dffe13Smillert 	char *tail, *buf;
530ae8d569bSderaadt 
5317b6ec9e4Smillert 	if ((tail = strrchr(file, '/')) == NULL)
532ae8d569bSderaadt 		tail = file;
533ae8d569bSderaadt 	else
534ae8d569bSderaadt 		tail++;
535b4bca33fSmillert 	easprintf(&buf, "%s/%s", dir, tail);
53649dffe13Smillert 	return (buf);
537ae8d569bSderaadt }
538ae8d569bSderaadt 
53926da422aStedu static void
54026da422aStedu prepare(int i, FILE *fd)
541ae8d569bSderaadt {
54226da422aStedu 	struct line *p;
54326da422aStedu 	int j, h;
544ae8d569bSderaadt 
5454ec4b3d5Smillert 	rewind(fd);
54649dffe13Smillert 	p = emalloc(3 * sizeof(struct line));
54726da422aStedu 	for (j = 0; (h = readhash(fd));) {
54849dffe13Smillert 		p = erealloc(p, (++j + 3) * sizeof(struct line));
549ae8d569bSderaadt 		p[j].value = h;
550ae8d569bSderaadt 	}
551ae8d569bSderaadt 	len[i] = j;
552ae8d569bSderaadt 	file[i] = p;
553ae8d569bSderaadt }
554ae8d569bSderaadt 
55526da422aStedu static void
55626da422aStedu prune(void)
557ae8d569bSderaadt {
55826da422aStedu 	int i, j;
55948b8c3e3Sderaadt 
560ae8d569bSderaadt 	for (pref = 0; pref < len[0] && pref < len[1] &&
561ae8d569bSderaadt 	    file[0][pref + 1].value == file[1][pref + 1].value;
5627d2b2b91Sderaadt 	    pref++)
5637d2b2b91Sderaadt 		;
564ae8d569bSderaadt 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
565ae8d569bSderaadt 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
5667d2b2b91Sderaadt 	    suff++)
5677d2b2b91Sderaadt 		;
568ae8d569bSderaadt 	for (j = 0; j < 2; j++) {
569ae8d569bSderaadt 		sfile[j] = file[j] + pref;
570ae8d569bSderaadt 		slen[j] = len[j] - pref - suff;
571ae8d569bSderaadt 		for (i = 0; i <= slen[j]; i++)
572ae8d569bSderaadt 			sfile[j][i].serial = i;
573ae8d569bSderaadt 	}
574ae8d569bSderaadt }
575ae8d569bSderaadt 
57626da422aStedu static void
57726da422aStedu equiv(struct line *a, int n, struct line *b, int m, int *c)
578ae8d569bSderaadt {
57926da422aStedu 	int i, j;
58048b8c3e3Sderaadt 
581ae8d569bSderaadt 	i = j = 1;
582ae8d569bSderaadt 	while (i <= n && j <= m) {
583ae8d569bSderaadt 		if (a[i].value < b[j].value)
584ae8d569bSderaadt 			a[i++].value = 0;
585ae8d569bSderaadt 		else if (a[i].value == b[j].value)
586ae8d569bSderaadt 			a[i++].value = j;
587ae8d569bSderaadt 		else
588ae8d569bSderaadt 			j++;
589ae8d569bSderaadt 	}
590ae8d569bSderaadt 	while (i <= n)
591ae8d569bSderaadt 		a[i++].value = 0;
592ae8d569bSderaadt 	b[m + 1].value = 0;
593ae8d569bSderaadt 	j = 0;
594ae8d569bSderaadt 	while (++j <= m) {
595ae8d569bSderaadt 		c[j] = -b[j].serial;
596ae8d569bSderaadt 		while (b[j + 1].value == b[j].value) {
597ae8d569bSderaadt 			j++;
598ae8d569bSderaadt 			c[j] = b[j].serial;
599ae8d569bSderaadt 		}
600ae8d569bSderaadt 	}
601ae8d569bSderaadt 	c[j] = -1;
602ae8d569bSderaadt }
603ae8d569bSderaadt 
6046e18f850Sotto /* Code taken from ping.c */
6056e18f850Sotto static int
6066e18f850Sotto isqrt(int n)
6076e18f850Sotto {
6086e18f850Sotto 	int y, x = 1;
6096e18f850Sotto 
6106e18f850Sotto 	if (n == 0)
6116e18f850Sotto 		return(0);
6126e18f850Sotto 
6136e18f850Sotto 	do { /* newton was a stinker */
6146e18f850Sotto 		y = x;
6156e18f850Sotto 		x = n / x;
6166e18f850Sotto 		x += y;
6176e18f850Sotto 		x /= 2;
6186e18f850Sotto 	} while ((x - y) > 1 || (x - y) < -1);
6196e18f850Sotto 
6206e18f850Sotto 	return (x);
6216e18f850Sotto }
6226e18f850Sotto 
62326da422aStedu static int
62426da422aStedu stone(int *a, int n, int *b, int *c)
625ae8d569bSderaadt {
62648b8c3e3Sderaadt 	int i, k, y, j, l;
62748b8c3e3Sderaadt 	int oldc, tc, oldl;
6286e18f850Sotto 	u_int loopcount;
6296e18f850Sotto 
6306e18f850Sotto 	const u_int bound = dflag ? UINT_MAX : max(256, isqrt(n));
63148b8c3e3Sderaadt 
632ae8d569bSderaadt 	k = 0;
633ae8d569bSderaadt 	c[0] = newcand(0, 0, 0);
634ae8d569bSderaadt 	for (i = 1; i <= n; i++) {
635ae8d569bSderaadt 		j = a[i];
636ae8d569bSderaadt 		if (j == 0)
637ae8d569bSderaadt 			continue;
638ae8d569bSderaadt 		y = -b[j];
639ae8d569bSderaadt 		oldl = 0;
640ae8d569bSderaadt 		oldc = c[0];
6416e18f850Sotto 		loopcount = 0;
642ae8d569bSderaadt 		do {
6436e18f850Sotto 			loopcount++;
644ae8d569bSderaadt 			if (y <= clist[oldc].y)
645ae8d569bSderaadt 				continue;
646ae8d569bSderaadt 			l = search(c, k, y);
647ae8d569bSderaadt 			if (l != oldl + 1)
648ae8d569bSderaadt 				oldc = c[l - 1];
649ae8d569bSderaadt 			if (l <= k) {
650ae8d569bSderaadt 				if (clist[c[l]].y <= y)
651ae8d569bSderaadt 					continue;
652ae8d569bSderaadt 				tc = c[l];
653ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
654ae8d569bSderaadt 				oldc = tc;
655ae8d569bSderaadt 				oldl = l;
656ae8d569bSderaadt 			} else {
657ae8d569bSderaadt 				c[l] = newcand(i, y, oldc);
658ae8d569bSderaadt 				k++;
659ae8d569bSderaadt 				break;
660ae8d569bSderaadt 			}
6616e18f850Sotto 		} while ((y = b[++j]) > 0 && loopcount < bound);
662ae8d569bSderaadt 	}
663ae8d569bSderaadt 	return (k);
664ae8d569bSderaadt }
665ae8d569bSderaadt 
66626da422aStedu static int
66726da422aStedu newcand(int x, int y, int pred)
668ae8d569bSderaadt {
66926da422aStedu 	struct cand *q;
67026da422aStedu 
6716e18f850Sotto 	if (clen == clistlen) {
6726e18f850Sotto 		clistlen = clistlen * 11 / 10;
6736e18f850Sotto 		clist = erealloc(clist, clistlen * sizeof(cand));
6746e18f850Sotto 	}
6756e18f850Sotto 	q = clist + clen;
676ae8d569bSderaadt 	q->x = x;
677ae8d569bSderaadt 	q->y = y;
678ae8d569bSderaadt 	q->pred = pred;
6796e18f850Sotto 	return (clen++);
680ae8d569bSderaadt }
681ae8d569bSderaadt 
68226da422aStedu static int
68326da422aStedu search(int *c, int k, int y)
684ae8d569bSderaadt {
68548b8c3e3Sderaadt 	int i, j, l, t;
68648b8c3e3Sderaadt 
687ae8d569bSderaadt 	if (clist[c[k]].y < y)	/* quick look for typical case */
688ae8d569bSderaadt 		return (k + 1);
689ae8d569bSderaadt 	i = 0;
690ae8d569bSderaadt 	j = k + 1;
691ae8d569bSderaadt 	while (1) {
692ae8d569bSderaadt 		l = i + j;
693ae8d569bSderaadt 		if ((l >>= 1) <= i)
694ae8d569bSderaadt 			break;
695ae8d569bSderaadt 		t = clist[c[l]].y;
696ae8d569bSderaadt 		if (t > y)
697ae8d569bSderaadt 			j = l;
698ae8d569bSderaadt 		else if (t < y)
699ae8d569bSderaadt 			i = l;
700ae8d569bSderaadt 		else
701ae8d569bSderaadt 			return (l);
702ae8d569bSderaadt 	}
703ae8d569bSderaadt 	return (l + 1);
704ae8d569bSderaadt }
705ae8d569bSderaadt 
70626da422aStedu static void
70726da422aStedu unravel(int p)
708ae8d569bSderaadt {
70926da422aStedu 	struct cand *q;
71048b8c3e3Sderaadt 	int i;
71148b8c3e3Sderaadt 
712ae8d569bSderaadt 	for (i = 0; i <= len[0]; i++)
713ae8d569bSderaadt 		J[i] = i <= pref ? i :
7147d2b2b91Sderaadt 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
715ae8d569bSderaadt 	for (q = clist + p; q->y != 0; q = clist + q->pred)
716ae8d569bSderaadt 		J[q->x + pref] = q->y + pref;
717ae8d569bSderaadt }
718ae8d569bSderaadt 
71926da422aStedu /*
72049dffe13Smillert  * Check does double duty:
72149dffe13Smillert  *  1.	ferret out any fortuitous correspondences due
72249dffe13Smillert  *	to confounding by hashing (which result in "jackpot")
72349dffe13Smillert  *  2.  collect random access indexes to the two files
72426da422aStedu  */
72526da422aStedu static void
7264ec4b3d5Smillert check(char *file1, FILE *f1, char *file2, FILE *f2)
727ae8d569bSderaadt {
72848b8c3e3Sderaadt 	int i, j, jackpot, c, d;
729ae8d569bSderaadt 	long ctold, ctnew;
730ae8d569bSderaadt 
7314ec4b3d5Smillert 	rewind(f1);
7324ec4b3d5Smillert 	rewind(f2);
733ae8d569bSderaadt 	j = 1;
734ae8d569bSderaadt 	ixold[0] = ixnew[0] = 0;
735ae8d569bSderaadt 	jackpot = 0;
736ae8d569bSderaadt 	ctold = ctnew = 0;
737ae8d569bSderaadt 	for (i = 1; i <= len[0]; i++) {
738ae8d569bSderaadt 		if (J[i] == 0) {
7394ec4b3d5Smillert 			ixold[i] = ctold += skipline(f1);
740ae8d569bSderaadt 			continue;
741ae8d569bSderaadt 		}
742ae8d569bSderaadt 		while (j < J[i]) {
7434ec4b3d5Smillert 			ixnew[j] = ctnew += skipline(f2);
744ae8d569bSderaadt 			j++;
745ae8d569bSderaadt 		}
746ae8d569bSderaadt 		if (bflag || wflag || iflag) {
747ae8d569bSderaadt 			for (;;) {
7484ec4b3d5Smillert 				c = getc(f1);
7494ec4b3d5Smillert 				d = getc(f2);
750bb34d48bSmillert 				/*
751bb34d48bSmillert 				 * GNU diff ignores a missing newline
752bb34d48bSmillert 				 * in one file if bflag || wflag.
753bb34d48bSmillert 				 */
754bb34d48bSmillert 				if ((bflag || wflag) &&
755bb34d48bSmillert 				    ((c == EOF && d == '\n') ||
756bb34d48bSmillert 				    (c == '\n' && d == EOF))) {
757bb34d48bSmillert 					break;
758bb34d48bSmillert 				}
759ae8d569bSderaadt 				ctold++;
760ae8d569bSderaadt 				ctnew++;
761ae8d569bSderaadt 				if (bflag && isspace(c) && isspace(d)) {
762ae8d569bSderaadt 					do {
763ae8d569bSderaadt 						if (c == '\n')
764ae8d569bSderaadt 							break;
765ae8d569bSderaadt 						ctold++;
7664ec4b3d5Smillert 					} while (isspace(c = getc(f1)));
767ae8d569bSderaadt 					do {
768ae8d569bSderaadt 						if (d == '\n')
769ae8d569bSderaadt 							break;
770ae8d569bSderaadt 						ctnew++;
7714ec4b3d5Smillert 					} while (isspace(d = getc(f2)));
772ae8d569bSderaadt 				} else if (wflag) {
773ae8d569bSderaadt 					while (isspace(c) && c != '\n') {
7744ec4b3d5Smillert 						c = getc(f1);
775ae8d569bSderaadt 						ctold++;
776ae8d569bSderaadt 					}
777ae8d569bSderaadt 					while (isspace(d) && d != '\n') {
7784ec4b3d5Smillert 						d = getc(f2);
779ae8d569bSderaadt 						ctnew++;
780ae8d569bSderaadt 					}
781ae8d569bSderaadt 				}
782ae8d569bSderaadt 				if (chrtran[c] != chrtran[d]) {
783ae8d569bSderaadt 					jackpot++;
784ae8d569bSderaadt 					J[i] = 0;
785bb34d48bSmillert 					if (c != '\n' && c != EOF)
7864ec4b3d5Smillert 						ctold += skipline(f1);
787bb34d48bSmillert 					if (d != '\n' && c != EOF)
7884ec4b3d5Smillert 						ctnew += skipline(f2);
789ae8d569bSderaadt 					break;
790ae8d569bSderaadt 				}
791bb34d48bSmillert 				if (c == '\n' || c == EOF)
792ae8d569bSderaadt 					break;
793ae8d569bSderaadt 			}
794ae8d569bSderaadt 		} else {
795ae8d569bSderaadt 			for (;;) {
796ae8d569bSderaadt 				ctold++;
797ae8d569bSderaadt 				ctnew++;
7984ec4b3d5Smillert 				if ((c = getc(f1)) != (d = getc(f2))) {
799ae8d569bSderaadt 					/* jackpot++; */
800ae8d569bSderaadt 					J[i] = 0;
801bb34d48bSmillert 					if (c != '\n' && c != EOF)
8024ec4b3d5Smillert 						ctold += skipline(f1);
803bb34d48bSmillert 					if (d != '\n' && c != EOF)
8044ec4b3d5Smillert 						ctnew += skipline(f2);
805ae8d569bSderaadt 					break;
806ae8d569bSderaadt 				}
807bb34d48bSmillert 				if (c == '\n' || c == EOF)
808ae8d569bSderaadt 					break;
809ae8d569bSderaadt 			}
810ae8d569bSderaadt 		}
811ae8d569bSderaadt 		ixold[i] = ctold;
812ae8d569bSderaadt 		ixnew[j] = ctnew;
813ae8d569bSderaadt 		j++;
814ae8d569bSderaadt 	}
8154ec4b3d5Smillert 	for (; j <= len[1]; j++)
8164ec4b3d5Smillert 		ixnew[j] = ctnew += skipline(f2);
817ae8d569bSderaadt 	/*
81848b8c3e3Sderaadt 	 * if (jackpot)
81948b8c3e3Sderaadt 	 *	fprintf(stderr, "jackpot\n");
820ae8d569bSderaadt 	 */
821ae8d569bSderaadt }
822ae8d569bSderaadt 
82348b8c3e3Sderaadt /* shellsort CACM #201 */
82426da422aStedu static void
82526da422aStedu sort(struct line *a, int n)
82648b8c3e3Sderaadt {
82748b8c3e3Sderaadt 	struct line *ai, *aim, w;
82848b8c3e3Sderaadt 	int j, m = 0, k;
829ae8d569bSderaadt 
830ae8d569bSderaadt 	if (n == 0)
831ae8d569bSderaadt 		return;
832ae8d569bSderaadt 	for (j = 1; j <= n; j *= 2)
833ae8d569bSderaadt 		m = 2 * j - 1;
834ae8d569bSderaadt 	for (m /= 2; m != 0; m /= 2) {
835ae8d569bSderaadt 		k = n - m;
836ae8d569bSderaadt 		for (j = 1; j <= k; j++) {
837ae8d569bSderaadt 			for (ai = &a[j]; ai > a; ai -= m) {
838ae8d569bSderaadt 				aim = &ai[m];
839ae8d569bSderaadt 				if (aim < ai)
840ae8d569bSderaadt 					break;	/* wraparound */
841ae8d569bSderaadt 				if (aim->value > ai[0].value ||
84226da422aStedu 				    (aim->value == ai[0].value &&
84326da422aStedu 					aim->serial > ai[0].serial))
844ae8d569bSderaadt 					break;
845ae8d569bSderaadt 				w.value = ai[0].value;
846ae8d569bSderaadt 				ai[0].value = aim->value;
847ae8d569bSderaadt 				aim->value = w.value;
848ae8d569bSderaadt 				w.serial = ai[0].serial;
849ae8d569bSderaadt 				ai[0].serial = aim->serial;
850ae8d569bSderaadt 				aim->serial = w.serial;
851ae8d569bSderaadt 			}
852ae8d569bSderaadt 		}
853ae8d569bSderaadt 	}
854ae8d569bSderaadt }
855ae8d569bSderaadt 
85626da422aStedu static void
85726da422aStedu unsort(struct line *f, int l, int *b)
858ae8d569bSderaadt {
85948b8c3e3Sderaadt 	int *a, i;
86026da422aStedu 
86149dffe13Smillert 	a = emalloc((l + 1) * sizeof(int));
862ae8d569bSderaadt 	for (i = 1; i <= l; i++)
863ae8d569bSderaadt 		a[f[i].serial] = f[i].value;
864ae8d569bSderaadt 	for (i = 1; i <= l; i++)
865ae8d569bSderaadt 		b[i] = a[i];
86626da422aStedu 	free(a);
867ae8d569bSderaadt }
868ae8d569bSderaadt 
86926da422aStedu static int
8704ec4b3d5Smillert skipline(FILE *f)
871ae8d569bSderaadt {
87226da422aStedu 	int i, c;
873ae8d569bSderaadt 
874bb34d48bSmillert 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
875bb34d48bSmillert 		continue;
876ae8d569bSderaadt 	return (i);
877ae8d569bSderaadt }
878ae8d569bSderaadt 
87926da422aStedu static void
8804ec4b3d5Smillert output(char *file1, FILE *f1, char *file2, FILE *f2)
881ae8d569bSderaadt {
88248b8c3e3Sderaadt 	int m, i0, i1, j0, j1;
88348b8c3e3Sderaadt 
8844ec4b3d5Smillert 	rewind(f1);
8854ec4b3d5Smillert 	rewind(f2);
886ae8d569bSderaadt 	m = len[0];
887ae8d569bSderaadt 	J[0] = 0;
888ae8d569bSderaadt 	J[m + 1] = len[1] + 1;
8894ec4b3d5Smillert 	if (format != D_EDIT) {
89026da422aStedu 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
89126da422aStedu 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
89226da422aStedu 				i0++;
893ae8d569bSderaadt 			j0 = J[i0 - 1] + 1;
894ae8d569bSderaadt 			i1 = i0 - 1;
89526da422aStedu 			while (i1 < m && J[i1 + 1] == 0)
89626da422aStedu 				i1++;
897ae8d569bSderaadt 			j1 = J[i1 + 1] - 1;
898ae8d569bSderaadt 			J[i1] = j1;
8994ec4b3d5Smillert 			change(file1, f1, file2, f2, i0, i1, j0, j1);
90026da422aStedu 		}
90126da422aStedu 	} else {
90226da422aStedu 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
90326da422aStedu 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
90426da422aStedu 				i0--;
905ae8d569bSderaadt 			j0 = J[i0 + 1] - 1;
906ae8d569bSderaadt 			i1 = i0 + 1;
90726da422aStedu 			while (i1 > 1 && J[i1 - 1] == 0)
90826da422aStedu 				i1--;
909ae8d569bSderaadt 			j1 = J[i1 - 1] + 1;
910ae8d569bSderaadt 			J[i1] = j1;
9114ec4b3d5Smillert 			change(file1, f1, file2, f2, i1, i0, j1, j0);
912ae8d569bSderaadt 		}
91326da422aStedu 	}
914ae8d569bSderaadt 	if (m == 0)
9154ec4b3d5Smillert 		change(file1, f1, file2, f2, 1, 0, 1, len[1]);
9164ec4b3d5Smillert 	if (format == D_IFDEF) {
917ae8d569bSderaadt 		for (;;) {
918ae8d569bSderaadt #define	c i0
919bb34d48bSmillert 			if ((c = getc(f1)) == EOF)
920ae8d569bSderaadt 				return;
921ae8d569bSderaadt 			putchar(c);
922ae8d569bSderaadt 		}
923ae8d569bSderaadt #undef c
924ae8d569bSderaadt 	}
9259de32c1bSmillert 	if (anychange != 0) {
9264ec4b3d5Smillert 		if (format == D_CONTEXT)
9274ec4b3d5Smillert 			dump_context_vec(f1, f2);
9284ec4b3d5Smillert 		else if (format == D_UNIFIED)
9294ec4b3d5Smillert 			dump_unified_vec(f1, f2);
9309de32c1bSmillert 	}
931ae8d569bSderaadt }
932ae8d569bSderaadt 
933c72ea322Smillert static __inline void
934c72ea322Smillert range(int a, int b, char *separator)
935c72ea322Smillert {
936c72ea322Smillert 	printf("%d", a > b ? b : a);
937c72ea322Smillert 	if (a < b)
938c72ea322Smillert 		printf("%s%d", separator, b);
939c72ea322Smillert }
940c72ea322Smillert 
941c72ea322Smillert static __inline void
942c72ea322Smillert uni_range(int a, int b)
943c72ea322Smillert {
944c72ea322Smillert 	if (a < b)
945c72ea322Smillert 		printf("%d,%d", a, b - a + 1);
946c72ea322Smillert 	else if (a == b)
947c72ea322Smillert 		printf("%d", b);
948c72ea322Smillert 	else
949c72ea322Smillert 		printf("%d,0", b);
950c72ea322Smillert }
951c72ea322Smillert 
952ae8d569bSderaadt /*
9534ec4b3d5Smillert  * Indicate that there is a difference between lines a and b of the from file
95426da422aStedu  * to get to lines c to d of the to file.  If a is greater then b then there
95526da422aStedu  * are no lines in the from file involved and this means that there were
95626da422aStedu  * lines appended (beginning at b).  If c is greater than d then there are
95726da422aStedu  * lines missing from the to file.
958ae8d569bSderaadt  */
95926da422aStedu static void
9604ec4b3d5Smillert change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d)
961ae8d569bSderaadt {
9620efcb26eSmillert 	static size_t max_context = 64;
963f8a6bf23Smillert 	int i;
9640efcb26eSmillert 
965f8a6bf23Smillert restart:
9664ec4b3d5Smillert 	if (format != D_IFDEF && a > b && c > d)
967ae8d569bSderaadt 		return;
9684ec4b3d5Smillert 	if (format == D_CONTEXT || format == D_UNIFIED) {
9690efcb26eSmillert 		/*
9700efcb26eSmillert 		 * Allocate change records as needed.
9710efcb26eSmillert 		 */
9720efcb26eSmillert 		if (context_vec_ptr == context_vec_end - 1) {
9730efcb26eSmillert 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
9740efcb26eSmillert 			max_context <<= 1;
9750efcb26eSmillert 			context_vec_start = erealloc(context_vec_start,
9760efcb26eSmillert 			    max_context * sizeof(struct context_vec));
9770efcb26eSmillert 			context_vec_end = context_vec_start + max_context;
9780efcb26eSmillert 			context_vec_ptr = context_vec_start + offset;
9790efcb26eSmillert 		}
9800efcb26eSmillert 		if (anychange == 0) {
9810efcb26eSmillert 			/*
9820efcb26eSmillert 			 * Print the context/unidiff header first time through.
9830efcb26eSmillert 			 */
9841f9aa9e0Smillert 			if (label != NULL)
9851f9aa9e0Smillert 				printf("%s %s\n",
9861f9aa9e0Smillert 				    format == D_CONTEXT ? "***" : "---", label);
9871f9aa9e0Smillert 			else
9881f9aa9e0Smillert 				printf("%s %s	%s",
9891f9aa9e0Smillert 				    format == D_CONTEXT ? "***" : "---", file1,
9901f9aa9e0Smillert 				    ctime(&stb1.st_mtime));
9911f9aa9e0Smillert 			printf("%s %s	%s",
9921f9aa9e0Smillert 			    format == D_CONTEXT ? "---" : "+++", file2,
9931f9aa9e0Smillert 			    ctime(&stb2.st_mtime));
9940efcb26eSmillert 			anychange = 1;
9950efcb26eSmillert 		} else if (a > context_vec_ptr->b + (2 * context) &&
9960efcb26eSmillert 		    c > context_vec_ptr->d + (2 * context)) {
997ae8d569bSderaadt 			/*
9980efcb26eSmillert 			 * If this change is more than 'context' lines from the
9990efcb26eSmillert 			 * previous change, dump the record and reset it.
1000ae8d569bSderaadt 			 */
10014ec4b3d5Smillert 			if (format == D_CONTEXT)
10024ec4b3d5Smillert 				dump_context_vec(f1, f2);
10039de32c1bSmillert 			else
10044ec4b3d5Smillert 				dump_unified_vec(f1, f2);
10059de32c1bSmillert 		}
1006ae8d569bSderaadt 		context_vec_ptr++;
1007ae8d569bSderaadt 		context_vec_ptr->a = a;
1008ae8d569bSderaadt 		context_vec_ptr->b = b;
1009ae8d569bSderaadt 		context_vec_ptr->c = c;
1010ae8d569bSderaadt 		context_vec_ptr->d = d;
1011ae8d569bSderaadt 		return;
1012ae8d569bSderaadt 	}
10130efcb26eSmillert 	if (anychange == 0)
10140efcb26eSmillert 		anychange = 1;
10154ec4b3d5Smillert 	switch (format) {
1016ae8d569bSderaadt 
1017ae8d569bSderaadt 	case D_NORMAL:
1018ae8d569bSderaadt 	case D_EDIT:
1019ae8d569bSderaadt 		range(a, b, ",");
1020ae8d569bSderaadt 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
10214ec4b3d5Smillert 		if (format == D_NORMAL)
1022ae8d569bSderaadt 			range(c, d, ",");
1023ae8d569bSderaadt 		putchar('\n');
1024ae8d569bSderaadt 		break;
1025ae8d569bSderaadt 	case D_REVERSE:
1026ae8d569bSderaadt 		putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1027ae8d569bSderaadt 		range(a, b, " ");
1028ae8d569bSderaadt 		putchar('\n');
1029ae8d569bSderaadt 		break;
1030ae8d569bSderaadt 	case D_NREVERSE:
1031ae8d569bSderaadt 		if (a > b)
1032ae8d569bSderaadt 			printf("a%d %d\n", b, d - c + 1);
1033ae8d569bSderaadt 		else {
1034ae8d569bSderaadt 			printf("d%d %d\n", a, b - a + 1);
1035ae8d569bSderaadt 			if (!(c > d))
1036ae8d569bSderaadt 				/* add changed lines */
1037ae8d569bSderaadt 				printf("a%d %d\n", b, d - c + 1);
1038ae8d569bSderaadt 		}
1039ae8d569bSderaadt 		break;
1040ae8d569bSderaadt 	}
10414ec4b3d5Smillert 	if (format == D_NORMAL || format == D_IFDEF) {
10421f9aa9e0Smillert 		fetch(ixold, a, b, f1, '<', 1);
10434ec4b3d5Smillert 		if (a <= b && c <= d && format == D_NORMAL)
10444ec4b3d5Smillert 			puts("---");
1045ae8d569bSderaadt 	}
10461f9aa9e0Smillert 	i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0);
1047f8a6bf23Smillert 	if (i != 0 && format == D_EDIT) {
1048f8a6bf23Smillert 		/*
1049f8a6bf23Smillert 		 * A non-zero return value for D_EDIT indicates that the
1050f8a6bf23Smillert 		 * last line printed was a bare dot (".") that has been
1051f8a6bf23Smillert 		 * escaped as ".." to prevent ed(1) from misinterpreting
1052f8a6bf23Smillert 		 * it.  We have to add a substitute command to change this
1053f8a6bf23Smillert 		 * back and restart where we left off.
1054f8a6bf23Smillert 		 */
1055f8a6bf23Smillert 		puts(".");
1056f8a6bf23Smillert 		printf("%ds/^\\.\\././\n", a);
1057f8a6bf23Smillert 		a += i;
1058f8a6bf23Smillert 		c += i;
1059f8a6bf23Smillert 		goto restart;
1060f8a6bf23Smillert 	}
10614ec4b3d5Smillert 	if ((format == D_EDIT || format == D_REVERSE) && c <= d)
10624ec4b3d5Smillert 		puts(".");
1063ae8d569bSderaadt 	if (inifdef) {
1064b4bca33fSmillert 		printf("#endif /* %s */\n", ifdefname);
1065ae8d569bSderaadt 		inifdef = 0;
1066ae8d569bSderaadt 	}
1067ae8d569bSderaadt }
1068ae8d569bSderaadt 
1069f8a6bf23Smillert static int
10701f9aa9e0Smillert fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile)
1071ae8d569bSderaadt {
1072f8a6bf23Smillert 	int i, j, c, lastc, col, nc;
1073ae8d569bSderaadt 
1074ae8d569bSderaadt 	/*
1075ae8d569bSderaadt 	 * When doing #ifdef's, copy down to current line
1076ae8d569bSderaadt 	 * if this is the first file, so that stuff makes it to output.
1077ae8d569bSderaadt 	 */
10784ec4b3d5Smillert 	if (format == D_IFDEF && oldfile) {
1079ae8d569bSderaadt 		long curpos = ftell(lb);
1080ae8d569bSderaadt 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1081ae8d569bSderaadt 		nc = f[a > b ? b : a - 1] - curpos;
1082ae8d569bSderaadt 		for (i = 0; i < nc; i++)
1083ae8d569bSderaadt 			putchar(getc(lb));
1084ae8d569bSderaadt 	}
1085ae8d569bSderaadt 	if (a > b)
1086f8a6bf23Smillert 		return (0);
10874ec4b3d5Smillert 	if (format == D_IFDEF) {
108890f56ad8Smillert 		if (inifdef) {
1089b4bca33fSmillert 			printf("#else /* %s%s */\n",
109090f56ad8Smillert 			    oldfile == 1 ? "!" : "", ifdefname);
109126da422aStedu 		} else {
109290f56ad8Smillert 			if (oldfile)
1093b4bca33fSmillert 				printf("#ifndef %s\n", ifdefname);
109490f56ad8Smillert 			else
1095b4bca33fSmillert 				printf("#ifdef %s\n", ifdefname);
1096ae8d569bSderaadt 		}
1097ae8d569bSderaadt 		inifdef = 1 + oldfile;
1098ae8d569bSderaadt 	}
1099ae8d569bSderaadt 	for (i = a; i <= b; i++) {
110091cf91eeSderaadt 		fseek(lb, f[i - 1], SEEK_SET);
1101ae8d569bSderaadt 		nc = f[i] - f[i - 1];
11021f9aa9e0Smillert 		if (format != D_IFDEF && ch != '\0') {
11031f9aa9e0Smillert 			putchar(ch);
11041f9aa9e0Smillert 			if (Tflag && (format == D_NORMAL || format == D_CONTEXT
11051f9aa9e0Smillert 			    || format == D_UNIFIED))
11061f9aa9e0Smillert 				putchar('\t');
11071f9aa9e0Smillert 			else if (format != D_UNIFIED)
11081f9aa9e0Smillert 				putchar(' ');
11091f9aa9e0Smillert 		}
1110ae8d569bSderaadt 		col = 0;
1111f8a6bf23Smillert 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1112bb34d48bSmillert 			if ((c = getc(lb)) == EOF) {
1113bb34d48bSmillert 				puts("\n\\ No newline at end of file");
1114f8a6bf23Smillert 				return (0);;
1115bb34d48bSmillert 			}
1116bb34d48bSmillert 			if (c == '\t' && tflag) {
1117bb34d48bSmillert 				do {
1118ae8d569bSderaadt 					putchar(' ');
1119bb34d48bSmillert 				} while (++col & 7);
1120bb34d48bSmillert 			} else {
1121f8a6bf23Smillert 				if (format == D_EDIT && j == 1 && c == '\n'
1122f8a6bf23Smillert 				    && lastc == '.') {
1123f8a6bf23Smillert 					/*
1124f8a6bf23Smillert 					 * Don't print a bare "." line
1125f8a6bf23Smillert 					 * since that will confuse ed(1).
1126f8a6bf23Smillert 					 * Print ".." instead and return,
1127f8a6bf23Smillert 					 * giving the caller an offset
1128f8a6bf23Smillert 					 * from which to restart.
1129f8a6bf23Smillert 					 */
1130f8a6bf23Smillert 					puts(".");
1131f8a6bf23Smillert 					return (i - a + 1);
1132f8a6bf23Smillert 				}
1133ae8d569bSderaadt 				putchar(c);
1134ae8d569bSderaadt 				col++;
1135ae8d569bSderaadt 			}
1136ae8d569bSderaadt 		}
1137ae8d569bSderaadt 	}
1138f8a6bf23Smillert 	return (0);
1139ae8d569bSderaadt }
1140ae8d569bSderaadt 
114148e572adSmillert #define HASHMASK (16 - 1)	/* for masking out 16 bytes */
1142ae8d569bSderaadt 
1143ae8d569bSderaadt /*
1144ae8d569bSderaadt  * hashing has the effect of
1145ae8d569bSderaadt  * arranging line in 7-bit bytes and then
1146ae8d569bSderaadt  * summing 1-s complement in 16-bit hunks
1147ae8d569bSderaadt  */
114826da422aStedu static int
114926da422aStedu readhash(FILE *f)
1150ae8d569bSderaadt {
115148b8c3e3Sderaadt 	unsigned int shift;
115248b8c3e3Sderaadt 	int t, space;
115326da422aStedu 	long sum;
1154ae8d569bSderaadt 
1155ae8d569bSderaadt 	sum = 1;
1156ae8d569bSderaadt 	space = 0;
1157ae8d569bSderaadt 	if (!bflag && !wflag) {
1158ae8d569bSderaadt 		if (iflag)
1159ae8d569bSderaadt 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
1160bb34d48bSmillert 				if (t == EOF) {
1161bb34d48bSmillert 					if (shift == 0)
1162ae8d569bSderaadt 						return (0);
1163bb34d48bSmillert 					break;
1164bb34d48bSmillert 				}
116548e572adSmillert 				sum += (long)chrtran[t] << (shift &= HASHMASK);
1166ae8d569bSderaadt 			}
1167ae8d569bSderaadt 		else
1168ae8d569bSderaadt 			for (shift = 0; (t = getc(f)) != '\n'; shift += 7) {
1169bb34d48bSmillert 				if (t == EOF) {
1170bb34d48bSmillert 					if (shift == 0)
1171ae8d569bSderaadt 						return (0);
1172bb34d48bSmillert 					break;
1173bb34d48bSmillert 				}
117448e572adSmillert 				sum += (long)t << (shift &= HASHMASK);
1175ae8d569bSderaadt 			}
1176ae8d569bSderaadt 	} else {
1177ae8d569bSderaadt 		for (shift = 0;;) {
1178ae8d569bSderaadt 			switch (t = getc(f)) {
1179ae8d569bSderaadt 			case '\t':
1180ae8d569bSderaadt 			case ' ':
1181ae8d569bSderaadt 				space++;
1182ae8d569bSderaadt 				continue;
1183ae8d569bSderaadt 			default:
1184ae8d569bSderaadt 				if (space && !wflag) {
1185ae8d569bSderaadt 					shift += 7;
1186ae8d569bSderaadt 					space = 0;
1187ae8d569bSderaadt 				}
118848e572adSmillert 				sum += (long)chrtran[t] << (shift &= HASHMASK);
1189ae8d569bSderaadt 				shift += 7;
1190ae8d569bSderaadt 				continue;
1191bb34d48bSmillert 			case EOF:
1192bb34d48bSmillert 				if (shift == 0)
1193bb34d48bSmillert 					return (0);
1194bb34d48bSmillert 				/* FALLTHROUGH */
1195ae8d569bSderaadt 			case '\n':
1196ae8d569bSderaadt 				break;
1197ae8d569bSderaadt 			}
1198ae8d569bSderaadt 			break;
1199ae8d569bSderaadt 		}
1200ae8d569bSderaadt 	}
120148e572adSmillert 	return (sum);
1202ae8d569bSderaadt }
1203ae8d569bSderaadt 
12044ec4b3d5Smillert int
120526da422aStedu asciifile(FILE *f)
1206ae8d569bSderaadt {
120748b8c3e3Sderaadt 	char buf[BUFSIZ], *cp;
12084640f069Stedu 	int i, cnt;
1209ae8d569bSderaadt 
12104ec4b3d5Smillert 	if (aflag || f == NULL)
12112a1593dfStedu 		return (1);
12122a1593dfStedu 
12134ec4b3d5Smillert 	rewind(f);
12144ec4b3d5Smillert 	cnt = fread(buf, 1, sizeof(buf), f);
1215ae8d569bSderaadt 	cp = buf;
12164640f069Stedu 	for (i = 0; i < cnt; i++)
12174640f069Stedu 		if (!isprint(*cp) && !isspace(*cp))
1218ae8d569bSderaadt 			return (0);
1219ae8d569bSderaadt 	return (1);
1220ae8d569bSderaadt }
1221ae8d569bSderaadt 
12224ec4b3d5Smillert static __inline int min(int a, int b)
12234ec4b3d5Smillert {
12244ec4b3d5Smillert 	return (a < b ? a : b);
12254ec4b3d5Smillert }
12264ec4b3d5Smillert 
12274ec4b3d5Smillert static __inline int max(int a, int b)
12284ec4b3d5Smillert {
12294ec4b3d5Smillert 	return (a > b ? a : b);
12304ec4b3d5Smillert }
12314ec4b3d5Smillert 
1232ae8d569bSderaadt /* dump accumulated "context" diff changes */
123326da422aStedu static void
12344ec4b3d5Smillert dump_context_vec(FILE *f1, FILE *f2)
1235ae8d569bSderaadt {
123648b8c3e3Sderaadt 	struct context_vec *cvp = context_vec_start;
123748b8c3e3Sderaadt 	int lowa, upb, lowc, upd, do_output;
123826da422aStedu 	int a, b, c, d;
123926da422aStedu 	char ch;
1240ae8d569bSderaadt 
124190f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
1242ae8d569bSderaadt 		return;
1243ae8d569bSderaadt 
124426da422aStedu 	b = d = 0;		/* gcc */
1245ae8d569bSderaadt 	lowa = max(1, cvp->a - context);
1246ae8d569bSderaadt 	upb = min(len[0], context_vec_ptr->b + context);
1247ae8d569bSderaadt 	lowc = max(1, cvp->c - context);
1248ae8d569bSderaadt 	upd = min(len[1], context_vec_ptr->d + context);
1249ae8d569bSderaadt 
1250ae8d569bSderaadt 	printf("***************\n*** ");
1251ae8d569bSderaadt 	range(lowa, upb, ",");
1252ae8d569bSderaadt 	printf(" ****\n");
1253ae8d569bSderaadt 
1254ae8d569bSderaadt 	/*
12554ec4b3d5Smillert 	 * Output changes to the "old" file.  The first loop suppresses
1256ae8d569bSderaadt 	 * output if there were no changes to the "old" file (we'll see
1257ae8d569bSderaadt 	 * the "old" lines as context in the "new" list).
1258ae8d569bSderaadt 	 */
1259ae8d569bSderaadt 	do_output = 0;
1260ae8d569bSderaadt 	for (; cvp <= context_vec_ptr; cvp++)
1261ae8d569bSderaadt 		if (cvp->a <= cvp->b) {
1262ae8d569bSderaadt 			cvp = context_vec_start;
1263ae8d569bSderaadt 			do_output++;
1264ae8d569bSderaadt 			break;
1265ae8d569bSderaadt 		}
1266ae8d569bSderaadt 	if (do_output) {
1267ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
126826da422aStedu 			a = cvp->a;
126926da422aStedu 			b = cvp->b;
127026da422aStedu 			c = cvp->c;
127126da422aStedu 			d = cvp->d;
1272ae8d569bSderaadt 
1273ae8d569bSderaadt 			if (a <= b && c <= d)
1274ae8d569bSderaadt 				ch = 'c';
1275ae8d569bSderaadt 			else
1276ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1277ae8d569bSderaadt 
1278ae8d569bSderaadt 			if (ch == 'a')
12791f9aa9e0Smillert 				fetch(ixold, lowa, b, f1, ' ', 0);
1280ae8d569bSderaadt 			else {
12811f9aa9e0Smillert 				fetch(ixold, lowa, a - 1, f1, ' ', 0);
12824ec4b3d5Smillert 				fetch(ixold, a, b, f1,
12831f9aa9e0Smillert 				    ch == 'c' ? '!' : '-', 0);
1284ae8d569bSderaadt 			}
1285ae8d569bSderaadt 			lowa = b + 1;
1286ae8d569bSderaadt 			cvp++;
1287ae8d569bSderaadt 		}
12881f9aa9e0Smillert 		fetch(ixold, b + 1, upb, f1, ' ', 0);
1289ae8d569bSderaadt 	}
1290ae8d569bSderaadt 	/* output changes to the "new" file */
1291ae8d569bSderaadt 	printf("--- ");
1292ae8d569bSderaadt 	range(lowc, upd, ",");
1293ae8d569bSderaadt 	printf(" ----\n");
1294ae8d569bSderaadt 
1295ae8d569bSderaadt 	do_output = 0;
1296ae8d569bSderaadt 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1297ae8d569bSderaadt 		if (cvp->c <= cvp->d) {
1298ae8d569bSderaadt 			cvp = context_vec_start;
1299ae8d569bSderaadt 			do_output++;
1300ae8d569bSderaadt 			break;
1301ae8d569bSderaadt 		}
1302ae8d569bSderaadt 	if (do_output) {
1303ae8d569bSderaadt 		while (cvp <= context_vec_ptr) {
130426da422aStedu 			a = cvp->a;
130526da422aStedu 			b = cvp->b;
130626da422aStedu 			c = cvp->c;
130726da422aStedu 			d = cvp->d;
1308ae8d569bSderaadt 
1309ae8d569bSderaadt 			if (a <= b && c <= d)
1310ae8d569bSderaadt 				ch = 'c';
1311ae8d569bSderaadt 			else
1312ae8d569bSderaadt 				ch = (a <= b) ? 'd' : 'a';
1313ae8d569bSderaadt 
1314ae8d569bSderaadt 			if (ch == 'd')
13151f9aa9e0Smillert 				fetch(ixnew, lowc, d, f2, ' ', 0);
1316ae8d569bSderaadt 			else {
13171f9aa9e0Smillert 				fetch(ixnew, lowc, c - 1, f2, ' ', 0);
13184ec4b3d5Smillert 				fetch(ixnew, c, d, f2,
13191f9aa9e0Smillert 				    ch == 'c' ? '!' : '+', 0);
1320ae8d569bSderaadt 			}
1321ae8d569bSderaadt 			lowc = d + 1;
1322ae8d569bSderaadt 			cvp++;
1323ae8d569bSderaadt 		}
13241f9aa9e0Smillert 		fetch(ixnew, d + 1, upd, f2, ' ', 0);
1325ae8d569bSderaadt 	}
1326ae8d569bSderaadt 	context_vec_ptr = context_vec_start - 1;
1327ae8d569bSderaadt }
13289de32c1bSmillert 
13299de32c1bSmillert /* dump accumulated "unified" diff changes */
13309de32c1bSmillert static void
13314ec4b3d5Smillert dump_unified_vec(FILE *f1, FILE *f2)
13329de32c1bSmillert {
13339de32c1bSmillert 	struct context_vec *cvp = context_vec_start;
13349de32c1bSmillert 	int lowa, upb, lowc, upd;
13359de32c1bSmillert 	int a, b, c, d;
13369de32c1bSmillert 	char ch;
13379de32c1bSmillert 
133890f56ad8Smillert 	if (context_vec_start > context_vec_ptr)
13399de32c1bSmillert 		return;
13409de32c1bSmillert 
13419de32c1bSmillert 	b = d = 0;		/* gcc */
13429de32c1bSmillert 	lowa = max(1, cvp->a - context);
13439de32c1bSmillert 	upb = min(len[0], context_vec_ptr->b + context);
13449de32c1bSmillert 	lowc = max(1, cvp->c - context);
13459de32c1bSmillert 	upd = min(len[1], context_vec_ptr->d + context);
13469de32c1bSmillert 
1347c72ea322Smillert 	fputs("@@ -", stdout);
1348c72ea322Smillert 	uni_range(lowa, upb);
1349c72ea322Smillert 	fputs(" +", stdout);
1350c72ea322Smillert 	uni_range(lowc, upd);
1351c72ea322Smillert 	fputs(" @@\n", stdout);
13529de32c1bSmillert 
13539de32c1bSmillert 	/*
13549de32c1bSmillert 	 * Output changes in "unified" diff format--the old and new lines
13559de32c1bSmillert 	 * are printed together.
13569de32c1bSmillert 	 */
13579de32c1bSmillert 	for (; cvp <= context_vec_ptr; cvp++) {
13589de32c1bSmillert 		a = cvp->a;
13599de32c1bSmillert 		b = cvp->b;
13609de32c1bSmillert 		c = cvp->c;
13619de32c1bSmillert 		d = cvp->d;
13629de32c1bSmillert 
13639de32c1bSmillert 		/*
13649de32c1bSmillert 		 * c: both new and old changes
13659de32c1bSmillert 		 * d: only changes in the old file
13669de32c1bSmillert 		 * a: only changes in the new file
13679de32c1bSmillert 		 */
13689de32c1bSmillert 		if (a <= b && c <= d)
13699de32c1bSmillert 			ch = 'c';
13709de32c1bSmillert 		else
13719de32c1bSmillert 			ch = (a <= b) ? 'd' : 'a';
13729de32c1bSmillert 
13739de32c1bSmillert 		switch (ch) {
13749de32c1bSmillert 		case 'c':
13751f9aa9e0Smillert 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
13761f9aa9e0Smillert 			fetch(ixold, a, b, f1, '-', 0);
13771f9aa9e0Smillert 			fetch(ixnew, c, d, f2, '+', 0);
13789de32c1bSmillert 			break;
13799de32c1bSmillert 		case 'd':
13801f9aa9e0Smillert 			fetch(ixold, lowa, a - 1, f1, ' ', 0);
13811f9aa9e0Smillert 			fetch(ixold, a, b, f1, '-', 0);
13829de32c1bSmillert 			break;
13839de32c1bSmillert 		case 'a':
13841f9aa9e0Smillert 			fetch(ixnew, lowc, c - 1, f2, ' ', 0);
13851f9aa9e0Smillert 			fetch(ixnew, c, d, f2, '+', 0);
13869de32c1bSmillert 			break;
13879de32c1bSmillert 		}
13889de32c1bSmillert 		lowa = b + 1;
13899de32c1bSmillert 		lowc = d + 1;
13909de32c1bSmillert 	}
13911f9aa9e0Smillert 	fetch(ixnew, d + 1, upd, f2, ' ', 0);
13929de32c1bSmillert 
13939de32c1bSmillert 	context_vec_ptr = context_vec_start - 1;
13949de32c1bSmillert }
1395