1*b7041c07Sderaadt /* $OpenBSD: diffreg.c,v 1.95 2021/10/24 21:24:16 deraadt Exp $ */
2d0c3f575Sderaadt
3d0c3f575Sderaadt /*
4d0c3f575Sderaadt * Copyright (C) Caldera International Inc. 2001-2002.
5d0c3f575Sderaadt * All rights reserved.
6d0c3f575Sderaadt *
7d0c3f575Sderaadt * Redistribution and use in source and binary forms, with or without
8d0c3f575Sderaadt * modification, are permitted provided that the following conditions
9d0c3f575Sderaadt * are met:
10d0c3f575Sderaadt * 1. Redistributions of source code and documentation must retain the above
11d0c3f575Sderaadt * copyright notice, this list of conditions and the following disclaimer.
12d0c3f575Sderaadt * 2. Redistributions in binary form must reproduce the above copyright
13d0c3f575Sderaadt * notice, this list of conditions and the following disclaimer in the
14d0c3f575Sderaadt * documentation and/or other materials provided with the distribution.
15d0c3f575Sderaadt * 3. All advertising materials mentioning features or use of this software
16d0c3f575Sderaadt * must display the following acknowledgement:
17d0c3f575Sderaadt * This product includes software developed or owned by Caldera
18d0c3f575Sderaadt * International, Inc.
19d0c3f575Sderaadt * 4. Neither the name of Caldera International, Inc. nor the names of other
20d0c3f575Sderaadt * contributors may be used to endorse or promote products derived from
21d0c3f575Sderaadt * this software without specific prior written permission.
22d0c3f575Sderaadt *
23d0c3f575Sderaadt * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24d0c3f575Sderaadt * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25d0c3f575Sderaadt * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26d0c3f575Sderaadt * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27d0c3f575Sderaadt * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28d0c3f575Sderaadt * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29d0c3f575Sderaadt * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30d0c3f575Sderaadt * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31d0c3f575Sderaadt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32d0c3f575Sderaadt * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33d0c3f575Sderaadt * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34d0c3f575Sderaadt * POSSIBILITY OF SUCH DAMAGE.
35d0c3f575Sderaadt */
364ec4b3d5Smillert /*-
374ec4b3d5Smillert * Copyright (c) 1991, 1993
384ec4b3d5Smillert * The Regents of the University of California. All rights reserved.
394ec4b3d5Smillert *
404ec4b3d5Smillert * Redistribution and use in source and binary forms, with or without
414ec4b3d5Smillert * modification, are permitted provided that the following conditions
424ec4b3d5Smillert * are met:
434ec4b3d5Smillert * 1. Redistributions of source code must retain the above copyright
444ec4b3d5Smillert * notice, this list of conditions and the following disclaimer.
454ec4b3d5Smillert * 2. Redistributions in binary form must reproduce the above copyright
464ec4b3d5Smillert * notice, this list of conditions and the following disclaimer in the
474ec4b3d5Smillert * documentation and/or other materials provided with the distribution.
484ec4b3d5Smillert * 3. Neither the name of the University nor the names of its contributors
494ec4b3d5Smillert * may be used to endorse or promote products derived from this software
504ec4b3d5Smillert * without specific prior written permission.
514ec4b3d5Smillert *
524ec4b3d5Smillert * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
534ec4b3d5Smillert * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
544ec4b3d5Smillert * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
554ec4b3d5Smillert * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
564ec4b3d5Smillert * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
574ec4b3d5Smillert * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
584ec4b3d5Smillert * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
594ec4b3d5Smillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
604ec4b3d5Smillert * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
614ec4b3d5Smillert * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
624ec4b3d5Smillert * SUCH DAMAGE.
634ec4b3d5Smillert *
644ec4b3d5Smillert * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
654ec4b3d5Smillert */
664ec4b3d5Smillert
674ec4b3d5Smillert #include <sys/stat.h>
68b4bca33fSmillert #include <sys/wait.h>
6926da422aStedu
704ec4b3d5Smillert #include <ctype.h>
714ec4b3d5Smillert #include <err.h>
727b6ec9e4Smillert #include <errno.h>
7326da422aStedu #include <fcntl.h>
7440e7295bSmillert #include <paths.h>
750efcb26eSmillert #include <stddef.h>
761357284aSmillert #include <stdint.h>
774ec4b3d5Smillert #include <stdio.h>
784ec4b3d5Smillert #include <stdlib.h>
7926da422aStedu #include <string.h>
804ec4b3d5Smillert #include <unistd.h>
81b9fc9a72Sderaadt #include <limits.h>
82ae8d569bSderaadt
83ae8d569bSderaadt #include "diff.h"
844a034c3aSray #include "xmalloc.h"
8526da422aStedu
86b9fc9a72Sderaadt #define MINIMUM(a, b) (((a) < (b)) ? (a) : (b))
87b9fc9a72Sderaadt #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b))
88b9fc9a72Sderaadt
89ae8d569bSderaadt /*
90ae8d569bSderaadt * diff - compare two files.
91ae8d569bSderaadt */
92ae8d569bSderaadt
93ae8d569bSderaadt /*
94ae8d569bSderaadt * Uses an algorithm due to Harold Stone, which finds
95ae8d569bSderaadt * a pair of longest identical subsequences in the two
96ae8d569bSderaadt * files.
97ae8d569bSderaadt *
98ae8d569bSderaadt * The major goal is to generate the match vector J.
99ae8d569bSderaadt * J[i] is the index of the line in file1 corresponding
100ae8d569bSderaadt * to line i file0. J[i] = 0 if there is no
101ae8d569bSderaadt * such line in file1.
102ae8d569bSderaadt *
103ae8d569bSderaadt * Lines are hashed so as to work in core. All potential
104ae8d569bSderaadt * matches are located by sorting the lines of each file
105ae8d569bSderaadt * on the hash (called ``value''). In particular, this
106ae8d569bSderaadt * collects the equivalence classes in file1 together.
107ae8d569bSderaadt * Subroutine equiv replaces the value of each line in
108ae8d569bSderaadt * file0 by the index of the first element of its
109ae8d569bSderaadt * matching equivalence in (the reordered) file1.
110ae8d569bSderaadt * To save space equiv squeezes file1 into a single
111ae8d569bSderaadt * array member in which the equivalence classes
112ae8d569bSderaadt * are simply concatenated, except that their first
113ae8d569bSderaadt * members are flagged by changing sign.
114ae8d569bSderaadt *
115ae8d569bSderaadt * Next the indices that point into member are unsorted into
116ae8d569bSderaadt * array class according to the original order of file0.
117ae8d569bSderaadt *
118ae8d569bSderaadt * The cleverness lies in routine stone. This marches
119ae8d569bSderaadt * through the lines of file0, developing a vector klist
120ae8d569bSderaadt * of "k-candidates". At step i a k-candidate is a matched
121ae8d569bSderaadt * pair of lines x,y (x in file0 y in file1) such that
122ae8d569bSderaadt * there is a common subsequence of length k
123ae8d569bSderaadt * between the first i lines of file0 and the first y
124ae8d569bSderaadt * lines of file1, but there is no such subsequence for
125ae8d569bSderaadt * any smaller y. x is the earliest possible mate to y
126ae8d569bSderaadt * that occurs in such a subsequence.
127ae8d569bSderaadt *
128ae8d569bSderaadt * Whenever any of the members of the equivalence class of
129ae8d569bSderaadt * lines in file1 matable to a line in file0 has serial number
130ae8d569bSderaadt * less than the y of some k-candidate, that k-candidate
131ae8d569bSderaadt * with the smallest such y is replaced. The new
132ae8d569bSderaadt * k-candidate is chained (via pred) to the current
133ae8d569bSderaadt * k-1 candidate so that the actual subsequence can
134ae8d569bSderaadt * be recovered. When a member has serial number greater
135ae8d569bSderaadt * that the y of all k-candidates, the klist is extended.
136ae8d569bSderaadt * At the end, the longest subsequence is pulled out
137ae8d569bSderaadt * and placed in the array J by unravel
138ae8d569bSderaadt *
139ae8d569bSderaadt * With J in hand, the matches there recorded are
140ae8d569bSderaadt * check'ed against reality to assure that no spurious
141ae8d569bSderaadt * matches have crept in due to hashing. If they have,
142ae8d569bSderaadt * they are broken, and "jackpot" is recorded--a harmless
143ae8d569bSderaadt * matter except that a true match for a spuriously
144ae8d569bSderaadt * mated line may now be unnecessarily reported as a change.
145ae8d569bSderaadt *
146ae8d569bSderaadt * Much of the complexity of the program comes simply
147ae8d569bSderaadt * from trying to minimize core utilization and
148ae8d569bSderaadt * maximize the range of doable problems by dynamically
149ae8d569bSderaadt * allocating what is needed and reusing what is not.
150ae8d569bSderaadt * The core requirements for problems larger than somewhat
151ae8d569bSderaadt * are (in words) 2*length(file0) + length(file1) +
152ae8d569bSderaadt * 3*(number of k-candidates installed), typically about
153ae8d569bSderaadt * 6n words for files of length n.
154ae8d569bSderaadt */
155ae8d569bSderaadt
156ae8d569bSderaadt struct cand {
157ae8d569bSderaadt int x;
158ae8d569bSderaadt int y;
159ae8d569bSderaadt int pred;
16060b9d8fdSderaadt };
16126da422aStedu
162ae8d569bSderaadt struct line {
163ae8d569bSderaadt int serial;
164ae8d569bSderaadt int value;
16592af4c2dStedu } *file[2];
16626da422aStedu
1670efcb26eSmillert /*
1680efcb26eSmillert * The following struct is used to record change information when
1690efcb26eSmillert * doing a "context" or "unified" diff. (see routine "change" to
1700efcb26eSmillert * understand the highly mnemonic field names)
1710efcb26eSmillert */
1720efcb26eSmillert struct context_vec {
1730efcb26eSmillert int a; /* start line in old file */
1740efcb26eSmillert int b; /* end line in old file */
1750efcb26eSmillert int c; /* start line in new file */
1760efcb26eSmillert int d; /* end line in new file */
1770efcb26eSmillert };
1780efcb26eSmillert
1795e07282dSray #define diff_output printf
1807b6ec9e4Smillert static FILE *opentemp(const char *);
181ac73e8e6Smillert static void output(char *, FILE *, char *, FILE *, int);
1826fc40daeSray static void check(FILE *, FILE *, int);
18326da422aStedu static void range(int, int, char *);
184c72ea322Smillert static void uni_range(int, int);
185dba1d6eaSray static void dump_context_vec(FILE *, FILE *, int);
186dba1d6eaSray static void dump_unified_vec(FILE *, FILE *, int);
187dba1d6eaSray static void prepare(int, FILE *, off_t, int);
18826da422aStedu static void prune(void);
18926da422aStedu static void equiv(struct line *, int, struct line *, int, int *);
19026da422aStedu static void unravel(int);
19126da422aStedu static void unsort(struct line *, int, int *);
19261783bcdSespie static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
19326da422aStedu static void sort(struct line *, int);
1947bdb251cSmillert static void print_header(const char *, const char *);
195ccd55a2cSotto static int ignoreline(char *);
1964ec4b3d5Smillert static int asciifile(FILE *);
197dba1d6eaSray static int fetch(long *, int, int, FILE *, int, int, int);
19826da422aStedu static int newcand(int, int, int);
19926da422aStedu static int search(int *, int, int);
2004ec4b3d5Smillert static int skipline(FILE *);
2016e18f850Sotto static int isqrt(int);
202dba1d6eaSray static int stone(int *, int, int *, int *, int);
203dba1d6eaSray static int readhash(FILE *, int);
2044ec4b3d5Smillert static int files_differ(FILE *, FILE *, int);
20596e45528Sotto static char *match_function(const long *, int, FILE *);
206ccd55a2cSotto static char *preadline(int, size_t, off_t);
2076e18f850Sotto
208aa215433Sray static int *J; /* will be overlaid on class */
209aa215433Sray static int *class; /* will be overlaid on file[0] */
210aa215433Sray static int *klist; /* will be overlaid on file[0] after class */
211aa215433Sray static int *member; /* will be overlaid on file[1] */
212aa215433Sray static int clen;
213aa215433Sray static int inifdef; /* whether or not we are in a #ifdef block */
214aa215433Sray static int len[2];
215aa215433Sray static int pref, suff; /* length of prefix and suffix */
216aa215433Sray static int slen[2];
217aa215433Sray static int anychange;
218aa215433Sray static long *ixnew; /* will be overlaid on file[1] */
219aa215433Sray static long *ixold; /* will be overlaid on klist */
220aa215433Sray static struct cand *clist; /* merely a free storage pot for candidates */
221aa215433Sray static int clistlen; /* the length of clist */
222aa215433Sray static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
223aa215433Sray static u_char *chrtran; /* translation table for case-folding */
224aa215433Sray static struct context_vec *context_vec_start;
225aa215433Sray static struct context_vec *context_vec_end;
226aa215433Sray static struct context_vec *context_vec_ptr;
227aa215433Sray
228aa215433Sray #define FUNCTION_CONTEXT_SIZE 55
229aa215433Sray static char lastbuf[FUNCTION_CONTEXT_SIZE];
230aa215433Sray static int lastline;
231aa215433Sray static int lastmatchline;
232aa215433Sray
23326da422aStedu
23426da422aStedu /*
23526da422aStedu * chrtran points to one of 2 translation tables: cup2low if folding upper to
23626da422aStedu * lower case clow2low if not folding case
237ae8d569bSderaadt */
2388a15d8deSderaadt u_char clow2low[256] = {
23926da422aStedu 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
24026da422aStedu 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
24126da422aStedu 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
24226da422aStedu 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
24326da422aStedu 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
24426da422aStedu 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
24526da422aStedu 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
24626da422aStedu 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
24726da422aStedu 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
24826da422aStedu 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
24926da422aStedu 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
25026da422aStedu 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
25126da422aStedu 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
25226da422aStedu 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
25326da422aStedu 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
25426da422aStedu 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
25526da422aStedu 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
25626da422aStedu 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
25726da422aStedu 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
25826da422aStedu 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
25926da422aStedu 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
26026da422aStedu 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
26126da422aStedu 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
26226da422aStedu 0xfd, 0xfe, 0xff
263ae8d569bSderaadt };
264ae8d569bSderaadt
2658a15d8deSderaadt u_char cup2low[256] = {
26626da422aStedu 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
26726da422aStedu 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
26826da422aStedu 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
26926da422aStedu 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
27026da422aStedu 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
27126da422aStedu 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
27226da422aStedu 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
27326da422aStedu 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
27426da422aStedu 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
27526da422aStedu 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
27626da422aStedu 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
27726da422aStedu 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
27826da422aStedu 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
27926da422aStedu 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
28026da422aStedu 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
28126da422aStedu 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
28226da422aStedu 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
28326da422aStedu 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
28426da422aStedu 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
28526da422aStedu 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
28626da422aStedu 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
28726da422aStedu 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
28826da422aStedu 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
28926da422aStedu 0xfd, 0xfe, 0xff
290ae8d569bSderaadt };
291ae8d569bSderaadt
292b4bca33fSmillert int
diffreg(char * file1,char * file2,int flags)29357003866Sray diffreg(char *file1, char *file2, int flags)
294ae8d569bSderaadt {
295dba1d6eaSray FILE *f1, *f2;
29640e7295bSmillert int i, rval;
297ae8d569bSderaadt
298dba1d6eaSray f1 = f2 = NULL;
299dba1d6eaSray rval = D_SAME;
3004ec4b3d5Smillert anychange = 0;
30196e45528Sotto lastline = 0;
30296e45528Sotto lastmatchline = 0;
3030efcb26eSmillert context_vec_ptr = context_vec_start - 1;
304dba1d6eaSray if (flags & D_IGNORECASE)
305dba1d6eaSray chrtran = cup2low;
306dba1d6eaSray else
307dba1d6eaSray chrtran = clow2low;
3087b6ec9e4Smillert if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
309fed3a06dSmillert return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
31066e5764eSmillert if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
311f28259e9Smillert goto closem;
3124ec4b3d5Smillert
3134ec4b3d5Smillert if (flags & D_EMPTY1)
3144ec4b3d5Smillert f1 = fopen(_PATH_DEVNULL, "r");
3154ec4b3d5Smillert else {
3167b6ec9e4Smillert if (!S_ISREG(stb1.st_mode)) {
3177b6ec9e4Smillert if ((f1 = opentemp(file1)) == NULL ||
3183aaa63ebSderaadt fstat(fileno(f1), &stb1) == -1) {
3194ec4b3d5Smillert warn("%s", file1);
3204ec4b3d5Smillert status |= 2;
3214ec4b3d5Smillert goto closem;
32248b947b7Smillert }
3237b6ec9e4Smillert } else if (strcmp(file1, "-") == 0)
324b1a26502Smillert f1 = stdin;
325b1a26502Smillert else
3264ec4b3d5Smillert f1 = fopen(file1, "r");
3274ec4b3d5Smillert }
3284ec4b3d5Smillert if (f1 == NULL) {
3294ec4b3d5Smillert warn("%s", file1);
3304ec4b3d5Smillert status |= 2;
3314ec4b3d5Smillert goto closem;
3324ec4b3d5Smillert }
3334ec4b3d5Smillert
3344ec4b3d5Smillert if (flags & D_EMPTY2)
3354ec4b3d5Smillert f2 = fopen(_PATH_DEVNULL, "r");
3364ec4b3d5Smillert else {
3377b6ec9e4Smillert if (!S_ISREG(stb2.st_mode)) {
3387b6ec9e4Smillert if ((f2 = opentemp(file2)) == NULL ||
3393aaa63ebSderaadt fstat(fileno(f2), &stb2) == -1) {
3404ec4b3d5Smillert warn("%s", file2);
3414ec4b3d5Smillert status |= 2;
3424ec4b3d5Smillert goto closem;
3434ec4b3d5Smillert }
3447b6ec9e4Smillert } else if (strcmp(file2, "-") == 0)
345b1a26502Smillert f2 = stdin;
346b1a26502Smillert else
3474ec4b3d5Smillert f2 = fopen(file2, "r");
3484ec4b3d5Smillert }
3494ec4b3d5Smillert if (f2 == NULL) {
3504ec4b3d5Smillert warn("%s", file2);
3514ec4b3d5Smillert status |= 2;
3524ec4b3d5Smillert goto closem;
3534ec4b3d5Smillert }
3544ec4b3d5Smillert
3554ec4b3d5Smillert switch (files_differ(f1, f2, flags)) {
3564ec4b3d5Smillert case 0:
357b4bca33fSmillert goto closem;
3584ec4b3d5Smillert case 1:
3594ec4b3d5Smillert break;
3604ec4b3d5Smillert default:
3614ec4b3d5Smillert /* error */
3624ec4b3d5Smillert status |= 2;
3634ec4b3d5Smillert goto closem;
364ae8d569bSderaadt }
3654ec4b3d5Smillert
366dba1d6eaSray if ((flags & D_FORCEASCII) == 0 &&
367dba1d6eaSray (!asciifile(f1) || !asciifile(f2))) {
368b4bca33fSmillert rval = D_BINARY;
3695f9fc8aaSmillert status |= 1;
370cab5d83cSmillert goto closem;
371cab5d83cSmillert }
372dba1d6eaSray prepare(0, f1, stb1.st_size, flags);
373dba1d6eaSray prepare(1, f2, stb2.st_size, flags);
37457003866Sray
375ae8d569bSderaadt prune();
376ae8d569bSderaadt sort(sfile[0], slen[0]);
377ae8d569bSderaadt sort(sfile[1], slen[1]);
378ae8d569bSderaadt
379ae8d569bSderaadt member = (int *)file[1];
380ae8d569bSderaadt equiv(sfile[0], slen[0], sfile[1], slen[1], member);
381371275caSderaadt member = xreallocarray(member, slen[1] + 2, sizeof(*member));
382ae8d569bSderaadt
383ae8d569bSderaadt class = (int *)file[0];
384ae8d569bSderaadt unsort(sfile[0], slen[0], class);
385371275caSderaadt class = xreallocarray(class, slen[0] + 2, sizeof(*class));
386ae8d569bSderaadt
38757003866Sray klist = xcalloc(slen[0] + 2, sizeof(*klist));
388058e94f4Smillert clen = 0;
3896e18f850Sotto clistlen = 100;
39057003866Sray clist = xcalloc(clistlen, sizeof(*clist));
391dba1d6eaSray i = stone(class, slen[0], member, klist, flags);
392a79a2617Stedu free(member);
393a79a2617Stedu free(class);
394ae8d569bSderaadt
395371275caSderaadt J = xreallocarray(J, len[0] + 2, sizeof(*J));
396ae8d569bSderaadt unravel(klist[i]);
397a79a2617Stedu free(clist);
398a79a2617Stedu free(klist);
399ae8d569bSderaadt
400371275caSderaadt ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
401371275caSderaadt ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
4026fc40daeSray check(f1, f2, flags);
403dba1d6eaSray output(file1, f1, file2, f2, flags);
4044ec4b3d5Smillert closem:
4055afc3be2Smillert if (anychange) {
4065afc3be2Smillert status |= 1;
4075afc3be2Smillert if (rval == D_SAME)
4085afc3be2Smillert rval = D_DIFFER;
4095afc3be2Smillert }
4104ec4b3d5Smillert if (f1 != NULL)
4114ec4b3d5Smillert fclose(f1);
4124ec4b3d5Smillert if (f2 != NULL)
4134ec4b3d5Smillert fclose(f2);
414dba1d6eaSray
415b4bca33fSmillert return (rval);
4164ec4b3d5Smillert }
4174ec4b3d5Smillert
4184ec4b3d5Smillert /*
4194ec4b3d5Smillert * Check to see if the given files differ.
4204ec4b3d5Smillert * Returns 0 if they are the same, 1 if different, and -1 on error.
4214ec4b3d5Smillert * XXX - could use code from cmp(1) [faster]
4224ec4b3d5Smillert */
4234ec4b3d5Smillert static int
files_differ(FILE * f1,FILE * f2,int flags)4244ec4b3d5Smillert files_differ(FILE *f1, FILE *f2, int flags)
4254ec4b3d5Smillert {
4264ec4b3d5Smillert char buf1[BUFSIZ], buf2[BUFSIZ];
4274ec4b3d5Smillert size_t i, j;
4284ec4b3d5Smillert
4294ec4b3d5Smillert if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
4304ec4b3d5Smillert (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
4314ec4b3d5Smillert return (1);
432a05fc07dShalex
433a05fc07dShalex if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino)
434a05fc07dShalex return (0);
435a05fc07dShalex
4364ec4b3d5Smillert for (;;) {
4374ec4b3d5Smillert i = fread(buf1, 1, sizeof(buf1), f1);
4384ec4b3d5Smillert j = fread(buf2, 1, sizeof(buf2), f2);
439bdcce04dSray if ((!i && ferror(f1)) || (!j && ferror(f2)))
440bdcce04dSray return (-1);
4414ec4b3d5Smillert if (i != j)
4424ec4b3d5Smillert return (1);
443bdcce04dSray if (i == 0)
4444ec4b3d5Smillert return (0);
4454ec4b3d5Smillert if (memcmp(buf1, buf2, i) != 0)
4464ec4b3d5Smillert return (1);
4474ec4b3d5Smillert }
448ae8d569bSderaadt }
449ae8d569bSderaadt
4507b6ec9e4Smillert static FILE *
opentemp(const char * file)4517b6ec9e4Smillert opentemp(const char *file)
452ae8d569bSderaadt {
4534c413bf6Stedu char buf[BUFSIZ], tempfile[PATH_MAX];
4547b6ec9e4Smillert ssize_t nread;
4557b6ec9e4Smillert int ifd, ofd;
45648b947b7Smillert
45748b947b7Smillert if (strcmp(file, "-") == 0)
45848b947b7Smillert ifd = STDIN_FILENO;
459*b7041c07Sderaadt else if ((ifd = open(file, O_RDONLY)) == -1)
4604ec4b3d5Smillert return (NULL);
46148b947b7Smillert
4624c413bf6Stedu (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
4637b6ec9e4Smillert
464c2d43ecaSderaadt if ((ofd = mkstemp(tempfile)) == -1) {
465b5188ac1Sschwarze close(ifd);
4667b6ec9e4Smillert return (NULL);
467b5188ac1Sschwarze }
4687b6ec9e4Smillert unlink(tempfile);
4697b6ec9e4Smillert while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
4707b6ec9e4Smillert if (write(ofd, buf, nread) != nread) {
47148b947b7Smillert close(ifd);
47248b947b7Smillert close(ofd);
4737b6ec9e4Smillert return (NULL);
4747b6ec9e4Smillert }
4757b6ec9e4Smillert }
4767b6ec9e4Smillert close(ifd);
477f28259e9Smillert lseek(ofd, (off_t)0, SEEK_SET);
4787b6ec9e4Smillert return (fdopen(ofd, "r"));
479ae8d569bSderaadt }
480ae8d569bSderaadt
481ae8d569bSderaadt char *
splice(char * dir,char * file)48226da422aStedu splice(char *dir, char *file)
483ae8d569bSderaadt {
48449dffe13Smillert char *tail, *buf;
48578245330Smillert size_t dirlen;
486ae8d569bSderaadt
48778245330Smillert dirlen = strlen(dir);
48878245330Smillert while (dirlen != 0 && dir[dirlen - 1] == '/')
48978245330Smillert dirlen--;
4907b6ec9e4Smillert if ((tail = strrchr(file, '/')) == NULL)
491ae8d569bSderaadt tail = file;
492ae8d569bSderaadt else
493ae8d569bSderaadt tail++;
49478245330Smillert xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
49549dffe13Smillert return (buf);
496ae8d569bSderaadt }
497ae8d569bSderaadt
49826da422aStedu static void
prepare(int i,FILE * fd,off_t filesize,int flags)499dba1d6eaSray prepare(int i, FILE *fd, off_t filesize, int flags)
500ae8d569bSderaadt {
50126da422aStedu struct line *p;
50226da422aStedu int j, h;
503739e7267Sotto size_t sz;
504ae8d569bSderaadt
5054ec4b3d5Smillert rewind(fd);
506739e7267Sotto
507739e7267Sotto sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
508739e7267Sotto if (sz < 100)
509a8013e93Sotto sz = 100;
510739e7267Sotto
51157003866Sray p = xcalloc(sz + 3, sizeof(*p));
512dba1d6eaSray for (j = 0; (h = readhash(fd, flags));) {
513a8013e93Sotto if (j == sz) {
514a8013e93Sotto sz = sz * 3 / 2;
515371275caSderaadt p = xreallocarray(p, sz + 3, sizeof(*p));
516a8013e93Sotto }
517a8013e93Sotto p[++j].value = h;
518ae8d569bSderaadt }
519ae8d569bSderaadt len[i] = j;
520ae8d569bSderaadt file[i] = p;
521ae8d569bSderaadt }
522ae8d569bSderaadt
52326da422aStedu static void
prune(void)52426da422aStedu prune(void)
525ae8d569bSderaadt {
52626da422aStedu int i, j;
52748b8c3e3Sderaadt
528ae8d569bSderaadt for (pref = 0; pref < len[0] && pref < len[1] &&
529ae8d569bSderaadt file[0][pref + 1].value == file[1][pref + 1].value;
5307d2b2b91Sderaadt pref++)
5317d2b2b91Sderaadt ;
532ae8d569bSderaadt for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
533ae8d569bSderaadt file[0][len[0] - suff].value == file[1][len[1] - suff].value;
5347d2b2b91Sderaadt suff++)
5357d2b2b91Sderaadt ;
536ae8d569bSderaadt for (j = 0; j < 2; j++) {
537ae8d569bSderaadt sfile[j] = file[j] + pref;
538ae8d569bSderaadt slen[j] = len[j] - pref - suff;
539ae8d569bSderaadt for (i = 0; i <= slen[j]; i++)
540ae8d569bSderaadt sfile[j][i].serial = i;
541ae8d569bSderaadt }
542ae8d569bSderaadt }
543ae8d569bSderaadt
54426da422aStedu static void
equiv(struct line * a,int n,struct line * b,int m,int * c)54526da422aStedu equiv(struct line *a, int n, struct line *b, int m, int *c)
546ae8d569bSderaadt {
54726da422aStedu int i, j;
54848b8c3e3Sderaadt
549ae8d569bSderaadt i = j = 1;
550ae8d569bSderaadt while (i <= n && j <= m) {
551ae8d569bSderaadt if (a[i].value < b[j].value)
552ae8d569bSderaadt a[i++].value = 0;
553ae8d569bSderaadt else if (a[i].value == b[j].value)
554ae8d569bSderaadt a[i++].value = j;
555ae8d569bSderaadt else
556ae8d569bSderaadt j++;
557ae8d569bSderaadt }
558ae8d569bSderaadt while (i <= n)
559ae8d569bSderaadt a[i++].value = 0;
560ae8d569bSderaadt b[m + 1].value = 0;
561ae8d569bSderaadt j = 0;
562ae8d569bSderaadt while (++j <= m) {
563ae8d569bSderaadt c[j] = -b[j].serial;
564ae8d569bSderaadt while (b[j + 1].value == b[j].value) {
565ae8d569bSderaadt j++;
566ae8d569bSderaadt c[j] = b[j].serial;
567ae8d569bSderaadt }
568ae8d569bSderaadt }
569ae8d569bSderaadt c[j] = -1;
570ae8d569bSderaadt }
571ae8d569bSderaadt
5726e18f850Sotto /* Code taken from ping.c */
5736e18f850Sotto static int
isqrt(int n)5746e18f850Sotto isqrt(int n)
5756e18f850Sotto {
5766e18f850Sotto int y, x = 1;
5776e18f850Sotto
5786e18f850Sotto if (n == 0)
5796e18f850Sotto return (0);
5806e18f850Sotto
5816e18f850Sotto do { /* newton was a stinker */
5826e18f850Sotto y = x;
5836e18f850Sotto x = n / x;
5846e18f850Sotto x += y;
5856e18f850Sotto x /= 2;
5866e18f850Sotto } while ((x - y) > 1 || (x - y) < -1);
5876e18f850Sotto
5886e18f850Sotto return (x);
5896e18f850Sotto }
5906e18f850Sotto
59126da422aStedu static int
stone(int * a,int n,int * b,int * c,int flags)592dba1d6eaSray stone(int *a, int n, int *b, int *c, int flags)
593ae8d569bSderaadt {
59448b8c3e3Sderaadt int i, k, y, j, l;
595df890a16Snicm int oldc, tc, oldl, sq;
596df890a16Snicm u_int numtries, bound;
5976e18f850Sotto
598df890a16Snicm if (flags & D_MINIMAL)
599df890a16Snicm bound = UINT_MAX;
600df890a16Snicm else {
601df890a16Snicm sq = isqrt(n);
602b9fc9a72Sderaadt bound = MAXIMUM(256, sq);
603df890a16Snicm }
60448b8c3e3Sderaadt
605ae8d569bSderaadt k = 0;
606ae8d569bSderaadt c[0] = newcand(0, 0, 0);
607ae8d569bSderaadt for (i = 1; i <= n; i++) {
608ae8d569bSderaadt j = a[i];
609ae8d569bSderaadt if (j == 0)
610ae8d569bSderaadt continue;
611ae8d569bSderaadt y = -b[j];
612ae8d569bSderaadt oldl = 0;
613ae8d569bSderaadt oldc = c[0];
6142a89a2f7Smillert numtries = 0;
615ae8d569bSderaadt do {
616ae8d569bSderaadt if (y <= clist[oldc].y)
617ae8d569bSderaadt continue;
618ae8d569bSderaadt l = search(c, k, y);
619ae8d569bSderaadt if (l != oldl + 1)
620ae8d569bSderaadt oldc = c[l - 1];
621ae8d569bSderaadt if (l <= k) {
622ae8d569bSderaadt if (clist[c[l]].y <= y)
623ae8d569bSderaadt continue;
624ae8d569bSderaadt tc = c[l];
625ae8d569bSderaadt c[l] = newcand(i, y, oldc);
626ae8d569bSderaadt oldc = tc;
627ae8d569bSderaadt oldl = l;
6282a89a2f7Smillert numtries++;
629ae8d569bSderaadt } else {
630ae8d569bSderaadt c[l] = newcand(i, y, oldc);
631ae8d569bSderaadt k++;
632ae8d569bSderaadt break;
633ae8d569bSderaadt }
6342a89a2f7Smillert } while ((y = b[++j]) > 0 && numtries < bound);
635ae8d569bSderaadt }
636ae8d569bSderaadt return (k);
637ae8d569bSderaadt }
638ae8d569bSderaadt
63926da422aStedu static int
newcand(int x,int y,int pred)64026da422aStedu newcand(int x, int y, int pred)
641ae8d569bSderaadt {
64226da422aStedu struct cand *q;
64326da422aStedu
6446e18f850Sotto if (clen == clistlen) {
6456e18f850Sotto clistlen = clistlen * 11 / 10;
646371275caSderaadt clist = xreallocarray(clist, clistlen, sizeof(*clist));
6476e18f850Sotto }
6486e18f850Sotto q = clist + clen;
649ae8d569bSderaadt q->x = x;
650ae8d569bSderaadt q->y = y;
651ae8d569bSderaadt q->pred = pred;
6526e18f850Sotto return (clen++);
653ae8d569bSderaadt }
654ae8d569bSderaadt
65526da422aStedu static int
search(int * c,int k,int y)65626da422aStedu search(int *c, int k, int y)
657ae8d569bSderaadt {
65848b8c3e3Sderaadt int i, j, l, t;
65948b8c3e3Sderaadt
660ae8d569bSderaadt if (clist[c[k]].y < y) /* quick look for typical case */
661ae8d569bSderaadt return (k + 1);
662ae8d569bSderaadt i = 0;
663ae8d569bSderaadt j = k + 1;
664dba1d6eaSray for (;;) {
665dba1d6eaSray l = (i + j) / 2;
666dba1d6eaSray if (l <= i)
667ae8d569bSderaadt break;
668ae8d569bSderaadt t = clist[c[l]].y;
669ae8d569bSderaadt if (t > y)
670ae8d569bSderaadt j = l;
671ae8d569bSderaadt else if (t < y)
672ae8d569bSderaadt i = l;
673ae8d569bSderaadt else
674ae8d569bSderaadt return (l);
675ae8d569bSderaadt }
676ae8d569bSderaadt return (l + 1);
677ae8d569bSderaadt }
678ae8d569bSderaadt
67926da422aStedu static void
unravel(int p)68026da422aStedu unravel(int p)
681ae8d569bSderaadt {
68226da422aStedu struct cand *q;
68348b8c3e3Sderaadt int i;
68448b8c3e3Sderaadt
685ae8d569bSderaadt for (i = 0; i <= len[0]; i++)
686ae8d569bSderaadt J[i] = i <= pref ? i :
6877d2b2b91Sderaadt i > len[0] - suff ? i + len[1] - len[0] : 0;
688ae8d569bSderaadt for (q = clist + p; q->y != 0; q = clist + q->pred)
689ae8d569bSderaadt J[q->x + pref] = q->y + pref;
690ae8d569bSderaadt }
691ae8d569bSderaadt
69226da422aStedu /*
69349dffe13Smillert * Check does double duty:
69449dffe13Smillert * 1. ferret out any fortuitous correspondences due
69549dffe13Smillert * to confounding by hashing (which result in "jackpot")
69649dffe13Smillert * 2. collect random access indexes to the two files
69726da422aStedu */
69826da422aStedu static void
check(FILE * f1,FILE * f2,int flags)6996fc40daeSray check(FILE *f1, FILE *f2, int flags)
700ae8d569bSderaadt {
70148b8c3e3Sderaadt int i, j, jackpot, c, d;
702ae8d569bSderaadt long ctold, ctnew;
703ae8d569bSderaadt
7044ec4b3d5Smillert rewind(f1);
7054ec4b3d5Smillert rewind(f2);
706ae8d569bSderaadt j = 1;
707ae8d569bSderaadt ixold[0] = ixnew[0] = 0;
708ae8d569bSderaadt jackpot = 0;
709ae8d569bSderaadt ctold = ctnew = 0;
710ae8d569bSderaadt for (i = 1; i <= len[0]; i++) {
711ae8d569bSderaadt if (J[i] == 0) {
7124ec4b3d5Smillert ixold[i] = ctold += skipline(f1);
713ae8d569bSderaadt continue;
714ae8d569bSderaadt }
715ae8d569bSderaadt while (j < J[i]) {
7164ec4b3d5Smillert ixnew[j] = ctnew += skipline(f2);
717ae8d569bSderaadt j++;
718ae8d569bSderaadt }
719dba1d6eaSray if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
720ae8d569bSderaadt for (;;) {
7214ec4b3d5Smillert c = getc(f1);
7224ec4b3d5Smillert d = getc(f2);
723bb34d48bSmillert /*
724bb34d48bSmillert * GNU diff ignores a missing newline
725dba1d6eaSray * in one file for -b or -w.
726bb34d48bSmillert */
72782328041Skspillner if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
72882328041Skspillner if (c == EOF && d == '\n') {
72982328041Skspillner ctnew++;
730bb34d48bSmillert break;
73182328041Skspillner } else if (c == '\n' && d == EOF) {
73282328041Skspillner ctold++;
73382328041Skspillner break;
73482328041Skspillner }
735bb34d48bSmillert }
736ae8d569bSderaadt ctold++;
737ae8d569bSderaadt ctnew++;
738dba1d6eaSray if ((flags & D_FOLDBLANKS) && isspace(c) &&
739dba1d6eaSray isspace(d)) {
740ae8d569bSderaadt do {
741ae8d569bSderaadt if (c == '\n')
742ae8d569bSderaadt break;
743ae8d569bSderaadt ctold++;
7444ec4b3d5Smillert } while (isspace(c = getc(f1)));
745ae8d569bSderaadt do {
746ae8d569bSderaadt if (d == '\n')
747ae8d569bSderaadt break;
748ae8d569bSderaadt ctnew++;
7494ec4b3d5Smillert } while (isspace(d = getc(f2)));
750dba1d6eaSray } else if ((flags & D_IGNOREBLANKS)) {
751ae8d569bSderaadt while (isspace(c) && c != '\n') {
7524ec4b3d5Smillert c = getc(f1);
753ae8d569bSderaadt ctold++;
754ae8d569bSderaadt }
755ae8d569bSderaadt while (isspace(d) && d != '\n') {
7564ec4b3d5Smillert d = getc(f2);
757ae8d569bSderaadt ctnew++;
758ae8d569bSderaadt }
759ae8d569bSderaadt }
760ae8d569bSderaadt if (chrtran[c] != chrtran[d]) {
761ae8d569bSderaadt jackpot++;
762ae8d569bSderaadt J[i] = 0;
763bb34d48bSmillert if (c != '\n' && c != EOF)
7644ec4b3d5Smillert ctold += skipline(f1);
765bb34d48bSmillert if (d != '\n' && c != EOF)
7664ec4b3d5Smillert ctnew += skipline(f2);
767ae8d569bSderaadt break;
768ae8d569bSderaadt }
769bb34d48bSmillert if (c == '\n' || c == EOF)
770ae8d569bSderaadt break;
771ae8d569bSderaadt }
772ae8d569bSderaadt } else {
773ae8d569bSderaadt for (;;) {
774ae8d569bSderaadt ctold++;
775ae8d569bSderaadt ctnew++;
7764ec4b3d5Smillert if ((c = getc(f1)) != (d = getc(f2))) {
777ae8d569bSderaadt /* jackpot++; */
778ae8d569bSderaadt J[i] = 0;
779bb34d48bSmillert if (c != '\n' && c != EOF)
7804ec4b3d5Smillert ctold += skipline(f1);
781bb34d48bSmillert if (d != '\n' && c != EOF)
7824ec4b3d5Smillert ctnew += skipline(f2);
783ae8d569bSderaadt break;
784ae8d569bSderaadt }
785bb34d48bSmillert if (c == '\n' || c == EOF)
786ae8d569bSderaadt break;
787ae8d569bSderaadt }
788ae8d569bSderaadt }
789ae8d569bSderaadt ixold[i] = ctold;
790ae8d569bSderaadt ixnew[j] = ctnew;
791ae8d569bSderaadt j++;
792ae8d569bSderaadt }
7934ec4b3d5Smillert for (; j <= len[1]; j++)
7944ec4b3d5Smillert ixnew[j] = ctnew += skipline(f2);
795ae8d569bSderaadt /*
79648b8c3e3Sderaadt * if (jackpot)
79748b8c3e3Sderaadt * fprintf(stderr, "jackpot\n");
798ae8d569bSderaadt */
799ae8d569bSderaadt }
800ae8d569bSderaadt
80148b8c3e3Sderaadt /* shellsort CACM #201 */
80226da422aStedu static void
sort(struct line * a,int n)80326da422aStedu sort(struct line *a, int n)
80448b8c3e3Sderaadt {
80548b8c3e3Sderaadt struct line *ai, *aim, w;
80648b8c3e3Sderaadt int j, m = 0, k;
807ae8d569bSderaadt
808ae8d569bSderaadt if (n == 0)
809ae8d569bSderaadt return;
810ae8d569bSderaadt for (j = 1; j <= n; j *= 2)
811ae8d569bSderaadt m = 2 * j - 1;
812ae8d569bSderaadt for (m /= 2; m != 0; m /= 2) {
813ae8d569bSderaadt k = n - m;
814ae8d569bSderaadt for (j = 1; j <= k; j++) {
815ae8d569bSderaadt for (ai = &a[j]; ai > a; ai -= m) {
816ae8d569bSderaadt aim = &ai[m];
817ae8d569bSderaadt if (aim < ai)
818ae8d569bSderaadt break; /* wraparound */
819ae8d569bSderaadt if (aim->value > ai[0].value ||
82026da422aStedu (aim->value == ai[0].value &&
82126da422aStedu aim->serial > ai[0].serial))
822ae8d569bSderaadt break;
823ae8d569bSderaadt w.value = ai[0].value;
824ae8d569bSderaadt ai[0].value = aim->value;
825ae8d569bSderaadt aim->value = w.value;
826ae8d569bSderaadt w.serial = ai[0].serial;
827ae8d569bSderaadt ai[0].serial = aim->serial;
828ae8d569bSderaadt aim->serial = w.serial;
829ae8d569bSderaadt }
830ae8d569bSderaadt }
831ae8d569bSderaadt }
832ae8d569bSderaadt }
833ae8d569bSderaadt
83426da422aStedu static void
unsort(struct line * f,int l,int * b)83526da422aStedu unsort(struct line *f, int l, int *b)
836ae8d569bSderaadt {
83748b8c3e3Sderaadt int *a, i;
83826da422aStedu
83957003866Sray a = xcalloc(l + 1, sizeof(*a));
840ae8d569bSderaadt for (i = 1; i <= l; i++)
841ae8d569bSderaadt a[f[i].serial] = f[i].value;
842ae8d569bSderaadt for (i = 1; i <= l; i++)
843ae8d569bSderaadt b[i] = a[i];
844a79a2617Stedu free(a);
845ae8d569bSderaadt }
846ae8d569bSderaadt
84726da422aStedu static int
skipline(FILE * f)8484ec4b3d5Smillert skipline(FILE *f)
849ae8d569bSderaadt {
85026da422aStedu int i, c;
851ae8d569bSderaadt
852bb34d48bSmillert for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
853bb34d48bSmillert continue;
854ae8d569bSderaadt return (i);
855ae8d569bSderaadt }
856ae8d569bSderaadt
85726da422aStedu static void
output(char * file1,FILE * f1,char * file2,FILE * f2,int flags)858ac73e8e6Smillert output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
859ae8d569bSderaadt {
86048b8c3e3Sderaadt int m, i0, i1, j0, j1;
86148b8c3e3Sderaadt
8624ec4b3d5Smillert rewind(f1);
8634ec4b3d5Smillert rewind(f2);
864ae8d569bSderaadt m = len[0];
865ae8d569bSderaadt J[0] = 0;
866ae8d569bSderaadt J[m + 1] = len[1] + 1;
86757003866Sray if (diff_format != D_EDIT) {
86826da422aStedu for (i0 = 1; i0 <= m; i0 = i1 + 1) {
86926da422aStedu while (i0 <= m && J[i0] == J[i0 - 1] + 1)
87026da422aStedu i0++;
871ae8d569bSderaadt j0 = J[i0 - 1] + 1;
872ae8d569bSderaadt i1 = i0 - 1;
87326da422aStedu while (i1 < m && J[i1 + 1] == 0)
87426da422aStedu i1++;
875ae8d569bSderaadt j1 = J[i1 + 1] - 1;
876ae8d569bSderaadt J[i1] = j1;
87761783bcdSespie change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
87826da422aStedu }
87926da422aStedu } else {
88026da422aStedu for (i0 = m; i0 >= 1; i0 = i1 - 1) {
88126da422aStedu while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
88226da422aStedu i0--;
883ae8d569bSderaadt j0 = J[i0 + 1] - 1;
884ae8d569bSderaadt i1 = i0 + 1;
88526da422aStedu while (i1 > 1 && J[i1 - 1] == 0)
88626da422aStedu i1--;
887ae8d569bSderaadt j1 = J[i1 - 1] + 1;
888ae8d569bSderaadt J[i1] = j1;
88961783bcdSespie change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
890ae8d569bSderaadt }
89126da422aStedu }
892ae8d569bSderaadt if (m == 0)
89361783bcdSespie change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
89457003866Sray if (diff_format == D_IFDEF) {
895ae8d569bSderaadt for (;;) {
896ae8d569bSderaadt #define c i0
897bb34d48bSmillert if ((c = getc(f1)) == EOF)
898ae8d569bSderaadt return;
8995e07282dSray diff_output("%c", c);
900ae8d569bSderaadt }
901ae8d569bSderaadt #undef c
902ae8d569bSderaadt }
9039de32c1bSmillert if (anychange != 0) {
90457003866Sray if (diff_format == D_CONTEXT)
905dba1d6eaSray dump_context_vec(f1, f2, flags);
90657003866Sray else if (diff_format == D_UNIFIED)
907dba1d6eaSray dump_unified_vec(f1, f2, flags);
9089de32c1bSmillert }
909ae8d569bSderaadt }
910ae8d569bSderaadt
9114a034c3aSray static void
range(int a,int b,char * separator)912c72ea322Smillert range(int a, int b, char *separator)
913c72ea322Smillert {
9145e07282dSray diff_output("%d", a > b ? b : a);
915c72ea322Smillert if (a < b)
9165e07282dSray diff_output("%s%d", separator, b);
917c72ea322Smillert }
918c72ea322Smillert
9194a034c3aSray static void
uni_range(int a,int b)920c72ea322Smillert uni_range(int a, int b)
921c72ea322Smillert {
922c72ea322Smillert if (a < b)
9235e07282dSray diff_output("%d,%d", a, b - a + 1);
924c72ea322Smillert else if (a == b)
9255e07282dSray diff_output("%d", b);
926c72ea322Smillert else
9275e07282dSray diff_output("%d,0", b);
928c72ea322Smillert }
929c72ea322Smillert
930ccd55a2cSotto static char *
preadline(int fd,size_t rlen,off_t off)931dba1d6eaSray preadline(int fd, size_t rlen, off_t off)
932ccd55a2cSotto {
933ccd55a2cSotto char *line;
934ccd55a2cSotto ssize_t nr;
935ccd55a2cSotto
936dba1d6eaSray line = xmalloc(rlen + 1);
9373aaa63ebSderaadt if ((nr = pread(fd, line, rlen, off)) == -1)
93860b54a0eSray err(2, "preadline");
9398d981b00Sotto if (nr > 0 && line[nr-1] == '\n')
9408d981b00Sotto nr--;
941ccd55a2cSotto line[nr] = '\0';
942ccd55a2cSotto return (line);
943ccd55a2cSotto }
944ccd55a2cSotto
945ccd55a2cSotto static int
ignoreline(char * line)946ccd55a2cSotto ignoreline(char *line)
947ccd55a2cSotto {
948ccd55a2cSotto int ret;
949ccd55a2cSotto
950ccd55a2cSotto ret = regexec(&ignore_re, line, 0, NULL, 0);
951a79a2617Stedu free(line);
952ccd55a2cSotto return (ret == 0); /* if it matched, it should be ignored. */
953ccd55a2cSotto }
954ccd55a2cSotto
955ae8d569bSderaadt /*
9564ec4b3d5Smillert * Indicate that there is a difference between lines a and b of the from file
95726da422aStedu * to get to lines c to d of the to file. If a is greater then b then there
95826da422aStedu * are no lines in the from file involved and this means that there were
95926da422aStedu * lines appended (beginning at b). If c is greater than d then there are
96026da422aStedu * lines missing from the to file.
961ae8d569bSderaadt */
96226da422aStedu static void
change(char * file1,FILE * f1,char * file2,FILE * f2,int a,int b,int c,int d,int * pflags)963ac73e8e6Smillert change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
96461783bcdSespie int *pflags)
965ae8d569bSderaadt {
9660efcb26eSmillert static size_t max_context = 64;
967f8a6bf23Smillert int i;
9680efcb26eSmillert
969f8a6bf23Smillert restart:
97057003866Sray if (diff_format != D_IFDEF && a > b && c > d)
971ae8d569bSderaadt return;
972ccd55a2cSotto if (ignore_pats != NULL) {
973ccd55a2cSotto char *line;
974ccd55a2cSotto /*
975ccd55a2cSotto * All lines in the change, insert, or delete must
976ccd55a2cSotto * match an ignore pattern for the change to be
977ccd55a2cSotto * ignored.
978ccd55a2cSotto */
979ccd55a2cSotto if (a <= b) { /* Changes and deletes. */
980ccd55a2cSotto for (i = a; i <= b; i++) {
981ccd55a2cSotto line = preadline(fileno(f1),
982ccd55a2cSotto ixold[i] - ixold[i - 1], ixold[i - 1]);
983ccd55a2cSotto if (!ignoreline(line))
984ccd55a2cSotto goto proceed;
985ccd55a2cSotto }
986ccd55a2cSotto }
987ccd55a2cSotto if (a > b || c <= d) { /* Changes and inserts. */
988ccd55a2cSotto for (i = c; i <= d; i++) {
989ccd55a2cSotto line = preadline(fileno(f2),
990ccd55a2cSotto ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
991ccd55a2cSotto if (!ignoreline(line))
992ccd55a2cSotto goto proceed;
993ccd55a2cSotto }
994ccd55a2cSotto }
995ccd55a2cSotto return;
996ccd55a2cSotto }
997ccd55a2cSotto proceed:
99861783bcdSespie if (*pflags & D_HEADER) {
9995e07282dSray diff_output("%s %s %s\n", diffargs, file1, file2);
100061783bcdSespie *pflags &= ~D_HEADER;
100161783bcdSespie }
100257003866Sray if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
10030efcb26eSmillert /*
10040efcb26eSmillert * Allocate change records as needed.
10050efcb26eSmillert */
10060efcb26eSmillert if (context_vec_ptr == context_vec_end - 1) {
10070efcb26eSmillert ptrdiff_t offset = context_vec_ptr - context_vec_start;
10080efcb26eSmillert max_context <<= 1;
1009371275caSderaadt context_vec_start = xreallocarray(context_vec_start,
1010aa215433Sray max_context, sizeof(*context_vec_start));
10110efcb26eSmillert context_vec_end = context_vec_start + max_context;
10120efcb26eSmillert context_vec_ptr = context_vec_start + offset;
10130efcb26eSmillert }
10140efcb26eSmillert if (anychange == 0) {
10150efcb26eSmillert /*
10160efcb26eSmillert * Print the context/unidiff header first time through.
10170efcb26eSmillert */
10187bdb251cSmillert print_header(file1, file2);
10190efcb26eSmillert anychange = 1;
102057003866Sray } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
102157003866Sray c > context_vec_ptr->d + (2 * diff_context) + 1) {
1022ae8d569bSderaadt /*
102357003866Sray * If this change is more than 'diff_context' lines from the
10240efcb26eSmillert * previous change, dump the record and reset it.
1025ae8d569bSderaadt */
102657003866Sray if (diff_format == D_CONTEXT)
1027dba1d6eaSray dump_context_vec(f1, f2, *pflags);
10289de32c1bSmillert else
1029dba1d6eaSray dump_unified_vec(f1, f2, *pflags);
10309de32c1bSmillert }
1031ae8d569bSderaadt context_vec_ptr++;
1032ae8d569bSderaadt context_vec_ptr->a = a;
1033ae8d569bSderaadt context_vec_ptr->b = b;
1034ae8d569bSderaadt context_vec_ptr->c = c;
1035ae8d569bSderaadt context_vec_ptr->d = d;
1036ae8d569bSderaadt return;
1037ae8d569bSderaadt }
10380efcb26eSmillert if (anychange == 0)
10390efcb26eSmillert anychange = 1;
104057003866Sray switch (diff_format) {
10415f9fc8aaSmillert case D_BRIEF:
10425f9fc8aaSmillert return;
1043ae8d569bSderaadt case D_NORMAL:
1044ae8d569bSderaadt case D_EDIT:
1045ae8d569bSderaadt range(a, b, ",");
10465e07282dSray diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
104757003866Sray if (diff_format == D_NORMAL)
1048ae8d569bSderaadt range(c, d, ",");
10495e07282dSray diff_output("\n");
1050ae8d569bSderaadt break;
1051ae8d569bSderaadt case D_REVERSE:
10525e07282dSray diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1053ae8d569bSderaadt range(a, b, " ");
10545e07282dSray diff_output("\n");
1055ae8d569bSderaadt break;
1056ae8d569bSderaadt case D_NREVERSE:
1057ae8d569bSderaadt if (a > b)
10585e07282dSray diff_output("a%d %d\n", b, d - c + 1);
1059ae8d569bSderaadt else {
10605e07282dSray diff_output("d%d %d\n", a, b - a + 1);
1061ae8d569bSderaadt if (!(c > d))
1062ae8d569bSderaadt /* add changed lines */
10635e07282dSray diff_output("a%d %d\n", b, d - c + 1);
1064ae8d569bSderaadt }
1065ae8d569bSderaadt break;
1066ae8d569bSderaadt }
106757003866Sray if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1068dba1d6eaSray fetch(ixold, a, b, f1, '<', 1, *pflags);
106957003866Sray if (a <= b && c <= d && diff_format == D_NORMAL)
10705e07282dSray diff_output("---\n");
1071ae8d569bSderaadt }
107257003866Sray i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
107357003866Sray if (i != 0 && diff_format == D_EDIT) {
1074f8a6bf23Smillert /*
1075f8a6bf23Smillert * A non-zero return value for D_EDIT indicates that the
1076f8a6bf23Smillert * last line printed was a bare dot (".") that has been
1077f8a6bf23Smillert * escaped as ".." to prevent ed(1) from misinterpreting
1078f8a6bf23Smillert * it. We have to add a substitute command to change this
1079f8a6bf23Smillert * back and restart where we left off.
1080f8a6bf23Smillert */
10815e07282dSray diff_output(".\n");
1082d1e24f31Snatano diff_output("%ds/.//\n", a + i - 1);
1083d1e24f31Snatano b = a + i - 1;
1084d1e24f31Snatano a = b + 1;
1085f8a6bf23Smillert c += i;
1086f8a6bf23Smillert goto restart;
1087f8a6bf23Smillert }
108857003866Sray if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
10895e07282dSray diff_output(".\n");
1090ae8d569bSderaadt if (inifdef) {
10915e07282dSray diff_output("#endif /* %s */\n", ifdefname);
1092ae8d569bSderaadt inifdef = 0;
1093ae8d569bSderaadt }
1094ae8d569bSderaadt }
1095ae8d569bSderaadt
1096f8a6bf23Smillert static int
fetch(long * f,int a,int b,FILE * lb,int ch,int oldfile,int flags)1097dba1d6eaSray fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1098ae8d569bSderaadt {
1099f8a6bf23Smillert int i, j, c, lastc, col, nc;
1100ae8d569bSderaadt
1101ae8d569bSderaadt /*
1102ae8d569bSderaadt * When doing #ifdef's, copy down to current line
1103ae8d569bSderaadt * if this is the first file, so that stuff makes it to output.
1104ae8d569bSderaadt */
110557003866Sray if (diff_format == D_IFDEF && oldfile) {
1106ae8d569bSderaadt long curpos = ftell(lb);
1107ae8d569bSderaadt /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1108ae8d569bSderaadt nc = f[a > b ? b : a - 1] - curpos;
1109ae8d569bSderaadt for (i = 0; i < nc; i++)
11105e07282dSray diff_output("%c", getc(lb));
1111ae8d569bSderaadt }
1112ae8d569bSderaadt if (a > b)
1113f8a6bf23Smillert return (0);
111457003866Sray if (diff_format == D_IFDEF) {
111590f56ad8Smillert if (inifdef) {
11165e07282dSray diff_output("#else /* %s%s */\n",
111790f56ad8Smillert oldfile == 1 ? "!" : "", ifdefname);
111826da422aStedu } else {
111990f56ad8Smillert if (oldfile)
11205e07282dSray diff_output("#ifndef %s\n", ifdefname);
112190f56ad8Smillert else
11225e07282dSray diff_output("#ifdef %s\n", ifdefname);
1123ae8d569bSderaadt }
1124ae8d569bSderaadt inifdef = 1 + oldfile;
1125ae8d569bSderaadt }
1126ae8d569bSderaadt for (i = a; i <= b; i++) {
112791cf91eeSderaadt fseek(lb, f[i - 1], SEEK_SET);
1128ae8d569bSderaadt nc = f[i] - f[i - 1];
112957003866Sray if (diff_format != D_IFDEF && ch != '\0') {
11305e07282dSray diff_output("%c", ch);
113157003866Sray if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
113257003866Sray || diff_format == D_UNIFIED))
11335e07282dSray diff_output("\t");
113457003866Sray else if (diff_format != D_UNIFIED)
11355e07282dSray diff_output(" ");
11361f9aa9e0Smillert }
1137ae8d569bSderaadt col = 0;
1138f8a6bf23Smillert for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1139bb34d48bSmillert if ((c = getc(lb)) == EOF) {
114057003866Sray if (diff_format == D_EDIT || diff_format == D_REVERSE ||
114157003866Sray diff_format == D_NREVERSE)
1142643dc60cSmillert warnx("No newline at end of file");
1143643dc60cSmillert else
11445e07282dSray diff_output("\n\\ No newline at end of "
11455e07282dSray "file\n");
1146643dc60cSmillert return (0);
1147bb34d48bSmillert }
1148dba1d6eaSray if (c == '\t' && (flags & D_EXPANDTABS)) {
1149bb34d48bSmillert do {
11505e07282dSray diff_output(" ");
1151bb34d48bSmillert } while (++col & 7);
1152bb34d48bSmillert } else {
115357003866Sray if (diff_format == D_EDIT && j == 1 && c == '\n'
1154f8a6bf23Smillert && lastc == '.') {
1155f8a6bf23Smillert /*
1156f8a6bf23Smillert * Don't print a bare "." line
1157f8a6bf23Smillert * since that will confuse ed(1).
1158f8a6bf23Smillert * Print ".." instead and return,
1159f8a6bf23Smillert * giving the caller an offset
1160f8a6bf23Smillert * from which to restart.
1161f8a6bf23Smillert */
11625e07282dSray diff_output(".\n");
1163f8a6bf23Smillert return (i - a + 1);
1164f8a6bf23Smillert }
11655e07282dSray diff_output("%c", c);
1166ae8d569bSderaadt col++;
1167ae8d569bSderaadt }
1168ae8d569bSderaadt }
1169ae8d569bSderaadt }
1170f8a6bf23Smillert return (0);
1171ae8d569bSderaadt }
1172ae8d569bSderaadt
1173ae8d569bSderaadt /*
1174a8013e93Sotto * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1175ae8d569bSderaadt */
117626da422aStedu static int
readhash(FILE * f,int flags)1177dba1d6eaSray readhash(FILE *f, int flags)
1178ae8d569bSderaadt {
1179a8013e93Sotto int i, t, space;
1180a8013e93Sotto int sum;
1181ae8d569bSderaadt
1182ae8d569bSderaadt sum = 1;
1183ae8d569bSderaadt space = 0;
1184dba1d6eaSray if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1185dba1d6eaSray if (flags & D_IGNORECASE)
1186a8013e93Sotto for (i = 0; (t = getc(f)) != '\n'; i++) {
1187bb34d48bSmillert if (t == EOF) {
1188a8013e93Sotto if (i == 0)
1189ae8d569bSderaadt return (0);
1190bb34d48bSmillert break;
1191bb34d48bSmillert }
1192a8013e93Sotto sum = sum * 127 + chrtran[t];
1193ae8d569bSderaadt }
1194ae8d569bSderaadt else
1195a8013e93Sotto for (i = 0; (t = getc(f)) != '\n'; i++) {
1196bb34d48bSmillert if (t == EOF) {
1197a8013e93Sotto if (i == 0)
1198ae8d569bSderaadt return (0);
1199bb34d48bSmillert break;
1200bb34d48bSmillert }
1201a8013e93Sotto sum = sum * 127 + t;
1202ae8d569bSderaadt }
1203ae8d569bSderaadt } else {
1204a8013e93Sotto for (i = 0;;) {
1205ae8d569bSderaadt switch (t = getc(f)) {
1206ae8d569bSderaadt case '\t':
1207b5b605d5Sotto case '\r':
1208b5b605d5Sotto case '\v':
1209b5b605d5Sotto case '\f':
1210ae8d569bSderaadt case ' ':
1211ae8d569bSderaadt space++;
1212ae8d569bSderaadt continue;
1213ae8d569bSderaadt default:
1214dba1d6eaSray if (space && (flags & D_IGNOREBLANKS) == 0) {
1215a8013e93Sotto i++;
1216ae8d569bSderaadt space = 0;
1217ae8d569bSderaadt }
1218a8013e93Sotto sum = sum * 127 + chrtran[t];
1219a8013e93Sotto i++;
1220ae8d569bSderaadt continue;
1221bb34d48bSmillert case EOF:
1222a8013e93Sotto if (i == 0)
1223bb34d48bSmillert return (0);
1224bb34d48bSmillert /* FALLTHROUGH */
1225ae8d569bSderaadt case '\n':
1226ae8d569bSderaadt break;
1227ae8d569bSderaadt }
1228ae8d569bSderaadt break;
1229ae8d569bSderaadt }
1230ae8d569bSderaadt }
1231a8013e93Sotto /*
1232a8013e93Sotto * There is a remote possibility that we end up with a zero sum.
1233a8013e93Sotto * Zero is used as an EOF marker, so return 1 instead.
1234a8013e93Sotto */
1235a8013e93Sotto return (sum == 0 ? 1 : sum);
1236ae8d569bSderaadt }
1237ae8d569bSderaadt
1238774cb253Savsm static int
asciifile(FILE * f)123926da422aStedu asciifile(FILE *f)
1240ae8d569bSderaadt {
1241063293f0Sotto unsigned char buf[BUFSIZ];
12420631431fSstsp size_t cnt;
1243ae8d569bSderaadt
1244dba1d6eaSray if (f == NULL)
12452a1593dfStedu return (1);
12462a1593dfStedu
12474ec4b3d5Smillert rewind(f);
12484ec4b3d5Smillert cnt = fread(buf, 1, sizeof(buf), f);
12490631431fSstsp return (memchr(buf, '\0', cnt) == NULL);
1250ae8d569bSderaadt }
1251ae8d569bSderaadt
12525c68ba7eSespie #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
12535c68ba7eSespie
125496e45528Sotto static char *
match_function(const long * f,int pos,FILE * fp)1255dba1d6eaSray match_function(const long *f, int pos, FILE *fp)
125696e45528Sotto {
1257063293f0Sotto unsigned char buf[FUNCTION_CONTEXT_SIZE];
125896e45528Sotto size_t nc;
125996e45528Sotto int last = lastline;
12605c68ba7eSespie char *state = NULL;
126196e45528Sotto
126296e45528Sotto lastline = pos;
126396e45528Sotto while (pos > last) {
1264dba1d6eaSray fseek(fp, f[pos - 1], SEEK_SET);
126596e45528Sotto nc = f[pos] - f[pos - 1];
126696e45528Sotto if (nc >= sizeof(buf))
126796e45528Sotto nc = sizeof(buf) - 1;
1268dba1d6eaSray nc = fread(buf, 1, nc, fp);
126996e45528Sotto if (nc > 0) {
127096e45528Sotto buf[nc] = '\0';
12714fd6ed32Sgilles buf[strcspn(buf, "\n")] = '\0';
127296e45528Sotto if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
12735c68ba7eSespie if (begins_with(buf, "private:")) {
12745c68ba7eSespie if (!state)
12755c68ba7eSespie state = " (private)";
12765c68ba7eSespie } else if (begins_with(buf, "protected:")) {
12775c68ba7eSespie if (!state)
12785c68ba7eSespie state = " (protected)";
12795c68ba7eSespie } else if (begins_with(buf, "public:")) {
12805c68ba7eSespie if (!state)
12815c68ba7eSespie state = " (public)";
12825c68ba7eSespie } else {
128396e45528Sotto strlcpy(lastbuf, buf, sizeof lastbuf);
12845c68ba7eSespie if (state)
12855c68ba7eSespie strlcat(lastbuf, state,
12865c68ba7eSespie sizeof lastbuf);
128796e45528Sotto lastmatchline = pos;
128896e45528Sotto return lastbuf;
128996e45528Sotto }
129096e45528Sotto }
12915c68ba7eSespie }
129296e45528Sotto pos--;
129396e45528Sotto }
129496e45528Sotto return lastmatchline > 0 ? lastbuf : NULL;
129596e45528Sotto }
129696e45528Sotto
1297ae8d569bSderaadt /* dump accumulated "context" diff changes */
129826da422aStedu static void
dump_context_vec(FILE * f1,FILE * f2,int flags)1299dba1d6eaSray dump_context_vec(FILE *f1, FILE *f2, int flags)
1300ae8d569bSderaadt {
130148b8c3e3Sderaadt struct context_vec *cvp = context_vec_start;
130248b8c3e3Sderaadt int lowa, upb, lowc, upd, do_output;
130326da422aStedu int a, b, c, d;
130496e45528Sotto char ch, *f;
1305ae8d569bSderaadt
130690f56ad8Smillert if (context_vec_start > context_vec_ptr)
1307ae8d569bSderaadt return;
1308ae8d569bSderaadt
130926da422aStedu b = d = 0; /* gcc */
1310b9fc9a72Sderaadt lowa = MAXIMUM(1, cvp->a - diff_context);
1311b9fc9a72Sderaadt upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1312b9fc9a72Sderaadt lowc = MAXIMUM(1, cvp->c - diff_context);
1313b9fc9a72Sderaadt upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
1314ae8d569bSderaadt
13155e07282dSray diff_output("***************");
1316dba1d6eaSray if ((flags & D_PROTOTYPE)) {
131796e45528Sotto f = match_function(ixold, lowa-1, f1);
13185e07282dSray if (f != NULL)
13195e07282dSray diff_output(" %s", f);
132096e45528Sotto }
13215e07282dSray diff_output("\n*** ");
1322ae8d569bSderaadt range(lowa, upb, ",");
13235e07282dSray diff_output(" ****\n");
1324ae8d569bSderaadt
1325ae8d569bSderaadt /*
13264ec4b3d5Smillert * Output changes to the "old" file. The first loop suppresses
1327ae8d569bSderaadt * output if there were no changes to the "old" file (we'll see
1328ae8d569bSderaadt * the "old" lines as context in the "new" list).
1329ae8d569bSderaadt */
1330ae8d569bSderaadt do_output = 0;
1331ae8d569bSderaadt for (; cvp <= context_vec_ptr; cvp++)
1332ae8d569bSderaadt if (cvp->a <= cvp->b) {
1333ae8d569bSderaadt cvp = context_vec_start;
1334ae8d569bSderaadt do_output++;
1335ae8d569bSderaadt break;
1336ae8d569bSderaadt }
1337ae8d569bSderaadt if (do_output) {
1338ae8d569bSderaadt while (cvp <= context_vec_ptr) {
133926da422aStedu a = cvp->a;
134026da422aStedu b = cvp->b;
134126da422aStedu c = cvp->c;
134226da422aStedu d = cvp->d;
1343ae8d569bSderaadt
1344ae8d569bSderaadt if (a <= b && c <= d)
1345ae8d569bSderaadt ch = 'c';
1346ae8d569bSderaadt else
1347ae8d569bSderaadt ch = (a <= b) ? 'd' : 'a';
1348ae8d569bSderaadt
1349ae8d569bSderaadt if (ch == 'a')
1350dba1d6eaSray fetch(ixold, lowa, b, f1, ' ', 0, flags);
1351ae8d569bSderaadt else {
1352dba1d6eaSray fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
13534ec4b3d5Smillert fetch(ixold, a, b, f1,
1354dba1d6eaSray ch == 'c' ? '!' : '-', 0, flags);
1355ae8d569bSderaadt }
1356ae8d569bSderaadt lowa = b + 1;
1357ae8d569bSderaadt cvp++;
1358ae8d569bSderaadt }
1359dba1d6eaSray fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1360ae8d569bSderaadt }
1361ae8d569bSderaadt /* output changes to the "new" file */
13625e07282dSray diff_output("--- ");
1363ae8d569bSderaadt range(lowc, upd, ",");
13645e07282dSray diff_output(" ----\n");
1365ae8d569bSderaadt
1366ae8d569bSderaadt do_output = 0;
1367ae8d569bSderaadt for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1368ae8d569bSderaadt if (cvp->c <= cvp->d) {
1369ae8d569bSderaadt cvp = context_vec_start;
1370ae8d569bSderaadt do_output++;
1371ae8d569bSderaadt break;
1372ae8d569bSderaadt }
1373ae8d569bSderaadt if (do_output) {
1374ae8d569bSderaadt while (cvp <= context_vec_ptr) {
137526da422aStedu a = cvp->a;
137626da422aStedu b = cvp->b;
137726da422aStedu c = cvp->c;
137826da422aStedu d = cvp->d;
1379ae8d569bSderaadt
1380ae8d569bSderaadt if (a <= b && c <= d)
1381ae8d569bSderaadt ch = 'c';
1382ae8d569bSderaadt else
1383ae8d569bSderaadt ch = (a <= b) ? 'd' : 'a';
1384ae8d569bSderaadt
1385ae8d569bSderaadt if (ch == 'd')
1386dba1d6eaSray fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1387ae8d569bSderaadt else {
1388dba1d6eaSray fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
13894ec4b3d5Smillert fetch(ixnew, c, d, f2,
1390dba1d6eaSray ch == 'c' ? '!' : '+', 0, flags);
1391ae8d569bSderaadt }
1392ae8d569bSderaadt lowc = d + 1;
1393ae8d569bSderaadt cvp++;
1394ae8d569bSderaadt }
1395dba1d6eaSray fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1396ae8d569bSderaadt }
1397ae8d569bSderaadt context_vec_ptr = context_vec_start - 1;
1398ae8d569bSderaadt }
13999de32c1bSmillert
14009de32c1bSmillert /* dump accumulated "unified" diff changes */
14019de32c1bSmillert static void
dump_unified_vec(FILE * f1,FILE * f2,int flags)1402dba1d6eaSray dump_unified_vec(FILE *f1, FILE *f2, int flags)
14039de32c1bSmillert {
14049de32c1bSmillert struct context_vec *cvp = context_vec_start;
14059de32c1bSmillert int lowa, upb, lowc, upd;
14069de32c1bSmillert int a, b, c, d;
140796e45528Sotto char ch, *f;
14089de32c1bSmillert
140990f56ad8Smillert if (context_vec_start > context_vec_ptr)
14109de32c1bSmillert return;
14119de32c1bSmillert
14129de32c1bSmillert b = d = 0; /* gcc */
1413b9fc9a72Sderaadt lowa = MAXIMUM(1, cvp->a - diff_context);
1414b9fc9a72Sderaadt upb = MINIMUM(len[0], context_vec_ptr->b + diff_context);
1415b9fc9a72Sderaadt lowc = MAXIMUM(1, cvp->c - diff_context);
1416b9fc9a72Sderaadt upd = MINIMUM(len[1], context_vec_ptr->d + diff_context);
14179de32c1bSmillert
14185e07282dSray diff_output("@@ -");
1419c72ea322Smillert uni_range(lowa, upb);
14205e07282dSray diff_output(" +");
1421c72ea322Smillert uni_range(lowc, upd);
14225e07282dSray diff_output(" @@");
1423dba1d6eaSray if ((flags & D_PROTOTYPE)) {
142496e45528Sotto f = match_function(ixold, lowa-1, f1);
14255e07282dSray if (f != NULL)
14265e07282dSray diff_output(" %s", f);
142796e45528Sotto }
14285e07282dSray diff_output("\n");
14299de32c1bSmillert
14309de32c1bSmillert /*
14319de32c1bSmillert * Output changes in "unified" diff format--the old and new lines
14329de32c1bSmillert * are printed together.
14339de32c1bSmillert */
14349de32c1bSmillert for (; cvp <= context_vec_ptr; cvp++) {
14359de32c1bSmillert a = cvp->a;
14369de32c1bSmillert b = cvp->b;
14379de32c1bSmillert c = cvp->c;
14389de32c1bSmillert d = cvp->d;
14399de32c1bSmillert
14409de32c1bSmillert /*
14419de32c1bSmillert * c: both new and old changes
14429de32c1bSmillert * d: only changes in the old file
14439de32c1bSmillert * a: only changes in the new file
14449de32c1bSmillert */
14459de32c1bSmillert if (a <= b && c <= d)
14469de32c1bSmillert ch = 'c';
14479de32c1bSmillert else
14489de32c1bSmillert ch = (a <= b) ? 'd' : 'a';
14499de32c1bSmillert
14509de32c1bSmillert switch (ch) {
14519de32c1bSmillert case 'c':
1452dba1d6eaSray fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1453dba1d6eaSray fetch(ixold, a, b, f1, '-', 0, flags);
1454dba1d6eaSray fetch(ixnew, c, d, f2, '+', 0, flags);
14559de32c1bSmillert break;
14569de32c1bSmillert case 'd':
1457dba1d6eaSray fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1458dba1d6eaSray fetch(ixold, a, b, f1, '-', 0, flags);
14599de32c1bSmillert break;
14609de32c1bSmillert case 'a':
1461dba1d6eaSray fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1462dba1d6eaSray fetch(ixnew, c, d, f2, '+', 0, flags);
14639de32c1bSmillert break;
14649de32c1bSmillert }
14659de32c1bSmillert lowa = b + 1;
14669de32c1bSmillert lowc = d + 1;
14679de32c1bSmillert }
1468dba1d6eaSray fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
14699de32c1bSmillert
14709de32c1bSmillert context_vec_ptr = context_vec_start - 1;
14719de32c1bSmillert }
14727bdb251cSmillert
14737bdb251cSmillert static void
print_header(const char * file1,const char * file2)14747bdb251cSmillert print_header(const char *file1, const char *file2)
14757bdb251cSmillert {
14767bdb251cSmillert if (label[0] != NULL)
14775e07282dSray diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
14787bdb251cSmillert label[0]);
14797bdb251cSmillert else
14805e07282dSray diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
14817bdb251cSmillert file1, ctime(&stb1.st_mtime));
14827bdb251cSmillert if (label[1] != NULL)
14835e07282dSray diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
14847bdb251cSmillert label[1]);
14857bdb251cSmillert else
14865e07282dSray diff_output("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
14877bdb251cSmillert file2, ctime(&stb2.st_mtime));
14887bdb251cSmillert }
1489