1*dba1d6eaSray /* $OpenBSD: diffreg.c,v 1.71 2009/06/06 15:00:27 ray Exp $ */ 2d0c3f575Sderaadt 3d0c3f575Sderaadt /* 4d0c3f575Sderaadt * Copyright (C) Caldera International Inc. 2001-2002. 5d0c3f575Sderaadt * All rights reserved. 6d0c3f575Sderaadt * 7d0c3f575Sderaadt * Redistribution and use in source and binary forms, with or without 8d0c3f575Sderaadt * modification, are permitted provided that the following conditions 9d0c3f575Sderaadt * are met: 10d0c3f575Sderaadt * 1. Redistributions of source code and documentation must retain the above 11d0c3f575Sderaadt * copyright notice, this list of conditions and the following disclaimer. 12d0c3f575Sderaadt * 2. Redistributions in binary form must reproduce the above copyright 13d0c3f575Sderaadt * notice, this list of conditions and the following disclaimer in the 14d0c3f575Sderaadt * documentation and/or other materials provided with the distribution. 15d0c3f575Sderaadt * 3. All advertising materials mentioning features or use of this software 16d0c3f575Sderaadt * must display the following acknowledgement: 17d0c3f575Sderaadt * This product includes software developed or owned by Caldera 18d0c3f575Sderaadt * International, Inc. 19d0c3f575Sderaadt * 4. Neither the name of Caldera International, Inc. nor the names of other 20d0c3f575Sderaadt * contributors may be used to endorse or promote products derived from 21d0c3f575Sderaadt * this software without specific prior written permission. 22d0c3f575Sderaadt * 23d0c3f575Sderaadt * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24d0c3f575Sderaadt * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25d0c3f575Sderaadt * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26d0c3f575Sderaadt * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27d0c3f575Sderaadt * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28d0c3f575Sderaadt * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29d0c3f575Sderaadt * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30d0c3f575Sderaadt * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31d0c3f575Sderaadt * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32d0c3f575Sderaadt * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33d0c3f575Sderaadt * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34d0c3f575Sderaadt * POSSIBILITY OF SUCH DAMAGE. 35d0c3f575Sderaadt */ 364ec4b3d5Smillert /*- 374ec4b3d5Smillert * Copyright (c) 1991, 1993 384ec4b3d5Smillert * The Regents of the University of California. All rights reserved. 394ec4b3d5Smillert * 404ec4b3d5Smillert * Redistribution and use in source and binary forms, with or without 414ec4b3d5Smillert * modification, are permitted provided that the following conditions 424ec4b3d5Smillert * are met: 434ec4b3d5Smillert * 1. Redistributions of source code must retain the above copyright 444ec4b3d5Smillert * notice, this list of conditions and the following disclaimer. 454ec4b3d5Smillert * 2. Redistributions in binary form must reproduce the above copyright 464ec4b3d5Smillert * notice, this list of conditions and the following disclaimer in the 474ec4b3d5Smillert * documentation and/or other materials provided with the distribution. 484ec4b3d5Smillert * 3. Neither the name of the University nor the names of its contributors 494ec4b3d5Smillert * may be used to endorse or promote products derived from this software 504ec4b3d5Smillert * without specific prior written permission. 514ec4b3d5Smillert * 524ec4b3d5Smillert * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 534ec4b3d5Smillert * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 544ec4b3d5Smillert * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 554ec4b3d5Smillert * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 564ec4b3d5Smillert * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 574ec4b3d5Smillert * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 584ec4b3d5Smillert * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 594ec4b3d5Smillert * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 604ec4b3d5Smillert * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 614ec4b3d5Smillert * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 624ec4b3d5Smillert * SUCH DAMAGE. 634ec4b3d5Smillert * 644ec4b3d5Smillert * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 654ec4b3d5Smillert */ 664ec4b3d5Smillert 674ec4b3d5Smillert #ifndef lint 68*dba1d6eaSray static const char rcsid[] = "$OpenBSD: diffreg.c,v 1.71 2009/06/06 15:00:27 ray Exp $"; 694ec4b3d5Smillert #endif /* not lint */ 70d0c3f575Sderaadt 717b6ec9e4Smillert #include <sys/param.h> 724ec4b3d5Smillert #include <sys/stat.h> 73b4bca33fSmillert #include <sys/wait.h> 7426da422aStedu 754ec4b3d5Smillert #include <ctype.h> 764ec4b3d5Smillert #include <err.h> 777b6ec9e4Smillert #include <errno.h> 7826da422aStedu #include <fcntl.h> 790efcb26eSmillert #include <stddef.h> 804ec4b3d5Smillert #include <stdio.h> 814ec4b3d5Smillert #include <stdlib.h> 8226da422aStedu #include <string.h> 834ec4b3d5Smillert #include <unistd.h> 84ae8d569bSderaadt 85ae8d569bSderaadt #include "diff.h" 86b4bca33fSmillert #include "pathnames.h" 874a034c3aSray #include "xmalloc.h" 8826da422aStedu 89ae8d569bSderaadt /* 90ae8d569bSderaadt * diff - compare two files. 91ae8d569bSderaadt */ 92ae8d569bSderaadt 93ae8d569bSderaadt /* 94ae8d569bSderaadt * Uses an algorithm due to Harold Stone, which finds 95ae8d569bSderaadt * a pair of longest identical subsequences in the two 96ae8d569bSderaadt * files. 97ae8d569bSderaadt * 98ae8d569bSderaadt * The major goal is to generate the match vector J. 99ae8d569bSderaadt * J[i] is the index of the line in file1 corresponding 100ae8d569bSderaadt * to line i file0. J[i] = 0 if there is no 101ae8d569bSderaadt * such line in file1. 102ae8d569bSderaadt * 103ae8d569bSderaadt * Lines are hashed so as to work in core. All potential 104ae8d569bSderaadt * matches are located by sorting the lines of each file 105ae8d569bSderaadt * on the hash (called ``value''). In particular, this 106ae8d569bSderaadt * collects the equivalence classes in file1 together. 107ae8d569bSderaadt * Subroutine equiv replaces the value of each line in 108ae8d569bSderaadt * file0 by the index of the first element of its 109ae8d569bSderaadt * matching equivalence in (the reordered) file1. 110ae8d569bSderaadt * To save space equiv squeezes file1 into a single 111ae8d569bSderaadt * array member in which the equivalence classes 112ae8d569bSderaadt * are simply concatenated, except that their first 113ae8d569bSderaadt * members are flagged by changing sign. 114ae8d569bSderaadt * 115ae8d569bSderaadt * Next the indices that point into member are unsorted into 116ae8d569bSderaadt * array class according to the original order of file0. 117ae8d569bSderaadt * 118ae8d569bSderaadt * The cleverness lies in routine stone. This marches 119ae8d569bSderaadt * through the lines of file0, developing a vector klist 120ae8d569bSderaadt * of "k-candidates". At step i a k-candidate is a matched 121ae8d569bSderaadt * pair of lines x,y (x in file0 y in file1) such that 122ae8d569bSderaadt * there is a common subsequence of length k 123ae8d569bSderaadt * between the first i lines of file0 and the first y 124ae8d569bSderaadt * lines of file1, but there is no such subsequence for 125ae8d569bSderaadt * any smaller y. x is the earliest possible mate to y 126ae8d569bSderaadt * that occurs in such a subsequence. 127ae8d569bSderaadt * 128ae8d569bSderaadt * Whenever any of the members of the equivalence class of 129ae8d569bSderaadt * lines in file1 matable to a line in file0 has serial number 130ae8d569bSderaadt * less than the y of some k-candidate, that k-candidate 131ae8d569bSderaadt * with the smallest such y is replaced. The new 132ae8d569bSderaadt * k-candidate is chained (via pred) to the current 133ae8d569bSderaadt * k-1 candidate so that the actual subsequence can 134ae8d569bSderaadt * be recovered. When a member has serial number greater 135ae8d569bSderaadt * that the y of all k-candidates, the klist is extended. 136ae8d569bSderaadt * At the end, the longest subsequence is pulled out 137ae8d569bSderaadt * and placed in the array J by unravel 138ae8d569bSderaadt * 139ae8d569bSderaadt * With J in hand, the matches there recorded are 140ae8d569bSderaadt * check'ed against reality to assure that no spurious 141ae8d569bSderaadt * matches have crept in due to hashing. If they have, 142ae8d569bSderaadt * they are broken, and "jackpot" is recorded--a harmless 143ae8d569bSderaadt * matter except that a true match for a spuriously 144ae8d569bSderaadt * mated line may now be unnecessarily reported as a change. 145ae8d569bSderaadt * 146ae8d569bSderaadt * Much of the complexity of the program comes simply 147ae8d569bSderaadt * from trying to minimize core utilization and 148ae8d569bSderaadt * maximize the range of doable problems by dynamically 149ae8d569bSderaadt * allocating what is needed and reusing what is not. 150ae8d569bSderaadt * The core requirements for problems larger than somewhat 151ae8d569bSderaadt * are (in words) 2*length(file0) + length(file1) + 152ae8d569bSderaadt * 3*(number of k-candidates installed), typically about 153ae8d569bSderaadt * 6n words for files of length n. 154ae8d569bSderaadt */ 155ae8d569bSderaadt 156ae8d569bSderaadt struct cand { 157ae8d569bSderaadt int x; 158ae8d569bSderaadt int y; 159ae8d569bSderaadt int pred; 16060b9d8fdSderaadt }; 16126da422aStedu 162ae8d569bSderaadt struct line { 163ae8d569bSderaadt int serial; 164ae8d569bSderaadt int value; 16592af4c2dStedu } *file[2]; 16626da422aStedu 1670efcb26eSmillert /* 1680efcb26eSmillert * The following struct is used to record change information when 1690efcb26eSmillert * doing a "context" or "unified" diff. (see routine "change" to 1700efcb26eSmillert * understand the highly mnemonic field names) 1710efcb26eSmillert */ 1720efcb26eSmillert struct context_vec { 1730efcb26eSmillert int a; /* start line in old file */ 1740efcb26eSmillert int b; /* end line in old file */ 1750efcb26eSmillert int c; /* start line in new file */ 1760efcb26eSmillert int d; /* end line in new file */ 1770efcb26eSmillert }; 1780efcb26eSmillert 1797b6ec9e4Smillert static FILE *opentemp(const char *); 180ac73e8e6Smillert static void output(char *, FILE *, char *, FILE *, int); 181*dba1d6eaSray static void check(char *, FILE *, char *, FILE *, int); 18226da422aStedu static void range(int, int, char *); 183c72ea322Smillert static void uni_range(int, int); 184*dba1d6eaSray static void dump_context_vec(FILE *, FILE *, int); 185*dba1d6eaSray static void dump_unified_vec(FILE *, FILE *, int); 186*dba1d6eaSray static void prepare(int, FILE *, off_t, int); 18726da422aStedu static void prune(void); 18826da422aStedu static void equiv(struct line *, int, struct line *, int, int *); 18926da422aStedu static void unravel(int); 19026da422aStedu static void unsort(struct line *, int, int *); 19161783bcdSespie static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 19226da422aStedu static void sort(struct line *, int); 1937bdb251cSmillert static void print_header(const char *, const char *); 194ccd55a2cSotto static int ignoreline(char *); 1954ec4b3d5Smillert static int asciifile(FILE *); 196*dba1d6eaSray static int fetch(long *, int, int, FILE *, int, int, int); 19726da422aStedu static int newcand(int, int, int); 19826da422aStedu static int search(int *, int, int); 1994ec4b3d5Smillert static int skipline(FILE *); 2006e18f850Sotto static int isqrt(int); 201*dba1d6eaSray static int stone(int *, int, int *, int *, int); 202*dba1d6eaSray static int readhash(FILE *, int); 2034ec4b3d5Smillert static int files_differ(FILE *, FILE *, int); 20496e45528Sotto static char *match_function(const long *, int, FILE *); 205ccd55a2cSotto static char *preadline(int, size_t, off_t); 2066e18f850Sotto 207aa215433Sray static int *J; /* will be overlaid on class */ 208aa215433Sray static int *class; /* will be overlaid on file[0] */ 209aa215433Sray static int *klist; /* will be overlaid on file[0] after class */ 210aa215433Sray static int *member; /* will be overlaid on file[1] */ 211aa215433Sray static int clen; 212aa215433Sray static int inifdef; /* whether or not we are in a #ifdef block */ 213aa215433Sray static int len[2]; 214aa215433Sray static int pref, suff; /* length of prefix and suffix */ 215aa215433Sray static int slen[2]; 216aa215433Sray static int anychange; 217aa215433Sray static long *ixnew; /* will be overlaid on file[1] */ 218aa215433Sray static long *ixold; /* will be overlaid on klist */ 219aa215433Sray static struct cand *clist; /* merely a free storage pot for candidates */ 220aa215433Sray static int clistlen; /* the length of clist */ 221aa215433Sray static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 222aa215433Sray static u_char *chrtran; /* translation table for case-folding */ 223aa215433Sray static struct context_vec *context_vec_start; 224aa215433Sray static struct context_vec *context_vec_end; 225aa215433Sray static struct context_vec *context_vec_ptr; 226aa215433Sray 227aa215433Sray #define FUNCTION_CONTEXT_SIZE 55 228aa215433Sray static char lastbuf[FUNCTION_CONTEXT_SIZE]; 229aa215433Sray static int lastline; 230aa215433Sray static int lastmatchline; 231aa215433Sray 23226da422aStedu 23326da422aStedu /* 23426da422aStedu * chrtran points to one of 2 translation tables: cup2low if folding upper to 23526da422aStedu * lower case clow2low if not folding case 236ae8d569bSderaadt */ 2378a15d8deSderaadt u_char clow2low[256] = { 23826da422aStedu 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 23926da422aStedu 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 24026da422aStedu 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 24126da422aStedu 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 24226da422aStedu 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 24326da422aStedu 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 24426da422aStedu 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 24526da422aStedu 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 24626da422aStedu 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 24726da422aStedu 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 24826da422aStedu 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 24926da422aStedu 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 25026da422aStedu 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 25126da422aStedu 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 25226da422aStedu 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 25326da422aStedu 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 25426da422aStedu 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 25526da422aStedu 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 25626da422aStedu 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 25726da422aStedu 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 25826da422aStedu 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 25926da422aStedu 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 26026da422aStedu 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 26126da422aStedu 0xfd, 0xfe, 0xff 262ae8d569bSderaadt }; 263ae8d569bSderaadt 2648a15d8deSderaadt u_char cup2low[256] = { 26526da422aStedu 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 26626da422aStedu 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 26726da422aStedu 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 26826da422aStedu 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 26926da422aStedu 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 27026da422aStedu 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 27126da422aStedu 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 27226da422aStedu 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 27326da422aStedu 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 27426da422aStedu 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 27526da422aStedu 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 27626da422aStedu 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 27726da422aStedu 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 27826da422aStedu 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 27926da422aStedu 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 28026da422aStedu 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 28126da422aStedu 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 28226da422aStedu 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 28326da422aStedu 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 28426da422aStedu 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 28526da422aStedu 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 28626da422aStedu 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 28726da422aStedu 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 28826da422aStedu 0xfd, 0xfe, 0xff 289ae8d569bSderaadt }; 290ae8d569bSderaadt 291b4bca33fSmillert int 2924ec4b3d5Smillert diffreg(char *ofile1, char *ofile2, int flags) 293ae8d569bSderaadt { 2944ec4b3d5Smillert char *file1 = ofile1; 2954ec4b3d5Smillert char *file2 = ofile2; 296*dba1d6eaSray FILE *f1, *f2; 297*dba1d6eaSray int i, rval, ostdout = -1; 298b4bca33fSmillert pid_t pid = -1; 299ae8d569bSderaadt 300*dba1d6eaSray f1 = f2 = NULL; 301*dba1d6eaSray rval = D_SAME; 3024ec4b3d5Smillert anychange = 0; 30396e45528Sotto lastline = 0; 30496e45528Sotto lastmatchline = 0; 3050efcb26eSmillert context_vec_ptr = context_vec_start - 1; 306*dba1d6eaSray if (flags & D_IGNORECASE) 307*dba1d6eaSray chrtran = cup2low; 308*dba1d6eaSray else 309*dba1d6eaSray chrtran = clow2low; 3107b6ec9e4Smillert if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 311fed3a06dSmillert return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 31266e5764eSmillert if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 313f28259e9Smillert goto closem; 3144ec4b3d5Smillert 3154ec4b3d5Smillert if (flags & D_EMPTY1) 3164ec4b3d5Smillert f1 = fopen(_PATH_DEVNULL, "r"); 3174ec4b3d5Smillert else { 3187b6ec9e4Smillert if (!S_ISREG(stb1.st_mode)) { 3197b6ec9e4Smillert if ((f1 = opentemp(file1)) == NULL || 3207b6ec9e4Smillert fstat(fileno(f1), &stb1) < 0) { 3214ec4b3d5Smillert warn("%s", file1); 3224ec4b3d5Smillert status |= 2; 3234ec4b3d5Smillert goto closem; 32448b947b7Smillert } 3257b6ec9e4Smillert } else if (strcmp(file1, "-") == 0) 326b1a26502Smillert f1 = stdin; 327b1a26502Smillert else 3284ec4b3d5Smillert f1 = fopen(file1, "r"); 3294ec4b3d5Smillert } 3304ec4b3d5Smillert if (f1 == NULL) { 3314ec4b3d5Smillert warn("%s", file1); 3324ec4b3d5Smillert status |= 2; 3334ec4b3d5Smillert goto closem; 3344ec4b3d5Smillert } 3354ec4b3d5Smillert 3364ec4b3d5Smillert if (flags & D_EMPTY2) 3374ec4b3d5Smillert f2 = fopen(_PATH_DEVNULL, "r"); 3384ec4b3d5Smillert else { 3397b6ec9e4Smillert if (!S_ISREG(stb2.st_mode)) { 3407b6ec9e4Smillert if ((f2 = opentemp(file2)) == NULL || 3417b6ec9e4Smillert fstat(fileno(f2), &stb2) < 0) { 3424ec4b3d5Smillert warn("%s", file2); 3434ec4b3d5Smillert status |= 2; 3444ec4b3d5Smillert goto closem; 3454ec4b3d5Smillert } 3467b6ec9e4Smillert } else if (strcmp(file2, "-") == 0) 347b1a26502Smillert f2 = stdin; 348b1a26502Smillert else 3494ec4b3d5Smillert f2 = fopen(file2, "r"); 3504ec4b3d5Smillert } 3514ec4b3d5Smillert if (f2 == NULL) { 3524ec4b3d5Smillert warn("%s", file2); 3534ec4b3d5Smillert status |= 2; 3544ec4b3d5Smillert goto closem; 3554ec4b3d5Smillert } 3564ec4b3d5Smillert 3574ec4b3d5Smillert switch (files_differ(f1, f2, flags)) { 3584ec4b3d5Smillert case 0: 359b4bca33fSmillert goto closem; 3604ec4b3d5Smillert case 1: 3614ec4b3d5Smillert break; 3624ec4b3d5Smillert default: 3634ec4b3d5Smillert /* error */ 3644ec4b3d5Smillert status |= 2; 3654ec4b3d5Smillert goto closem; 366ae8d569bSderaadt } 3674ec4b3d5Smillert 368*dba1d6eaSray if ((flags & D_FORCEASCII) == 0 && 369*dba1d6eaSray (!asciifile(f1) || !asciifile(f2))) { 370b4bca33fSmillert rval = D_BINARY; 3715f9fc8aaSmillert status |= 1; 372cab5d83cSmillert goto closem; 373cab5d83cSmillert } 374b4bca33fSmillert if (lflag) { 375b4bca33fSmillert /* redirect stdout to pr */ 376b4bca33fSmillert int pfd[2]; 377b4bca33fSmillert char *header; 378b4bca33fSmillert char *prargv[] = { "pr", "-h", NULL, "-f", NULL }; 379b4bca33fSmillert 3804a034c3aSray xasprintf(&header, "%s %s %s", diffargs, file1, file2); 381b4bca33fSmillert prargv[2] = header; 382b4bca33fSmillert fflush(stdout); 383b4bca33fSmillert rewind(stdout); 384b4bca33fSmillert pipe(pfd); 385b4bca33fSmillert switch ((pid = fork())) { 386b4bca33fSmillert case -1: 387b4bca33fSmillert warnx("No more processes"); 388b4bca33fSmillert status |= 2; 3894a034c3aSray xfree(header); 390b4bca33fSmillert return (D_ERROR); 391b4bca33fSmillert case 0: 392b4bca33fSmillert /* child */ 393b4bca33fSmillert if (pfd[0] != STDIN_FILENO) { 394b4bca33fSmillert dup2(pfd[0], STDIN_FILENO); 395b4bca33fSmillert close(pfd[0]); 396b4bca33fSmillert } 397b4bca33fSmillert close(pfd[1]); 398b4bca33fSmillert execv(_PATH_PR, prargv); 399b4bca33fSmillert _exit(127); 400b4bca33fSmillert default: 401b4bca33fSmillert /* parent */ 402b4bca33fSmillert if (pfd[1] != STDOUT_FILENO) { 403b4bca33fSmillert ostdout = dup(STDOUT_FILENO); 404b4bca33fSmillert dup2(pfd[1], STDOUT_FILENO); 405b4bca33fSmillert close(pfd[1]); 406b4bca33fSmillert } 407b4bca33fSmillert close(pfd[0]); 408b4bca33fSmillert rewind(stdout); 4094a034c3aSray xfree(header); 410b4bca33fSmillert } 411ac73e8e6Smillert } 412*dba1d6eaSray prepare(0, f1, stb1.st_size, flags); 413*dba1d6eaSray prepare(1, f2, stb2.st_size, flags); 414ae8d569bSderaadt prune(); 415ae8d569bSderaadt sort(sfile[0], slen[0]); 416ae8d569bSderaadt sort(sfile[1], slen[1]); 417ae8d569bSderaadt 418ae8d569bSderaadt member = (int *)file[1]; 419ae8d569bSderaadt equiv(sfile[0], slen[0], sfile[1], slen[1], member); 420aa215433Sray member = xrealloc(member, slen[1] + 2, sizeof(*member)); 421ae8d569bSderaadt 422ae8d569bSderaadt class = (int *)file[0]; 423ae8d569bSderaadt unsort(sfile[0], slen[0], class); 424aa215433Sray class = xrealloc(class, slen[0] + 2, sizeof(*class)); 425ae8d569bSderaadt 426aa215433Sray klist = xmalloc((slen[0] + 2) * sizeof(*klist)); 427058e94f4Smillert clen = 0; 4286e18f850Sotto clistlen = 100; 429aa215433Sray clist = xmalloc(clistlen * sizeof(*clist)); 430*dba1d6eaSray i = stone(class, slen[0], member, klist, flags); 4314a034c3aSray xfree(member); 4324a034c3aSray xfree(class); 433ae8d569bSderaadt 434aa215433Sray J = xrealloc(J, len[0] + 2, sizeof(*J)); 435ae8d569bSderaadt unravel(klist[i]); 4364a034c3aSray xfree(clist); 4374a034c3aSray xfree(klist); 438ae8d569bSderaadt 439aa215433Sray ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold)); 440aa215433Sray ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew)); 441*dba1d6eaSray check(file1, f1, file2, f2, flags); 442*dba1d6eaSray output(file1, f1, file2, f2, flags); 443b4bca33fSmillert if (ostdout != -1) { 444b4bca33fSmillert int wstatus; 4454ec4b3d5Smillert 446b4bca33fSmillert /* close the pipe to pr and restore stdout */ 447b4bca33fSmillert fflush(stdout); 448b4bca33fSmillert rewind(stdout); 449b4bca33fSmillert if (ostdout != STDOUT_FILENO) { 450b4bca33fSmillert close(STDOUT_FILENO); 451b4bca33fSmillert dup2(ostdout, STDOUT_FILENO); 452b4bca33fSmillert close(ostdout); 453b4bca33fSmillert } 454b4bca33fSmillert waitpid(pid, &wstatus, 0); 45552e5bd6bSmillert } 4564ec4b3d5Smillert closem: 4575afc3be2Smillert if (anychange) { 4585afc3be2Smillert status |= 1; 4595afc3be2Smillert if (rval == D_SAME) 4605afc3be2Smillert rval = D_DIFFER; 4615afc3be2Smillert } 4624ec4b3d5Smillert if (f1 != NULL) 4634ec4b3d5Smillert fclose(f1); 4644ec4b3d5Smillert if (f2 != NULL) 4654ec4b3d5Smillert fclose(f2); 4667b6ec9e4Smillert if (file1 != ofile1) 4674a034c3aSray xfree(file1); 4687b6ec9e4Smillert if (file2 != ofile2) 4694a034c3aSray xfree(file2); 470*dba1d6eaSray 471b4bca33fSmillert return (rval); 4724ec4b3d5Smillert } 4734ec4b3d5Smillert 4744ec4b3d5Smillert /* 4754ec4b3d5Smillert * Check to see if the given files differ. 4764ec4b3d5Smillert * Returns 0 if they are the same, 1 if different, and -1 on error. 4774ec4b3d5Smillert * XXX - could use code from cmp(1) [faster] 4784ec4b3d5Smillert */ 4794ec4b3d5Smillert static int 4804ec4b3d5Smillert files_differ(FILE *f1, FILE *f2, int flags) 4814ec4b3d5Smillert { 4824ec4b3d5Smillert char buf1[BUFSIZ], buf2[BUFSIZ]; 4834ec4b3d5Smillert size_t i, j; 4844ec4b3d5Smillert 4854ec4b3d5Smillert if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 4864ec4b3d5Smillert (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 4874ec4b3d5Smillert return (1); 4884ec4b3d5Smillert for (;;) { 4894ec4b3d5Smillert i = fread(buf1, 1, sizeof(buf1), f1); 4904ec4b3d5Smillert j = fread(buf2, 1, sizeof(buf2), f2); 4914ec4b3d5Smillert if (i != j) 4924ec4b3d5Smillert return (1); 4934ec4b3d5Smillert if (i == 0 && j == 0) { 4944ec4b3d5Smillert if (ferror(f1) || ferror(f2)) 4954ec4b3d5Smillert return (1); 4964ec4b3d5Smillert return (0); 4974ec4b3d5Smillert } 4984ec4b3d5Smillert if (memcmp(buf1, buf2, i) != 0) 4994ec4b3d5Smillert return (1); 5004ec4b3d5Smillert } 501ae8d569bSderaadt } 502ae8d569bSderaadt 5037b6ec9e4Smillert static FILE * 5047b6ec9e4Smillert opentemp(const char *file) 505ae8d569bSderaadt { 5067b6ec9e4Smillert char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN]; 5077b6ec9e4Smillert ssize_t nread; 5087b6ec9e4Smillert int ifd, ofd; 50948b947b7Smillert 51048b947b7Smillert if (strcmp(file, "-") == 0) 51148b947b7Smillert ifd = STDIN_FILENO; 51266e5764eSmillert else if ((ifd = open(file, O_RDONLY, 0644)) < 0) 5134ec4b3d5Smillert return (NULL); 51448b947b7Smillert 51548b947b7Smillert if ((tempdir = getenv("TMPDIR")) == NULL) 51648b947b7Smillert tempdir = _PATH_TMP; 51709bddaddSotto 51809bddaddSotto if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) || 51909bddaddSotto strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >= 52009bddaddSotto sizeof(tempfile)) { 5217b6ec9e4Smillert close(ifd); 5227b6ec9e4Smillert errno = ENAMETOOLONG; 5234ec4b3d5Smillert return (NULL); 524ae8d569bSderaadt } 5257b6ec9e4Smillert 5267b6ec9e4Smillert if ((ofd = mkstemp(tempfile)) < 0) 5277b6ec9e4Smillert return (NULL); 5287b6ec9e4Smillert unlink(tempfile); 5297b6ec9e4Smillert while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 5307b6ec9e4Smillert if (write(ofd, buf, nread) != nread) { 53148b947b7Smillert close(ifd); 53248b947b7Smillert close(ofd); 5337b6ec9e4Smillert return (NULL); 5347b6ec9e4Smillert } 5357b6ec9e4Smillert } 5367b6ec9e4Smillert close(ifd); 537f28259e9Smillert lseek(ofd, (off_t)0, SEEK_SET); 5387b6ec9e4Smillert return (fdopen(ofd, "r")); 539ae8d569bSderaadt } 540ae8d569bSderaadt 541ae8d569bSderaadt char * 54226da422aStedu splice(char *dir, char *file) 543ae8d569bSderaadt { 54449dffe13Smillert char *tail, *buf; 545ae8d569bSderaadt 5467b6ec9e4Smillert if ((tail = strrchr(file, '/')) == NULL) 547ae8d569bSderaadt tail = file; 548ae8d569bSderaadt else 549ae8d569bSderaadt tail++; 5504a034c3aSray xasprintf(&buf, "%s/%s", dir, tail); 55149dffe13Smillert return (buf); 552ae8d569bSderaadt } 553ae8d569bSderaadt 55426da422aStedu static void 555*dba1d6eaSray prepare(int i, FILE *fd, off_t filesize, int flags) 556ae8d569bSderaadt { 55726da422aStedu struct line *p; 55826da422aStedu int j, h; 559739e7267Sotto size_t sz; 560ae8d569bSderaadt 5614ec4b3d5Smillert rewind(fd); 562739e7267Sotto 563739e7267Sotto sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25; 564739e7267Sotto if (sz < 100) 565a8013e93Sotto sz = 100; 566739e7267Sotto 567aa215433Sray p = xmalloc((sz + 3) * sizeof(*p)); 568*dba1d6eaSray for (j = 0; (h = readhash(fd, flags));) { 569a8013e93Sotto if (j == sz) { 570a8013e93Sotto sz = sz * 3 / 2; 571aa215433Sray p = xrealloc(p, sz + 3, sizeof(*p)); 572a8013e93Sotto } 573a8013e93Sotto p[++j].value = h; 574ae8d569bSderaadt } 575ae8d569bSderaadt len[i] = j; 576ae8d569bSderaadt file[i] = p; 577ae8d569bSderaadt } 578ae8d569bSderaadt 57926da422aStedu static void 58026da422aStedu prune(void) 581ae8d569bSderaadt { 58226da422aStedu int i, j; 58348b8c3e3Sderaadt 584ae8d569bSderaadt for (pref = 0; pref < len[0] && pref < len[1] && 585ae8d569bSderaadt file[0][pref + 1].value == file[1][pref + 1].value; 5867d2b2b91Sderaadt pref++) 5877d2b2b91Sderaadt ; 588ae8d569bSderaadt for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 589ae8d569bSderaadt file[0][len[0] - suff].value == file[1][len[1] - suff].value; 5907d2b2b91Sderaadt suff++) 5917d2b2b91Sderaadt ; 592ae8d569bSderaadt for (j = 0; j < 2; j++) { 593ae8d569bSderaadt sfile[j] = file[j] + pref; 594ae8d569bSderaadt slen[j] = len[j] - pref - suff; 595ae8d569bSderaadt for (i = 0; i <= slen[j]; i++) 596ae8d569bSderaadt sfile[j][i].serial = i; 597ae8d569bSderaadt } 598ae8d569bSderaadt } 599ae8d569bSderaadt 60026da422aStedu static void 60126da422aStedu equiv(struct line *a, int n, struct line *b, int m, int *c) 602ae8d569bSderaadt { 60326da422aStedu int i, j; 60448b8c3e3Sderaadt 605ae8d569bSderaadt i = j = 1; 606ae8d569bSderaadt while (i <= n && j <= m) { 607ae8d569bSderaadt if (a[i].value < b[j].value) 608ae8d569bSderaadt a[i++].value = 0; 609ae8d569bSderaadt else if (a[i].value == b[j].value) 610ae8d569bSderaadt a[i++].value = j; 611ae8d569bSderaadt else 612ae8d569bSderaadt j++; 613ae8d569bSderaadt } 614ae8d569bSderaadt while (i <= n) 615ae8d569bSderaadt a[i++].value = 0; 616ae8d569bSderaadt b[m + 1].value = 0; 617ae8d569bSderaadt j = 0; 618ae8d569bSderaadt while (++j <= m) { 619ae8d569bSderaadt c[j] = -b[j].serial; 620ae8d569bSderaadt while (b[j + 1].value == b[j].value) { 621ae8d569bSderaadt j++; 622ae8d569bSderaadt c[j] = b[j].serial; 623ae8d569bSderaadt } 624ae8d569bSderaadt } 625ae8d569bSderaadt c[j] = -1; 626ae8d569bSderaadt } 627ae8d569bSderaadt 6286e18f850Sotto /* Code taken from ping.c */ 6296e18f850Sotto static int 6306e18f850Sotto isqrt(int n) 6316e18f850Sotto { 6326e18f850Sotto int y, x = 1; 6336e18f850Sotto 6346e18f850Sotto if (n == 0) 6356e18f850Sotto return (0); 6366e18f850Sotto 6376e18f850Sotto do { /* newton was a stinker */ 6386e18f850Sotto y = x; 6396e18f850Sotto x = n / x; 6406e18f850Sotto x += y; 6416e18f850Sotto x /= 2; 6426e18f850Sotto } while ((x - y) > 1 || (x - y) < -1); 6436e18f850Sotto 6446e18f850Sotto return (x); 6456e18f850Sotto } 6466e18f850Sotto 64726da422aStedu static int 648*dba1d6eaSray stone(int *a, int n, int *b, int *c, int flags) 649ae8d569bSderaadt { 65048b8c3e3Sderaadt int i, k, y, j, l; 65148b8c3e3Sderaadt int oldc, tc, oldl; 6522a89a2f7Smillert u_int numtries; 6536e18f850Sotto 654*dba1d6eaSray /* XXX move the isqrt() out of the macro to avoid multiple calls */ 655*dba1d6eaSray const u_int bound = (flags & D_MINIMAL) ? UINT_MAX : 656*dba1d6eaSray MAX(256, isqrt(n)); 65748b8c3e3Sderaadt 658ae8d569bSderaadt k = 0; 659ae8d569bSderaadt c[0] = newcand(0, 0, 0); 660ae8d569bSderaadt for (i = 1; i <= n; i++) { 661ae8d569bSderaadt j = a[i]; 662ae8d569bSderaadt if (j == 0) 663ae8d569bSderaadt continue; 664ae8d569bSderaadt y = -b[j]; 665ae8d569bSderaadt oldl = 0; 666ae8d569bSderaadt oldc = c[0]; 6672a89a2f7Smillert numtries = 0; 668ae8d569bSderaadt do { 669ae8d569bSderaadt if (y <= clist[oldc].y) 670ae8d569bSderaadt continue; 671ae8d569bSderaadt l = search(c, k, y); 672ae8d569bSderaadt if (l != oldl + 1) 673ae8d569bSderaadt oldc = c[l - 1]; 674ae8d569bSderaadt if (l <= k) { 675ae8d569bSderaadt if (clist[c[l]].y <= y) 676ae8d569bSderaadt continue; 677ae8d569bSderaadt tc = c[l]; 678ae8d569bSderaadt c[l] = newcand(i, y, oldc); 679ae8d569bSderaadt oldc = tc; 680ae8d569bSderaadt oldl = l; 6812a89a2f7Smillert numtries++; 682ae8d569bSderaadt } else { 683ae8d569bSderaadt c[l] = newcand(i, y, oldc); 684ae8d569bSderaadt k++; 685ae8d569bSderaadt break; 686ae8d569bSderaadt } 6872a89a2f7Smillert } while ((y = b[++j]) > 0 && numtries < bound); 688ae8d569bSderaadt } 689ae8d569bSderaadt return (k); 690ae8d569bSderaadt } 691ae8d569bSderaadt 69226da422aStedu static int 69326da422aStedu newcand(int x, int y, int pred) 694ae8d569bSderaadt { 69526da422aStedu struct cand *q; 69626da422aStedu 6976e18f850Sotto if (clen == clistlen) { 6986e18f850Sotto clistlen = clistlen * 11 / 10; 699aa215433Sray clist = xrealloc(clist, clistlen, sizeof(*clist)); 7006e18f850Sotto } 7016e18f850Sotto q = clist + clen; 702ae8d569bSderaadt q->x = x; 703ae8d569bSderaadt q->y = y; 704ae8d569bSderaadt q->pred = pred; 7056e18f850Sotto return (clen++); 706ae8d569bSderaadt } 707ae8d569bSderaadt 70826da422aStedu static int 70926da422aStedu search(int *c, int k, int y) 710ae8d569bSderaadt { 71148b8c3e3Sderaadt int i, j, l, t; 71248b8c3e3Sderaadt 713ae8d569bSderaadt if (clist[c[k]].y < y) /* quick look for typical case */ 714ae8d569bSderaadt return (k + 1); 715ae8d569bSderaadt i = 0; 716ae8d569bSderaadt j = k + 1; 717*dba1d6eaSray for (;;) { 718*dba1d6eaSray l = (i + j) / 2; 719*dba1d6eaSray if (l <= i) 720ae8d569bSderaadt break; 721ae8d569bSderaadt t = clist[c[l]].y; 722ae8d569bSderaadt if (t > y) 723ae8d569bSderaadt j = l; 724ae8d569bSderaadt else if (t < y) 725ae8d569bSderaadt i = l; 726ae8d569bSderaadt else 727ae8d569bSderaadt return (l); 728ae8d569bSderaadt } 729ae8d569bSderaadt return (l + 1); 730ae8d569bSderaadt } 731ae8d569bSderaadt 73226da422aStedu static void 73326da422aStedu unravel(int p) 734ae8d569bSderaadt { 73526da422aStedu struct cand *q; 73648b8c3e3Sderaadt int i; 73748b8c3e3Sderaadt 738ae8d569bSderaadt for (i = 0; i <= len[0]; i++) 739ae8d569bSderaadt J[i] = i <= pref ? i : 7407d2b2b91Sderaadt i > len[0] - suff ? i + len[1] - len[0] : 0; 741ae8d569bSderaadt for (q = clist + p; q->y != 0; q = clist + q->pred) 742ae8d569bSderaadt J[q->x + pref] = q->y + pref; 743ae8d569bSderaadt } 744ae8d569bSderaadt 74526da422aStedu /* 74649dffe13Smillert * Check does double duty: 74749dffe13Smillert * 1. ferret out any fortuitous correspondences due 74849dffe13Smillert * to confounding by hashing (which result in "jackpot") 74949dffe13Smillert * 2. collect random access indexes to the two files 75026da422aStedu */ 75126da422aStedu static void 752*dba1d6eaSray check(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 753ae8d569bSderaadt { 75448b8c3e3Sderaadt int i, j, jackpot, c, d; 755ae8d569bSderaadt long ctold, ctnew; 756ae8d569bSderaadt 7574ec4b3d5Smillert rewind(f1); 7584ec4b3d5Smillert rewind(f2); 759ae8d569bSderaadt j = 1; 760ae8d569bSderaadt ixold[0] = ixnew[0] = 0; 761ae8d569bSderaadt jackpot = 0; 762ae8d569bSderaadt ctold = ctnew = 0; 763ae8d569bSderaadt for (i = 1; i <= len[0]; i++) { 764ae8d569bSderaadt if (J[i] == 0) { 7654ec4b3d5Smillert ixold[i] = ctold += skipline(f1); 766ae8d569bSderaadt continue; 767ae8d569bSderaadt } 768ae8d569bSderaadt while (j < J[i]) { 7694ec4b3d5Smillert ixnew[j] = ctnew += skipline(f2); 770ae8d569bSderaadt j++; 771ae8d569bSderaadt } 772*dba1d6eaSray if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) { 773ae8d569bSderaadt for (;;) { 7744ec4b3d5Smillert c = getc(f1); 7754ec4b3d5Smillert d = getc(f2); 776bb34d48bSmillert /* 777bb34d48bSmillert * GNU diff ignores a missing newline 778*dba1d6eaSray * in one file for -b or -w. 779bb34d48bSmillert */ 780*dba1d6eaSray if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) && 781bb34d48bSmillert ((c == EOF && d == '\n') || 782bb34d48bSmillert (c == '\n' && d == EOF))) { 783bb34d48bSmillert break; 784bb34d48bSmillert } 785ae8d569bSderaadt ctold++; 786ae8d569bSderaadt ctnew++; 787*dba1d6eaSray if ((flags & D_FOLDBLANKS) && isspace(c) && 788*dba1d6eaSray isspace(d)) { 789ae8d569bSderaadt do { 790ae8d569bSderaadt if (c == '\n') 791ae8d569bSderaadt break; 792ae8d569bSderaadt ctold++; 7934ec4b3d5Smillert } while (isspace(c = getc(f1))); 794ae8d569bSderaadt do { 795ae8d569bSderaadt if (d == '\n') 796ae8d569bSderaadt break; 797ae8d569bSderaadt ctnew++; 7984ec4b3d5Smillert } while (isspace(d = getc(f2))); 799*dba1d6eaSray } else if ((flags & D_IGNOREBLANKS)) { 800ae8d569bSderaadt while (isspace(c) && c != '\n') { 8014ec4b3d5Smillert c = getc(f1); 802ae8d569bSderaadt ctold++; 803ae8d569bSderaadt } 804ae8d569bSderaadt while (isspace(d) && d != '\n') { 8054ec4b3d5Smillert d = getc(f2); 806ae8d569bSderaadt ctnew++; 807ae8d569bSderaadt } 808ae8d569bSderaadt } 809ae8d569bSderaadt if (chrtran[c] != chrtran[d]) { 810ae8d569bSderaadt jackpot++; 811ae8d569bSderaadt J[i] = 0; 812bb34d48bSmillert if (c != '\n' && c != EOF) 8134ec4b3d5Smillert ctold += skipline(f1); 814bb34d48bSmillert if (d != '\n' && c != EOF) 8154ec4b3d5Smillert ctnew += skipline(f2); 816ae8d569bSderaadt break; 817ae8d569bSderaadt } 818bb34d48bSmillert if (c == '\n' || c == EOF) 819ae8d569bSderaadt break; 820ae8d569bSderaadt } 821ae8d569bSderaadt } else { 822ae8d569bSderaadt for (;;) { 823ae8d569bSderaadt ctold++; 824ae8d569bSderaadt ctnew++; 8254ec4b3d5Smillert if ((c = getc(f1)) != (d = getc(f2))) { 826ae8d569bSderaadt /* jackpot++; */ 827ae8d569bSderaadt J[i] = 0; 828bb34d48bSmillert if (c != '\n' && c != EOF) 8294ec4b3d5Smillert ctold += skipline(f1); 830bb34d48bSmillert if (d != '\n' && c != EOF) 8314ec4b3d5Smillert ctnew += skipline(f2); 832ae8d569bSderaadt break; 833ae8d569bSderaadt } 834bb34d48bSmillert if (c == '\n' || c == EOF) 835ae8d569bSderaadt break; 836ae8d569bSderaadt } 837ae8d569bSderaadt } 838ae8d569bSderaadt ixold[i] = ctold; 839ae8d569bSderaadt ixnew[j] = ctnew; 840ae8d569bSderaadt j++; 841ae8d569bSderaadt } 8424ec4b3d5Smillert for (; j <= len[1]; j++) 8434ec4b3d5Smillert ixnew[j] = ctnew += skipline(f2); 844ae8d569bSderaadt /* 84548b8c3e3Sderaadt * if (jackpot) 84648b8c3e3Sderaadt * fprintf(stderr, "jackpot\n"); 847ae8d569bSderaadt */ 848ae8d569bSderaadt } 849ae8d569bSderaadt 85048b8c3e3Sderaadt /* shellsort CACM #201 */ 85126da422aStedu static void 85226da422aStedu sort(struct line *a, int n) 85348b8c3e3Sderaadt { 85448b8c3e3Sderaadt struct line *ai, *aim, w; 85548b8c3e3Sderaadt int j, m = 0, k; 856ae8d569bSderaadt 857ae8d569bSderaadt if (n == 0) 858ae8d569bSderaadt return; 859ae8d569bSderaadt for (j = 1; j <= n; j *= 2) 860ae8d569bSderaadt m = 2 * j - 1; 861ae8d569bSderaadt for (m /= 2; m != 0; m /= 2) { 862ae8d569bSderaadt k = n - m; 863ae8d569bSderaadt for (j = 1; j <= k; j++) { 864ae8d569bSderaadt for (ai = &a[j]; ai > a; ai -= m) { 865ae8d569bSderaadt aim = &ai[m]; 866ae8d569bSderaadt if (aim < ai) 867ae8d569bSderaadt break; /* wraparound */ 868ae8d569bSderaadt if (aim->value > ai[0].value || 86926da422aStedu (aim->value == ai[0].value && 87026da422aStedu aim->serial > ai[0].serial)) 871ae8d569bSderaadt break; 872ae8d569bSderaadt w.value = ai[0].value; 873ae8d569bSderaadt ai[0].value = aim->value; 874ae8d569bSderaadt aim->value = w.value; 875ae8d569bSderaadt w.serial = ai[0].serial; 876ae8d569bSderaadt ai[0].serial = aim->serial; 877ae8d569bSderaadt aim->serial = w.serial; 878ae8d569bSderaadt } 879ae8d569bSderaadt } 880ae8d569bSderaadt } 881ae8d569bSderaadt } 882ae8d569bSderaadt 88326da422aStedu static void 88426da422aStedu unsort(struct line *f, int l, int *b) 885ae8d569bSderaadt { 88648b8c3e3Sderaadt int *a, i; 88726da422aStedu 888aa215433Sray a = xmalloc((l + 1) * sizeof(*a)); 889ae8d569bSderaadt for (i = 1; i <= l; i++) 890ae8d569bSderaadt a[f[i].serial] = f[i].value; 891ae8d569bSderaadt for (i = 1; i <= l; i++) 892ae8d569bSderaadt b[i] = a[i]; 8934a034c3aSray xfree(a); 894ae8d569bSderaadt } 895ae8d569bSderaadt 89626da422aStedu static int 8974ec4b3d5Smillert skipline(FILE *f) 898ae8d569bSderaadt { 89926da422aStedu int i, c; 900ae8d569bSderaadt 901bb34d48bSmillert for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 902bb34d48bSmillert continue; 903ae8d569bSderaadt return (i); 904ae8d569bSderaadt } 905ae8d569bSderaadt 90626da422aStedu static void 907ac73e8e6Smillert output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 908ae8d569bSderaadt { 90948b8c3e3Sderaadt int m, i0, i1, j0, j1; 91048b8c3e3Sderaadt 9114ec4b3d5Smillert rewind(f1); 9124ec4b3d5Smillert rewind(f2); 913ae8d569bSderaadt m = len[0]; 914ae8d569bSderaadt J[0] = 0; 915ae8d569bSderaadt J[m + 1] = len[1] + 1; 9164ec4b3d5Smillert if (format != D_EDIT) { 91726da422aStedu for (i0 = 1; i0 <= m; i0 = i1 + 1) { 91826da422aStedu while (i0 <= m && J[i0] == J[i0 - 1] + 1) 91926da422aStedu i0++; 920ae8d569bSderaadt j0 = J[i0 - 1] + 1; 921ae8d569bSderaadt i1 = i0 - 1; 92226da422aStedu while (i1 < m && J[i1 + 1] == 0) 92326da422aStedu i1++; 924ae8d569bSderaadt j1 = J[i1 + 1] - 1; 925ae8d569bSderaadt J[i1] = j1; 92661783bcdSespie change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 92726da422aStedu } 92826da422aStedu } else { 92926da422aStedu for (i0 = m; i0 >= 1; i0 = i1 - 1) { 93026da422aStedu while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 93126da422aStedu i0--; 932ae8d569bSderaadt j0 = J[i0 + 1] - 1; 933ae8d569bSderaadt i1 = i0 + 1; 93426da422aStedu while (i1 > 1 && J[i1 - 1] == 0) 93526da422aStedu i1--; 936ae8d569bSderaadt j1 = J[i1 - 1] + 1; 937ae8d569bSderaadt J[i1] = j1; 93861783bcdSespie change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 939ae8d569bSderaadt } 94026da422aStedu } 941ae8d569bSderaadt if (m == 0) 94261783bcdSespie change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 9434ec4b3d5Smillert if (format == D_IFDEF) { 944ae8d569bSderaadt for (;;) { 945ae8d569bSderaadt #define c i0 946bb34d48bSmillert if ((c = getc(f1)) == EOF) 947ae8d569bSderaadt return; 948ae8d569bSderaadt putchar(c); 949ae8d569bSderaadt } 950ae8d569bSderaadt #undef c 951ae8d569bSderaadt } 9529de32c1bSmillert if (anychange != 0) { 9534ec4b3d5Smillert if (format == D_CONTEXT) 954*dba1d6eaSray dump_context_vec(f1, f2, flags); 9554ec4b3d5Smillert else if (format == D_UNIFIED) 956*dba1d6eaSray dump_unified_vec(f1, f2, flags); 9579de32c1bSmillert } 958ae8d569bSderaadt } 959ae8d569bSderaadt 9604a034c3aSray static void 961c72ea322Smillert range(int a, int b, char *separator) 962c72ea322Smillert { 963c72ea322Smillert printf("%d", a > b ? b : a); 964c72ea322Smillert if (a < b) 965c72ea322Smillert printf("%s%d", separator, b); 966c72ea322Smillert } 967c72ea322Smillert 9684a034c3aSray static void 969c72ea322Smillert uni_range(int a, int b) 970c72ea322Smillert { 971c72ea322Smillert if (a < b) 972c72ea322Smillert printf("%d,%d", a, b - a + 1); 973c72ea322Smillert else if (a == b) 974c72ea322Smillert printf("%d", b); 975c72ea322Smillert else 976c72ea322Smillert printf("%d,0", b); 977c72ea322Smillert } 978c72ea322Smillert 979ccd55a2cSotto static char * 980*dba1d6eaSray preadline(int fd, size_t rlen, off_t off) 981ccd55a2cSotto { 982ccd55a2cSotto char *line; 983ccd55a2cSotto ssize_t nr; 984ccd55a2cSotto 985*dba1d6eaSray line = xmalloc(rlen + 1); 986*dba1d6eaSray if ((nr = pread(fd, line, rlen, off)) < 0) 987ccd55a2cSotto err(1, "preadline"); 9888d981b00Sotto if (nr > 0 && line[nr-1] == '\n') 9898d981b00Sotto nr--; 990ccd55a2cSotto line[nr] = '\0'; 991ccd55a2cSotto return (line); 992ccd55a2cSotto } 993ccd55a2cSotto 994ccd55a2cSotto static int 995ccd55a2cSotto ignoreline(char *line) 996ccd55a2cSotto { 997ccd55a2cSotto int ret; 998ccd55a2cSotto 999ccd55a2cSotto ret = regexec(&ignore_re, line, 0, NULL, 0); 10004a034c3aSray xfree(line); 1001ccd55a2cSotto return (ret == 0); /* if it matched, it should be ignored. */ 1002ccd55a2cSotto } 1003ccd55a2cSotto 1004ae8d569bSderaadt /* 10054ec4b3d5Smillert * Indicate that there is a difference between lines a and b of the from file 100626da422aStedu * to get to lines c to d of the to file. If a is greater then b then there 100726da422aStedu * are no lines in the from file involved and this means that there were 100826da422aStedu * lines appended (beginning at b). If c is greater than d then there are 100926da422aStedu * lines missing from the to file. 1010ae8d569bSderaadt */ 101126da422aStedu static void 1012ac73e8e6Smillert change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 101361783bcdSespie int *pflags) 1014ae8d569bSderaadt { 10150efcb26eSmillert static size_t max_context = 64; 1016f8a6bf23Smillert int i; 10170efcb26eSmillert 1018f8a6bf23Smillert restart: 10194ec4b3d5Smillert if (format != D_IFDEF && a > b && c > d) 1020ae8d569bSderaadt return; 1021ccd55a2cSotto if (ignore_pats != NULL) { 1022ccd55a2cSotto char *line; 1023ccd55a2cSotto /* 1024ccd55a2cSotto * All lines in the change, insert, or delete must 1025ccd55a2cSotto * match an ignore pattern for the change to be 1026ccd55a2cSotto * ignored. 1027ccd55a2cSotto */ 1028ccd55a2cSotto if (a <= b) { /* Changes and deletes. */ 1029ccd55a2cSotto for (i = a; i <= b; i++) { 1030ccd55a2cSotto line = preadline(fileno(f1), 1031ccd55a2cSotto ixold[i] - ixold[i - 1], ixold[i - 1]); 1032ccd55a2cSotto if (!ignoreline(line)) 1033ccd55a2cSotto goto proceed; 1034ccd55a2cSotto } 1035ccd55a2cSotto } 1036ccd55a2cSotto if (a > b || c <= d) { /* Changes and inserts. */ 1037ccd55a2cSotto for (i = c; i <= d; i++) { 1038ccd55a2cSotto line = preadline(fileno(f2), 1039ccd55a2cSotto ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1040ccd55a2cSotto if (!ignoreline(line)) 1041ccd55a2cSotto goto proceed; 1042ccd55a2cSotto } 1043ccd55a2cSotto } 1044ccd55a2cSotto return; 1045ccd55a2cSotto } 1046ccd55a2cSotto proceed: 104761783bcdSespie if (*pflags & D_HEADER) { 1048ac73e8e6Smillert printf("%s %s %s\n", diffargs, file1, file2); 104961783bcdSespie *pflags &= ~D_HEADER; 105061783bcdSespie } 10514ec4b3d5Smillert if (format == D_CONTEXT || format == D_UNIFIED) { 10520efcb26eSmillert /* 10530efcb26eSmillert * Allocate change records as needed. 10540efcb26eSmillert */ 10550efcb26eSmillert if (context_vec_ptr == context_vec_end - 1) { 10560efcb26eSmillert ptrdiff_t offset = context_vec_ptr - context_vec_start; 10570efcb26eSmillert max_context <<= 1; 10584a034c3aSray context_vec_start = xrealloc(context_vec_start, 1059aa215433Sray max_context, sizeof(*context_vec_start)); 10600efcb26eSmillert context_vec_end = context_vec_start + max_context; 10610efcb26eSmillert context_vec_ptr = context_vec_start + offset; 10620efcb26eSmillert } 10630efcb26eSmillert if (anychange == 0) { 10640efcb26eSmillert /* 10650efcb26eSmillert * Print the context/unidiff header first time through. 10660efcb26eSmillert */ 10677bdb251cSmillert print_header(file1, file2); 10680efcb26eSmillert anychange = 1; 1069f02e3d86Sotto } else if (a > context_vec_ptr->b + (2 * context) + 1 && 1070f02e3d86Sotto c > context_vec_ptr->d + (2 * context) + 1) { 1071ae8d569bSderaadt /* 10720efcb26eSmillert * If this change is more than 'context' lines from the 10730efcb26eSmillert * previous change, dump the record and reset it. 1074ae8d569bSderaadt */ 10754ec4b3d5Smillert if (format == D_CONTEXT) 1076*dba1d6eaSray dump_context_vec(f1, f2, *pflags); 10779de32c1bSmillert else 1078*dba1d6eaSray dump_unified_vec(f1, f2, *pflags); 10799de32c1bSmillert } 1080ae8d569bSderaadt context_vec_ptr++; 1081ae8d569bSderaadt context_vec_ptr->a = a; 1082ae8d569bSderaadt context_vec_ptr->b = b; 1083ae8d569bSderaadt context_vec_ptr->c = c; 1084ae8d569bSderaadt context_vec_ptr->d = d; 1085ae8d569bSderaadt return; 1086ae8d569bSderaadt } 10870efcb26eSmillert if (anychange == 0) 10880efcb26eSmillert anychange = 1; 10894ec4b3d5Smillert switch (format) { 1090ae8d569bSderaadt 10915f9fc8aaSmillert case D_BRIEF: 10925f9fc8aaSmillert return; 1093ae8d569bSderaadt case D_NORMAL: 1094ae8d569bSderaadt case D_EDIT: 1095ae8d569bSderaadt range(a, b, ","); 1096ae8d569bSderaadt putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 10974ec4b3d5Smillert if (format == D_NORMAL) 1098ae8d569bSderaadt range(c, d, ","); 1099ae8d569bSderaadt putchar('\n'); 1100ae8d569bSderaadt break; 1101ae8d569bSderaadt case D_REVERSE: 1102ae8d569bSderaadt putchar(a > b ? 'a' : c > d ? 'd' : 'c'); 1103ae8d569bSderaadt range(a, b, " "); 1104ae8d569bSderaadt putchar('\n'); 1105ae8d569bSderaadt break; 1106ae8d569bSderaadt case D_NREVERSE: 1107ae8d569bSderaadt if (a > b) 1108ae8d569bSderaadt printf("a%d %d\n", b, d - c + 1); 1109ae8d569bSderaadt else { 1110ae8d569bSderaadt printf("d%d %d\n", a, b - a + 1); 1111ae8d569bSderaadt if (!(c > d)) 1112ae8d569bSderaadt /* add changed lines */ 1113ae8d569bSderaadt printf("a%d %d\n", b, d - c + 1); 1114ae8d569bSderaadt } 1115ae8d569bSderaadt break; 1116ae8d569bSderaadt } 11174ec4b3d5Smillert if (format == D_NORMAL || format == D_IFDEF) { 1118*dba1d6eaSray fetch(ixold, a, b, f1, '<', 1, *pflags); 11194ec4b3d5Smillert if (a <= b && c <= d && format == D_NORMAL) 11204ec4b3d5Smillert puts("---"); 1121ae8d569bSderaadt } 1122*dba1d6eaSray i = fetch(ixnew, c, d, f2, format == D_NORMAL ? '>' : '\0', 0, *pflags); 1123f8a6bf23Smillert if (i != 0 && format == D_EDIT) { 1124f8a6bf23Smillert /* 1125f8a6bf23Smillert * A non-zero return value for D_EDIT indicates that the 1126f8a6bf23Smillert * last line printed was a bare dot (".") that has been 1127f8a6bf23Smillert * escaped as ".." to prevent ed(1) from misinterpreting 1128f8a6bf23Smillert * it. We have to add a substitute command to change this 1129f8a6bf23Smillert * back and restart where we left off. 1130f8a6bf23Smillert */ 1131f8a6bf23Smillert puts("."); 1132f8a6bf23Smillert printf("%ds/^\\.\\././\n", a); 1133f8a6bf23Smillert a += i; 1134f8a6bf23Smillert c += i; 1135f8a6bf23Smillert goto restart; 1136f8a6bf23Smillert } 11374ec4b3d5Smillert if ((format == D_EDIT || format == D_REVERSE) && c <= d) 11384ec4b3d5Smillert puts("."); 1139ae8d569bSderaadt if (inifdef) { 1140b4bca33fSmillert printf("#endif /* %s */\n", ifdefname); 1141ae8d569bSderaadt inifdef = 0; 1142ae8d569bSderaadt } 1143ae8d569bSderaadt } 1144ae8d569bSderaadt 1145f8a6bf23Smillert static int 1146*dba1d6eaSray fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1147ae8d569bSderaadt { 1148f8a6bf23Smillert int i, j, c, lastc, col, nc; 1149ae8d569bSderaadt 1150ae8d569bSderaadt /* 1151ae8d569bSderaadt * When doing #ifdef's, copy down to current line 1152ae8d569bSderaadt * if this is the first file, so that stuff makes it to output. 1153ae8d569bSderaadt */ 11544ec4b3d5Smillert if (format == D_IFDEF && oldfile) { 1155ae8d569bSderaadt long curpos = ftell(lb); 1156ae8d569bSderaadt /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1157ae8d569bSderaadt nc = f[a > b ? b : a - 1] - curpos; 1158ae8d569bSderaadt for (i = 0; i < nc; i++) 1159ae8d569bSderaadt putchar(getc(lb)); 1160ae8d569bSderaadt } 1161ae8d569bSderaadt if (a > b) 1162f8a6bf23Smillert return (0); 11634ec4b3d5Smillert if (format == D_IFDEF) { 116490f56ad8Smillert if (inifdef) { 1165b4bca33fSmillert printf("#else /* %s%s */\n", 116690f56ad8Smillert oldfile == 1 ? "!" : "", ifdefname); 116726da422aStedu } else { 116890f56ad8Smillert if (oldfile) 1169b4bca33fSmillert printf("#ifndef %s\n", ifdefname); 117090f56ad8Smillert else 1171b4bca33fSmillert printf("#ifdef %s\n", ifdefname); 1172ae8d569bSderaadt } 1173ae8d569bSderaadt inifdef = 1 + oldfile; 1174ae8d569bSderaadt } 1175ae8d569bSderaadt for (i = a; i <= b; i++) { 117691cf91eeSderaadt fseek(lb, f[i - 1], SEEK_SET); 1177ae8d569bSderaadt nc = f[i] - f[i - 1]; 11781f9aa9e0Smillert if (format != D_IFDEF && ch != '\0') { 11791f9aa9e0Smillert putchar(ch); 11801f9aa9e0Smillert if (Tflag && (format == D_NORMAL || format == D_CONTEXT 11811f9aa9e0Smillert || format == D_UNIFIED)) 11821f9aa9e0Smillert putchar('\t'); 11831f9aa9e0Smillert else if (format != D_UNIFIED) 11841f9aa9e0Smillert putchar(' '); 11851f9aa9e0Smillert } 1186ae8d569bSderaadt col = 0; 1187f8a6bf23Smillert for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1188bb34d48bSmillert if ((c = getc(lb)) == EOF) { 1189643dc60cSmillert if (format == D_EDIT || format == D_REVERSE || 1190643dc60cSmillert format == D_NREVERSE) 1191643dc60cSmillert warnx("No newline at end of file"); 1192643dc60cSmillert else 1193bb34d48bSmillert puts("\n\\ No newline at end of file"); 1194643dc60cSmillert return (0); 1195bb34d48bSmillert } 1196*dba1d6eaSray if (c == '\t' && (flags & D_EXPANDTABS)) { 1197bb34d48bSmillert do { 1198ae8d569bSderaadt putchar(' '); 1199bb34d48bSmillert } while (++col & 7); 1200bb34d48bSmillert } else { 1201f8a6bf23Smillert if (format == D_EDIT && j == 1 && c == '\n' 1202f8a6bf23Smillert && lastc == '.') { 1203f8a6bf23Smillert /* 1204f8a6bf23Smillert * Don't print a bare "." line 1205f8a6bf23Smillert * since that will confuse ed(1). 1206f8a6bf23Smillert * Print ".." instead and return, 1207f8a6bf23Smillert * giving the caller an offset 1208f8a6bf23Smillert * from which to restart. 1209f8a6bf23Smillert */ 1210f8a6bf23Smillert puts("."); 1211f8a6bf23Smillert return (i - a + 1); 1212f8a6bf23Smillert } 1213ae8d569bSderaadt putchar(c); 1214ae8d569bSderaadt col++; 1215ae8d569bSderaadt } 1216ae8d569bSderaadt } 1217ae8d569bSderaadt } 1218f8a6bf23Smillert return (0); 1219ae8d569bSderaadt } 1220ae8d569bSderaadt 1221ae8d569bSderaadt /* 1222a8013e93Sotto * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1223ae8d569bSderaadt */ 122426da422aStedu static int 1225*dba1d6eaSray readhash(FILE *f, int flags) 1226ae8d569bSderaadt { 1227a8013e93Sotto int i, t, space; 1228a8013e93Sotto int sum; 1229ae8d569bSderaadt 1230ae8d569bSderaadt sum = 1; 1231ae8d569bSderaadt space = 0; 1232*dba1d6eaSray if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1233*dba1d6eaSray if (flags & D_IGNORECASE) 1234a8013e93Sotto for (i = 0; (t = getc(f)) != '\n'; i++) { 1235bb34d48bSmillert if (t == EOF) { 1236a8013e93Sotto if (i == 0) 1237ae8d569bSderaadt return (0); 1238bb34d48bSmillert break; 1239bb34d48bSmillert } 1240a8013e93Sotto sum = sum * 127 + chrtran[t]; 1241ae8d569bSderaadt } 1242ae8d569bSderaadt else 1243a8013e93Sotto for (i = 0; (t = getc(f)) != '\n'; i++) { 1244bb34d48bSmillert if (t == EOF) { 1245a8013e93Sotto if (i == 0) 1246ae8d569bSderaadt return (0); 1247bb34d48bSmillert break; 1248bb34d48bSmillert } 1249a8013e93Sotto sum = sum * 127 + t; 1250ae8d569bSderaadt } 1251ae8d569bSderaadt } else { 1252a8013e93Sotto for (i = 0;;) { 1253ae8d569bSderaadt switch (t = getc(f)) { 1254ae8d569bSderaadt case '\t': 1255b5b605d5Sotto case '\r': 1256b5b605d5Sotto case '\v': 1257b5b605d5Sotto case '\f': 1258ae8d569bSderaadt case ' ': 1259ae8d569bSderaadt space++; 1260ae8d569bSderaadt continue; 1261ae8d569bSderaadt default: 1262*dba1d6eaSray if (space && (flags & D_IGNOREBLANKS) == 0) { 1263a8013e93Sotto i++; 1264ae8d569bSderaadt space = 0; 1265ae8d569bSderaadt } 1266a8013e93Sotto sum = sum * 127 + chrtran[t]; 1267a8013e93Sotto i++; 1268ae8d569bSderaadt continue; 1269bb34d48bSmillert case EOF: 1270a8013e93Sotto if (i == 0) 1271bb34d48bSmillert return (0); 1272bb34d48bSmillert /* FALLTHROUGH */ 1273ae8d569bSderaadt case '\n': 1274ae8d569bSderaadt break; 1275ae8d569bSderaadt } 1276ae8d569bSderaadt break; 1277ae8d569bSderaadt } 1278ae8d569bSderaadt } 1279a8013e93Sotto /* 1280a8013e93Sotto * There is a remote possibility that we end up with a zero sum. 1281a8013e93Sotto * Zero is used as an EOF marker, so return 1 instead. 1282a8013e93Sotto */ 1283a8013e93Sotto return (sum == 0 ? 1 : sum); 1284ae8d569bSderaadt } 1285ae8d569bSderaadt 1286774cb253Savsm static int 128726da422aStedu asciifile(FILE *f) 1288ae8d569bSderaadt { 1289063293f0Sotto unsigned char buf[BUFSIZ]; 1290*dba1d6eaSray size_t i, cnt; 1291ae8d569bSderaadt 1292*dba1d6eaSray if (f == NULL) 12932a1593dfStedu return (1); 12942a1593dfStedu 12954ec4b3d5Smillert rewind(f); 12964ec4b3d5Smillert cnt = fread(buf, 1, sizeof(buf), f); 12974640f069Stedu for (i = 0; i < cnt; i++) 1298ed58cb82Stedu if (!isprint(buf[i]) && !isspace(buf[i])) 1299ae8d569bSderaadt return (0); 1300ae8d569bSderaadt return (1); 1301ae8d569bSderaadt } 1302ae8d569bSderaadt 13035c68ba7eSespie #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 13045c68ba7eSespie 130596e45528Sotto static char * 1306*dba1d6eaSray match_function(const long *f, int pos, FILE *fp) 130796e45528Sotto { 1308063293f0Sotto unsigned char buf[FUNCTION_CONTEXT_SIZE]; 130996e45528Sotto size_t nc; 131096e45528Sotto int last = lastline; 13115c68ba7eSespie char *state = NULL; 131296e45528Sotto 131396e45528Sotto lastline = pos; 131496e45528Sotto while (pos > last) { 1315*dba1d6eaSray fseek(fp, f[pos - 1], SEEK_SET); 131696e45528Sotto nc = f[pos] - f[pos - 1]; 131796e45528Sotto if (nc >= sizeof(buf)) 131896e45528Sotto nc = sizeof(buf) - 1; 1319*dba1d6eaSray nc = fread(buf, 1, nc, fp); 132096e45528Sotto if (nc > 0) { 132196e45528Sotto buf[nc] = '\0'; 13224fd6ed32Sgilles buf[strcspn(buf, "\n")] = '\0'; 13234fd6ed32Sgilles 132496e45528Sotto if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 13255c68ba7eSespie if (begins_with(buf, "private:")) { 13265c68ba7eSespie if (!state) 13275c68ba7eSespie state = " (private)"; 13285c68ba7eSespie } else if (begins_with(buf, "protected:")) { 13295c68ba7eSespie if (!state) 13305c68ba7eSespie state = " (protected)"; 13315c68ba7eSespie } else if (begins_with(buf, "public:")) { 13325c68ba7eSespie if (!state) 13335c68ba7eSespie state = " (public)"; 13345c68ba7eSespie } else { 133596e45528Sotto strlcpy(lastbuf, buf, sizeof lastbuf); 13365c68ba7eSespie if (state) 13375c68ba7eSespie strlcat(lastbuf, state, 13385c68ba7eSespie sizeof lastbuf); 133996e45528Sotto lastmatchline = pos; 134096e45528Sotto return lastbuf; 134196e45528Sotto } 134296e45528Sotto } 13435c68ba7eSespie } 134496e45528Sotto pos--; 134596e45528Sotto } 134696e45528Sotto return lastmatchline > 0 ? lastbuf : NULL; 134796e45528Sotto } 134896e45528Sotto 1349ae8d569bSderaadt /* dump accumulated "context" diff changes */ 135026da422aStedu static void 1351*dba1d6eaSray dump_context_vec(FILE *f1, FILE *f2, int flags) 1352ae8d569bSderaadt { 135348b8c3e3Sderaadt struct context_vec *cvp = context_vec_start; 135448b8c3e3Sderaadt int lowa, upb, lowc, upd, do_output; 135526da422aStedu int a, b, c, d; 135696e45528Sotto char ch, *f; 1357ae8d569bSderaadt 135890f56ad8Smillert if (context_vec_start > context_vec_ptr) 1359ae8d569bSderaadt return; 1360ae8d569bSderaadt 136126da422aStedu b = d = 0; /* gcc */ 13624a034c3aSray lowa = MAX(1, cvp->a - context); 13634a034c3aSray upb = MIN(len[0], context_vec_ptr->b + context); 13644a034c3aSray lowc = MAX(1, cvp->c - context); 13654a034c3aSray upd = MIN(len[1], context_vec_ptr->d + context); 1366ae8d569bSderaadt 136796e45528Sotto printf("***************"); 1368*dba1d6eaSray if ((flags & D_PROTOTYPE)) { 136996e45528Sotto f = match_function(ixold, lowa-1, f1); 137096e45528Sotto if (f != NULL) { 137196e45528Sotto putchar(' '); 137296e45528Sotto fputs(f, stdout); 137396e45528Sotto } 137496e45528Sotto } 137596e45528Sotto printf("\n*** "); 1376ae8d569bSderaadt range(lowa, upb, ","); 1377ae8d569bSderaadt printf(" ****\n"); 1378ae8d569bSderaadt 1379ae8d569bSderaadt /* 13804ec4b3d5Smillert * Output changes to the "old" file. The first loop suppresses 1381ae8d569bSderaadt * output if there were no changes to the "old" file (we'll see 1382ae8d569bSderaadt * the "old" lines as context in the "new" list). 1383ae8d569bSderaadt */ 1384ae8d569bSderaadt do_output = 0; 1385ae8d569bSderaadt for (; cvp <= context_vec_ptr; cvp++) 1386ae8d569bSderaadt if (cvp->a <= cvp->b) { 1387ae8d569bSderaadt cvp = context_vec_start; 1388ae8d569bSderaadt do_output++; 1389ae8d569bSderaadt break; 1390ae8d569bSderaadt } 1391ae8d569bSderaadt if (do_output) { 1392ae8d569bSderaadt while (cvp <= context_vec_ptr) { 139326da422aStedu a = cvp->a; 139426da422aStedu b = cvp->b; 139526da422aStedu c = cvp->c; 139626da422aStedu d = cvp->d; 1397ae8d569bSderaadt 1398ae8d569bSderaadt if (a <= b && c <= d) 1399ae8d569bSderaadt ch = 'c'; 1400ae8d569bSderaadt else 1401ae8d569bSderaadt ch = (a <= b) ? 'd' : 'a'; 1402ae8d569bSderaadt 1403ae8d569bSderaadt if (ch == 'a') 1404*dba1d6eaSray fetch(ixold, lowa, b, f1, ' ', 0, flags); 1405ae8d569bSderaadt else { 1406*dba1d6eaSray fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 14074ec4b3d5Smillert fetch(ixold, a, b, f1, 1408*dba1d6eaSray ch == 'c' ? '!' : '-', 0, flags); 1409ae8d569bSderaadt } 1410ae8d569bSderaadt lowa = b + 1; 1411ae8d569bSderaadt cvp++; 1412ae8d569bSderaadt } 1413*dba1d6eaSray fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1414ae8d569bSderaadt } 1415ae8d569bSderaadt /* output changes to the "new" file */ 1416ae8d569bSderaadt printf("--- "); 1417ae8d569bSderaadt range(lowc, upd, ","); 1418ae8d569bSderaadt printf(" ----\n"); 1419ae8d569bSderaadt 1420ae8d569bSderaadt do_output = 0; 1421ae8d569bSderaadt for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1422ae8d569bSderaadt if (cvp->c <= cvp->d) { 1423ae8d569bSderaadt cvp = context_vec_start; 1424ae8d569bSderaadt do_output++; 1425ae8d569bSderaadt break; 1426ae8d569bSderaadt } 1427ae8d569bSderaadt if (do_output) { 1428ae8d569bSderaadt while (cvp <= context_vec_ptr) { 142926da422aStedu a = cvp->a; 143026da422aStedu b = cvp->b; 143126da422aStedu c = cvp->c; 143226da422aStedu d = cvp->d; 1433ae8d569bSderaadt 1434ae8d569bSderaadt if (a <= b && c <= d) 1435ae8d569bSderaadt ch = 'c'; 1436ae8d569bSderaadt else 1437ae8d569bSderaadt ch = (a <= b) ? 'd' : 'a'; 1438ae8d569bSderaadt 1439ae8d569bSderaadt if (ch == 'd') 1440*dba1d6eaSray fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1441ae8d569bSderaadt else { 1442*dba1d6eaSray fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 14434ec4b3d5Smillert fetch(ixnew, c, d, f2, 1444*dba1d6eaSray ch == 'c' ? '!' : '+', 0, flags); 1445ae8d569bSderaadt } 1446ae8d569bSderaadt lowc = d + 1; 1447ae8d569bSderaadt cvp++; 1448ae8d569bSderaadt } 1449*dba1d6eaSray fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1450ae8d569bSderaadt } 1451ae8d569bSderaadt context_vec_ptr = context_vec_start - 1; 1452ae8d569bSderaadt } 14539de32c1bSmillert 14549de32c1bSmillert /* dump accumulated "unified" diff changes */ 14559de32c1bSmillert static void 1456*dba1d6eaSray dump_unified_vec(FILE *f1, FILE *f2, int flags) 14579de32c1bSmillert { 14589de32c1bSmillert struct context_vec *cvp = context_vec_start; 14599de32c1bSmillert int lowa, upb, lowc, upd; 14609de32c1bSmillert int a, b, c, d; 146196e45528Sotto char ch, *f; 14629de32c1bSmillert 146390f56ad8Smillert if (context_vec_start > context_vec_ptr) 14649de32c1bSmillert return; 14659de32c1bSmillert 14669de32c1bSmillert b = d = 0; /* gcc */ 14674a034c3aSray lowa = MAX(1, cvp->a - context); 14684a034c3aSray upb = MIN(len[0], context_vec_ptr->b + context); 14694a034c3aSray lowc = MAX(1, cvp->c - context); 14704a034c3aSray upd = MIN(len[1], context_vec_ptr->d + context); 14719de32c1bSmillert 1472c72ea322Smillert fputs("@@ -", stdout); 1473c72ea322Smillert uni_range(lowa, upb); 1474c72ea322Smillert fputs(" +", stdout); 1475c72ea322Smillert uni_range(lowc, upd); 147696e45528Sotto fputs(" @@", stdout); 1477*dba1d6eaSray if ((flags & D_PROTOTYPE)) { 147896e45528Sotto f = match_function(ixold, lowa-1, f1); 147996e45528Sotto if (f != NULL) { 148096e45528Sotto putchar(' '); 148196e45528Sotto fputs(f, stdout); 148296e45528Sotto } 148396e45528Sotto } 148496e45528Sotto putchar('\n'); 14859de32c1bSmillert 14869de32c1bSmillert /* 14879de32c1bSmillert * Output changes in "unified" diff format--the old and new lines 14889de32c1bSmillert * are printed together. 14899de32c1bSmillert */ 14909de32c1bSmillert for (; cvp <= context_vec_ptr; cvp++) { 14919de32c1bSmillert a = cvp->a; 14929de32c1bSmillert b = cvp->b; 14939de32c1bSmillert c = cvp->c; 14949de32c1bSmillert d = cvp->d; 14959de32c1bSmillert 14969de32c1bSmillert /* 14979de32c1bSmillert * c: both new and old changes 14989de32c1bSmillert * d: only changes in the old file 14999de32c1bSmillert * a: only changes in the new file 15009de32c1bSmillert */ 15019de32c1bSmillert if (a <= b && c <= d) 15029de32c1bSmillert ch = 'c'; 15039de32c1bSmillert else 15049de32c1bSmillert ch = (a <= b) ? 'd' : 'a'; 15059de32c1bSmillert 15069de32c1bSmillert switch (ch) { 15079de32c1bSmillert case 'c': 1508*dba1d6eaSray fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1509*dba1d6eaSray fetch(ixold, a, b, f1, '-', 0, flags); 1510*dba1d6eaSray fetch(ixnew, c, d, f2, '+', 0, flags); 15119de32c1bSmillert break; 15129de32c1bSmillert case 'd': 1513*dba1d6eaSray fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1514*dba1d6eaSray fetch(ixold, a, b, f1, '-', 0, flags); 15159de32c1bSmillert break; 15169de32c1bSmillert case 'a': 1517*dba1d6eaSray fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1518*dba1d6eaSray fetch(ixnew, c, d, f2, '+', 0, flags); 15199de32c1bSmillert break; 15209de32c1bSmillert } 15219de32c1bSmillert lowa = b + 1; 15229de32c1bSmillert lowc = d + 1; 15239de32c1bSmillert } 1524*dba1d6eaSray fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 15259de32c1bSmillert 15269de32c1bSmillert context_vec_ptr = context_vec_start - 1; 15279de32c1bSmillert } 15287bdb251cSmillert 15297bdb251cSmillert static void 15307bdb251cSmillert print_header(const char *file1, const char *file2) 15317bdb251cSmillert { 15327bdb251cSmillert if (label[0] != NULL) 15337bdb251cSmillert printf("%s %s\n", format == D_CONTEXT ? "***" : "---", 15347bdb251cSmillert label[0]); 15357bdb251cSmillert else 15367bdb251cSmillert printf("%s %s\t%s", format == D_CONTEXT ? "***" : "---", 15377bdb251cSmillert file1, ctime(&stb1.st_mtime)); 15387bdb251cSmillert if (label[1] != NULL) 15397bdb251cSmillert printf("%s %s\n", format == D_CONTEXT ? "---" : "+++", 15407bdb251cSmillert label[1]); 15417bdb251cSmillert else 15427bdb251cSmillert printf("%s %s\t%s", format == D_CONTEXT ? "---" : "+++", 15437bdb251cSmillert file2, ctime(&stb2.st_mtime)); 15447bdb251cSmillert } 1545