1 /* $OpenBSD: diffreg.c,v 1.74 2010/03/22 19:33:19 schwarze Exp $ */
2
3 /*
4 * Copyright (C) Caldera International Inc. 2001-2002.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
18 * International, Inc.
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
22 *
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36 /*-
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
65 */
66
67 #include <sys/param.h>
68 #include <sys/stat.h>
69 #include <sys/wait.h>
70
71 #include <ctype.h>
72 #include <err.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <stddef.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <string.h>
79 #include <unistd.h>
80 #include <string.h>
81 #include <time.h>
82
83 #include "diff.h"
84 #include "pathnames.h"
85 #include "xmalloc.h"
86
87 /*
88 * diff - compare two files.
89 */
90
91 /*
92 * Uses an algorithm due to Harold Stone, which finds
93 * a pair of longest identical subsequences in the two
94 * files.
95 *
96 * The major goal is to generate the match vector J.
97 * J[i] is the index of the line in file1 corresponding
98 * to line i file0. J[i] = 0 if there is no
99 * such line in file1.
100 *
101 * Lines are hashed so as to work in core. All potential
102 * matches are located by sorting the lines of each file
103 * on the hash (called ``value''). In particular, this
104 * collects the equivalence classes in file1 together.
105 * Subroutine equiv replaces the value of each line in
106 * file0 by the index of the first element of its
107 * matching equivalence in (the reordered) file1.
108 * To save space equiv squeezes file1 into a single
109 * array member in which the equivalence classes
110 * are simply concatenated, except that their first
111 * members are flagged by changing sign.
112 *
113 * Next the indices that point into member are unsorted into
114 * array class according to the original order of file0.
115 *
116 * The cleverness lies in routine stone. This marches
117 * through the lines of file0, developing a vector klist
118 * of "k-candidates". At step i a k-candidate is a matched
119 * pair of lines x,y (x in file0 y in file1) such that
120 * there is a common subsequence of length k
121 * between the first i lines of file0 and the first y
122 * lines of file1, but there is no such subsequence for
123 * any smaller y. x is the earliest possible mate to y
124 * that occurs in such a subsequence.
125 *
126 * Whenever any of the members of the equivalence class of
127 * lines in file1 matable to a line in file0 has serial number
128 * less than the y of some k-candidate, that k-candidate
129 * with the smallest such y is replaced. The new
130 * k-candidate is chained (via pred) to the current
131 * k-1 candidate so that the actual subsequence can
132 * be recovered. When a member has serial number greater
133 * that the y of all k-candidates, the klist is extended.
134 * At the end, the longest subsequence is pulled out
135 * and placed in the array J by unravel
136 *
137 * With J in hand, the matches there recorded are
138 * check'ed against reality to assure that no spurious
139 * matches have crept in due to hashing. If they have,
140 * they are broken, and "jackpot" is recorded--a harmless
141 * matter except that a true match for a spuriously
142 * mated line may now be unnecessarily reported as a change.
143 *
144 * Much of the complexity of the program comes simply
145 * from trying to minimize core utilization and
146 * maximize the range of doable problems by dynamically
147 * allocating what is needed and reusing what is not.
148 * The core requirements for problems larger than somewhat
149 * are (in words) 2*length(file0) + length(file1) +
150 * 3*(number of k-candidates installed), typically about
151 * 6n words for files of length n.
152 */
153
154 struct cand {
155 int x;
156 int y;
157 int pred;
158 };
159
160 struct line {
161 int serial;
162 int value;
163 } *file[2];
164
165 /*
166 * The following struct is used to record change information when
167 * doing a "context" or "unified" diff. (see routine "change" to
168 * understand the highly mnemonic field names)
169 */
170 struct context_vec {
171 int a; /* start line in old file */
172 int b; /* end line in old file */
173 int c; /* start line in new file */
174 int d; /* end line in new file */
175 };
176
177 static FILE *opentemp(const char *);
178 static void output(char *, FILE *, char *, FILE *, int);
179 static void check(char *, FILE *, char *, FILE *, int);
180 static void range(int, int, char *);
181 static void uni_range(int, int);
182 static void dump_context_vec(FILE *, FILE *, int);
183 static void dump_unified_vec(FILE *, FILE *, int);
184 static void prepare(int, FILE *, off_t, int);
185 static void prune(void);
186 static void equiv(struct line *, int, struct line *, int, int *);
187 static void unravel(int);
188 static void unsort(struct line *, int, int *);
189 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
190 static void sort(struct line *, int);
191 static void print_header(const char *, const char *);
192 static int ignoreline(char *);
193 static int asciifile(FILE *);
194 static int fetch(long *, int, int, FILE *, int, int, int);
195 static int newcand(int, int, int);
196 static int search(int *, int, int);
197 static int skipline(FILE *);
198 static int isqrt(int);
199 static int stone(int *, int, int *, int *, int);
200 static int readhash(FILE *, int);
201 static int files_differ(FILE *, FILE *, int);
202 static char *match_function(const long *, int, FILE *);
203 static char *preadline(int, size_t, off_t);
204
205 static int *J; /* will be overlaid on class */
206 static int *class; /* will be overlaid on file[0] */
207 static int *klist; /* will be overlaid on file[0] after class */
208 static int *member; /* will be overlaid on file[1] */
209 static int clen;
210 static int inifdef; /* whether or not we are in a #ifdef block */
211 static int len[2];
212 static int pref, suff; /* length of prefix and suffix */
213 static int slen[2];
214 static int anychange;
215 static long *ixnew; /* will be overlaid on file[1] */
216 static long *ixold; /* will be overlaid on klist */
217 static struct cand *clist; /* merely a free storage pot for candidates */
218 static int clistlen; /* the length of clist */
219 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
220 static u_char *chrtran; /* translation table for case-folding */
221 static struct context_vec *context_vec_start;
222 static struct context_vec *context_vec_end;
223 static struct context_vec *context_vec_ptr;
224
225 #define FUNCTION_CONTEXT_SIZE 55
226 static char lastbuf[FUNCTION_CONTEXT_SIZE];
227 static int lastline;
228 static int lastmatchline;
229
230
231 /*
232 * chrtran points to one of 2 translation tables: cup2low if folding upper to
233 * lower case clow2low if not folding case
234 */
235 u_char clow2low[256] = {
236 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
237 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
238 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
239 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
240 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
241 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
242 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
243 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
244 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
245 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
246 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
247 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
248 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
249 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
250 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
251 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
252 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
253 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
254 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
255 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
256 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
257 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
258 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
259 0xfd, 0xfe, 0xff
260 };
261
262 u_char cup2low[256] = {
263 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
264 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
265 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
266 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
267 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
268 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
269 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
270 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
271 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
272 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
273 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
274 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
275 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
276 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
277 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
278 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
279 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
280 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
281 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
282 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
283 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
284 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
285 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
286 0xfd, 0xfe, 0xff
287 };
288
289 int
diffreg(char * file1,char * file2,int flags)290 diffreg(char *file1, char *file2, int flags)
291 {
292 FILE *f1, *f2;
293 int i, rval, ostdout = -1;
294 pid_t pid = -1;
295
296 f1 = f2 = NULL;
297 rval = D_SAME;
298 anychange = 0;
299 lastline = 0;
300 lastmatchline = 0;
301 context_vec_ptr = context_vec_start - 1;
302 if (flags & D_IGNORECASE)
303 chrtran = cup2low;
304 else
305 chrtran = clow2low;
306 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
307 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
308 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
309 goto closem;
310
311 if (flags & D_EMPTY1)
312 f1 = fopen(_PATH_DEVNULL, "r");
313 else {
314 if (!S_ISREG(stb1.st_mode)) {
315 if ((f1 = opentemp(file1)) == NULL ||
316 fstat(fileno(f1), &stb1) < 0) {
317 warn("%s", file1);
318 status |= 2;
319 goto closem;
320 }
321 } else if (strcmp(file1, "-") == 0)
322 f1 = stdin;
323 else
324 f1 = fopen(file1, "r");
325 }
326 if (f1 == NULL) {
327 warn("%s", file1);
328 status |= 2;
329 goto closem;
330 }
331
332 if (flags & D_EMPTY2)
333 f2 = fopen(_PATH_DEVNULL, "r");
334 else {
335 if (!S_ISREG(stb2.st_mode)) {
336 if ((f2 = opentemp(file2)) == NULL ||
337 fstat(fileno(f2), &stb2) < 0) {
338 warn("%s", file2);
339 status |= 2;
340 goto closem;
341 }
342 } else if (strcmp(file2, "-") == 0)
343 f2 = stdin;
344 else
345 f2 = fopen(file2, "r");
346 }
347 if (f2 == NULL) {
348 warn("%s", file2);
349 status |= 2;
350 goto closem;
351 }
352
353 switch (files_differ(f1, f2, flags)) {
354 case 0:
355 goto closem;
356 case 1:
357 break;
358 default:
359 /* error */
360 status |= 2;
361 goto closem;
362 }
363
364 if ((flags & D_FORCEASCII) == 0 &&
365 (!asciifile(f1) || !asciifile(f2))) {
366 rval = D_BINARY;
367 status |= 1;
368 goto closem;
369 }
370 if (lflag) {
371 /* redirect stdout to pr */
372 int pfd[2];
373 char *header;
374 int len;
375 char *prargv[] = { "pr", "-h", NULL, "-f", NULL };
376
377 len = strlen(diffargs) + strlen(file1) + strlen(file2) + 10;
378 if(!(header = malloc(len)))
379 errx(1, "diffreg can't alloc");
380 snprintf(header, len, "%s %s %s", diffargs, file1, file2);
381 prargv[2] = header;
382 fflush(stdout);
383 rewind(stdout);
384 pipe(pfd);
385 switch ((pid = fork())) {
386 case -1:
387 warnx("No more processes");
388 status |= 2;
389 xfree(header);
390 return (D_ERROR);
391 case 0:
392 /* child */
393 if (pfd[0] != STDIN_FILENO) {
394 dup2(pfd[0], STDIN_FILENO);
395 close(pfd[0]);
396 }
397 close(pfd[1]);
398 execv(_PATH_PR, prargv);
399 _exit(127);
400 default:
401 /* parent */
402 if (pfd[1] != STDOUT_FILENO) {
403 ostdout = dup(STDOUT_FILENO);
404 dup2(pfd[1], STDOUT_FILENO);
405 close(pfd[1]);
406 }
407 close(pfd[0]);
408 rewind(stdout);
409 xfree(header);
410 }
411 }
412 prepare(0, f1, stb1.st_size, flags);
413 prepare(1, f2, stb2.st_size, flags);
414
415 prune();
416 sort(sfile[0], slen[0]);
417 sort(sfile[1], slen[1]);
418
419 member = (int *)file[1];
420 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
421 member = xrealloc(member, slen[1] + 2, sizeof(*member));
422
423 class = (int *)file[0];
424 unsort(sfile[0], slen[0], class);
425 class = xrealloc(class, slen[0] + 2, sizeof(*class));
426
427 klist = xcalloc(slen[0] + 2, sizeof(*klist));
428 clen = 0;
429 clistlen = 100;
430 clist = xcalloc(clistlen, sizeof(*clist));
431 i = stone(class, slen[0], member, klist, flags);
432 xfree(member);
433 xfree(class);
434
435 J = xrealloc(J, len[0] + 2, sizeof(*J));
436 unravel(klist[i]);
437 xfree(clist);
438 xfree(klist);
439
440 ixold = xrealloc(ixold, len[0] + 2, sizeof(*ixold));
441 ixnew = xrealloc(ixnew, len[1] + 2, sizeof(*ixnew));
442 check(file1, f1, file2, f2, flags);
443 output(file1, f1, file2, f2, flags);
444 if (ostdout != -1) {
445 int wstatus;
446
447 /* close the pipe to pr and restore stdout */
448 fflush(stdout);
449 rewind(stdout);
450 if (ostdout != STDOUT_FILENO) {
451 close(STDOUT_FILENO);
452 dup2(ostdout, STDOUT_FILENO);
453 close(ostdout);
454 }
455 waitpid(pid, &wstatus, 0);
456 }
457 closem:
458 if (anychange) {
459 status |= 1;
460 if (rval == D_SAME)
461 rval = D_DIFFER;
462 }
463 if (f1 != NULL)
464 fclose(f1);
465 if (f2 != NULL)
466 fclose(f2);
467
468 return (rval);
469 }
470
471 /*
472 * Check to see if the given files differ.
473 * Returns 0 if they are the same, 1 if different, and -1 on error.
474 * XXX - could use code from cmp(1) [faster]
475 */
476 static int
files_differ(FILE * f1,FILE * f2,int flags)477 files_differ(FILE *f1, FILE *f2, int flags)
478 {
479 char buf1[BUFSIZ], buf2[BUFSIZ];
480 size_t i, j;
481
482 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
483 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
484 return (1);
485 for (;;) {
486 i = fread(buf1, 1, sizeof(buf1), f1);
487 j = fread(buf2, 1, sizeof(buf2), f2);
488 if (i != j)
489 return (1);
490 if (i == 0 && j == 0) {
491 if (ferror(f1) || ferror(f2))
492 return (1);
493 return (0);
494 }
495 if (memcmp(buf1, buf2, i) != 0)
496 return (1);
497 }
498 }
499
500 static FILE *
opentemp(const char * file)501 opentemp(const char *file)
502 {
503 char buf[BUFSIZ], *tempdir, tempfile[MAXPATHLEN];
504 ssize_t nread;
505 int ifd, ofd;
506
507 if (strcmp(file, "-") == 0)
508 ifd = STDIN_FILENO;
509 else if ((ifd = open(file, O_RDONLY, 0644)) < 0)
510 return (NULL);
511
512 if ((tempdir = getenv("TMPDIR")) == NULL)
513 tempdir = _PATH_TMP;
514
515 if (strlcpy(tempfile, tempdir, sizeof(tempfile)) >= sizeof(tempfile) ||
516 strlcat(tempfile, "/diff.XXXXXXXX", sizeof(tempfile)) >=
517 sizeof(tempfile)) {
518 close(ifd);
519 errno = ENAMETOOLONG;
520 return (NULL);
521 }
522
523 if ((ofd = mkstemp(tempfile)) < 0) {
524 close(ifd);
525 return (NULL);
526 }
527 unlink(tempfile);
528 while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
529 if (write(ofd, buf, nread) != nread) {
530 close(ifd);
531 close(ofd);
532 return (NULL);
533 }
534 }
535 close(ifd);
536 lseek(ofd, (off_t)0, SEEK_SET);
537 return (fdopen(ofd, "r"));
538 }
539
540 char *
splice(char * dir,char * file)541 splice(char *dir, char *file)
542 {
543 char *tail, *buf;
544 int len;
545
546 if ((tail = strrchr(file, '/')) == NULL)
547 tail = file;
548 else
549 tail++;
550 len = strlen(dir) + strlen(tail) + 1;
551 if(!(buf = malloc(len)))
552 errx(1, "splice can't alloc");
553 snprintf(buf, len, "%s/%s", dir, tail);
554 return (buf);
555 }
556
557 static void
prepare(int i,FILE * fd,off_t filesize,int flags)558 prepare(int i, FILE *fd, off_t filesize, int flags)
559 {
560 struct line *p;
561 int j, h;
562 size_t sz;
563
564 rewind(fd);
565
566 sz = (filesize <= SIZE_MAX ? filesize : SIZE_MAX) / 25;
567 if (sz < 100)
568 sz = 100;
569
570 p = xcalloc(sz + 3, sizeof(*p));
571 for (j = 0; (h = readhash(fd, flags));) {
572 if (j == sz) {
573 sz = sz * 3 / 2;
574 p = xrealloc(p, sz + 3, sizeof(*p));
575 }
576 p[++j].value = h;
577 }
578 len[i] = j;
579 file[i] = p;
580 }
581
582 static void
prune(void)583 prune(void)
584 {
585 int i, j;
586
587 for (pref = 0; pref < len[0] && pref < len[1] &&
588 file[0][pref + 1].value == file[1][pref + 1].value;
589 pref++)
590 ;
591 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
592 file[0][len[0] - suff].value == file[1][len[1] - suff].value;
593 suff++)
594 ;
595 for (j = 0; j < 2; j++) {
596 sfile[j] = file[j] + pref;
597 slen[j] = len[j] - pref - suff;
598 for (i = 0; i <= slen[j]; i++)
599 sfile[j][i].serial = i;
600 }
601 }
602
603 static void
equiv(struct line * a,int n,struct line * b,int m,int * c)604 equiv(struct line *a, int n, struct line *b, int m, int *c)
605 {
606 int i, j;
607
608 i = j = 1;
609 while (i <= n && j <= m) {
610 if (a[i].value < b[j].value)
611 a[i++].value = 0;
612 else if (a[i].value == b[j].value)
613 a[i++].value = j;
614 else
615 j++;
616 }
617 while (i <= n)
618 a[i++].value = 0;
619 b[m + 1].value = 0;
620 j = 0;
621 while (++j <= m) {
622 c[j] = -b[j].serial;
623 while (b[j + 1].value == b[j].value) {
624 j++;
625 c[j] = b[j].serial;
626 }
627 }
628 c[j] = -1;
629 }
630
631 /* Code taken from ping.c */
632 static int
isqrt(int n)633 isqrt(int n)
634 {
635 int y, x = 1;
636
637 if (n == 0)
638 return (0);
639
640 do { /* newton was a stinker */
641 y = x;
642 x = n / x;
643 x += y;
644 x /= 2;
645 } while ((x - y) > 1 || (x - y) < -1);
646
647 return (x);
648 }
649
650 static int
stone(int * a,int n,int * b,int * c,int flags)651 stone(int *a, int n, int *b, int *c, int flags)
652 {
653 int i, k, y, j, l;
654 int oldc, tc, oldl;
655 u_int numtries;
656
657 /* XXX move the isqrt() out of the macro to avoid multiple calls */
658 const u_int bound = (flags & D_MINIMAL) ? UINT_MAX :
659 MAX(256, isqrt(n));
660
661 k = 0;
662 c[0] = newcand(0, 0, 0);
663 for (i = 1; i <= n; i++) {
664 j = a[i];
665 if (j == 0)
666 continue;
667 y = -b[j];
668 oldl = 0;
669 oldc = c[0];
670 numtries = 0;
671 do {
672 if (y <= clist[oldc].y)
673 continue;
674 l = search(c, k, y);
675 if (l != oldl + 1)
676 oldc = c[l - 1];
677 if (l <= k) {
678 if (clist[c[l]].y <= y)
679 continue;
680 tc = c[l];
681 c[l] = newcand(i, y, oldc);
682 oldc = tc;
683 oldl = l;
684 numtries++;
685 } else {
686 c[l] = newcand(i, y, oldc);
687 k++;
688 break;
689 }
690 } while ((y = b[++j]) > 0 && numtries < bound);
691 }
692 return (k);
693 }
694
695 static int
newcand(int x,int y,int pred)696 newcand(int x, int y, int pred)
697 {
698 struct cand *q;
699
700 if (clen == clistlen) {
701 clistlen = clistlen * 11 / 10;
702 clist = xrealloc(clist, clistlen, sizeof(*clist));
703 }
704 q = clist + clen;
705 q->x = x;
706 q->y = y;
707 q->pred = pred;
708 return (clen++);
709 }
710
711 static int
search(int * c,int k,int y)712 search(int *c, int k, int y)
713 {
714 int i, j, l, t;
715
716 if (clist[c[k]].y < y) /* quick look for typical case */
717 return (k + 1);
718 i = 0;
719 j = k + 1;
720 for (;;) {
721 l = (i + j) / 2;
722 if (l <= i)
723 break;
724 t = clist[c[l]].y;
725 if (t > y)
726 j = l;
727 else if (t < y)
728 i = l;
729 else
730 return (l);
731 }
732 return (l + 1);
733 }
734
735 static void
unravel(int p)736 unravel(int p)
737 {
738 struct cand *q;
739 int i;
740
741 for (i = 0; i <= len[0]; i++)
742 J[i] = i <= pref ? i :
743 i > len[0] - suff ? i + len[1] - len[0] : 0;
744 for (q = clist + p; q->y != 0; q = clist + q->pred)
745 J[q->x + pref] = q->y + pref;
746 }
747
748 /*
749 * Check does double duty:
750 * 1. ferret out any fortuitous correspondences due
751 * to confounding by hashing (which result in "jackpot")
752 * 2. collect random access indexes to the two files
753 */
754 static void
check(char * file1,FILE * f1,char * file2,FILE * f2,int flags)755 check(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
756 {
757 int i, j, jackpot, c, d;
758 long ctold, ctnew;
759
760 rewind(f1);
761 rewind(f2);
762 j = 1;
763 ixold[0] = ixnew[0] = 0;
764 jackpot = 0;
765 ctold = ctnew = 0;
766 for (i = 1; i <= len[0]; i++) {
767 if (J[i] == 0) {
768 ixold[i] = ctold += skipline(f1);
769 continue;
770 }
771 while (j < J[i]) {
772 ixnew[j] = ctnew += skipline(f2);
773 j++;
774 }
775 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE)) {
776 for (;;) {
777 c = getc(f1);
778 d = getc(f2);
779 /*
780 * GNU diff ignores a missing newline
781 * in one file for -b or -w.
782 */
783 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) &&
784 ((c == EOF && d == '\n') ||
785 (c == '\n' && d == EOF))) {
786 break;
787 }
788 ctold++;
789 ctnew++;
790 if ((flags & D_FOLDBLANKS) && isspace(c) &&
791 isspace(d)) {
792 do {
793 if (c == '\n')
794 break;
795 ctold++;
796 } while (isspace(c = getc(f1)));
797 do {
798 if (d == '\n')
799 break;
800 ctnew++;
801 } while (isspace(d = getc(f2)));
802 } else if ((flags & D_IGNOREBLANKS)) {
803 while (isspace(c) && c != '\n') {
804 c = getc(f1);
805 ctold++;
806 }
807 while (isspace(d) && d != '\n') {
808 d = getc(f2);
809 ctnew++;
810 }
811 }
812 if (chrtran[c] != chrtran[d]) {
813 jackpot++;
814 J[i] = 0;
815 if (c != '\n' && c != EOF)
816 ctold += skipline(f1);
817 if (d != '\n' && c != EOF)
818 ctnew += skipline(f2);
819 break;
820 }
821 if (c == '\n' || c == EOF)
822 break;
823 }
824 } else {
825 for (;;) {
826 ctold++;
827 ctnew++;
828 if ((c = getc(f1)) != (d = getc(f2))) {
829 /* jackpot++; */
830 J[i] = 0;
831 if (c != '\n' && c != EOF)
832 ctold += skipline(f1);
833 if (d != '\n' && c != EOF)
834 ctnew += skipline(f2);
835 break;
836 }
837 if (c == '\n' || c == EOF)
838 break;
839 }
840 }
841 ixold[i] = ctold;
842 ixnew[j] = ctnew;
843 j++;
844 }
845 for (; j <= len[1]; j++)
846 ixnew[j] = ctnew += skipline(f2);
847 /*
848 * if (jackpot)
849 * fprintf(stderr, "jackpot\n");
850 */
851 }
852
853 /* shellsort CACM #201 */
854 static void
sort(struct line * a,int n)855 sort(struct line *a, int n)
856 {
857 struct line *ai, *aim, w;
858 int j, m = 0, k;
859
860 if (n == 0)
861 return;
862 for (j = 1; j <= n; j *= 2)
863 m = 2 * j - 1;
864 for (m /= 2; m != 0; m /= 2) {
865 k = n - m;
866 for (j = 1; j <= k; j++) {
867 for (ai = &a[j]; ai > a; ai -= m) {
868 aim = &ai[m];
869 if (aim < ai)
870 break; /* wraparound */
871 if (aim->value > ai[0].value ||
872 (aim->value == ai[0].value &&
873 aim->serial > ai[0].serial))
874 break;
875 w.value = ai[0].value;
876 ai[0].value = aim->value;
877 aim->value = w.value;
878 w.serial = ai[0].serial;
879 ai[0].serial = aim->serial;
880 aim->serial = w.serial;
881 }
882 }
883 }
884 }
885
886 static void
unsort(struct line * f,int l,int * b)887 unsort(struct line *f, int l, int *b)
888 {
889 int *a, i;
890
891 a = xcalloc(l + 1, sizeof(*a));
892 for (i = 1; i <= l; i++)
893 a[f[i].serial] = f[i].value;
894 for (i = 1; i <= l; i++)
895 b[i] = a[i];
896 xfree(a);
897 }
898
899 static int
skipline(FILE * f)900 skipline(FILE *f)
901 {
902 int i, c;
903
904 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
905 continue;
906 return (i);
907 }
908
909 static void
output(char * file1,FILE * f1,char * file2,FILE * f2,int flags)910 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
911 {
912 int m, i0, i1, j0, j1;
913
914 rewind(f1);
915 rewind(f2);
916 m = len[0];
917 J[0] = 0;
918 J[m + 1] = len[1] + 1;
919 if (diff_format != D_EDIT) {
920 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
921 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
922 i0++;
923 j0 = J[i0 - 1] + 1;
924 i1 = i0 - 1;
925 while (i1 < m && J[i1 + 1] == 0)
926 i1++;
927 j1 = J[i1 + 1] - 1;
928 J[i1] = j1;
929 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
930 }
931 } else {
932 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
933 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
934 i0--;
935 j0 = J[i0 + 1] - 1;
936 i1 = i0 + 1;
937 while (i1 > 1 && J[i1 - 1] == 0)
938 i1--;
939 j1 = J[i1 - 1] + 1;
940 J[i1] = j1;
941 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
942 }
943 }
944 if (m == 0)
945 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
946 if (diff_format == D_IFDEF) {
947 for (;;) {
948 #define c i0
949 if ((c = getc(f1)) == EOF)
950 return;
951 putchar(c);
952 }
953 #undef c
954 }
955 if (anychange != 0) {
956 if (diff_format == D_CONTEXT)
957 dump_context_vec(f1, f2, flags);
958 else if (diff_format == D_UNIFIED)
959 dump_unified_vec(f1, f2, flags);
960 }
961 }
962
963 static void
range(int a,int b,char * separator)964 range(int a, int b, char *separator)
965 {
966 printf("%d", a > b ? b : a);
967 if (a < b)
968 printf("%s%d", separator, b);
969 }
970
971 static void
uni_range(int a,int b)972 uni_range(int a, int b)
973 {
974 if (a < b)
975 printf("%d,%d", a, b - a + 1);
976 else if (a == b)
977 printf("%d", b);
978 else
979 printf("%d,0", b);
980 }
981
982 static char *
preadline(int fd,size_t rlen,off_t off)983 preadline(int fd, size_t rlen, off_t off)
984 {
985 char *line;
986 ssize_t nr;
987
988 line = xmalloc(rlen + 1);
989 if ((nr = pread(fd, line, rlen, off)) < 0)
990 err(1, "preadline");
991 if (nr > 0 && line[nr-1] == '\n')
992 nr--;
993 line[nr] = '\0';
994 return (line);
995 }
996
997 static int
ignoreline(char * line)998 ignoreline(char *line)
999 {
1000 int ret;
1001
1002 ret = regexec(&ignore_re, line, 0, NULL, 0);
1003 xfree(line);
1004 return (ret == 0); /* if it matched, it should be ignored. */
1005 }
1006
1007 /*
1008 * Indicate that there is a difference between lines a and b of the from file
1009 * to get to lines c to d of the to file. If a is greater then b then there
1010 * are no lines in the from file involved and this means that there were
1011 * lines appended (beginning at b). If c is greater than d then there are
1012 * lines missing from the to file.
1013 */
1014 static void
change(char * file1,FILE * f1,char * file2,FILE * f2,int a,int b,int c,int d,int * pflags)1015 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1016 int *pflags)
1017 {
1018 static size_t max_context = 64;
1019 int i;
1020
1021 restart:
1022 if (diff_format != D_IFDEF && a > b && c > d)
1023 return;
1024 if (ignore_pats != NULL) {
1025 char *line;
1026 /*
1027 * All lines in the change, insert, or delete must
1028 * match an ignore pattern for the change to be
1029 * ignored.
1030 */
1031 if (a <= b) { /* Changes and deletes. */
1032 for (i = a; i <= b; i++) {
1033 line = preadline(fileno(f1),
1034 ixold[i] - ixold[i - 1], ixold[i - 1]);
1035 if (!ignoreline(line))
1036 goto proceed;
1037 }
1038 }
1039 if (a > b || c <= d) { /* Changes and inserts. */
1040 for (i = c; i <= d; i++) {
1041 line = preadline(fileno(f2),
1042 ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1043 if (!ignoreline(line))
1044 goto proceed;
1045 }
1046 }
1047 return;
1048 }
1049 proceed:
1050 if (*pflags & D_HEADER) {
1051 printf("%s %s %s\n", diffargs, file1, file2);
1052 *pflags &= ~D_HEADER;
1053 }
1054 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1055 /*
1056 * Allocate change records as needed.
1057 */
1058 if (context_vec_ptr == context_vec_end - 1) {
1059 ptrdiff_t offset = context_vec_ptr - context_vec_start;
1060 max_context <<= 1;
1061 context_vec_start = xrealloc(context_vec_start,
1062 max_context, sizeof(*context_vec_start));
1063 context_vec_end = context_vec_start + max_context;
1064 context_vec_ptr = context_vec_start + offset;
1065 }
1066 if (anychange == 0) {
1067 /*
1068 * Print the context/unidiff header first time through.
1069 */
1070 print_header(file1, file2);
1071 anychange = 1;
1072 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1073 c > context_vec_ptr->d + (2 * diff_context) + 1) {
1074 /*
1075 * If this change is more than 'diff_context' lines from the
1076 * previous change, dump the record and reset it.
1077 */
1078 if (diff_format == D_CONTEXT)
1079 dump_context_vec(f1, f2, *pflags);
1080 else
1081 dump_unified_vec(f1, f2, *pflags);
1082 }
1083 context_vec_ptr++;
1084 context_vec_ptr->a = a;
1085 context_vec_ptr->b = b;
1086 context_vec_ptr->c = c;
1087 context_vec_ptr->d = d;
1088 return;
1089 }
1090 if (anychange == 0)
1091 anychange = 1;
1092 switch (diff_format) {
1093 case D_BRIEF:
1094 return;
1095 case D_NORMAL:
1096 case D_EDIT:
1097 range(a, b, ",");
1098 putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1099 if (diff_format == D_NORMAL)
1100 range(c, d, ",");
1101 putchar('\n');
1102 break;
1103 case D_REVERSE:
1104 putchar(a > b ? 'a' : c > d ? 'd' : 'c');
1105 range(a, b, " ");
1106 putchar('\n');
1107 break;
1108 case D_NREVERSE:
1109 if (a > b)
1110 printf("a%d %d\n", b, d - c + 1);
1111 else {
1112 printf("d%d %d\n", a, b - a + 1);
1113 if (!(c > d))
1114 /* add changed lines */
1115 printf("a%d %d\n", b, d - c + 1);
1116 }
1117 break;
1118 }
1119 if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1120 fetch(ixold, a, b, f1, '<', 1, *pflags);
1121 if (a <= b && c <= d && diff_format == D_NORMAL)
1122 puts("---");
1123 }
1124 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1125 if (i != 0 && diff_format == D_EDIT) {
1126 /*
1127 * A non-zero return value for D_EDIT indicates that the
1128 * last line printed was a bare dot (".") that has been
1129 * escaped as ".." to prevent ed(1) from misinterpreting
1130 * it. We have to add a substitute command to change this
1131 * back and restart where we left off.
1132 */
1133 puts(".");
1134 printf("%ds/^\\.\\././\n", a);
1135 a += i;
1136 c += i;
1137 goto restart;
1138 }
1139 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1140 puts(".");
1141 if (inifdef) {
1142 printf("#endif /* %s */\n", ifdefname);
1143 inifdef = 0;
1144 }
1145 }
1146
1147 static int
fetch(long * f,int a,int b,FILE * lb,int ch,int oldfile,int flags)1148 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1149 {
1150 int i, j, c, lastc, col, nc;
1151
1152 /*
1153 * When doing #ifdef's, copy down to current line
1154 * if this is the first file, so that stuff makes it to output.
1155 */
1156 if (diff_format == D_IFDEF && oldfile) {
1157 long curpos = ftell(lb);
1158 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1159 nc = f[a > b ? b : a - 1] - curpos;
1160 for (i = 0; i < nc; i++)
1161 putchar(getc(lb));
1162 }
1163 if (a > b)
1164 return (0);
1165 if (diff_format == D_IFDEF) {
1166 if (inifdef) {
1167 printf("#else /* %s%s */\n",
1168 oldfile == 1 ? "!" : "", ifdefname);
1169 } else {
1170 if (oldfile)
1171 printf("#ifndef %s\n", ifdefname);
1172 else
1173 printf("#ifdef %s\n", ifdefname);
1174 }
1175 inifdef = 1 + oldfile;
1176 }
1177 for (i = a; i <= b; i++) {
1178 fseek(lb, f[i - 1], SEEK_SET);
1179 nc = f[i] - f[i - 1];
1180 if (diff_format != D_IFDEF && ch != '\0') {
1181 putchar(ch);
1182 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1183 || diff_format == D_UNIFIED))
1184 putchar('\t');
1185 else if (diff_format != D_UNIFIED)
1186 putchar(' ');
1187 }
1188 col = 0;
1189 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1190 if ((c = getc(lb)) == EOF) {
1191 if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1192 diff_format == D_NREVERSE)
1193 warnx("No newline at end of file");
1194 else
1195 puts("\n\\ No newline at end of file");
1196 return (0);
1197 }
1198 if (c == '\t' && (flags & D_EXPANDTABS)) {
1199 do {
1200 putchar(' ');
1201 } while (++col & 7);
1202 } else {
1203 if (diff_format == D_EDIT && j == 1 && c == '\n'
1204 && lastc == '.') {
1205 /*
1206 * Don't print a bare "." line
1207 * since that will confuse ed(1).
1208 * Print ".." instead and return,
1209 * giving the caller an offset
1210 * from which to restart.
1211 */
1212 puts(".");
1213 return (i - a + 1);
1214 }
1215 putchar(c);
1216 col++;
1217 }
1218 }
1219 }
1220 return (0);
1221 }
1222
1223 /*
1224 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1225 */
1226 static int
readhash(FILE * f,int flags)1227 readhash(FILE *f, int flags)
1228 {
1229 int i, t, space;
1230 int sum;
1231
1232 sum = 1;
1233 space = 0;
1234 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1235 if (flags & D_IGNORECASE)
1236 for (i = 0; (t = getc(f)) != '\n'; i++) {
1237 if (t == EOF) {
1238 if (i == 0)
1239 return (0);
1240 break;
1241 }
1242 sum = sum * 127 + chrtran[t];
1243 }
1244 else
1245 for (i = 0; (t = getc(f)) != '\n'; i++) {
1246 if (t == EOF) {
1247 if (i == 0)
1248 return (0);
1249 break;
1250 }
1251 sum = sum * 127 + t;
1252 }
1253 } else {
1254 for (i = 0;;) {
1255 switch (t = getc(f)) {
1256 case '\t':
1257 case '\r':
1258 case '\v':
1259 case '\f':
1260 case ' ':
1261 space++;
1262 continue;
1263 default:
1264 if (space && (flags & D_IGNOREBLANKS) == 0) {
1265 i++;
1266 space = 0;
1267 }
1268 sum = sum * 127 + chrtran[t];
1269 i++;
1270 continue;
1271 case EOF:
1272 if (i == 0)
1273 return (0);
1274 /* FALLTHROUGH */
1275 case '\n':
1276 break;
1277 }
1278 break;
1279 }
1280 }
1281 /*
1282 * There is a remote possibility that we end up with a zero sum.
1283 * Zero is used as an EOF marker, so return 1 instead.
1284 */
1285 return (sum == 0 ? 1 : sum);
1286 }
1287
1288 static int
asciifile(FILE * f)1289 asciifile(FILE *f)
1290 {
1291 unsigned char buf[BUFSIZ];
1292 size_t i, cnt;
1293
1294 if (f == NULL)
1295 return (1);
1296
1297 rewind(f);
1298 cnt = fread(buf, 1, sizeof(buf), f);
1299 for (i = 0; i < cnt; i++)
1300 if (!isprint(buf[i]) && !isspace(buf[i]))
1301 return (0);
1302 return (1);
1303 }
1304
1305 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1306
1307 static char *
match_function(const long * f,int pos,FILE * fp)1308 match_function(const long *f, int pos, FILE *fp)
1309 {
1310 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1311 size_t nc;
1312 int last = lastline;
1313 char *state = NULL;
1314
1315 lastline = pos;
1316 while (pos > last) {
1317 fseek(fp, f[pos - 1], SEEK_SET);
1318 nc = f[pos] - f[pos - 1];
1319 if (nc >= sizeof(buf))
1320 nc = sizeof(buf) - 1;
1321 nc = fread((char *) buf, 1, nc, fp);
1322 if (nc > 0) {
1323 buf[nc] = '\0';
1324 buf[strcspn((char *) buf, "\n")] = '\0';
1325 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1326 if (begins_with((char *)buf, "private:")) {
1327 if (!state)
1328 state = " (private)";
1329 } else if (begins_with((char *)buf, "protected:")) {
1330 if (!state)
1331 state = " (protected)";
1332 } else if (begins_with((char *)buf, "public:")) {
1333 if (!state)
1334 state = " (public)";
1335 } else {
1336 strlcpy(lastbuf, (char *) buf, sizeof lastbuf);
1337 if (state)
1338 strlcat(lastbuf, state,
1339 sizeof lastbuf);
1340 lastmatchline = pos;
1341 return lastbuf;
1342 }
1343 }
1344 }
1345 pos--;
1346 }
1347 return lastmatchline > 0 ? lastbuf : NULL;
1348 }
1349
1350 /* dump accumulated "context" diff changes */
1351 static void
dump_context_vec(FILE * f1,FILE * f2,int flags)1352 dump_context_vec(FILE *f1, FILE *f2, int flags)
1353 {
1354 struct context_vec *cvp = context_vec_start;
1355 int lowa, upb, lowc, upd, do_output;
1356 int a, b, c, d;
1357 char ch, *f;
1358
1359 if (context_vec_start > context_vec_ptr)
1360 return;
1361
1362 b = d = 0; /* gcc */
1363 lowa = MAX(1, cvp->a - diff_context);
1364 upb = MIN(len[0], context_vec_ptr->b + diff_context);
1365 lowc = MAX(1, cvp->c - diff_context);
1366 upd = MIN(len[1], context_vec_ptr->d + diff_context);
1367
1368 printf("***************");
1369 if ((flags & D_PROTOTYPE)) {
1370 f = match_function(ixold, lowa-1, f1);
1371 if (f != NULL) {
1372 putchar(' ');
1373 fputs(f, stdout);
1374 }
1375 }
1376 printf("\n*** ");
1377 range(lowa, upb, ",");
1378 printf(" ****\n");
1379
1380 /*
1381 * Output changes to the "old" file. The first loop suppresses
1382 * output if there were no changes to the "old" file (we'll see
1383 * the "old" lines as context in the "new" list).
1384 */
1385 do_output = 0;
1386 for (; cvp <= context_vec_ptr; cvp++)
1387 if (cvp->a <= cvp->b) {
1388 cvp = context_vec_start;
1389 do_output++;
1390 break;
1391 }
1392 if (do_output) {
1393 while (cvp <= context_vec_ptr) {
1394 a = cvp->a;
1395 b = cvp->b;
1396 c = cvp->c;
1397 d = cvp->d;
1398
1399 if (a <= b && c <= d)
1400 ch = 'c';
1401 else
1402 ch = (a <= b) ? 'd' : 'a';
1403
1404 if (ch == 'a')
1405 fetch(ixold, lowa, b, f1, ' ', 0, flags);
1406 else {
1407 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1408 fetch(ixold, a, b, f1,
1409 ch == 'c' ? '!' : '-', 0, flags);
1410 }
1411 lowa = b + 1;
1412 cvp++;
1413 }
1414 fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1415 }
1416 /* output changes to the "new" file */
1417 printf("--- ");
1418 range(lowc, upd, ",");
1419 printf(" ----\n");
1420
1421 do_output = 0;
1422 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1423 if (cvp->c <= cvp->d) {
1424 cvp = context_vec_start;
1425 do_output++;
1426 break;
1427 }
1428 if (do_output) {
1429 while (cvp <= context_vec_ptr) {
1430 a = cvp->a;
1431 b = cvp->b;
1432 c = cvp->c;
1433 d = cvp->d;
1434
1435 if (a <= b && c <= d)
1436 ch = 'c';
1437 else
1438 ch = (a <= b) ? 'd' : 'a';
1439
1440 if (ch == 'd')
1441 fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1442 else {
1443 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1444 fetch(ixnew, c, d, f2,
1445 ch == 'c' ? '!' : '+', 0, flags);
1446 }
1447 lowc = d + 1;
1448 cvp++;
1449 }
1450 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1451 }
1452 context_vec_ptr = context_vec_start - 1;
1453 }
1454
1455 /* dump accumulated "unified" diff changes */
1456 static void
dump_unified_vec(FILE * f1,FILE * f2,int flags)1457 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1458 {
1459 struct context_vec *cvp = context_vec_start;
1460 int lowa, upb, lowc, upd;
1461 int a, b, c, d;
1462 char ch, *f;
1463
1464 if (context_vec_start > context_vec_ptr)
1465 return;
1466
1467 b = d = 0; /* gcc */
1468 lowa = MAX(1, cvp->a - diff_context);
1469 upb = MIN(len[0], context_vec_ptr->b + diff_context);
1470 lowc = MAX(1, cvp->c - diff_context);
1471 upd = MIN(len[1], context_vec_ptr->d + diff_context);
1472
1473 fputs("@@ -", stdout);
1474 uni_range(lowa, upb);
1475 fputs(" +", stdout);
1476 uni_range(lowc, upd);
1477 fputs(" @@", stdout);
1478 if ((flags & D_PROTOTYPE)) {
1479 f = match_function(ixold, lowa-1, f1);
1480 if (f != NULL) {
1481 putchar(' ');
1482 fputs(f, stdout);
1483 }
1484 }
1485 putchar('\n');
1486
1487 /*
1488 * Output changes in "unified" diff format--the old and new lines
1489 * are printed together.
1490 */
1491 for (; cvp <= context_vec_ptr; cvp++) {
1492 a = cvp->a;
1493 b = cvp->b;
1494 c = cvp->c;
1495 d = cvp->d;
1496
1497 /*
1498 * c: both new and old changes
1499 * d: only changes in the old file
1500 * a: only changes in the new file
1501 */
1502 if (a <= b && c <= d)
1503 ch = 'c';
1504 else
1505 ch = (a <= b) ? 'd' : 'a';
1506
1507 switch (ch) {
1508 case 'c':
1509 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1510 fetch(ixold, a, b, f1, '-', 0, flags);
1511 fetch(ixnew, c, d, f2, '+', 0, flags);
1512 break;
1513 case 'd':
1514 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515 fetch(ixold, a, b, f1, '-', 0, flags);
1516 break;
1517 case 'a':
1518 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1519 fetch(ixnew, c, d, f2, '+', 0, flags);
1520 break;
1521 }
1522 lowa = b + 1;
1523 lowc = d + 1;
1524 }
1525 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1526
1527 context_vec_ptr = context_vec_start - 1;
1528 }
1529
1530 static void
print_header(const char * file1,const char * file2)1531 print_header(const char *file1, const char *file2)
1532 {
1533 if (label[0] != NULL)
1534 printf("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1535 label[0]);
1536 else
1537 printf("%s %s\t%s", diff_format == D_CONTEXT ? "***" : "---",
1538 file1, ctime(&stb1.st_mtime));
1539 if (label[1] != NULL)
1540 printf("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1541 label[1]);
1542 else
1543 printf("%s %s\t%s", diff_format == D_CONTEXT ? "---" : "+++",
1544 file2, ctime(&stb2.st_mtime));
1545 }
1546