xref: /plan9/sys/src/cmd/diff/diffio.c (revision ee55fa657195c926cbec4ccdbfcc06617d77750e)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include "diff.h"
6 
/*
 * One record per input line; prepare() builds an array of these
 * for each file and stores it in file[i].
 */
struct line {
	int	serial;		/* not assigned in this file; presumably a line number -- confirm against the other diff sources */
	int	value;		/* hash of the line's text, as returned by readhash() */
};
/*
 * State shared with the rest of diff (defined elsewhere).
 * Index 0 is the first ("old") file, index 1 the second ("new") one.
 */
extern struct line *file[2];	/* per-file line records, set by prepare() */
extern int len[2];		/* number of lines prepare() read from each file */
extern long *ixold, *ixnew;	/* ix[n]: byte offset just past line n; check() fills them, fetch() seeks with them */
extern int *J;			/* J[f]: line of file 1 paired with line f of file 0, 0 if none; check() clears pairs whose text differs */

static Biobuf *input[2];	/* the Biobufs opened by prepare(), indexed like file[] */
static char *file1, *file2;	/* the two file names, as passed to prepare() */
static int firstchange;		/* with mflag: set once the "diff ..." header line has been printed */

#define MAXLINELEN	4096	/* size of a line buffer; readline() keeps at most MAXLINELEN-1 bytes */
#define MIN(x, y)	((x) < (y) ? (x): (y))
22 
23 static int
readline(Biobuf * bp,char * buf)24 readline(Biobuf *bp, char *buf)
25 {
26 	int c;
27 	char *p, *e;
28 
29 	p = buf;
30 	e = p + MAXLINELEN-1;
31 	do {
32 		c = Bgetc(bp);
33 		if (c < 0) {
34 			if (p == buf)
35 				return -1;
36 			break;
37 		}
38 		if (c == '\n')
39 			break;
40 		*p++ = c;
41 	} while (p < e);
42 	*p = 0;
43 	if (c != '\n' && c >= 0) {
44 		do c = Bgetc(bp);
45 		while (c >= 0 && c != '\n');
46 	}
47 	return p - buf;
48 }
49 
/*
 * Split a long into its low HALFLONG bits and the bits above them;
 * readhash() uses the pair to fold its checksum.  The argument is
 * parenthesized so that expressions such as low(a|b) expand with
 * the intended precedence.
 */
#define HALFLONG 16
#define low(x)	((x)&((1L<<HALFLONG)-1))
#define high(x)	((x)>>HALFLONG)
53 
54 /*
55  * hashing has the effect of
56  * arranging line in 7-bit bytes and then
57  * summing 1-s complement in 16-bit hunks
58  */
59 static int
readhash(Biobuf * bp,char * buf)60 readhash(Biobuf *bp, char *buf)
61 {
62 	long sum;
63 	unsigned shift;
64 	char *p;
65 	int len, space;
66 
67 	sum = 1;
68 	shift = 0;
69 	if ((len = readline(bp, buf)) == -1)
70 		return 0;
71 	p = buf;
72 	switch(bflag)	/* various types of white space handling */
73 	{
74 	case 0:
75 		while (len--) {
76 			sum += (long)*p++ << (shift &= (HALFLONG-1));
77 			shift += 7;
78 		}
79 		break;
80 	case 1:
81 		/*
82 		 * coalesce multiple white-space
83 		 */
84 		for (space = 0; len--; p++) {
85 			if (isspace(*p)) {
86 				space++;
87 				continue;
88 			}
89 			if (space) {
90 				shift += 7;
91 				space = 0;
92 			}
93 			sum += (long)*p << (shift &= (HALFLONG-1));
94 			shift += 7;
95 		}
96 		break;
97 	default:
98 		/*
99 		 * strip all white-space
100 		 */
101 		while (len--) {
102 			if (isspace(*p)) {
103 				p++;
104 				continue;
105 			}
106 			sum += (long)*p++ << (shift &= (HALFLONG-1));
107 			shift += 7;
108 		}
109 		break;
110 	}
111 	sum = low(sum) + high(sum);
112 	return ((short)low(sum) + (short)high(sum));
113 }
114 
115 Biobuf *
prepare(int i,char * arg)116 prepare(int i, char *arg)
117 {
118 	struct line *p;
119 	int j, h;
120 	Biobuf *bp;
121 	char *cp, buf[MAXLINELEN];
122 	int nbytes;
123 	Rune r;
124 
125 	bp = Bopen(arg, OREAD);
126 	if (!bp) {
127 		panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
128 		return 0;
129 	}
130 	if (binary)
131 		return bp;
132 	nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
133 	if (nbytes > 0) {
134 		cp = buf;
135 		while (cp < buf+nbytes-UTFmax) {
136 			/*
137 			 * heuristic for a binary file in the
138 			 * brave new UNICODE world
139 			 */
140 			cp += chartorune(&r, cp);
141 			if (r == 0 || (r > 0x7f && r <= 0xa0)) {
142 				binary++;
143 				return bp;
144 			}
145 		}
146 		Bseek(bp, 0, 0);
147 	}
148 	p = MALLOC(struct line, 3);
149 	for (j = 0; h = readhash(bp, buf); p[j].value = h)
150 		p = REALLOC(p, struct line, (++j+3));
151 	len[i] = j;
152 	file[i] = p;
153 	input[i] = bp;			/*fix*/
154 	if (i == 0) {			/*fix*/
155 		file1 = arg;
156 		firstchange = 0;
157 	}
158 	else
159 		file2 = arg;
160 	return bp;
161 }
162 
163 static int
squishspace(char * buf)164 squishspace(char *buf)
165 {
166 	char *p, *q;
167 	int space;
168 
169 	for (space = 0, q = p = buf; *q; q++) {
170 		if (isspace(*q)) {
171 			space++;
172 			continue;
173 		}
174 		if (space && bflag == 1) {
175 			*p++ = ' ';
176 			space = 0;
177 		}
178 		*p++ = *q;
179 	}
180 	*p = 0;
181 	return p - buf;
182 }
183 
184 /*
185  * need to fix up for unexpected EOF's
186  */
187 void
check(Biobuf * bf,Biobuf * bt)188 check(Biobuf *bf, Biobuf *bt)
189 {
190 	int f, t, flen, tlen;
191 	char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
192 
193 	ixold[0] = ixnew[0] = 0;
194 	for (f = t = 1; f < len[0]; f++) {
195 		flen = readline(bf, fbuf);
196 		ixold[f] = ixold[f-1] + flen + 1;		/* ftell(bf) */
197 		if (J[f] == 0)
198 			continue;
199 		do {
200 			tlen = readline(bt, tbuf);
201 			ixnew[t] = ixnew[t-1] + tlen + 1;	/* ftell(bt) */
202 		} while (t++ < J[f]);
203 		if (bflag) {
204 			flen = squishspace(fbuf);
205 			tlen = squishspace(tbuf);
206 		}
207 		if (flen != tlen || strcmp(fbuf, tbuf))
208 			J[f] = 0;
209 	}
210 	while (t < len[1]) {
211 		tlen = readline(bt, tbuf);
212 		ixnew[t] = ixnew[t-1] + tlen + 1;	/* fseek(bt) */
213 		t++;
214 	}
215 }
216 
217 static void
range(int a,int b,char * separator)218 range(int a, int b, char *separator)
219 {
220 	Bprint(&stdout, "%d", a > b ? b: a);
221 	if (a < b)
222 		Bprint(&stdout, "%s%d", separator, b);
223 }
224 
225 static void
fetch(long * f,int a,int b,Biobuf * bp,char * s)226 fetch(long *f, int a, int b, Biobuf *bp, char *s)
227 {
228 	char buf[MAXLINELEN];
229 	int maxb;
230 
231 	if(a <= 1)
232 		a = 1;
233 	if(bp == input[0])
234 		maxb = len[0];
235 	else
236 		maxb = len[1];
237 	if(b > maxb)
238 		b = maxb;
239 	if(a > maxb)
240 		return;
241 	Bseek(bp, f[a-1], 0);
242 	while (a++ <= b) {
243 		readline(bp, buf);
244 		Bprint(&stdout, "%s%s\n", s, buf);
245 	}
246 }
247 
/*
 * One difference saved by change() for the 'c' and 'a' output modes:
 * lines a..b of the old file against lines c..d of the new file.
 * a > b means nothing is removed, c > d means nothing is added.
 */
typedef struct Change Change;
struct Change
{
	int a;	/* first old line */
	int b;	/* last old line */
	int c;	/* first new line */
	int d;	/* last new line */
};

Change *changes;	/* saved differences; change() grows the array 1024 entries at a time */
int nchanges;		/* entries in use; flushchanges() resets it to 0 */
259 
260 void
change(int a,int b,int c,int d)261 change(int a, int b, int c, int d)
262 {
263 	char verb;
264 	char buf[4];
265 	Change *ch;
266 
267 	if (a > b && c > d)
268 		return;
269 	anychange = 1;
270 	if (mflag && firstchange == 0) {
271 		if(mode) {
272 			buf[0] = '-';
273 			buf[1] = mode;
274 			buf[2] = ' ';
275 			buf[3] = '\0';
276 		} else {
277 			buf[0] = '\0';
278 		}
279 		Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
280 		firstchange = 1;
281 	}
282 	verb = a > b ? 'a': c > d ? 'd': 'c';
283 	switch(mode) {
284 	case 'e':
285 		range(a, b, ",");
286 		Bputc(&stdout, verb);
287 		break;
288 	case 0:
289 		range(a, b, ",");
290 		Bputc(&stdout, verb);
291 		range(c, d, ",");
292 		break;
293 	case 'n':
294 		Bprint(&stdout, "%s:", file1);
295 		range(a, b, ",");
296 		Bprint(&stdout, " %c ", verb);
297 		Bprint(&stdout, "%s:", file2);
298 		range(c, d, ",");
299 		break;
300 	case 'f':
301 		Bputc(&stdout, verb);
302 		range(a, b, " ");
303 		break;
304 	case 'c':
305 	case 'a':
306 		if(nchanges%1024 == 0)
307 			changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0]));
308 		ch = &changes[nchanges++];
309 		ch->a = a;
310 		ch->b = b;
311 		ch->c = c;
312 		ch->d = d;
313 		return;
314 	}
315 	Bputc(&stdout, '\n');
316 	if (mode == 0 || mode == 'n') {
317 		fetch(ixold, a, b, input[0], "< ");
318 		if (a <= b && c <= d)
319 			Bprint(&stdout, "---\n");
320 	}
321 	fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
322 	if (mode != 0 && mode != 'n' && c <= d)
323 		Bprint(&stdout, ".\n");
324 }
325 
/*
 * Tuning for the context output: changeset() and flushchanges()
 * both size their windows from Lines.
 */
enum
{
	Lines = 3,	/* number of lines of context shown */
};
330 
331 int
changeset(int i)332 changeset(int i)
333 {
334 	while(i<nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
335 		i++;
336 	if(i<nchanges)
337 		return i+1;
338 	return nchanges;
339 }
340 
341 void
flushchanges(void)342 flushchanges(void)
343 {
344 	int a, b, c, d, at;
345 	int i, j;
346 
347 	if(nchanges == 0)
348 		return;
349 
350 	for(i=0; i<nchanges; ){
351 		j = changeset(i);
352 		a = changes[i].a-Lines;
353 		b = changes[j-1].b+Lines;
354 		c = changes[i].c-Lines;
355 		d = changes[j-1].d+Lines;
356 		if(a < 1)
357 			a = 1;
358 		if(c < 1)
359 			c = 1;
360 		if(b > len[0])
361 			b = len[0];
362 		if(d > len[1])
363 			d = len[1];
364 		if(mode == 'a'){
365 			a = 1;
366 			b = len[0];
367 			c = 1;
368 			d = len[1];
369 			j = nchanges;
370 		}
371 		Bprint(&stdout, "%s:", file1);
372 		range(a, b, ",");
373 		Bprint(&stdout, " - ");
374 		Bprint(&stdout, "%s:", file2);
375 		range(c, d, ",");
376 		Bputc(&stdout, '\n');
377 		at = a;
378 		for(; i<j; i++){
379 			fetch(ixold, at, changes[i].a-1, input[0], "  ");
380 			fetch(ixold, changes[i].a, changes[i].b, input[0], "- ");
381 			fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ ");
382 			at = changes[i].b+1;
383 		}
384 		fetch(ixold, at, b, input[0], "  ");
385 	}
386 	nchanges = 0;
387 }
388