xref: /plan9/sys/src/cmd/diff/diffio.c (revision 9a747e4fd48b9f4522c70c07e8f882a15030f964)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include "diff.h"
6 
/*
 * One entry of the per-file line table built by prepare().
 */
struct line {
	int	serial;		/* not assigned in this file; presumably the line number -- confirm in the matching code */
	int	value;		/* hash of the line's text as returned by readhash() */
};

/*
 * Tables shared with the rest of diff.  Index 0 is the old file and
 * index 1 the new file.
 */
extern struct line *file[2];	/* line tables; prepare() fills entries 1..len[i] */
extern int len[2];		/* number of lines prepare() hashed in each file */
extern long *ixold;		/* old file: ixold[k] is the seek offset just past line k */
extern long *ixnew;		/* new file: same layout as ixold */
extern int *J;			/* J[f] is the new-file line paired with old line f, 0 if none */
15 
16 static Biobuf *input[2];
17 static char *file1, *file2;
18 static int firstchange;
19 
/* size of every line buffer, terminating NUL included */
#define MAXLINELEN	4096
/* smaller of x and y; the winning argument is evaluated twice */
#define MIN(x, y)	((x) < (y) ? (x) : (y))
22 
23 static int
24 readline(Biobuf *bp, char *buf)
25 {
26 	int c;
27 	char *p, *e;
28 
29 	p = buf;
30 	e = p + MAXLINELEN-1;
31 	do {
32 		c = Bgetc(bp);
33 		if (c < 0) {
34 			if (p == buf)
35 				return -1;
36 			break;
37 		}
38 		if (c == '\n')
39 			break;
40 		*p++ = c;
41 	} while (p < e);
42 	*p = 0;
43 	if (c != '\n' && c >= 0) {
44 		do c = Bgetc(bp);
45 		while (c >= 0 && c != '\n');
46 	}
47 	return p - buf;
48 }
49 
/*
 * Split a long into its low HALFLONG bits and the bits above them.
 * Arguments are parenthesised so that expressions such as
 * high(a | b) shift the whole value, not just b.
 */
#define HALFLONG 16
#define low(x)	((x) & ((1L<<HALFLONG)-1))
#define high(x)	((x) >> HALFLONG)
53 
54 /*
55  * hashing has the effect of
56  * arranging line in 7-bit bytes and then
57  * summing 1-s complement in 16-bit hunks
58  */
59 static int
60 readhash(Biobuf *bp, char *buf)
61 {
62 	long sum;
63 	unsigned shift;
64 	char *p;
65 	int len, space;
66 
67 	sum = 1;
68 	shift = 0;
69 	if ((len = readline(bp, buf)) == -1)
70 		return 0;
71 	p = buf;
72 	switch(bflag)	/* various types of white space handling */
73 	{
74 	case 0:
75 		while (len--) {
76 			sum += (long)*p++ << (shift &= (HALFLONG-1));
77 			shift += 7;
78 		}
79 		break;
80 	case 1:
81 		/*
82 		 * coalesce multiple white-space
83 		 */
84 		for (space = 0; len--; p++) {
85 			if (isspace(*p)) {
86 				space++;
87 				continue;
88 			}
89 			if (space) {
90 				shift += 7;
91 				space = 0;
92 			}
93 			sum += (long)*p << (shift &= (HALFLONG-1));
94 			shift += 7;
95 		}
96 		break;
97 	default:
98 		/*
99 		 * strip all white-space
100 		 */
101 		while (len--) {
102 			if (isspace(*p)) {
103 				p++;
104 				continue;
105 			}
106 			sum += (long)*p++ << (shift &= (HALFLONG-1));
107 			shift += 7;
108 		}
109 		break;
110 	}
111 	sum = low(sum) + high(sum);
112 	return ((short)low(sum) + (short)high(sum));
113 }
114 
115 Biobuf *
116 prepare(int i, char *arg)
117 {
118 	struct line *p;
119 	int j, h;
120 	Biobuf *bp;
121 	char *cp, buf[MAXLINELEN];
122 	int nbytes;
123 	Rune r;
124 
125 	bp = Bopen(arg, OREAD);
126 	if (!bp) {
127 		panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
128 		return 0;
129 	}
130 	if (binary)
131 		return bp;
132 	nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
133 	if (nbytes > 0) {
134 		cp = buf;
135 		while (cp < buf+nbytes-UTFmax) {
136 			/*
137 			 * heuristic for a binary file in the
138 			 * brave new UNICODE world
139 			 */
140 			cp += chartorune(&r, cp);
141 			if (r == 0 || (r > 0x7f && r <= 0xa0)) {
142 				binary++;
143 				return bp;
144 			}
145 		}
146 		Bseek(bp, 0, 0);
147 	}
148 	p = MALLOC(struct line, 3);
149 	for (j = 0; h = readhash(bp, buf); p[j].value = h)
150 		p = REALLOC(p, struct line, (++j+3));
151 	len[i] = j;
152 	file[i] = p;
153 	input[i] = bp;			/*fix*/
154 	if (i == 0) {			/*fix*/
155 		file1 = arg;
156 		firstchange = 0;
157 	}
158 	else
159 		file2 = arg;
160 	return bp;
161 }
162 
163 static int
164 squishspace(char *buf)
165 {
166 	char *p, *q;
167 	int space;
168 
169 	for (space = 0, q = p = buf; *q; q++) {
170 		if (isspace(*q)) {
171 			space++;
172 			continue;
173 		}
174 		if (space && bflag == 1) {
175 			*p++ = ' ';
176 			space = 0;
177 		}
178 		*p++ = *q;
179 	}
180 	*p = 0;
181 	return p - buf;
182 }
183 
184 /*
185  * need to fix up for unexpected EOF's
186  */
187 void
188 check(Biobuf *bf, Biobuf *bt)
189 {
190 	int f, t, flen, tlen;
191 	char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
192 
193 	ixold[0] = ixnew[0] = 0;
194 	for (f = t = 1; f < len[0]; f++) {
195 		flen = readline(bf, fbuf);
196 		ixold[f] = ixold[f-1] + flen + 1;		/* ftell(bf) */
197 		if (J[f] == 0)
198 			continue;
199 		do {
200 			tlen = readline(bt, tbuf);
201 			ixnew[t] = ixnew[t-1] + tlen + 1;	/* ftell(bt) */
202 		} while (t++ < J[f]);
203 		if (bflag) {
204 			flen = squishspace(fbuf);
205 			tlen = squishspace(tbuf);
206 		}
207 		if (flen != tlen || strcmp(fbuf, tbuf))
208 			J[f] = 0;
209 	}
210 	while (t < len[1]) {
211 		tlen = readline(bt, tbuf);
212 		ixnew[t] = ixnew[t-1] + tlen + 1;	/* fseek(bt) */
213 		t++;
214 	}
215 }
216 
217 static void
218 range(int a, int b, char *separator)
219 {
220 	Bprint(&stdout, "%d", a > b ? b: a);
221 	if (a < b)
222 		Bprint(&stdout, "%s%d", separator, b);
223 }
224 
225 static void
226 fetch(long *f, int a, int b, Biobuf *bp, char *s)
227 {
228 	char buf[MAXLINELEN];
229 
230 	Bseek(bp, f[a-1], 0);
231 	while (a++ <= b) {
232 		readline(bp, buf);
233 		Bprint(&stdout, "%s%s\n", s, buf);
234 	}
235 }
236 
237 void
238 change(int a, int b, int c, int d)
239 {
240 	char verb;
241 	char buf[4];
242 
243 	if (a > b && c > d)
244 		return;
245 	anychange = 1;
246 	if (mflag && firstchange == 0) {
247 		if(mode) {
248 			buf[0] = '-';
249 			buf[1] = mode;
250 			buf[2] = ' ';
251 			buf[3] = '\0';
252 		} else {
253 			buf[0] = '\0';
254 		}
255 		Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
256 		firstchange = 1;
257 	}
258 	verb = a > b ? 'a': c > d ? 'd': 'c';
259 	switch(mode) {
260 	case 'e':
261 		range(a, b, ",");
262 		Bputc(&stdout, verb);
263 		break;
264 	case 0:
265 		range(a, b, ",");
266 		Bputc(&stdout, verb);
267 		range(c, d, ",");
268 		break;
269 	case 'n':
270 		Bprint(&stdout, "%s:", file1);
271 		range(a, b, ",");
272 		Bprint(&stdout, " %c ", verb);
273 		Bprint(&stdout, "%s:", file2);
274 		range(c, d, ",");
275 		break;
276 	case 'f':
277 		Bputc(&stdout, verb);
278 		range(a, b, " ");
279 		break;
280 	}
281 	Bputc(&stdout, '\n');
282 	if (mode == 0 || mode == 'n') {
283 		fetch(ixold, a, b, input[0], "< ");
284 		if (a <= b && c <= d)
285 			Bprint(&stdout, "---\n");
286 	}
287 	fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
288 	if (mode != 0 && mode != 'n' && c <= d)
289 		Bprint(&stdout, ".\n");
290 }
291 
292