xref: /plan9/sys/src/cmd/diff/diffio.c (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
5 #include "diff.h"
6 
/*
 * One record per input line.  prepare() builds an array of these
 * for each input file and publishes it through file[].
 */
7 struct line {
8 	int	serial;	/* not referenced in this file; presumably the line's ordinal -- confirm in the matching code */
9 	int	value;	/* hash of the line's text, stored by prepare() from readhash() */
10 };
11 extern struct line *file[2];	/* per-input line tables; prepare() stores file[i] */
12 extern int len[2];		/* per-input line counts; prepare() stores len[i] */
13 extern long *ixold, *ixnew;	/* byte offsets filled in by check(); entry n is where line n+1 starts */
14 extern int *J;			/* J[f] is the new-file line paired with old line f, or 0 for none */
15 
/*
 * State remembered between prepare() and change().
 */
16 static Biobuf *input[2];	/* open inputs saved by prepare(); change() reads lines back from them */
17 static char *file1, *file2;	/* names of input 0 and input 1, saved by prepare() for change()'s headers */
18 static int firstchange;	/* 0 until change() has printed its "diff ..." header (only printed when mflag is set) */
19 
20 #define MAXLINELEN	4096	/* size of every line buffer in this file; readline() keeps at most MAXLINELEN-1 bytes */
21 #define MIN(x, y)	((x) < (y) ? (x): (y))	/* smaller of x and y; one argument is evaluated twice */
22 
23 static int
24 readline(Biobuf *bp, char *buf)
25 {
26 	int c;
27 	char *p, *e;
28 
29 	p = buf;
30 	e = p + MAXLINELEN-1;
31 	do {
32 		c = Bgetc(bp);
33 		if (c < 0) {
34 			if (p == buf)
35 				return -1;
36 			break;
37 		}
38 		if (c == '\n')
39 			break;
40 		*p++ = c;
41 	} while (p < e);
42 	*p = 0;
43 	if (c != '\n' && c >= 0) {
44 		do c = Bgetc(bp);
45 		while (c >= 0 && c != '\n');
46 	}
47 	return p - buf;
48 }
49 
#define HALFLONG 16	/* bit position at which low() and high() split a value */
/*
 * low(x) is the bottom HALFLONG bits of x; high(x) is x shifted
 * right by HALFLONG bits.  The argument is parenthesized so that
 * expressions such as low(a + b) or high(a | b) expand with the
 * intended precedence.
 */
#define low(x)	((x) & ((1L<<HALFLONG)-1))
#define high(x)	((x) >> HALFLONG)
53 
54 /*
55  * hashing has the effect of
56  * arranging line in 7-bit bytes and then
57  * summing 1-s complement in 16-bit hunks
58  */
59 static int
60 readhash(Biobuf *bp, char *buf)
61 {
62 	long sum;
63 	unsigned shift;
64 	char *p;
65 	int len, space;
66 
67 	sum = 1;
68 	shift = 0;
69 	if ((len = readline(bp, buf)) == -1)
70 		return 0;
71 	p = buf;
72 	switch(bflag)	/* various types of white space handling */
73 	{
74 	case 0:
75 		while (len--) {
76 			sum += (long)*p++ << (shift &= (HALFLONG-1));
77 			shift += 7;
78 		}
79 		break;
80 	case 1:
81 		/*
82 		 * coalesce multiple white-space
83 		 */
84 		for (space = 0; len--; p++) {
85 			if (isspace(*p)) {
86 				space++;
87 				continue;
88 			}
89 			if (space) {
90 				shift += 7;
91 				space = 0;
92 			}
93 			sum += (long)*p << (shift &= (HALFLONG-1));
94 			shift += 7;
95 		}
96 		break;
97 	default:
98 		/*
99 		 * strip all white-space
100 		 */
101 		while (len--) {
102 			if (isspace(*p)) {
103 				p++;
104 				continue;
105 			}
106 			sum += (long)*p++ << (shift &= (HALFLONG-1));
107 			shift += 7;
108 		}
109 		break;
110 	}
111 	sum = low(sum) + high(sum);
112 	return ((short)low(sum) + (short)high(sum));
113 }
114 
115 Biobuf *
116 prepare(int i, char *arg)
117 {
118 	struct line *p;
119 	int j, h;
120 	Biobuf *bp;
121 	char *cp, buf[MAXLINELEN];
122 	int nbytes;
123 	Rune r;
124 
125 	bp = Bopen(arg, OREAD);
126 	if (!bp) {
127 		panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
128 		return 0;
129 	}
130 	nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
131 	if (nbytes > 0) {
132 		cp = buf;
133 		while (cp < buf+nbytes-UTFmax) {
134 			/*
135 			 * heuristic for a binary file in the
136 			 * brave new UNICODE world
137 			 */
138 			cp += chartorune(&r, cp);
139 			if (r == 0 || (r > 0x7f && r <= 0xa0)) {
140 				Bterm(bp);
141 				panic(mflag ? 0: 2, "binary file %s\n", arg);
142 				return 0;
143 			}
144 		}
145 		Bseek(bp, 0, 0);
146 	}
147 	p = MALLOC(struct line, 3);
148 	for (j = 0; h = readhash(bp, buf); p[j].value = h)
149 		p = REALLOC(p, struct line, (++j+3));
150 	len[i] = j;
151 	file[i] = p;
152 	input[i] = bp;			/*fix*/
153 	if (i == 0) {			/*fix*/
154 		file1 = arg;
155 		firstchange = 0;
156 	}
157 	else
158 		file2 = arg;
159 	return bp;
160 }
161 
162 static int
163 squishspace(char *buf)
164 {
165 	char *p, *q;
166 	int space;
167 
168 	for (space = 0, q = p = buf; *q; q++) {
169 		if (isspace(*q)) {
170 			space++;
171 			continue;
172 		}
173 		if (space && bflag == 1) {
174 			*p++ = ' ';
175 			space = 0;
176 		}
177 		*p++ = *q;
178 	}
179 	*p = 0;
180 	return p - buf;
181 }
182 
183 /*
184  * need to fix up for unexpected EOF's
185  */
186 void
187 check(Biobuf *bf, Biobuf *bt)
188 {
189 	int f, t, flen, tlen;
190 	char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
191 
192 	ixold[0] = ixnew[0] = 0;
193 	for (f = t = 1; f < len[0]; f++) {
194 		flen = readline(bf, fbuf);
195 		ixold[f] = ixold[f-1] + flen + 1;		/* ftell(bf) */
196 		if (J[f] == 0)
197 			continue;
198 		do {
199 			tlen = readline(bt, tbuf);
200 			ixnew[t] = ixnew[t-1] + tlen + 1;	/* ftell(bt) */
201 		} while (t++ < J[f]);
202 		if (bflag) {
203 			flen = squishspace(fbuf);
204 			tlen = squishspace(tbuf);
205 		}
206 		if (flen != tlen || strcmp(fbuf, tbuf))
207 			J[f] = 0;
208 	}
209 	while (t < len[1]) {
210 		tlen = readline(bt, tbuf);
211 		ixnew[t] = ixnew[t-1] + tlen + 1;	/* fseek(bt) */
212 		t++;
213 	}
214 }
215 
216 static void
217 range(int a, int b, char *separator)
218 {
219 	Bprint(&stdout, "%d", a > b ? b: a);
220 	if (a < b)
221 		Bprint(&stdout, "%s%d", separator, b);
222 }
223 
224 static void
225 fetch(long *f, int a, int b, Biobuf *bp, char *s)
226 {
227 	char buf[MAXLINELEN];
228 
229 	Bseek(bp, f[a-1], 0);
230 	while (a++ <= b) {
231 		readline(bp, buf);
232 		Bprint(&stdout, "%s%s\n", s, buf);
233 	}
234 }
235 
236 void
237 change(int a, int b, int c, int d)
238 {
239 	char verb;
240 	char buf[4];
241 
242 	if (a > b && c > d)
243 		return;
244 	anychange = 1;
245 	if (mflag && firstchange == 0) {
246 		if(mode) {
247 			buf[0] = '-';
248 			buf[1] = mode;
249 			buf[2] = ' ';
250 			buf[3] = '\0';
251 		} else {
252 			buf[0] = '\0';
253 		}
254 		Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
255 		firstchange = 1;
256 	}
257 	verb = a > b ? 'a': c > d ? 'd': 'c';
258 	switch(mode) {
259 	case 'e':
260 		range(a, b, ",");
261 		Bputc(&stdout, verb);
262 		break;
263 	case 0:
264 		range(a, b, ",");
265 		Bputc(&stdout, verb);
266 		range(c, d, ",");
267 		break;
268 	case 'n':
269 		Bprint(&stdout, "%s:", file1);
270 		range(a, b, ",");
271 		Bprint(&stdout, " %c ", verb);
272 		Bprint(&stdout, "%s:", file2);
273 		range(c, d, ",");
274 		break;
275 	case 'f':
276 		Bputc(&stdout, verb);
277 		range(a, b, " ");
278 		break;
279 	}
280 	Bputc(&stdout, '\n');
281 	if (mode == 0 || mode == 'n') {
282 		fetch(ixold, a, b, input[0], "< ");
283 		if (a <= b && c <= d)
284 			Bprint(&stdout, "---\n");
285 	}
286 	fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
287 	if (mode != 0 && mode != 'n' && c <= d)
288 		Bprint(&stdout, ".\n");
289 }
290