xref: /inferno-os/os/boot/mpc/fblt.c (revision 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a)
1 #include <u.h>
2 #include <libc.h>
3 #include <libg.h>
4 #include <gnot.h>
5 
/*
 * bitblt operates a 'word' at a time.
 * WBITS is the number of bits in a word,
 * LWBITS is log2(WBITS),
 * W2L is the number of words in a long,
 * WMASK has bits set for the low order word of a long,
 * WType is a pointer to a word.
 *
 * These defaults apply only when WBITS has not been defined already.
 */
#ifndef WBITS
#define WBITS	32
#define LWBITS	5
#define	W2L	1
#define WMASK	(~0UL)	/* parenthesized so it expands safely in any expression */
typedef ulong	*WType;
#endif
21 
/*
 * NOTE(review): nothing visible in this file tests DEBUG; presumably it is
 * consumed by an included header or kept for ad-hoc debugging -- confirm.
 */
#define DEBUG
23 
#ifdef TEST
/*
 * globals used for testing
 */
int	FORCEFORW;	/* nonzero (with FORCEBAKW zero): gbitblt keeps top-to-bottom order when sm == dm */
int	FORCEBAKW;	/* nonzero: gbitblt walks the scanlines bottom-to-top when sm == dm */
GBitmap	*curdm, *cursm;	/* dm and sm arguments of the most recent gbitblt call */
Point	curpt;		/* pt argument of the most recent gbitblt call */
Rectangle curr;		/* r argument of the most recent gbitblt call */
Fcode	curf;		/* fcode argument of the most recent gbitblt call */
void	*mem;		/* NOTE(review): not referenced by the code visible in this file */
#endif
36 
/*
 * Expand one source word into 1<<x destination words.
 *
 * sw holds pixels that are 1<<sdep bits wide.  Each pixel is widened to
 * (1<<sdep)<<x bits by repeating its bits, and the resulting words are
 * stored to buf[0], buf[1], ... starting with the most significant
 * pixels of sw.  buf must have room for 1<<x words.
 * The code works on 32-bit words (the constant 32 below).
 */
static void
gbitexplode(ulong sw, ulong *buf, int sdep, int x)
{
	int j, o, q, n, nw, inc, qinc;
	ulong s, dw, pix;

	inc = 1 << sdep;		/* bits per source pixel */
	pix = (1 << inc) - 1;		/* mask covering one source pixel */
	nw = 1 << x;			/* widening factor; also the number of words produced */
	n = 32 >> x;			/* source bits consumed per output word */
	qinc = (nw << sdep) - inc;	/* extra left shift gained by each successive pixel */
	for(o = 32 - n; o >= 0; o -= n){
		dw = 0;
		s = sw >> o;		/* the next n source bits are now the low n bits */
		q = 0;
		/* spread: the pixel at bit j ends up at bit j*nw */
		for(j = 0; j < n; j += inc){
			dw |= (s & (pix << j)) << q;
			q += qinc;
		}
		/* fill each widened slot by doubling the pixel x times */
		for(j = 0; j < x; j++)
			dw |= dw << (inc << j);
		*buf++ = dw;
	}
}
61 
62 /*
63 void
64 main(void)
65 {
66 	ulong buf[128];
67 
68 	gbitexplode(0x7777, buf, 0, 3);
69 	exits(0);
70 }
71 */
72 
73 void
gbitblt(GBitmap * dm,Point pt,GBitmap * sm,Rectangle r,Fcode fcode)74 gbitblt(GBitmap *dm, Point pt, GBitmap *sm, Rectangle r, Fcode fcode)
75 {
76 	int	width;		/* width in bits of dst */
77 	int	wwidth;		/* floor width in words */
78 	int	height;		/* height in pixels minus 1 */
79 	int	sdep;		/* src ldepth */
80 	int 	ddep;		/* dst ldepth */
81 	int	deltadep;	/* diff between ldepths */
82 	int	sspan;		/* words between scanlines in src */
83 	int	dspan;		/* words between scanlines in dst */
84 	int	soff;		/* bit offset of src start point */
85 	int	sdest;		/* bit offset of src start point that matches doff when expanded */
86 	int	doff;		/* bit offset of dst start point */
87 	int	delta;		/* amount to shift src by */
88 	int	sign;		/* of delta */
89 	ulong	*saddr;
90 	ulong	*daddr;
91 	ulong	*s;
92 	ulong	*d;
93 	ulong	mask;
94 	ulong	tmp;		/* temp storage source word */
95 	ulong	sw;		/* source word constructed */
96 	ulong	dw;		/* dest word fetched */
97 	ulong	lmask;		/* affected pixels in leftmost dst word */
98 	ulong	rmask;		/* affected pixels in rightmost dst word */
99 	int	i;
100 	int	j;
101 	ulong	buf[32];	/* for expanding a source */
102 	ulong	*p;		/* pointer into buf */
103 	int	spare;		/* number of words already converted */
104 
105 
106 #ifdef TEST
107 	curdm = dm;
108 	cursm = sm;
109 	curpt = pt;
110 	curr = r;
111 	curf = fcode;
112 #endif
113 
114 	gbitbltclip(&dm);
115 
116 	width = r.max.x - r.min.x;
117 	if(width <= 0)
118 		return;
119 	height = r.max.y - r.min.y - 1;
120 	if(height < 0)
121 		return;
122 
123 	ddep = dm->ldepth;
124 	pt.x <<= ddep;
125 	width <<= ddep;
126 
127 	sdep = sm->ldepth;
128 	r.min.x <<= sdep;
129 	r.max.x <<= sdep;
130 
131 	dspan = dm->width * W2L;
132 	sspan = sm->width * W2L;
133 
134 	daddr = (ulong*)((WType)dm->base
135 			+ dm->zero*W2L + pt.y*dspan
136 			+ (pt.x >> LWBITS));
137 	saddr = (ulong*)((WType)sm->base
138 			+ sm->zero*W2L + r.min.y*sspan
139 			+ (r.min.x >> LWBITS));
140 
141 	doff = pt.x & (WBITS - 1);
142 	lmask = WMASK >> doff;
143 	rmask = (WMASK << (WBITS - ((doff+width) & (WBITS-1))))&WMASK;
144 	if(!rmask)
145 		rmask = WMASK;
146 	soff = r.min.x & (WBITS-1);
147 	wwidth = ((pt.x+width-1)>>LWBITS) - (pt.x>>LWBITS);
148 
149 	if(sm == dm){
150 #ifdef TEST
151 		if(!FORCEBAKW &&
152 		   (FORCEFORW || sm != dm || saddr > daddr ||
153 		    (saddr == daddr && soff > doff)))
154 			;
155 		else{
156 			daddr += height * dspan;
157 			saddr += height * sspan;
158 			sspan -= 2 * W2L * sm->width;
159 			dspan -= 2 * W2L * dm->width;
160 		}
161 #else
162 		if(r.min.y < pt.y){	/* bottom to top */
163 			daddr += height * dspan;
164 			saddr += height * sspan;
165 			sspan -= 2 * W2L * sm->width;
166 			dspan -= 2 * W2L * dm->width;
167 		}else if(r.min.y == pt.y && r.min.x < pt.x)
168 			abort()/*goto right*/;
169 #endif
170 	}
171 	if(wwidth == 0)		/* collapse masks for narrow cases */
172 		lmask &= rmask;
173 	fcode &= F;
174 
175 	deltadep = ddep - sdep;
176 	sdest = doff >> deltadep;
177 	delta = soff - sdest;
178 	sign = 0;
179 	if(delta < 0){
180 		sign = 1;
181 		delta = -delta;
182 	}
183 
184 	p = 0;
185 	for(j = 0; j <= height; j++){
186 		d = daddr;
187 		s = saddr;
188 		mask = lmask;
189 		tmp = 0;
190 		if(!sign)
191 			tmp = *s++;
192 		spare = 0;
193 		for(i = wwidth; i >= 0; i--){
194 			if(spare)
195 				sw = *p++;
196 			else{
197 				if(sign){
198 					sw = tmp << (WBITS-delta);
199 					tmp = *s++;
200 					sw |= tmp >> delta;
201 				}else{
202 					sw = tmp << delta;
203 					tmp = *s++;
204 					if(delta)
205 						sw |= tmp >> (WBITS-delta);
206 				}
207 				spare = 1 << deltadep;
208 				if(deltadep >= 1){
209 					gbitexplode(sw, buf, sdep, deltadep);
210 					p = buf;
211 					sw = *p++;
212 				}
213 			}
214 
215 			dw = *d;
216 			switch(fcode){		/* ltor bit aligned */
217 			case Zero:	*d = dw & ~mask;		break;
218 			case DnorS:	*d = dw ^ ((~sw | dw) & mask);	break;
219 			case DandnotS:	*d = dw ^ ((sw & dw) & mask);	break;
220 			case notS:	*d = dw ^ ((~sw ^ dw) & mask);	break;
221 			case notDandS:	*d = dw ^ ((sw | dw) & mask);	break;
222 			case notD:	*d = dw ^ mask;			break;
223 			case DxorS:	*d = dw ^ (sw & mask);		break;
224 			case DnandS:	*d = dw ^ ((sw | ~dw) & mask);	break;
225 			case DandS:	*d = dw ^ ((~sw & dw) & mask);	break;
226 			case DxnorS:	*d = dw ^ (~sw & mask);		break;
227 			case D:						break;
228 			case DornotS:	*d = dw | (~sw & mask);		break;
229 			case S:		*d = dw ^ ((sw ^ dw) & mask);	break;
230 			case notDorS:	*d = dw ^ (~(sw & dw) & mask);	break;
231 			case DorS:	*d = dw | (sw & mask);		break;
232 			case F:		*d = dw | mask;			break;
233 			}
234 			d++;
235 
236 			mask = WMASK;
237 			if(i == 1)
238 				mask = rmask;
239 			spare--;
240 		}
241 		saddr += sspan;
242 		daddr += dspan;
243 	}
244 }
245 
246 #ifdef TEST
247 void	prprog(void);
248 GBitmap *bb1, *bb2;
249 ulong	*src, *dst, *xdst, *xans;
250 int	swds, dwds;
251 long	ticks;
252 int	timeit;
253 
254 long
func(int f,long s,int sld,long d,int dld)255 func(int f, long s, int sld, long d, int dld)
256 {
257 	long a;
258 	int sh, i, db, sb;
259 
260 	db = 1 << dld;
261 	sb = 1 << sld;
262 	sh = db - sb;
263 	if(sh > 0) {
264 		a = s;
265 		for(i = sb; i<db; i += sb){
266 			a <<= sb;
267 			s |= a;
268 		}
269 	} else if(sh < 0)
270 		s >>= -sh;
271 
272 	switch(f){
273 	case Zero:	d = 0;			break;
274 	case DnorS:	d = ~(d|s);		break;
275 	case DandnotS:	d = d & ~s;		break;
276 	case notS:	d = ~s;			break;
277 	case notDandS:	d = ~d & s;		break;
278 	case notD:	d = ~d;			break;
279 	case DxorS:	d = d ^ s;		break;
280 	case DnandS:	d = ~(d&s);		break;
281 	case DandS:	d = d & s;		break;
282 	case DxnorS:	d = ~(d^s);		break;
283 	case S:		d = s;			break;
284 	case DornotS:	d = d | ~s;		break;
285 	case D:		d = d;			break;
286 	case notDorS:	d = ~d | s;		break;
287 	case DorS:	d = d | s;		break;
288 	case F:		d = ~0;			break;
289 	}
290 
291 	d &= ((1<<db)-1);
292 	return d;
293 }
294 
295 void
run(int fr,int to,int w,int op)296 run(int fr, int to, int w, int op)
297 {
298 	int i, j, f, t, fy, ty;
299 	extern long *_clock;
300 
301 	fr += bb2->r.min.x;
302 	to += bb1->r.min.x;
303 	fy = bb2->r.min.y + 1;
304 	ty = bb1->r.min.y + 1;
305 	if(timeit) {
306 		memcpy(dst, xdst, dwds * sizeof(long));
307 		ticks -= *_clock;
308 		gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
309 		ticks += *_clock;
310 		return;
311 	}
312 	f = fr;
313 	t = to;
314 	memcpy(dst, xdst, dwds * sizeof(long));
315 	for(i=0; i<w; i++) {
316 		gbitblt(bb1, Pt(t,ty), bb2, Rect(f,fy,f+1,fy+1), op);
317 		gbitblt(bb1, Pt(t,ty+1), bb2, Rect(f,fy+1,f+1,fy+2), op);
318 		f++;
319 		t++;
320 	}
321 	memcpy(xans, dst, dwds * sizeof(long));
322 
323 	memcpy(dst, xdst, dwds * sizeof(long));
324 	gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
325 
326 	if(memcmp(xans, dst, dwds * sizeof(long))) {
327 		/*
328 		 * print src and dst row offset, width in bits, and forw/back
329 		 * then print for each of the four rows: the source (s),
330 		 * the dest (d), the good value of the answer (g),
331 		 * and the actual bad value of the answer (b)
332 		 */
333 		print("fr=%d to=%d w=%d fb=%d%d\n",
334 			fr, to, w, FORCEFORW, FORCEBAKW);
335 		print("dst bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
336 			bb1->base, bb1->zero, bb1->width, bb1->ldepth,
337 			bb1->r.min.x, bb1->r.min.y, bb1->r.max.x, bb1->r.max.y);
338 		print("src bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
339 			bb2->base, bb2->zero, bb2->width, bb2->ldepth,
340 			bb2->r.min.x, bb2->r.min.y, bb2->r.max.x, bb2->r.max.y);
341 		for(j=0; 7*j < dwds; j++) {
342 			print("\ns");
343 			for(i=0; i<7 && 7*j+i < dwds; i++)
344 				print(" %.8lux", src[7*j + i]);
345 			print("\nd");
346 			for(i=0; i<7 && 7*j+i < dwds; i++)
347 				print(" %.8lux", xdst[7*j + i]);
348 			print("\ng");
349 			for(i=0; i<7 && 7*j+i < dwds; i++)
350 				print(" %.8lux", xans[7*j + i]);
351 			print("\nb");
352 			for(i=0; i<7 && 7*j+i < dwds; i++)
353 				print(" %.8lux", dst[7*j + i]);
354 			print("\n");
355 		}
356 		prprog();
357 	}
358 }
359 
/*
 * Called after a test failure has been reported; this version simply
 * terminates the program via exits(0).
 */
void
prprog(void)
{
	exits(0);
}
365 
366 int
main(int argc,char * argv[])367 main(int argc, char *argv[])
368 {
369 	int f, t, w, i, sld, dld, op, iters, simple;
370 	ulong s, d, spix, dpix, apix, fpix, m, *ps, *pd;
371 	Point sorg, dorg;
372 	GBitmap *bs, *bd;
373 	long seed;
374 	char *ct;
375 
376 	sld = 0;
377 	dld = 0;
378 	timeit = 0;
379 	iters = 200;
380 	simple = 0;
381 	ARGBEGIN {
382 	case 'i':
383 		iters = atoi(ARGF());
384 		break;
385 	case 's':
386 		simple = 1;
387 		break;
388 	case 't':
389 		timeit = 1;
390 		ct = ARGF();
391 		if(ct)
392 			iters = atoi(ct);
393 		break;
394 	} ARGEND
395 	if(argc > 0)
396 		sld = atoi(argv[0]);
397 	if(argc > 1)
398 		dld = atoi(argv[1]);
399 	if(!timeit && !simple) {
400 		seed = time(0);
401 		print("seed %lux\n", seed); srand(seed);	/**/
402 	}
403 
404 	print("sld %d dld %d\n", sld, dld);
405 	op = 1;
406 
407 	/* bitmaps for 1-bit tests */
408 	bd = gballoc(Rect(0,0,32,1), dld);
409 	bs = gballoc(Rect(0,0,32,1), sld);
410 	for(i=0; i<bs->width; i++)
411 		bs->base[i] = lrand();
412 
413 	/* bitmaps for rect tests */
414 	if(simple) {
415 		dorg = Pt(0,0);
416 		sorg = Pt(0,0);
417 	} else {
418 		dorg = Pt(nrand(63)-31,nrand(63)-31);
419 		sorg = Pt(nrand(63)-31,nrand(63)-31);
420 	}
421 	bb1 = gballoc(Rpt(dorg,add(dorg,Pt(200,4))), dld);
422 	bb2 = gballoc(Rpt(sorg,add(sorg,Pt(200,4))), sld);
423 	dwds = bb1->width * Dy(bb1->r);
424 	swds = bb2->width * Dy(bb2->r);
425 	dst = bb1->base;
426 	src = bb2->base;
427 	xdst = malloc(dwds * sizeof(long));
428 	xans =  malloc(dwds * sizeof(long));
429 	for(i=0; i<swds; i++)
430 		src[i] = lrand();
431 	for(i=0; i<dwds; i++)
432 		xdst[i] = lrand();
433 
434 loop:
435 	print("Op %d\n", op);
436 	if(!timeit) {
437 		print("one pixel\n");
438 		ps = bs->base;
439 		pd = bd->base;
440 		FORCEFORW = 1;
441 		FORCEBAKW = 0;
442 		for(i=0; i<1000; i++, FORCEFORW = !FORCEFORW, FORCEBAKW = !FORCEBAKW) {
443 			f = nrand(32 >> sld);
444 			t = nrand(32 >> dld);
445 			s = lrand();
446 			d = lrand();
447 			ps[0] = s;
448 			pd[0] = d;
449 #ifdef T386
450 			spix = (byterev(s) >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
451 			dpix = (byterev(d) >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
452 #else
453 			spix = (s >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
454 			dpix = (d >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
455 #endif
456 #ifdef T386
457 			apix = byterev(func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld)));
458 #else
459 			apix = func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld));
460 #endif
461 			gbitblt(bd, Pt(t,0), bs, Rect(f,0,f+1,1), op);
462 			if(ps[0] != s) {
463 				print("bb src %.8lux %.8lux %d %d\n", ps[0], s, f, t);
464 				exits("error");
465 			}
466 			m = ((1 << (1<<dld)) - 1) << (32 - ((t+1)<<dld));
467 #ifdef T386
468 			m = byterev(m);
469 #endif
470 			if((pd[0] & ~m) != (d & ~m)) {
471 					print("bb dst1 %.8lux %.8lux\n",
472 						s, d);
473 					print("bb      %.8lux %.8lux %d %d\n",
474 						ps[0], pd[0], f, t);
475 					prprog();
476 					exits("error");
477 			}
478 			if((pd[0] & m) != apix) {
479 				spix <<= 32 - ((f+1)<<sld);
480 				dpix <<= 32 - ((t+1)<<dld);
481 #ifdef T386
482 				spix = byterev(spix);
483 				dpix = byterev(dpix);
484 #endif
485 				print("bb dst2 %.8lux %.8lux\n",
486 					s, d);
487 				print("bb      %.8lux %.8lux %d %d\n",
488 					ps[0], pd[0], f, t);
489 				print("bb      %.8lux %.8lux %.8lux %.8lux\n",
490 					spix, dpix, apix, pd[0] & m);
491 				prprog();
492 				exits("error");
493 			}
494 		}
495 	}
496 
497 	print("for\n");
498 	FORCEFORW = 1;
499 	FORCEBAKW = 0;
500 
501 	for(i=0; i<iters; i++) {
502 		f = nrand(64);
503 		t = nrand(64);
504 		w = nrand(130);
505 		run(f, t, w, op);
506 	}
507 
508 	if(sld == dld) {
509 		print("bak\n");
510 		FORCEFORW = 0;
511 		FORCEBAKW = 1;
512 
513 		for(i=0; i<iters; i++) {
514 			f = nrand(64);
515 			t = nrand(64);
516 			w = nrand(130);
517 			run(f, t, w, op);
518 		}
519 	}
520 
521 	if(op < F) {
522 		op++;
523 		goto loop;
524 	}
525 	if(timeit)
526 		print("time: %d ticks\n", ticks);
527 	exits(0);
528 }
529 
530 
531 #endif
532