/* xref: /inferno-os/os/boot/mpc/fblt.c (revision 74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a) */
1*74a4d8c2SCharles.Forsyth #include <u.h>
2*74a4d8c2SCharles.Forsyth #include <libc.h>
3*74a4d8c2SCharles.Forsyth #include <libg.h>
4*74a4d8c2SCharles.Forsyth #include <gnot.h>
5*74a4d8c2SCharles.Forsyth 
/*
 * bitblt operates a 'word' at a time.
 * WBITS is the number of bits in a word
 * LWBITS=log2(WBITS),
 * W2L is the number of words in a long
 * WMASK has bits set for the low order word of a long
 * WType is a pointer to a word
 *
 * The values below are only defaults: they are used when WBITS has not
 * already been defined before this point.
 */
#ifndef WBITS
#define WBITS	32
#define LWBITS	5
#define	W2L	1
#define WMASK	(~0UL)	/* parenthesized so the expansion is a single operand */
typedef ulong	*WType;
#endif
21*74a4d8c2SCharles.Forsyth 
#define DEBUG

#ifdef TEST
/*
 * globals used for testing
 *
 * FORCEFORW and FORCEBAKW are read by gbitblt (TEST build only) to
 * choose the scan direction when source and destination are the same
 * bitmap.  curdm, cursm, curpt, curr and curf are overwritten with the
 * arguments of the most recent gbitblt call.
 */
int	FORCEFORW;	/* non-zero: keep the top-to-bottom scan */
int	FORCEBAKW;	/* non-zero: take the bottom-to-top scan */
GBitmap	*curdm, *cursm;	/* last destination and source bitmaps */
Point	curpt;		/* last destination point */
Rectangle curr;		/* last source rectangle */
Fcode	curf;		/* last function code */
void	*mem;
#endif
36*74a4d8c2SCharles.Forsyth 
/*
 * Widen the pixels of one 32-bit source word.
 *
 * sw holds pixels of (1<<sdep) bits each.  Every pixel is widened to
 * (1<<sdep)<<x bits by repeating its bit pattern 1<<x times, so the
 * single source word becomes 1<<x output words.  They are stored in
 * buf[0] .. buf[(1<<x)-1], the most significant part of sw first.
 *
 * The caller must supply room for 1<<x words in buf.  The word size is
 * hard-wired to 32 bits; each output word consumes 32>>x source bits.
 */
static void
gbitexplode(ulong sw, ulong *buf, int sdep, int x)
{
	int j, o, q, n, nw, inc, qinc;
	ulong s, dw, pix;

	inc = 1 << sdep;		/* bits in one source pixel */
	pix = (1UL << inc) - 1;		/* mask for one source pixel, built in ulong */
	nw = 1 << x;			/* number of output words */
	n = 32 >> x;			/* source bits used per output word */
	qinc = (nw << sdep) - inc;	/* widened pixel size minus source pixel size */
	for(o = 32 - n; o >= 0; o -= n){
		dw = 0;
		s = sw >> o;	/* bring this group of n source bits to the bottom */
		q = 0;
		/* spread the pixels out: pixel k moves up by k*qinc bits */
		for(j = 0; j < n; j += inc){
			dw |= (s & (pix << j)) << q;
			q += qinc;
		}
		/* fill each widened slot by doubling the pattern x times */
		for(j = 0; j < x; j++)
			dw |= dw << (inc << j);
		*buf++ = dw;
	}
}
61*74a4d8c2SCharles.Forsyth 
/*
 * Disabled stand-alone driver for gbitexplode, kept for reference:
 *
void
main(void)
{
	ulong buf[128];

	gbitexplode(0x7777, buf, 0, 3);
	exits(0);
}
*/
72*74a4d8c2SCharles.Forsyth 
73*74a4d8c2SCharles.Forsyth void
gbitblt(GBitmap * dm,Point pt,GBitmap * sm,Rectangle r,Fcode fcode)74*74a4d8c2SCharles.Forsyth gbitblt(GBitmap *dm, Point pt, GBitmap *sm, Rectangle r, Fcode fcode)
75*74a4d8c2SCharles.Forsyth {
76*74a4d8c2SCharles.Forsyth 	int	width;		/* width in bits of dst */
77*74a4d8c2SCharles.Forsyth 	int	wwidth;		/* floor width in words */
78*74a4d8c2SCharles.Forsyth 	int	height;		/* height in pixels minus 1 */
79*74a4d8c2SCharles.Forsyth 	int	sdep;		/* src ldepth */
80*74a4d8c2SCharles.Forsyth 	int 	ddep;		/* dst ldepth */
81*74a4d8c2SCharles.Forsyth 	int	deltadep;	/* diff between ldepths */
82*74a4d8c2SCharles.Forsyth 	int	sspan;		/* words between scanlines in src */
83*74a4d8c2SCharles.Forsyth 	int	dspan;		/* words between scanlines in dst */
84*74a4d8c2SCharles.Forsyth 	int	soff;		/* bit offset of src start point */
85*74a4d8c2SCharles.Forsyth 	int	sdest;		/* bit offset of src start point that matches doff when expanded */
86*74a4d8c2SCharles.Forsyth 	int	doff;		/* bit offset of dst start point */
87*74a4d8c2SCharles.Forsyth 	int	delta;		/* amount to shift src by */
88*74a4d8c2SCharles.Forsyth 	int	sign;		/* of delta */
89*74a4d8c2SCharles.Forsyth 	ulong	*saddr;
90*74a4d8c2SCharles.Forsyth 	ulong	*daddr;
91*74a4d8c2SCharles.Forsyth 	ulong	*s;
92*74a4d8c2SCharles.Forsyth 	ulong	*d;
93*74a4d8c2SCharles.Forsyth 	ulong	mask;
94*74a4d8c2SCharles.Forsyth 	ulong	tmp;		/* temp storage source word */
95*74a4d8c2SCharles.Forsyth 	ulong	sw;		/* source word constructed */
96*74a4d8c2SCharles.Forsyth 	ulong	dw;		/* dest word fetched */
97*74a4d8c2SCharles.Forsyth 	ulong	lmask;		/* affected pixels in leftmost dst word */
98*74a4d8c2SCharles.Forsyth 	ulong	rmask;		/* affected pixels in rightmost dst word */
99*74a4d8c2SCharles.Forsyth 	int	i;
100*74a4d8c2SCharles.Forsyth 	int	j;
101*74a4d8c2SCharles.Forsyth 	ulong	buf[32];	/* for expanding a source */
102*74a4d8c2SCharles.Forsyth 	ulong	*p;		/* pointer into buf */
103*74a4d8c2SCharles.Forsyth 	int	spare;		/* number of words already converted */
104*74a4d8c2SCharles.Forsyth 
105*74a4d8c2SCharles.Forsyth 
106*74a4d8c2SCharles.Forsyth #ifdef TEST
107*74a4d8c2SCharles.Forsyth 	curdm = dm;
108*74a4d8c2SCharles.Forsyth 	cursm = sm;
109*74a4d8c2SCharles.Forsyth 	curpt = pt;
110*74a4d8c2SCharles.Forsyth 	curr = r;
111*74a4d8c2SCharles.Forsyth 	curf = fcode;
112*74a4d8c2SCharles.Forsyth #endif
113*74a4d8c2SCharles.Forsyth 
114*74a4d8c2SCharles.Forsyth 	gbitbltclip(&dm);
115*74a4d8c2SCharles.Forsyth 
116*74a4d8c2SCharles.Forsyth 	width = r.max.x - r.min.x;
117*74a4d8c2SCharles.Forsyth 	if(width <= 0)
118*74a4d8c2SCharles.Forsyth 		return;
119*74a4d8c2SCharles.Forsyth 	height = r.max.y - r.min.y - 1;
120*74a4d8c2SCharles.Forsyth 	if(height < 0)
121*74a4d8c2SCharles.Forsyth 		return;
122*74a4d8c2SCharles.Forsyth 
123*74a4d8c2SCharles.Forsyth 	ddep = dm->ldepth;
124*74a4d8c2SCharles.Forsyth 	pt.x <<= ddep;
125*74a4d8c2SCharles.Forsyth 	width <<= ddep;
126*74a4d8c2SCharles.Forsyth 
127*74a4d8c2SCharles.Forsyth 	sdep = sm->ldepth;
128*74a4d8c2SCharles.Forsyth 	r.min.x <<= sdep;
129*74a4d8c2SCharles.Forsyth 	r.max.x <<= sdep;
130*74a4d8c2SCharles.Forsyth 
131*74a4d8c2SCharles.Forsyth 	dspan = dm->width * W2L;
132*74a4d8c2SCharles.Forsyth 	sspan = sm->width * W2L;
133*74a4d8c2SCharles.Forsyth 
134*74a4d8c2SCharles.Forsyth 	daddr = (ulong*)((WType)dm->base
135*74a4d8c2SCharles.Forsyth 			+ dm->zero*W2L + pt.y*dspan
136*74a4d8c2SCharles.Forsyth 			+ (pt.x >> LWBITS));
137*74a4d8c2SCharles.Forsyth 	saddr = (ulong*)((WType)sm->base
138*74a4d8c2SCharles.Forsyth 			+ sm->zero*W2L + r.min.y*sspan
139*74a4d8c2SCharles.Forsyth 			+ (r.min.x >> LWBITS));
140*74a4d8c2SCharles.Forsyth 
141*74a4d8c2SCharles.Forsyth 	doff = pt.x & (WBITS - 1);
142*74a4d8c2SCharles.Forsyth 	lmask = WMASK >> doff;
143*74a4d8c2SCharles.Forsyth 	rmask = (WMASK << (WBITS - ((doff+width) & (WBITS-1))))&WMASK;
144*74a4d8c2SCharles.Forsyth 	if(!rmask)
145*74a4d8c2SCharles.Forsyth 		rmask = WMASK;
146*74a4d8c2SCharles.Forsyth 	soff = r.min.x & (WBITS-1);
147*74a4d8c2SCharles.Forsyth 	wwidth = ((pt.x+width-1)>>LWBITS) - (pt.x>>LWBITS);
148*74a4d8c2SCharles.Forsyth 
149*74a4d8c2SCharles.Forsyth 	if(sm == dm){
150*74a4d8c2SCharles.Forsyth #ifdef TEST
151*74a4d8c2SCharles.Forsyth 		if(!FORCEBAKW &&
152*74a4d8c2SCharles.Forsyth 		   (FORCEFORW || sm != dm || saddr > daddr ||
153*74a4d8c2SCharles.Forsyth 		    (saddr == daddr && soff > doff)))
154*74a4d8c2SCharles.Forsyth 			;
155*74a4d8c2SCharles.Forsyth 		else{
156*74a4d8c2SCharles.Forsyth 			daddr += height * dspan;
157*74a4d8c2SCharles.Forsyth 			saddr += height * sspan;
158*74a4d8c2SCharles.Forsyth 			sspan -= 2 * W2L * sm->width;
159*74a4d8c2SCharles.Forsyth 			dspan -= 2 * W2L * dm->width;
160*74a4d8c2SCharles.Forsyth 		}
161*74a4d8c2SCharles.Forsyth #else
162*74a4d8c2SCharles.Forsyth 		if(r.min.y < pt.y){	/* bottom to top */
163*74a4d8c2SCharles.Forsyth 			daddr += height * dspan;
164*74a4d8c2SCharles.Forsyth 			saddr += height * sspan;
165*74a4d8c2SCharles.Forsyth 			sspan -= 2 * W2L * sm->width;
166*74a4d8c2SCharles.Forsyth 			dspan -= 2 * W2L * dm->width;
167*74a4d8c2SCharles.Forsyth 		}else if(r.min.y == pt.y && r.min.x < pt.x)
168*74a4d8c2SCharles.Forsyth 			abort()/*goto right*/;
169*74a4d8c2SCharles.Forsyth #endif
170*74a4d8c2SCharles.Forsyth 	}
171*74a4d8c2SCharles.Forsyth 	if(wwidth == 0)		/* collapse masks for narrow cases */
172*74a4d8c2SCharles.Forsyth 		lmask &= rmask;
173*74a4d8c2SCharles.Forsyth 	fcode &= F;
174*74a4d8c2SCharles.Forsyth 
175*74a4d8c2SCharles.Forsyth 	deltadep = ddep - sdep;
176*74a4d8c2SCharles.Forsyth 	sdest = doff >> deltadep;
177*74a4d8c2SCharles.Forsyth 	delta = soff - sdest;
178*74a4d8c2SCharles.Forsyth 	sign = 0;
179*74a4d8c2SCharles.Forsyth 	if(delta < 0){
180*74a4d8c2SCharles.Forsyth 		sign = 1;
181*74a4d8c2SCharles.Forsyth 		delta = -delta;
182*74a4d8c2SCharles.Forsyth 	}
183*74a4d8c2SCharles.Forsyth 
184*74a4d8c2SCharles.Forsyth 	p = 0;
185*74a4d8c2SCharles.Forsyth 	for(j = 0; j <= height; j++){
186*74a4d8c2SCharles.Forsyth 		d = daddr;
187*74a4d8c2SCharles.Forsyth 		s = saddr;
188*74a4d8c2SCharles.Forsyth 		mask = lmask;
189*74a4d8c2SCharles.Forsyth 		tmp = 0;
190*74a4d8c2SCharles.Forsyth 		if(!sign)
191*74a4d8c2SCharles.Forsyth 			tmp = *s++;
192*74a4d8c2SCharles.Forsyth 		spare = 0;
193*74a4d8c2SCharles.Forsyth 		for(i = wwidth; i >= 0; i--){
194*74a4d8c2SCharles.Forsyth 			if(spare)
195*74a4d8c2SCharles.Forsyth 				sw = *p++;
196*74a4d8c2SCharles.Forsyth 			else{
197*74a4d8c2SCharles.Forsyth 				if(sign){
198*74a4d8c2SCharles.Forsyth 					sw = tmp << (WBITS-delta);
199*74a4d8c2SCharles.Forsyth 					tmp = *s++;
200*74a4d8c2SCharles.Forsyth 					sw |= tmp >> delta;
201*74a4d8c2SCharles.Forsyth 				}else{
202*74a4d8c2SCharles.Forsyth 					sw = tmp << delta;
203*74a4d8c2SCharles.Forsyth 					tmp = *s++;
204*74a4d8c2SCharles.Forsyth 					if(delta)
205*74a4d8c2SCharles.Forsyth 						sw |= tmp >> (WBITS-delta);
206*74a4d8c2SCharles.Forsyth 				}
207*74a4d8c2SCharles.Forsyth 				spare = 1 << deltadep;
208*74a4d8c2SCharles.Forsyth 				if(deltadep >= 1){
209*74a4d8c2SCharles.Forsyth 					gbitexplode(sw, buf, sdep, deltadep);
210*74a4d8c2SCharles.Forsyth 					p = buf;
211*74a4d8c2SCharles.Forsyth 					sw = *p++;
212*74a4d8c2SCharles.Forsyth 				}
213*74a4d8c2SCharles.Forsyth 			}
214*74a4d8c2SCharles.Forsyth 
215*74a4d8c2SCharles.Forsyth 			dw = *d;
216*74a4d8c2SCharles.Forsyth 			switch(fcode){		/* ltor bit aligned */
217*74a4d8c2SCharles.Forsyth 			case Zero:	*d = dw & ~mask;		break;
218*74a4d8c2SCharles.Forsyth 			case DnorS:	*d = dw ^ ((~sw | dw) & mask);	break;
219*74a4d8c2SCharles.Forsyth 			case DandnotS:	*d = dw ^ ((sw & dw) & mask);	break;
220*74a4d8c2SCharles.Forsyth 			case notS:	*d = dw ^ ((~sw ^ dw) & mask);	break;
221*74a4d8c2SCharles.Forsyth 			case notDandS:	*d = dw ^ ((sw | dw) & mask);	break;
222*74a4d8c2SCharles.Forsyth 			case notD:	*d = dw ^ mask;			break;
223*74a4d8c2SCharles.Forsyth 			case DxorS:	*d = dw ^ (sw & mask);		break;
224*74a4d8c2SCharles.Forsyth 			case DnandS:	*d = dw ^ ((sw | ~dw) & mask);	break;
225*74a4d8c2SCharles.Forsyth 			case DandS:	*d = dw ^ ((~sw & dw) & mask);	break;
226*74a4d8c2SCharles.Forsyth 			case DxnorS:	*d = dw ^ (~sw & mask);		break;
227*74a4d8c2SCharles.Forsyth 			case D:						break;
228*74a4d8c2SCharles.Forsyth 			case DornotS:	*d = dw | (~sw & mask);		break;
229*74a4d8c2SCharles.Forsyth 			case S:		*d = dw ^ ((sw ^ dw) & mask);	break;
230*74a4d8c2SCharles.Forsyth 			case notDorS:	*d = dw ^ (~(sw & dw) & mask);	break;
231*74a4d8c2SCharles.Forsyth 			case DorS:	*d = dw | (sw & mask);		break;
232*74a4d8c2SCharles.Forsyth 			case F:		*d = dw | mask;			break;
233*74a4d8c2SCharles.Forsyth 			}
234*74a4d8c2SCharles.Forsyth 			d++;
235*74a4d8c2SCharles.Forsyth 
236*74a4d8c2SCharles.Forsyth 			mask = WMASK;
237*74a4d8c2SCharles.Forsyth 			if(i == 1)
238*74a4d8c2SCharles.Forsyth 				mask = rmask;
239*74a4d8c2SCharles.Forsyth 			spare--;
240*74a4d8c2SCharles.Forsyth 		}
241*74a4d8c2SCharles.Forsyth 		saddr += sspan;
242*74a4d8c2SCharles.Forsyth 		daddr += dspan;
243*74a4d8c2SCharles.Forsyth 	}
244*74a4d8c2SCharles.Forsyth }
245*74a4d8c2SCharles.Forsyth 
246*74a4d8c2SCharles.Forsyth #ifdef TEST
247*74a4d8c2SCharles.Forsyth void	prprog(void);
248*74a4d8c2SCharles.Forsyth GBitmap *bb1, *bb2;
249*74a4d8c2SCharles.Forsyth ulong	*src, *dst, *xdst, *xans;
250*74a4d8c2SCharles.Forsyth int	swds, dwds;
251*74a4d8c2SCharles.Forsyth long	ticks;
252*74a4d8c2SCharles.Forsyth int	timeit;
253*74a4d8c2SCharles.Forsyth 
254*74a4d8c2SCharles.Forsyth long
func(int f,long s,int sld,long d,int dld)255*74a4d8c2SCharles.Forsyth func(int f, long s, int sld, long d, int dld)
256*74a4d8c2SCharles.Forsyth {
257*74a4d8c2SCharles.Forsyth 	long a;
258*74a4d8c2SCharles.Forsyth 	int sh, i, db, sb;
259*74a4d8c2SCharles.Forsyth 
260*74a4d8c2SCharles.Forsyth 	db = 1 << dld;
261*74a4d8c2SCharles.Forsyth 	sb = 1 << sld;
262*74a4d8c2SCharles.Forsyth 	sh = db - sb;
263*74a4d8c2SCharles.Forsyth 	if(sh > 0) {
264*74a4d8c2SCharles.Forsyth 		a = s;
265*74a4d8c2SCharles.Forsyth 		for(i = sb; i<db; i += sb){
266*74a4d8c2SCharles.Forsyth 			a <<= sb;
267*74a4d8c2SCharles.Forsyth 			s |= a;
268*74a4d8c2SCharles.Forsyth 		}
269*74a4d8c2SCharles.Forsyth 	} else if(sh < 0)
270*74a4d8c2SCharles.Forsyth 		s >>= -sh;
271*74a4d8c2SCharles.Forsyth 
272*74a4d8c2SCharles.Forsyth 	switch(f){
273*74a4d8c2SCharles.Forsyth 	case Zero:	d = 0;			break;
274*74a4d8c2SCharles.Forsyth 	case DnorS:	d = ~(d|s);		break;
275*74a4d8c2SCharles.Forsyth 	case DandnotS:	d = d & ~s;		break;
276*74a4d8c2SCharles.Forsyth 	case notS:	d = ~s;			break;
277*74a4d8c2SCharles.Forsyth 	case notDandS:	d = ~d & s;		break;
278*74a4d8c2SCharles.Forsyth 	case notD:	d = ~d;			break;
279*74a4d8c2SCharles.Forsyth 	case DxorS:	d = d ^ s;		break;
280*74a4d8c2SCharles.Forsyth 	case DnandS:	d = ~(d&s);		break;
281*74a4d8c2SCharles.Forsyth 	case DandS:	d = d & s;		break;
282*74a4d8c2SCharles.Forsyth 	case DxnorS:	d = ~(d^s);		break;
283*74a4d8c2SCharles.Forsyth 	case S:		d = s;			break;
284*74a4d8c2SCharles.Forsyth 	case DornotS:	d = d | ~s;		break;
285*74a4d8c2SCharles.Forsyth 	case D:		d = d;			break;
286*74a4d8c2SCharles.Forsyth 	case notDorS:	d = ~d | s;		break;
287*74a4d8c2SCharles.Forsyth 	case DorS:	d = d | s;		break;
288*74a4d8c2SCharles.Forsyth 	case F:		d = ~0;			break;
289*74a4d8c2SCharles.Forsyth 	}
290*74a4d8c2SCharles.Forsyth 
291*74a4d8c2SCharles.Forsyth 	d &= ((1<<db)-1);
292*74a4d8c2SCharles.Forsyth 	return d;
293*74a4d8c2SCharles.Forsyth }
294*74a4d8c2SCharles.Forsyth 
295*74a4d8c2SCharles.Forsyth void
run(int fr,int to,int w,int op)296*74a4d8c2SCharles.Forsyth run(int fr, int to, int w, int op)
297*74a4d8c2SCharles.Forsyth {
298*74a4d8c2SCharles.Forsyth 	int i, j, f, t, fy, ty;
299*74a4d8c2SCharles.Forsyth 	extern long *_clock;
300*74a4d8c2SCharles.Forsyth 
301*74a4d8c2SCharles.Forsyth 	fr += bb2->r.min.x;
302*74a4d8c2SCharles.Forsyth 	to += bb1->r.min.x;
303*74a4d8c2SCharles.Forsyth 	fy = bb2->r.min.y + 1;
304*74a4d8c2SCharles.Forsyth 	ty = bb1->r.min.y + 1;
305*74a4d8c2SCharles.Forsyth 	if(timeit) {
306*74a4d8c2SCharles.Forsyth 		memcpy(dst, xdst, dwds * sizeof(long));
307*74a4d8c2SCharles.Forsyth 		ticks -= *_clock;
308*74a4d8c2SCharles.Forsyth 		gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
309*74a4d8c2SCharles.Forsyth 		ticks += *_clock;
310*74a4d8c2SCharles.Forsyth 		return;
311*74a4d8c2SCharles.Forsyth 	}
312*74a4d8c2SCharles.Forsyth 	f = fr;
313*74a4d8c2SCharles.Forsyth 	t = to;
314*74a4d8c2SCharles.Forsyth 	memcpy(dst, xdst, dwds * sizeof(long));
315*74a4d8c2SCharles.Forsyth 	for(i=0; i<w; i++) {
316*74a4d8c2SCharles.Forsyth 		gbitblt(bb1, Pt(t,ty), bb2, Rect(f,fy,f+1,fy+1), op);
317*74a4d8c2SCharles.Forsyth 		gbitblt(bb1, Pt(t,ty+1), bb2, Rect(f,fy+1,f+1,fy+2), op);
318*74a4d8c2SCharles.Forsyth 		f++;
319*74a4d8c2SCharles.Forsyth 		t++;
320*74a4d8c2SCharles.Forsyth 	}
321*74a4d8c2SCharles.Forsyth 	memcpy(xans, dst, dwds * sizeof(long));
322*74a4d8c2SCharles.Forsyth 
323*74a4d8c2SCharles.Forsyth 	memcpy(dst, xdst, dwds * sizeof(long));
324*74a4d8c2SCharles.Forsyth 	gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
325*74a4d8c2SCharles.Forsyth 
326*74a4d8c2SCharles.Forsyth 	if(memcmp(xans, dst, dwds * sizeof(long))) {
327*74a4d8c2SCharles.Forsyth 		/*
328*74a4d8c2SCharles.Forsyth 		 * print src and dst row offset, width in bits, and forw/back
329*74a4d8c2SCharles.Forsyth 		 * then print for each of the four rows: the source (s),
330*74a4d8c2SCharles.Forsyth 		 * the dest (d), the good value of the answer (g),
331*74a4d8c2SCharles.Forsyth 		 * and the actual bad value of the answer (b)
332*74a4d8c2SCharles.Forsyth 		 */
333*74a4d8c2SCharles.Forsyth 		print("fr=%d to=%d w=%d fb=%d%d\n",
334*74a4d8c2SCharles.Forsyth 			fr, to, w, FORCEFORW, FORCEBAKW);
335*74a4d8c2SCharles.Forsyth 		print("dst bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
336*74a4d8c2SCharles.Forsyth 			bb1->base, bb1->zero, bb1->width, bb1->ldepth,
337*74a4d8c2SCharles.Forsyth 			bb1->r.min.x, bb1->r.min.y, bb1->r.max.x, bb1->r.max.y);
338*74a4d8c2SCharles.Forsyth 		print("src bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
339*74a4d8c2SCharles.Forsyth 			bb2->base, bb2->zero, bb2->width, bb2->ldepth,
340*74a4d8c2SCharles.Forsyth 			bb2->r.min.x, bb2->r.min.y, bb2->r.max.x, bb2->r.max.y);
341*74a4d8c2SCharles.Forsyth 		for(j=0; 7*j < dwds; j++) {
342*74a4d8c2SCharles.Forsyth 			print("\ns");
343*74a4d8c2SCharles.Forsyth 			for(i=0; i<7 && 7*j+i < dwds; i++)
344*74a4d8c2SCharles.Forsyth 				print(" %.8lux", src[7*j + i]);
345*74a4d8c2SCharles.Forsyth 			print("\nd");
346*74a4d8c2SCharles.Forsyth 			for(i=0; i<7 && 7*j+i < dwds; i++)
347*74a4d8c2SCharles.Forsyth 				print(" %.8lux", xdst[7*j + i]);
348*74a4d8c2SCharles.Forsyth 			print("\ng");
349*74a4d8c2SCharles.Forsyth 			for(i=0; i<7 && 7*j+i < dwds; i++)
350*74a4d8c2SCharles.Forsyth 				print(" %.8lux", xans[7*j + i]);
351*74a4d8c2SCharles.Forsyth 			print("\nb");
352*74a4d8c2SCharles.Forsyth 			for(i=0; i<7 && 7*j+i < dwds; i++)
353*74a4d8c2SCharles.Forsyth 				print(" %.8lux", dst[7*j + i]);
354*74a4d8c2SCharles.Forsyth 			print("\n");
355*74a4d8c2SCharles.Forsyth 		}
356*74a4d8c2SCharles.Forsyth 		prprog();
357*74a4d8c2SCharles.Forsyth 	}
358*74a4d8c2SCharles.Forsyth }
359*74a4d8c2SCharles.Forsyth 
/*
 * Called by the test harness after it has reported a failure.
 * This version prints nothing and simply ends the program via exits(0).
 */
void
prprog(void)
{
	exits(0);
}
365*74a4d8c2SCharles.Forsyth 
366*74a4d8c2SCharles.Forsyth int
main(int argc,char * argv[])367*74a4d8c2SCharles.Forsyth main(int argc, char *argv[])
368*74a4d8c2SCharles.Forsyth {
369*74a4d8c2SCharles.Forsyth 	int f, t, w, i, sld, dld, op, iters, simple;
370*74a4d8c2SCharles.Forsyth 	ulong s, d, spix, dpix, apix, fpix, m, *ps, *pd;
371*74a4d8c2SCharles.Forsyth 	Point sorg, dorg;
372*74a4d8c2SCharles.Forsyth 	GBitmap *bs, *bd;
373*74a4d8c2SCharles.Forsyth 	long seed;
374*74a4d8c2SCharles.Forsyth 	char *ct;
375*74a4d8c2SCharles.Forsyth 
376*74a4d8c2SCharles.Forsyth 	sld = 0;
377*74a4d8c2SCharles.Forsyth 	dld = 0;
378*74a4d8c2SCharles.Forsyth 	timeit = 0;
379*74a4d8c2SCharles.Forsyth 	iters = 200;
380*74a4d8c2SCharles.Forsyth 	simple = 0;
381*74a4d8c2SCharles.Forsyth 	ARGBEGIN {
382*74a4d8c2SCharles.Forsyth 	case 'i':
383*74a4d8c2SCharles.Forsyth 		iters = atoi(ARGF());
384*74a4d8c2SCharles.Forsyth 		break;
385*74a4d8c2SCharles.Forsyth 	case 's':
386*74a4d8c2SCharles.Forsyth 		simple = 1;
387*74a4d8c2SCharles.Forsyth 		break;
388*74a4d8c2SCharles.Forsyth 	case 't':
389*74a4d8c2SCharles.Forsyth 		timeit = 1;
390*74a4d8c2SCharles.Forsyth 		ct = ARGF();
391*74a4d8c2SCharles.Forsyth 		if(ct)
392*74a4d8c2SCharles.Forsyth 			iters = atoi(ct);
393*74a4d8c2SCharles.Forsyth 		break;
394*74a4d8c2SCharles.Forsyth 	} ARGEND
395*74a4d8c2SCharles.Forsyth 	if(argc > 0)
396*74a4d8c2SCharles.Forsyth 		sld = atoi(argv[0]);
397*74a4d8c2SCharles.Forsyth 	if(argc > 1)
398*74a4d8c2SCharles.Forsyth 		dld = atoi(argv[1]);
399*74a4d8c2SCharles.Forsyth 	if(!timeit && !simple) {
400*74a4d8c2SCharles.Forsyth 		seed = time(0);
401*74a4d8c2SCharles.Forsyth 		print("seed %lux\n", seed); srand(seed);	/**/
402*74a4d8c2SCharles.Forsyth 	}
403*74a4d8c2SCharles.Forsyth 
404*74a4d8c2SCharles.Forsyth 	print("sld %d dld %d\n", sld, dld);
405*74a4d8c2SCharles.Forsyth 	op = 1;
406*74a4d8c2SCharles.Forsyth 
407*74a4d8c2SCharles.Forsyth 	/* bitmaps for 1-bit tests */
408*74a4d8c2SCharles.Forsyth 	bd = gballoc(Rect(0,0,32,1), dld);
409*74a4d8c2SCharles.Forsyth 	bs = gballoc(Rect(0,0,32,1), sld);
410*74a4d8c2SCharles.Forsyth 	for(i=0; i<bs->width; i++)
411*74a4d8c2SCharles.Forsyth 		bs->base[i] = lrand();
412*74a4d8c2SCharles.Forsyth 
413*74a4d8c2SCharles.Forsyth 	/* bitmaps for rect tests */
414*74a4d8c2SCharles.Forsyth 	if(simple) {
415*74a4d8c2SCharles.Forsyth 		dorg = Pt(0,0);
416*74a4d8c2SCharles.Forsyth 		sorg = Pt(0,0);
417*74a4d8c2SCharles.Forsyth 	} else {
418*74a4d8c2SCharles.Forsyth 		dorg = Pt(nrand(63)-31,nrand(63)-31);
419*74a4d8c2SCharles.Forsyth 		sorg = Pt(nrand(63)-31,nrand(63)-31);
420*74a4d8c2SCharles.Forsyth 	}
421*74a4d8c2SCharles.Forsyth 	bb1 = gballoc(Rpt(dorg,add(dorg,Pt(200,4))), dld);
422*74a4d8c2SCharles.Forsyth 	bb2 = gballoc(Rpt(sorg,add(sorg,Pt(200,4))), sld);
423*74a4d8c2SCharles.Forsyth 	dwds = bb1->width * Dy(bb1->r);
424*74a4d8c2SCharles.Forsyth 	swds = bb2->width * Dy(bb2->r);
425*74a4d8c2SCharles.Forsyth 	dst = bb1->base;
426*74a4d8c2SCharles.Forsyth 	src = bb2->base;
427*74a4d8c2SCharles.Forsyth 	xdst = malloc(dwds * sizeof(long));
428*74a4d8c2SCharles.Forsyth 	xans =  malloc(dwds * sizeof(long));
429*74a4d8c2SCharles.Forsyth 	for(i=0; i<swds; i++)
430*74a4d8c2SCharles.Forsyth 		src[i] = lrand();
431*74a4d8c2SCharles.Forsyth 	for(i=0; i<dwds; i++)
432*74a4d8c2SCharles.Forsyth 		xdst[i] = lrand();
433*74a4d8c2SCharles.Forsyth 
434*74a4d8c2SCharles.Forsyth loop:
435*74a4d8c2SCharles.Forsyth 	print("Op %d\n", op);
436*74a4d8c2SCharles.Forsyth 	if(!timeit) {
437*74a4d8c2SCharles.Forsyth 		print("one pixel\n");
438*74a4d8c2SCharles.Forsyth 		ps = bs->base;
439*74a4d8c2SCharles.Forsyth 		pd = bd->base;
440*74a4d8c2SCharles.Forsyth 		FORCEFORW = 1;
441*74a4d8c2SCharles.Forsyth 		FORCEBAKW = 0;
442*74a4d8c2SCharles.Forsyth 		for(i=0; i<1000; i++, FORCEFORW = !FORCEFORW, FORCEBAKW = !FORCEBAKW) {
443*74a4d8c2SCharles.Forsyth 			f = nrand(32 >> sld);
444*74a4d8c2SCharles.Forsyth 			t = nrand(32 >> dld);
445*74a4d8c2SCharles.Forsyth 			s = lrand();
446*74a4d8c2SCharles.Forsyth 			d = lrand();
447*74a4d8c2SCharles.Forsyth 			ps[0] = s;
448*74a4d8c2SCharles.Forsyth 			pd[0] = d;
449*74a4d8c2SCharles.Forsyth #ifdef T386
450*74a4d8c2SCharles.Forsyth 			spix = (byterev(s) >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
451*74a4d8c2SCharles.Forsyth 			dpix = (byterev(d) >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
452*74a4d8c2SCharles.Forsyth #else
453*74a4d8c2SCharles.Forsyth 			spix = (s >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
454*74a4d8c2SCharles.Forsyth 			dpix = (d >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
455*74a4d8c2SCharles.Forsyth #endif
456*74a4d8c2SCharles.Forsyth #ifdef T386
457*74a4d8c2SCharles.Forsyth 			apix = byterev(func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld)));
458*74a4d8c2SCharles.Forsyth #else
459*74a4d8c2SCharles.Forsyth 			apix = func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld));
460*74a4d8c2SCharles.Forsyth #endif
461*74a4d8c2SCharles.Forsyth 			gbitblt(bd, Pt(t,0), bs, Rect(f,0,f+1,1), op);
462*74a4d8c2SCharles.Forsyth 			if(ps[0] != s) {
463*74a4d8c2SCharles.Forsyth 				print("bb src %.8lux %.8lux %d %d\n", ps[0], s, f, t);
464*74a4d8c2SCharles.Forsyth 				exits("error");
465*74a4d8c2SCharles.Forsyth 			}
466*74a4d8c2SCharles.Forsyth 			m = ((1 << (1<<dld)) - 1) << (32 - ((t+1)<<dld));
467*74a4d8c2SCharles.Forsyth #ifdef T386
468*74a4d8c2SCharles.Forsyth 			m = byterev(m);
469*74a4d8c2SCharles.Forsyth #endif
470*74a4d8c2SCharles.Forsyth 			if((pd[0] & ~m) != (d & ~m)) {
471*74a4d8c2SCharles.Forsyth 					print("bb dst1 %.8lux %.8lux\n",
472*74a4d8c2SCharles.Forsyth 						s, d);
473*74a4d8c2SCharles.Forsyth 					print("bb      %.8lux %.8lux %d %d\n",
474*74a4d8c2SCharles.Forsyth 						ps[0], pd[0], f, t);
475*74a4d8c2SCharles.Forsyth 					prprog();
476*74a4d8c2SCharles.Forsyth 					exits("error");
477*74a4d8c2SCharles.Forsyth 			}
478*74a4d8c2SCharles.Forsyth 			if((pd[0] & m) != apix) {
479*74a4d8c2SCharles.Forsyth 				spix <<= 32 - ((f+1)<<sld);
480*74a4d8c2SCharles.Forsyth 				dpix <<= 32 - ((t+1)<<dld);
481*74a4d8c2SCharles.Forsyth #ifdef T386
482*74a4d8c2SCharles.Forsyth 				spix = byterev(spix);
483*74a4d8c2SCharles.Forsyth 				dpix = byterev(dpix);
484*74a4d8c2SCharles.Forsyth #endif
485*74a4d8c2SCharles.Forsyth 				print("bb dst2 %.8lux %.8lux\n",
486*74a4d8c2SCharles.Forsyth 					s, d);
487*74a4d8c2SCharles.Forsyth 				print("bb      %.8lux %.8lux %d %d\n",
488*74a4d8c2SCharles.Forsyth 					ps[0], pd[0], f, t);
489*74a4d8c2SCharles.Forsyth 				print("bb      %.8lux %.8lux %.8lux %.8lux\n",
490*74a4d8c2SCharles.Forsyth 					spix, dpix, apix, pd[0] & m);
491*74a4d8c2SCharles.Forsyth 				prprog();
492*74a4d8c2SCharles.Forsyth 				exits("error");
493*74a4d8c2SCharles.Forsyth 			}
494*74a4d8c2SCharles.Forsyth 		}
495*74a4d8c2SCharles.Forsyth 	}
496*74a4d8c2SCharles.Forsyth 
497*74a4d8c2SCharles.Forsyth 	print("for\n");
498*74a4d8c2SCharles.Forsyth 	FORCEFORW = 1;
499*74a4d8c2SCharles.Forsyth 	FORCEBAKW = 0;
500*74a4d8c2SCharles.Forsyth 
501*74a4d8c2SCharles.Forsyth 	for(i=0; i<iters; i++) {
502*74a4d8c2SCharles.Forsyth 		f = nrand(64);
503*74a4d8c2SCharles.Forsyth 		t = nrand(64);
504*74a4d8c2SCharles.Forsyth 		w = nrand(130);
505*74a4d8c2SCharles.Forsyth 		run(f, t, w, op);
506*74a4d8c2SCharles.Forsyth 	}
507*74a4d8c2SCharles.Forsyth 
508*74a4d8c2SCharles.Forsyth 	if(sld == dld) {
509*74a4d8c2SCharles.Forsyth 		print("bak\n");
510*74a4d8c2SCharles.Forsyth 		FORCEFORW = 0;
511*74a4d8c2SCharles.Forsyth 		FORCEBAKW = 1;
512*74a4d8c2SCharles.Forsyth 
513*74a4d8c2SCharles.Forsyth 		for(i=0; i<iters; i++) {
514*74a4d8c2SCharles.Forsyth 			f = nrand(64);
515*74a4d8c2SCharles.Forsyth 			t = nrand(64);
516*74a4d8c2SCharles.Forsyth 			w = nrand(130);
517*74a4d8c2SCharles.Forsyth 			run(f, t, w, op);
518*74a4d8c2SCharles.Forsyth 		}
519*74a4d8c2SCharles.Forsyth 	}
520*74a4d8c2SCharles.Forsyth 
521*74a4d8c2SCharles.Forsyth 	if(op < F) {
522*74a4d8c2SCharles.Forsyth 		op++;
523*74a4d8c2SCharles.Forsyth 		goto loop;
524*74a4d8c2SCharles.Forsyth 	}
525*74a4d8c2SCharles.Forsyth 	if(timeit)
526*74a4d8c2SCharles.Forsyth 		print("time: %d ticks\n", ticks);
527*74a4d8c2SCharles.Forsyth 	exits(0);
528*74a4d8c2SCharles.Forsyth }
529*74a4d8c2SCharles.Forsyth 
530*74a4d8c2SCharles.Forsyth 
531*74a4d8c2SCharles.Forsyth #endif
532