1*74a4d8c2SCharles.Forsyth #include <u.h>
2*74a4d8c2SCharles.Forsyth #include <libc.h>
3*74a4d8c2SCharles.Forsyth #include <libg.h>
4*74a4d8c2SCharles.Forsyth #include <gnot.h>
5*74a4d8c2SCharles.Forsyth
/*
 * bitblt operates a 'word' at a time.
 *	WBITS	is the number of bits in a word
 *	LWBITS	is log2(WBITS)
 *	W2L	is the number of words in a long
 *	WMASK	has bits set for the low order word of a long
 *	WType	is a pointer to a word
 *
 * The definitions below are only used when WBITS has not already
 * been defined; they select 32-bit words.
 */
#ifndef WBITS
#define WBITS	32
#define LWBITS	5
#define W2L	1
#define WMASK	(~0UL)	/* parenthesised so the expansion is a single operand */
typedef ulong *WType;
#endif
21*74a4d8c2SCharles.Forsyth
/* Defined unconditionally; no test of DEBUG is visible in this file. */
#define DEBUG

#ifdef TEST
/*
 * globals used for testing
 */
int	FORCEFORW;	/* when sm == dm: keep the top-to-bottom scan (unless FORCEBAKW is set) */
int	FORCEBAKW;	/* when sm == dm: take the bottom-to-top scan; overrides FORCEFORW */
GBitmap	*curdm, *cursm;	/* dm and sm of the most recent gbitblt call */
Point	curpt;		/* pt of the most recent gbitblt call */
Rectangle	curr;	/* r of the most recent gbitblt call */
Fcode	curf;		/* fcode of the most recent gbitblt call */
void	*mem;		/* not referenced elsewhere in this file */
#endif
36*74a4d8c2SCharles.Forsyth
/*
 * Widen the pixels of one 32-bit source word.
 *
 * sw holds pixels of 1<<sdep bits each.  Every pixel is widened by a
 * factor of 1<<x by replicating its bits, so the result needs 1<<x
 * words.  They are stored in buf[0], buf[1], ...; buf[0] is built from
 * the most significant 32>>x bits of sw, the last word from the least
 * significant ones.
 *
 * The caller provides room for 1<<x words in buf.  The widened pixel
 * ((1<<sdep)<<x bits) must fit in 32 bits.
 */
static void
gbitexplode(ulong sw, ulong *buf, int sdep, int x)
{
	int j, o, q, n, nw, inc, qinc;
	ulong s, dw, pix;

	inc = 1 << sdep;		/* bits in a source pixel */
	pix = ((ulong)1 << inc) - 1;	/* mask for one source pixel, built in ulong rather than int */
	nw = 1 << x;			/* words of output */
	n = 32 >> x;			/* source bits consumed per output word */
	qinc = (nw << sdep) - inc;	/* extra shift each successive pixel needs */
	for(o = 32 - n; o >= 0; o -= n){
		dw = 0;
		s = sw >> o;		/* the n bits of interest are now the low n bits */
		q = 0;
		/* spread the pixels out: the one at bit j lands at bit j*nw */
		for(j = 0; j < n; j += inc){
			dw |= (s & (pix << j)) << q;
			q += qinc;
		}
		/* fill each widened pixel by doubling the copies x times */
		for(j = 0; j < x; j++)
			dw |= dw << (inc << j);
		*buf++ = dw;
	}
}
61*74a4d8c2SCharles.Forsyth
/*
 * Stand-alone driver for gbitexplode, kept for reference (disabled):
 *
void
main(void)
{
	ulong buf[128];

	gbitexplode(0x7777, buf, 0, 3);
	exits(0);
}
*/
72*74a4d8c2SCharles.Forsyth
73*74a4d8c2SCharles.Forsyth void
gbitblt(GBitmap * dm,Point pt,GBitmap * sm,Rectangle r,Fcode fcode)74*74a4d8c2SCharles.Forsyth gbitblt(GBitmap *dm, Point pt, GBitmap *sm, Rectangle r, Fcode fcode)
75*74a4d8c2SCharles.Forsyth {
76*74a4d8c2SCharles.Forsyth int width; /* width in bits of dst */
77*74a4d8c2SCharles.Forsyth int wwidth; /* floor width in words */
78*74a4d8c2SCharles.Forsyth int height; /* height in pixels minus 1 */
79*74a4d8c2SCharles.Forsyth int sdep; /* src ldepth */
80*74a4d8c2SCharles.Forsyth int ddep; /* dst ldepth */
81*74a4d8c2SCharles.Forsyth int deltadep; /* diff between ldepths */
82*74a4d8c2SCharles.Forsyth int sspan; /* words between scanlines in src */
83*74a4d8c2SCharles.Forsyth int dspan; /* words between scanlines in dst */
84*74a4d8c2SCharles.Forsyth int soff; /* bit offset of src start point */
85*74a4d8c2SCharles.Forsyth int sdest; /* bit offset of src start point that matches doff when expanded */
86*74a4d8c2SCharles.Forsyth int doff; /* bit offset of dst start point */
87*74a4d8c2SCharles.Forsyth int delta; /* amount to shift src by */
88*74a4d8c2SCharles.Forsyth int sign; /* of delta */
89*74a4d8c2SCharles.Forsyth ulong *saddr;
90*74a4d8c2SCharles.Forsyth ulong *daddr;
91*74a4d8c2SCharles.Forsyth ulong *s;
92*74a4d8c2SCharles.Forsyth ulong *d;
93*74a4d8c2SCharles.Forsyth ulong mask;
94*74a4d8c2SCharles.Forsyth ulong tmp; /* temp storage source word */
95*74a4d8c2SCharles.Forsyth ulong sw; /* source word constructed */
96*74a4d8c2SCharles.Forsyth ulong dw; /* dest word fetched */
97*74a4d8c2SCharles.Forsyth ulong lmask; /* affected pixels in leftmost dst word */
98*74a4d8c2SCharles.Forsyth ulong rmask; /* affected pixels in rightmost dst word */
99*74a4d8c2SCharles.Forsyth int i;
100*74a4d8c2SCharles.Forsyth int j;
101*74a4d8c2SCharles.Forsyth ulong buf[32]; /* for expanding a source */
102*74a4d8c2SCharles.Forsyth ulong *p; /* pointer into buf */
103*74a4d8c2SCharles.Forsyth int spare; /* number of words already converted */
104*74a4d8c2SCharles.Forsyth
105*74a4d8c2SCharles.Forsyth
106*74a4d8c2SCharles.Forsyth #ifdef TEST
107*74a4d8c2SCharles.Forsyth curdm = dm;
108*74a4d8c2SCharles.Forsyth cursm = sm;
109*74a4d8c2SCharles.Forsyth curpt = pt;
110*74a4d8c2SCharles.Forsyth curr = r;
111*74a4d8c2SCharles.Forsyth curf = fcode;
112*74a4d8c2SCharles.Forsyth #endif
113*74a4d8c2SCharles.Forsyth
114*74a4d8c2SCharles.Forsyth gbitbltclip(&dm);
115*74a4d8c2SCharles.Forsyth
116*74a4d8c2SCharles.Forsyth width = r.max.x - r.min.x;
117*74a4d8c2SCharles.Forsyth if(width <= 0)
118*74a4d8c2SCharles.Forsyth return;
119*74a4d8c2SCharles.Forsyth height = r.max.y - r.min.y - 1;
120*74a4d8c2SCharles.Forsyth if(height < 0)
121*74a4d8c2SCharles.Forsyth return;
122*74a4d8c2SCharles.Forsyth
123*74a4d8c2SCharles.Forsyth ddep = dm->ldepth;
124*74a4d8c2SCharles.Forsyth pt.x <<= ddep;
125*74a4d8c2SCharles.Forsyth width <<= ddep;
126*74a4d8c2SCharles.Forsyth
127*74a4d8c2SCharles.Forsyth sdep = sm->ldepth;
128*74a4d8c2SCharles.Forsyth r.min.x <<= sdep;
129*74a4d8c2SCharles.Forsyth r.max.x <<= sdep;
130*74a4d8c2SCharles.Forsyth
131*74a4d8c2SCharles.Forsyth dspan = dm->width * W2L;
132*74a4d8c2SCharles.Forsyth sspan = sm->width * W2L;
133*74a4d8c2SCharles.Forsyth
134*74a4d8c2SCharles.Forsyth daddr = (ulong*)((WType)dm->base
135*74a4d8c2SCharles.Forsyth + dm->zero*W2L + pt.y*dspan
136*74a4d8c2SCharles.Forsyth + (pt.x >> LWBITS));
137*74a4d8c2SCharles.Forsyth saddr = (ulong*)((WType)sm->base
138*74a4d8c2SCharles.Forsyth + sm->zero*W2L + r.min.y*sspan
139*74a4d8c2SCharles.Forsyth + (r.min.x >> LWBITS));
140*74a4d8c2SCharles.Forsyth
141*74a4d8c2SCharles.Forsyth doff = pt.x & (WBITS - 1);
142*74a4d8c2SCharles.Forsyth lmask = WMASK >> doff;
143*74a4d8c2SCharles.Forsyth rmask = (WMASK << (WBITS - ((doff+width) & (WBITS-1))))&WMASK;
144*74a4d8c2SCharles.Forsyth if(!rmask)
145*74a4d8c2SCharles.Forsyth rmask = WMASK;
146*74a4d8c2SCharles.Forsyth soff = r.min.x & (WBITS-1);
147*74a4d8c2SCharles.Forsyth wwidth = ((pt.x+width-1)>>LWBITS) - (pt.x>>LWBITS);
148*74a4d8c2SCharles.Forsyth
149*74a4d8c2SCharles.Forsyth if(sm == dm){
150*74a4d8c2SCharles.Forsyth #ifdef TEST
151*74a4d8c2SCharles.Forsyth if(!FORCEBAKW &&
152*74a4d8c2SCharles.Forsyth (FORCEFORW || sm != dm || saddr > daddr ||
153*74a4d8c2SCharles.Forsyth (saddr == daddr && soff > doff)))
154*74a4d8c2SCharles.Forsyth ;
155*74a4d8c2SCharles.Forsyth else{
156*74a4d8c2SCharles.Forsyth daddr += height * dspan;
157*74a4d8c2SCharles.Forsyth saddr += height * sspan;
158*74a4d8c2SCharles.Forsyth sspan -= 2 * W2L * sm->width;
159*74a4d8c2SCharles.Forsyth dspan -= 2 * W2L * dm->width;
160*74a4d8c2SCharles.Forsyth }
161*74a4d8c2SCharles.Forsyth #else
162*74a4d8c2SCharles.Forsyth if(r.min.y < pt.y){ /* bottom to top */
163*74a4d8c2SCharles.Forsyth daddr += height * dspan;
164*74a4d8c2SCharles.Forsyth saddr += height * sspan;
165*74a4d8c2SCharles.Forsyth sspan -= 2 * W2L * sm->width;
166*74a4d8c2SCharles.Forsyth dspan -= 2 * W2L * dm->width;
167*74a4d8c2SCharles.Forsyth }else if(r.min.y == pt.y && r.min.x < pt.x)
168*74a4d8c2SCharles.Forsyth abort()/*goto right*/;
169*74a4d8c2SCharles.Forsyth #endif
170*74a4d8c2SCharles.Forsyth }
171*74a4d8c2SCharles.Forsyth if(wwidth == 0) /* collapse masks for narrow cases */
172*74a4d8c2SCharles.Forsyth lmask &= rmask;
173*74a4d8c2SCharles.Forsyth fcode &= F;
174*74a4d8c2SCharles.Forsyth
175*74a4d8c2SCharles.Forsyth deltadep = ddep - sdep;
176*74a4d8c2SCharles.Forsyth sdest = doff >> deltadep;
177*74a4d8c2SCharles.Forsyth delta = soff - sdest;
178*74a4d8c2SCharles.Forsyth sign = 0;
179*74a4d8c2SCharles.Forsyth if(delta < 0){
180*74a4d8c2SCharles.Forsyth sign = 1;
181*74a4d8c2SCharles.Forsyth delta = -delta;
182*74a4d8c2SCharles.Forsyth }
183*74a4d8c2SCharles.Forsyth
184*74a4d8c2SCharles.Forsyth p = 0;
185*74a4d8c2SCharles.Forsyth for(j = 0; j <= height; j++){
186*74a4d8c2SCharles.Forsyth d = daddr;
187*74a4d8c2SCharles.Forsyth s = saddr;
188*74a4d8c2SCharles.Forsyth mask = lmask;
189*74a4d8c2SCharles.Forsyth tmp = 0;
190*74a4d8c2SCharles.Forsyth if(!sign)
191*74a4d8c2SCharles.Forsyth tmp = *s++;
192*74a4d8c2SCharles.Forsyth spare = 0;
193*74a4d8c2SCharles.Forsyth for(i = wwidth; i >= 0; i--){
194*74a4d8c2SCharles.Forsyth if(spare)
195*74a4d8c2SCharles.Forsyth sw = *p++;
196*74a4d8c2SCharles.Forsyth else{
197*74a4d8c2SCharles.Forsyth if(sign){
198*74a4d8c2SCharles.Forsyth sw = tmp << (WBITS-delta);
199*74a4d8c2SCharles.Forsyth tmp = *s++;
200*74a4d8c2SCharles.Forsyth sw |= tmp >> delta;
201*74a4d8c2SCharles.Forsyth }else{
202*74a4d8c2SCharles.Forsyth sw = tmp << delta;
203*74a4d8c2SCharles.Forsyth tmp = *s++;
204*74a4d8c2SCharles.Forsyth if(delta)
205*74a4d8c2SCharles.Forsyth sw |= tmp >> (WBITS-delta);
206*74a4d8c2SCharles.Forsyth }
207*74a4d8c2SCharles.Forsyth spare = 1 << deltadep;
208*74a4d8c2SCharles.Forsyth if(deltadep >= 1){
209*74a4d8c2SCharles.Forsyth gbitexplode(sw, buf, sdep, deltadep);
210*74a4d8c2SCharles.Forsyth p = buf;
211*74a4d8c2SCharles.Forsyth sw = *p++;
212*74a4d8c2SCharles.Forsyth }
213*74a4d8c2SCharles.Forsyth }
214*74a4d8c2SCharles.Forsyth
215*74a4d8c2SCharles.Forsyth dw = *d;
216*74a4d8c2SCharles.Forsyth switch(fcode){ /* ltor bit aligned */
217*74a4d8c2SCharles.Forsyth case Zero: *d = dw & ~mask; break;
218*74a4d8c2SCharles.Forsyth case DnorS: *d = dw ^ ((~sw | dw) & mask); break;
219*74a4d8c2SCharles.Forsyth case DandnotS: *d = dw ^ ((sw & dw) & mask); break;
220*74a4d8c2SCharles.Forsyth case notS: *d = dw ^ ((~sw ^ dw) & mask); break;
221*74a4d8c2SCharles.Forsyth case notDandS: *d = dw ^ ((sw | dw) & mask); break;
222*74a4d8c2SCharles.Forsyth case notD: *d = dw ^ mask; break;
223*74a4d8c2SCharles.Forsyth case DxorS: *d = dw ^ (sw & mask); break;
224*74a4d8c2SCharles.Forsyth case DnandS: *d = dw ^ ((sw | ~dw) & mask); break;
225*74a4d8c2SCharles.Forsyth case DandS: *d = dw ^ ((~sw & dw) & mask); break;
226*74a4d8c2SCharles.Forsyth case DxnorS: *d = dw ^ (~sw & mask); break;
227*74a4d8c2SCharles.Forsyth case D: break;
228*74a4d8c2SCharles.Forsyth case DornotS: *d = dw | (~sw & mask); break;
229*74a4d8c2SCharles.Forsyth case S: *d = dw ^ ((sw ^ dw) & mask); break;
230*74a4d8c2SCharles.Forsyth case notDorS: *d = dw ^ (~(sw & dw) & mask); break;
231*74a4d8c2SCharles.Forsyth case DorS: *d = dw | (sw & mask); break;
232*74a4d8c2SCharles.Forsyth case F: *d = dw | mask; break;
233*74a4d8c2SCharles.Forsyth }
234*74a4d8c2SCharles.Forsyth d++;
235*74a4d8c2SCharles.Forsyth
236*74a4d8c2SCharles.Forsyth mask = WMASK;
237*74a4d8c2SCharles.Forsyth if(i == 1)
238*74a4d8c2SCharles.Forsyth mask = rmask;
239*74a4d8c2SCharles.Forsyth spare--;
240*74a4d8c2SCharles.Forsyth }
241*74a4d8c2SCharles.Forsyth saddr += sspan;
242*74a4d8c2SCharles.Forsyth daddr += dspan;
243*74a4d8c2SCharles.Forsyth }
244*74a4d8c2SCharles.Forsyth }
245*74a4d8c2SCharles.Forsyth
246*74a4d8c2SCharles.Forsyth #ifdef TEST
247*74a4d8c2SCharles.Forsyth void prprog(void);
248*74a4d8c2SCharles.Forsyth GBitmap *bb1, *bb2;
249*74a4d8c2SCharles.Forsyth ulong *src, *dst, *xdst, *xans;
250*74a4d8c2SCharles.Forsyth int swds, dwds;
251*74a4d8c2SCharles.Forsyth long ticks;
252*74a4d8c2SCharles.Forsyth int timeit;
253*74a4d8c2SCharles.Forsyth
254*74a4d8c2SCharles.Forsyth long
func(int f,long s,int sld,long d,int dld)255*74a4d8c2SCharles.Forsyth func(int f, long s, int sld, long d, int dld)
256*74a4d8c2SCharles.Forsyth {
257*74a4d8c2SCharles.Forsyth long a;
258*74a4d8c2SCharles.Forsyth int sh, i, db, sb;
259*74a4d8c2SCharles.Forsyth
260*74a4d8c2SCharles.Forsyth db = 1 << dld;
261*74a4d8c2SCharles.Forsyth sb = 1 << sld;
262*74a4d8c2SCharles.Forsyth sh = db - sb;
263*74a4d8c2SCharles.Forsyth if(sh > 0) {
264*74a4d8c2SCharles.Forsyth a = s;
265*74a4d8c2SCharles.Forsyth for(i = sb; i<db; i += sb){
266*74a4d8c2SCharles.Forsyth a <<= sb;
267*74a4d8c2SCharles.Forsyth s |= a;
268*74a4d8c2SCharles.Forsyth }
269*74a4d8c2SCharles.Forsyth } else if(sh < 0)
270*74a4d8c2SCharles.Forsyth s >>= -sh;
271*74a4d8c2SCharles.Forsyth
272*74a4d8c2SCharles.Forsyth switch(f){
273*74a4d8c2SCharles.Forsyth case Zero: d = 0; break;
274*74a4d8c2SCharles.Forsyth case DnorS: d = ~(d|s); break;
275*74a4d8c2SCharles.Forsyth case DandnotS: d = d & ~s; break;
276*74a4d8c2SCharles.Forsyth case notS: d = ~s; break;
277*74a4d8c2SCharles.Forsyth case notDandS: d = ~d & s; break;
278*74a4d8c2SCharles.Forsyth case notD: d = ~d; break;
279*74a4d8c2SCharles.Forsyth case DxorS: d = d ^ s; break;
280*74a4d8c2SCharles.Forsyth case DnandS: d = ~(d&s); break;
281*74a4d8c2SCharles.Forsyth case DandS: d = d & s; break;
282*74a4d8c2SCharles.Forsyth case DxnorS: d = ~(d^s); break;
283*74a4d8c2SCharles.Forsyth case S: d = s; break;
284*74a4d8c2SCharles.Forsyth case DornotS: d = d | ~s; break;
285*74a4d8c2SCharles.Forsyth case D: d = d; break;
286*74a4d8c2SCharles.Forsyth case notDorS: d = ~d | s; break;
287*74a4d8c2SCharles.Forsyth case DorS: d = d | s; break;
288*74a4d8c2SCharles.Forsyth case F: d = ~0; break;
289*74a4d8c2SCharles.Forsyth }
290*74a4d8c2SCharles.Forsyth
291*74a4d8c2SCharles.Forsyth d &= ((1<<db)-1);
292*74a4d8c2SCharles.Forsyth return d;
293*74a4d8c2SCharles.Forsyth }
294*74a4d8c2SCharles.Forsyth
295*74a4d8c2SCharles.Forsyth void
run(int fr,int to,int w,int op)296*74a4d8c2SCharles.Forsyth run(int fr, int to, int w, int op)
297*74a4d8c2SCharles.Forsyth {
298*74a4d8c2SCharles.Forsyth int i, j, f, t, fy, ty;
299*74a4d8c2SCharles.Forsyth extern long *_clock;
300*74a4d8c2SCharles.Forsyth
301*74a4d8c2SCharles.Forsyth fr += bb2->r.min.x;
302*74a4d8c2SCharles.Forsyth to += bb1->r.min.x;
303*74a4d8c2SCharles.Forsyth fy = bb2->r.min.y + 1;
304*74a4d8c2SCharles.Forsyth ty = bb1->r.min.y + 1;
305*74a4d8c2SCharles.Forsyth if(timeit) {
306*74a4d8c2SCharles.Forsyth memcpy(dst, xdst, dwds * sizeof(long));
307*74a4d8c2SCharles.Forsyth ticks -= *_clock;
308*74a4d8c2SCharles.Forsyth gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
309*74a4d8c2SCharles.Forsyth ticks += *_clock;
310*74a4d8c2SCharles.Forsyth return;
311*74a4d8c2SCharles.Forsyth }
312*74a4d8c2SCharles.Forsyth f = fr;
313*74a4d8c2SCharles.Forsyth t = to;
314*74a4d8c2SCharles.Forsyth memcpy(dst, xdst, dwds * sizeof(long));
315*74a4d8c2SCharles.Forsyth for(i=0; i<w; i++) {
316*74a4d8c2SCharles.Forsyth gbitblt(bb1, Pt(t,ty), bb2, Rect(f,fy,f+1,fy+1), op);
317*74a4d8c2SCharles.Forsyth gbitblt(bb1, Pt(t,ty+1), bb2, Rect(f,fy+1,f+1,fy+2), op);
318*74a4d8c2SCharles.Forsyth f++;
319*74a4d8c2SCharles.Forsyth t++;
320*74a4d8c2SCharles.Forsyth }
321*74a4d8c2SCharles.Forsyth memcpy(xans, dst, dwds * sizeof(long));
322*74a4d8c2SCharles.Forsyth
323*74a4d8c2SCharles.Forsyth memcpy(dst, xdst, dwds * sizeof(long));
324*74a4d8c2SCharles.Forsyth gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
325*74a4d8c2SCharles.Forsyth
326*74a4d8c2SCharles.Forsyth if(memcmp(xans, dst, dwds * sizeof(long))) {
327*74a4d8c2SCharles.Forsyth /*
328*74a4d8c2SCharles.Forsyth * print src and dst row offset, width in bits, and forw/back
329*74a4d8c2SCharles.Forsyth * then print for each of the four rows: the source (s),
330*74a4d8c2SCharles.Forsyth * the dest (d), the good value of the answer (g),
331*74a4d8c2SCharles.Forsyth * and the actual bad value of the answer (b)
332*74a4d8c2SCharles.Forsyth */
333*74a4d8c2SCharles.Forsyth print("fr=%d to=%d w=%d fb=%d%d\n",
334*74a4d8c2SCharles.Forsyth fr, to, w, FORCEFORW, FORCEBAKW);
335*74a4d8c2SCharles.Forsyth print("dst bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
336*74a4d8c2SCharles.Forsyth bb1->base, bb1->zero, bb1->width, bb1->ldepth,
337*74a4d8c2SCharles.Forsyth bb1->r.min.x, bb1->r.min.y, bb1->r.max.x, bb1->r.max.y);
338*74a4d8c2SCharles.Forsyth print("src bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
339*74a4d8c2SCharles.Forsyth bb2->base, bb2->zero, bb2->width, bb2->ldepth,
340*74a4d8c2SCharles.Forsyth bb2->r.min.x, bb2->r.min.y, bb2->r.max.x, bb2->r.max.y);
341*74a4d8c2SCharles.Forsyth for(j=0; 7*j < dwds; j++) {
342*74a4d8c2SCharles.Forsyth print("\ns");
343*74a4d8c2SCharles.Forsyth for(i=0; i<7 && 7*j+i < dwds; i++)
344*74a4d8c2SCharles.Forsyth print(" %.8lux", src[7*j + i]);
345*74a4d8c2SCharles.Forsyth print("\nd");
346*74a4d8c2SCharles.Forsyth for(i=0; i<7 && 7*j+i < dwds; i++)
347*74a4d8c2SCharles.Forsyth print(" %.8lux", xdst[7*j + i]);
348*74a4d8c2SCharles.Forsyth print("\ng");
349*74a4d8c2SCharles.Forsyth for(i=0; i<7 && 7*j+i < dwds; i++)
350*74a4d8c2SCharles.Forsyth print(" %.8lux", xans[7*j + i]);
351*74a4d8c2SCharles.Forsyth print("\nb");
352*74a4d8c2SCharles.Forsyth for(i=0; i<7 && 7*j+i < dwds; i++)
353*74a4d8c2SCharles.Forsyth print(" %.8lux", dst[7*j + i]);
354*74a4d8c2SCharles.Forsyth print("\n");
355*74a4d8c2SCharles.Forsyth }
356*74a4d8c2SCharles.Forsyth prprog();
357*74a4d8c2SCharles.Forsyth }
358*74a4d8c2SCharles.Forsyth }
359*74a4d8c2SCharles.Forsyth
/*
 * Called after a failure has been reported.  Here it only ends the
 * program by calling exits(0), so nothing placed after a call to
 * prprog is expected to run.
 */
void
prprog(void)
{
	exits(0);
}
365*74a4d8c2SCharles.Forsyth
366*74a4d8c2SCharles.Forsyth int
main(int argc,char * argv[])367*74a4d8c2SCharles.Forsyth main(int argc, char *argv[])
368*74a4d8c2SCharles.Forsyth {
369*74a4d8c2SCharles.Forsyth int f, t, w, i, sld, dld, op, iters, simple;
370*74a4d8c2SCharles.Forsyth ulong s, d, spix, dpix, apix, fpix, m, *ps, *pd;
371*74a4d8c2SCharles.Forsyth Point sorg, dorg;
372*74a4d8c2SCharles.Forsyth GBitmap *bs, *bd;
373*74a4d8c2SCharles.Forsyth long seed;
374*74a4d8c2SCharles.Forsyth char *ct;
375*74a4d8c2SCharles.Forsyth
376*74a4d8c2SCharles.Forsyth sld = 0;
377*74a4d8c2SCharles.Forsyth dld = 0;
378*74a4d8c2SCharles.Forsyth timeit = 0;
379*74a4d8c2SCharles.Forsyth iters = 200;
380*74a4d8c2SCharles.Forsyth simple = 0;
381*74a4d8c2SCharles.Forsyth ARGBEGIN {
382*74a4d8c2SCharles.Forsyth case 'i':
383*74a4d8c2SCharles.Forsyth iters = atoi(ARGF());
384*74a4d8c2SCharles.Forsyth break;
385*74a4d8c2SCharles.Forsyth case 's':
386*74a4d8c2SCharles.Forsyth simple = 1;
387*74a4d8c2SCharles.Forsyth break;
388*74a4d8c2SCharles.Forsyth case 't':
389*74a4d8c2SCharles.Forsyth timeit = 1;
390*74a4d8c2SCharles.Forsyth ct = ARGF();
391*74a4d8c2SCharles.Forsyth if(ct)
392*74a4d8c2SCharles.Forsyth iters = atoi(ct);
393*74a4d8c2SCharles.Forsyth break;
394*74a4d8c2SCharles.Forsyth } ARGEND
395*74a4d8c2SCharles.Forsyth if(argc > 0)
396*74a4d8c2SCharles.Forsyth sld = atoi(argv[0]);
397*74a4d8c2SCharles.Forsyth if(argc > 1)
398*74a4d8c2SCharles.Forsyth dld = atoi(argv[1]);
399*74a4d8c2SCharles.Forsyth if(!timeit && !simple) {
400*74a4d8c2SCharles.Forsyth seed = time(0);
401*74a4d8c2SCharles.Forsyth print("seed %lux\n", seed); srand(seed); /**/
402*74a4d8c2SCharles.Forsyth }
403*74a4d8c2SCharles.Forsyth
404*74a4d8c2SCharles.Forsyth print("sld %d dld %d\n", sld, dld);
405*74a4d8c2SCharles.Forsyth op = 1;
406*74a4d8c2SCharles.Forsyth
407*74a4d8c2SCharles.Forsyth /* bitmaps for 1-bit tests */
408*74a4d8c2SCharles.Forsyth bd = gballoc(Rect(0,0,32,1), dld);
409*74a4d8c2SCharles.Forsyth bs = gballoc(Rect(0,0,32,1), sld);
410*74a4d8c2SCharles.Forsyth for(i=0; i<bs->width; i++)
411*74a4d8c2SCharles.Forsyth bs->base[i] = lrand();
412*74a4d8c2SCharles.Forsyth
413*74a4d8c2SCharles.Forsyth /* bitmaps for rect tests */
414*74a4d8c2SCharles.Forsyth if(simple) {
415*74a4d8c2SCharles.Forsyth dorg = Pt(0,0);
416*74a4d8c2SCharles.Forsyth sorg = Pt(0,0);
417*74a4d8c2SCharles.Forsyth } else {
418*74a4d8c2SCharles.Forsyth dorg = Pt(nrand(63)-31,nrand(63)-31);
419*74a4d8c2SCharles.Forsyth sorg = Pt(nrand(63)-31,nrand(63)-31);
420*74a4d8c2SCharles.Forsyth }
421*74a4d8c2SCharles.Forsyth bb1 = gballoc(Rpt(dorg,add(dorg,Pt(200,4))), dld);
422*74a4d8c2SCharles.Forsyth bb2 = gballoc(Rpt(sorg,add(sorg,Pt(200,4))), sld);
423*74a4d8c2SCharles.Forsyth dwds = bb1->width * Dy(bb1->r);
424*74a4d8c2SCharles.Forsyth swds = bb2->width * Dy(bb2->r);
425*74a4d8c2SCharles.Forsyth dst = bb1->base;
426*74a4d8c2SCharles.Forsyth src = bb2->base;
427*74a4d8c2SCharles.Forsyth xdst = malloc(dwds * sizeof(long));
428*74a4d8c2SCharles.Forsyth xans = malloc(dwds * sizeof(long));
429*74a4d8c2SCharles.Forsyth for(i=0; i<swds; i++)
430*74a4d8c2SCharles.Forsyth src[i] = lrand();
431*74a4d8c2SCharles.Forsyth for(i=0; i<dwds; i++)
432*74a4d8c2SCharles.Forsyth xdst[i] = lrand();
433*74a4d8c2SCharles.Forsyth
434*74a4d8c2SCharles.Forsyth loop:
435*74a4d8c2SCharles.Forsyth print("Op %d\n", op);
436*74a4d8c2SCharles.Forsyth if(!timeit) {
437*74a4d8c2SCharles.Forsyth print("one pixel\n");
438*74a4d8c2SCharles.Forsyth ps = bs->base;
439*74a4d8c2SCharles.Forsyth pd = bd->base;
440*74a4d8c2SCharles.Forsyth FORCEFORW = 1;
441*74a4d8c2SCharles.Forsyth FORCEBAKW = 0;
442*74a4d8c2SCharles.Forsyth for(i=0; i<1000; i++, FORCEFORW = !FORCEFORW, FORCEBAKW = !FORCEBAKW) {
443*74a4d8c2SCharles.Forsyth f = nrand(32 >> sld);
444*74a4d8c2SCharles.Forsyth t = nrand(32 >> dld);
445*74a4d8c2SCharles.Forsyth s = lrand();
446*74a4d8c2SCharles.Forsyth d = lrand();
447*74a4d8c2SCharles.Forsyth ps[0] = s;
448*74a4d8c2SCharles.Forsyth pd[0] = d;
449*74a4d8c2SCharles.Forsyth #ifdef T386
450*74a4d8c2SCharles.Forsyth spix = (byterev(s) >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
451*74a4d8c2SCharles.Forsyth dpix = (byterev(d) >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
452*74a4d8c2SCharles.Forsyth #else
453*74a4d8c2SCharles.Forsyth spix = (s >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
454*74a4d8c2SCharles.Forsyth dpix = (d >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
455*74a4d8c2SCharles.Forsyth #endif
456*74a4d8c2SCharles.Forsyth #ifdef T386
457*74a4d8c2SCharles.Forsyth apix = byterev(func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld)));
458*74a4d8c2SCharles.Forsyth #else
459*74a4d8c2SCharles.Forsyth apix = func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld));
460*74a4d8c2SCharles.Forsyth #endif
461*74a4d8c2SCharles.Forsyth gbitblt(bd, Pt(t,0), bs, Rect(f,0,f+1,1), op);
462*74a4d8c2SCharles.Forsyth if(ps[0] != s) {
463*74a4d8c2SCharles.Forsyth print("bb src %.8lux %.8lux %d %d\n", ps[0], s, f, t);
464*74a4d8c2SCharles.Forsyth exits("error");
465*74a4d8c2SCharles.Forsyth }
466*74a4d8c2SCharles.Forsyth m = ((1 << (1<<dld)) - 1) << (32 - ((t+1)<<dld));
467*74a4d8c2SCharles.Forsyth #ifdef T386
468*74a4d8c2SCharles.Forsyth m = byterev(m);
469*74a4d8c2SCharles.Forsyth #endif
470*74a4d8c2SCharles.Forsyth if((pd[0] & ~m) != (d & ~m)) {
471*74a4d8c2SCharles.Forsyth print("bb dst1 %.8lux %.8lux\n",
472*74a4d8c2SCharles.Forsyth s, d);
473*74a4d8c2SCharles.Forsyth print("bb %.8lux %.8lux %d %d\n",
474*74a4d8c2SCharles.Forsyth ps[0], pd[0], f, t);
475*74a4d8c2SCharles.Forsyth prprog();
476*74a4d8c2SCharles.Forsyth exits("error");
477*74a4d8c2SCharles.Forsyth }
478*74a4d8c2SCharles.Forsyth if((pd[0] & m) != apix) {
479*74a4d8c2SCharles.Forsyth spix <<= 32 - ((f+1)<<sld);
480*74a4d8c2SCharles.Forsyth dpix <<= 32 - ((t+1)<<dld);
481*74a4d8c2SCharles.Forsyth #ifdef T386
482*74a4d8c2SCharles.Forsyth spix = byterev(spix);
483*74a4d8c2SCharles.Forsyth dpix = byterev(dpix);
484*74a4d8c2SCharles.Forsyth #endif
485*74a4d8c2SCharles.Forsyth print("bb dst2 %.8lux %.8lux\n",
486*74a4d8c2SCharles.Forsyth s, d);
487*74a4d8c2SCharles.Forsyth print("bb %.8lux %.8lux %d %d\n",
488*74a4d8c2SCharles.Forsyth ps[0], pd[0], f, t);
489*74a4d8c2SCharles.Forsyth print("bb %.8lux %.8lux %.8lux %.8lux\n",
490*74a4d8c2SCharles.Forsyth spix, dpix, apix, pd[0] & m);
491*74a4d8c2SCharles.Forsyth prprog();
492*74a4d8c2SCharles.Forsyth exits("error");
493*74a4d8c2SCharles.Forsyth }
494*74a4d8c2SCharles.Forsyth }
495*74a4d8c2SCharles.Forsyth }
496*74a4d8c2SCharles.Forsyth
497*74a4d8c2SCharles.Forsyth print("for\n");
498*74a4d8c2SCharles.Forsyth FORCEFORW = 1;
499*74a4d8c2SCharles.Forsyth FORCEBAKW = 0;
500*74a4d8c2SCharles.Forsyth
501*74a4d8c2SCharles.Forsyth for(i=0; i<iters; i++) {
502*74a4d8c2SCharles.Forsyth f = nrand(64);
503*74a4d8c2SCharles.Forsyth t = nrand(64);
504*74a4d8c2SCharles.Forsyth w = nrand(130);
505*74a4d8c2SCharles.Forsyth run(f, t, w, op);
506*74a4d8c2SCharles.Forsyth }
507*74a4d8c2SCharles.Forsyth
508*74a4d8c2SCharles.Forsyth if(sld == dld) {
509*74a4d8c2SCharles.Forsyth print("bak\n");
510*74a4d8c2SCharles.Forsyth FORCEFORW = 0;
511*74a4d8c2SCharles.Forsyth FORCEBAKW = 1;
512*74a4d8c2SCharles.Forsyth
513*74a4d8c2SCharles.Forsyth for(i=0; i<iters; i++) {
514*74a4d8c2SCharles.Forsyth f = nrand(64);
515*74a4d8c2SCharles.Forsyth t = nrand(64);
516*74a4d8c2SCharles.Forsyth w = nrand(130);
517*74a4d8c2SCharles.Forsyth run(f, t, w, op);
518*74a4d8c2SCharles.Forsyth }
519*74a4d8c2SCharles.Forsyth }
520*74a4d8c2SCharles.Forsyth
521*74a4d8c2SCharles.Forsyth if(op < F) {
522*74a4d8c2SCharles.Forsyth op++;
523*74a4d8c2SCharles.Forsyth goto loop;
524*74a4d8c2SCharles.Forsyth }
525*74a4d8c2SCharles.Forsyth if(timeit)
526*74a4d8c2SCharles.Forsyth print("time: %d ticks\n", ticks);
527*74a4d8c2SCharles.Forsyth exits(0);
528*74a4d8c2SCharles.Forsyth }
529*74a4d8c2SCharles.Forsyth
530*74a4d8c2SCharles.Forsyth
531*74a4d8c2SCharles.Forsyth #endif
532