1 #include <u.h>
2 #include <libc.h>
3 #include <libg.h>
4 #include <gnot.h>
5
/*
 * bitblt operates a 'word' at a time.
 * WBITS is the number of bits in a word,
 * LWBITS is log2(WBITS),
 * W2L is the number of words in a long,
 * WMASK has bits set for the low order word of a long,
 * WType is a pointer to a word.
 */
14 #ifndef WBITS
15 #define WBITS 32
16 #define LWBITS 5
17 #define W2L 1
18 #define WMASK ~0UL
19 typedef ulong *WType;
20 #endif
21
#define DEBUG

#ifdef TEST
/*
 * Globals used for testing.
 * gbitblt saves its most recent arguments in the cur* variables and
 * consults FORCEFORW/FORCEBAKW when choosing the scan direction.
 */
int FORCEFORW;		/* nonzero: keep the top-to-bottom scan */
int FORCEBAKW;		/* nonzero: take the bottom-to-top scan */
GBitmap *curdm;		/* destination bitmap of the last gbitblt call */
GBitmap *cursm;		/* source bitmap of the last gbitblt call */
Point curpt;		/* destination point of the last gbitblt call */
Rectangle curr;		/* source rectangle of the last gbitblt call */
Fcode curf;		/* function code of the last gbitblt call */
void *mem;		/* not referenced in this part of the file */
#endif
36
37 static void
gbitexplode(ulong sw,ulong * buf,int sdep,int x)38 gbitexplode(ulong sw, ulong *buf, int sdep, int x)
39 {
40 int j, o, q, n, nw, inc, qinc;
41 ulong s, dw, pix;
42
43 inc = 1 << sdep;
44 pix = (1 << inc) - 1;
45 nw = 1 << x;
46 n = 32 >> x;
47 qinc = (nw << sdep) - inc;
48 for(o = 32 - n; o >= 0; o -= n){
49 dw = 0;
50 s = sw >> o;
51 q = 0;
52 for(j = 0; j < n; j += inc){
53 dw |= (s & (pix << j)) << q;
54 q += qinc;
55 }
56 for(j = 0; j < x; j++)
57 dw |= dw << (inc << j);
58 *buf++ = dw;
59 }
60 }
61
62 /*
63 void
64 main(void)
65 {
66 ulong buf[128];
67
68 gbitexplode(0x7777, buf, 0, 3);
69 exits(0);
70 }
71 */
72
73 void
gbitblt(GBitmap * dm,Point pt,GBitmap * sm,Rectangle r,Fcode fcode)74 gbitblt(GBitmap *dm, Point pt, GBitmap *sm, Rectangle r, Fcode fcode)
75 {
76 int width; /* width in bits of dst */
77 int wwidth; /* floor width in words */
78 int height; /* height in pixels minus 1 */
79 int sdep; /* src ldepth */
80 int ddep; /* dst ldepth */
81 int deltadep; /* diff between ldepths */
82 int sspan; /* words between scanlines in src */
83 int dspan; /* words between scanlines in dst */
84 int soff; /* bit offset of src start point */
85 int sdest; /* bit offset of src start point that matches doff when expanded */
86 int doff; /* bit offset of dst start point */
87 int delta; /* amount to shift src by */
88 int sign; /* of delta */
89 ulong *saddr;
90 ulong *daddr;
91 ulong *s;
92 ulong *d;
93 ulong mask;
94 ulong tmp; /* temp storage source word */
95 ulong sw; /* source word constructed */
96 ulong dw; /* dest word fetched */
97 ulong lmask; /* affected pixels in leftmost dst word */
98 ulong rmask; /* affected pixels in rightmost dst word */
99 int i;
100 int j;
101 ulong buf[32]; /* for expanding a source */
102 ulong *p; /* pointer into buf */
103 int spare; /* number of words already converted */
104
105
106 #ifdef TEST
107 curdm = dm;
108 cursm = sm;
109 curpt = pt;
110 curr = r;
111 curf = fcode;
112 #endif
113
114 gbitbltclip(&dm);
115
116 width = r.max.x - r.min.x;
117 if(width <= 0)
118 return;
119 height = r.max.y - r.min.y - 1;
120 if(height < 0)
121 return;
122
123 ddep = dm->ldepth;
124 pt.x <<= ddep;
125 width <<= ddep;
126
127 sdep = sm->ldepth;
128 r.min.x <<= sdep;
129 r.max.x <<= sdep;
130
131 dspan = dm->width * W2L;
132 sspan = sm->width * W2L;
133
134 daddr = (ulong*)((WType)dm->base
135 + dm->zero*W2L + pt.y*dspan
136 + (pt.x >> LWBITS));
137 saddr = (ulong*)((WType)sm->base
138 + sm->zero*W2L + r.min.y*sspan
139 + (r.min.x >> LWBITS));
140
141 doff = pt.x & (WBITS - 1);
142 lmask = WMASK >> doff;
143 rmask = (WMASK << (WBITS - ((doff+width) & (WBITS-1))))&WMASK;
144 if(!rmask)
145 rmask = WMASK;
146 soff = r.min.x & (WBITS-1);
147 wwidth = ((pt.x+width-1)>>LWBITS) - (pt.x>>LWBITS);
148
149 if(sm == dm){
150 #ifdef TEST
151 if(!FORCEBAKW &&
152 (FORCEFORW || sm != dm || saddr > daddr ||
153 (saddr == daddr && soff > doff)))
154 ;
155 else{
156 daddr += height * dspan;
157 saddr += height * sspan;
158 sspan -= 2 * W2L * sm->width;
159 dspan -= 2 * W2L * dm->width;
160 }
161 #else
162 if(r.min.y < pt.y){ /* bottom to top */
163 daddr += height * dspan;
164 saddr += height * sspan;
165 sspan -= 2 * W2L * sm->width;
166 dspan -= 2 * W2L * dm->width;
167 }else if(r.min.y == pt.y && r.min.x < pt.x)
168 abort()/*goto right*/;
169 #endif
170 }
171 if(wwidth == 0) /* collapse masks for narrow cases */
172 lmask &= rmask;
173 fcode &= F;
174
175 deltadep = ddep - sdep;
176 sdest = doff >> deltadep;
177 delta = soff - sdest;
178 sign = 0;
179 if(delta < 0){
180 sign = 1;
181 delta = -delta;
182 }
183
184 p = 0;
185 for(j = 0; j <= height; j++){
186 d = daddr;
187 s = saddr;
188 mask = lmask;
189 tmp = 0;
190 if(!sign)
191 tmp = *s++;
192 spare = 0;
193 for(i = wwidth; i >= 0; i--){
194 if(spare)
195 sw = *p++;
196 else{
197 if(sign){
198 sw = tmp << (WBITS-delta);
199 tmp = *s++;
200 sw |= tmp >> delta;
201 }else{
202 sw = tmp << delta;
203 tmp = *s++;
204 if(delta)
205 sw |= tmp >> (WBITS-delta);
206 }
207 spare = 1 << deltadep;
208 if(deltadep >= 1){
209 gbitexplode(sw, buf, sdep, deltadep);
210 p = buf;
211 sw = *p++;
212 }
213 }
214
215 dw = *d;
216 switch(fcode){ /* ltor bit aligned */
217 case Zero: *d = dw & ~mask; break;
218 case DnorS: *d = dw ^ ((~sw | dw) & mask); break;
219 case DandnotS: *d = dw ^ ((sw & dw) & mask); break;
220 case notS: *d = dw ^ ((~sw ^ dw) & mask); break;
221 case notDandS: *d = dw ^ ((sw | dw) & mask); break;
222 case notD: *d = dw ^ mask; break;
223 case DxorS: *d = dw ^ (sw & mask); break;
224 case DnandS: *d = dw ^ ((sw | ~dw) & mask); break;
225 case DandS: *d = dw ^ ((~sw & dw) & mask); break;
226 case DxnorS: *d = dw ^ (~sw & mask); break;
227 case D: break;
228 case DornotS: *d = dw | (~sw & mask); break;
229 case S: *d = dw ^ ((sw ^ dw) & mask); break;
230 case notDorS: *d = dw ^ (~(sw & dw) & mask); break;
231 case DorS: *d = dw | (sw & mask); break;
232 case F: *d = dw | mask; break;
233 }
234 d++;
235
236 mask = WMASK;
237 if(i == 1)
238 mask = rmask;
239 spare--;
240 }
241 saddr += sspan;
242 daddr += dspan;
243 }
244 }
245
246 #ifdef TEST
247 void prprog(void);
248 GBitmap *bb1, *bb2;
249 ulong *src, *dst, *xdst, *xans;
250 int swds, dwds;
251 long ticks;
252 int timeit;
253
254 long
func(int f,long s,int sld,long d,int dld)255 func(int f, long s, int sld, long d, int dld)
256 {
257 long a;
258 int sh, i, db, sb;
259
260 db = 1 << dld;
261 sb = 1 << sld;
262 sh = db - sb;
263 if(sh > 0) {
264 a = s;
265 for(i = sb; i<db; i += sb){
266 a <<= sb;
267 s |= a;
268 }
269 } else if(sh < 0)
270 s >>= -sh;
271
272 switch(f){
273 case Zero: d = 0; break;
274 case DnorS: d = ~(d|s); break;
275 case DandnotS: d = d & ~s; break;
276 case notS: d = ~s; break;
277 case notDandS: d = ~d & s; break;
278 case notD: d = ~d; break;
279 case DxorS: d = d ^ s; break;
280 case DnandS: d = ~(d&s); break;
281 case DandS: d = d & s; break;
282 case DxnorS: d = ~(d^s); break;
283 case S: d = s; break;
284 case DornotS: d = d | ~s; break;
285 case D: d = d; break;
286 case notDorS: d = ~d | s; break;
287 case DorS: d = d | s; break;
288 case F: d = ~0; break;
289 }
290
291 d &= ((1<<db)-1);
292 return d;
293 }
294
295 void
run(int fr,int to,int w,int op)296 run(int fr, int to, int w, int op)
297 {
298 int i, j, f, t, fy, ty;
299 extern long *_clock;
300
301 fr += bb2->r.min.x;
302 to += bb1->r.min.x;
303 fy = bb2->r.min.y + 1;
304 ty = bb1->r.min.y + 1;
305 if(timeit) {
306 memcpy(dst, xdst, dwds * sizeof(long));
307 ticks -= *_clock;
308 gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
309 ticks += *_clock;
310 return;
311 }
312 f = fr;
313 t = to;
314 memcpy(dst, xdst, dwds * sizeof(long));
315 for(i=0; i<w; i++) {
316 gbitblt(bb1, Pt(t,ty), bb2, Rect(f,fy,f+1,fy+1), op);
317 gbitblt(bb1, Pt(t,ty+1), bb2, Rect(f,fy+1,f+1,fy+2), op);
318 f++;
319 t++;
320 }
321 memcpy(xans, dst, dwds * sizeof(long));
322
323 memcpy(dst, xdst, dwds * sizeof(long));
324 gbitblt(bb1, Pt(to,ty), bb2, Rect(fr,fy,fr+w,fy+2), op);
325
326 if(memcmp(xans, dst, dwds * sizeof(long))) {
327 /*
328 * print src and dst row offset, width in bits, and forw/back
329 * then print for each of the four rows: the source (s),
330 * the dest (d), the good value of the answer (g),
331 * and the actual bad value of the answer (b)
332 */
333 print("fr=%d to=%d w=%d fb=%d%d\n",
334 fr, to, w, FORCEFORW, FORCEBAKW);
335 print("dst bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
336 bb1->base, bb1->zero, bb1->width, bb1->ldepth,
337 bb1->r.min.x, bb1->r.min.y, bb1->r.max.x, bb1->r.max.y);
338 print("src bitmap b %#lux, z %d, w %d, ld %d, r [%d,%d][%d,%d]\n",
339 bb2->base, bb2->zero, bb2->width, bb2->ldepth,
340 bb2->r.min.x, bb2->r.min.y, bb2->r.max.x, bb2->r.max.y);
341 for(j=0; 7*j < dwds; j++) {
342 print("\ns");
343 for(i=0; i<7 && 7*j+i < dwds; i++)
344 print(" %.8lux", src[7*j + i]);
345 print("\nd");
346 for(i=0; i<7 && 7*j+i < dwds; i++)
347 print(" %.8lux", xdst[7*j + i]);
348 print("\ng");
349 for(i=0; i<7 && 7*j+i < dwds; i++)
350 print(" %.8lux", xans[7*j + i]);
351 print("\nb");
352 for(i=0; i<7 && 7*j+i < dwds; i++)
353 print(" %.8lux", dst[7*j + i]);
354 print("\n");
355 }
356 prprog();
357 }
358 }
359
/*
 * Called once a test failure has been printed.  This version only
 * ends the program, passing 0 to exits.
 * NOTE(review): because this never returns, the exits("error") calls
 * that follow prprog() in main cannot be reached - confirm intended.
 */
void
prprog(void)
{
	exits(0);
}
365
366 int
main(int argc,char * argv[])367 main(int argc, char *argv[])
368 {
369 int f, t, w, i, sld, dld, op, iters, simple;
370 ulong s, d, spix, dpix, apix, fpix, m, *ps, *pd;
371 Point sorg, dorg;
372 GBitmap *bs, *bd;
373 long seed;
374 char *ct;
375
376 sld = 0;
377 dld = 0;
378 timeit = 0;
379 iters = 200;
380 simple = 0;
381 ARGBEGIN {
382 case 'i':
383 iters = atoi(ARGF());
384 break;
385 case 's':
386 simple = 1;
387 break;
388 case 't':
389 timeit = 1;
390 ct = ARGF();
391 if(ct)
392 iters = atoi(ct);
393 break;
394 } ARGEND
395 if(argc > 0)
396 sld = atoi(argv[0]);
397 if(argc > 1)
398 dld = atoi(argv[1]);
399 if(!timeit && !simple) {
400 seed = time(0);
401 print("seed %lux\n", seed); srand(seed); /**/
402 }
403
404 print("sld %d dld %d\n", sld, dld);
405 op = 1;
406
407 /* bitmaps for 1-bit tests */
408 bd = gballoc(Rect(0,0,32,1), dld);
409 bs = gballoc(Rect(0,0,32,1), sld);
410 for(i=0; i<bs->width; i++)
411 bs->base[i] = lrand();
412
413 /* bitmaps for rect tests */
414 if(simple) {
415 dorg = Pt(0,0);
416 sorg = Pt(0,0);
417 } else {
418 dorg = Pt(nrand(63)-31,nrand(63)-31);
419 sorg = Pt(nrand(63)-31,nrand(63)-31);
420 }
421 bb1 = gballoc(Rpt(dorg,add(dorg,Pt(200,4))), dld);
422 bb2 = gballoc(Rpt(sorg,add(sorg,Pt(200,4))), sld);
423 dwds = bb1->width * Dy(bb1->r);
424 swds = bb2->width * Dy(bb2->r);
425 dst = bb1->base;
426 src = bb2->base;
427 xdst = malloc(dwds * sizeof(long));
428 xans = malloc(dwds * sizeof(long));
429 for(i=0; i<swds; i++)
430 src[i] = lrand();
431 for(i=0; i<dwds; i++)
432 xdst[i] = lrand();
433
434 loop:
435 print("Op %d\n", op);
436 if(!timeit) {
437 print("one pixel\n");
438 ps = bs->base;
439 pd = bd->base;
440 FORCEFORW = 1;
441 FORCEBAKW = 0;
442 for(i=0; i<1000; i++, FORCEFORW = !FORCEFORW, FORCEBAKW = !FORCEBAKW) {
443 f = nrand(32 >> sld);
444 t = nrand(32 >> dld);
445 s = lrand();
446 d = lrand();
447 ps[0] = s;
448 pd[0] = d;
449 #ifdef T386
450 spix = (byterev(s) >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
451 dpix = (byterev(d) >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
452 #else
453 spix = (s >> (32 - ((f+1)<<sld))) & ((1 << (1<<sld)) - 1);
454 dpix = (d >> (32 - ((t+1)<<dld))) & ((1 << (1<<dld)) - 1);
455 #endif
456 #ifdef T386
457 apix = byterev(func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld)));
458 #else
459 apix = func(op, spix, sld, dpix, dld) << (32 - ((t+1)<<dld));
460 #endif
461 gbitblt(bd, Pt(t,0), bs, Rect(f,0,f+1,1), op);
462 if(ps[0] != s) {
463 print("bb src %.8lux %.8lux %d %d\n", ps[0], s, f, t);
464 exits("error");
465 }
466 m = ((1 << (1<<dld)) - 1) << (32 - ((t+1)<<dld));
467 #ifdef T386
468 m = byterev(m);
469 #endif
470 if((pd[0] & ~m) != (d & ~m)) {
471 print("bb dst1 %.8lux %.8lux\n",
472 s, d);
473 print("bb %.8lux %.8lux %d %d\n",
474 ps[0], pd[0], f, t);
475 prprog();
476 exits("error");
477 }
478 if((pd[0] & m) != apix) {
479 spix <<= 32 - ((f+1)<<sld);
480 dpix <<= 32 - ((t+1)<<dld);
481 #ifdef T386
482 spix = byterev(spix);
483 dpix = byterev(dpix);
484 #endif
485 print("bb dst2 %.8lux %.8lux\n",
486 s, d);
487 print("bb %.8lux %.8lux %d %d\n",
488 ps[0], pd[0], f, t);
489 print("bb %.8lux %.8lux %.8lux %.8lux\n",
490 spix, dpix, apix, pd[0] & m);
491 prprog();
492 exits("error");
493 }
494 }
495 }
496
497 print("for\n");
498 FORCEFORW = 1;
499 FORCEBAKW = 0;
500
501 for(i=0; i<iters; i++) {
502 f = nrand(64);
503 t = nrand(64);
504 w = nrand(130);
505 run(f, t, w, op);
506 }
507
508 if(sld == dld) {
509 print("bak\n");
510 FORCEFORW = 0;
511 FORCEBAKW = 1;
512
513 for(i=0; i<iters; i++) {
514 f = nrand(64);
515 t = nrand(64);
516 w = nrand(130);
517 run(f, t, w, op);
518 }
519 }
520
521 if(op < F) {
522 op++;
523 goto loop;
524 }
525 if(timeit)
526 print("time: %d ticks\n", ticks);
527 exits(0);
528 }
529
530
531 #endif
532