xref: /plan9-contrib/sys/src/cmd/unix/drawterm/libmemdraw/draw.c (revision 7dd7cddf99dd7472612f1413b4da293630e6b1bc)
1 #include "../lib9.h"
2 
3 #include "../libdraw/draw.h"
4 #include "../libmemdraw/memdraw.h"
5 
/*
 * Luminance of an (r,g,b) triple using the NTSC weights
 * .299r+.587g+.114b, evaluated in fixed point with the weights
 * scaled by 1<<19.  The original author notes this is a perfect
 * approximation when 0 ≤ r,g,b < 256.
 */
#define RGB2K(r,g,b)	(((r)*156763+(g)*307758+(b)*59769)>>19)

/*
 * Division by 255 without a divide instruction.  Three formulations
 * were considered by the original author:
 *	((x)*257+256)>>16		perfect for 0 ≤ x ≤ 255*255
 *	(((x)+1)*257)>>16		perfect for 0 ≤ x < (1<<16)
 *	tmp=(x)+1, (tmp+(tmp>>8))>>8	also perfect up to 1<<16 and avoids
 *					the multiply, but needs a temporary
 * The second one is the one in use.
 */
#define DIV255(x) ((0x0101*((x)+1))>>16)
17 
18 static void mktables(void);
19 typedef int Subdraw(Memdrawparam*);
20 static Subdraw chardraw, alphadraw, memoptdraw;
21 
22 static Memimage*	memones;
23 static Memimage*	memzeros;
24 Memimage *memwhite;
25 Memimage *memblack;
26 Memimage *memtransparent;
27 Memimage *memopaque;
28 
29 int
30 Rconv(va_list *o, Fconv *f)
31 {
32 	Rectangle r;
33 	char buf[128];
34 
35 	r = va_arg(*o, Rectangle);
36 	sprint(buf, "%P %P", r.min, r.max);
37 	strconv(buf, f);
38 	return sizeof r;
39 }
40 
41 int
42 Pconv(va_list *o, Fconv *f)
43 {
44 	Point p;
45 	char buf[64];
46 
47 	p = va_arg(*o, Point);
48 	sprint(buf, "[%d %d]", p.x, p.y);
49 	strconv(buf, f);
50 	return sizeof p;
51 }
52 
53 void
54 _memimageinit(void)
55 {
56 	static int didinit = 0;
57 
58 	if(didinit)
59 		return;
60 
61 	didinit = 1;
62 
63 	fmtinstall('R', Rconv);
64 	fmtinstall('P', Pconv);
65 
66 	mktables();
67 	memmkcmap();
68 
69 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
70 	memones->flags |= Frepl;
71 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
72 	*byteaddr(memones, ZP) = ~0;
73 
74 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
75 	memzeros->flags |= Frepl;
76 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
77 	*byteaddr(memzeros, ZP) = 0;
78 
79 	if(memones == nil || memzeros == nil)
80 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
81 
82 	memwhite = memones;
83 	memblack = memzeros;
84 	memopaque = memones;
85 	memtransparent = memzeros;
86 }
87 
/*
 * Fold coordinate x into the half-open interval [min, max) by
 * wrapping it modulo the interval length.  The explicit correction
 * handles the negative remainder that % yields for x < min.
 */
int
drawreplxy(int min, int max, int x)
{
	int span, off;

	span = max - min;
	off = (x - min) % span;
	if(off < 0)
		off += span;
	return min + off;
}
98 
99 Point
100 drawrepl(Rectangle r, Point p)
101 {
102 	p.x = drawreplxy(r.min.x, r.max.x, p.x);
103 	p.y = drawreplxy(r.min.y, r.max.y, p.y);
104 	return p;
105 }
106 
107 #define DBG if(0)
108 static	Memdrawparam par;	/* sleazily left for the X implementation */
109 Memdrawparam*
110 _memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1)
111 {
112 	static int n = 0;
113 
114 	if(mask == nil)
115 		mask = memopaque;
116 
117 	if(drawdebug)
118 		iprint("memimagedraw %p/%luX %R %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, src, src->chan, p0, mask, mask->chan, p1);
119 
120 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
121 		if(drawdebug)
122 			iprint("empty clipped rectangle\n");
123 		return nil;
124 	}
125 
126 	par.dst = dst;
127 	par.r = r;
128 	par.src = src;
129 	/* par.sr set by drawclip */
130 	par.mask = mask;
131 	/* par.mr set by drawclip */
132 
133 	par.state = 0;
134 	if(src->flags&Frepl){
135 		par.state |= Replsrc;
136 		if(Dx(src->r)==1 && Dy(src->r)==1){
137 			par.sval = pixelbits(src, src->r.min);
138 			par.state |= Simplesrc;
139 			par.srgba = _imgtorgba(src, par.sval);
140 			par.sdval = _rgbatoimg(dst, par.srgba);
141 		}
142 	}
143 
144 	if(mask->flags & Frepl){
145 		par.state |= Replmask;
146 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
147 			par.mval = pixelbits(mask, mask->r.min);
148 			if(par.mval == 0){
149 				if(drawdebug) iprint("fill with zero mask\n");
150 				return nil;	/* no-op successfully handled */
151 			}
152 			par.state |= Simplemask;
153 			if(par.mval == ~0)
154 				par.state |= Fullmask;
155 			par.mrgba = _imgtorgba(mask, par.mval);
156 		}
157 	}
158 
159 	if(drawdebug)
160 		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
161 DBG print("draw dr %R sr %R mr %R\n", r, par.sr, par.mr);
162 
163 	return &par;
164 }
165 
166 void
167 _memimagedraw(Memdrawparam *par)
168 {
169 	if(par == nil)
170 		return;
171 
172 	/*
173 	 * Now that we've clipped the parameters down to be consistent, we
174 	 * simply try sub-drawing routines in order until we find one that was able
175 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
176 	 * unable to satisfy the request, so we do not return.
177 	 */
178 
179 	/*
180 	 * Hardware support.  Each video driver provides this function,
181 	 * which checks to see if there is anything it can help with.
182 	 * There could be an if around this checking to see if dst is in video memory.
183 	 */
184 	if(hwdraw(par))
185 {
186 if(drawdebug) iprint("hw handled\n");
187 		return;
188 }
189 	/*
190 	 * Optimizations using memmove and memset.
191 	 */
192 	if(memoptdraw(par)){
193 if(drawdebug) iprint("memopt handled\n");
194 {
195 		return;
196 }
197 	}
198 
199 	/*
200 	 * Character drawing.
201 	 * Solid source color being painted through a boolean mask onto a high res image.
202 	 */
203 	if(chardraw(par)){
204 if(drawdebug) iprint("chardraw handled\n");
205 		return;
206 	}
207 
208 	/*
209 	 * General calculation-laden case that does alpha for each pixel.
210 	 */
211 	alphadraw(par);
212 if(drawdebug) iprint("alphadraw handled\n");
213 	return;
214 }
215 #undef DBG
216 
217 /*
218  * Clip the destination rectangle further based on the properties of the
219  * source and mask rectangles.  Once the destination rectangle is properly
220  * clipped, adjust the source and mask rectangles to be the same size.
221  * Then if source or mask is replicated, move its clipped rectangle
222  * so that its minimum point falls within the repl rectangle.
223  *
224  * Return zero if the final rectangle is null.
225  */
226 int
227 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
228 {
229 	Point rmin, delta;
230 	int splitcoords;
231 	Rectangle omr;
232 	Point p;
233 
234 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
235 		return 0;
236 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
237 	/* clip to destination */
238 	rmin = r->min;
239 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
240 		return 0;
241 	/* move mask point */
242 	p1->x += r->min.x-rmin.x;
243 	p1->y += r->min.y-rmin.y;
244 	/* move source point */
245 	p0->x += r->min.x-rmin.x;
246 	p0->y += r->min.y-rmin.y;
247 	/* map destination rectangle into source */
248 	sr->min = *p0;
249 	sr->max.x = p0->x+Dx(*r);
250 	sr->max.y = p0->y+Dy(*r);
251 	/* sr is r in source coordinates; clip to source */
252 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
253 		return 0;
254 	if(!rectclip(sr, src->clipr))
255 		return 0;
256 	/* compute and clip rectangle in mask */
257 	if(splitcoords){
258 		/* move mask point with source */
259 		p1->x += sr->min.x-p0->x;
260 		p1->y += sr->min.y-p0->y;
261 		mr->min = *p1;
262 		mr->max.x = p1->x+Dx(*sr);
263 		mr->max.y = p1->y+Dy(*sr);
264 		omr = *mr;
265 		/* mr is now rectangle in mask; clip it */
266 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
267 			return 0;
268 		if(!rectclip(mr, mask->clipr))
269 			return 0;
270 		/* reflect any clips back to source */
271 		sr->min.x += mr->min.x-omr.min.x;
272 		sr->min.y += mr->min.y-omr.min.y;
273 		sr->max.x += mr->max.x-omr.max.x;
274 		sr->max.y += mr->max.y-omr.max.y;
275 		*p1 = mr->min;
276 	}else{
277 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
278 			return 0;
279 		if(!rectclip(sr, mask->clipr))
280 			return 0;
281 		*p1 = sr->min;
282 	}
283 
284 	/* move source clipping back to destination */
285 	delta.x = r->min.x - p0->x;
286 	delta.y = r->min.y - p0->y;
287 	r->min.x = sr->min.x + delta.x;
288 	r->min.y = sr->min.y + delta.y;
289 	r->max.x = sr->max.x + delta.x;
290 	r->max.y = sr->max.y + delta.y;
291 
292 	/* move source rectangle so sr->min is in src->r */
293 	if(src->flags&Frepl) {
294 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
295 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
296 		sr->min.x += delta.x;
297 		sr->min.y += delta.y;
298 		sr->max.x += delta.x;
299 		sr->max.y += delta.y;
300 	}
301 	*p0 = sr->min;
302 
303 	/* move mask point so it is in mask->r */
304 	/* use temporary point p to avoid warnings about unaligned volatiles on digital unix */
305 	p = *p1;
306 	p = drawrepl(mask->r, p);
307 	*p1 = p;
308 	mr->min = *p1;
309 	mr->max.x = p1->x+Dx(*sr);
310 	mr->max.y = p1->y+Dy(*sr);
311 
312 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
313 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
314 	assert((p=*p0, ptinrect(p, src->r)));
315 	assert((p=*p1, ptinrect(p, mask->r)));
316 	assert((p=r->min, ptinrect(p, dst->r)));
317 
318 	return 1;
319 }
320 
321 /*
322  * Conversion tables.
323  */
324 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
325 static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */
326 static uchar conv28[256][4];		/* ... */
327 static uchar conv48[256][2];
328 static int	tablesbuilt;
329 
/*
 * replmul[n], for 1 ≤ n ≤ 8, is the 16-bit multiplier that replicates
 * an n-bit pattern to fill 8 bits: each set bit marks where a copy of
 * the pattern's bottom (ones) bit lands.  Only the top 8 bits of the
 * 16-bit product are actually used; the lower 8 bits are needed to get
 * bits in the right place when n is not a divisor of 8.
 *
 * The bit pictures, most significant bit first:
 *	1:	1111 1111 1111 1111
 *	2:	0101 0101 0101 0101
 *	3:	0010 0100 1001 0010
 *	4:	0001 0001 0001 0001
 *	5:	0000 1000 0100 0010
 *	6:	0000 0100 0001 0000
 *	7:	0000 0010 0000 0100
 *	8:	0000 0001 0000 0001
 *
 * Should check to see if it's easier to just refer to replmul than
 * use the precomputed values in replbit.  On PCs it may well
 * be; on machines with slow multiply instructions it probably isn't.
 */
static int replmul[1+8] = {
	0,
	0xFFFF,
	0x5555,
	0x2492,
	0x1111,
	0x0842,
	0x0410,
	0x0204,
	0x0101,
};
358 
359 static void
360 mktables(void)
361 {
362 	int i, j, mask, sh, small;
363 
364 	if(tablesbuilt)
365 		return;
366 
367 	tablesbuilt = 1;
368 	/* bit replication up to 8 bits */
369 	for(i=0; i<256; i++){
370 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
371 			small = i & ((1<<j)-1);
372 			replbit[j][i] = (small*replmul[j])>>8;
373 		}
374 	}
375 
376 	/* bit unpacking up to 8 bits, only powers of 2 */
377 	for(i=0; i<256; i++){
378 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
379 			conv18[i][j] = replbit[1][(i>>sh)&mask];
380 
381 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
382 			conv28[i][j] = replbit[2][(i>>sh)&mask];
383 
384 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
385 			conv48[i][j] = replbit[4][(i>>sh)&mask];
386 	}
387 }
388 
389 /*
390  * General alpha drawing case.  Can handle anything.
391  */
392 typedef struct	Buffer	Buffer;
393 struct Buffer {
394 	uchar	*red;
395 	uchar	*grn;
396 	uchar	*blu;
397 	uchar	*alpha;
398 	uchar	*grey;
399 
400 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
401 	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
402 	int		mskip;	/* no. of left bits to skip in *m */
403 	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
404 	int		bmskip;	/* no. of left bits to skip in *bm */
405 	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
406 	int		emskip;	/* no. of right bits to skip in *em */
407 };
408 
409 typedef struct	Param	Param;
410 typedef Buffer	Readfn(Param *notusedpar, uchar *notusedbuf, int notusedi);
411 typedef void	Writefn(Param *notusedpar, uchar *notusedbuf, Buffer notusedb);
412 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int notusedi);
413 
414 enum {
415 	MAXBCACHE = 16
416 };
417 
418 /* giant rathole to customize functions with */
419 struct Param {
420 	Readfn	*replcall;
421 	Readfn	*greymaskcall;
422 	Readfn	*convreadcall;
423 	Writefn	*convwritecall;
424 
425 	Memimage *img;
426 	Rectangle	r;
427 	int	dx;	/* of r */
428 	int	needbuf;
429 	int	convgrey;
430 	int	alphaonly;
431 
432 	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
433 	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
434 	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
435 	int		bwidth;
436 
437 	int	replcache;	/* if set, cache buffers */
438 	Buffer	bcache[MAXBCACHE];
439 	ulong	bfilled;
440 	uchar	*bufbase;
441 	int	bufoff;
442 	int	bufdelta;
443 
444 	int	dir;
445 
446 	int	convbufoff;
447 	uchar	*convbuf;
448 	Param	*convdpar;
449 	int	convdx;
450 };
451 
452 static uchar *drawbuf;
453 static int	ndrawbuf;
454 static int	mdrawbuf;
455 static Param spar, mpar, dpar;	/* easier on the stacks */
456 static Readfn	greymaskread, replread, readptr;
457 static Writefn	nullwrite;
458 static Calcfn	alphacalc;
459 static Calcfn	boolcalc;
460 
461 static Readfn*	readfn(Memimage*);
462 static Readfn*	readalphafn(Memimage*);
463 static Writefn*	writefn(Memimage*);
464 
465 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
466 static Readfn*	convfn(Memimage*, Param *notusedpar, Memimage*, Param*);
467 static Readfn*	ptrfn(Memimage*);
468 
469 static int
470 allocdrawbuf(void)
471 {
472 	uchar *p;
473 
474 	if(ndrawbuf > mdrawbuf){
475 		p = realloc(drawbuf, ndrawbuf);
476 		if(p == nil){
477 			werrstr("memimagedraw out of memory");
478 			return -1;
479 		}
480 		drawbuf = p;
481 		mdrawbuf = ndrawbuf;
482 	}
483 	return 0;
484 }
485 
486 static Param
487 getparam(Memimage *img, Rectangle r, int convgrey, int needbuf)
488 {
489 	Param p;
490 	int nbuf;
491 
492 	memset(&p, 0, sizeof p);
493 
494 	p.img = img;
495 	p.r = r;
496 	p.dx = Dx(r);
497 	p.needbuf = needbuf;
498 	p.convgrey = convgrey;
499 
500 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
501 
502 	p.bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
503 	p.bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
504 	p.bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
505 	p.bwidth = sizeof(ulong)*img->width;
506 
507 	assert(p.bytey0s <= p.bytermin && p.bytermin <= p.bytey0e);
508 
509 	if(p.r.min.x == p.img->r.min.x)
510 		assert(p.bytermin == p.bytey0s);
511 
512 	nbuf = 1;
513 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
514 		p.replcache = 1;
515 		nbuf = Dy(img->r);
516 	}
517 	p.bufdelta = 4*p.dx;
518 	p.bufoff = ndrawbuf;
519 	ndrawbuf += p.bufdelta*nbuf;
520 
521 	return p;
522 }
523 
524 static void
525 clipy(Memimage *img, int *y)
526 {
527 	int dy;
528 
529 	dy = Dy(img->r);
530 	if(*y == dy)
531 		*y = 0;
532 	else if(*y == -1)
533 		*y = dy-1;
534 	assert(0 <= *y && *y < dy);
535 }
536 
537 static void
538 dumpbuf(char *s, Buffer b, int n)
539 {
540 	int i;
541 	uchar *p;
542 
543 	print("%s", s);
544 	for(i=0; i<n; i++){
545 		print(" ");
546 		if(p=b.grey){
547 			print(" k%.2uX", *p);
548 			b.grey += b.delta;
549 		}else{
550 			if(p=b.red){
551 				print(" r%.2uX", *p);
552 				b.red += b.delta;
553 			}
554 			if(p=b.grn){
555 				print(" g%.2uX", *p);
556 				b.grn += b.delta;
557 			}
558 			if(p=b.blu){
559 				print(" b%.2uX", *p);
560 				b.blu += b.delta;
561 			}
562 		}
563 		if(p=b.alpha){
564 			print(" α%.2uX", *p);
565 			b.alpha += b.delta;
566 		}
567 	}
568 	print("\n");
569 }
570 
571 /*
572  * For each scan line, we expand the pixels from source, mask, and destination
573  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
574  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
575  * the readers need not copy the data: they can simply return pointers to the data.
576  * If the destination image is grey and the source is not, it is converted using the NTSC
577  * formula.
578  *
579  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
580  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
581  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
582  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
583  * the calculator, and that buffer is passed to a function to write it to the destination.
584  * If the buffer is already pointing at the destination, the writing function is a no-op.
585  */
586 #define DBG if(0)
587 static int
588 alphadraw(Memdrawparam *par)
589 {
590 	int isgrey, starty, endy;
591 	int needbuf, dsty, srcy, masky;
592 	int y, dir, dx, dy;
593 	Buffer bsrc, bdst, bmask;
594 	Readfn *rdsrc, *rdmask, *rddst;
595 	Calcfn *calc;
596 	Writefn *wrdst;
597 	Memimage *src, *mask, *dst;
598 	Rectangle r, sr, mr;
599 
600 	r = par->r;
601 	dx = Dx(r);
602 	dy = Dy(r);
603 
604 	ndrawbuf = 0;
605 
606 	src = par->src;
607 	mask = par->mask;
608 	dst = par->dst;
609 	sr = par->sr;
610 	mr = par->mr;
611 
612 	isgrey = dst->flags&Fgrey;
613 
614 	/*
615 	 * Buffering when src and dst are the same bitmap is sufficient but not
616 	 * necessary.  There are stronger conditions we could use.  We could
617 	 * check to see if the rectangles intersect, and if simply moving in the
618 	 * correct y direction can avoid the need to buffer.
619 	 */
620 	needbuf = (src->data == dst->data);
621 
622 	spar = getparam(src, sr, isgrey, needbuf);
623 	mpar = getparam(mask, mr, isgrey, needbuf);
624 	dpar = getparam(dst, r, isgrey, needbuf);
625 
626 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
627 	spar.dir = mpar.dir = dpar.dir = dir;
628 
629 	/*
630 	 * If the mask is purely boolean, we can convert from src to dst format
631 	 * when we read src, and then just copy it to dst where the mask tells us to.
632 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
633 	 *
634 	 * The computation is accomplished by assigning the function pointers as follows:
635 	 *	rdsrc - read and convert source into dst format in a buffer
636 	 * 	rdmask - convert mask to bytes, set pointer to it
637 	 * 	rddst - fill with pointer to real dst data, but do no reads
638 	 *	calc - copy src onto dst when mask says to.
639 	 *	wrdst - do nothing
640 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
641 	 * but it avoids a fair amount of code duplication to make this a case here
642 	 * rather than have a separate booldraw.
643 	 */
644 if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
645 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8){
646 if(drawdebug) iprint("boolcopy...");
647 		rdsrc = convfn(dst, &dpar, src, &spar);
648 		rddst = readptr;
649 		rdmask = readfn(mask);
650 		calc = boolcopyfn(dst, mask);
651 		wrdst = nullwrite;
652 	}else{
653 		/* usual alphadraw parameter fetching */
654 		rdsrc = readfn(src);
655 		rddst = readfn(dst);
656 		wrdst = writefn(dst);
657 		calc = alphacalc;
658 
659 		/*
660 		 * If there is no alpha channel, we'll ask for a grey channel
661 		 * and pretend it is the alpha.
662 		 */
663 		if(mask->flags&Falpha){
664 			rdmask = readalphafn(mask);
665 			mpar.alphaonly = 1;
666 		}else{
667 			mpar.greymaskcall = readfn(mask);
668 			mpar.convgrey = 1;
669 			rdmask = greymaskread;
670 
671 			/*
672 			 * Should really be above, but then boolcopyfns would have
673 			 * to deal with bit alignment, and I haven't written that.
674 			 *
675 			 * This is a common case for things like ellipse drawing.
676 			 * When there's no alpha involved and the mask is boolean,
677 			 * we can avoid all the division and multiplication.
678 			 */
679 			if(mask->chan == GREY1 && !(src->flags&Falpha))
680 				calc = boolcalc;
681 		}
682 	}
683 
684 	/*
685 	 * If the image has a small enough repl rectangle,
686 	 * we can just read each line once and cache them.
687 	 */
688 	if(spar.replcache){
689 		spar.replcall = rdsrc;
690 		rdsrc = replread;
691 	}
692 	if(mpar.replcache){
693 		mpar.replcall = rdmask;
694 		rdmask = replread;
695 	}
696 
697 	if(allocdrawbuf() < 0)
698 		return 0;
699 
700 	/*
701 	 * Before we were saving only offsets from drawbuf in the parameter
702 	 * structures; now that drawbuf has been grown to accomodate us,
703 	 * we can fill in the pointers.
704 	 */
705 	spar.bufbase = drawbuf+spar.bufoff;
706 	mpar.bufbase = drawbuf+mpar.bufoff;
707 	dpar.bufbase = drawbuf+dpar.bufoff;
708 	spar.convbuf = drawbuf+spar.convbufoff;
709 
710 	if(dir == 1){
711 		starty = 0;
712 		endy = dy;
713 	}else{
714 		starty = dy-1;
715 		endy = -1;
716 	}
717 
718 	/*
719 	 * srcy, masky, and dsty are offsets from the top of their
720 	 * respective Rectangles.  they need to be contained within
721 	 * the rectangles, so clipy can keep them there without division.
722  	 */
723 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
724 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
725 	dsty = starty + r.min.y - dst->r.min.y;
726 
727 	assert(0 <= srcy && srcy < Dy(src->r));
728 	assert(0 <= masky && masky < Dy(mask->r));
729 	assert(0 <= dsty && dsty < Dy(dst->r));
730 
731 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
732 		clipy(src, &srcy);
733 		clipy(dst, &dsty);
734 		clipy(mask, &masky);
735 
736 		bsrc = rdsrc(&spar, spar.bufbase, srcy);
737 DBG print("[");
738 		bmask = rdmask(&mpar, mpar.bufbase, masky);
739 DBG print("]\n");
740 		bdst = rddst(&dpar, dpar.bufbase, dsty);
741 DBG		dumpbuf("src", bsrc, dx);
742 DBG		dumpbuf("mask", bmask, dx);
743 DBG		dumpbuf("dst", bdst, dx);
744 		bdst = calc(bdst, bsrc, bmask, dx, isgrey);
745 		wrdst(&dpar, dpar.bytermin+dsty*dpar.bwidth, bdst);
746 	}
747 
748 	return 1;
749 }
750 #undef DBG
751 
752 #define DBG if(0)
753 static Buffer
754 alphacalc(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey)
755 {
756 	Buffer obdst;
757 	uchar *salpha, ones = ~0;
758 	int antialpha, sadelta;
759 	int i, sa, ma;
760 
761 	obdst = bdst;
762 	if((salpha = bsrc.alpha) == nil)
763 		salpha = &ones, sadelta = 0;
764 	else
765 		sadelta = bsrc.delta;
766 
767 	for(i=0; i<dx; i++){
768 		sa = *salpha;
769 		ma = *bmask.alpha;
770 		antialpha = 255-DIV255(ma*sa);
771 
772 		if(grey){
773 DBG print("(%d %d in %d) over %d =", *bsrc.grey, sa, ma, *bdst.grey);
774 			*bdst.grey = DIV255(*bsrc.grey*ma+*bdst.grey*antialpha);
775 DBG print(" %d\n", *bdst.grey);
776 			bsrc.grey += bsrc.delta;
777 			bdst.grey += bdst.delta;
778 		}else{
779 			*bdst.red = DIV255(*bsrc.red*ma+*bdst.red*antialpha);
780 			*bdst.grn = DIV255(*bsrc.grn*ma+*bdst.grn*antialpha);
781 			*bdst.blu = DIV255(*bsrc.blu*ma+*bdst.blu*antialpha);
782 
783 			bsrc.red += bsrc.delta;
784 			bsrc.blu += bsrc.delta;
785 			bsrc.grn += bsrc.delta;
786 
787 			bdst.red += bdst.delta;
788 			bdst.blu += bdst.delta;
789 			bdst.grn += bdst.delta;
790 		}
791 
792 		salpha += sadelta;
793 		bmask.alpha += bmask.delta;
794 
795 		if(bdst.alpha) {
796 			*bdst.alpha = DIV255(sa*ma+*bdst.alpha*(antialpha));
797 			bdst.alpha += bdst.delta;
798 		}
799 	}
800 	return obdst;
801 }
802 #undef DBG
803 
804 #define DBG if(0)
805 static Buffer
806 boolcalc(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey)
807 {
808 	Buffer obdst;
809 	int i, ma;
810 
811 	obdst = bdst;
812 	for(i=0; i<dx; i++){
813 		ma = *bmask.alpha;
814 
815 		if(grey){
816 DBG print("(%d) in %d over %d =", *bsrc.grey, ma, *bdst.grey);
817 			if(ma)
818 				*bdst.grey = *bsrc.grey;
819 DBG print(" %d\n", *bdst.grey);
820 			bsrc.grey += bsrc.delta;
821 			bdst.grey += bdst.delta;
822 		}else{
823 			if(ma){
824 				*bdst.red = *bsrc.red*ma;
825 				*bdst.grn = *bsrc.grn*ma;
826 				*bdst.blu = *bsrc.blu*ma;
827 			}
828 
829 			bsrc.red += bsrc.delta;
830 			bsrc.blu += bsrc.delta;
831 			bsrc.grn += bsrc.delta;
832 
833 			bdst.red += bdst.delta;
834 			bdst.blu += bdst.delta;
835 			bdst.grn += bdst.delta;
836 		}
837 
838 		bmask.alpha += bmask.delta;
839 
840 		if(bdst.alpha) {
841 			if(ma)
842 				*bdst.alpha = ma;
843 			bdst.alpha += bdst.delta;
844 		}
845 	}
846 	return obdst;
847 }
848 #undef DBG
849 
850 /*
851  * Replicated cached scan line read.  Call the function listed in the Param,
852  * but cache the result so that for replicated images we only do the work once.
853  */
854 static Buffer
855 replread(Param *p, uchar *notusedbuf, int y)
856 {
857 	Buffer *b;
858 
859 	b = &p->bcache[y];
860 	if((p->bfilled & (1<<y)) == 0){
861 		p->bfilled |= 1<<y;
862 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
863 	}
864 	return *b;
865 }
866 
867 /*
868  * Alpha reading function that simply relabels the grey pointer.
869  */
870 static Buffer
871 greymaskread(Param *p, uchar *buf, int y)
872 {
873 	Buffer b;
874 
875 	b = p->greymaskcall(p, buf, y);
876 	b.alpha = b.grey;
877 	return b;
878 }
879 
880 #define DBG if(0)
881 static Buffer
882 readnbit(Param *p, uchar *buf, int y)
883 {
884 	Buffer b;
885 	Memimage *img;
886 	uchar *repl, *r, *w, *ow, bits;
887 	int i, n, sh, depth, x, dx, npack, nbits;
888 
889 	b.grey = w = buf;
890 	b.red = b.blu = b.grn = w;
891 	b.alpha = nil;
892 	b.delta = 1;
893 
894 	dx = p->dx;
895 	img = p->img;
896 	depth = img->depth;
897 	repl = &replbit[depth][0];
898 	npack = 8/depth;
899 	sh = 8-depth;
900 
901 	/* copy from p->r.min.x until end of repl rectangle */
902 	x = p->r.min.x;
903 	n = dx;
904 	if(n > p->img->r.max.x - x)
905 		n = p->img->r.max.x - x;
906 
907 	r = p->bytermin + y*p->bwidth;
908 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
909 	bits = *r++;
910 	nbits = 8;
911 	if(i=x&(npack-1)){
912 DBG print("throwaway %d...", i);
913 		bits <<= depth*i;
914 		nbits -= depth*i;
915 	}
916 	for(i=0; i<n; i++){
917 		if(nbits == 0){
918 DBG print("(%.2ux)...", *r);
919 			bits = *r++;
920 			nbits = 8;
921 		}
922 		*w++ = repl[bits>>sh];
923 DBG print("bit %x...", repl[bits>>sh]);
924 		bits <<= depth;
925 		nbits -= depth;
926 	}
927 	dx -= n;
928 	if(dx == 0)
929 		return b;
930 
931 	assert(x+i == p->img->r.max.x);
932 
933 	/* copy from beginning of repl rectangle until where we were before. */
934 	x = p->img->r.min.x;
935 	n = dx;
936 	if(n > p->r.min.x - x)
937 		n = p->r.min.x - x;
938 
939 	r = p->bytey0s + y*p->bwidth;
940 DBG print("x=%d r=%p...", x, r);
941 	bits = *r++;
942 	nbits = 8;
943 	if(i=x&(npack-1)){
944 		bits <<= depth*i;
945 		nbits -= depth*i;
946 	}
947 DBG print("nbits=%d...", nbits);
948 	for(i=0; i<n; i++){
949 		if(nbits == 0){
950 			bits = *r++;
951 			nbits = 8;
952 		}
953 		*w++ = repl[bits>>sh];
954 DBG print("bit %x...", repl[bits>>sh]);
955 		bits <<= depth;
956 		nbits -= depth;
957 DBG print("bits %x nbits %d...", bits, nbits);
958 	}
959 	dx -= n;
960 	if(dx == 0)
961 		return b;
962 
963 	assert(dx > 0);
964 	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
965 	ow = buf;
966 	while(dx--)
967 		*w++ = *ow++;
968 
969 	return b;
970 }
971 #undef DBG
972 
973 #define DBG if(0)
974 static void
975 writenbit(Param *p, uchar *w, Buffer src)
976 {
977 	uchar *r;
978 	ulong bits;
979 	int i, sh, depth, npack, nbits, x, ex;
980 
981 	assert(src.grey != nil && src.delta == 1);
982 
983 	x = p->r.min.x;
984 	ex = x+p->dx;
985 	depth = p->img->depth;
986 	npack = 8/depth;
987 
988 	i=x&(npack-1);
989 	bits = i ? (*w >> (8-depth*i)) : 0;
990 	nbits = depth*i;
991 	sh = 8-depth;
992 	r = src.grey;
993 
994 	for(; x<ex; x++){
995 		bits <<= depth;
996 DBG print(" %x", *r);
997 		bits |= (*r++ >> sh);
998 		nbits += depth;
999 		if(nbits == 8){
1000 			*w++ = bits;
1001 			nbits = 0;
1002 		}
1003 	}
1004 
1005 	if(nbits){
1006 		sh = 8-nbits;
1007 		bits <<= sh;
1008 		bits |= *w & ((1<<sh)-1);
1009 		*w = bits;
1010 	}
1011 DBG print("\n");
1012 	return;
1013 }
1014 #undef DBG
1015 
1016 static Buffer
1017 readcmap(Param *p, uchar *buf, int y)
1018 {
1019 	Buffer b;
1020 	int i, dx, convgrey;
1021 	uchar *q, *cmap, *begin, *end, *r, *w;
1022 
1023 	begin = p->bytey0s + y*p->bwidth;
1024 	r = p->bytermin + y*p->bwidth;
1025 	end = p->bytey0e + y*p->bwidth;
1026 	cmap = p->img->cmap->cmap2rgb;
1027 	convgrey = p->convgrey;
1028 
1029 	w = buf;
1030 	dx = p->dx;
1031 	for(i=0; i<dx; i++){
1032 		q = cmap+*r++*3;
1033 		if(r == end)
1034 			r = begin;
1035 		if(convgrey){
1036 			*w++ = RGB2K(q[0], q[1], q[2]);
1037 		}else{
1038 			*w++ = q[2];	/* blue */
1039 			*w++ = q[1];	/* green */
1040 			*w++ = q[0];	/* red */
1041 		}
1042 	}
1043 
1044 	if(convgrey){
1045 		b.alpha = nil;
1046 		b.grey = buf;
1047 		b.red = b.blu = b.grn = buf;
1048 		b.delta = 1;
1049 	}else{
1050 		b.blu = buf;
1051 		b.grn = buf+1;
1052 		b.red = buf+2;
1053 		b.alpha = nil;
1054 		b.grey = nil;
1055 		b.delta = 3;
1056 	}
1057 	return b;
1058 }
1059 
1060 static void
1061 writecmap(Param *p, uchar *w, Buffer src)
1062 {
1063 	uchar *cmap, *red, *grn, *blu;
1064 	int i, dx, delta;
1065 
1066 	cmap = p->img->cmap->rgb2cmap;
1067 
1068 	delta = src.delta;
1069 	red= src.red;
1070 	grn = src.grn;
1071 	blu = src.blu;
1072 
1073 	dx = p->dx;
1074 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1075 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1076 }
1077 
1078 static Buffer
1079 readbyte(Param *p, uchar *buf, int y)
1080 {
1081 	Buffer b;
1082 	Memimage *img;
1083 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1084 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1085 	uchar ured, ugrn, ublu;
1086 	ulong u;
1087 
1088 	img = p->img;
1089 	begin = p->bytey0s + y*p->bwidth;
1090 	r = p->bytermin + y*p->bwidth;
1091 	end = p->bytey0e + y*p->bwidth;
1092 
1093 	w = buf;
1094 	dx = p->dx;
1095 	nb = img->depth/8;
1096 
1097 	convgrey = p->convgrey;	/* convert rgb to grey */
1098 	isgrey = img->flags&Fgrey;
1099 	alphaonly = p->alphaonly;
1100 	copyalpha = convgrey==0 && (img->flags&Falpha);
1101 
1102 	/* if we can, avoid processing everything */
1103 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1104 		memset(&b, 0, sizeof b);
1105 		if(p->needbuf){
1106 			memmove(buf, r, dx*nb);
1107 			r = buf;
1108 		}
1109 		if(copyalpha)
1110 			b.alpha = r+img->shift[CAlpha]/8;
1111 		if(isgrey){
1112 			b.grey = r+img->shift[CGrey]/8;
1113 			b.red = b.grn = b.blu = b.grey;
1114 		}else{
1115 			b.red = r+img->shift[CRed]/8;
1116 			b.grn = r+img->shift[CGreen]/8;
1117 			b.blu = r+img->shift[CBlue]/8;
1118 		}
1119 		b.delta = nb;
1120 		return b;
1121 	}
1122 
1123 	rrepl = replbit[img->nbits[CRed]];
1124 	grepl = replbit[img->nbits[CGreen]];
1125 	brepl = replbit[img->nbits[CBlue]];
1126 	arepl = replbit[img->nbits[CAlpha]];
1127 	krepl = replbit[img->nbits[CGrey]];
1128 
1129 	for(i=0; i<dx; i++){
1130 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1131 		if(copyalpha)
1132 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1133 
1134 		if(isgrey)
1135 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1136 		else if(!alphaonly){
1137 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1138 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1139 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1140 			if(convgrey){
1141 				*w++ = RGB2K(ured, ugrn, ublu);
1142 			}else{
1143 				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1144 				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1145 				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1146 			}
1147 		}
1148 		r += nb;
1149 		if(r == end)
1150 			r = begin;
1151 	}
1152 
1153 	b.alpha = copyalpha ? buf : nil;
1154 	if(alphaonly){
1155 		b.red = b.grn = b.blu = b.grey = nil;
1156 		b.delta = 1;
1157 	}else if(isgrey || convgrey){
1158 		b.grey = buf+copyalpha;
1159 		b.red = b.grn = b.blu = buf+copyalpha;
1160 		b.delta = copyalpha+1;
1161 	}else{
1162 		b.blu = buf+copyalpha;
1163 		b.grn = buf+copyalpha+1;
1164 		b.grey = nil;
1165 		b.red = buf+copyalpha+2;
1166 		b.delta = copyalpha+3;
1167 	}
1168 	return b;
1169 }
1170 
1171 #define DBG if(0)
1172 static void
1173 writebyte(Param *p, uchar *w, Buffer src)
1174 {
1175 	Memimage *img;
1176 	int i, isalpha, isgrey, nb, delta, dx, adelta;
1177 	uchar ff, *red, *grn, *blu, *grey, *alpha;
1178 	ulong u, mask;
1179 
1180 	img = p->img;
1181 
1182 	red = src.red;
1183 	grn = src.grn;
1184 	blu = src.blu;
1185 	alpha = src.alpha;
1186 	delta = src.delta;
1187 	grey = src.grey;
1188 	dx = p->dx;
1189 
1190 	nb = img->depth/8;
1191 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1192 
1193 	isalpha = img->flags&Falpha;
1194 	isgrey = img->flags&Fgrey;
1195 	adelta = src.delta;
1196 
1197 	if(isalpha && alpha == nil){
1198 		ff = 0xFF;
1199 		alpha = &ff;
1200 		adelta = 0;
1201 	}
1202 
1203 	for(i=0; i<dx; i++){
1204 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1205 DBG print("u %.8lux...", u);
1206 		u &= mask;
1207 DBG print("&mask %.8lux...", u);
1208 		if(isgrey){
1209 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1210 DBG print("|grey %.8lux...", u);
1211 			grey += delta;
1212 		}else{
1213 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1214 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1215 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1216 			red += delta;
1217 			grn += delta;
1218 			blu += delta;
1219 DBG print("|rgb %.8lux...", u);
1220 		}
1221 
1222 		if(isalpha){
1223 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1224 			alpha += adelta;
1225 DBG print("|alpha %.8lux...", u);
1226 		}
1227 
1228 		w[0] = u;
1229 		w[1] = u>>8;
1230 		w[2] = u>>16;
1231 		w[3] = u>>24;
1232 		w += nb;
1233 	}
1234 }
1235 #undef DBG
1236 
1237 static Readfn*
1238 readfn(Memimage *img)
1239 {
1240 	if(img->depth < 8)
1241 		return readnbit;
1242 	if(img->chan == CMAP8)
1243 		return readcmap;
1244 	return readbyte;
1245 }
1246 
1247 static Readfn*
1248 readalphafn(Memimage *notused)
1249 {
1250 	return readbyte;
1251 }
1252 
1253 static Writefn*
1254 writefn(Memimage *img)
1255 {
1256 	if(img->depth < 8)
1257 		return writenbit;
1258 	if(img->chan == CMAP8)
1259 		return writecmap;
1260 	return writebyte;
1261 }
1262 
1263 static void
1264 nullwrite(Param *notusedpar, uchar *notusedbuf, Buffer notusedb)
1265 {
1266 }
1267 
1268 static Buffer
1269 readptr(Param *p, uchar *notusedbuf, int y)
1270 {
1271 	Buffer b;
1272 	uchar *q;
1273 
1274 	q = p->bytermin + y*p->bwidth;
1275 	b.red = q;	/* ptr to data */
1276 	b.grn = b.blu = b.grey = b.alpha = nil;
1277 	b.delta = p->img->depth/8;
1278 	return b;
1279 }
1280 
1281 static Buffer
1282 boolmemmove(Buffer bdst, Buffer bsrc, Buffer notusedb, int dx, int notusedi)
1283 {
1284 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1285 	return bdst;
1286 }
1287 
1288 static Buffer
1289 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int notusedi)
1290 {
1291 	uchar *m, *r, *w, *ew;
1292 
1293 	m = bmask.grey;
1294 	w = bdst.red;
1295 	r = bsrc.red;
1296 	ew = w+dx;
1297 	for(; w < ew; w++,r++)
1298 		if(*m++)
1299 			*w = *r;
1300 	return bdst;	/* not used */
1301 }
1302 
1303 static Buffer
1304 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int notusedi)
1305 {
1306 	uchar *m;
1307 	ushort *r, *w, *ew;
1308 
1309 	m = bmask.grey;
1310 	w = (ushort*)bdst.red;
1311 	r = (ushort*)bsrc.red;
1312 	ew = w+dx;
1313 	for(; w < ew; w++,r++)
1314 		if(*m++)
1315 			*w = *r;
1316 	return bdst;	/* not used */
1317 }
1318 
1319 static Buffer
1320 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int notusedi)
1321 {
1322 	uchar *m;
1323 	uchar *r, *w, *ew;
1324 
1325 	m = bmask.grey;
1326 	w = bdst.red;
1327 	r = bsrc.red;
1328 	ew = w+dx*3;
1329 	while(w < ew){
1330 		if(*m++){
1331 			*w++ = *r++;
1332 			*w++ = *r++;
1333 			*w++ = *r++;
1334 		}else{
1335 			w += 3;
1336 			r += 3;
1337 		}
1338 	}
1339 	return bdst;	/* not used */
1340 }
1341 
1342 static Buffer
1343 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int notusedi)
1344 {
1345 	uchar *m;
1346 	ulong *r, *w, *ew;
1347 
1348 	m = bmask.grey;
1349 	w = (ulong*)bdst.red;
1350 	r = (ulong*)bsrc.red;
1351 	ew = w+dx;
1352 	for(; w < ew; w++,r++)
1353 		if(*m++)
1354 			*w = *r;
1355 	return bdst;	/* not used */
1356 }
1357 
1358 static Buffer
1359 genconv(Param *p, uchar *buf, int y)
1360 {
1361 	Buffer b;
1362 	int nb;
1363 	uchar *r, *w, *ew;
1364 
1365 	/* read from source into RGB format in convbuf */
1366 	b = p->convreadcall(p, p->convbuf, y);
1367 
1368 	/* write RGB format into dst format in buf */
1369 	p->convwritecall(p->convdpar, buf, b);
1370 
1371 	if(p->convdx){
1372 		nb = p->convdpar->img->depth/8;
1373 		r = buf;
1374 		w = buf+nb*p->dx;
1375 		ew = buf+nb*p->convdx;
1376 		while(w<ew)
1377 			*w++ = *r++;
1378 	}
1379 
1380 	b.red = buf;
1381 	b.blu = b.grn = b.grey = b.alpha = nil;
1382 	b.delta = 0;
1383 
1384 	return b;
1385 }
1386 
1387 static Readfn*
1388 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar)
1389 {
1390 	if(dst->chan == src->chan && !(src->flags&Frepl)){
1391 		return readptr;
1392 	}
1393 
1394 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1395 		/* cheat because we know the replicated value is exactly the color map entry. */
1396 		return readnbit;
1397 	}
1398 
1399 	spar->convreadcall = readfn(src);
1400 	spar->convwritecall = writefn(dst);
1401 	spar->convdpar = dpar;
1402 
1403 	/* allocate a conversion buffer */
1404 	spar->convbufoff = ndrawbuf;
1405 	ndrawbuf += spar->dx*4;
1406 
1407 	if(spar->dx > Dx(spar->img->r)){
1408 		spar->convdx = spar->dx;
1409 		spar->dx = Dx(spar->img->r);
1410 	}
1411 
1412 	return genconv;
1413 }
1414 
1415 ulong
1416 _pixelbits(Memimage *i, Point pt)
1417 {
1418 	uchar *p;
1419 	ulong val;
1420 	int off, bpp, npack;
1421 
1422 	val = 0;
1423 	p = byteaddr(i, pt);
1424 	switch(bpp=i->depth){
1425 	case 1:
1426 	case 2:
1427 	case 4:
1428 		npack = 8/bpp;
1429 		off = pt.x%npack;
1430 		val = p[0] >> bpp*(npack-1-off);
1431 		val &= (1<<bpp)-1;
1432 		break;
1433 	case 8:
1434 		val = p[0];
1435 		break;
1436 	case 16:
1437 		val = p[0]|(p[1]<<8);
1438 		break;
1439 	case 24:
1440 		val = p[0]|(p[1]<<8)|(p[2]<<16);
1441 		break;
1442 	case 32:
1443 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1444 		break;
1445 	}
1446 	while(bpp<32){
1447 		val |= val<<bpp;
1448 		bpp *= 2;
1449 	}
1450 	return val;
1451 }
1452 
1453 static Calcfn*
1454 boolcopyfn(Memimage *img, Memimage *mask)
1455 {
1456 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && _pixelbits(mask, mask->r.min)==~0)
1457 		return boolmemmove;
1458 
1459 	switch(img->depth){
1460 	case 8:
1461 		return boolcopy8;
1462 	case 16:
1463 		return boolcopy16;
1464 	case 24:
1465 		return boolcopy24;
1466 	case 32:
1467 		return boolcopy32;
1468 	default:
1469 		assert(0 /* boolcopyfn */);
1470 	}
1471 	return nil;
1472 }
1473 
1474 /*
1475  * Optimized draw for filling and scrolling; uses memset and memmove.
1476  */
1477 static void
1478 memsetb(void *vp, uchar val, int n)
1479 {
1480 	uchar *p, *ep;
1481 
1482 	p = vp;
1483 	ep = p+n;
1484 	while(p<ep)
1485 		*p++ = val;
1486 }
1487 
1488 static void
1489 memsets(void *vp, ushort val, int n)
1490 {
1491 	ushort *p, *ep;
1492 
1493 	p = vp;
1494 	ep = p+n;
1495 	while(p<ep)
1496 		*p++ = val;
1497 }
1498 
1499 static void
1500 memsetl(void *vp, ulong val, int n)
1501 {
1502 	ulong *p, *ep;
1503 
1504 	p = vp;
1505 	ep = p+n;
1506 	while(p<ep)
1507 		*p++ = val;
1508 }
1509 
1510 void
1511 memset24(void *vp, ulong val, int n)
1512 {
1513 	uchar *p, *ep;
1514 	uchar a,b,c;
1515 
1516 	p = vp;
1517 	ep = p+3*n;
1518 	a = val;
1519 	b = val>>8;
1520 	c = val>>16;
1521 	while(p<ep){
1522 		*p++ = a;
1523 		*p++ = b;
1524 		*p++ = c;
1525 	}
1526 }
1527 
1528 ulong
1529 _imgtorgba(Memimage *img, ulong val)
1530 {
1531 	uchar r, g, b, a;
1532 	int nb, ov, v;
1533 	ulong chan;
1534 	uchar *p;
1535 
1536 	a = 0xFF;
1537 	r = g = b = 0xAA;	/* garbage */
1538 	for(chan=img->chan; chan; chan>>=8){
1539 		nb = NBITS(chan);
1540 		ov = v = val&((1<<nb)-1);
1541 		val >>= nb;
1542 
1543 		while(nb < 8){
1544 			v |= v<<nb;
1545 			nb *= 2;
1546 		}
1547 		v >>= (nb-8);
1548 
1549 		switch(TYPE(chan)){
1550 		case CRed:
1551 			r = v;
1552 			break;
1553 		case CGreen:
1554 			g = v;
1555 			break;
1556 		case CBlue:
1557 			b = v;
1558 			break;
1559 		case CAlpha:
1560 			a = v;
1561 			break;
1562 		case CGrey:
1563 			r = g = b = v;
1564 			break;
1565 		case CMap:
1566 			p = img->cmap->cmap2rgb+3*ov;
1567 			r = *p++;
1568 			g = *p++;
1569 			b = *p;
1570 			break;
1571 		}
1572 	}
1573 	return (r<<24)|(g<<16)|(b<<8)|a;
1574 }
1575 
1576 ulong
1577 _rgbatoimg(Memimage *img, ulong rgba)
1578 {
1579 	ulong chan;
1580 	int d, nb;
1581 	ulong v;
1582 	uchar *p, r, g, b, a, m;
1583 
1584 	v = 0;
1585 	r = rgba>>24;
1586 	g = rgba>>16;
1587 	b = rgba>>8;
1588 	a = rgba;
1589 	d = 0;
1590 	for(chan=img->chan; chan; chan>>=8){
1591 		nb = NBITS(chan);
1592 		switch(TYPE(chan)){
1593 		case CRed:
1594 			v |= (r>>(8-nb))<<d;
1595 			break;
1596 		case CGreen:
1597 			v |= (g>>(8-nb))<<d;
1598 			break;
1599 		case CBlue:
1600 			v |= (b>>(8-nb))<<d;
1601 			break;
1602 		case CAlpha:
1603 			v |= (a>>(8-nb))<<d;
1604 			break;
1605 		case CMap:
1606 			p = img->cmap->rgb2cmap;
1607 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
1608 			v |= m<<d;
1609 			break;
1610 		case CGrey:
1611 			m = RGB2K(r,g,b);
1612 			v |= m<<d;
1613 			break;
1614 		}
1615 		d += nb;
1616 	}
1617 	return v;
1618 }
1619 
1620 static int
1621 memoptdraw(Memdrawparam *par)
1622 {
1623 	int y, dy, dx;
1624 	ulong v;
1625 	unsigned m;
1626 	Memimage *src;
1627 	Memimage *dst;
1628 
1629 	dx = Dx(par->r);
1630 	dy = Dy(par->r);
1631 	src = par->src;
1632 	dst = par->dst;
1633 
1634 	/*
1635 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
1636 	 * destination format and just replicate with memset.
1637 	 */
1638 	m = Simplesrc|Simplemask|Fullmask;
1639 	if((par->state&m)==m){
1640 		uchar *dp, p[4];
1641 		int dwid, ppb, np, nb;
1642 		uchar lm, rm;
1643 
1644 		dwid = dst->width*sizeof(ulong);
1645 		dp = byteaddr(dst, par->r.min);
1646 		v = par->sdval;
1647 		switch(dst->depth){
1648 		case 1:
1649 		case 2:
1650 		case 4:
1651 			ppb = 8/dst->depth;	/* pixels per byte */
1652 			m = ppb-1;
1653 			/* left edge */
1654 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
1655 			dx -= (ppb-np);
1656 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
1657 			lm = (1<<nb)-1;
1658 
1659 			/* right edge */
1660 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
1661 			dx -= np;
1662 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
1663 			rm = ~((1<<nb)-1);
1664 
1665 			/* lm, rm are masks that are 1 where we should touch the bits */
1666 			if(dx < 0){	/* just one byte */
1667 				lm &= rm;
1668 				for(y=0; y<dy; y++, dp+=dwid)
1669 					*dp ^= (v ^ *dp) & lm;
1670 			}else if(dx == 0){	/* no full bytes */
1671 				if(lm)
1672 					dwid--;
1673 
1674 				for(y=0; y<dy; y++, dp+=dwid){
1675 					if(lm){
1676 						*dp ^= (v ^ *dp) & lm;
1677 						dp++;
1678 					}
1679 					*dp ^= (v ^ *dp) & rm;
1680 				}
1681 			}else{		/* full bytes in middle */
1682 				dx /= ppb;
1683 				if(lm)
1684 					dwid--;
1685 				dwid -= dx;
1686 
1687 				for(y=0; y<dy; y++, dp+=dwid){
1688 					if(lm){
1689 						*dp ^= (v ^ *dp) & lm;
1690 						dp++;
1691 					}
1692 					memset(dp, v, dx);
1693 					dp += dx;
1694 					*dp ^= (v ^ *dp) & rm;
1695 				}
1696 			}
1697 			return 1;
1698 		case 8:
1699 			for(y=0; y<dy; y++, dp+=dwid)
1700 				memset(dp, v, dx);
1701 			return 1;
1702 		case 16:
1703 			p[0] = v;		/* make little endian */
1704 			p[1] = v>>8;
1705 			v = *(ushort*)p;
1706 			for(y=0; y<dy; y++, dp+=dwid)
1707 				memsets(dp, v, dx);
1708 			return 1;
1709 		case 24:
1710 			for(y=0; y<dy; y++, dp+=dwid)
1711 				memset24(dp, v, dx);
1712 			return 1;
1713 		case 32:
1714 			p[0] = v;		/* make little endian */
1715 			p[1] = v>>8;
1716 			p[2] = v>>16;
1717 			p[3] = v>>24;
1718 			v = *(ulong*)p;
1719 			for(y=0; y<dy; y++, dp+=dwid)
1720 				memsetl(dp, v, dx);
1721 			return 1;
1722 		default:
1723 			assert(0 /* bad dest depth in memoptdraw */);
1724 		}
1725 	}
1726 
1727 	/*
1728 	 * If no source alpha, an opaque mask, we can just copy the
1729 	 * source onto the destination.  If the channels are the same and
1730 	 * the source is not replicated, memmove suffices.
1731 	 */
1732 	m = Simplemask|Fullmask;
1733 	if((par->state&(m|Replsrc))==m && src->depth >= 8
1734 	&& src->chan == dst->chan && !(src->flags&Falpha)){
1735 		uchar *sp, *dp;
1736 		long swid, dwid, nb;
1737 		int dir;
1738 
1739 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
1740 			dir = -1;
1741 		else
1742 			dir = 1;
1743 
1744 		swid = src->width*sizeof(ulong);
1745 		dwid = dst->width*sizeof(ulong);
1746 		sp = byteaddr(src, par->sr.min);
1747 		dp = byteaddr(dst, par->r.min);
1748 		if(dir == -1){
1749 			sp += (dy-1)*swid;
1750 			dp += (dy-1)*dwid;
1751 			swid = -swid;
1752 			dwid = -dwid;
1753 		}
1754 		nb = (dx*src->depth)/8;
1755 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
1756 			memmove(dp, sp, nb);
1757 		return 1;
1758 	}
1759 
1760 	/*
1761 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
1762 	 * they're all bit aligned, we can just use bit operators.  This happens
1763 	 * when we're manipulating boolean masks, e.g. in the arc code.
1764 	 */
1765 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
1766 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
1767 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
1768 		uchar *sp, *dp, *mp;
1769 		uchar lm, rm;
1770 		long swid, dwid, mwid;
1771 		int i, x, dir;
1772 
1773 		sp = byteaddr(src, par->sr.min);
1774 		dp = byteaddr(dst, par->r.min);
1775 		mp = byteaddr(par->mask, par->mr.min);
1776 		swid = src->width*sizeof(ulong);
1777 		dwid = dst->width*sizeof(ulong);
1778 		mwid = par->mask->width*sizeof(ulong);
1779 
1780 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
1781 			dir = -1;
1782 		}else
1783 			dir = 1;
1784 
1785 		lm = 0xFF>>(par->r.min.x&7);
1786 		rm = 0xFF<<(8-(par->r.max.x&7));
1787 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
1788 
1789 		if(dx < 0){	/* one byte wide */
1790 			lm &= rm;
1791 			if(dir == -1){
1792 				dp += dwid*(dy-1);
1793 				sp += swid*(dy-1);
1794 				mp += mwid*(dy-1);
1795 				dwid = -dwid;
1796 				swid = -swid;
1797 				mwid = -mwid;
1798 			}
1799 			for(y=0; y<dy; y++){
1800 				*dp ^= (*dp ^ *sp) & *mp & lm;
1801 				dp += dwid;
1802 				sp += swid;
1803 				mp += mwid;
1804 			}
1805 			return 1;
1806 		}
1807 
1808 		dx /= 8;
1809 		if(dir == 1){
1810 			i = (lm!=0)+dx+(rm!=0);
1811 			mwid -= i;
1812 			swid -= i;
1813 			dwid -= i;
1814 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
1815 				if(lm){
1816 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
1817 					dp++;
1818 				}
1819 				for(x=0; x<dx; x++){
1820 					*dp ^= (*dp ^ *sp++) & *mp++;
1821 					dp++;
1822 				}
1823 				if(rm){
1824 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
1825 					dp++;
1826 				}
1827 			}
1828 			return 1;
1829 		}else{
1830 		/* dir == -1 */
1831 			i = (lm!=0)+dx+(rm!=0);
1832 			dp += dwid*(dy-1)+i-1;
1833 			sp += swid*(dy-1)+i-1;
1834 			mp += mwid*(dy-1)+i-1;
1835 			dwid = -dwid+i;
1836 			swid = -swid+i;
1837 			mwid = -mwid+i;
1838 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
1839 				if(rm){
1840 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
1841 					dp--;
1842 				}
1843 				for(x=0; x<dx; x++){
1844 					*dp ^= (*dp ^ *sp--) & *mp--;
1845 					dp--;
1846 				}
1847 				if(lm){
1848 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
1849 					dp--;
1850 				}
1851 			}
1852 		}
1853 		return 1;
1854 	}
1855 	return 0;
1856 }
1857 
1858 /*
1859  * Boolean character drawing.
1860  * Solid opaque color through a 1-bit greyscale mask.
1861  */
1862 #define DBG if(0)
1863 static int
1864 chardraw(Memdrawparam *par)
1865 {
1866 	ulong bits;
1867 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth;
1868 	ulong v, maskwid, dstwid;
1869 	uchar *wp, *rp, *q, *wc;
1870 	ushort *ws;
1871 	ulong *wl;
1872 	uchar sp[4];
1873 	Rectangle r, mr;
1874 	Memimage *mask, *src, *dst;
1875 
1876 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
1877 		par->mask->flags, par->mask->depth, par->src->flags,
1878 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
1879 
1880 	mask = par->mask;
1881 	src = par->src;
1882 	dst = par->dst;
1883 	r = par->r;
1884 	mr = par->mr;
1885 
1886 	if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
1887 	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data)
1888 		return 0;
1889 
1890 	depth = mask->depth;
1891 	maskwid = mask->width*sizeof(ulong);
1892 	rp = byteaddr(mask, mr.min);
1893 	npack = 8/depth;
1894 	bsh = (mr.min.x % npack) * depth;
1895 
1896 	wp = byteaddr(dst, r.min);
1897 	dstwid = dst->width*sizeof(ulong);
1898 DBG print("bsh %d\n", bsh);
1899 	dy = Dy(r);
1900 	dx = Dx(r);
1901 
1902 	ddepth = dst->depth;
1903 
1904 	/*
1905 	 * for loop counts from bsh to bsh+dx
1906 	 *
1907 	 * we want the bottom bits to be the amount
1908 	 * to shift the pixels down, so for n≡0 (mod 8) we want
1909 	 * bottom bits 7.  for n≡1, 6, etc.
1910 	 * the bits come from -n-1.
1911 	 */
1912 
1913 	bx = -bsh-1;
1914 	ex = -bsh-1-dx;
1915 	SET(bits);
1916 	v = par->sdval;
1917 
1918 	/* make little endian */
1919 	sp[0] = v;
1920 	sp[1] = v>>8;
1921 	sp[2] = v>>16;
1922 	sp[3] = v>>24;
1923 
1924 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
1925 		q = rp;
1926 		if(bsh)
1927 			bits = *q++;
1928 		switch(ddepth){
1929 		case 8:
1930 			wc = wp;
1931 			for(x=bx; x>ex; x--, wc++){
1932 				i = x&7;
1933 				if(i == 8-1)
1934 					bits = *q++;
1935 DBG print("bits %lux sh %d...", bits, i);
1936 				if((bits>>i)&1)
1937 					*wc = v;
1938 			}
1939 			break;
1940 		case 16:
1941 			ws = (ushort*)wp;
1942 			v = *(ushort*)sp;
1943 			for(x=bx; x>ex; x--, ws++){
1944 				i = x&7;
1945 				if(i == 8-1)
1946 					bits = *q++;
1947 DBG print("bits %lux sh %d...", bits, i);
1948 				if((bits>>i)&1)
1949 					*ws = v;
1950 			}
1951 			break;
1952 		case 24:
1953 			wc = wp;
1954 			for(x=bx; x>ex; x--, wc+=3){
1955 				i = x&7;
1956 				if(i == 8-1)
1957 					bits = *q++;
1958 DBG print("bits %lux sh %d...", bits, i);
1959 				if((bits>>i)&1){
1960 					wc[0] = sp[0];
1961 					wc[1] = sp[1];
1962 					wc[2] = sp[2];
1963 				}
1964 			}
1965 			break;
1966 		case 32:
1967 			wl = (ulong*)wp;
1968 			v = *(ulong*)sp;
1969 			for(x=bx; x>ex; x--, wl++){
1970 				i = x&7;
1971 				if(i == 8-1)
1972 					bits = *q++;
1973 DBG iprint("bits %lux sh %d...", bits, i);
1974 				if((bits>>i)&1)
1975 					*wl = v;
1976 			}
1977 			break;
1978 		}
1979 	}
1980 
1981 DBG print("\n");
1982 	return 1;
1983 }
1984 #undef DBG
1985 
1986