xref: /inferno-os/libmemdraw/draw.c (revision adad243147f6102c6e975f48951c05745d56b92d)
1 #include "lib9.h"
2 #include "draw.h"
3 #include "memdraw.h"
4 #include "pool.h"
5 
6 extern Pool* imagmem;
7 int drawdebug;
8 static int	tablesbuilt;
9 
10 /* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
11 #define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)
12 
13 /*
14  * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.
15  * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.
16  * the last one is perfect for all up to 1<<16, avoids a multiply, but requires a rathole.
17  */
18 /* #define DIV255(x) (((x)*257+256)>>16)  */
19 #define DIV255(x) ((((x)+1)*257)>>16)
20 /* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */
21 
/*
 * MUL(x, y, t): x*y/255 with rounding, intended for 8-bit x and y.
 * t is a scratch lvalue supplied by the caller; the macro assigns to it.
 */
#define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)
/* byte-lane masks for a 32-bit word: bytes 1 and 3, and bytes 0 and 2 */
#define MASK13	0xFF00FF00
#define MASK02	0x00FF00FF
/*
 * MUL13 and MUL02 apply the same scaling as MUL to two bytes of x at once
 * (bytes 1,3 and bytes 0,2 respectively).  Both leave their two results in
 * byte positions 0 and 2; MUL0123 shifts the MUL13 result back up by 8 and
 * ors the halves together, scaling all four bytes of x by a.
 * s and t are caller-supplied scratch lvalues.
 */
#define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+0x800080, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+0x800080, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))

/*
 * MUL2 computes (u*v + x*y) scaled the same way as MUL.
 * NOTE: unlike MUL it assigns to a variable named t that must already
 * exist in the scope where the macro is expanded.
 */
#define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)

/*
 * BWADD adds x and y one byte lane at a time: the odd and even lanes are
 * summed separately and masked, so a carry out of one byte is discarded
 * instead of spilling into its neighbour.  (& binds tighter than |, so the
 * expression is (odd sum & MASK13) | (even sum & MASK02).)
 */
#define BWADD(x, y)	((((x)&MASK13)+((y)&MASK13))&MASK13|(((x)&MASK02)+((y)&MASK02))&MASK02)
32 
33 static void mktables(void);
34 typedef int Subdraw(Memdrawparam*);
35 static Subdraw chardraw, alphadraw, memoptdraw;
36 
37 static Memimage*	memones;
38 static Memimage*	memzeros;
39 Memimage *memwhite;
40 Memimage *memblack;
41 Memimage *memtransparent;
42 Memimage *memopaque;
43 
44 int	_ifmt(Fmt*);
45 
46 void
memimageinit(void)47 memimageinit(void)
48 {
49 	static int didinit = 0;
50 	char *n;
51 
52 	if(didinit)
53 		return;
54 
55 	didinit = 1;
56 
57 	n = poolname(imagmem);
58 	if(strcmp(n, "Image") == 0 || strcmp(n, "image") == 0)
59 		poolsetcompact(imagmem, memimagemove);
60 	mktables();
61 	_memmkcmap();
62 
63 	fmtinstall('R', Rfmt);
64 	fmtinstall('P', Pfmt);
65 	fmtinstall('b', _ifmt);
66 
67 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
68 	memones->flags |= Frepl;
69 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
70 	*byteaddr(memones, ZP) = ~0;
71 
72 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
73 	memzeros->flags |= Frepl;
74 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
75 	*byteaddr(memzeros, ZP) = 0;
76 
77 	if(memones == nil || memzeros == nil)
78 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
79 
80 	memwhite = memones;
81 	memblack = memzeros;
82 	memopaque = memones;
83 	memtransparent = memzeros;
84 }
85 
86 static ulong imgtorgba(Memimage*, ulong);
87 static ulong rgbatoimg(Memimage*, ulong);
88 static ulong pixelbits(Memimage*, Point);
89 
90 #define DBG if(0)
91 void
memimagedraw(Memimage * dst,Rectangle r,Memimage * src,Point p0,Memimage * mask,Point p1,int op)92 memimagedraw(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
93 {
94 	Memdrawparam par;
95 
96 	if(mask == nil)
97 		mask = memopaque;
98 
99 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
100 
101 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
102 //		if(drawdebug)
103 //			iprint("empty clipped rectangle\n");
104 		return;
105 	}
106 
107 	if(op < Clear || op > SoverD){
108 //		if(drawdebug)
109 //			iprint("op out of range: %d\n", op);
110 		return;
111 	}
112 
113 	par.op = op;
114 	par.dst = dst;
115 	par.r = r;
116 	par.src = src;
117 	/* par.sr set by drawclip */
118 	par.mask = mask;
119 	/* par.mr set by drawclip */
120 
121 	par.state = 0;
122 	if(src->flags&Frepl){
123 		par.state |= Replsrc;
124 		if(Dx(src->r)==1 && Dy(src->r)==1){
125 			par.sval = pixelbits(src, src->r.min);
126 			par.state |= Simplesrc;
127 			par.srgba = imgtorgba(src, par.sval);
128 			par.sdval = rgbatoimg(dst, par.srgba);
129 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
130 //				if (drawdebug) iprint("fill with transparent source\n");
131 				return;	/* no-op successfully handled */
132 			}
133 		}
134 	}
135 
136 	if(mask->flags & Frepl){
137 		par.state |= Replmask;
138 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
139 			par.mval = pixelbits(mask, mask->r.min);
140 			if(par.mval == 0 && (op&DoutS)){
141 //				if(drawdebug) iprint("fill with zero mask\n");
142 				return;	/* no-op successfully handled */
143 			}
144 			par.state |= Simplemask;
145 			if(par.mval == ~0)
146 				par.state |= Fullmask;
147 			par.mrgba = imgtorgba(mask, par.mval);
148 		}
149 	}
150 
151 //	if(drawdebug)
152 //		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
153 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
154 
155 	/*
156 	 * Now that we've clipped the parameters down to be consistent, we
157 	 * simply try sub-drawing routines in order until we find one that was able
158 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
159 	 * unable to satisfy the request, so we do not return.
160 	 */
161 
162 	/*
163 	 * Hardware support.  Each video driver provides this function,
164 	 * which checks to see if there is anything it can help with.
165 	 * There could be an if around this checking to see if dst is in video memory.
166 	 */
167 DBG print("test hwdraw\n");
168 	if(hwdraw(&par)){
169 //if(drawdebug) iprint("hw handled\n");
170 DBG print("hwdraw handled\n");
171 		return;
172 	}
173 	/*
174 	 * Optimizations using memmove and memset.
175 	 */
176 DBG print("test memoptdraw\n");
177 	if(memoptdraw(&par)){
178 //if(drawdebug) iprint("memopt handled\n");
179 DBG print("memopt handled\n");
180 		return;
181 	}
182 
183 	/*
184 	 * Character drawing.
185 	 * Solid source color being painted through a boolean mask onto a high res image.
186 	 */
187 DBG print("test chardraw\n");
188 	if(chardraw(&par)){
189 //if(drawdebug) iprint("chardraw handled\n");
190 DBG print("chardraw handled\n");
191 		return;
192 	}
193 
194 	/*
195 	 * General calculation-laden case that does alpha for each pixel.
196 	 */
197 DBG print("do alphadraw\n");
198 	alphadraw(&par);
199 //if(drawdebug) iprint("alphadraw handled\n");
200 DBG print("alphadraw handled\n");
201 }
202 #undef DBG
203 
204 /*
205  * Clip the destination rectangle further based on the properties of the
206  * source and mask rectangles.  Once the destination rectangle is properly
207  * clipped, adjust the source and mask rectangles to be the same size.
208  * Then if source or mask is replicated, move its clipped rectangle
209  * so that its minimum point falls within the repl rectangle.
210  *
211  * Return zero if the final rectangle is null.
212  */
/*
 * dst, src, mask: the three images of a draw request.
 * r:  destination rectangle; clipped in place.
 * p0: point in src corresponding to r->min; updated in place.
 * p1: point in mask corresponding to r->min; updated in place.
 * sr, mr: outputs, the rectangles in src and mask that match the final r.
 * Returns 1 if something remains to be drawn, 0 otherwise.
 */
int
drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
{
	Point rmin, delta;
	int splitcoords;
	Rectangle omr;

	/* an empty or inverted request draws nothing */
	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
		return 0;
	/* non-zero when source and mask are addressed from different points */
	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
	/* clip to destination */
	rmin = r->min;
	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
		return 0;
	/* move mask point */
	p1->x += r->min.x-rmin.x;
	p1->y += r->min.y-rmin.y;
	/* move source point */
	p0->x += r->min.x-rmin.x;
	p0->y += r->min.y-rmin.y;
	/* map destination rectangle into source */
	sr->min = *p0;
	sr->max.x = p0->x+Dx(*r);
	sr->max.y = p0->y+Dy(*r);
	/* sr is r in source coordinates; clip to source */
	/* (a replicated source is clipped by clipr only, not by its own r) */
	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
		return 0;
	if(!rectclip(sr, src->clipr))
		return 0;
	/* compute and clip rectangle in mask */
	if(splitcoords){
		/* move mask point with source */
		p1->x += sr->min.x-p0->x;
		p1->y += sr->min.y-p0->y;
		mr->min = *p1;
		mr->max.x = p1->x+Dx(*sr);
		mr->max.y = p1->y+Dy(*sr);
		/* remember the unclipped mask rectangle to measure how much is cut off */
		omr = *mr;
		/* mr is now rectangle in mask; clip it */
		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
			return 0;
		if(!rectclip(mr, mask->clipr))
			return 0;
		/* reflect any clips back to source */
		sr->min.x += mr->min.x-omr.min.x;
		sr->min.y += mr->min.y-omr.min.y;
		sr->max.x += mr->max.x-omr.max.x;
		sr->max.y += mr->max.y-omr.max.y;
		*p1 = mr->min;
	}else{
		/* source and mask share coordinates, so sr can be clipped by the mask directly */
		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
			return 0;
		if(!rectclip(sr, mask->clipr))
			return 0;
		*p1 = sr->min;
	}

	/* move source clipping back to destination */
	/* (p0 still holds the source point from before sr was clipped) */
	delta.x = r->min.x - p0->x;
	delta.y = r->min.y - p0->y;
	r->min.x = sr->min.x + delta.x;
	r->min.y = sr->min.y + delta.y;
	r->max.x = sr->max.x + delta.x;
	r->max.y = sr->max.y + delta.y;

	/* move source rectangle so sr->min is in src->r */
	if(src->flags&Frepl) {
		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
		sr->min.x += delta.x;
		sr->min.y += delta.y;
		sr->max.x += delta.x;
		sr->max.y += delta.y;
	}
	*p0 = sr->min;

	/* move mask point so it is in mask->r */
	/* (done whether or not the mask is replicated; mr is rebuilt to the size of sr) */
	*p1 = drawrepl(mask->r, *p1);
	mr->min = *p1;
	mr->max.x = p1->x+Dx(*sr);
	mr->max.y = p1->y+Dy(*sr);

	/* all three rectangles must now agree in size and start inside their images */
	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
	assert(ptinrect(*p0, src->r));
	assert(ptinrect(*p1, mask->r));
	assert(ptinrect(r->min, dst->r));

	return 1;
}
303 
304 /*
305  * Conversion tables.
306  */
307 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
308 
309 /*
310  * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
311  * the X's are where to put the bottom (ones) bit of the n-bit pattern.
312  * only the top 8 bits of the result are actually used.
313  * (the lower 8 bits are needed to get bits in the right place
314  * when n is not a divisor of 8.)
315  *
 * Should check to see if it's easier to just refer to replmul than
317  * use the precomputed values in replbit.  On PCs it may well
318  * be; on machines with slow multiply instructions it probably isn't.
319  */
/*
 * The three helper macros let each table row be written as a picture of
 * 16 bits: `a' opens sixteen parentheses around 0, and every X or _ closes
 * one of them while doubling the value so far, X also adding 1.
 * A row therefore evaluates to the binary number with 1 for X and 0 for _.
 */
#define a ((((((((((((((((0
#define X *2+1)
#define _ *2)
/* replmul[n] is the multiplier for n-bit values; entry 0 is unused padding */
static int replmul[1+8] = {
	0,
	a X X X X X X X X X X X X X X X X,
	a _ X _ X _ X _ X _ X _ X _ X _ X,
	a _ _ X _ _ X _ _ X _ _ X _ _ X _,
	a _ _ _ X _ _ _ X _ _ _ X _ _ _ X,
	a _ _ _ _ X _ _ _ _ X _ _ _ _ X _,
	a _ _ _ _ _ X _ _ _ _ _ X _ _ _ _,
	a _ _ _ _ _ _ X _ _ _ _ _ _ X _ _,
	a _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X,
};
/* the picture macros are only meaningful for the table above */
#undef a
#undef X
#undef _
337 
338 static void
mktables(void)339 mktables(void)
340 {
341 	int i, j, small;
342 
343 	if(tablesbuilt)
344 		return;
345 
346 	fmtinstall('R', Rfmt);
347 	fmtinstall('P', Pfmt);
348 	tablesbuilt = 1;
349 
350 	/* bit replication up to 8 bits */
351 	for(i=0; i<256; i++){
352 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
353 			small = i & ((1<<j)-1);
354 			replbit[j][i] = (small*replmul[j])>>8;
355 		}
356 	}
357 }
358 
359 static uchar ones = 0xff;
360 
361 /*
362  * General alpha drawing case.  Can handle anything.
363  */
/*
 * One scan line of pixels split into byte-wide channels.
 * Each channel pointer addresses the first pixel's byte for that channel;
 * the calculators step every pointer by delta to reach the next pixel.
 * alpha may instead point at the file-level `ones' byte, in which case the
 * calculators neither advance it nor write through it.
 * A non-nil rgba lets the colour calculators process a whole pixel as one
 * ulong instead of channel by channel.
 */
typedef struct	Buffer	Buffer;
struct Buffer {
	/* used by most routines */
	uchar	*red;
	uchar	*grn;
	uchar	*blu;
	uchar	*alpha;
	uchar	*grey;
	ulong	*rgba;
	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */

	/* used by boolcalc* for mask data */
	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
	int		mskip;	/* no. of left bits to skip in *m */
	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
	int		bmskip;	/* no. of left bits to skip in *bm */
	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
	int		emskip;	/* no. of right bits to skip in *em */
};
383 
384 typedef struct	Param	Param;
385 typedef Buffer	Readfn(Param*, uchar*, int);
386 typedef void	Writefn(Param*, uchar*, Buffer);
387 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
388 
389 enum {
390 	MAXBCACHE = 16
391 };
392 
393 /* giant rathole to customize functions with */
/*
 * Per-image state for alphadraw; one each is kept for source, mask and
 * destination.  getparam zeroes the structure and fills in the geometry
 * and buffer offsets; alphadraw then fills in the call-backs and the
 * buffer pointers.
 */
struct Param {
	Readfn	*replcall;	/* real reader, used when the reader is replaced by replread */
	Readfn	*greymaskcall;	/* real reader, used when the mask reader is greymaskread */
	Readfn	*convreadcall;	/* presumably set up by convfn (not visible here) */
	Writefn	*convwritecall;	/* presumably set up by convfn (not visible here) */

	Memimage *img;
	Rectangle	r;
	int	dx;	/* of r */
	int	needbuf;	/* set by alphadraw when src and dst share the same data */
	int	convgrey;	/* request conversion to grey when reading */
	int	alphaonly;	/* set for a mask that has its own alpha channel */

	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
	int		bwidth;	/* sizeof(ulong)*img->width; used as the byte stride between scan lines */

	int	replcache;	/* if set, cache buffers */
	Buffer	bcache[MAXBCACHE];
	ulong	bfilled;	/* presumably a bitmap of valid bcache entries - TODO confirm in replread */
	uchar	*bufbase;	/* start of this image's area in the shared draw buffer */
	int	bufoff;	/* offset of that area from the start of the draw buffer */
	int	bufdelta;	/* bytes reserved per buffered scan line (4*dx) */

	int	dir;	/* +1 or -1: direction in which scan lines are processed */

	int	convbufoff;	/* offset of convbuf in the draw buffer */
	uchar	*convbuf;
	Param	*convdpar;	/* presumably the destination Param for conversion - TODO confirm in convfn */
	int	convdx;
};
426 
427 static Readfn	greymaskread, replread, readptr;
428 static Writefn	nullwrite;
429 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
430 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
431 
432 static Readfn*	readfn(Memimage*);
433 static Readfn*	readalphafn(Memimage*);
434 static Writefn*	writefn(Memimage*);
435 
436 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
437 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*, int*);
438 static Readfn*	ptrfn(Memimage*);
439 
/*
 * General compositing functions, indexed by drawing operator
 * (alphadraw selects alphacalc[op]).  The digits in each function name
 * are the table indices it serves, e.g. alphacalc14 handles entries
 * 1 and 4 and alphacalc2810 handles entries 2, 8 and 10.
 */
static Calcfn *alphacalc[Ncomp] =
{
	alphacalc0,		/* Clear */
	alphacalc14,		/* DoutS */
	alphacalc2810,		/* SoutD */
	alphacalc3679,		/* DxorS */
	alphacalc14,		/* DinS */
	alphacalc5,		/* D */
	alphacalc3679,		/* DatopS */
	alphacalc3679,		/* DoverS */
	alphacalc2810,		/* SinD */
	alphacalc3679,		/* SatopD */
	alphacalc2810,		/* S */
	alphacalc11,		/* SoverD */
};
455 
/*
 * Compositing functions for the boolean case, indexed by drawing operator.
 * alphadraw uses this table in place of alphacalc when the mask is GREY1
 * and the source has no alpha channel.  Clear and D reuse the general
 * routines.
 */
static Calcfn *boolcalc[Ncomp] =
{
	alphacalc0,		/* Clear */
	boolcalc14,		/* DoutS */
	boolcalc236789,		/* SoutD */
	boolcalc236789,		/* DxorS */
	boolcalc14,		/* DinS */
	alphacalc5,		/* D */
	boolcalc236789,		/* DatopS */
	boolcalc236789,		/* DoverS */
	boolcalc236789,		/* SinD */
	boolcalc236789,		/* SatopD */
	boolcalc1011,		/* S */
	boolcalc1011,		/* SoverD */
};
471 
472 /*
473  * Avoid standard Lock, QLock so that can be used in kernel.
474  */
/*
 * Scratch state for one alphadraw call.  A fixed pool of these is kept
 * in dbuf[]; allocdbuf claims a free entry and alphadraw releases it by
 * clearing inuse.  The byte buffer p is kept between uses and only
 * reallocated when a call needs more than n bytes.
 */
typedef struct Dbuf Dbuf;
struct Dbuf
{
	uchar *p;	/* draw buffer, or nil if none has been allocated yet */
	int n;	/* size of p in bytes */
	Param spar, mpar, dpar;	/* source, mask and destination parameters */
	int inuse;	/* non-zero while claimed; taken with _tas */
};
static Dbuf dbuf[10];
484 
485 static Dbuf*
allocdbuf(void)486 allocdbuf(void)
487 {
488 	int i;
489 
490 	for(i=0; i<nelem(dbuf); i++){
491 		if(dbuf[i].inuse)
492 			continue;
493 		if(!_tas(&dbuf[i].inuse))
494 			return &dbuf[i];
495 	}
496 	return nil;
497 }
498 
499 static void
getparam(Param * p,Memimage * img,Rectangle r,int convgrey,int needbuf,int * ndrawbuf)500 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf, int *ndrawbuf)
501 {
502 	int nbuf;
503 
504 	memset(p, 0, sizeof *p);
505 
506 	p->img = img;
507 	p->r = r;
508 	p->dx = Dx(r);
509 	p->needbuf = needbuf;
510 	p->convgrey = convgrey;
511 
512 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
513 
514 	p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
515 	p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
516 	p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
517 	p->bwidth = sizeof(ulong)*img->width;
518 
519 	assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
520 
521 	if(p->r.min.x == p->img->r.min.x)
522 		assert(p->bytermin == p->bytey0s);
523 
524 	nbuf = 1;
525 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
526 		p->replcache = 1;
527 		nbuf = Dy(img->r);
528 	}
529 	p->bufdelta = 4*p->dx;
530 	p->bufoff = *ndrawbuf;
531 	*ndrawbuf += p->bufdelta*nbuf;
532 }
533 
534 static void
clipy(Memimage * img,int * y)535 clipy(Memimage *img, int *y)
536 {
537 	int dy;
538 
539 	dy = Dy(img->r);
540 	if(*y == dy)
541 		*y = 0;
542 	else if(*y == -1)
543 		*y = dy-1;
544 	assert(0 <= *y && *y < dy);
545 }
546 
547 static void
dumpbuf(char * s,Buffer b,int n)548 dumpbuf(char *s, Buffer b, int n)
549 {
550 	int i;
551 	uchar *p;
552 
553 	print("%s", s);
554 	for(i=0; i<n; i++){
555 		print(" ");
556 		if(p=b.grey){
557 			print(" k%.2uX", *p);
558 			b.grey += b.delta;
559 		}else{
560 			if(p=b.red){
561 				print(" r%.2uX", *p);
562 				b.red += b.delta;
563 			}
564 			if(p=b.grn){
565 				print(" g%.2uX", *p);
566 				b.grn += b.delta;
567 			}
568 			if(p=b.blu){
569 				print(" b%.2uX", *p);
570 				b.blu += b.delta;
571 			}
572 		}
573 		if((p=b.alpha) != &ones){
574 			print(" α%.2uX", *p);
575 			b.alpha += b.delta;
576 		}
577 	}
578 	print("\n");
579 }
580 
581 /*
582  * For each scan line, we expand the pixels from source, mask, and destination
583  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
584  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
585  * the readers need not copy the data: they can simply return pointers to the data.
586  * If the destination image is grey and the source is not, it is converted using the NTSC
587  * formula.
588  *
589  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
590  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
591  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
592  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
593  * the calculator, and that buffer is passed to a function to write it to the destination.
594  * If the buffer is already pointing at the destination, the writing function is a no-op.
595  */
596 #define DBG if(0)
597 static int
alphadraw(Memdrawparam * par)598 alphadraw(Memdrawparam *par)
599 {
600 	int isgrey, starty, endy, op;
601 	int needbuf, dsty, srcy, masky;
602 	int y, dir, dx, dy, ndrawbuf;
603 	uchar *drawbuf;
604 	Buffer bsrc, bdst, bmask;
605 	Readfn *rdsrc, *rdmask, *rddst;
606 	Calcfn *calc;
607 	Writefn *wrdst;
608 	Memimage *src, *mask, *dst;
609 	Rectangle r, sr, mr;
610 	Dbuf *z;
611 
612 	z = allocdbuf();
613 	if(z == nil)
614 		return 0;
615 
616 	r = par->r;
617 	dx = Dx(r);
618 	dy = Dy(r);
619 
620 	src = par->src;
621 	mask = par->mask;
622 	dst = par->dst;
623 	sr = par->sr;
624 	mr = par->mr;
625 	op = par->op;
626 
627 	isgrey = dst->flags&Fgrey;
628 
629 	/*
630 	 * Buffering when src and dst are the same bitmap is sufficient but not
631 	 * necessary.  There are stronger conditions we could use.  We could
632 	 * check to see if the rectangles intersect, and if simply moving in the
633 	 * correct y direction can avoid the need to buffer.
634 	 */
635 	needbuf = (src->data == dst->data);
636 
637 	ndrawbuf = 0;
638 	getparam(&z->spar, src, sr, isgrey, needbuf, &ndrawbuf);
639 	getparam(&z->dpar, dst, r, isgrey, needbuf, &ndrawbuf);
640 	getparam(&z->mpar, mask, mr, 0, needbuf, &ndrawbuf);
641 
642 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
643 	z->spar.dir = z->mpar.dir = z->dpar.dir = dir;
644 
645 	/*
646 	 * If the mask is purely boolean, we can convert from src to dst format
647 	 * when we read src, and then just copy it to dst where the mask tells us to.
648 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
649 	 *
650 	 * The computation is accomplished by assigning the function pointers as follows:
651 	 *	rdsrc - read and convert source into dst format in a buffer
652 	 * 	rdmask - convert mask to bytes, set pointer to it
653 	 * 	rddst - fill with pointer to real dst data, but do no reads
654 	 *	calc - copy src onto dst when mask says to.
655 	 *	wrdst - do nothing
656 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
657 	 * but it avoids a fair amount of code duplication to make this a case here
658 	 * rather than have a separate booldraw.
659 	 */
660 //if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
661 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
662 //if(drawdebug) iprint("boolcopy...");
663 		rdsrc = convfn(dst, &z->dpar, src, &z->spar, &ndrawbuf);
664 		rddst = readptr;
665 		rdmask = readfn(mask);
666 		calc = boolcopyfn(dst, mask);
667 		wrdst = nullwrite;
668 	}else{
669 		/* usual alphadraw parameter fetching */
670 		rdsrc = readfn(src);
671 		rddst = readfn(dst);
672 		wrdst = writefn(dst);
673 		calc = alphacalc[op];
674 
675 		/*
676 		 * If there is no alpha channel, we'll ask for a grey channel
677 		 * and pretend it is the alpha.
678 		 */
679 		if(mask->flags&Falpha){
680 			rdmask = readalphafn(mask);
681 			z->mpar.alphaonly = 1;
682 		}else{
683 			z->mpar.greymaskcall = readfn(mask);
684 			z->mpar.convgrey = 1;
685 			rdmask = greymaskread;
686 
687 			/*
688 			 * Should really be above, but then boolcopyfns would have
689 			 * to deal with bit alignment, and I haven't written that.
690 			 *
691 			 * This is a common case for things like ellipse drawing.
692 			 * When there's no alpha involved and the mask is boolean,
693 			 * we can avoid all the division and multiplication.
694 			 */
695 			if(mask->chan == GREY1 && !(src->flags&Falpha))
696 				calc = boolcalc[op];
697 			else if(op == SoverD && !(src->flags&Falpha))
698 				calc = alphacalcS;
699 		}
700 	}
701 
702 	/*
703 	 * If the image has a small enough repl rectangle,
704 	 * we can just read each line once and cache them.
705 	 */
706 	if(z->spar.replcache){
707 		z->spar.replcall = rdsrc;
708 		rdsrc = replread;
709 	}
710 	if(z->mpar.replcache){
711 		z->mpar.replcall = rdmask;
712 		rdmask = replread;
713 	}
714 
715 	if(z->n < ndrawbuf){
716 		free(z->p);
717 		if((z->p = mallocz(ndrawbuf, 0)) == nil){
718 			z->inuse = 0;
719 			return 0;
720 		}
721 		z->n = ndrawbuf;
722 	}
723 	drawbuf = z->p;
724 
725 	/*
726 	 * Before we were saving only offsets from drawbuf in the parameter
727 	 * structures; now that drawbuf has been grown to accomodate us,
728 	 * we can fill in the pointers.
729 	 */
730 	z->spar.bufbase = drawbuf+z->spar.bufoff;
731 	z->mpar.bufbase = drawbuf+z->mpar.bufoff;
732 	z->dpar.bufbase = drawbuf+z->dpar.bufoff;
733 	z->spar.convbuf = drawbuf+z->spar.convbufoff;
734 
735 	if(dir == 1){
736 		starty = 0;
737 		endy = dy;
738 	}else{
739 		starty = dy-1;
740 		endy = -1;
741 	}
742 
743 	/*
744 	 * srcy, masky, and dsty are offsets from the top of their
745 	 * respective Rectangles.  they need to be contained within
746 	 * the rectangles, so clipy can keep them there without division.
747  	 */
748 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
749 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
750 	dsty = starty + r.min.y - dst->r.min.y;
751 
752 	assert(0 <= srcy && srcy < Dy(src->r));
753 	assert(0 <= masky && masky < Dy(mask->r));
754 	assert(0 <= dsty && dsty < Dy(dst->r));
755 
756 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
757 		clipy(src, &srcy);
758 		clipy(dst, &dsty);
759 		clipy(mask, &masky);
760 
761 		bsrc = rdsrc(&z->spar, z->spar.bufbase, srcy);
762 DBG print("[");
763 		bmask = rdmask(&z->mpar, z->mpar.bufbase, masky);
764 DBG print("]\n");
765 		bdst = rddst(&z->dpar, z->dpar.bufbase, dsty);
766 		if(op != Clear && (bsrc.delta != 4 || bdst.delta != 4 || src->chan != dst->chan))
767 			bdst.rgba = nil;
768 DBG		dumpbuf("src", bsrc, dx);
769 DBG		dumpbuf("mask", bmask, dx);
770 DBG		dumpbuf("dst", bdst, dx);
771 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
772 		wrdst(&z->dpar, z->dpar.bytermin+dsty*z->dpar.bwidth, bdst);
773 	}
774 
775 	z->inuse = 0;
776 	return 1;
777 }
778 #undef DBG
779 
780 static Buffer
alphacalc0(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)781 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
782 {
783 	USED(b1.grey);
784 	USED(b2.grey);
785 	USED(grey);
786 	USED(op);
787 	memset(bdst.rgba, 0, dx*bdst.delta);
788 	return bdst;
789 }
790 
791 static Buffer
alphacalc14(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)792 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
793 {
794 	Buffer obdst;
795 	int fd, sadelta;
796 	int i, sa, ma;
797 	ulong s, t;
798 
799 	obdst = bdst;
800 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
801 
802 	for(i=0; i<dx; i++){
803 		sa = *bsrc.alpha;
804 		ma = *bmask.alpha;
805 		fd = MUL(sa, ma, t);
806 		if(op == DoutS)
807 			fd = 255-fd;
808 
809 		if(grey){
810 			*bdst.grey = MUL(fd, *bdst.grey, t);
811 			bsrc.grey += bsrc.delta;
812 			bdst.grey += bdst.delta;
813 		}else{
814 			if(bdst.rgba){
815 				*bdst.rgba = MUL0123(fd, *bdst.rgba, s, t);
816 				bsrc.rgba++;
817 				bdst.rgba++;
818 				bsrc.alpha += sadelta;
819 				bmask.alpha += bmask.delta;
820 				continue;
821 			}
822 			*bdst.red = MUL(fd, *bdst.red, t);
823 			*bdst.grn = MUL(fd, *bdst.grn, t);
824 			*bdst.blu = MUL(fd, *bdst.blu, t);
825 			bsrc.red += bsrc.delta;
826 			bsrc.blu += bsrc.delta;
827 			bsrc.grn += bsrc.delta;
828 			bdst.red += bdst.delta;
829 			bdst.blu += bdst.delta;
830 			bdst.grn += bdst.delta;
831 		}
832 		if(bdst.alpha != &ones){
833 			*bdst.alpha = MUL(fd, *bdst.alpha, t);
834 			bdst.alpha += bdst.delta;
835 		}
836 		bmask.alpha += bmask.delta;
837 		bsrc.alpha += sadelta;
838 	}
839 	return obdst;
840 }
841 
842 static Buffer
alphacalc2810(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)843 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
844 {
845 	Buffer obdst;
846 	int fs, sadelta;
847 	int i, ma, da;
848 	ulong s, t;
849 
850 	obdst = bdst;
851 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
852 
853 	for(i=0; i<dx; i++){
854 		ma = *bmask.alpha;
855 		da = *bdst.alpha;
856 		if(op == SoutD)
857 			da = 255-da;
858 		fs = ma;
859 		if(op != S)
860 			fs = MUL(fs, da, t);
861 
862 		if(grey){
863 			*bdst.grey = MUL(fs, *bsrc.grey, t);
864 			bsrc.grey += bsrc.delta;
865 			bdst.grey += bdst.delta;
866 		}else{
867 			if(bdst.rgba){
868 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t);
869 				bsrc.rgba++;
870 				bdst.rgba++;
871 				bmask.alpha += bmask.delta;
872 				bdst.alpha += bdst.delta;
873 				continue;
874 			}
875 			*bdst.red = MUL(fs, *bsrc.red, t);
876 			*bdst.grn = MUL(fs, *bsrc.grn, t);
877 			*bdst.blu = MUL(fs, *bsrc.blu, t);
878 			bsrc.red += bsrc.delta;
879 			bsrc.blu += bsrc.delta;
880 			bsrc.grn += bsrc.delta;
881 			bdst.red += bdst.delta;
882 			bdst.blu += bdst.delta;
883 			bdst.grn += bdst.delta;
884 		}
885 		if(bdst.alpha != &ones){
886 			*bdst.alpha = MUL(fs, *bsrc.alpha, t);
887 			bdst.alpha += bdst.delta;
888 		}
889 		bmask.alpha += bmask.delta;
890 		bsrc.alpha += sadelta;
891 	}
892 	return obdst;
893 }
894 
895 static Buffer
alphacalc3679(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)896 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
897 {
898 	Buffer obdst;
899 	int fs, fd, sadelta;
900 	int i, sa, ma, da;
901 	ulong s, t, q1, q2;
902 
903 	obdst = bdst;
904 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
905 
906 	for(i=0; i<dx; i++){
907 		sa = *bsrc.alpha;
908 		ma = *bmask.alpha;
909 		da = *bdst.alpha;
910 		if(op == SatopD)
911 			fs = MUL(ma, da, t);
912 		else
913 			fs = MUL(ma, 255-da, t);
914 		if(op == DoverS)
915 			fd = 255;
916 		else{
917 			fd = MUL(sa, ma, t);
918 			if(op != DatopS)
919 				fd = 255-fd;
920 		}
921 
922 		if(grey){
923 			*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
924 			bsrc.grey += bsrc.delta;
925 			bdst.grey += bdst.delta;
926 		}else{
927 			if(bdst.rgba){
928 				q1 = MUL0123(fs, *bsrc.rgba, s, t);
929 				q2 = MUL0123(fd, *bdst.rgba, s, t);
930 				*bdst.rgba = BWADD(q1, q2);
931 				bsrc.rgba++;
932 				bdst.rgba++;
933 				bsrc.alpha += sadelta;
934 				bmask.alpha += bmask.delta;
935 				bdst.alpha += bdst.delta;
936 				continue;
937 			}
938 			*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
939 			*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
940 			*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
941 			bsrc.red += bsrc.delta;
942 			bsrc.blu += bsrc.delta;
943 			bsrc.grn += bsrc.delta;
944 			bdst.red += bdst.delta;
945 			bdst.blu += bdst.delta;
946 			bdst.grn += bdst.delta;
947 		}
948 		if(bdst.alpha != &ones){
949 			*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);
950 			bdst.alpha += bdst.delta;
951 		}
952 		bmask.alpha += bmask.delta;
953 		bsrc.alpha += sadelta;
954 	}
955 	return obdst;
956 }
957 
958 static Buffer
alphacalc5(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)959 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
960 {
961 	USED(b1.grey);
962 	USED(b2.grey);
963 	USED(dx);
964 	USED(grey);
965 	USED(op);
966 	return bdst;
967 }
968 
969 static Buffer
alphacalc11(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)970 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
971 {
972 	Buffer obdst;
973 	int fd, sadelta;
974 	int i, sa, ma;
975 	ulong s, t, q1, q2;
976 
977 	USED(op);
978 	obdst = bdst;
979 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
980 
981 	for(i=0; i<dx; i++){
982 		sa = *bsrc.alpha;
983 		ma = *bmask.alpha;
984 		fd = 255-MUL(sa, ma, t);
985 
986 		if(grey){
987 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
988 			bsrc.grey += bsrc.delta;
989 			bdst.grey += bdst.delta;
990 		}else{
991 			if(bdst.rgba){
992 				q1 = MUL0123(ma, *bsrc.rgba, s, t);
993 				q2 = MUL0123(fd, *bdst.rgba, s, t);
994 				*bdst.rgba = BWADD(q1, q2);
995 				bsrc.rgba++;
996 				bdst.rgba++;
997 				bsrc.alpha += sadelta;
998 				bmask.alpha += bmask.delta;
999 				continue;
1000 			}
1001 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1002 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1003 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1004 			bsrc.red += bsrc.delta;
1005 			bsrc.blu += bsrc.delta;
1006 			bsrc.grn += bsrc.delta;
1007 			bdst.red += bdst.delta;
1008 			bdst.blu += bdst.delta;
1009 			bdst.grn += bdst.delta;
1010 		}
1011 		if(bdst.alpha != &ones){
1012 			*bdst.alpha = MUL(ma, sa, s)+MUL(fd, *bdst.alpha, t);
1013 			bdst.alpha += bdst.delta;
1014 		}
1015 		bmask.alpha += bmask.delta;
1016 		bsrc.alpha += sadelta;
1017 	}
1018 	return obdst;
1019 }
1020 
1021 /*
1022 not used yet
1023 source and mask alpha 1
1024 static Buffer
1025 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1026 {
1027 	Buffer obdst;
1028 	int i;
1029 
1030 	USED(op);
1031 	obdst = bdst;
1032 	if(bsrc.delta == bdst.delta){
1033 		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1034 		return obdst;
1035 	}
1036 	for(i=0; i<dx; i++){
1037 		if(grey){
1038 			*bdst.grey = *bsrc.grey;
1039 			bsrc.grey += bsrc.delta;
1040 			bdst.grey += bdst.delta;
1041 		}else{
1042 			*bdst.red = *bsrc.red;
1043 			*bdst.grn = *bsrc.grn;
1044 			*bdst.blu = *bsrc.blu;
1045 			bsrc.red += bsrc.delta;
1046 			bsrc.blu += bsrc.delta;
1047 			bsrc.grn += bsrc.delta;
1048 			bdst.red += bdst.delta;
1049 			bdst.blu += bdst.delta;
1050 			bdst.grn += bdst.delta;
1051 		}
1052 		if(bdst.alpha != &ones){
1053 			*bdst.alpha = 255;
1054 			bdst.alpha += bdst.delta;
1055 		}
1056 	}
1057 	return obdst;
1058 }
1059 */
1060 
1061 /* source alpha 1 */
1062 static Buffer
alphacalcS(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1063 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1064 {
1065 	Buffer obdst;
1066 	int fd;
1067 	int i, ma;
1068 	ulong s, t;
1069 
1070 	USED(op);
1071 	obdst = bdst;
1072 
1073 	for(i=0; i<dx; i++){
1074 		ma = *bmask.alpha;
1075 		fd = 255-ma;
1076 
1077 		if(grey){
1078 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1079 			bsrc.grey += bsrc.delta;
1080 			bdst.grey += bdst.delta;
1081 		}else{
1082 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1083 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1084 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1085 			bsrc.red += bsrc.delta;
1086 			bsrc.blu += bsrc.delta;
1087 			bsrc.grn += bsrc.delta;
1088 			bdst.red += bdst.delta;
1089 			bdst.blu += bdst.delta;
1090 			bdst.grn += bdst.delta;
1091 		}
1092 		if(bdst.alpha != &ones){
1093 			*bdst.alpha = ma+MUL(fd, *bdst.alpha, t);
1094 			bdst.alpha += bdst.delta;
1095 		}
1096 		bmask.alpha += bmask.delta;
1097 	}
1098 	return obdst;
1099 }
1100 
1101 static Buffer
boolcalc14(Buffer bdst,Buffer b1,Buffer bmask,int dx,int grey,int op)1102 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1103 {
1104 	Buffer obdst;
1105 	int i, ma, zero;
1106 
1107 	USED(b1.grey);
1108 	obdst = bdst;
1109 
1110 	for(i=0; i<dx; i++){
1111 		ma = *bmask.alpha;
1112 		zero = ma ? op == DoutS : op == DinS;
1113 
1114 		if(grey){
1115 			if(zero)
1116 				*bdst.grey = 0;
1117 			bdst.grey += bdst.delta;
1118 		}else{
1119 			if(zero)
1120 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1121 			bdst.red += bdst.delta;
1122 			bdst.blu += bdst.delta;
1123 			bdst.grn += bdst.delta;
1124 		}
1125 		bmask.alpha += bmask.delta;
1126 		if(bdst.alpha != &ones){
1127 			if(zero)
1128 				*bdst.alpha = 0;
1129 			bdst.alpha += bdst.delta;
1130 		}
1131 	}
1132 	return obdst;
1133 }
1134 
1135 static Buffer
boolcalc236789(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1136 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1137 {
1138 	Buffer obdst;
1139 	int fs, fd;
1140 	int i, ma, da, zero;
1141 	ulong s, t;
1142 
1143 	obdst = bdst;
1144 	zero = !(op&1);
1145 
1146 	for(i=0; i<dx; i++){
1147 		ma = *bmask.alpha;
1148 		da = *bdst.alpha;
1149 		fs = da;
1150 		if(op&2)
1151 			fs = 255-da;
1152 		fd = 0;
1153 		if(op&4)
1154 			fd = 255;
1155 
1156 		if(grey){
1157 			if(ma)
1158 				*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1159 			else if(zero)
1160 				*bdst.grey = 0;
1161 			bsrc.grey += bsrc.delta;
1162 			bdst.grey += bdst.delta;
1163 		}else{
1164 			if(ma){
1165 				*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1166 				*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1167 				*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1168 			}
1169 			else if(zero)
1170 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1171 			bsrc.red += bsrc.delta;
1172 			bsrc.blu += bsrc.delta;
1173 			bsrc.grn += bsrc.delta;
1174 			bdst.red += bdst.delta;
1175 			bdst.blu += bdst.delta;
1176 			bdst.grn += bdst.delta;
1177 		}
1178 		bmask.alpha += bmask.delta;
1179 		if(bdst.alpha != &ones){
1180 			if(ma)
1181 				*bdst.alpha = fs+MUL(fd, da, t);
1182 			else if(zero)
1183 				*bdst.alpha = 0;
1184 			bdst.alpha += bdst.delta;
1185 		}
1186 	}
1187 	return obdst;
1188 }
1189 
1190 static Buffer
boolcalc1011(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1191 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1192 {
1193 	Buffer obdst;
1194 	int i, ma, zero;
1195 
1196 	obdst = bdst;
1197 	zero = !(op&1);
1198 
1199 	for(i=0; i<dx; i++){
1200 		ma = *bmask.alpha;
1201 
1202 		if(grey){
1203 			if(ma)
1204 				*bdst.grey = *bsrc.grey;
1205 			else if(zero)
1206 				*bdst.grey = 0;
1207 			bsrc.grey += bsrc.delta;
1208 			bdst.grey += bdst.delta;
1209 		}else{
1210 			if(ma){
1211 				*bdst.red = *bsrc.red;
1212 				*bdst.grn = *bsrc.grn;
1213 				*bdst.blu = *bsrc.blu;
1214 			}
1215 			else if(zero)
1216 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1217 			bsrc.red += bsrc.delta;
1218 			bsrc.blu += bsrc.delta;
1219 			bsrc.grn += bsrc.delta;
1220 			bdst.red += bdst.delta;
1221 			bdst.blu += bdst.delta;
1222 			bdst.grn += bdst.delta;
1223 		}
1224 		bmask.alpha += bmask.delta;
1225 		if(bdst.alpha != &ones){
1226 			if(ma)
1227 				*bdst.alpha = 255;
1228 			else if(zero)
1229 				*bdst.alpha = 0;
1230 			bdst.alpha += bdst.delta;
1231 		}
1232 	}
1233 	return obdst;
1234 }
1235 /*
1236  * Replicated cached scan line read.  Call the function listed in the Param,
1237  * but cache the result so that for replicated images we only do the work once.
1238  */
1239 static Buffer
replread(Param * p,uchar * s,int y)1240 replread(Param *p, uchar *s, int y)
1241 {
1242 	Buffer *b;
1243 
1244 	USED(s);
1245 	b = &p->bcache[y];
1246 	if((p->bfilled & (1<<y)) == 0){
1247 		p->bfilled |= 1<<y;
1248 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1249 	}
1250 	return *b;
1251 }
1252 
1253 /*
1254  * Alpha reading function that simply relabels the grey pointer.
1255  */
1256 static Buffer
greymaskread(Param * p,uchar * buf,int y)1257 greymaskread(Param *p, uchar *buf, int y)
1258 {
1259 	Buffer b;
1260 
1261 	b = p->greymaskcall(p, buf, y);
1262 	b.alpha = b.grey;
1263 	return b;
1264 }
1265 
1266 #define DBG if(0)
1267 static Buffer
readnbit(Param * p,uchar * buf,int y)1268 readnbit(Param *p, uchar *buf, int y)
1269 {
1270 	Buffer b;
1271 	Memimage *img;
1272 	uchar *repl, *r, *w, *ow, bits;
1273 	int i, n, sh, depth, x, dx, npack, nbits;
1274 
1275 	b.rgba = (ulong*)buf;
1276 	b.grey = w = buf;
1277 	b.red = b.blu = b.grn = w;
1278 	b.alpha = &ones;
1279 	b.delta = 1;
1280 
1281 	dx = p->dx;
1282 	img = p->img;
1283 	depth = img->depth;
1284 	repl = &replbit[depth][0];
1285 	npack = 8/depth;
1286 	sh = 8-depth;
1287 
1288 	/* copy from p->r.min.x until end of repl rectangle */
1289 	x = p->r.min.x;
1290 	n = dx;
1291 	if(n > p->img->r.max.x - x)
1292 		n = p->img->r.max.x - x;
1293 
1294 	r = p->bytermin + y*p->bwidth;
1295 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
1296 	bits = *r++;
1297 	nbits = 8;
1298 	if(i=x&(npack-1)){
1299 DBG print("throwaway %d...", i);
1300 		bits <<= depth*i;
1301 		nbits -= depth*i;
1302 	}
1303 	for(i=0; i<n; i++){
1304 		if(nbits == 0){
1305 DBG print("(%.2ux)...", *r);
1306 			bits = *r++;
1307 			nbits = 8;
1308 		}
1309 		*w++ = repl[bits>>sh];
1310 DBG print("bit %x...", repl[bits>>sh]);
1311 		bits <<= depth;
1312 		nbits -= depth;
1313 	}
1314 	dx -= n;
1315 	if(dx == 0)
1316 		return b;
1317 
1318 	assert(x+i == p->img->r.max.x);
1319 
1320 	/* copy from beginning of repl rectangle until where we were before. */
1321 	x = p->img->r.min.x;
1322 	n = dx;
1323 	if(n > p->r.min.x - x)
1324 		n = p->r.min.x - x;
1325 
1326 	r = p->bytey0s + y*p->bwidth;
1327 DBG print("x=%d r=%p...", x, r);
1328 	bits = *r++;
1329 	nbits = 8;
1330 	if(i=x&(npack-1)){
1331 		bits <<= depth*i;
1332 		nbits -= depth*i;
1333 	}
1334 DBG print("nbits=%d...", nbits);
1335 	for(i=0; i<n; i++){
1336 		if(nbits == 0){
1337 			bits = *r++;
1338 			nbits = 8;
1339 		}
1340 		*w++ = repl[bits>>sh];
1341 DBG print("bit %x...", repl[bits>>sh]);
1342 		bits <<= depth;
1343 		nbits -= depth;
1344 DBG print("bits %x nbits %d...", bits, nbits);
1345 	}
1346 	dx -= n;
1347 	if(dx == 0)
1348 		return b;
1349 
1350 	assert(dx > 0);
1351 	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
1352 	ow = buf;
1353 	while(dx--)
1354 		*w++ = *ow++;
1355 
1356 	return b;
1357 }
1358 #undef DBG
1359 
1360 #define DBG if(0)
1361 static void
writenbit(Param * p,uchar * w,Buffer src)1362 writenbit(Param *p, uchar *w, Buffer src)
1363 {
1364 	uchar *r;
1365 	ulong bits;
1366 	int i, sh, depth, npack, nbits, x, ex;
1367 
1368 	assert(src.grey != nil && src.delta == 1);
1369 
1370 	x = p->r.min.x;
1371 	ex = x+p->dx;
1372 	depth = p->img->depth;
1373 	npack = 8/depth;
1374 
1375 	i=x&(npack-1);
1376 	bits = i ? (*w >> (8-depth*i)) : 0;
1377 	nbits = depth*i;
1378 	sh = 8-depth;
1379 	r = src.grey;
1380 
1381 	for(; x<ex; x++){
1382 		bits <<= depth;
1383 DBG print(" %x", *r);
1384 		bits |= (*r++ >> sh);
1385 		nbits += depth;
1386 		if(nbits == 8){
1387 			*w++ = bits;
1388 			nbits = 0;
1389 		}
1390 	}
1391 
1392 	if(nbits){
1393 		sh = 8-nbits;
1394 		bits <<= sh;
1395 		bits |= *w & ((1<<sh)-1);
1396 		*w = bits;
1397 	}
1398 DBG print("\n");
1399 	return;
1400 }
1401 #undef DBG
1402 
1403 static Buffer
readcmap(Param * p,uchar * buf,int y)1404 readcmap(Param *p, uchar *buf, int y)
1405 {
1406 	Buffer b;
1407 	int a, convgrey, copyalpha, dx, i, m;
1408 	uchar *q, *cmap, *begin, *end, *r, *w;
1409 
1410 	begin = p->bytey0s + y*p->bwidth;
1411 	r = p->bytermin + y*p->bwidth;
1412 	end = p->bytey0e + y*p->bwidth;
1413 	cmap = p->img->cmap->cmap2rgb;
1414 	convgrey = p->convgrey;
1415 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
1416 
1417 	w = buf;
1418 	dx = p->dx;
1419 	if(copyalpha){
1420 		b.alpha = buf++;
1421 		a = p->img->shift[CAlpha]/8;
1422 		m = p->img->shift[CMap]/8;
1423 		for(i=0; i<dx; i++){
1424 			*w++ = r[a];
1425 			q = cmap+r[m]*3;
1426 			r += 2;
1427 			if(r == end)
1428 				r = begin;
1429 			if(convgrey){
1430 				*w++ = RGB2K(q[0], q[1], q[2]);
1431 			}else{
1432 				*w++ = q[2];	/* blue */
1433 				*w++ = q[1];	/* green */
1434 				*w++ = q[0];	/* red */
1435 			}
1436 		}
1437 	}else{
1438 		b.alpha = &ones;
1439 		for(i=0; i<dx; i++){
1440 			q = cmap+*r++*3;
1441 			if(r == end)
1442 				r = begin;
1443 			if(convgrey){
1444 				*w++ = RGB2K(q[0], q[1], q[2]);
1445 			}else{
1446 				*w++ = q[2];	/* blue */
1447 				*w++ = q[1];	/* green */
1448 				*w++ = q[0];	/* red */
1449 			}
1450 		}
1451 	}
1452 
1453 	b.rgba = (ulong*)(buf-copyalpha);
1454 
1455 	if(convgrey){
1456 		b.grey = buf;
1457 		b.red = b.blu = b.grn = buf;
1458 		b.delta = 1+copyalpha;
1459 	}else{
1460 		b.blu = buf;
1461 		b.grn = buf+1;
1462 		b.red = buf+2;
1463 		b.grey = nil;
1464 		b.delta = 3+copyalpha;
1465 	}
1466 	return b;
1467 }
1468 
1469 static void
writecmap(Param * p,uchar * w,Buffer src)1470 writecmap(Param *p, uchar *w, Buffer src)
1471 {
1472 	uchar *cmap, *red, *grn, *blu;
1473 	int i, dx, delta;
1474 
1475 	cmap = p->img->cmap->rgb2cmap;
1476 
1477 	delta = src.delta;
1478 	red= src.red;
1479 	grn = src.grn;
1480 	blu = src.blu;
1481 
1482 	dx = p->dx;
1483 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1484 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1485 }
1486 
1487 #define DBG if(0)
1488 static Buffer
readbyte(Param * p,uchar * buf,int y)1489 readbyte(Param *p, uchar *buf, int y)
1490 {
1491 	Buffer b;
1492 	Memimage *img;
1493 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1494 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1495 	uchar ured, ugrn, ublu;
1496 	ulong u;
1497 
1498 	img = p->img;
1499 	begin = p->bytey0s + y*p->bwidth;
1500 	r = p->bytermin + y*p->bwidth;
1501 	end = p->bytey0e + y*p->bwidth;
1502 
1503 	w = buf;
1504 	dx = p->dx;
1505 	nb = img->depth/8;
1506 
1507 	convgrey = p->convgrey;	/* convert rgb to grey */
1508 	isgrey = img->flags&Fgrey;
1509 	alphaonly = p->alphaonly;
1510 	copyalpha = (img->flags&Falpha) ? 1 : 0;
1511 
1512 DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
1513 	/* if we can, avoid processing everything */
1514 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1515 		memset(&b, 0, sizeof b);
1516 		if(p->needbuf){
1517 			memmove(buf, r, dx*nb);
1518 			r = buf;
1519 		}
1520 		b.rgba = (ulong*)r;
1521 		if(copyalpha)
1522 			b.alpha = r+img->shift[CAlpha]/8;
1523 		else
1524 			b.alpha = &ones;
1525 		if(isgrey){
1526 			b.grey = r+img->shift[CGrey]/8;
1527 			b.red = b.grn = b.blu = b.grey;
1528 		}else{
1529 			b.red = r+img->shift[CRed]/8;
1530 			b.grn = r+img->shift[CGreen]/8;
1531 			b.blu = r+img->shift[CBlue]/8;
1532 		}
1533 		b.delta = nb;
1534 		return b;
1535 	}
1536 
1537 DBG print("2\n");
1538 	rrepl = replbit[img->nbits[CRed]];
1539 	grepl = replbit[img->nbits[CGreen]];
1540 	brepl = replbit[img->nbits[CBlue]];
1541 	arepl = replbit[img->nbits[CAlpha]];
1542 	krepl = replbit[img->nbits[CGrey]];
1543 
1544 	for(i=0; i<dx; i++){
1545 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1546 		if(copyalpha) {
1547 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1548 DBG print("a %x\n", w[-1]);
1549 		}
1550 
1551 		if(isgrey)
1552 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1553 		else if(!alphaonly){
1554 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1555 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1556 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1557 			if(convgrey){
1558 DBG print("g %x %x %x\n", ured, ugrn, ublu);
1559 				*w++ = RGB2K(ured, ugrn, ublu);
1560 DBG print("%x\n", w[-1]);
1561 			}else{
1562 				*w++ = ublu;
1563 				*w++ = ugrn;
1564 				*w++ = ured;
1565 			}
1566 		}
1567 		r += nb;
1568 		if(r == end)
1569 			r = begin;
1570 	}
1571 
1572 	b.alpha = copyalpha ? buf : &ones;
1573 	b.rgba = (ulong*)buf;
1574 	if(alphaonly){
1575 		b.red = b.grn = b.blu = b.grey = nil;
1576 		if(!copyalpha)
1577 			b.rgba = nil;
1578 		b.delta = 1;
1579 	}else if(isgrey || convgrey){
1580 		b.grey = buf+copyalpha;
1581 		b.red = b.grn = b.blu = buf+copyalpha;
1582 		b.delta = copyalpha+1;
1583 DBG print("alpha %x grey %x\n", b.alpha ? *b.alpha : 0xFF, *b.grey);
1584 	}else{
1585 		b.blu = buf+copyalpha;
1586 		b.grn = buf+copyalpha+1;
1587 		b.grey = nil;
1588 		b.red = buf+copyalpha+2;
1589 		b.delta = copyalpha+3;
1590 	}
1591 	return b;
1592 }
1593 #undef DBG
1594 
1595 #define DBG if(0)
1596 static void
writebyte(Param * p,uchar * w,Buffer src)1597 writebyte(Param *p, uchar *w, Buffer src)
1598 {
1599 	Memimage *img;
1600 	int i, isalpha, isgrey, nb, delta, dx, adelta;
1601 	uchar ff, *red, *grn, *blu, *grey, *alpha;
1602 	ulong u, mask;
1603 
1604 	img = p->img;
1605 
1606 	red = src.red;
1607 	grn = src.grn;
1608 	blu = src.blu;
1609 	alpha = src.alpha;
1610 	delta = src.delta;
1611 	grey = src.grey;
1612 	dx = p->dx;
1613 
1614 	nb = img->depth/8;
1615 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1616 
1617 	isalpha = img->flags&Falpha;
1618 	isgrey = img->flags&Fgrey;
1619 	adelta = src.delta;
1620 
1621 	if(isalpha && (alpha == nil || alpha == &ones)){
1622 		ff = 0xFF;
1623 		alpha = &ff;
1624 		adelta = 0;
1625 	}
1626 
1627 	for(i=0; i<dx; i++){
1628 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1629 DBG print("u %.8lux...", u);
1630 		u &= mask;
1631 DBG print("&mask %.8lux...", u);
1632 		if(isgrey){
1633 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1634 DBG print("|grey %.8lux...", u);
1635 			grey += delta;
1636 		}else{
1637 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1638 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1639 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1640 			red += delta;
1641 			grn += delta;
1642 			blu += delta;
1643 DBG print("|rgb %.8lux...", u);
1644 		}
1645 
1646 		if(isalpha){
1647 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1648 			alpha += adelta;
1649 DBG print("|alpha %.8lux...", u);
1650 		}
1651 
1652 		w[0] = u;
1653 		w[1] = u>>8;
1654 		w[2] = u>>16;
1655 		w[3] = u>>24;
1656 		w += nb;
1657 	}
1658 }
1659 #undef DBG
1660 
1661 static Readfn*
readfn(Memimage * img)1662 readfn(Memimage *img)
1663 {
1664 	if(img->depth < 8)
1665 		return readnbit;
1666 	if(img->nbits[CMap] == 8)
1667 		return readcmap;
1668 	return readbyte;
1669 }
1670 
1671 static Readfn*
readalphafn(Memimage * m)1672 readalphafn(Memimage *m)
1673 {
1674 	USED(m);
1675 	return readbyte;
1676 }
1677 
1678 static Writefn*
writefn(Memimage * img)1679 writefn(Memimage *img)
1680 {
1681 	if(img->depth < 8)
1682 		return writenbit;
1683 	if(img->chan == CMAP8)
1684 		return writecmap;
1685 	return writebyte;
1686 }
1687 
1688 static void
nullwrite(Param * p,uchar * s,Buffer b)1689 nullwrite(Param *p, uchar *s, Buffer b)
1690 {
1691 	USED(p);
1692 	USED(s);
1693 	USED(b.grey);
1694 }
1695 
1696 static Buffer
readptr(Param * p,uchar * s,int y)1697 readptr(Param *p, uchar *s, int y)
1698 {
1699 	Buffer b;
1700 	uchar *q;
1701 
1702 	USED(s);
1703 	q = p->bytermin + y*p->bwidth;
1704 	b.red = q;	/* ptr to data */
1705 	b.grn = b.blu = b.grey = b.alpha = nil;
1706 	b.rgba = (ulong*)q;
1707 	b.delta = p->img->depth/8;
1708 	return b;
1709 }
1710 
1711 static Buffer
boolmemmove(Buffer bdst,Buffer bsrc,Buffer b1,int dx,int i,int o)1712 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1713 {
1714 	USED(i);
1715 	USED(o);
1716 	USED(b1.grey);
1717 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1718 	return bdst;
1719 }
1720 
1721 static Buffer
boolcopy8(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1722 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1723 {
1724 	uchar *m, *r, *w, *ew;
1725 
1726 	USED(i);
1727 	USED(o);
1728 	m = bmask.grey;
1729 	w = bdst.red;
1730 	r = bsrc.red;
1731 	ew = w+dx;
1732 	for(; w < ew; w++,r++)
1733 		if(*m++)
1734 			*w = *r;
1735 	return bdst;	/* not used */
1736 }
1737 
1738 static Buffer
boolcopy16(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1739 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1740 {
1741 	uchar *m;
1742 	ushort *r, *w, *ew;
1743 
1744 	USED(i);
1745 	USED(o);
1746 	m = bmask.grey;
1747 	w = (ushort*)bdst.red;
1748 	r = (ushort*)bsrc.red;
1749 	ew = w+dx;
1750 	for(; w < ew; w++,r++)
1751 		if(*m++)
1752 			*w = *r;
1753 	return bdst;	/* not used */
1754 }
1755 
1756 static Buffer
boolcopy24(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1757 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1758 {
1759 	uchar *m;
1760 	uchar *r, *w, *ew;
1761 
1762 	USED(i);
1763 	USED(o);
1764 	m = bmask.grey;
1765 	w = bdst.red;
1766 	r = bsrc.red;
1767 	ew = w+dx*3;
1768 	while(w < ew){
1769 		if(*m++){
1770 			*w++ = *r++;
1771 			*w++ = *r++;
1772 			*w++ = *r++;
1773 		}else{
1774 			w += 3;
1775 			r += 3;
1776 		}
1777 	}
1778 	return bdst;	/* not used */
1779 }
1780 
1781 static Buffer
boolcopy32(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1782 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1783 {
1784 	uchar *m;
1785 	ulong *r, *w, *ew;
1786 
1787 	USED(i);
1788 	USED(o);
1789 	m = bmask.grey;
1790 	w = (ulong*)bdst.red;
1791 	r = (ulong*)bsrc.red;
1792 	ew = w+dx;
1793 	for(; w < ew; w++,r++)
1794 		if(*m++)
1795 			*w = *r;
1796 	return bdst;	/* not used */
1797 }
1798 
1799 static Buffer
genconv(Param * p,uchar * buf,int y)1800 genconv(Param *p, uchar *buf, int y)
1801 {
1802 	Buffer b;
1803 	int nb;
1804 	uchar *r, *w, *ew;
1805 
1806 	/* read from source into RGB format in convbuf */
1807 	b = p->convreadcall(p, p->convbuf, y);
1808 
1809 	/* write RGB format into dst format in buf */
1810 	p->convwritecall(p->convdpar, buf, b);
1811 
1812 	if(p->convdx){
1813 		nb = p->convdpar->img->depth/8;
1814 		r = buf;
1815 		w = buf+nb*p->dx;
1816 		ew = buf+nb*p->convdx;
1817 		while(w<ew)
1818 			*w++ = *r++;
1819 	}
1820 
1821 	b.red = buf;
1822 	b.blu = b.grn = b.grey = b.alpha = nil;
1823 	b.rgba = (ulong*)buf;
1824 	b.delta = 0;
1825 
1826 	return b;
1827 }
1828 
1829 static Readfn*
convfn(Memimage * dst,Param * dpar,Memimage * src,Param * spar,int * ndrawbuf)1830 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar, int *ndrawbuf)
1831 {
1832 	if(dst->chan == src->chan && !(src->flags&Frepl)){
1833 //if(drawdebug) iprint("readptr...");
1834 		return readptr;
1835 	}
1836 
1837 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1838 		/* cheat because we know the replicated value is exactly the color map entry. */
1839 //if(drawdebug) iprint("Readnbit...");
1840 		return readnbit;
1841 	}
1842 
1843 	spar->convreadcall = readfn(src);
1844 	spar->convwritecall = writefn(dst);
1845 	spar->convdpar = dpar;
1846 
1847 	/* allocate a conversion buffer */
1848 	spar->convbufoff = *ndrawbuf;
1849 	*ndrawbuf += spar->dx*4;
1850 
1851 	if(spar->dx > Dx(spar->img->r)){
1852 		spar->convdx = spar->dx;
1853 		spar->dx = Dx(spar->img->r);
1854 	}
1855 
1856 //if(drawdebug) iprint("genconv...");
1857 	return genconv;
1858 }
1859 
1860 static ulong
pixelbits(Memimage * i,Point pt)1861 pixelbits(Memimage *i, Point pt)
1862 {
1863 	uchar *p;
1864 	ulong val;
1865 	int off, bpp, npack;
1866 
1867 	val = 0;
1868 	p = byteaddr(i, pt);
1869 	switch(bpp=i->depth){
1870 	case 1:
1871 	case 2:
1872 	case 4:
1873 		npack = 8/bpp;
1874 		off = pt.x%npack;
1875 		val = p[0] >> bpp*(npack-1-off);
1876 		val &= (1<<bpp)-1;
1877 		break;
1878 	case 8:
1879 		val = p[0];
1880 		break;
1881 	case 16:
1882 		val = p[0]|(p[1]<<8);
1883 		break;
1884 	case 24:
1885 		val = p[0]|(p[1]<<8)|(p[2]<<16);
1886 		break;
1887 	case 32:
1888 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1889 		break;
1890 	}
1891 	while(bpp<32){
1892 		val |= val<<bpp;
1893 		bpp *= 2;
1894 	}
1895 	return val;
1896 }
1897 
1898 static Calcfn*
boolcopyfn(Memimage * img,Memimage * mask)1899 boolcopyfn(Memimage *img, Memimage *mask)
1900 {
1901 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
1902 		return boolmemmove;
1903 
1904 	switch(img->depth){
1905 	case 8:
1906 		return boolcopy8;
1907 	case 16:
1908 		return boolcopy16;
1909 	case 24:
1910 		return boolcopy24;
1911 	case 32:
1912 		return boolcopy32;
1913 	default:
1914 		assert(0 /* boolcopyfn */);
1915 	}
1916 	return nil;
1917 }
1918 
1919 /*
1920  * Optimized draw for filling and scrolling; uses memset and memmove.
1921  */
1922 static void
memsetb(void * vp,uchar val,int n)1923 memsetb(void *vp, uchar val, int n)
1924 {
1925 	uchar *p, *ep;
1926 
1927 	p = vp;
1928 	ep = p+n;
1929 	while(p<ep)
1930 		*p++ = val;
1931 }
1932 
1933 static void
memsets(void * vp,ushort val,int n)1934 memsets(void *vp, ushort val, int n)
1935 {
1936 	ushort *p, *ep;
1937 
1938 	p = vp;
1939 	ep = p+n;
1940 	while(p<ep)
1941 		*p++ = val;
1942 }
1943 
1944 static void
memsetl(void * vp,ulong val,int n)1945 memsetl(void *vp, ulong val, int n)
1946 {
1947 	ulong *p, *ep;
1948 
1949 	p = vp;
1950 	ep = p+n;
1951 	while(p<ep)
1952 		*p++ = val;
1953 }
1954 
1955 static void
memset24(void * vp,ulong val,int n)1956 memset24(void *vp, ulong val, int n)
1957 {
1958 	uchar *p, *ep;
1959 	uchar a,b,c;
1960 
1961 	p = vp;
1962 	ep = p+3*n;
1963 	a = val;
1964 	b = val>>8;
1965 	c = val>>16;
1966 	while(p<ep){
1967 		*p++ = a;
1968 		*p++ = b;
1969 		*p++ = c;
1970 	}
1971 }
1972 
1973 static ulong
imgtorgba(Memimage * img,ulong val)1974 imgtorgba(Memimage *img, ulong val)
1975 {
1976 	uchar r, g, b, a;
1977 	int nb, ov, v;
1978 	ulong chan;
1979 	uchar *p;
1980 
1981 	a = 0xFF;
1982 	r = g = b = 0xAA;	/* garbage */
1983 	for(chan=img->chan; chan; chan>>=8){
1984 		nb = NBITS(chan);
1985 		ov = v = val&((1<<nb)-1);
1986 		val >>= nb;
1987 
1988 		while(nb < 8){
1989 			v |= v<<nb;
1990 			nb *= 2;
1991 		}
1992 		v >>= (nb-8);
1993 
1994 		switch(TYPE(chan)){
1995 		case CRed:
1996 			r = v;
1997 			break;
1998 		case CGreen:
1999 			g = v;
2000 			break;
2001 		case CBlue:
2002 			b = v;
2003 			break;
2004 		case CAlpha:
2005 			a = v;
2006 			break;
2007 		case CGrey:
2008 			r = g = b = v;
2009 			break;
2010 		case CMap:
2011 			p = img->cmap->cmap2rgb+3*ov;
2012 			r = *p++;
2013 			g = *p++;
2014 			b = *p;
2015 			break;
2016 		}
2017 	}
2018 	return (r<<24)|(g<<16)|(b<<8)|a;
2019 }
2020 
2021 static ulong
rgbatoimg(Memimage * img,ulong rgba)2022 rgbatoimg(Memimage *img, ulong rgba)
2023 {
2024 	ulong chan;
2025 	int d, nb;
2026 	ulong v;
2027 	uchar *p, r, g, b, a, m;
2028 
2029 	v = 0;
2030 	r = rgba>>24;
2031 	g = rgba>>16;
2032 	b = rgba>>8;
2033 	a = rgba;
2034 	d = 0;
2035 	for(chan=img->chan; chan; chan>>=8){
2036 		nb = NBITS(chan);
2037 		switch(TYPE(chan)){
2038 		case CRed:
2039 			v |= (r>>(8-nb))<<d;
2040 			break;
2041 		case CGreen:
2042 			v |= (g>>(8-nb))<<d;
2043 			break;
2044 		case CBlue:
2045 			v |= (b>>(8-nb))<<d;
2046 			break;
2047 		case CAlpha:
2048 			v |= (a>>(8-nb))<<d;
2049 			break;
2050 		case CMap:
2051 			p = img->cmap->rgb2cmap;
2052 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2053 			v |= (m>>(8-nb))<<d;
2054 			break;
2055 		case CGrey:
2056 			m = RGB2K(r,g,b);
2057 			v |= (m>>(8-nb))<<d;
2058 			break;
2059 		}
2060 		d += nb;
2061 	}
2062 //	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
2063 	return v;
2064 }
2065 
2066 #define DBG if(0)
2067 static int
memoptdraw(Memdrawparam * par)2068 memoptdraw(Memdrawparam *par)
2069 {
2070 	int m, y, dy, dx, op;
2071 	ulong v;
2072 	Memimage *src;
2073 	Memimage *dst;
2074 
2075 	dx = Dx(par->r);
2076 	dy = Dy(par->r);
2077 	src = par->src;
2078 	dst = par->dst;
2079 	op = par->op;
2080 
2081 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2082 	/*
2083 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
2084 	 * destination format and just replicate with memset.
2085 	 */
2086 	m = Simplesrc|Simplemask|Fullmask;
2087 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2088 		uchar *dp, p[4];
2089 		int d, dwid, ppb, np, nb;
2090 		uchar lm, rm;
2091 
2092 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
2093 		dwid = dst->width*sizeof(ulong);
2094 		dp = byteaddr(dst, par->r.min);
2095 		v = par->sdval;
2096 DBG print("sdval %lud, depth %d\n", v, dst->depth);
2097 		switch(dst->depth){
2098 		case 1:
2099 		case 2:
2100 		case 4:
2101 			for(d=dst->depth; d<8; d*=2)
2102 				v |= (v<<d);
2103 			ppb = 8/dst->depth;	/* pixels per byte */
2104 			m = ppb-1;
2105 			/* left edge */
2106 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
2107 			dx -= (ppb-np);
2108 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
2109 			lm = (1<<nb)-1;
2110 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);
2111 
2112 			/* right edge */
2113 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
2114 			dx -= np;
2115 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
2116 			rm = ~((1<<nb)-1);
2117 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);
2118 
2119 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2120 			/* lm, rm are masks that are 1 where we should touch the bits */
2121 			if(dx < 0){	/* just one byte */
2122 				lm &= rm;
2123 				for(y=0; y<dy; y++, dp+=dwid)
2124 					*dp ^= (v ^ *dp) & lm;
2125 			}else if(dx == 0){	/* no full bytes */
2126 				if(lm)
2127 					dwid--;
2128 
2129 				for(y=0; y<dy; y++, dp+=dwid){
2130 					if(lm){
2131 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2132 						*dp ^= (v ^ *dp) & lm;
2133 						dp++;
2134 					}
2135 					*dp ^= (v ^ *dp) & rm;
2136 				}
2137 			}else{		/* full bytes in middle */
2138 				dx /= ppb;
2139 				if(lm)
2140 					dwid--;
2141 				dwid -= dx;
2142 
2143 				for(y=0; y<dy; y++, dp+=dwid){
2144 					if(lm){
2145 						*dp ^= (v ^ *dp) & lm;
2146 						dp++;
2147 					}
2148 					memset(dp, v, dx);
2149 					dp += dx;
2150 					*dp ^= (v ^ *dp) & rm;
2151 				}
2152 			}
2153 			return 1;
2154 		case 8:
2155 			for(y=0; y<dy; y++, dp+=dwid)
2156 				memset(dp, v, dx);
2157 			return 1;
2158 		case 16:
2159 			p[0] = v;		/* make little endian */
2160 			p[1] = v>>8;
2161 			v = *(ushort*)p;
2162 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
2163 	dp, dx, dy, dwid);
2164 			for(y=0; y<dy; y++, dp+=dwid)
2165 				memsets(dp, v, dx);
2166 			return 1;
2167 		case 24:
2168 			for(y=0; y<dy; y++, dp+=dwid)
2169 				memset24(dp, v, dx);
2170 			return 1;
2171 		case 32:
2172 			p[0] = v;		/* make little endian */
2173 			p[1] = v>>8;
2174 			p[2] = v>>16;
2175 			p[3] = v>>24;
2176 			v = *(ulong*)p;
2177 			for(y=0; y<dy; y++, dp+=dwid)
2178 				memsetl(dp, v, dx);
2179 			return 1;
2180 		default:
2181 			assert(0 /* bad dest depth in memoptdraw */);
2182 		}
2183 	}
2184 
2185 	/*
2186 	 * If no source alpha, an opaque mask, we can just copy the
2187 	 * source onto the destination.  If the channels are the same and
2188 	 * the source is not replicated, memmove suffices.
2189 	 */
2190 	m = Simplemask|Fullmask;
2191 	if((par->state&(m|Replsrc))==m && src->depth >= 8
2192 	&& src->chan == dst->chan && (op == S || (op == SoverD && !(src->flags&Falpha)))){
2193 		uchar *sp, *dp;
2194 		long swid, dwid, nb;
2195 		int dir;
2196 
2197 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2198 			dir = -1;
2199 		else
2200 			dir = 1;
2201 
2202 		swid = src->width*sizeof(ulong);
2203 		dwid = dst->width*sizeof(ulong);
2204 		sp = byteaddr(src, par->sr.min);
2205 		dp = byteaddr(dst, par->r.min);
2206 		if(dir == -1){
2207 			sp += (dy-1)*swid;
2208 			dp += (dy-1)*dwid;
2209 			swid = -swid;
2210 			dwid = -dwid;
2211 		}
2212 		nb = (dx*src->depth)/8;
2213 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2214 			memmove(dp, sp, nb);
2215 		return 1;
2216 	}
2217 
2218 	/*
2219 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2220 	 * they're all bit aligned, we can just use bit operators.  This happens
2221 	 * when we're manipulating boolean masks, e.g. in the arc code.
2222 	 */
2223 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
2224 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
2225 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2226 		uchar *sp, *dp, *mp;
2227 		uchar lm, rm;
2228 		long swid, dwid, mwid;
2229 		int i, x, dir;
2230 
2231 		sp = byteaddr(src, par->sr.min);
2232 		dp = byteaddr(dst, par->r.min);
2233 		mp = byteaddr(par->mask, par->mr.min);
2234 		swid = src->width*sizeof(ulong);
2235 		dwid = dst->width*sizeof(ulong);
2236 		mwid = par->mask->width*sizeof(ulong);
2237 
2238 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2239 			dir = -1;
2240 		}else
2241 			dir = 1;
2242 
2243 		lm = 0xFF>>(par->r.min.x&7);
2244 		rm = 0xFF<<(8-(par->r.max.x&7));
2245 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2246 
2247 		if(dx < 0){	/* one byte wide */
2248 			lm &= rm;
2249 			if(dir == -1){
2250 				dp += dwid*(dy-1);
2251 				sp += swid*(dy-1);
2252 				mp += mwid*(dy-1);
2253 				dwid = -dwid;
2254 				swid = -swid;
2255 				mwid = -mwid;
2256 			}
2257 			for(y=0; y<dy; y++){
2258 				*dp ^= (*dp ^ *sp) & *mp & lm;
2259 				dp += dwid;
2260 				sp += swid;
2261 				mp += mwid;
2262 			}
2263 			return 1;
2264 		}
2265 
2266 		dx /= 8;
2267 		if(dir == 1){
2268 			i = (lm!=0)+dx+(rm!=0);
2269 			mwid -= i;
2270 			swid -= i;
2271 			dwid -= i;
2272 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2273 				if(lm){
2274 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
2275 					dp++;
2276 				}
2277 				for(x=0; x<dx; x++){
2278 					*dp ^= (*dp ^ *sp++) & *mp++;
2279 					dp++;
2280 				}
2281 				if(rm){
2282 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
2283 					dp++;
2284 				}
2285 			}
2286 			return 1;
2287 		}else{
2288 		/* dir == -1 */
2289 			i = (lm!=0)+dx+(rm!=0);
2290 			dp += dwid*(dy-1)+i-1;
2291 			sp += swid*(dy-1)+i-1;
2292 			mp += mwid*(dy-1)+i-1;
2293 			dwid = -dwid+i;
2294 			swid = -swid+i;
2295 			mwid = -mwid+i;
2296 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2297 				if(rm){
2298 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
2299 					dp--;
2300 				}
2301 				for(x=0; x<dx; x++){
2302 					*dp ^= (*dp ^ *sp--) & *mp--;
2303 					dp--;
2304 				}
2305 				if(lm){
2306 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
2307 					dp--;
2308 				}
2309 			}
2310 		}
2311 		return 1;
2312 	}
2313 	return 0;
2314 }
2315 #undef DBG
2316 
2317 /*
2318  * Boolean character drawing.
2319  * Solid opaque color through a 1-bit greyscale mask.
2320  */
2321 #define DBG if(0)
2322 static int
chardraw(Memdrawparam * par)2323 chardraw(Memdrawparam *par)
2324 {
2325 	ulong bits;
2326 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2327 	ulong v, maskwid, dstwid;
2328 	uchar *wp, *rp, *q, *wc;
2329 	ushort *ws;
2330 	ulong *wl;
2331 	uchar sp[4];
2332 	Rectangle r, mr;
2333 	Memimage *mask, *src, *dst;
2334 
2335 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2336 		par->mask->flags, par->mask->depth, par->src->flags,
2337 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2338 
2339 	mask = par->mask;
2340 	src = par->src;
2341 	dst = par->dst;
2342 	r = par->r;
2343 	mr = par->mr;
2344 	op = par->op;
2345 
2346 	if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2347 	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2348 	|| op != SoverD)
2349 		return 0;
2350 
2351 //if(drawdebug) iprint("chardraw...");
2352 
2353 	depth = mask->depth;
2354 	maskwid = mask->width*sizeof(ulong);
2355 	rp = byteaddr(mask, mr.min);
2356 	npack = 8/depth;
2357 	bsh = (mr.min.x % npack) * depth;
2358 
2359 	wp = byteaddr(dst, r.min);
2360 	dstwid = dst->width*sizeof(ulong);
2361 DBG print("bsh %d\n", bsh);
2362 	dy = Dy(r);
2363 	dx = Dx(r);
2364 
2365 	ddepth = dst->depth;
2366 
2367 	/*
2368 	 * for loop counts from bsh to bsh+dx
2369 	 *
2370 	 * we want the bottom bits to be the amount
2371 	 * to shift the pixels down, so for n≡0 (mod 8) we want
2372 	 * bottom bits 7.  for n≡1, 6, etc.
2373 	 * the bits come from -n-1.
2374 	 */
2375 
2376 	bx = -bsh-1;
2377 	ex = -bsh-1-dx;
2378 	bits = 0;
2379 	v = par->sdval;
2380 
2381 	/* make little endian */
2382 	sp[0] = v;
2383 	sp[1] = v>>8;
2384 	sp[2] = v>>16;
2385 	sp[3] = v>>24;
2386 
2387 //print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
2388 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2389 		q = rp;
2390 		if(bsh)
2391 			bits = *q++;
2392 		switch(ddepth){
2393 		case 8:
2394 //if(drawdebug) iprint("8loop...");
2395 			wc = wp;
2396 			for(x=bx; x>ex; x--, wc++){
2397 				i = x&7;
2398 				if(i == 8-1)
2399 					bits = *q++;
2400 DBG print("bits %lux sh %d...", bits, i);
2401 				if((bits>>i)&1)
2402 					*wc = v;
2403 			}
2404 			break;
2405 		case 16:
2406 			ws = (ushort*)wp;
2407 			v = *(ushort*)sp;
2408 			for(x=bx; x>ex; x--, ws++){
2409 				i = x&7;
2410 				if(i == 8-1)
2411 					bits = *q++;
2412 DBG print("bits %lux sh %d...", bits, i);
2413 				if((bits>>i)&1)
2414 					*ws = v;
2415 			}
2416 			break;
2417 		case 24:
2418 			wc = wp;
2419 			for(x=bx; x>ex; x--, wc+=3){
2420 				i = x&7;
2421 				if(i == 8-1)
2422 					bits = *q++;
2423 DBG print("bits %lux sh %d...", bits, i);
2424 				if((bits>>i)&1){
2425 					wc[0] = sp[0];
2426 					wc[1] = sp[1];
2427 					wc[2] = sp[2];
2428 				}
2429 			}
2430 			break;
2431 		case 32:
2432 			wl = (ulong*)wp;
2433 			v = *(ulong*)sp;
2434 			for(x=bx; x>ex; x--, wl++){
2435 				i = x&7;
2436 				if(i == 8-1)
2437 					bits = *q++;
2438 DBG iprint("bits %lux sh %d...", bits, i);
2439 				if((bits>>i)&1)
2440 					*wl = v;
2441 			}
2442 			break;
2443 		}
2444 	}
2445 
2446 DBG print("\n");
2447 	return 1;
2448 }
2449 #undef DBG
2450 
2451 
2452 /*
2453  * Fill entire byte with replicated (if necessary) copy of source pixel,
2454  * assuming destination ldepth is >= source ldepth.
2455  *
2456  * This code is just plain wrong for >8bpp.
2457  *
2458 ulong
2459 membyteval(Memimage *src)
2460 {
2461 	int i, val, bpp;
2462 	uchar uc;
2463 
2464 	unloadmemimage(src, src->r, &uc, 1);
2465 	bpp = src->depth;
2466 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
2467 	uc &= ~(0xFF>>bpp);
2468 	// pixel value is now in high part of byte. repeat throughout byte
2469 	val = uc;
2470 	for(i=bpp; i<8; i<<=1)
2471 		val |= val>>i;
2472 	return val;
2473 }
2474  *
2475  */
2476 
2477 void
memfillcolor(Memimage * i,ulong val)2478 memfillcolor(Memimage *i, ulong val)
2479 {
2480 	ulong bits;
2481 	int d, y;
2482 	uchar p[4];
2483 
2484 	if(val == DNofill)
2485 		return;
2486 
2487 	bits = rgbatoimg(i, val);
2488 	switch(i->depth){
2489 	case 24:	/* 24-bit images suck */
2490 		for(y=i->r.min.y; y<i->r.max.y; y++)
2491 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2492 		break;
2493 	default:	/* 1, 2, 4, 8, 16, 32 */
2494 		for(d=i->depth; d<32; d*=2)
2495 			bits = (bits << d) | bits;
2496 		p[0] = bits;		/* make little endian */
2497 		p[1] = bits>>8;
2498 		p[2] = bits>>16;
2499 		p[3] = bits>>24;
2500 		bits = *(u32int*)p;
2501 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2502 		break;
2503 	}
2504 }
2505 
2506