xref: /plan9-contrib/sys/src/libmemdraw/draw.c (revision 6f1f24dd981116678e9fd1c5fb4c6ae548570725)
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <memdraw.h>
5 
6 int drawdebug;
7 static int	tablesbuilt;
8 
/* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
#define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)

/*
 * Three ways to divide by 255 without a divide instruction:
 *  - for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.
 *  - for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.
 *  - the last (commented-out) form is perfect for all x up to 1<<16 and avoids
 *    the multiply, but requires a rathole: a scratch variable tmp in scope.
 * The second form is the one compiled in.
 */
/* #define DIV255(x) (((x)*257+256)>>16)  */
#define DIV255(x) ((((x)+1)*257)>>16)
/* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */

/*
 * MUL(x, y, t): x*y scaled back down by 255 with rounding (the +128),
 * using the shift-and-add division trick; t is a caller-supplied scratch lvalue.
 */
#define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)
/* byte lanes 1 and 3, and 0 and 2, of a 32-bit word */
#define MASK13	0xFF00FF00
#define MASK02	0x00FF00FF
/*
 * MUL13/MUL02 apply the MUL computation to two byte lanes of x at once;
 * each result is left in lanes 0 and 2.  MUL0123 recombines them so that
 * all four bytes of x are scaled by a.  s and t are scratch lvalues.
 */
#define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
#define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))

/*
 * MUL2(u, v, x, y): like MUL applied to the sum u*v + x*y.
 * NOTE: unlike MUL this takes no scratch argument; it assigns to a
 * variable named t that must be in scope at the point of expansion.
 */
#define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)
29 
30 static void mktables(void);
31 typedef int Subdraw(Memdrawparam*);
32 static Subdraw chardraw, alphadraw, memoptdraw;
33 
34 static Memimage*	memones;
35 static Memimage*	memzeros;
36 Memimage *memwhite;
37 Memimage *memblack;
38 Memimage *memtransparent;
39 Memimage *memopaque;
40 
41 int	_ifmt(Fmt*);
42 
43 void
memimageinit(void)44 memimageinit(void)
45 {
46 	static int didinit = 0;
47 
48 	if(didinit)
49 		return;
50 
51 	didinit = 1;
52 
53 	memdrawallocinit();
54 
55 	mktables();
56 	_memmkcmap();
57 
58 	fmtinstall('R', Rfmt);
59 	fmtinstall('P', Pfmt);
60 	fmtinstall('b', _ifmt);
61 
62 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
63 	memones->flags |= Frepl;
64 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
65 	*byteaddr(memones, ZP) = ~0;
66 
67 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
68 	memzeros->flags |= Frepl;
69 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
70 	*byteaddr(memzeros, ZP) = 0;
71 
72 	if(memones == nil || memzeros == nil)
73 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
74 
75 	memwhite = memones;
76 	memblack = memzeros;
77 	memopaque = memones;
78 	memtransparent = memzeros;
79 }
80 
81 static ulong imgtorgba(Memimage*, ulong);
82 static ulong rgbatoimg(Memimage*, ulong);
83 static ulong pixelbits(Memimage*, Point);
84 
85 #define DBG if(0)
86 void
memimagedraw(Memimage * dst,Rectangle r,Memimage * src,Point p0,Memimage * mask,Point p1,int op)87 memimagedraw(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
88 {
89 	static int n = 0;
90 	Memdrawparam par;
91 
92 	if(mask == nil)
93 		mask = memopaque;
94 
95 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
96 
97 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
98 //		if(drawdebug)
99 //			iprint("empty clipped rectangle\n");
100 		return;
101 	}
102 
103 	if(op < Clear || op > SoverD){
104 //		if(drawdebug)
105 //			iprint("op out of range: %d\n", op);
106 		return;
107 	}
108 
109 	par.op = op;
110 	par.dst = dst;
111 	par.r = r;
112 	par.src = src;
113 	/* par.sr set by drawclip */
114 	par.mask = mask;
115 	/* par.mr set by drawclip */
116 
117 	par.state = 0;
118 	if(src->flags&Frepl){
119 		par.state |= Replsrc;
120 		if(Dx(src->r)==1 && Dy(src->r)==1){
121 			par.sval = pixelbits(src, src->r.min);
122 			par.state |= Simplesrc;
123 			par.srgba = imgtorgba(src, par.sval);
124 			par.sdval = rgbatoimg(dst, par.srgba);
125 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
126 //				if (drawdebug) iprint("fill with transparent source\n");
127 				return;	/* no-op successfully handled */
128 			}
129 		}
130 	}
131 
132 	if(mask->flags & Frepl){
133 		par.state |= Replmask;
134 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
135 			par.mval = pixelbits(mask, mask->r.min);
136 			if(par.mval == 0 && (op&DoutS)){
137 //				if(drawdebug) iprint("fill with zero mask\n");
138 				return;	/* no-op successfully handled */
139 			}
140 			par.state |= Simplemask;
141 			if(par.mval == ~0)
142 				par.state |= Fullmask;
143 			par.mrgba = imgtorgba(mask, par.mval);
144 		}
145 	}
146 
147 //	if(drawdebug)
148 //		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
149 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
150 
151 	/*
152 	 * Now that we've clipped the parameters down to be consistent, we
153 	 * simply try sub-drawing routines in order until we find one that was able
154 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
155 	 * unable to satisfy the request, so we do not return.
156 	 */
157 
158 	/*
159 	 * Hardware support.  Each video driver provides this function,
160 	 * which checks to see if there is anything it can help with.
161 	 * There could be an if around this checking to see if dst is in video memory.
162 	 */
163 DBG print("test hwdraw\n");
164 	if(hwdraw(&par)){
165 //if(drawdebug) iprint("hw handled\n");
166 DBG print("hwdraw handled\n");
167 		return;
168 	}
169 	/*
170 	 * Optimizations using memmove and memset.
171 	 */
172 DBG print("test memoptdraw\n");
173 	if(memoptdraw(&par)){
174 //if(drawdebug) iprint("memopt handled\n");
175 DBG print("memopt handled\n");
176 		return;
177 	}
178 
179 	/*
180 	 * Character drawing.
181 	 * Solid source color being painted through a boolean mask onto a high res image.
182 	 */
183 DBG print("test chardraw\n");
184 	if(chardraw(&par)){
185 //if(drawdebug) iprint("chardraw handled\n");
186 DBG print("chardraw handled\n");
187 		return;
188 	}
189 
190 	/*
191 	 * General calculation-laden case that does alpha for each pixel.
192 	 */
193 DBG print("do alphadraw\n");
194 	alphadraw(&par);
195 //if(drawdebug) iprint("alphadraw handled\n");
196 DBG print("alphadraw handled\n");
197 }
198 #undef DBG
199 
200 /*
201  * Clip the destination rectangle further based on the properties of the
202  * source and mask rectangles.  Once the destination rectangle is properly
203  * clipped, adjust the source and mask rectangles to be the same size.
204  * Then if source or mask is replicated, move its clipped rectangle
205  * so that its minimum point falls within the repl rectangle.
206  *
207  * Return zero if the final rectangle is null.
208  */
209 int
drawclip(Memimage * dst,Rectangle * r,Memimage * src,Point * p0,Memimage * mask,Point * p1,Rectangle * sr,Rectangle * mr)210 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
211 {
212 	Point rmin, delta;
213 	int splitcoords;
214 	Rectangle omr;
215 
216 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
217 		return 0;
218 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
219 	/* clip to destination */
220 	rmin = r->min;
221 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
222 		return 0;
223 	/* move mask point */
224 	p1->x += r->min.x-rmin.x;
225 	p1->y += r->min.y-rmin.y;
226 	/* move source point */
227 	p0->x += r->min.x-rmin.x;
228 	p0->y += r->min.y-rmin.y;
229 	/* map destination rectangle into source */
230 	sr->min = *p0;
231 	sr->max.x = p0->x+Dx(*r);
232 	sr->max.y = p0->y+Dy(*r);
233 	/* sr is r in source coordinates; clip to source */
234 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
235 		return 0;
236 	if(!rectclip(sr, src->clipr))
237 		return 0;
238 	/* compute and clip rectangle in mask */
239 	if(splitcoords){
240 		/* move mask point with source */
241 		p1->x += sr->min.x-p0->x;
242 		p1->y += sr->min.y-p0->y;
243 		mr->min = *p1;
244 		mr->max.x = p1->x+Dx(*sr);
245 		mr->max.y = p1->y+Dy(*sr);
246 		omr = *mr;
247 		/* mr is now rectangle in mask; clip it */
248 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
249 			return 0;
250 		if(!rectclip(mr, mask->clipr))
251 			return 0;
252 		/* reflect any clips back to source */
253 		sr->min.x += mr->min.x-omr.min.x;
254 		sr->min.y += mr->min.y-omr.min.y;
255 		sr->max.x += mr->max.x-omr.max.x;
256 		sr->max.y += mr->max.y-omr.max.y;
257 		*p1 = mr->min;
258 	}else{
259 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
260 			return 0;
261 		if(!rectclip(sr, mask->clipr))
262 			return 0;
263 		*p1 = sr->min;
264 	}
265 
266 	/* move source clipping back to destination */
267 	delta.x = r->min.x - p0->x;
268 	delta.y = r->min.y - p0->y;
269 	r->min.x = sr->min.x + delta.x;
270 	r->min.y = sr->min.y + delta.y;
271 	r->max.x = sr->max.x + delta.x;
272 	r->max.y = sr->max.y + delta.y;
273 
274 	/* move source rectangle so sr->min is in src->r */
275 	if(src->flags&Frepl) {
276 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
277 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
278 		sr->min.x += delta.x;
279 		sr->min.y += delta.y;
280 		sr->max.x += delta.x;
281 		sr->max.y += delta.y;
282 	}
283 	*p0 = sr->min;
284 
285 	/* move mask point so it is in mask->r */
286 	*p1 = drawrepl(mask->r, *p1);
287 	mr->min = *p1;
288 	mr->max.x = p1->x+Dx(*sr);
289 	mr->max.y = p1->y+Dy(*sr);
290 
291 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
292 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
293 	assert(ptinrect(*p0, src->r));
294 	assert(ptinrect(*p1, mask->r));
295 	assert(ptinrect(r->min, dst->r));
296 
297 	return 1;
298 }
299 
/*
 * Conversion tables, filled in once by mktables().
 */
static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
static uchar conv18[256][8];		/* conv18[x][y] is the yth 1-bit pixel (leftmost first) of byte x, replicated to 8 bits */
static uchar conv28[256][4];		/* conv28[x][y] is the yth 2-bit pixel (leftmost first) of byte x, replicated to 8 bits */
static uchar conv48[256][2];		/* conv48[x][y] is the yth 4-bit pixel (leftmost first) of byte x, replicated to 8 bits */
307 
/*
 * replmul[n], for 1 ≤ n ≤ 8, is the multiplier that replicates an n-bit
 * pattern to fill 8 bits.  Each set bit marks a position where the bottom
 * (ones) bit of the n-bit pattern is to land, so multiplying by it lays
 * copies of the pattern end to end across 16 bits.
 * Only the top 8 bits of that 16-bit product are actually used; the lower
 * 8 bits are needed to get bits in the right place when n is not a
 * divisor of 8.
 *
 * Should check to see if it's easier to just refer to replmul than
 * use the precomputed values in replbit.  On PCs it may well
 * be; on machines with slow multiply instructions it probably isn't.
 */
static int replmul[1+8] = {
	0x0000,	/* n=0: placeholder */
	0xFFFF,	/* n=1: 1111111111111111 */
	0x5555,	/* n=2: 0101010101010101 */
	0x2492,	/* n=3: 0010010010010010 */
	0x1111,	/* n=4: 0001000100010001 */
	0x0842,	/* n=5: 0000100001000010 */
	0x0410,	/* n=6: 0000010000010000 */
	0x0204,	/* n=7: 0000001000000100 */
	0x0101,	/* n=8: 0000000100000001 */
};
336 
337 static void
mktables(void)338 mktables(void)
339 {
340 	int i, j, mask, sh, small;
341 
342 	if(tablesbuilt)
343 		return;
344 
345 	fmtinstall('R', Rfmt);
346 	fmtinstall('P', Pfmt);
347 	tablesbuilt = 1;
348 
349 	/* bit replication up to 8 bits */
350 	for(i=0; i<256; i++){
351 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
352 			small = i & ((1<<j)-1);
353 			replbit[j][i] = (small*replmul[j])>>8;
354 		}
355 	}
356 
357 	/* bit unpacking up to 8 bits, only powers of 2 */
358 	for(i=0; i<256; i++){
359 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
360 			conv18[i][j] = replbit[1][(i>>sh)&mask];
361 
362 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
363 			conv28[i][j] = replbit[2][(i>>sh)&mask];
364 
365 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
366 			conv48[i][j] = replbit[4][(i>>sh)&mask];
367 	}
368 }
369 
370 static uchar ones = 0xff;
371 
372 /*
373  * General alpha drawing case.  Can handle anything.
374  */
/*
 * One scan line's worth of pixels, exposed as per-channel byte pointers.
 * Each pointer addresses the channel of the first pixel; adding delta
 * steps to the same channel of the next pixel.  The calc routines compare
 * alpha against the address of the file-level `ones` byte and, when it
 * matches, neither write through it nor advance it.
 */
typedef struct	Buffer	Buffer;
struct Buffer {
	/* used by most routines */
	uchar	*red;
	uchar	*grn;
	uchar	*blu;
	uchar	*alpha;
	uchar	*grey;
	ulong	*rgba;	/* whole-pixel view, used when delta is 4 */
	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */

	/* used by boolcalc* for mask data */
	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
	int		mskip;	/* no. of left bits to skip in *m */
	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
	int		bmskip;	/* no. of left bits to skip in *bm */
	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
	int		emskip;	/* no. of right bits to skip in *em */
};
394 
typedef struct	Param	Param;
/* read one scan line: (per-image parameters, scratch buffer, row offset within the image) */
typedef Buffer	Readfn(Param*, uchar*, int);
/* write one computed scan line: (per-image parameters, address of the destination row, data) */
typedef void	Writefn(Param*, uchar*, Buffer);
/* combine one scan line: (dst, src, mask, width in pixels, dst-is-grey flag, op) */
typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);

enum {
	MAXBCACHE = 16	/* most rows of a replicated image that getparam will arrange to cache */
};
403 
/*
 * giant rathole to customize functions with
 *
 * One Param is filled in by getparam() for each of source, mask and
 * destination; alphadraw() then patches in the function pointers and
 * buffer addresses that apply to the particular draw.
 */
struct Param {
	Readfn	*replcall;	/* real reader, when replread is interposed for caching */
	Readfn	*greymaskcall;	/* real reader, when greymaskread is interposed */
	Readfn	*convreadcall;
	Writefn	*convwritecall;

	Memimage *img;
	Rectangle	r;
	int	dx;	/* of r */
	int	needbuf;
	int	convgrey;
	int	alphaonly;

	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
	int		bwidth;	/* bytes per scan line: sizeof(ulong)*img->width */

	int	replcache;	/* if set, cache buffers */
	Buffer	bcache[MAXBCACHE];
	ulong	bfilled;
	uchar	*bufbase;	/* drawbuf+bufoff, set once the draw buffer exists */
	int	bufoff;	/* offset of this image's scratch area in the draw buffer */
	int	bufdelta;	/* bytes of scratch per cached row: 4*dx */

	int	dir;	/* +1 or -1: direction alphadraw steps through rows */

	int	convbufoff;
	uchar	*convbuf;
	Param	*convdpar;
	int	convdx;
};
437 
438 static uchar *drawbuf;
439 static int	ndrawbuf;
440 static int	mdrawbuf;
441 static Readfn	greymaskread, replread, readptr;
442 static Writefn	nullwrite;
443 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
444 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
445 
446 static Readfn*	readfn(Memimage*);
447 static Readfn*	readalphafn(Memimage*);
448 static Writefn*	writefn(Memimage*);
449 
450 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
451 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*, int*);
452 static Readfn*	ptrfn(Memimage*);
453 
/*
 * General per-pixel calculators, indexed by compositing operator
 * (alphadraw uses alphacalc[op]).  The digits in each function name are
 * the table indices it serves.
 */
static Calcfn *alphacalc[Ncomp] =
{
	alphacalc0,		/* Clear */
	alphacalc14,		/* DoutS */
	alphacalc2810,		/* SoutD */
	alphacalc3679,		/* DxorS */
	alphacalc14,		/* DinS */
	alphacalc5,		/* D */
	alphacalc3679,		/* DatopS */
	alphacalc3679,		/* DoverS */
	alphacalc2810,		/* SinD */
	alphacalc3679,		/* SatopD */
	alphacalc2810,		/* S */
	alphacalc11,		/* SoverD */
};
469 
/*
 * Calculators indexed by compositing operator, chosen by alphadraw in
 * place of alphacalc[op] when the mask is GREY1 and the source has no
 * alpha channel.
 */
static Calcfn *boolcalc[Ncomp] =
{
	alphacalc0,		/* Clear */
	boolcalc14,		/* DoutS */
	boolcalc236789,		/* SoutD */
	boolcalc236789,		/* DxorS */
	boolcalc14,		/* DinS */
	alphacalc5,		/* D */
	boolcalc236789,		/* DatopS */
	boolcalc236789,		/* DoverS */
	boolcalc236789,		/* SinD */
	boolcalc236789,		/* SatopD */
	boolcalc1011,		/* S */
	boolcalc1011,		/* SoverD */
};
485 
/*
 * Per-draw working state for alphadraw: a reusable scratch buffer plus
 * the source, mask and destination Params.
 *
 * Avoid standard Lock, QLock so that this can be used in the kernel:
 * a slot is claimed by test-and-set on inuse (see allocdbuf) and
 * released by storing zero.
 */
typedef struct Dbuf Dbuf;
struct Dbuf
{
	uchar *p;	/* scratch buffer, grown on demand by alphadraw */
	int n;	/* size of p in bytes */
	Param spar, mpar, dpar;
	int inuse;
};
static Dbuf dbuf[10];
498 
499 static Dbuf*
allocdbuf(void)500 allocdbuf(void)
501 {
502 	int i;
503 
504 	for(i=0; i<nelem(dbuf); i++){
505 		if(dbuf[i].inuse)
506 			continue;
507 		if(!_tas(&dbuf[i].inuse))
508 			return &dbuf[i];
509 	}
510 	return nil;
511 }
512 
513 static void
getparam(Param * p,Memimage * img,Rectangle r,int convgrey,int needbuf,int * ndrawbuf)514 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf, int *ndrawbuf)
515 {
516 	int nbuf;
517 
518 	memset(p, 0, sizeof *p);
519 
520 	p->img = img;
521 	p->r = r;
522 	p->dx = Dx(r);
523 	p->needbuf = needbuf;
524 	p->convgrey = convgrey;
525 
526 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
527 
528 	p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
529 	p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
530 	p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
531 	p->bwidth = sizeof(ulong)*img->width;
532 
533 	assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
534 
535 	if(p->r.min.x == p->img->r.min.x)
536 		assert(p->bytermin == p->bytey0s);
537 
538 	nbuf = 1;
539 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
540 		p->replcache = 1;
541 		nbuf = Dy(img->r);
542 	}
543 	p->bufdelta = 4*p->dx;
544 	p->bufoff = *ndrawbuf;
545 	*ndrawbuf += p->bufdelta*nbuf;
546 }
547 
548 static void
clipy(Memimage * img,int * y)549 clipy(Memimage *img, int *y)
550 {
551 	int dy;
552 
553 	dy = Dy(img->r);
554 	if(*y == dy)
555 		*y = 0;
556 	else if(*y == -1)
557 		*y = dy-1;
558 	assert(0 <= *y && *y < dy);
559 }
560 
561 static void
dumpbuf(char * s,Buffer b,int n)562 dumpbuf(char *s, Buffer b, int n)
563 {
564 	int i;
565 	uchar *p;
566 
567 	print("%s", s);
568 	for(i=0; i<n; i++){
569 		print(" ");
570 		if(p=b.grey){
571 			print(" k%.2uX", *p);
572 			b.grey += b.delta;
573 		}else{
574 			if(p=b.red){
575 				print(" r%.2uX", *p);
576 				b.red += b.delta;
577 			}
578 			if(p=b.grn){
579 				print(" g%.2uX", *p);
580 				b.grn += b.delta;
581 			}
582 			if(p=b.blu){
583 				print(" b%.2uX", *p);
584 				b.blu += b.delta;
585 			}
586 		}
587 		if((p=b.alpha) != &ones){
588 			print(" α%.2uX", *p);
589 			b.alpha += b.delta;
590 		}
591 	}
592 	print("\n");
593 }
594 
595 /*
596  * For each scan line, we expand the pixels from source, mask, and destination
597  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
598  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
599  * the readers need not copy the data: they can simply return pointers to the data.
600  * If the destination image is grey and the source is not, it is converted using the NTSC
601  * formula.
602  *
603  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
604  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
605  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
606  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
607  * the calculator, and that buffer is passed to a function to write it to the destination.
608  * If the buffer is already pointing at the destination, the writing function is a no-op.
609  */
610 #define DBG if(0)
611 static int
alphadraw(Memdrawparam * par)612 alphadraw(Memdrawparam *par)
613 {
614 	int isgrey, starty, endy, op;
615 	int needbuf, dsty, srcy, masky;
616 	int y, dir, dx, dy, ndrawbuf;
617 	uchar *drawbuf;
618 	Buffer bsrc, bdst, bmask;
619 	Readfn *rdsrc, *rdmask, *rddst;
620 	Calcfn *calc;
621 	Writefn *wrdst;
622 	Memimage *src, *mask, *dst;
623 	Rectangle r, sr, mr;
624 	Dbuf *z;
625 
626 	r = par->r;
627 	dx = Dx(r);
628 	dy = Dy(r);
629 
630 	z = allocdbuf();
631 	if(z == nil)
632 		return 0;
633 
634 	src = par->src;
635 	mask = par->mask;
636 	dst = par->dst;
637 	sr = par->sr;
638 	mr = par->mr;
639 	op = par->op;
640 
641 	isgrey = dst->flags&Fgrey;
642 
643 	/*
644 	 * Buffering when src and dst are the same bitmap is sufficient but not
645 	 * necessary.  There are stronger conditions we could use.  We could
646 	 * check to see if the rectangles intersect, and if simply moving in the
647 	 * correct y direction can avoid the need to buffer.
648 	 */
649 	needbuf = (src->data == dst->data);
650 
651 	ndrawbuf = 0;
652 	getparam(&z->spar, src, sr, isgrey, needbuf, &ndrawbuf);
653 	getparam(&z->dpar, dst, r, isgrey, needbuf, &ndrawbuf);
654 	getparam(&z->mpar, mask, mr, 0, needbuf, &ndrawbuf);
655 
656 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
657 	z->spar.dir = z->mpar.dir = z->dpar.dir = dir;
658 
659 	/*
660 	 * If the mask is purely boolean, we can convert from src to dst format
661 	 * when we read src, and then just copy it to dst where the mask tells us to.
662 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
663 	 *
664 	 * The computation is accomplished by assigning the function pointers as follows:
665 	 *	rdsrc - read and convert source into dst format in a buffer
666 	 * 	rdmask - convert mask to bytes, set pointer to it
667 	 * 	rddst - fill with pointer to real dst data, but do no reads
668 	 *	calc - copy src onto dst when mask says to.
669 	 *	wrdst - do nothing
670 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
671 	 * but it avoids a fair amount of code duplication to make this a case here
672 	 * rather than have a separate booldraw.
673 	 */
674 //if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
675 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
676 //if(drawdebug) iprint("boolcopy...");
677 		rdsrc = convfn(dst, &z->dpar, src, &z->spar, &ndrawbuf);
678 		rddst = readptr;
679 		rdmask = readfn(mask);
680 		calc = boolcopyfn(dst, mask);
681 		wrdst = nullwrite;
682 	}else{
683 		/* usual alphadraw parameter fetching */
684 		rdsrc = readfn(src);
685 		rddst = readfn(dst);
686 		wrdst = writefn(dst);
687 		calc = alphacalc[op];
688 
689 		/*
690 		 * If there is no alpha channel, we'll ask for a grey channel
691 		 * and pretend it is the alpha.
692 		 */
693 		if(mask->flags&Falpha){
694 			rdmask = readalphafn(mask);
695 			z->mpar.alphaonly = 1;
696 		}else{
697 			z->mpar.greymaskcall = readfn(mask);
698 			z->mpar.convgrey = 1;
699 			rdmask = greymaskread;
700 
701 			/*
702 			 * Should really be above, but then boolcopyfns would have
703 			 * to deal with bit alignment, and I haven't written that.
704 			 *
705 			 * This is a common case for things like ellipse drawing.
706 			 * When there's no alpha involved and the mask is boolean,
707 			 * we can avoid all the division and multiplication.
708 			 */
709 			if(mask->chan == GREY1 && !(src->flags&Falpha))
710 				calc = boolcalc[op];
711 			else if(op == SoverD && !(src->flags&Falpha))
712 				calc = alphacalcS;
713 		}
714 	}
715 
716 	/*
717 	 * If the image has a small enough repl rectangle,
718 	 * we can just read each line once and cache them.
719 	 */
720 	if(z->spar.replcache){
721 		z->spar.replcall = rdsrc;
722 		rdsrc = replread;
723 	}
724 	if(z->mpar.replcache){
725 		z->mpar.replcall = rdmask;
726 		rdmask = replread;
727 	}
728 
729 	if(z->n < ndrawbuf){
730 		free(z->p);
731 		if((z->p = mallocz(ndrawbuf, 0)) == nil){
732 			z->inuse = 0;
733 			return 0;
734 		}
735 		z->n = ndrawbuf;
736 	}
737 	drawbuf = z->p;
738 
739 	/*
740 	 * Before we were saving only offsets from drawbuf in the parameter
741 	 * structures; now that drawbuf has been grown to accomodate us,
742 	 * we can fill in the pointers.
743 	 */
744 	z->spar.bufbase = drawbuf+z->spar.bufoff;
745 	z->mpar.bufbase = drawbuf+z->mpar.bufoff;
746 	z->dpar.bufbase = drawbuf+z->dpar.bufoff;
747 	z->spar.convbuf = drawbuf+z->spar.convbufoff;
748 
749 	if(dir == 1){
750 		starty = 0;
751 		endy = dy;
752 	}else{
753 		starty = dy-1;
754 		endy = -1;
755 	}
756 
757 	/*
758 	 * srcy, masky, and dsty are offsets from the top of their
759 	 * respective Rectangles.  they need to be contained within
760 	 * the rectangles, so clipy can keep them there without division.
761  	 */
762 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
763 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
764 	dsty = starty + r.min.y - dst->r.min.y;
765 
766 	assert(0 <= srcy && srcy < Dy(src->r));
767 	assert(0 <= masky && masky < Dy(mask->r));
768 	assert(0 <= dsty && dsty < Dy(dst->r));
769 
770 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
771 		clipy(src, &srcy);
772 		clipy(dst, &dsty);
773 		clipy(mask, &masky);
774 
775 		bsrc = rdsrc(&z->spar, z->spar.bufbase, srcy);
776 DBG print("[");
777 		bmask = rdmask(&z->mpar, z->mpar.bufbase, masky);
778 DBG print("]\n");
779 		bdst = rddst(&z->dpar, z->dpar.bufbase, dsty);
780 DBG		dumpbuf("src", bsrc, dx);
781 DBG		dumpbuf("mask", bmask, dx);
782 DBG		dumpbuf("dst", bdst, dx);
783 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
784 		wrdst(&z->dpar, z->dpar.bytermin+dsty*z->dpar.bwidth, bdst);
785 	}
786 
787 	z->inuse = 0;
788 	return 1;
789 }
790 #undef DBG
791 
792 static Buffer
alphacalc0(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)793 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
794 {
795 	USED(grey);
796 	USED(op);
797 	USED(b1);
798 	USED(b2);
799 	memset(bdst.rgba, 0, dx*bdst.delta);
800 	return bdst;
801 }
802 
803 static Buffer
alphacalc14(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)804 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
805 {
806 	Buffer obdst;
807 	int fd, sadelta;
808 	int i, sa, ma, q;
809 	ulong s, t;
810 
811 	obdst = bdst;
812 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
813 	q = bsrc.delta == 4 && bdst.delta == 4;
814 
815 	for(i=0; i<dx; i++){
816 		sa = *bsrc.alpha;
817 		ma = *bmask.alpha;
818 		fd = MUL(sa, ma, t);
819 		if(op == DoutS)
820 			fd = 255-fd;
821 
822 		if(grey){
823 			*bdst.grey = MUL(fd, *bdst.grey, t);
824 			bsrc.grey += bsrc.delta;
825 			bdst.grey += bdst.delta;
826 		}else{
827 			if(q){
828 				*bdst.rgba = MUL0123(fd, *bdst.rgba, s, t);
829 				bsrc.rgba++;
830 				bdst.rgba++;
831 				bsrc.alpha += sadelta;
832 				bmask.alpha += bmask.delta;
833 				continue;
834 			}
835 			*bdst.red = MUL(fd, *bdst.red, t);
836 			*bdst.grn = MUL(fd, *bdst.grn, t);
837 			*bdst.blu = MUL(fd, *bdst.blu, t);
838 			bsrc.red += bsrc.delta;
839 			bsrc.blu += bsrc.delta;
840 			bsrc.grn += bsrc.delta;
841 			bdst.red += bdst.delta;
842 			bdst.blu += bdst.delta;
843 			bdst.grn += bdst.delta;
844 		}
845 		if(bdst.alpha != &ones){
846 			*bdst.alpha = MUL(fd, *bdst.alpha, t);
847 			bdst.alpha += bdst.delta;
848 		}
849 		bmask.alpha += bmask.delta;
850 		bsrc.alpha += sadelta;
851 	}
852 	return obdst;
853 }
854 
855 static Buffer
alphacalc2810(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)856 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
857 {
858 	Buffer obdst;
859 	int fs, sadelta;
860 	int i, ma, da, q;
861 	ulong s, t;
862 
863 	obdst = bdst;
864 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
865 	q = bsrc.delta == 4 && bdst.delta == 4;
866 
867 	for(i=0; i<dx; i++){
868 		ma = *bmask.alpha;
869 		da = *bdst.alpha;
870 		if(op == SoutD)
871 			da = 255-da;
872 		fs = ma;
873 		if(op != S)
874 			fs = MUL(fs, da, t);
875 
876 		if(grey){
877 			*bdst.grey = MUL(fs, *bsrc.grey, t);
878 			bsrc.grey += bsrc.delta;
879 			bdst.grey += bdst.delta;
880 		}else{
881 			if(q){
882 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t);
883 				bsrc.rgba++;
884 				bdst.rgba++;
885 				bmask.alpha += bmask.delta;
886 				bdst.alpha += bdst.delta;
887 				continue;
888 			}
889 			*bdst.red = MUL(fs, *bsrc.red, t);
890 			*bdst.grn = MUL(fs, *bsrc.grn, t);
891 			*bdst.blu = MUL(fs, *bsrc.blu, t);
892 			bsrc.red += bsrc.delta;
893 			bsrc.blu += bsrc.delta;
894 			bsrc.grn += bsrc.delta;
895 			bdst.red += bdst.delta;
896 			bdst.blu += bdst.delta;
897 			bdst.grn += bdst.delta;
898 		}
899 		if(bdst.alpha != &ones){
900 			*bdst.alpha = MUL(fs, *bsrc.alpha, t);
901 			bdst.alpha += bdst.delta;
902 		}
903 		bmask.alpha += bmask.delta;
904 		bsrc.alpha += sadelta;
905 	}
906 	return obdst;
907 }
908 
909 static Buffer
alphacalc3679(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)910 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
911 {
912 	Buffer obdst;
913 	int fs, fd, sadelta;
914 	int i, sa, ma, da, q;
915 	ulong s, t, u, v;
916 
917 	obdst = bdst;
918 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
919 	q = bsrc.delta == 4 && bdst.delta == 4;
920 
921 	for(i=0; i<dx; i++){
922 		sa = *bsrc.alpha;
923 		ma = *bmask.alpha;
924 		da = *bdst.alpha;
925 		if(op == SatopD)
926 			fs = MUL(ma, da, t);
927 		else
928 			fs = MUL(ma, 255-da, t);
929 		if(op == DoverS)
930 			fd = 255;
931 		else{
932 			fd = MUL(sa, ma, t);
933 			if(op != DatopS)
934 				fd = 255-fd;
935 		}
936 
937 		if(grey){
938 			*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
939 			bsrc.grey += bsrc.delta;
940 			bdst.grey += bdst.delta;
941 		}else{
942 			if(q){
943 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
944 				bsrc.rgba++;
945 				bdst.rgba++;
946 				bsrc.alpha += sadelta;
947 				bmask.alpha += bmask.delta;
948 				bdst.alpha += bdst.delta;
949 				continue;
950 			}
951 			*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
952 			*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
953 			*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
954 			bsrc.red += bsrc.delta;
955 			bsrc.blu += bsrc.delta;
956 			bsrc.grn += bsrc.delta;
957 			bdst.red += bdst.delta;
958 			bdst.blu += bdst.delta;
959 			bdst.grn += bdst.delta;
960 		}
961 		if(bdst.alpha != &ones){
962 			*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);
963 			bdst.alpha += bdst.delta;
964 		}
965 		bmask.alpha += bmask.delta;
966 		bsrc.alpha += sadelta;
967 	}
968 	return obdst;
969 }
970 
971 static Buffer
alphacalc5(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)972 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
973 {
974 	USED(dx);
975 	USED(grey);
976 	USED(op);
977 	USED(b1);
978 	USED(b2);
979 	return bdst;
980 }
981 
982 static Buffer
alphacalc11(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)983 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
984 {
985 	Buffer obdst;
986 	int fd, sadelta;
987 	int i, sa, ma, q;
988 	ulong s, t, u, v;
989 
990 	USED(op);
991 	obdst = bdst;
992 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
993 	q = bsrc.delta == 4 && bdst.delta == 4;
994 
995 	for(i=0; i<dx; i++){
996 		sa = *bsrc.alpha;
997 		ma = *bmask.alpha;
998 		fd = 255-MUL(sa, ma, t);
999 
1000 		if(grey){
1001 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1002 			bsrc.grey += bsrc.delta;
1003 			bdst.grey += bdst.delta;
1004 		}else{
1005 			if(q){
1006 				*bdst.rgba = MUL0123(ma, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
1007 				bsrc.rgba++;
1008 				bdst.rgba++;
1009 				bsrc.alpha += sadelta;
1010 				bmask.alpha += bmask.delta;
1011 				continue;
1012 			}
1013 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1014 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1015 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1016 			bsrc.red += bsrc.delta;
1017 			bsrc.blu += bsrc.delta;
1018 			bsrc.grn += bsrc.delta;
1019 			bdst.red += bdst.delta;
1020 			bdst.blu += bdst.delta;
1021 			bdst.grn += bdst.delta;
1022 		}
1023 		if(bdst.alpha != &ones){
1024 			*bdst.alpha = MUL(ma, sa, s)+MUL(fd, *bdst.alpha, t);
1025 			bdst.alpha += bdst.delta;
1026 		}
1027 		bmask.alpha += bmask.delta;
1028 		bsrc.alpha += sadelta;
1029 	}
1030 	return obdst;
1031 }
1032 
1033 /*
1034 not used yet
1035 source and mask alpha 1
1036 static Buffer
1037 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1038 {
1039 	Buffer obdst;
1040 	int i;
1041 
1042 	USED(op);
1043 	obdst = bdst;
1044 	if(bsrc.delta == bdst.delta){
1045 		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1046 		return obdst;
1047 	}
1048 	for(i=0; i<dx; i++){
1049 		if(grey){
1050 			*bdst.grey = *bsrc.grey;
1051 			bsrc.grey += bsrc.delta;
1052 			bdst.grey += bdst.delta;
1053 		}else{
1054 			*bdst.red = *bsrc.red;
1055 			*bdst.grn = *bsrc.grn;
1056 			*bdst.blu = *bsrc.blu;
1057 			bsrc.red += bsrc.delta;
1058 			bsrc.blu += bsrc.delta;
1059 			bsrc.grn += bsrc.delta;
1060 			bdst.red += bdst.delta;
1061 			bdst.blu += bdst.delta;
1062 			bdst.grn += bdst.delta;
1063 		}
1064 		if(bdst.alpha != &ones){
1065 			*bdst.alpha = 255;
1066 			bdst.alpha += bdst.delta;
1067 		}
1068 	}
1069 	return obdst;
1070 }
1071 */
1072 
1073 /* source alpha 1 */
1074 static Buffer
alphacalcS(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1075 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1076 {
1077 	Buffer obdst;
1078 	int fd;
1079 	int i, ma;
1080 	ulong s, t;
1081 
1082 	USED(op);
1083 	obdst = bdst;
1084 
1085 	for(i=0; i<dx; i++){
1086 		ma = *bmask.alpha;
1087 		fd = 255-ma;
1088 
1089 		if(grey){
1090 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1091 			bsrc.grey += bsrc.delta;
1092 			bdst.grey += bdst.delta;
1093 		}else{
1094 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1095 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1096 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1097 			bsrc.red += bsrc.delta;
1098 			bsrc.blu += bsrc.delta;
1099 			bsrc.grn += bsrc.delta;
1100 			bdst.red += bdst.delta;
1101 			bdst.blu += bdst.delta;
1102 			bdst.grn += bdst.delta;
1103 		}
1104 		if(bdst.alpha != &ones){
1105 			*bdst.alpha = ma+MUL(fd, *bdst.alpha, t);
1106 			bdst.alpha += bdst.delta;
1107 		}
1108 		bmask.alpha += bmask.delta;
1109 	}
1110 	return obdst;
1111 }
1112 
1113 static Buffer
boolcalc14(Buffer bdst,Buffer b1,Buffer bmask,int dx,int grey,int op)1114 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1115 {
1116 	Buffer obdst;
1117 	int i, ma, zero;
1118 
1119 	USED(b1);
1120 
1121 	obdst = bdst;
1122 
1123 	for(i=0; i<dx; i++){
1124 		ma = *bmask.alpha;
1125 		zero = ma ? op == DoutS : op == DinS;
1126 
1127 		if(grey){
1128 			if(zero)
1129 				*bdst.grey = 0;
1130 			bdst.grey += bdst.delta;
1131 		}else{
1132 			if(zero)
1133 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1134 			bdst.red += bdst.delta;
1135 			bdst.blu += bdst.delta;
1136 			bdst.grn += bdst.delta;
1137 		}
1138 		bmask.alpha += bmask.delta;
1139 		if(bdst.alpha != &ones){
1140 			if(zero)
1141 				*bdst.alpha = 0;
1142 			bdst.alpha += bdst.delta;
1143 		}
1144 	}
1145 	return obdst;
1146 }
1147 
1148 static Buffer
boolcalc236789(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1149 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1150 {
1151 	Buffer obdst;
1152 	int fs, fd;
1153 	int i, ma, da, zero;
1154 	ulong s, t;
1155 
1156 	obdst = bdst;
1157 	zero = !(op&1);
1158 
1159 	for(i=0; i<dx; i++){
1160 		ma = *bmask.alpha;
1161 		da = *bdst.alpha;
1162 		fs = da;
1163 		if(op&2)
1164 			fs = 255-da;
1165 		fd = 0;
1166 		if(op&4)
1167 			fd = 255;
1168 
1169 		if(grey){
1170 			if(ma)
1171 				*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1172 			else if(zero)
1173 				*bdst.grey = 0;
1174 			bsrc.grey += bsrc.delta;
1175 			bdst.grey += bdst.delta;
1176 		}else{
1177 			if(ma){
1178 				*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1179 				*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1180 				*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1181 			}
1182 			else if(zero)
1183 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1184 			bsrc.red += bsrc.delta;
1185 			bsrc.blu += bsrc.delta;
1186 			bsrc.grn += bsrc.delta;
1187 			bdst.red += bdst.delta;
1188 			bdst.blu += bdst.delta;
1189 			bdst.grn += bdst.delta;
1190 		}
1191 		bmask.alpha += bmask.delta;
1192 		if(bdst.alpha != &ones){
1193 			if(ma)
1194 				*bdst.alpha = fs+MUL(fd, da, t);
1195 			else if(zero)
1196 				*bdst.alpha = 0;
1197 			bdst.alpha += bdst.delta;
1198 		}
1199 	}
1200 	return obdst;
1201 }
1202 
1203 static Buffer
boolcalc1011(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1204 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1205 {
1206 	Buffer obdst;
1207 	int i, ma, zero;
1208 
1209 	obdst = bdst;
1210 	zero = !(op&1);
1211 
1212 	for(i=0; i<dx; i++){
1213 		ma = *bmask.alpha;
1214 
1215 		if(grey){
1216 			if(ma)
1217 				*bdst.grey = *bsrc.grey;
1218 			else if(zero)
1219 				*bdst.grey = 0;
1220 			bsrc.grey += bsrc.delta;
1221 			bdst.grey += bdst.delta;
1222 		}else{
1223 			if(ma){
1224 				*bdst.red = *bsrc.red;
1225 				*bdst.grn = *bsrc.grn;
1226 				*bdst.blu = *bsrc.blu;
1227 			}
1228 			else if(zero)
1229 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1230 			bsrc.red += bsrc.delta;
1231 			bsrc.blu += bsrc.delta;
1232 			bsrc.grn += bsrc.delta;
1233 			bdst.red += bdst.delta;
1234 			bdst.blu += bdst.delta;
1235 			bdst.grn += bdst.delta;
1236 		}
1237 		bmask.alpha += bmask.delta;
1238 		if(bdst.alpha != &ones){
1239 			if(ma)
1240 				*bdst.alpha = 255;
1241 			else if(zero)
1242 				*bdst.alpha = 0;
1243 			bdst.alpha += bdst.delta;
1244 		}
1245 	}
1246 	return obdst;
1247 }
1248 /*
1249  * Replicated cached scan line read.  Call the function listed in the Param,
1250  * but cache the result so that for replicated images we only do the work once.
1251  */
1252 static Buffer
replread(Param * p,uchar * s,int y)1253 replread(Param *p, uchar *s, int y)
1254 {
1255 	Buffer *b;
1256 
1257 	USED(s);
1258 	b = &p->bcache[y];
1259 	if((p->bfilled & (1<<y)) == 0){
1260 		p->bfilled |= 1<<y;
1261 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1262 	}
1263 	return *b;
1264 }
1265 
1266 /*
1267  * Alpha reading function that simply relabels the grey pointer.
1268  */
1269 static Buffer
greymaskread(Param * p,uchar * buf,int y)1270 greymaskread(Param *p, uchar *buf, int y)
1271 {
1272 	Buffer b;
1273 
1274 	b = p->greymaskcall(p, buf, y);
1275 	b.alpha = b.grey;
1276 	return b;
1277 }
1278 
1279 #define DBG if(0)
1280 static Buffer
readnbit(Param * p,uchar * buf,int y)1281 readnbit(Param *p, uchar *buf, int y)
1282 {
1283 	Buffer b;
1284 	Memimage *img;
1285 	uchar *repl, *r, *w, *ow, bits;
1286 	int i, n, sh, depth, x, dx, npack, nbits;
1287 
1288 	b.rgba = (ulong*)buf;
1289 	b.grey = w = buf;
1290 	b.red = b.blu = b.grn = w;
1291 	b.alpha = &ones;
1292 	b.delta = 1;
1293 
1294 	dx = p->dx;
1295 	img = p->img;
1296 	depth = img->depth;
1297 	repl = &replbit[depth][0];
1298 	npack = 8/depth;
1299 	sh = 8-depth;
1300 
1301 	/* copy from p->r.min.x until end of repl rectangle */
1302 	x = p->r.min.x;
1303 	n = dx;
1304 	if(n > p->img->r.max.x - x)
1305 		n = p->img->r.max.x - x;
1306 
1307 	r = p->bytermin + y*p->bwidth;
1308 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
1309 	bits = *r++;
1310 	nbits = 8;
1311 	if(i=x&(npack-1)){
1312 DBG print("throwaway %d...", i);
1313 		bits <<= depth*i;
1314 		nbits -= depth*i;
1315 	}
1316 	for(i=0; i<n; i++){
1317 		if(nbits == 0){
1318 DBG print("(%.2ux)...", *r);
1319 			bits = *r++;
1320 			nbits = 8;
1321 		}
1322 		*w++ = repl[bits>>sh];
1323 DBG print("bit %x...", repl[bits>>sh]);
1324 		bits <<= depth;
1325 		nbits -= depth;
1326 	}
1327 	dx -= n;
1328 	if(dx == 0)
1329 		return b;
1330 
1331 	assert(x+i == p->img->r.max.x);
1332 
1333 	/* copy from beginning of repl rectangle until where we were before. */
1334 	x = p->img->r.min.x;
1335 	n = dx;
1336 	if(n > p->r.min.x - x)
1337 		n = p->r.min.x - x;
1338 
1339 	r = p->bytey0s + y*p->bwidth;
1340 DBG print("x=%d r=%p...", x, r);
1341 	bits = *r++;
1342 	nbits = 8;
1343 	if(i=x&(npack-1)){
1344 		bits <<= depth*i;
1345 		nbits -= depth*i;
1346 	}
1347 DBG print("nbits=%d...", nbits);
1348 	for(i=0; i<n; i++){
1349 		if(nbits == 0){
1350 			bits = *r++;
1351 			nbits = 8;
1352 		}
1353 		*w++ = repl[bits>>sh];
1354 DBG print("bit %x...", repl[bits>>sh]);
1355 		bits <<= depth;
1356 		nbits -= depth;
1357 DBG print("bits %x nbits %d...", bits, nbits);
1358 	}
1359 	dx -= n;
1360 	if(dx == 0)
1361 		return b;
1362 
1363 	assert(dx > 0);
1364 	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
1365 	ow = buf;
1366 	while(dx--)
1367 		*w++ = *ow++;
1368 
1369 	return b;
1370 }
1371 #undef DBG
1372 
1373 #define DBG if(0)
1374 static void
writenbit(Param * p,uchar * w,Buffer src)1375 writenbit(Param *p, uchar *w, Buffer src)
1376 {
1377 	uchar *r;
1378 	ulong bits;
1379 	int i, sh, depth, npack, nbits, x, ex;
1380 
1381 	assert(src.grey != nil && src.delta == 1);
1382 
1383 	x = p->r.min.x;
1384 	ex = x+p->dx;
1385 	depth = p->img->depth;
1386 	npack = 8/depth;
1387 
1388 	i=x&(npack-1);
1389 	bits = i ? (*w >> (8-depth*i)) : 0;
1390 	nbits = depth*i;
1391 	sh = 8-depth;
1392 	r = src.grey;
1393 
1394 	for(; x<ex; x++){
1395 		bits <<= depth;
1396 DBG print(" %x", *r);
1397 		bits |= (*r++ >> sh);
1398 		nbits += depth;
1399 		if(nbits == 8){
1400 			*w++ = bits;
1401 			nbits = 0;
1402 		}
1403 	}
1404 
1405 	if(nbits){
1406 		sh = 8-nbits;
1407 		bits <<= sh;
1408 		bits |= *w & ((1<<sh)-1);
1409 		*w = bits;
1410 	}
1411 DBG print("\n");
1412 	return;
1413 }
1414 #undef DBG
1415 
1416 static Buffer
readcmap(Param * p,uchar * buf,int y)1417 readcmap(Param *p, uchar *buf, int y)
1418 {
1419 	Buffer b;
1420 	int a, convgrey, copyalpha, dx, i, m;
1421 	uchar *q, *cmap, *begin, *end, *r, *w;
1422 
1423 	begin = p->bytey0s + y*p->bwidth;
1424 	r = p->bytermin + y*p->bwidth;
1425 	end = p->bytey0e + y*p->bwidth;
1426 	cmap = p->img->cmap->cmap2rgb;
1427 	convgrey = p->convgrey;
1428 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
1429 
1430 	w = buf;
1431 	dx = p->dx;
1432 	if(copyalpha){
1433 		b.alpha = buf++;
1434 		a = p->img->shift[CAlpha]/8;
1435 		m = p->img->shift[CMap]/8;
1436 		for(i=0; i<dx; i++){
1437 			*w++ = r[a];
1438 			q = cmap+r[m]*3;
1439 			r += 2;
1440 			if(r == end)
1441 				r = begin;
1442 			if(convgrey){
1443 				*w++ = RGB2K(q[0], q[1], q[2]);
1444 			}else{
1445 				*w++ = q[2];	/* blue */
1446 				*w++ = q[1];	/* green */
1447 				*w++ = q[0];	/* red */
1448 			}
1449 		}
1450 	}else{
1451 		b.alpha = &ones;
1452 		for(i=0; i<dx; i++){
1453 			q = cmap+*r++*3;
1454 			if(r == end)
1455 				r = begin;
1456 			if(convgrey){
1457 				*w++ = RGB2K(q[0], q[1], q[2]);
1458 			}else{
1459 				*w++ = q[2];	/* blue */
1460 				*w++ = q[1];	/* green */
1461 				*w++ = q[0];	/* red */
1462 			}
1463 		}
1464 	}
1465 
1466 	b.rgba = (ulong*)(buf-copyalpha);
1467 
1468 	if(convgrey){
1469 		b.grey = buf;
1470 		b.red = b.blu = b.grn = buf;
1471 		b.delta = 1+copyalpha;
1472 	}else{
1473 		b.blu = buf;
1474 		b.grn = buf+1;
1475 		b.red = buf+2;
1476 		b.grey = nil;
1477 		b.delta = 3+copyalpha;
1478 	}
1479 	return b;
1480 }
1481 
1482 static void
writecmap(Param * p,uchar * w,Buffer src)1483 writecmap(Param *p, uchar *w, Buffer src)
1484 {
1485 	uchar *cmap, *red, *grn, *blu;
1486 	int i, dx, delta;
1487 
1488 	cmap = p->img->cmap->rgb2cmap;
1489 
1490 	delta = src.delta;
1491 	red= src.red;
1492 	grn = src.grn;
1493 	blu = src.blu;
1494 
1495 	dx = p->dx;
1496 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1497 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1498 }
1499 
1500 #define DBG if(0)
1501 static Buffer
readbyte(Param * p,uchar * buf,int y)1502 readbyte(Param *p, uchar *buf, int y)
1503 {
1504 	Buffer b;
1505 	Memimage *img;
1506 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1507 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1508 	uchar ured, ugrn, ublu;
1509 	ulong u;
1510 
1511 	img = p->img;
1512 	begin = p->bytey0s + y*p->bwidth;
1513 	r = p->bytermin + y*p->bwidth;
1514 	end = p->bytey0e + y*p->bwidth;
1515 
1516 	w = buf;
1517 	dx = p->dx;
1518 	nb = img->depth/8;
1519 
1520 	convgrey = p->convgrey;	/* convert rgb to grey */
1521 	isgrey = img->flags&Fgrey;
1522 	alphaonly = p->alphaonly;
1523 	copyalpha = (img->flags&Falpha) ? 1 : 0;
1524 
1525 DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
1526 	/* if we can, avoid processing everything */
1527 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1528 		memset(&b, 0, sizeof b);
1529 		if(p->needbuf){
1530 			memmove(buf, r, dx*nb);
1531 			r = buf;
1532 		}
1533 		b.rgba = (ulong*)r;
1534 		if(copyalpha)
1535 			b.alpha = r+img->shift[CAlpha]/8;
1536 		else
1537 			b.alpha = &ones;
1538 		if(isgrey){
1539 			b.grey = r+img->shift[CGrey]/8;
1540 			b.red = b.grn = b.blu = b.grey;
1541 		}else{
1542 			b.red = r+img->shift[CRed]/8;
1543 			b.grn = r+img->shift[CGreen]/8;
1544 			b.blu = r+img->shift[CBlue]/8;
1545 		}
1546 		b.delta = nb;
1547 		return b;
1548 	}
1549 
1550 DBG print("2\n");
1551 	rrepl = replbit[img->nbits[CRed]];
1552 	grepl = replbit[img->nbits[CGreen]];
1553 	brepl = replbit[img->nbits[CBlue]];
1554 	arepl = replbit[img->nbits[CAlpha]];
1555 	krepl = replbit[img->nbits[CGrey]];
1556 
1557 	for(i=0; i<dx; i++){
1558 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1559 		if(copyalpha) {
1560 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1561 DBG print("a %x\n", w[-1]);
1562 		}
1563 
1564 		if(isgrey)
1565 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1566 		else if(!alphaonly){
1567 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1568 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1569 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1570 			if(convgrey){
1571 DBG print("g %x %x %x\n", ured, ugrn, ublu);
1572 				*w++ = RGB2K(ured, ugrn, ublu);
1573 DBG print("%x\n", w[-1]);
1574 			}else{
1575 				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1576 				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1577 				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1578 			}
1579 		}
1580 		r += nb;
1581 		if(r == end)
1582 			r = begin;
1583 	}
1584 
1585 	b.alpha = copyalpha ? buf : &ones;
1586 	b.rgba = (ulong*)buf;
1587 	if(alphaonly){
1588 		b.red = b.grn = b.blu = b.grey = nil;
1589 		if(!copyalpha)
1590 			b.rgba = nil;
1591 		b.delta = 1;
1592 	}else if(isgrey || convgrey){
1593 		b.grey = buf+copyalpha;
1594 		b.red = b.grn = b.blu = buf+copyalpha;
1595 		b.delta = copyalpha+1;
1596 DBG print("alpha %x grey %x\n", b.alpha ? *b.alpha : 0xFF, *b.grey);
1597 	}else{
1598 		b.blu = buf+copyalpha;
1599 		b.grn = buf+copyalpha+1;
1600 		b.grey = nil;
1601 		b.red = buf+copyalpha+2;
1602 		b.delta = copyalpha+3;
1603 	}
1604 	return b;
1605 }
1606 #undef DBG
1607 
1608 #define DBG if(0)
1609 static void
writebyte(Param * p,uchar * w,Buffer src)1610 writebyte(Param *p, uchar *w, Buffer src)
1611 {
1612 	Memimage *img;
1613 	int i, isalpha, isgrey, nb, delta, dx, adelta;
1614 	uchar ff, *red, *grn, *blu, *grey, *alpha;
1615 	ulong u, mask;
1616 
1617 	img = p->img;
1618 
1619 	red = src.red;
1620 	grn = src.grn;
1621 	blu = src.blu;
1622 	alpha = src.alpha;
1623 	delta = src.delta;
1624 	grey = src.grey;
1625 	dx = p->dx;
1626 
1627 	nb = img->depth/8;
1628 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1629 
1630 	isalpha = img->flags&Falpha;
1631 	isgrey = img->flags&Fgrey;
1632 	adelta = src.delta;
1633 
1634 	if(isalpha && (alpha == nil || alpha == &ones)){
1635 		ff = 0xFF;
1636 		alpha = &ff;
1637 		adelta = 0;
1638 	}
1639 
1640 	for(i=0; i<dx; i++){
1641 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1642 DBG print("u %.8lux...", u);
1643 		u &= mask;
1644 DBG print("&mask %.8lux...", u);
1645 		if(isgrey){
1646 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1647 DBG print("|grey %.8lux...", u);
1648 			grey += delta;
1649 		}else{
1650 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1651 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1652 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1653 			red += delta;
1654 			grn += delta;
1655 			blu += delta;
1656 DBG print("|rgb %.8lux...", u);
1657 		}
1658 
1659 		if(isalpha){
1660 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1661 			alpha += adelta;
1662 DBG print("|alpha %.8lux...", u);
1663 		}
1664 
1665 		w[0] = u;
1666 		w[1] = u>>8;
1667 		w[2] = u>>16;
1668 		w[3] = u>>24;
1669 		w += nb;
1670 	}
1671 }
1672 #undef DBG
1673 
1674 static Readfn*
readfn(Memimage * img)1675 readfn(Memimage *img)
1676 {
1677 	if(img->depth < 8)
1678 		return readnbit;
1679 	if(img->nbits[CMap] == 8)
1680 		return readcmap;
1681 	return readbyte;
1682 }
1683 
1684 static Readfn*
readalphafn(Memimage * m)1685 readalphafn(Memimage *m)
1686 {
1687 	USED(m);
1688 	return readbyte;
1689 }
1690 
1691 static Writefn*
writefn(Memimage * img)1692 writefn(Memimage *img)
1693 {
1694 	if(img->depth < 8)
1695 		return writenbit;
1696 	if(img->chan == CMAP8)
1697 		return writecmap;
1698 	return writebyte;
1699 }
1700 
1701 static void
nullwrite(Param * p,uchar * s,Buffer b)1702 nullwrite(Param *p, uchar *s, Buffer b)
1703 {
1704 	USED(p);
1705 	USED(s);
1706 	USED(b);
1707 }
1708 
1709 static Buffer
readptr(Param * p,uchar * s,int y)1710 readptr(Param *p, uchar *s, int y)
1711 {
1712 	Buffer b;
1713 	uchar *q;
1714 
1715 	USED(s);
1716 	q = p->bytermin + y*p->bwidth;
1717 	b.red = q;	/* ptr to data */
1718 	b.grn = b.blu = b.grey = b.alpha = nil;
1719 	b.rgba = (ulong*)q;
1720 	b.delta = p->img->depth/8;
1721 	return b;
1722 }
1723 
1724 static Buffer
boolmemmove(Buffer bdst,Buffer bsrc,Buffer b1,int dx,int i,int o)1725 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1726 {
1727 	USED(i);
1728 	USED(o);
1729 	USED(b1);
1730 	USED(bsrc);
1731 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1732 	return bdst;
1733 }
1734 
1735 static Buffer
boolcopy8(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1736 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1737 {
1738 	uchar *m, *r, *w, *ew;
1739 
1740 	USED(i);
1741 	USED(o);
1742 	m = bmask.grey;
1743 	w = bdst.red;
1744 	r = bsrc.red;
1745 	ew = w+dx;
1746 	for(; w < ew; w++,r++)
1747 		if(*m++)
1748 			*w = *r;
1749 	return bdst;	/* not used */
1750 }
1751 
1752 static Buffer
boolcopy16(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1753 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1754 {
1755 	uchar *m;
1756 	ushort *r, *w, *ew;
1757 
1758 	USED(i);
1759 	USED(o);
1760 	m = bmask.grey;
1761 	w = (ushort*)bdst.red;
1762 	r = (ushort*)bsrc.red;
1763 	ew = w+dx;
1764 	for(; w < ew; w++,r++)
1765 		if(*m++)
1766 			*w = *r;
1767 	return bdst;	/* not used */
1768 }
1769 
1770 static Buffer
boolcopy24(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1771 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1772 {
1773 	uchar *m;
1774 	uchar *r, *w, *ew;
1775 
1776 	USED(i);
1777 	USED(o);
1778 	m = bmask.grey;
1779 	w = bdst.red;
1780 	r = bsrc.red;
1781 	ew = w+dx*3;
1782 	while(w < ew){
1783 		if(*m++){
1784 			*w++ = *r++;
1785 			*w++ = *r++;
1786 			*w++ = *r++;
1787 		}else{
1788 			w += 3;
1789 			r += 3;
1790 		}
1791 	}
1792 	return bdst;	/* not used */
1793 }
1794 
1795 static Buffer
boolcopy32(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1796 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1797 {
1798 	uchar *m;
1799 	ulong *r, *w, *ew;
1800 
1801 	USED(i);
1802 	USED(o);
1803 	m = bmask.grey;
1804 	w = (ulong*)bdst.red;
1805 	r = (ulong*)bsrc.red;
1806 	ew = w+dx;
1807 	for(; w < ew; w++,r++)
1808 		if(*m++)
1809 			*w = *r;
1810 	return bdst;	/* not used */
1811 }
1812 
1813 static Buffer
genconv(Param * p,uchar * buf,int y)1814 genconv(Param *p, uchar *buf, int y)
1815 {
1816 	Buffer b;
1817 	int nb;
1818 	uchar *r, *w, *ew;
1819 
1820 	/* read from source into RGB format in convbuf */
1821 	b = p->convreadcall(p, p->convbuf, y);
1822 
1823 	/* write RGB format into dst format in buf */
1824 	p->convwritecall(p->convdpar, buf, b);
1825 
1826 	if(p->convdx){
1827 		nb = p->convdpar->img->depth/8;
1828 		r = buf;
1829 		w = buf+nb*p->dx;
1830 		ew = buf+nb*p->convdx;
1831 		while(w<ew)
1832 			*w++ = *r++;
1833 	}
1834 
1835 	b.red = buf;
1836 	b.blu = b.grn = b.grey = b.alpha = nil;
1837 	b.rgba = (ulong*)buf;
1838 	b.delta = 0;
1839 
1840 	return b;
1841 }
1842 
1843 static Readfn*
convfn(Memimage * dst,Param * dpar,Memimage * src,Param * spar,int * ndrawbuf)1844 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar, int *ndrawbuf)
1845 {
1846 	if(dst->chan == src->chan && !(src->flags&Frepl)){
1847 //if(drawdebug) iprint("readptr...");
1848 		return readptr;
1849 	}
1850 
1851 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1852 		/* cheat because we know the replicated value is exactly the color map entry. */
1853 //if(drawdebug) iprint("Readnbit...");
1854 		return readnbit;
1855 	}
1856 
1857 	spar->convreadcall = readfn(src);
1858 	spar->convwritecall = writefn(dst);
1859 	spar->convdpar = dpar;
1860 
1861 	/* allocate a conversion buffer */
1862 	spar->convbufoff = *ndrawbuf;
1863 	*ndrawbuf += spar->dx*4;
1864 
1865 	if(spar->dx > Dx(spar->img->r)){
1866 		spar->convdx = spar->dx;
1867 		spar->dx = Dx(spar->img->r);
1868 	}
1869 
1870 //if(drawdebug) iprint("genconv...");
1871 	return genconv;
1872 }
1873 
1874 static ulong
pixelbits(Memimage * i,Point pt)1875 pixelbits(Memimage *i, Point pt)
1876 {
1877 	uchar *p;
1878 	ulong val;
1879 	int off, bpp, npack;
1880 
1881 	val = 0;
1882 	p = byteaddr(i, pt);
1883 	switch(bpp=i->depth){
1884 	case 1:
1885 	case 2:
1886 	case 4:
1887 		npack = 8/bpp;
1888 		off = pt.x%npack;
1889 		val = p[0] >> bpp*(npack-1-off);
1890 		val &= (1<<bpp)-1;
1891 		break;
1892 	case 8:
1893 		val = p[0];
1894 		break;
1895 	case 16:
1896 		val = p[0]|(p[1]<<8);
1897 		break;
1898 	case 24:
1899 		val = p[0]|(p[1]<<8)|(p[2]<<16);
1900 		break;
1901 	case 32:
1902 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1903 		break;
1904 	}
1905 	while(bpp<32){
1906 		val |= val<<bpp;
1907 		bpp *= 2;
1908 	}
1909 	return val;
1910 }
1911 
1912 static Calcfn*
boolcopyfn(Memimage * img,Memimage * mask)1913 boolcopyfn(Memimage *img, Memimage *mask)
1914 {
1915 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
1916 		return boolmemmove;
1917 
1918 	switch(img->depth){
1919 	case 8:
1920 		return boolcopy8;
1921 	case 16:
1922 		return boolcopy16;
1923 	case 24:
1924 		return boolcopy24;
1925 	case 32:
1926 		return boolcopy32;
1927 	default:
1928 		assert(0 /* boolcopyfn */);
1929 	}
1930 	return nil;
1931 }
1932 
1933 /*
1934  * Optimized draw for filling and scrolling; uses memset and memmove.
1935  */
/*
 * Store the byte val into the n bytes starting at vp.
 * Nothing is written when n is zero or negative.
 */
static void
memsetb(void *vp, uchar val, int n)
{
	uchar *dst;
	int k;

	dst = vp;
	for(k = 0; k < n; k++)
		dst[k] = val;
}
1946 
/*
 * Store the 16-bit value val into the n ushorts starting at vp.
 * Nothing is written when n is zero or negative.
 */
static void
memsets(void *vp, ushort val, int n)
{
	ushort *dst;
	int k;

	dst = vp;
	for(k = 0; k < n; k++)
		dst[k] = val;
}
1957 
/*
 * Store the value val into the n ulongs starting at vp.
 * Nothing is written when n is zero or negative.
 */
static void
memsetl(void *vp, ulong val, int n)
{
	ulong *dst;
	int k;

	dst = vp;
	for(k = 0; k < n; k++)
		dst[k] = val;
}
1968 
/*
 * Store the low 24 bits of val into n consecutive 3-byte pixels
 * starting at vp, least significant byte first.
 * Nothing is written when n is zero or negative.
 */
static void
memset24(void *vp, ulong val, int n)
{
	uchar *dst;
	uchar lo, mid, hi;
	int k;

	lo = val;
	mid = val>>8;
	hi = val>>16;
	dst = vp;
	for(k = 0; k < n; k++, dst += 3){
		dst[0] = lo;
		dst[1] = mid;
		dst[2] = hi;
	}
}
1986 
1987 static ulong
imgtorgba(Memimage * img,ulong val)1988 imgtorgba(Memimage *img, ulong val)
1989 {
1990 	uchar r, g, b, a;
1991 	int nb, ov, v;
1992 	ulong chan;
1993 	uchar *p;
1994 
1995 	a = 0xFF;
1996 	r = g = b = 0xAA;	/* garbage */
1997 	for(chan=img->chan; chan; chan>>=8){
1998 		nb = NBITS(chan);
1999 		ov = v = val&((1<<nb)-1);
2000 		val >>= nb;
2001 
2002 		while(nb < 8){
2003 			v |= v<<nb;
2004 			nb *= 2;
2005 		}
2006 		v >>= (nb-8);
2007 
2008 		switch(TYPE(chan)){
2009 		case CRed:
2010 			r = v;
2011 			break;
2012 		case CGreen:
2013 			g = v;
2014 			break;
2015 		case CBlue:
2016 			b = v;
2017 			break;
2018 		case CAlpha:
2019 			a = v;
2020 			break;
2021 		case CGrey:
2022 			r = g = b = v;
2023 			break;
2024 		case CMap:
2025 			p = img->cmap->cmap2rgb+3*ov;
2026 			r = *p++;
2027 			g = *p++;
2028 			b = *p;
2029 			break;
2030 		}
2031 	}
2032 	return (r<<24)|(g<<16)|(b<<8)|a;
2033 }
2034 
2035 static ulong
rgbatoimg(Memimage * img,ulong rgba)2036 rgbatoimg(Memimage *img, ulong rgba)
2037 {
2038 	ulong chan;
2039 	int d, nb;
2040 	ulong v;
2041 	uchar *p, r, g, b, a, m;
2042 
2043 	v = 0;
2044 	r = rgba>>24;
2045 	g = rgba>>16;
2046 	b = rgba>>8;
2047 	a = rgba;
2048 	d = 0;
2049 	for(chan=img->chan; chan; chan>>=8){
2050 		nb = NBITS(chan);
2051 		switch(TYPE(chan)){
2052 		case CRed:
2053 			v |= (r>>(8-nb))<<d;
2054 			break;
2055 		case CGreen:
2056 			v |= (g>>(8-nb))<<d;
2057 			break;
2058 		case CBlue:
2059 			v |= (b>>(8-nb))<<d;
2060 			break;
2061 		case CAlpha:
2062 			v |= (a>>(8-nb))<<d;
2063 			break;
2064 		case CMap:
2065 			p = img->cmap->rgb2cmap;
2066 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2067 			v |= (m>>(8-nb))<<d;
2068 			break;
2069 		case CGrey:
2070 			m = RGB2K(r,g,b);
2071 			v |= (m>>(8-nb))<<d;
2072 			break;
2073 		}
2074 		d += nb;
2075 	}
2076 //	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
2077 	return v;
2078 }
2079 
2080 #define DBG if(0)
2081 static int
memoptdraw(Memdrawparam * par)2082 memoptdraw(Memdrawparam *par)
2083 {
2084 	int m, y, dy, dx, op;
2085 	ulong v;
2086 	Memimage *src;
2087 	Memimage *dst;
2088 
2089 	dx = Dx(par->r);
2090 	dy = Dy(par->r);
2091 	src = par->src;
2092 	dst = par->dst;
2093 	op = par->op;
2094 
2095 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2096 	/*
2097 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
2098 	 * destination format and just replicate with memset.
2099 	 */
2100 	m = Simplesrc|Simplemask|Fullmask;
2101 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2102 		uchar *dp, p[4];
2103 		int d, dwid, ppb, np, nb;
2104 		uchar lm, rm;
2105 
2106 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
2107 		dwid = dst->width*sizeof(ulong);
2108 		dp = byteaddr(dst, par->r.min);
2109 		v = par->sdval;
2110 DBG print("sdval %lud, depth %d\n", v, dst->depth);
2111 		switch(dst->depth){
2112 		case 1:
2113 		case 2:
2114 		case 4:
2115 			for(d=dst->depth; d<8; d*=2)
2116 				v |= (v<<d);
2117 			ppb = 8/dst->depth;	/* pixels per byte */
2118 			m = ppb-1;
2119 			/* left edge */
2120 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
2121 			dx -= (ppb-np);
2122 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
2123 			lm = (1<<nb)-1;
2124 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);
2125 
2126 			/* right edge */
2127 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
2128 			dx -= np;
2129 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
2130 			rm = ~((1<<nb)-1);
2131 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);
2132 
2133 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2134 			/* lm, rm are masks that are 1 where we should touch the bits */
2135 			if(dx < 0){	/* just one byte */
2136 				lm &= rm;
2137 				for(y=0; y<dy; y++, dp+=dwid)
2138 					*dp ^= (v ^ *dp) & lm;
2139 			}else if(dx == 0){	/* no full bytes */
2140 				if(lm)
2141 					dwid--;
2142 
2143 				for(y=0; y<dy; y++, dp+=dwid){
2144 					if(lm){
2145 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2146 						*dp ^= (v ^ *dp) & lm;
2147 						dp++;
2148 					}
2149 					*dp ^= (v ^ *dp) & rm;
2150 				}
2151 			}else{		/* full bytes in middle */
2152 				dx /= ppb;
2153 				if(lm)
2154 					dwid--;
2155 				dwid -= dx;
2156 
2157 				for(y=0; y<dy; y++, dp+=dwid){
2158 					if(lm){
2159 						*dp ^= (v ^ *dp) & lm;
2160 						dp++;
2161 					}
2162 					memset(dp, v, dx);
2163 					dp += dx;
2164 					*dp ^= (v ^ *dp) & rm;
2165 				}
2166 			}
2167 			return 1;
2168 		case 8:
2169 			for(y=0; y<dy; y++, dp+=dwid)
2170 				memset(dp, v, dx);
2171 			return 1;
2172 		case 16:
2173 			p[0] = v;		/* make little endian */
2174 			p[1] = v>>8;
2175 			v = *(ushort*)p;
2176 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
2177 	dp, dx, dy, dwid);
2178 			for(y=0; y<dy; y++, dp+=dwid)
2179 				memsets(dp, v, dx);
2180 			return 1;
2181 		case 24:
2182 			for(y=0; y<dy; y++, dp+=dwid)
2183 				memset24(dp, v, dx);
2184 			return 1;
2185 		case 32:
2186 			p[0] = v;		/* make little endian */
2187 			p[1] = v>>8;
2188 			p[2] = v>>16;
2189 			p[3] = v>>24;
2190 			v = *(ulong*)p;
2191 			for(y=0; y<dy; y++, dp+=dwid)
2192 				memsetl(dp, v, dx);
2193 			return 1;
2194 		default:
2195 			assert(0 /* bad dest depth in memoptdraw */);
2196 		}
2197 	}
2198 
2199 	/*
2200 	 * If no source alpha, an opaque mask, we can just copy the
2201 	 * source onto the destination.  If the channels are the same and
2202 	 * the source is not replicated, memmove suffices.
2203 	 */
2204 	m = Simplemask|Fullmask;
2205 	if((par->state&(m|Replsrc))==m && src->depth >= 8
2206 	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2207 		uchar *sp, *dp;
2208 		long swid, dwid, nb;
2209 		int dir;
2210 
2211 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2212 			dir = -1;
2213 		else
2214 			dir = 1;
2215 
2216 		swid = src->width*sizeof(ulong);
2217 		dwid = dst->width*sizeof(ulong);
2218 		sp = byteaddr(src, par->sr.min);
2219 		dp = byteaddr(dst, par->r.min);
2220 		if(dir == -1){
2221 			sp += (dy-1)*swid;
2222 			dp += (dy-1)*dwid;
2223 			swid = -swid;
2224 			dwid = -dwid;
2225 		}
2226 		nb = (dx*src->depth)/8;
2227 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2228 			memmove(dp, sp, nb);
2229 		return 1;
2230 	}
2231 
2232 	/*
2233 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2234 	 * they're all bit aligned, we can just use bit operators.  This happens
2235 	 * when we're manipulating boolean masks, e.g. in the arc code.
2236 	 */
2237 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
2238 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
2239 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2240 		uchar *sp, *dp, *mp;
2241 		uchar lm, rm;
2242 		long swid, dwid, mwid;
2243 		int i, x, dir;
2244 
2245 		sp = byteaddr(src, par->sr.min);
2246 		dp = byteaddr(dst, par->r.min);
2247 		mp = byteaddr(par->mask, par->mr.min);
2248 		swid = src->width*sizeof(ulong);
2249 		dwid = dst->width*sizeof(ulong);
2250 		mwid = par->mask->width*sizeof(ulong);
2251 
2252 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2253 			dir = -1;
2254 		}else
2255 			dir = 1;
2256 
2257 		lm = 0xFF>>(par->r.min.x&7);
2258 		rm = 0xFF<<(8-(par->r.max.x&7));
2259 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2260 
2261 		if(dx < 0){	/* one byte wide */
2262 			lm &= rm;
2263 			if(dir == -1){
2264 				dp += dwid*(dy-1);
2265 				sp += swid*(dy-1);
2266 				mp += mwid*(dy-1);
2267 				dwid = -dwid;
2268 				swid = -swid;
2269 				mwid = -mwid;
2270 			}
2271 			for(y=0; y<dy; y++){
2272 				*dp ^= (*dp ^ *sp) & *mp & lm;
2273 				dp += dwid;
2274 				sp += swid;
2275 				mp += mwid;
2276 			}
2277 			return 1;
2278 		}
2279 
2280 		dx /= 8;
2281 		if(dir == 1){
2282 			i = (lm!=0)+dx+(rm!=0);
2283 			mwid -= i;
2284 			swid -= i;
2285 			dwid -= i;
2286 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2287 				if(lm){
2288 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
2289 					dp++;
2290 				}
2291 				for(x=0; x<dx; x++){
2292 					*dp ^= (*dp ^ *sp++) & *mp++;
2293 					dp++;
2294 				}
2295 				if(rm){
2296 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
2297 					dp++;
2298 				}
2299 			}
2300 			return 1;
2301 		}else{
2302 		/* dir == -1 */
2303 			i = (lm!=0)+dx+(rm!=0);
2304 			dp += dwid*(dy-1)+i-1;
2305 			sp += swid*(dy-1)+i-1;
2306 			mp += mwid*(dy-1)+i-1;
2307 			dwid = -dwid+i;
2308 			swid = -swid+i;
2309 			mwid = -mwid+i;
2310 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2311 				if(rm){
2312 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
2313 					dp--;
2314 				}
2315 				for(x=0; x<dx; x++){
2316 					*dp ^= (*dp ^ *sp--) & *mp--;
2317 					dp--;
2318 				}
2319 				if(lm){
2320 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
2321 					dp--;
2322 				}
2323 			}
2324 		}
2325 		return 1;
2326 	}
2327 	return 0;
2328 }
2329 #undef DBG
2330 
2331 /*
2332  * Boolean character drawing.
2333  * Solid opaque color through a 1-bit greyscale mask.
2334  */
2335 #define DBG if(0)
2336 static int
chardraw(Memdrawparam * par)2337 chardraw(Memdrawparam *par)
2338 {
2339 	ulong bits;
2340 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2341 	ulong v, maskwid, dstwid;
2342 	uchar *wp, *rp, *q, *wc;
2343 	ushort *ws;
2344 	ulong *wl;
2345 	uchar sp[4];
2346 	Rectangle r, mr;
2347 	Memimage *mask, *src, *dst;
2348 
2349 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2350 		par->mask->flags, par->mask->depth, par->src->flags,
2351 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2352 
2353 	mask = par->mask;
2354 	src = par->src;
2355 	dst = par->dst;
2356 	r = par->r;
2357 	mr = par->mr;
2358 	op = par->op;
2359 
2360 	if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2361 	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2362 	|| op != SoverD)
2363 		return 0;
2364 
2365 //if(drawdebug) iprint("chardraw...");
2366 
2367 	depth = mask->depth;
2368 	maskwid = mask->width*sizeof(ulong);
2369 	rp = byteaddr(mask, mr.min);
2370 	npack = 8/depth;
2371 	bsh = (mr.min.x % npack) * depth;
2372 
2373 	wp = byteaddr(dst, r.min);
2374 	dstwid = dst->width*sizeof(ulong);
2375 DBG print("bsh %d\n", bsh);
2376 	dy = Dy(r);
2377 	dx = Dx(r);
2378 
2379 	ddepth = dst->depth;
2380 
2381 	/*
2382 	 * for loop counts from bsh to bsh+dx
2383 	 *
2384 	 * we want the bottom bits to be the amount
2385 	 * to shift the pixels down, so for n≡0 (mod 8) we want
2386 	 * bottom bits 7.  for n≡1, 6, etc.
2387 	 * the bits come from -n-1.
2388 	 */
2389 
2390 	bx = -bsh-1;
2391 	ex = -bsh-1-dx;
2392 	SET(bits);
2393 	v = par->sdval;
2394 
2395 	/* make little endian */
2396 	sp[0] = v;
2397 	sp[1] = v>>8;
2398 	sp[2] = v>>16;
2399 	sp[3] = v>>24;
2400 
2401 //print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
2402 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2403 		q = rp;
2404 		if(bsh)
2405 			bits = *q++;
2406 		switch(ddepth){
2407 		case 8:
2408 //if(drawdebug) iprint("8loop...");
2409 			wc = wp;
2410 			for(x=bx; x>ex; x--, wc++){
2411 				i = x&7;
2412 				if(i == 8-1)
2413 					bits = *q++;
2414 DBG print("bits %lux sh %d...", bits, i);
2415 				if((bits>>i)&1)
2416 					*wc = v;
2417 			}
2418 			break;
2419 		case 16:
2420 			ws = (ushort*)wp;
2421 			v = *(ushort*)sp;
2422 			for(x=bx; x>ex; x--, ws++){
2423 				i = x&7;
2424 				if(i == 8-1)
2425 					bits = *q++;
2426 DBG print("bits %lux sh %d...", bits, i);
2427 				if((bits>>i)&1)
2428 					*ws = v;
2429 			}
2430 			break;
2431 		case 24:
2432 			wc = wp;
2433 			for(x=bx; x>ex; x--, wc+=3){
2434 				i = x&7;
2435 				if(i == 8-1)
2436 					bits = *q++;
2437 DBG print("bits %lux sh %d...", bits, i);
2438 				if((bits>>i)&1){
2439 					wc[0] = sp[0];
2440 					wc[1] = sp[1];
2441 					wc[2] = sp[2];
2442 				}
2443 			}
2444 			break;
2445 		case 32:
2446 			wl = (ulong*)wp;
2447 			v = *(ulong*)sp;
2448 			for(x=bx; x>ex; x--, wl++){
2449 				i = x&7;
2450 				if(i == 8-1)
2451 					bits = *q++;
2452 DBG iprint("bits %lux sh %d...", bits, i);
2453 				if((bits>>i)&1)
2454 					*wl = v;
2455 			}
2456 			break;
2457 		}
2458 	}
2459 
2460 DBG print("\n");
2461 	return 1;
2462 }
2463 #undef DBG
2464 
2465 
2466 /*
2467  * Fill entire byte with replicated (if necessary) copy of source pixel,
2468  * assuming destination ldepth is >= source ldepth.
2469  *
2470  * This code is just plain wrong for >8bpp.
2471  *
2472 ulong
2473 membyteval(Memimage *src)
2474 {
2475 	int i, val, bpp;
2476 	uchar uc;
2477 
2478 	unloadmemimage(src, src->r, &uc, 1);
2479 	bpp = src->depth;
2480 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
2481 	uc &= ~(0xFF>>bpp);
2482 	/* pixel value is now in high part of byte. repeat throughout byte
2483 	val = uc;
2484 	for(i=bpp; i<8; i<<=1)
2485 		val |= val>>i;
2486 	return val;
2487 }
2488  *
2489  */
2490 
2491 void
memfillcolor(Memimage * i,ulong val)2492 memfillcolor(Memimage *i, ulong val)
2493 {
2494 	ulong bits;
2495 	int d, y;
2496 
2497 	if(val == DNofill)
2498 		return;
2499 
2500 	bits = rgbatoimg(i, val);
2501 	switch(i->depth){
2502 	case 24:	/* 24-bit images suck */
2503 		for(y=i->r.min.y; y<i->r.max.y; y++)
2504 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2505 		break;
2506 	default:	/* 1, 2, 4, 8, 16, 32 */
2507 		for(d=i->depth; d<32; d*=2)
2508 			bits = (bits << d) | bits;
2509 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2510 		break;
2511 	}
2512 }
2513 
2514