/* xref: /plan9/sys/src/libmemdraw/draw.c (revision 2c1878b464655de2e19be44392cdaf1bb089d01d) */
#include <u.h>
#include <libc.h>
#include <draw.h>
#include <memdraw.h>
#include <pool.h>
6 
7 extern Pool* imagmem;
8 int drawdebug;
9 static int	tablesbuilt;
10 
11 /* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
12 #define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)
13 
14 /*
15  * for 0 ≤ x ≤ 255*255, (x*0x0101+0x100)>>16 is a perfect approximation.
16  * for 0 ≤ x < (1<<16), x/255 = ((x+1)*0x0101)>>16 is a perfect approximation.
17  * the last one is perfect for all up to 1<<16, avoids a multiply, but requires a rathole.
18  */
19 /* #define DIV255(x) (((x)*257+256)>>16)  */
20 #define DIV255(x) ((((x)+1)*257)>>16)
21 /* #define DIV255(x) (tmp=(x)+1, (tmp+(tmp>>8))>>8) */
22 
23 #define MUL(x, y, t)	(t = (x)*(y)+128, (t+(t>>8))>>8)
24 #define MASK13	0xFF00FF00
25 #define MASK02	0x00FF00FF
26 #define MUL13(a, x, t)		(t = (a)*(((x)&MASK13)>>8)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
27 #define MUL02(a, x, t)		(t = (a)*(((x)&MASK02)>>0)+128, ((t+((t>>8)&MASK02))>>8)&MASK02)
28 #define MUL0123(a, x, s, t)	((MUL13(a, x, s)<<8)|MUL02(a, x, t))
29 
30 #define MUL2(u, v, x, y)	(t = (u)*(v)+(x)*(y)+256, (t+(t>>8))>>8)
31 
32 static void mktables(void);
33 typedef int Subdraw(Memdrawparam*);
34 static Subdraw chardraw, alphadraw, memoptdraw;
35 
36 static Memimage*	memones;
37 static Memimage*	memzeros;
38 Memimage *memwhite;
39 Memimage *memblack;
40 Memimage *memtransparent;
41 Memimage *memopaque;
42 
43 int	_ifmt(Fmt*);
44 
45 void
memimageinit(void)46 memimageinit(void)
47 {
48 	static int didinit = 0;
49 
50 	if(didinit)
51 		return;
52 
53 	didinit = 1;
54 
55 	if(strcmp(imagmem->name, "Image") == 0 || strcmp(imagmem->name, "image") == 0)
56 		imagmem->move = memimagemove;
57 
58 	mktables();
59 	_memmkcmap();
60 
61 	fmtinstall('R', Rfmt);
62 	fmtinstall('P', Pfmt);
63 	fmtinstall('b', _ifmt);
64 
65 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
66 	memones->flags |= Frepl;
67 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
68 	*byteaddr(memones, ZP) = ~0;
69 
70 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
71 	memzeros->flags |= Frepl;
72 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
73 	*byteaddr(memzeros, ZP) = 0;
74 
75 	if(memones == nil || memzeros == nil)
76 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
77 
78 	memwhite = memones;
79 	memblack = memzeros;
80 	memopaque = memones;
81 	memtransparent = memzeros;
82 }
83 
84 static ulong imgtorgba(Memimage*, ulong);
85 static ulong rgbatoimg(Memimage*, ulong);
86 static ulong pixelbits(Memimage*, Point);
87 
88 #define DBG if(0)
89 void
memimagedraw(Memimage * dst,Rectangle r,Memimage * src,Point p0,Memimage * mask,Point p1,int op)90 memimagedraw(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
91 {
92 	static int n = 0;
93 	Memdrawparam par;
94 
95 	if(mask == nil)
96 		mask = memopaque;
97 
98 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
99 
100 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
101 //		if(drawdebug)
102 //			iprint("empty clipped rectangle\n");
103 		return;
104 	}
105 
106 	if(op < Clear || op > SoverD){
107 //		if(drawdebug)
108 //			iprint("op out of range: %d\n", op);
109 		return;
110 	}
111 
112 	par.op = op;
113 	par.dst = dst;
114 	par.r = r;
115 	par.src = src;
116 	/* par.sr set by drawclip */
117 	par.mask = mask;
118 	/* par.mr set by drawclip */
119 
120 	par.state = 0;
121 	if(src->flags&Frepl){
122 		par.state |= Replsrc;
123 		if(Dx(src->r)==1 && Dy(src->r)==1){
124 			par.sval = pixelbits(src, src->r.min);
125 			par.state |= Simplesrc;
126 			par.srgba = imgtorgba(src, par.sval);
127 			par.sdval = rgbatoimg(dst, par.srgba);
128 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
129 //				if (drawdebug) iprint("fill with transparent source\n");
130 				return;	/* no-op successfully handled */
131 			}
132 		}
133 	}
134 
135 	if(mask->flags & Frepl){
136 		par.state |= Replmask;
137 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
138 			par.mval = pixelbits(mask, mask->r.min);
139 			if(par.mval == 0 && (op&DoutS)){
140 //				if(drawdebug) iprint("fill with zero mask\n");
141 				return;	/* no-op successfully handled */
142 			}
143 			par.state |= Simplemask;
144 			if(par.mval == ~0)
145 				par.state |= Fullmask;
146 			par.mrgba = imgtorgba(mask, par.mval);
147 		}
148 	}
149 
150 //	if(drawdebug)
151 //		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
152 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
153 
154 	/*
155 	 * Now that we've clipped the parameters down to be consistent, we
156 	 * simply try sub-drawing routines in order until we find one that was able
157 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
158 	 * unable to satisfy the request, so we do not return.
159 	 */
160 
161 	/*
162 	 * Hardware support.  Each video driver provides this function,
163 	 * which checks to see if there is anything it can help with.
164 	 * There could be an if around this checking to see if dst is in video memory.
165 	 */
166 DBG print("test hwdraw\n");
167 	if(hwdraw(&par)){
168 //if(drawdebug) iprint("hw handled\n");
169 DBG print("hwdraw handled\n");
170 		return;
171 	}
172 	/*
173 	 * Optimizations using memmove and memset.
174 	 */
175 DBG print("test memoptdraw\n");
176 	if(memoptdraw(&par)){
177 //if(drawdebug) iprint("memopt handled\n");
178 DBG print("memopt handled\n");
179 		return;
180 	}
181 
182 	/*
183 	 * Character drawing.
184 	 * Solid source color being painted through a boolean mask onto a high res image.
185 	 */
186 DBG print("test chardraw\n");
187 	if(chardraw(&par)){
188 //if(drawdebug) iprint("chardraw handled\n");
189 DBG print("chardraw handled\n");
190 		return;
191 	}
192 
193 	/*
194 	 * General calculation-laden case that does alpha for each pixel.
195 	 */
196 DBG print("do alphadraw\n");
197 	alphadraw(&par);
198 //if(drawdebug) iprint("alphadraw handled\n");
199 DBG print("alphadraw handled\n");
200 }
201 #undef DBG
202 
203 /*
204  * Clip the destination rectangle further based on the properties of the
205  * source and mask rectangles.  Once the destination rectangle is properly
206  * clipped, adjust the source and mask rectangles to be the same size.
207  * Then if source or mask is replicated, move its clipped rectangle
208  * so that its minimum point falls within the repl rectangle.
209  *
210  * Return zero if the final rectangle is null.
211  */
212 int
drawclip(Memimage * dst,Rectangle * r,Memimage * src,Point * p0,Memimage * mask,Point * p1,Rectangle * sr,Rectangle * mr)213 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
214 {
215 	Point rmin, delta;
216 	int splitcoords;
217 	Rectangle omr;
218 
219 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
220 		return 0;
221 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
222 	/* clip to destination */
223 	rmin = r->min;
224 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
225 		return 0;
226 	/* move mask point */
227 	p1->x += r->min.x-rmin.x;
228 	p1->y += r->min.y-rmin.y;
229 	/* move source point */
230 	p0->x += r->min.x-rmin.x;
231 	p0->y += r->min.y-rmin.y;
232 	/* map destination rectangle into source */
233 	sr->min = *p0;
234 	sr->max.x = p0->x+Dx(*r);
235 	sr->max.y = p0->y+Dy(*r);
236 	/* sr is r in source coordinates; clip to source */
237 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
238 		return 0;
239 	if(!rectclip(sr, src->clipr))
240 		return 0;
241 	/* compute and clip rectangle in mask */
242 	if(splitcoords){
243 		/* move mask point with source */
244 		p1->x += sr->min.x-p0->x;
245 		p1->y += sr->min.y-p0->y;
246 		mr->min = *p1;
247 		mr->max.x = p1->x+Dx(*sr);
248 		mr->max.y = p1->y+Dy(*sr);
249 		omr = *mr;
250 		/* mr is now rectangle in mask; clip it */
251 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
252 			return 0;
253 		if(!rectclip(mr, mask->clipr))
254 			return 0;
255 		/* reflect any clips back to source */
256 		sr->min.x += mr->min.x-omr.min.x;
257 		sr->min.y += mr->min.y-omr.min.y;
258 		sr->max.x += mr->max.x-omr.max.x;
259 		sr->max.y += mr->max.y-omr.max.y;
260 		*p1 = mr->min;
261 	}else{
262 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
263 			return 0;
264 		if(!rectclip(sr, mask->clipr))
265 			return 0;
266 		*p1 = sr->min;
267 	}
268 
269 	/* move source clipping back to destination */
270 	delta.x = r->min.x - p0->x;
271 	delta.y = r->min.y - p0->y;
272 	r->min.x = sr->min.x + delta.x;
273 	r->min.y = sr->min.y + delta.y;
274 	r->max.x = sr->max.x + delta.x;
275 	r->max.y = sr->max.y + delta.y;
276 
277 	/* move source rectangle so sr->min is in src->r */
278 	if(src->flags&Frepl) {
279 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
280 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
281 		sr->min.x += delta.x;
282 		sr->min.y += delta.y;
283 		sr->max.x += delta.x;
284 		sr->max.y += delta.y;
285 	}
286 	*p0 = sr->min;
287 
288 	/* move mask point so it is in mask->r */
289 	*p1 = drawrepl(mask->r, *p1);
290 	mr->min = *p1;
291 	mr->max.x = p1->x+Dx(*sr);
292 	mr->max.y = p1->y+Dy(*sr);
293 
294 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
295 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
296 	assert(ptinrect(*p0, src->r));
297 	assert(ptinrect(*p1, mask->r));
298 	assert(ptinrect(r->min, dst->r));
299 
300 	return 1;
301 }
302 
303 /*
304  * Conversion tables.
305  */
306 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
307 static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */
308 static uchar conv28[256][4];		/* ... */
309 static uchar conv48[256][2];
310 
/*
 * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
 * the X's are where to put the bottom (ones) bit of the n-bit pattern.
 * only the top 8 bits of the result are actually used.
 * (the lower 8 bits are needed to get bits in the right place
 * when n is not a divisor of 8.)
 *
 * Each row below spells a 16-bit multiplier in binary, most significant
 * bit first: `a` opens sixteen parentheses around 0, X appends a 1 bit
 * and _ appends a 0 bit.
 *
 * Should check to see if its easier to just refer to replmul than
 * use the precomputed values in replbit.  On PCs it may well
 * be; on machines with slow multiply instructions it probably isn't.
 */
#define a ((((((((((((((((0
#define X *2+1)
#define _ *2)
static int replmul[1+8] = {
	0,
	a X X X X X X X X X X X X X X X X,
	a _ X _ X _ X _ X _ X _ X _ X _ X,
	a _ _ X _ _ X _ _ X _ _ X _ _ X _,
	a _ _ _ X _ _ _ X _ _ _ X _ _ _ X,
	a _ _ _ _ X _ _ _ _ X _ _ _ _ X _,
	a _ _ _ _ _ X _ _ _ _ _ X _ _ _ _,
	a _ _ _ _ _ _ X _ _ _ _ _ _ X _ _,
	a _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X,
};
#undef a
#undef X
#undef _
339 
340 static void
mktables(void)341 mktables(void)
342 {
343 	int i, j, mask, sh, small;
344 
345 	if(tablesbuilt)
346 		return;
347 
348 	fmtinstall('R', Rfmt);
349 	fmtinstall('P', Pfmt);
350 	tablesbuilt = 1;
351 
352 	/* bit replication up to 8 bits */
353 	for(i=0; i<256; i++){
354 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
355 			small = i & ((1<<j)-1);
356 			replbit[j][i] = (small*replmul[j])>>8;
357 		}
358 	}
359 
360 	/* bit unpacking up to 8 bits, only powers of 2 */
361 	for(i=0; i<256; i++){
362 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
363 			conv18[i][j] = replbit[1][(i>>sh)&mask];
364 
365 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
366 			conv28[i][j] = replbit[2][(i>>sh)&mask];
367 
368 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
369 			conv48[i][j] = replbit[4][(i>>sh)&mask];
370 	}
371 }
372 
373 static uchar ones = 0xff;
374 
375 /*
376  * General alpha drawing case.  Can handle anything.
377  */
378 typedef struct	Buffer	Buffer;
379 struct Buffer {
380 	/* used by most routines */
381 	uchar	*red;
382 	uchar	*grn;
383 	uchar	*blu;
384 	uchar	*alpha;
385 	uchar	*grey;
386 	ulong	*rgba;
387 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
388 
389 	/* used by boolcalc* for mask data */
390 	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
391 	int		mskip;	/* no. of left bits to skip in *m */
392 	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
393 	int		bmskip;	/* no. of left bits to skip in *bm */
394 	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
395 	int		emskip;	/* no. of right bits to skip in *em */
396 };
397 
398 typedef struct	Param	Param;
399 typedef Buffer	Readfn(Param*, uchar*, int);
400 typedef void	Writefn(Param*, uchar*, Buffer);
401 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
402 
403 enum {
404 	MAXBCACHE = 16
405 };
406 
407 /* giant rathole to customize functions with */
408 struct Param {
409 	Readfn	*replcall;
410 	Readfn	*greymaskcall;
411 	Readfn	*convreadcall;
412 	Writefn	*convwritecall;
413 
414 	Memimage *img;
415 	Rectangle	r;
416 	int	dx;	/* of r */
417 	int	needbuf;
418 	int	convgrey;
419 	int	alphaonly;
420 
421 	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
422 	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
423 	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
424 	int		bwidth;
425 
426 	int	replcache;	/* if set, cache buffers */
427 	Buffer	bcache[MAXBCACHE];
428 	ulong	bfilled;
429 	uchar	*bufbase;
430 	int	bufoff;
431 	int	bufdelta;
432 
433 	int	dir;
434 
435 	int	convbufoff;
436 	uchar	*convbuf;
437 	Param	*convdpar;
438 	int	convdx;
439 };
440 
441 static uchar *drawbuf;
442 static int	ndrawbuf;
443 static int	mdrawbuf;
444 static Readfn	greymaskread, replread, readptr;
445 static Writefn	nullwrite;
446 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
447 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
448 
449 static Readfn*	readfn(Memimage*);
450 static Readfn*	readalphafn(Memimage*);
451 static Writefn*	writefn(Memimage*);
452 
453 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
454 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*, int*);
455 static Readfn*	ptrfn(Memimage*);
456 
457 static Calcfn *alphacalc[Ncomp] =
458 {
459 	alphacalc0,		/* Clear */
460 	alphacalc14,		/* DoutS */
461 	alphacalc2810,		/* SoutD */
462 	alphacalc3679,		/* DxorS */
463 	alphacalc14,		/* DinS */
464 	alphacalc5,		/* D */
465 	alphacalc3679,		/* DatopS */
466 	alphacalc3679,		/* DoverS */
467 	alphacalc2810,		/* SinD */
468 	alphacalc3679,		/* SatopD */
469 	alphacalc2810,		/* S */
470 	alphacalc11,		/* SoverD */
471 };
472 
473 static Calcfn *boolcalc[Ncomp] =
474 {
475 	alphacalc0,		/* Clear */
476 	boolcalc14,		/* DoutS */
477 	boolcalc236789,		/* SoutD */
478 	boolcalc236789,		/* DxorS */
479 	boolcalc14,		/* DinS */
480 	alphacalc5,		/* D */
481 	boolcalc236789,		/* DatopS */
482 	boolcalc236789,		/* DoverS */
483 	boolcalc236789,		/* SinD */
484 	boolcalc236789,		/* SatopD */
485 	boolcalc1011,		/* S */
486 	boolcalc1011,		/* SoverD */
487 };
488 
489 /*
490  * Avoid standard Lock, QLock so that can be used in kernel.
491  */
492 typedef struct Dbuf Dbuf;
493 struct Dbuf
494 {
495 	uchar *p;
496 	int n;
497 	Param spar, mpar, dpar;
498 	int inuse;
499 };
500 static Dbuf dbuf[10];
501 
502 static Dbuf*
allocdbuf(void)503 allocdbuf(void)
504 {
505 	int i;
506 
507 	for(i=0; i<nelem(dbuf); i++){
508 		if(dbuf[i].inuse)
509 			continue;
510 		if(!_tas(&dbuf[i].inuse))
511 			return &dbuf[i];
512 	}
513 	return nil;
514 }
515 
516 static void
getparam(Param * p,Memimage * img,Rectangle r,int convgrey,int needbuf,int * ndrawbuf)517 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf, int *ndrawbuf)
518 {
519 	int nbuf;
520 
521 	memset(p, 0, sizeof *p);
522 
523 	p->img = img;
524 	p->r = r;
525 	p->dx = Dx(r);
526 	p->needbuf = needbuf;
527 	p->convgrey = convgrey;
528 
529 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
530 
531 	p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
532 	p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
533 	p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
534 	p->bwidth = sizeof(ulong)*img->width;
535 
536 	assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
537 
538 	if(p->r.min.x == p->img->r.min.x)
539 		assert(p->bytermin == p->bytey0s);
540 
541 	nbuf = 1;
542 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
543 		p->replcache = 1;
544 		nbuf = Dy(img->r);
545 	}
546 	p->bufdelta = 4*p->dx;
547 	p->bufoff = *ndrawbuf;
548 	*ndrawbuf += p->bufdelta*nbuf;
549 }
550 
551 static void
clipy(Memimage * img,int * y)552 clipy(Memimage *img, int *y)
553 {
554 	int dy;
555 
556 	dy = Dy(img->r);
557 	if(*y == dy)
558 		*y = 0;
559 	else if(*y == -1)
560 		*y = dy-1;
561 	assert(0 <= *y && *y < dy);
562 }
563 
564 static void
dumpbuf(char * s,Buffer b,int n)565 dumpbuf(char *s, Buffer b, int n)
566 {
567 	int i;
568 	uchar *p;
569 
570 	print("%s", s);
571 	for(i=0; i<n; i++){
572 		print(" ");
573 		if(p=b.grey){
574 			print(" k%.2uX", *p);
575 			b.grey += b.delta;
576 		}else{
577 			if(p=b.red){
578 				print(" r%.2uX", *p);
579 				b.red += b.delta;
580 			}
581 			if(p=b.grn){
582 				print(" g%.2uX", *p);
583 				b.grn += b.delta;
584 			}
585 			if(p=b.blu){
586 				print(" b%.2uX", *p);
587 				b.blu += b.delta;
588 			}
589 		}
590 		if((p=b.alpha) != &ones){
591 			print(" α%.2uX", *p);
592 			b.alpha += b.delta;
593 		}
594 	}
595 	print("\n");
596 }
597 
598 /*
599  * For each scan line, we expand the pixels from source, mask, and destination
600  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
601  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
602  * the readers need not copy the data: they can simply return pointers to the data.
603  * If the destination image is grey and the source is not, it is converted using the NTSC
604  * formula.
605  *
606  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
607  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
608  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
609  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
610  * the calculator, and that buffer is passed to a function to write it to the destination.
611  * If the buffer is already pointing at the destination, the writing function is a no-op.
612  */
613 #define DBG if(0)
614 static int
alphadraw(Memdrawparam * par)615 alphadraw(Memdrawparam *par)
616 {
617 	int isgrey, starty, endy, op;
618 	int needbuf, dsty, srcy, masky;
619 	int y, dir, dx, dy, ndrawbuf;
620 	uchar *drawbuf;
621 	Buffer bsrc, bdst, bmask;
622 	Readfn *rdsrc, *rdmask, *rddst;
623 	Calcfn *calc;
624 	Writefn *wrdst;
625 	Memimage *src, *mask, *dst;
626 	Rectangle r, sr, mr;
627 	Dbuf *z;
628 
629 	r = par->r;
630 	dx = Dx(r);
631 	dy = Dy(r);
632 
633 	z = allocdbuf();
634 	if(z == nil)
635 		return 0;
636 
637 	src = par->src;
638 	mask = par->mask;
639 	dst = par->dst;
640 	sr = par->sr;
641 	mr = par->mr;
642 	op = par->op;
643 
644 	isgrey = dst->flags&Fgrey;
645 
646 	/*
647 	 * Buffering when src and dst are the same bitmap is sufficient but not
648 	 * necessary.  There are stronger conditions we could use.  We could
649 	 * check to see if the rectangles intersect, and if simply moving in the
650 	 * correct y direction can avoid the need to buffer.
651 	 */
652 	needbuf = (src->data == dst->data);
653 
654 	ndrawbuf = 0;
655 	getparam(&z->spar, src, sr, isgrey, needbuf, &ndrawbuf);
656 	getparam(&z->dpar, dst, r, isgrey, needbuf, &ndrawbuf);
657 	getparam(&z->mpar, mask, mr, 0, needbuf, &ndrawbuf);
658 
659 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
660 	z->spar.dir = z->mpar.dir = z->dpar.dir = dir;
661 
662 	/*
663 	 * If the mask is purely boolean, we can convert from src to dst format
664 	 * when we read src, and then just copy it to dst where the mask tells us to.
665 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
666 	 *
667 	 * The computation is accomplished by assigning the function pointers as follows:
668 	 *	rdsrc - read and convert source into dst format in a buffer
669 	 * 	rdmask - convert mask to bytes, set pointer to it
670 	 * 	rddst - fill with pointer to real dst data, but do no reads
671 	 *	calc - copy src onto dst when mask says to.
672 	 *	wrdst - do nothing
673 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
674 	 * but it avoids a fair amount of code duplication to make this a case here
675 	 * rather than have a separate booldraw.
676 	 */
677 //if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
678 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
679 //if(drawdebug) iprint("boolcopy...");
680 		rdsrc = convfn(dst, &z->dpar, src, &z->spar, &ndrawbuf);
681 		rddst = readptr;
682 		rdmask = readfn(mask);
683 		calc = boolcopyfn(dst, mask);
684 		wrdst = nullwrite;
685 	}else{
686 		/* usual alphadraw parameter fetching */
687 		rdsrc = readfn(src);
688 		rddst = readfn(dst);
689 		wrdst = writefn(dst);
690 		calc = alphacalc[op];
691 
692 		/*
693 		 * If there is no alpha channel, we'll ask for a grey channel
694 		 * and pretend it is the alpha.
695 		 */
696 		if(mask->flags&Falpha){
697 			rdmask = readalphafn(mask);
698 			z->mpar.alphaonly = 1;
699 		}else{
700 			z->mpar.greymaskcall = readfn(mask);
701 			z->mpar.convgrey = 1;
702 			rdmask = greymaskread;
703 
704 			/*
705 			 * Should really be above, but then boolcopyfns would have
706 			 * to deal with bit alignment, and I haven't written that.
707 			 *
708 			 * This is a common case for things like ellipse drawing.
709 			 * When there's no alpha involved and the mask is boolean,
710 			 * we can avoid all the division and multiplication.
711 			 */
712 			if(mask->chan == GREY1 && !(src->flags&Falpha))
713 				calc = boolcalc[op];
714 			else if(op == SoverD && !(src->flags&Falpha))
715 				calc = alphacalcS;
716 		}
717 	}
718 
719 	/*
720 	 * If the image has a small enough repl rectangle,
721 	 * we can just read each line once and cache them.
722 	 */
723 	if(z->spar.replcache){
724 		z->spar.replcall = rdsrc;
725 		rdsrc = replread;
726 	}
727 	if(z->mpar.replcache){
728 		z->mpar.replcall = rdmask;
729 		rdmask = replread;
730 	}
731 
732 	if(z->n < ndrawbuf){
733 		free(z->p);
734 		if((z->p = mallocz(ndrawbuf, 0)) == nil){
735 			z->inuse = 0;
736 			return 0;
737 		}
738 		z->n = ndrawbuf;
739 	}
740 	drawbuf = z->p;
741 
742 	/*
743 	 * Before we were saving only offsets from drawbuf in the parameter
744 	 * structures; now that drawbuf has been grown to accomodate us,
745 	 * we can fill in the pointers.
746 	 */
747 	z->spar.bufbase = drawbuf+z->spar.bufoff;
748 	z->mpar.bufbase = drawbuf+z->mpar.bufoff;
749 	z->dpar.bufbase = drawbuf+z->dpar.bufoff;
750 	z->spar.convbuf = drawbuf+z->spar.convbufoff;
751 
752 	if(dir == 1){
753 		starty = 0;
754 		endy = dy;
755 	}else{
756 		starty = dy-1;
757 		endy = -1;
758 	}
759 
760 	/*
761 	 * srcy, masky, and dsty are offsets from the top of their
762 	 * respective Rectangles.  they need to be contained within
763 	 * the rectangles, so clipy can keep them there without division.
764  	 */
765 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
766 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
767 	dsty = starty + r.min.y - dst->r.min.y;
768 
769 	assert(0 <= srcy && srcy < Dy(src->r));
770 	assert(0 <= masky && masky < Dy(mask->r));
771 	assert(0 <= dsty && dsty < Dy(dst->r));
772 
773 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
774 		clipy(src, &srcy);
775 		clipy(dst, &dsty);
776 		clipy(mask, &masky);
777 
778 		bsrc = rdsrc(&z->spar, z->spar.bufbase, srcy);
779 DBG print("[");
780 		bmask = rdmask(&z->mpar, z->mpar.bufbase, masky);
781 DBG print("]\n");
782 		bdst = rddst(&z->dpar, z->dpar.bufbase, dsty);
783 DBG		dumpbuf("src", bsrc, dx);
784 DBG		dumpbuf("mask", bmask, dx);
785 DBG		dumpbuf("dst", bdst, dx);
786 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
787 		wrdst(&z->dpar, z->dpar.bytermin+dsty*z->dpar.bwidth, bdst);
788 	}
789 
790 	z->inuse = 0;
791 	return 1;
792 }
793 #undef DBG
794 
795 static Buffer
alphacalc0(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)796 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
797 {
798 	USED(grey);
799 	USED(op);
800 	USED(b1);
801 	USED(b2);
802 	memset(bdst.rgba, 0, dx*bdst.delta);
803 	return bdst;
804 }
805 
806 static Buffer
alphacalc14(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)807 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
808 {
809 	Buffer obdst;
810 	int fd, sadelta;
811 	int i, sa, ma, q;
812 	ulong s, t;
813 
814 	obdst = bdst;
815 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
816 	q = bsrc.delta == 4 && bdst.delta == 4;
817 
818 	for(i=0; i<dx; i++){
819 		sa = *bsrc.alpha;
820 		ma = *bmask.alpha;
821 		fd = MUL(sa, ma, t);
822 		if(op == DoutS)
823 			fd = 255-fd;
824 
825 		if(grey){
826 			*bdst.grey = MUL(fd, *bdst.grey, t);
827 			bsrc.grey += bsrc.delta;
828 			bdst.grey += bdst.delta;
829 		}else{
830 			if(q){
831 				*bdst.rgba = MUL0123(fd, *bdst.rgba, s, t);
832 				bsrc.rgba++;
833 				bdst.rgba++;
834 				bsrc.alpha += sadelta;
835 				bmask.alpha += bmask.delta;
836 				continue;
837 			}
838 			*bdst.red = MUL(fd, *bdst.red, t);
839 			*bdst.grn = MUL(fd, *bdst.grn, t);
840 			*bdst.blu = MUL(fd, *bdst.blu, t);
841 			bsrc.red += bsrc.delta;
842 			bsrc.blu += bsrc.delta;
843 			bsrc.grn += bsrc.delta;
844 			bdst.red += bdst.delta;
845 			bdst.blu += bdst.delta;
846 			bdst.grn += bdst.delta;
847 		}
848 		if(bdst.alpha != &ones){
849 			*bdst.alpha = MUL(fd, *bdst.alpha, t);
850 			bdst.alpha += bdst.delta;
851 		}
852 		bmask.alpha += bmask.delta;
853 		bsrc.alpha += sadelta;
854 	}
855 	return obdst;
856 }
857 
858 static Buffer
alphacalc2810(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)859 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
860 {
861 	Buffer obdst;
862 	int fs, sadelta;
863 	int i, ma, da, q;
864 	ulong s, t;
865 
866 	obdst = bdst;
867 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
868 	q = bsrc.delta == 4 && bdst.delta == 4;
869 
870 	for(i=0; i<dx; i++){
871 		ma = *bmask.alpha;
872 		da = *bdst.alpha;
873 		if(op == SoutD)
874 			da = 255-da;
875 		fs = ma;
876 		if(op != S)
877 			fs = MUL(fs, da, t);
878 
879 		if(grey){
880 			*bdst.grey = MUL(fs, *bsrc.grey, t);
881 			bsrc.grey += bsrc.delta;
882 			bdst.grey += bdst.delta;
883 		}else{
884 			if(q){
885 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t);
886 				bsrc.rgba++;
887 				bdst.rgba++;
888 				bmask.alpha += bmask.delta;
889 				bdst.alpha += bdst.delta;
890 				continue;
891 			}
892 			*bdst.red = MUL(fs, *bsrc.red, t);
893 			*bdst.grn = MUL(fs, *bsrc.grn, t);
894 			*bdst.blu = MUL(fs, *bsrc.blu, t);
895 			bsrc.red += bsrc.delta;
896 			bsrc.blu += bsrc.delta;
897 			bsrc.grn += bsrc.delta;
898 			bdst.red += bdst.delta;
899 			bdst.blu += bdst.delta;
900 			bdst.grn += bdst.delta;
901 		}
902 		if(bdst.alpha != &ones){
903 			*bdst.alpha = MUL(fs, *bsrc.alpha, t);
904 			bdst.alpha += bdst.delta;
905 		}
906 		bmask.alpha += bmask.delta;
907 		bsrc.alpha += sadelta;
908 	}
909 	return obdst;
910 }
911 
912 static Buffer
alphacalc3679(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)913 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
914 {
915 	Buffer obdst;
916 	int fs, fd, sadelta;
917 	int i, sa, ma, da, q;
918 	ulong s, t, u, v;
919 
920 	obdst = bdst;
921 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
922 	q = bsrc.delta == 4 && bdst.delta == 4;
923 
924 	for(i=0; i<dx; i++){
925 		sa = *bsrc.alpha;
926 		ma = *bmask.alpha;
927 		da = *bdst.alpha;
928 		if(op == SatopD)
929 			fs = MUL(ma, da, t);
930 		else
931 			fs = MUL(ma, 255-da, t);
932 		if(op == DoverS)
933 			fd = 255;
934 		else{
935 			fd = MUL(sa, ma, t);
936 			if(op != DatopS)
937 				fd = 255-fd;
938 		}
939 
940 		if(grey){
941 			*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
942 			bsrc.grey += bsrc.delta;
943 			bdst.grey += bdst.delta;
944 		}else{
945 			if(q){
946 				*bdst.rgba = MUL0123(fs, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
947 				bsrc.rgba++;
948 				bdst.rgba++;
949 				bsrc.alpha += sadelta;
950 				bmask.alpha += bmask.delta;
951 				bdst.alpha += bdst.delta;
952 				continue;
953 			}
954 			*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
955 			*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
956 			*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
957 			bsrc.red += bsrc.delta;
958 			bsrc.blu += bsrc.delta;
959 			bsrc.grn += bsrc.delta;
960 			bdst.red += bdst.delta;
961 			bdst.blu += bdst.delta;
962 			bdst.grn += bdst.delta;
963 		}
964 		if(bdst.alpha != &ones){
965 			*bdst.alpha = MUL(fs, sa, s)+MUL(fd, da, t);
966 			bdst.alpha += bdst.delta;
967 		}
968 		bmask.alpha += bmask.delta;
969 		bsrc.alpha += sadelta;
970 	}
971 	return obdst;
972 }
973 
974 static Buffer
alphacalc5(Buffer bdst,Buffer b1,Buffer b2,int dx,int grey,int op)975 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
976 {
977 	USED(dx);
978 	USED(grey);
979 	USED(op);
980 	USED(b1);
981 	USED(b2);
982 	return bdst;
983 }
984 
985 static Buffer
alphacalc11(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)986 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
987 {
988 	Buffer obdst;
989 	int fd, sadelta;
990 	int i, sa, ma, q;
991 	ulong s, t, u, v;
992 
993 	USED(op);
994 	obdst = bdst;
995 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
996 	q = bsrc.delta == 4 && bdst.delta == 4;
997 
998 	for(i=0; i<dx; i++){
999 		sa = *bsrc.alpha;
1000 		ma = *bmask.alpha;
1001 		fd = 255-MUL(sa, ma, t);
1002 
1003 		if(grey){
1004 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1005 			bsrc.grey += bsrc.delta;
1006 			bdst.grey += bdst.delta;
1007 		}else{
1008 			if(q){
1009 				*bdst.rgba = MUL0123(ma, *bsrc.rgba, s, t)+MUL0123(fd, *bdst.rgba, u, v);
1010 				bsrc.rgba++;
1011 				bdst.rgba++;
1012 				bsrc.alpha += sadelta;
1013 				bmask.alpha += bmask.delta;
1014 				continue;
1015 			}
1016 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1017 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1018 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1019 			bsrc.red += bsrc.delta;
1020 			bsrc.blu += bsrc.delta;
1021 			bsrc.grn += bsrc.delta;
1022 			bdst.red += bdst.delta;
1023 			bdst.blu += bdst.delta;
1024 			bdst.grn += bdst.delta;
1025 		}
1026 		if(bdst.alpha != &ones){
1027 			*bdst.alpha = MUL(ma, sa, s)+MUL(fd, *bdst.alpha, t);
1028 			bdst.alpha += bdst.delta;
1029 		}
1030 		bmask.alpha += bmask.delta;
1031 		bsrc.alpha += sadelta;
1032 	}
1033 	return obdst;
1034 }
1035 
/*
not used yet
source and mask alpha 1
static Buffer
alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
{
	Buffer obdst;
	int i;

	USED(op);
	obdst = bdst;
	if(bsrc.delta == bdst.delta){
		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
		return obdst;
	}
	for(i=0; i<dx; i++){
		if(grey){
			*bdst.grey = *bsrc.grey;
			bsrc.grey += bsrc.delta;
			bdst.grey += bdst.delta;
		}else{
			*bdst.red = *bsrc.red;
			*bdst.grn = *bsrc.grn;
			*bdst.blu = *bsrc.blu;
			bsrc.red += bsrc.delta;
			bsrc.blu += bsrc.delta;
			bsrc.grn += bsrc.delta;
			bdst.red += bdst.delta;
			bdst.blu += bdst.delta;
			bdst.grn += bdst.delta;
		}
		if(bdst.alpha != &ones){
			*bdst.alpha = 255;
			bdst.alpha += bdst.delta;
		}
	}
	return obdst;
}
*/
1075 
1076 /* source alpha 1 */
1077 static Buffer
alphacalcS(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1078 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1079 {
1080 	Buffer obdst;
1081 	int fd;
1082 	int i, ma;
1083 	ulong s, t;
1084 
1085 	USED(op);
1086 	obdst = bdst;
1087 
1088 	for(i=0; i<dx; i++){
1089 		ma = *bmask.alpha;
1090 		fd = 255-ma;
1091 
1092 		if(grey){
1093 			*bdst.grey = MUL(ma, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1094 			bsrc.grey += bsrc.delta;
1095 			bdst.grey += bdst.delta;
1096 		}else{
1097 			*bdst.red = MUL(ma, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1098 			*bdst.grn = MUL(ma, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1099 			*bdst.blu = MUL(ma, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1100 			bsrc.red += bsrc.delta;
1101 			bsrc.blu += bsrc.delta;
1102 			bsrc.grn += bsrc.delta;
1103 			bdst.red += bdst.delta;
1104 			bdst.blu += bdst.delta;
1105 			bdst.grn += bdst.delta;
1106 		}
1107 		if(bdst.alpha != &ones){
1108 			*bdst.alpha = ma+MUL(fd, *bdst.alpha, t);
1109 			bdst.alpha += bdst.delta;
1110 		}
1111 		bmask.alpha += bmask.delta;
1112 	}
1113 	return obdst;
1114 }
1115 
1116 static Buffer
boolcalc14(Buffer bdst,Buffer b1,Buffer bmask,int dx,int grey,int op)1117 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1118 {
1119 	Buffer obdst;
1120 	int i, ma, zero;
1121 
1122 	USED(b1);
1123 
1124 	obdst = bdst;
1125 
1126 	for(i=0; i<dx; i++){
1127 		ma = *bmask.alpha;
1128 		zero = ma ? op == DoutS : op == DinS;
1129 
1130 		if(grey){
1131 			if(zero)
1132 				*bdst.grey = 0;
1133 			bdst.grey += bdst.delta;
1134 		}else{
1135 			if(zero)
1136 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1137 			bdst.red += bdst.delta;
1138 			bdst.blu += bdst.delta;
1139 			bdst.grn += bdst.delta;
1140 		}
1141 		bmask.alpha += bmask.delta;
1142 		if(bdst.alpha != &ones){
1143 			if(zero)
1144 				*bdst.alpha = 0;
1145 			bdst.alpha += bdst.delta;
1146 		}
1147 	}
1148 	return obdst;
1149 }
1150 
1151 static Buffer
boolcalc236789(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1152 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1153 {
1154 	Buffer obdst;
1155 	int fs, fd;
1156 	int i, ma, da, zero;
1157 	ulong s, t;
1158 
1159 	obdst = bdst;
1160 	zero = !(op&1);
1161 
1162 	for(i=0; i<dx; i++){
1163 		ma = *bmask.alpha;
1164 		da = *bdst.alpha;
1165 		fs = da;
1166 		if(op&2)
1167 			fs = 255-da;
1168 		fd = 0;
1169 		if(op&4)
1170 			fd = 255;
1171 
1172 		if(grey){
1173 			if(ma)
1174 				*bdst.grey = MUL(fs, *bsrc.grey, s)+MUL(fd, *bdst.grey, t);
1175 			else if(zero)
1176 				*bdst.grey = 0;
1177 			bsrc.grey += bsrc.delta;
1178 			bdst.grey += bdst.delta;
1179 		}else{
1180 			if(ma){
1181 				*bdst.red = MUL(fs, *bsrc.red, s)+MUL(fd, *bdst.red, t);
1182 				*bdst.grn = MUL(fs, *bsrc.grn, s)+MUL(fd, *bdst.grn, t);
1183 				*bdst.blu = MUL(fs, *bsrc.blu, s)+MUL(fd, *bdst.blu, t);
1184 			}
1185 			else if(zero)
1186 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1187 			bsrc.red += bsrc.delta;
1188 			bsrc.blu += bsrc.delta;
1189 			bsrc.grn += bsrc.delta;
1190 			bdst.red += bdst.delta;
1191 			bdst.blu += bdst.delta;
1192 			bdst.grn += bdst.delta;
1193 		}
1194 		bmask.alpha += bmask.delta;
1195 		if(bdst.alpha != &ones){
1196 			if(ma)
1197 				*bdst.alpha = fs+MUL(fd, da, t);
1198 			else if(zero)
1199 				*bdst.alpha = 0;
1200 			bdst.alpha += bdst.delta;
1201 		}
1202 	}
1203 	return obdst;
1204 }
1205 
1206 static Buffer
boolcalc1011(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int grey,int op)1207 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1208 {
1209 	Buffer obdst;
1210 	int i, ma, zero;
1211 
1212 	obdst = bdst;
1213 	zero = !(op&1);
1214 
1215 	for(i=0; i<dx; i++){
1216 		ma = *bmask.alpha;
1217 
1218 		if(grey){
1219 			if(ma)
1220 				*bdst.grey = *bsrc.grey;
1221 			else if(zero)
1222 				*bdst.grey = 0;
1223 			bsrc.grey += bsrc.delta;
1224 			bdst.grey += bdst.delta;
1225 		}else{
1226 			if(ma){
1227 				*bdst.red = *bsrc.red;
1228 				*bdst.grn = *bsrc.grn;
1229 				*bdst.blu = *bsrc.blu;
1230 			}
1231 			else if(zero)
1232 				*bdst.red = *bdst.grn = *bdst.blu = 0;
1233 			bsrc.red += bsrc.delta;
1234 			bsrc.blu += bsrc.delta;
1235 			bsrc.grn += bsrc.delta;
1236 			bdst.red += bdst.delta;
1237 			bdst.blu += bdst.delta;
1238 			bdst.grn += bdst.delta;
1239 		}
1240 		bmask.alpha += bmask.delta;
1241 		if(bdst.alpha != &ones){
1242 			if(ma)
1243 				*bdst.alpha = 255;
1244 			else if(zero)
1245 				*bdst.alpha = 0;
1246 			bdst.alpha += bdst.delta;
1247 		}
1248 	}
1249 	return obdst;
1250 }
1251 /*
1252  * Replicated cached scan line read.  Call the function listed in the Param,
1253  * but cache the result so that for replicated images we only do the work once.
1254  */
1255 static Buffer
replread(Param * p,uchar * s,int y)1256 replread(Param *p, uchar *s, int y)
1257 {
1258 	Buffer *b;
1259 
1260 	USED(s);
1261 	b = &p->bcache[y];
1262 	if((p->bfilled & (1<<y)) == 0){
1263 		p->bfilled |= 1<<y;
1264 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1265 	}
1266 	return *b;
1267 }
1268 
1269 /*
1270  * Alpha reading function that simply relabels the grey pointer.
1271  */
1272 static Buffer
greymaskread(Param * p,uchar * buf,int y)1273 greymaskread(Param *p, uchar *buf, int y)
1274 {
1275 	Buffer b;
1276 
1277 	b = p->greymaskcall(p, buf, y);
1278 	b.alpha = b.grey;
1279 	return b;
1280 }
1281 
#define DBG if(0)
/*
 * Read p->dx pixels of scan line y from an image whose depth is below
 * 8 bits, expanding each packed sample to one byte in buf through the
 * replbit[depth] table.  The returned Buffer has delta 1, all colour
 * pointers (and grey) aimed at buf, and alpha set to &ones.
 *
 * The line is produced in up to three pieces so that a request wider
 * than the image wraps around:
 *	1. from p->r.min.x to the image's right edge;
 *	2. from the image's left edge up to p->r.min.x;
 *	3. copies of what has already been written to buf.
 */
static Buffer
readnbit(Param *p, uchar *buf, int y)
{
	Buffer b;
	Memimage *img;
	uchar *repl, *r, *w, *ow, bits;
	int i, n, sh, depth, x, dx, npack, nbits;

	b.rgba = (ulong*)buf;
	b.grey = w = buf;
	b.red = b.blu = b.grn = w;
	b.alpha = &ones;
	b.delta = 1;

	dx = p->dx;
	img = p->img;
	depth = img->depth;
	repl = &replbit[depth][0];
	npack = 8/depth;	/* pixels per source byte */
	sh = 8-depth;		/* shift that brings the top pixel of `bits' down to a table index */

	/* copy from p->r.min.x until end of repl rectangle */
	x = p->r.min.x;
	n = dx;
	if(n > p->img->r.max.x - x)
		n = p->img->r.max.x - x;

	r = p->bytermin + y*p->bwidth;
DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
	bits = *r++;
	nbits = 8;
	/* x may start part-way into a byte: discard the pixels to its left */
	if(i=x&(npack-1)){
DBG print("throwaway %d...", i);
		bits <<= depth*i;
		nbits -= depth*i;
	}
	for(i=0; i<n; i++){
		if(nbits == 0){
			/* current byte used up: fetch the next one */
DBG print("(%.2ux)...", *r);
			bits = *r++;
			nbits = 8;
		}
		/* the next pixel is always in the top `depth' bits of bits */
		*w++ = repl[bits>>sh];
DBG print("bit %x...", repl[bits>>sh]);
		bits <<= depth;
		nbits -= depth;
	}
	dx -= n;
	if(dx == 0)
		return b;

	/* i == n here, so the first piece must have ended at the right edge */
	assert(x+i == p->img->r.max.x);

	/* copy from beginning of repl rectangle until where we were before. */
	x = p->img->r.min.x;
	n = dx;
	if(n > p->r.min.x - x)
		n = p->r.min.x - x;

	r = p->bytey0s + y*p->bwidth;
DBG print("x=%d r=%p...", x, r);
	bits = *r++;
	nbits = 8;
	/* same partial-byte adjustment, now for the image's left edge */
	if(i=x&(npack-1)){
		bits <<= depth*i;
		nbits -= depth*i;
	}
DBG print("nbits=%d...", nbits);
	for(i=0; i<n; i++){
		if(nbits == 0){
			bits = *r++;
			nbits = 8;
		}
		*w++ = repl[bits>>sh];
DBG print("bit %x...", repl[bits>>sh]);
		bits <<= depth;
		nbits -= depth;
DBG print("bits %x nbits %d...", bits, nbits);
	}
	dx -= n;
	if(dx == 0)
		return b;

	assert(dx > 0);
	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
	ow = buf;
	while(dx--)
		*w++ = *ow++;

	return b;
}
#undef DBG
1375 
1376 #define DBG if(0)
1377 static void
writenbit(Param * p,uchar * w,Buffer src)1378 writenbit(Param *p, uchar *w, Buffer src)
1379 {
1380 	uchar *r;
1381 	ulong bits;
1382 	int i, sh, depth, npack, nbits, x, ex;
1383 
1384 	assert(src.grey != nil && src.delta == 1);
1385 
1386 	x = p->r.min.x;
1387 	ex = x+p->dx;
1388 	depth = p->img->depth;
1389 	npack = 8/depth;
1390 
1391 	i=x&(npack-1);
1392 	bits = i ? (*w >> (8-depth*i)) : 0;
1393 	nbits = depth*i;
1394 	sh = 8-depth;
1395 	r = src.grey;
1396 
1397 	for(; x<ex; x++){
1398 		bits <<= depth;
1399 DBG print(" %x", *r);
1400 		bits |= (*r++ >> sh);
1401 		nbits += depth;
1402 		if(nbits == 8){
1403 			*w++ = bits;
1404 			nbits = 0;
1405 		}
1406 	}
1407 
1408 	if(nbits){
1409 		sh = 8-nbits;
1410 		bits <<= sh;
1411 		bits |= *w & ((1<<sh)-1);
1412 		*w = bits;
1413 	}
1414 DBG print("\n");
1415 	return;
1416 }
1417 #undef DBG
1418 
1419 static Buffer
readcmap(Param * p,uchar * buf,int y)1420 readcmap(Param *p, uchar *buf, int y)
1421 {
1422 	Buffer b;
1423 	int a, convgrey, copyalpha, dx, i, m;
1424 	uchar *q, *cmap, *begin, *end, *r, *w;
1425 
1426 	begin = p->bytey0s + y*p->bwidth;
1427 	r = p->bytermin + y*p->bwidth;
1428 	end = p->bytey0e + y*p->bwidth;
1429 	cmap = p->img->cmap->cmap2rgb;
1430 	convgrey = p->convgrey;
1431 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
1432 
1433 	w = buf;
1434 	dx = p->dx;
1435 	if(copyalpha){
1436 		b.alpha = buf++;
1437 		a = p->img->shift[CAlpha]/8;
1438 		m = p->img->shift[CMap]/8;
1439 		for(i=0; i<dx; i++){
1440 			*w++ = r[a];
1441 			q = cmap+r[m]*3;
1442 			r += 2;
1443 			if(r == end)
1444 				r = begin;
1445 			if(convgrey){
1446 				*w++ = RGB2K(q[0], q[1], q[2]);
1447 			}else{
1448 				*w++ = q[2];	/* blue */
1449 				*w++ = q[1];	/* green */
1450 				*w++ = q[0];	/* red */
1451 			}
1452 		}
1453 	}else{
1454 		b.alpha = &ones;
1455 		for(i=0; i<dx; i++){
1456 			q = cmap+*r++*3;
1457 			if(r == end)
1458 				r = begin;
1459 			if(convgrey){
1460 				*w++ = RGB2K(q[0], q[1], q[2]);
1461 			}else{
1462 				*w++ = q[2];	/* blue */
1463 				*w++ = q[1];	/* green */
1464 				*w++ = q[0];	/* red */
1465 			}
1466 		}
1467 	}
1468 
1469 	b.rgba = (ulong*)(buf-copyalpha);
1470 
1471 	if(convgrey){
1472 		b.grey = buf;
1473 		b.red = b.blu = b.grn = buf;
1474 		b.delta = 1+copyalpha;
1475 	}else{
1476 		b.blu = buf;
1477 		b.grn = buf+1;
1478 		b.red = buf+2;
1479 		b.grey = nil;
1480 		b.delta = 3+copyalpha;
1481 	}
1482 	return b;
1483 }
1484 
1485 static void
writecmap(Param * p,uchar * w,Buffer src)1486 writecmap(Param *p, uchar *w, Buffer src)
1487 {
1488 	uchar *cmap, *red, *grn, *blu;
1489 	int i, dx, delta;
1490 
1491 	cmap = p->img->cmap->rgb2cmap;
1492 
1493 	delta = src.delta;
1494 	red= src.red;
1495 	grn = src.grn;
1496 	blu = src.blu;
1497 
1498 	dx = p->dx;
1499 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1500 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1501 }
1502 
1503 #define DBG if(0)
1504 static Buffer
readbyte(Param * p,uchar * buf,int y)1505 readbyte(Param *p, uchar *buf, int y)
1506 {
1507 	Buffer b;
1508 	Memimage *img;
1509 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1510 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1511 	uchar ured, ugrn, ublu;
1512 	ulong u;
1513 
1514 	img = p->img;
1515 	begin = p->bytey0s + y*p->bwidth;
1516 	r = p->bytermin + y*p->bwidth;
1517 	end = p->bytey0e + y*p->bwidth;
1518 
1519 	w = buf;
1520 	dx = p->dx;
1521 	nb = img->depth/8;
1522 
1523 	convgrey = p->convgrey;	/* convert rgb to grey */
1524 	isgrey = img->flags&Fgrey;
1525 	alphaonly = p->alphaonly;
1526 	copyalpha = (img->flags&Falpha) ? 1 : 0;
1527 
1528 DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
1529 	/* if we can, avoid processing everything */
1530 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1531 		memset(&b, 0, sizeof b);
1532 		if(p->needbuf){
1533 			memmove(buf, r, dx*nb);
1534 			r = buf;
1535 		}
1536 		b.rgba = (ulong*)r;
1537 		if(copyalpha)
1538 			b.alpha = r+img->shift[CAlpha]/8;
1539 		else
1540 			b.alpha = &ones;
1541 		if(isgrey){
1542 			b.grey = r+img->shift[CGrey]/8;
1543 			b.red = b.grn = b.blu = b.grey;
1544 		}else{
1545 			b.red = r+img->shift[CRed]/8;
1546 			b.grn = r+img->shift[CGreen]/8;
1547 			b.blu = r+img->shift[CBlue]/8;
1548 		}
1549 		b.delta = nb;
1550 		return b;
1551 	}
1552 
1553 DBG print("2\n");
1554 	rrepl = replbit[img->nbits[CRed]];
1555 	grepl = replbit[img->nbits[CGreen]];
1556 	brepl = replbit[img->nbits[CBlue]];
1557 	arepl = replbit[img->nbits[CAlpha]];
1558 	krepl = replbit[img->nbits[CGrey]];
1559 
1560 	for(i=0; i<dx; i++){
1561 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1562 		if(copyalpha) {
1563 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1564 DBG print("a %x\n", w[-1]);
1565 		}
1566 
1567 		if(isgrey)
1568 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1569 		else if(!alphaonly){
1570 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1571 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1572 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1573 			if(convgrey){
1574 DBG print("g %x %x %x\n", ured, ugrn, ublu);
1575 				*w++ = RGB2K(ured, ugrn, ublu);
1576 DBG print("%x\n", w[-1]);
1577 			}else{
1578 				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1579 				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1580 				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1581 			}
1582 		}
1583 		r += nb;
1584 		if(r == end)
1585 			r = begin;
1586 	}
1587 
1588 	b.alpha = copyalpha ? buf : &ones;
1589 	b.rgba = (ulong*)buf;
1590 	if(alphaonly){
1591 		b.red = b.grn = b.blu = b.grey = nil;
1592 		if(!copyalpha)
1593 			b.rgba = nil;
1594 		b.delta = 1;
1595 	}else if(isgrey || convgrey){
1596 		b.grey = buf+copyalpha;
1597 		b.red = b.grn = b.blu = buf+copyalpha;
1598 		b.delta = copyalpha+1;
1599 DBG print("alpha %x grey %x\n", b.alpha ? *b.alpha : 0xFF, *b.grey);
1600 	}else{
1601 		b.blu = buf+copyalpha;
1602 		b.grn = buf+copyalpha+1;
1603 		b.grey = nil;
1604 		b.red = buf+copyalpha+2;
1605 		b.delta = copyalpha+3;
1606 	}
1607 	return b;
1608 }
1609 #undef DBG
1610 
1611 #define DBG if(0)
1612 static void
writebyte(Param * p,uchar * w,Buffer src)1613 writebyte(Param *p, uchar *w, Buffer src)
1614 {
1615 	Memimage *img;
1616 	int i, isalpha, isgrey, nb, delta, dx, adelta;
1617 	uchar ff, *red, *grn, *blu, *grey, *alpha;
1618 	ulong u, mask;
1619 
1620 	img = p->img;
1621 
1622 	red = src.red;
1623 	grn = src.grn;
1624 	blu = src.blu;
1625 	alpha = src.alpha;
1626 	delta = src.delta;
1627 	grey = src.grey;
1628 	dx = p->dx;
1629 
1630 	nb = img->depth/8;
1631 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1632 
1633 	isalpha = img->flags&Falpha;
1634 	isgrey = img->flags&Fgrey;
1635 	adelta = src.delta;
1636 
1637 	if(isalpha && (alpha == nil || alpha == &ones)){
1638 		ff = 0xFF;
1639 		alpha = &ff;
1640 		adelta = 0;
1641 	}
1642 
1643 	for(i=0; i<dx; i++){
1644 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1645 DBG print("u %.8lux...", u);
1646 		u &= mask;
1647 DBG print("&mask %.8lux...", u);
1648 		if(isgrey){
1649 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1650 DBG print("|grey %.8lux...", u);
1651 			grey += delta;
1652 		}else{
1653 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1654 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1655 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1656 			red += delta;
1657 			grn += delta;
1658 			blu += delta;
1659 DBG print("|rgb %.8lux...", u);
1660 		}
1661 
1662 		if(isalpha){
1663 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1664 			alpha += adelta;
1665 DBG print("|alpha %.8lux...", u);
1666 		}
1667 
1668 		w[0] = u;
1669 		w[1] = u>>8;
1670 		w[2] = u>>16;
1671 		w[3] = u>>24;
1672 		w += nb;
1673 	}
1674 }
1675 #undef DBG
1676 
1677 static Readfn*
readfn(Memimage * img)1678 readfn(Memimage *img)
1679 {
1680 	if(img->depth < 8)
1681 		return readnbit;
1682 	if(img->nbits[CMap] == 8)
1683 		return readcmap;
1684 	return readbyte;
1685 }
1686 
1687 static Readfn*
readalphafn(Memimage * m)1688 readalphafn(Memimage *m)
1689 {
1690 	USED(m);
1691 	return readbyte;
1692 }
1693 
1694 static Writefn*
writefn(Memimage * img)1695 writefn(Memimage *img)
1696 {
1697 	if(img->depth < 8)
1698 		return writenbit;
1699 	if(img->chan == CMAP8)
1700 		return writecmap;
1701 	return writebyte;
1702 }
1703 
1704 static void
nullwrite(Param * p,uchar * s,Buffer b)1705 nullwrite(Param *p, uchar *s, Buffer b)
1706 {
1707 	USED(p);
1708 	USED(s);
1709 	USED(b);
1710 }
1711 
1712 static Buffer
readptr(Param * p,uchar * s,int y)1713 readptr(Param *p, uchar *s, int y)
1714 {
1715 	Buffer b;
1716 	uchar *q;
1717 
1718 	USED(s);
1719 	q = p->bytermin + y*p->bwidth;
1720 	b.red = q;	/* ptr to data */
1721 	b.grn = b.blu = b.grey = b.alpha = nil;
1722 	b.rgba = (ulong*)q;
1723 	b.delta = p->img->depth/8;
1724 	return b;
1725 }
1726 
1727 static Buffer
boolmemmove(Buffer bdst,Buffer bsrc,Buffer b1,int dx,int i,int o)1728 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1729 {
1730 	USED(i);
1731 	USED(o);
1732 	USED(b1);
1733 	USED(bsrc);
1734 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
1735 	return bdst;
1736 }
1737 
1738 static Buffer
boolcopy8(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1739 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1740 {
1741 	uchar *m, *r, *w, *ew;
1742 
1743 	USED(i);
1744 	USED(o);
1745 	m = bmask.grey;
1746 	w = bdst.red;
1747 	r = bsrc.red;
1748 	ew = w+dx;
1749 	for(; w < ew; w++,r++)
1750 		if(*m++)
1751 			*w = *r;
1752 	return bdst;	/* not used */
1753 }
1754 
1755 static Buffer
boolcopy16(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1756 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1757 {
1758 	uchar *m;
1759 	ushort *r, *w, *ew;
1760 
1761 	USED(i);
1762 	USED(o);
1763 	m = bmask.grey;
1764 	w = (ushort*)bdst.red;
1765 	r = (ushort*)bsrc.red;
1766 	ew = w+dx;
1767 	for(; w < ew; w++,r++)
1768 		if(*m++)
1769 			*w = *r;
1770 	return bdst;	/* not used */
1771 }
1772 
1773 static Buffer
boolcopy24(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1774 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1775 {
1776 	uchar *m;
1777 	uchar *r, *w, *ew;
1778 
1779 	USED(i);
1780 	USED(o);
1781 	m = bmask.grey;
1782 	w = bdst.red;
1783 	r = bsrc.red;
1784 	ew = w+dx*3;
1785 	while(w < ew){
1786 		if(*m++){
1787 			*w++ = *r++;
1788 			*w++ = *r++;
1789 			*w++ = *r++;
1790 		}else{
1791 			w += 3;
1792 			r += 3;
1793 		}
1794 	}
1795 	return bdst;	/* not used */
1796 }
1797 
1798 static Buffer
boolcopy32(Buffer bdst,Buffer bsrc,Buffer bmask,int dx,int i,int o)1799 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1800 {
1801 	uchar *m;
1802 	ulong *r, *w, *ew;
1803 
1804 	USED(i);
1805 	USED(o);
1806 	m = bmask.grey;
1807 	w = (ulong*)bdst.red;
1808 	r = (ulong*)bsrc.red;
1809 	ew = w+dx;
1810 	for(; w < ew; w++,r++)
1811 		if(*m++)
1812 			*w = *r;
1813 	return bdst;	/* not used */
1814 }
1815 
1816 static Buffer
genconv(Param * p,uchar * buf,int y)1817 genconv(Param *p, uchar *buf, int y)
1818 {
1819 	Buffer b;
1820 	int nb;
1821 	uchar *r, *w, *ew;
1822 
1823 	/* read from source into RGB format in convbuf */
1824 	b = p->convreadcall(p, p->convbuf, y);
1825 
1826 	/* write RGB format into dst format in buf */
1827 	p->convwritecall(p->convdpar, buf, b);
1828 
1829 	if(p->convdx){
1830 		nb = p->convdpar->img->depth/8;
1831 		r = buf;
1832 		w = buf+nb*p->dx;
1833 		ew = buf+nb*p->convdx;
1834 		while(w<ew)
1835 			*w++ = *r++;
1836 	}
1837 
1838 	b.red = buf;
1839 	b.blu = b.grn = b.grey = b.alpha = nil;
1840 	b.rgba = (ulong*)buf;
1841 	b.delta = 0;
1842 
1843 	return b;
1844 }
1845 
1846 static Readfn*
convfn(Memimage * dst,Param * dpar,Memimage * src,Param * spar,int * ndrawbuf)1847 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar, int *ndrawbuf)
1848 {
1849 	if(dst->chan == src->chan && !(src->flags&Frepl)){
1850 //if(drawdebug) iprint("readptr...");
1851 		return readptr;
1852 	}
1853 
1854 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1855 		/* cheat because we know the replicated value is exactly the color map entry. */
1856 //if(drawdebug) iprint("Readnbit...");
1857 		return readnbit;
1858 	}
1859 
1860 	spar->convreadcall = readfn(src);
1861 	spar->convwritecall = writefn(dst);
1862 	spar->convdpar = dpar;
1863 
1864 	/* allocate a conversion buffer */
1865 	spar->convbufoff = *ndrawbuf;
1866 	*ndrawbuf += spar->dx*4;
1867 
1868 	if(spar->dx > Dx(spar->img->r)){
1869 		spar->convdx = spar->dx;
1870 		spar->dx = Dx(spar->img->r);
1871 	}
1872 
1873 //if(drawdebug) iprint("genconv...");
1874 	return genconv;
1875 }
1876 
1877 static ulong
pixelbits(Memimage * i,Point pt)1878 pixelbits(Memimage *i, Point pt)
1879 {
1880 	uchar *p;
1881 	ulong val;
1882 	int off, bpp, npack;
1883 
1884 	val = 0;
1885 	p = byteaddr(i, pt);
1886 	switch(bpp=i->depth){
1887 	case 1:
1888 	case 2:
1889 	case 4:
1890 		npack = 8/bpp;
1891 		off = pt.x%npack;
1892 		val = p[0] >> bpp*(npack-1-off);
1893 		val &= (1<<bpp)-1;
1894 		break;
1895 	case 8:
1896 		val = p[0];
1897 		break;
1898 	case 16:
1899 		val = p[0]|(p[1]<<8);
1900 		break;
1901 	case 24:
1902 		val = p[0]|(p[1]<<8)|(p[2]<<16);
1903 		break;
1904 	case 32:
1905 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1906 		break;
1907 	}
1908 	while(bpp<32){
1909 		val |= val<<bpp;
1910 		bpp *= 2;
1911 	}
1912 	return val;
1913 }
1914 
1915 static Calcfn*
boolcopyfn(Memimage * img,Memimage * mask)1916 boolcopyfn(Memimage *img, Memimage *mask)
1917 {
1918 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
1919 		return boolmemmove;
1920 
1921 	switch(img->depth){
1922 	case 8:
1923 		return boolcopy8;
1924 	case 16:
1925 		return boolcopy16;
1926 	case 24:
1927 		return boolcopy24;
1928 	case 32:
1929 		return boolcopy32;
1930 	default:
1931 		assert(0 /* boolcopyfn */);
1932 	}
1933 	return nil;
1934 }
1935 
1936 /*
1937  * Optimized draw for filling and scrolling; uses memset and memmove.
1938  */
/* Store the byte val into n consecutive bytes starting at vp. */
static void
memsetb(void *vp, uchar val, int n)
{
	uchar *cur, *stop;

	cur = vp;
	for(stop = cur+n; cur < stop; cur++)
		*cur = val;
}
1949 
/* Store the 16-bit value val into n consecutive ushorts starting at vp. */
static void
memsets(void *vp, ushort val, int n)
{
	ushort *cur, *stop;

	cur = vp;
	for(stop = cur+n; cur < stop; cur++)
		*cur = val;
}
1960 
/* Store the value val into n consecutive ulongs starting at vp. */
static void
memsetl(void *vp, ulong val, int n)
{
	ulong *cur, *stop;

	cur = vp;
	for(stop = cur+n; cur < stop; cur++)
		*cur = val;
}
1971 
/*
 * Fill n three-byte pixels starting at vp with the low 24 bits of
 * val, least significant byte first.
 */
static void
memset24(void *vp, ulong val, int n)
{
	uchar *out, lo, mid, hi;
	int k;

	lo = val;
	mid = val>>8;
	hi = val>>16;
	out = vp;
	for(k = 0; k < n; k++){
		out[0] = lo;
		out[1] = mid;
		out[2] = hi;
		out += 3;
	}
}
1989 
1990 static ulong
imgtorgba(Memimage * img,ulong val)1991 imgtorgba(Memimage *img, ulong val)
1992 {
1993 	uchar r, g, b, a;
1994 	int nb, ov, v;
1995 	ulong chan;
1996 	uchar *p;
1997 
1998 	a = 0xFF;
1999 	r = g = b = 0xAA;	/* garbage */
2000 	for(chan=img->chan; chan; chan>>=8){
2001 		nb = NBITS(chan);
2002 		ov = v = val&((1<<nb)-1);
2003 		val >>= nb;
2004 
2005 		while(nb < 8){
2006 			v |= v<<nb;
2007 			nb *= 2;
2008 		}
2009 		v >>= (nb-8);
2010 
2011 		switch(TYPE(chan)){
2012 		case CRed:
2013 			r = v;
2014 			break;
2015 		case CGreen:
2016 			g = v;
2017 			break;
2018 		case CBlue:
2019 			b = v;
2020 			break;
2021 		case CAlpha:
2022 			a = v;
2023 			break;
2024 		case CGrey:
2025 			r = g = b = v;
2026 			break;
2027 		case CMap:
2028 			p = img->cmap->cmap2rgb+3*ov;
2029 			r = *p++;
2030 			g = *p++;
2031 			b = *p;
2032 			break;
2033 		}
2034 	}
2035 	return (r<<24)|(g<<16)|(b<<8)|a;
2036 }
2037 
2038 static ulong
rgbatoimg(Memimage * img,ulong rgba)2039 rgbatoimg(Memimage *img, ulong rgba)
2040 {
2041 	ulong chan;
2042 	int d, nb;
2043 	ulong v;
2044 	uchar *p, r, g, b, a, m;
2045 
2046 	v = 0;
2047 	r = rgba>>24;
2048 	g = rgba>>16;
2049 	b = rgba>>8;
2050 	a = rgba;
2051 	d = 0;
2052 	for(chan=img->chan; chan; chan>>=8){
2053 		nb = NBITS(chan);
2054 		switch(TYPE(chan)){
2055 		case CRed:
2056 			v |= (r>>(8-nb))<<d;
2057 			break;
2058 		case CGreen:
2059 			v |= (g>>(8-nb))<<d;
2060 			break;
2061 		case CBlue:
2062 			v |= (b>>(8-nb))<<d;
2063 			break;
2064 		case CAlpha:
2065 			v |= (a>>(8-nb))<<d;
2066 			break;
2067 		case CMap:
2068 			p = img->cmap->rgb2cmap;
2069 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2070 			v |= (m>>(8-nb))<<d;
2071 			break;
2072 		case CGrey:
2073 			m = RGB2K(r,g,b);
2074 			v |= (m>>(8-nb))<<d;
2075 			break;
2076 		}
2077 		d += nb;
2078 	}
2079 //	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
2080 	return v;
2081 }
2082 
2083 #define DBG if(0)
2084 static int
memoptdraw(Memdrawparam * par)2085 memoptdraw(Memdrawparam *par)
2086 {
2087 	int m, y, dy, dx, op;
2088 	ulong v;
2089 	Memimage *src;
2090 	Memimage *dst;
2091 
2092 	dx = Dx(par->r);
2093 	dy = Dy(par->r);
2094 	src = par->src;
2095 	dst = par->dst;
2096 	op = par->op;
2097 
2098 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2099 	/*
2100 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
2101 	 * destination format and just replicate with memset.
2102 	 */
2103 	m = Simplesrc|Simplemask|Fullmask;
2104 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2105 		uchar *dp, p[4];
2106 		int d, dwid, ppb, np, nb;
2107 		uchar lm, rm;
2108 
2109 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
2110 		dwid = dst->width*sizeof(ulong);
2111 		dp = byteaddr(dst, par->r.min);
2112 		v = par->sdval;
2113 DBG print("sdval %lud, depth %d\n", v, dst->depth);
2114 		switch(dst->depth){
2115 		case 1:
2116 		case 2:
2117 		case 4:
2118 			for(d=dst->depth; d<8; d*=2)
2119 				v |= (v<<d);
2120 			ppb = 8/dst->depth;	/* pixels per byte */
2121 			m = ppb-1;
2122 			/* left edge */
2123 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
2124 			dx -= (ppb-np);
2125 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
2126 			lm = (1<<nb)-1;
2127 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);
2128 
2129 			/* right edge */
2130 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
2131 			dx -= np;
2132 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
2133 			rm = ~((1<<nb)-1);
2134 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);
2135 
2136 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2137 			/* lm, rm are masks that are 1 where we should touch the bits */
2138 			if(dx < 0){	/* just one byte */
2139 				lm &= rm;
2140 				for(y=0; y<dy; y++, dp+=dwid)
2141 					*dp ^= (v ^ *dp) & lm;
2142 			}else if(dx == 0){	/* no full bytes */
2143 				if(lm)
2144 					dwid--;
2145 
2146 				for(y=0; y<dy; y++, dp+=dwid){
2147 					if(lm){
2148 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2149 						*dp ^= (v ^ *dp) & lm;
2150 						dp++;
2151 					}
2152 					*dp ^= (v ^ *dp) & rm;
2153 				}
2154 			}else{		/* full bytes in middle */
2155 				dx /= ppb;
2156 				if(lm)
2157 					dwid--;
2158 				dwid -= dx;
2159 
2160 				for(y=0; y<dy; y++, dp+=dwid){
2161 					if(lm){
2162 						*dp ^= (v ^ *dp) & lm;
2163 						dp++;
2164 					}
2165 					memset(dp, v, dx);
2166 					dp += dx;
2167 					*dp ^= (v ^ *dp) & rm;
2168 				}
2169 			}
2170 			return 1;
2171 		case 8:
2172 			for(y=0; y<dy; y++, dp+=dwid)
2173 				memset(dp, v, dx);
2174 			return 1;
2175 		case 16:
2176 			p[0] = v;		/* make little endian */
2177 			p[1] = v>>8;
2178 			v = *(ushort*)p;
2179 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
2180 	dp, dx, dy, dwid);
2181 			for(y=0; y<dy; y++, dp+=dwid)
2182 				memsets(dp, v, dx);
2183 			return 1;
2184 		case 24:
2185 			for(y=0; y<dy; y++, dp+=dwid)
2186 				memset24(dp, v, dx);
2187 			return 1;
2188 		case 32:
2189 			p[0] = v;		/* make little endian */
2190 			p[1] = v>>8;
2191 			p[2] = v>>16;
2192 			p[3] = v>>24;
2193 			v = *(ulong*)p;
2194 			for(y=0; y<dy; y++, dp+=dwid)
2195 				memsetl(dp, v, dx);
2196 			return 1;
2197 		default:
2198 			assert(0 /* bad dest depth in memoptdraw */);
2199 		}
2200 	}
2201 
2202 	/*
2203 	 * If no source alpha, an opaque mask, we can just copy the
2204 	 * source onto the destination.  If the channels are the same and
2205 	 * the source is not replicated, memmove suffices.
2206 	 */
2207 	m = Simplemask|Fullmask;
2208 	if((par->state&(m|Replsrc))==m && src->depth >= 8
2209 	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2210 		uchar *sp, *dp;
2211 		long swid, dwid, nb;
2212 		int dir;
2213 
2214 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2215 			dir = -1;
2216 		else
2217 			dir = 1;
2218 
2219 		swid = src->width*sizeof(ulong);
2220 		dwid = dst->width*sizeof(ulong);
2221 		sp = byteaddr(src, par->sr.min);
2222 		dp = byteaddr(dst, par->r.min);
2223 		if(dir == -1){
2224 			sp += (dy-1)*swid;
2225 			dp += (dy-1)*dwid;
2226 			swid = -swid;
2227 			dwid = -dwid;
2228 		}
2229 		nb = (dx*src->depth)/8;
2230 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2231 			memmove(dp, sp, nb);
2232 		return 1;
2233 	}
2234 
2235 	/*
2236 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2237 	 * they're all bit aligned, we can just use bit operators.  This happens
2238 	 * when we're manipulating boolean masks, e.g. in the arc code.
2239 	 */
2240 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
2241 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
2242 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2243 		uchar *sp, *dp, *mp;
2244 		uchar lm, rm;
2245 		long swid, dwid, mwid;
2246 		int i, x, dir;
2247 
2248 		sp = byteaddr(src, par->sr.min);
2249 		dp = byteaddr(dst, par->r.min);
2250 		mp = byteaddr(par->mask, par->mr.min);
2251 		swid = src->width*sizeof(ulong);
2252 		dwid = dst->width*sizeof(ulong);
2253 		mwid = par->mask->width*sizeof(ulong);
2254 
2255 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2256 			dir = -1;
2257 		}else
2258 			dir = 1;
2259 
2260 		lm = 0xFF>>(par->r.min.x&7);
2261 		rm = 0xFF<<(8-(par->r.max.x&7));
2262 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2263 
2264 		if(dx < 0){	/* one byte wide */
2265 			lm &= rm;
2266 			if(dir == -1){
2267 				dp += dwid*(dy-1);
2268 				sp += swid*(dy-1);
2269 				mp += mwid*(dy-1);
2270 				dwid = -dwid;
2271 				swid = -swid;
2272 				mwid = -mwid;
2273 			}
2274 			for(y=0; y<dy; y++){
2275 				*dp ^= (*dp ^ *sp) & *mp & lm;
2276 				dp += dwid;
2277 				sp += swid;
2278 				mp += mwid;
2279 			}
2280 			return 1;
2281 		}
2282 
2283 		dx /= 8;
2284 		if(dir == 1){
2285 			i = (lm!=0)+dx+(rm!=0);
2286 			mwid -= i;
2287 			swid -= i;
2288 			dwid -= i;
2289 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2290 				if(lm){
2291 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
2292 					dp++;
2293 				}
2294 				for(x=0; x<dx; x++){
2295 					*dp ^= (*dp ^ *sp++) & *mp++;
2296 					dp++;
2297 				}
2298 				if(rm){
2299 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
2300 					dp++;
2301 				}
2302 			}
2303 			return 1;
2304 		}else{
2305 		/* dir == -1 */
2306 			i = (lm!=0)+dx+(rm!=0);
2307 			dp += dwid*(dy-1)+i-1;
2308 			sp += swid*(dy-1)+i-1;
2309 			mp += mwid*(dy-1)+i-1;
2310 			dwid = -dwid+i;
2311 			swid = -swid+i;
2312 			mwid = -mwid+i;
2313 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2314 				if(rm){
2315 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
2316 					dp--;
2317 				}
2318 				for(x=0; x<dx; x++){
2319 					*dp ^= (*dp ^ *sp--) & *mp--;
2320 					dp--;
2321 				}
2322 				if(lm){
2323 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
2324 					dp--;
2325 				}
2326 			}
2327 		}
2328 		return 1;
2329 	}
2330 	return 0;
2331 }
2332 #undef DBG
2333 
2334 /*
2335  * Boolean character drawing.
2336  * Solid opaque color through a 1-bit greyscale mask.
2337  */
2338 #define DBG if(0)
2339 static int
chardraw(Memdrawparam * par)2340 chardraw(Memdrawparam *par)
2341 {
2342 	ulong bits;
2343 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2344 	ulong v, maskwid, dstwid;
2345 	uchar *wp, *rp, *q, *wc;
2346 	ushort *ws;
2347 	ulong *wl;
2348 	uchar sp[4];
2349 	Rectangle r, mr;
2350 	Memimage *mask, *src, *dst;
2351 
2352 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2353 		par->mask->flags, par->mask->depth, par->src->flags,
2354 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2355 
2356 	mask = par->mask;
2357 	src = par->src;
2358 	dst = par->dst;
2359 	r = par->r;
2360 	mr = par->mr;
2361 	op = par->op;
2362 
2363 	if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2364 	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2365 	|| op != SoverD)
2366 		return 0;
2367 
2368 //if(drawdebug) iprint("chardraw...");
2369 
2370 	depth = mask->depth;
2371 	maskwid = mask->width*sizeof(ulong);
2372 	rp = byteaddr(mask, mr.min);
2373 	npack = 8/depth;
2374 	bsh = (mr.min.x % npack) * depth;
2375 
2376 	wp = byteaddr(dst, r.min);
2377 	dstwid = dst->width*sizeof(ulong);
2378 DBG print("bsh %d\n", bsh);
2379 	dy = Dy(r);
2380 	dx = Dx(r);
2381 
2382 	ddepth = dst->depth;
2383 
2384 	/*
2385 	 * for loop counts from bsh to bsh+dx
2386 	 *
2387 	 * we want the bottom bits to be the amount
2388 	 * to shift the pixels down, so for n≡0 (mod 8) we want
2389 	 * bottom bits 7.  for n≡1, 6, etc.
2390 	 * the bits come from -n-1.
2391 	 */
2392 
2393 	bx = -bsh-1;
2394 	ex = -bsh-1-dx;
2395 	SET(bits);
2396 	v = par->sdval;
2397 
2398 	/* make little endian */
2399 	sp[0] = v;
2400 	sp[1] = v>>8;
2401 	sp[2] = v>>16;
2402 	sp[3] = v>>24;
2403 
2404 //print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
2405 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2406 		q = rp;
2407 		if(bsh)
2408 			bits = *q++;
2409 		switch(ddepth){
2410 		case 8:
2411 //if(drawdebug) iprint("8loop...");
2412 			wc = wp;
2413 			for(x=bx; x>ex; x--, wc++){
2414 				i = x&7;
2415 				if(i == 8-1)
2416 					bits = *q++;
2417 DBG print("bits %lux sh %d...", bits, i);
2418 				if((bits>>i)&1)
2419 					*wc = v;
2420 			}
2421 			break;
2422 		case 16:
2423 			ws = (ushort*)wp;
2424 			v = *(ushort*)sp;
2425 			for(x=bx; x>ex; x--, ws++){
2426 				i = x&7;
2427 				if(i == 8-1)
2428 					bits = *q++;
2429 DBG print("bits %lux sh %d...", bits, i);
2430 				if((bits>>i)&1)
2431 					*ws = v;
2432 			}
2433 			break;
2434 		case 24:
2435 			wc = wp;
2436 			for(x=bx; x>ex; x--, wc+=3){
2437 				i = x&7;
2438 				if(i == 8-1)
2439 					bits = *q++;
2440 DBG print("bits %lux sh %d...", bits, i);
2441 				if((bits>>i)&1){
2442 					wc[0] = sp[0];
2443 					wc[1] = sp[1];
2444 					wc[2] = sp[2];
2445 				}
2446 			}
2447 			break;
2448 		case 32:
2449 			wl = (ulong*)wp;
2450 			v = *(ulong*)sp;
2451 			for(x=bx; x>ex; x--, wl++){
2452 				i = x&7;
2453 				if(i == 8-1)
2454 					bits = *q++;
2455 DBG iprint("bits %lux sh %d...", bits, i);
2456 				if((bits>>i)&1)
2457 					*wl = v;
2458 			}
2459 			break;
2460 		}
2461 	}
2462 
2463 DBG print("\n");
2464 	return 1;
2465 }
2466 #undef DBG
2467 
2468 
2469 /*
2470  * Fill entire byte with replicated (if necessary) copy of source pixel,
2471  * assuming destination ldepth is >= source ldepth.
2472  *
2473  * This code is just plain wrong for >8bpp.
2474  *
2475 ulong
2476 membyteval(Memimage *src)
2477 {
2478 	int i, val, bpp;
2479 	uchar uc;
2480 
2481 	unloadmemimage(src, src->r, &uc, 1);
2482 	bpp = src->depth;
2483 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
2484 	uc &= ~(0xFF>>bpp);
2485 	/* pixel value is now in high part of byte. repeat throughout byte
2486 	val = uc;
2487 	for(i=bpp; i<8; i<<=1)
2488 		val |= val>>i;
2489 	return val;
2490 }
2491  *
2492  */
2493 
2494 void
memfillcolor(Memimage * i,ulong val)2495 memfillcolor(Memimage *i, ulong val)
2496 {
2497 	ulong bits;
2498 	int d, y;
2499 
2500 	if(val == DNofill)
2501 		return;
2502 
2503 	bits = rgbatoimg(i, val);
2504 	switch(i->depth){
2505 	case 24:	/* 24-bit images suck */
2506 		for(y=i->r.min.y; y<i->r.max.y; y++)
2507 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2508 		break;
2509 	default:	/* 1, 2, 4, 8, 16, 32 */
2510 		for(d=i->depth; d<32; d*=2)
2511 			bits = (bits << d) | bits;
2512 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2513 		break;
2514 	}
2515 }
2516 
2517