xref: /plan9/sys/src/cmd/gs/src/spdiff.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 1994, 2000 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: spdiff.c,v 1.9 2005/03/16 14:57:42 igor Exp $ */
18 /* Pixel differencing filters */
19 #include "stdio_.h"		/* should be std.h, but needs NULL */
20 #include "memory_.h"
21 #include "strimpl.h"
22 #include "spdiffx.h"
23 
24 /* ------ PixelDifferenceEncode/Decode ------ */
25 
26 private_st_PDiff_state();
27 
28 /* Define values for case dispatch. */
29 #define cBits1 0
30 #define cBits2 5
31 #define cBits4 10
32 #define cBits8 15
33 #define cBits16 20
34 #define cEncode 0
35 #define cDecode 25
36 
37 /* Set defaults */
38 private void
s_PDiff_set_defaults(stream_state * st)39 s_PDiff_set_defaults(stream_state * st)
40 {
41     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
42 
43     s_PDiff_set_defaults_inline(ss);
44 }
45 
46 /* Common (re)initialization. */
47 private int
s_PDiff_reinit(stream_state * st)48 s_PDiff_reinit(stream_state * st)
49 {
50     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
51 
52     ss->row_left = 0;
53     return 0;
54 }
55 
56 /* Initialize PixelDifferenceEncode filter. */
57 private int
s_PDiffE_init(stream_state * st)58 s_PDiffE_init(stream_state * st)
59 {
60     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
61     int bits_per_row =
62 	ss->Colors * ss->BitsPerComponent * ss->Columns;
63     static const byte cb_values[] = {
64 	0, cBits1, cBits2, 0, cBits4, 0, 0, 0, cBits8,
65 	0, 0, 0, 0, 0, 0, 0, cBits16
66     };
67 
68     ss->row_count = (bits_per_row + 7) >> 3;
69     ss->end_mask = (1 << (-bits_per_row & 7)) - 1;
70     ss->case_index =
71 	cb_values[ss->BitsPerComponent] +
72 	(ss->Colors > 4 ? 0 : ss->Colors) + cEncode;
73     return s_PDiff_reinit(st);
74 }
75 
76 /* Initialize PixelDifferenceDecode filter. */
77 private int
s_PDiffD_init(stream_state * st)78 s_PDiffD_init(stream_state * st)
79 {
80     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
81 
82     s_PDiffE_init(st);
83     ss->case_index += cDecode - cEncode;
84     return 0;
85 }
86 
87 /* Process a buffer.  Note that this handles both Encode and Decode. */
88 private int
s_PDiff_process(stream_state * st,stream_cursor_read * pr,stream_cursor_write * pw,bool last)89 s_PDiff_process(stream_state * st, stream_cursor_read * pr,
90 		stream_cursor_write * pw, bool last)
91 {
92     stream_PDiff_state *const ss = (stream_PDiff_state *) st;
93     const byte *p = pr->ptr;
94     byte *q = pw->ptr;
95     int count;
96     int status = 0;
97     uint s0 = ss->prev[0];
98     byte t = 0;			/* avoid spurious compiler warnings */
99     int  ti;
100     const byte end_mask = ss->end_mask;
101     int colors = ss->Colors;
102     int nb = (colors * ss->BitsPerComponent) >> 3;
103     int final;
104     int ndone, ci;
105 
106 row:
107     if (ss->row_left == 0) {
108 	ss->row_left = ss->row_count;
109 	s0 = 0;
110 	memset(&ss->prev[1], 0, sizeof(uint) * (s_PDiff_max_Colors - 1));
111     }
112     {
113 	int rcount = pr->limit - p;
114 	int wcount = pw->limit - q;
115 
116 	if (ss->row_left < rcount)
117 	    rcount = ss->row_left;
118 	count = (wcount < rcount ? (status = 1, wcount) : rcount);
119     }
120     final = (last && !status ? 1 : nb);
121     ss->row_left -= count;
122 
123     /*
124      * Encoding and decoding are fundamentally different.
125      * Encoding computes E[i] = D[i] - D[i-1];
126      * decoding computes D[i] = E[i] + D[i-1].
127      * Nevertheless, the loop structures are similar enough that
128      * we put the code for both functions in the same place.
129      *
130      * We only optimize BitsPerComponent = 1, 3, and 4, which
131      * correspond to the common color spaces.  (In some cases, it's still
132      * simpler to provide a separate loop for BPC = 2.)
133      */
134 
135 #define LOOP_BY(n, body)\
136   for (; count >= n; count -= n) p += n, q += n, body
137 
138     switch (ss->case_index) {
139 
140 	    /* 1 bit per component */
141 
142 #define ENCODE1_LOOP(ee)\
143   LOOP_BY(1, (t = *p, *q = ee, s0 = t)); break
144 
145 #define ENCODE_ALIGNED_LOOP(ee)\
146   BEGIN\
147     ss->prev[0] = s0;\
148     for (; count >= final; count -= ndone) {\
149 	ndone = min(count, nb);\
150 	for (ci = 0; ci < ndone; ++ci)\
151 	    t = *++p, *++q = ee, ss->prev[ci] = t;\
152     }\
153     s0 = ss->prev[0];\
154   END
155 
156 #define ENCODE_UNALIGNED_LOOP(shift, cshift, de)\
157   BEGIN\
158     for (; count >= final; count -= ndone) {\
159 	ndone = min(count, nb);\
160 	for (ci = 1; ci <= ndone; ++ci) {\
161 	    ++p;\
162 	    t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
163 	    *++q = de;\
164 	    s0 = ss->prev[ci];\
165 	    ss->prev[ci] = *p;\
166 	}\
167     }\
168   END
169 
170 	case cEncode + cBits1 + 0:
171 	case cEncode + cBits1 + 2:
172 	    if (colors < 8) {	/* 2,5,6,7 */
173 		int cshift = 8 - colors;
174 
175 		ENCODE1_LOOP(t ^ ((s0 << cshift) | (t >> colors)));
176 	    } else if (colors & 7) {
177 		int shift = colors & 7;
178 		int cshift = 8 - shift;
179 
180 		ENCODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
181 	    } else {
182 		ENCODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
183 	    }
184 	    break;
185 
186 	case cEncode + cBits1 + 1:
187 	    ENCODE1_LOOP(t ^ ((s0 << 7) | (t >> 1)));
188 	case cEncode + cBits1 + 3:
189 	    ENCODE1_LOOP(t ^ ((s0 << 5) | (t >> 3)));
190 	case cEncode + cBits1 + 4:
191 	    ENCODE1_LOOP(t ^ ((s0 << 4) | (t >> 4)));
192 
193 #define DECODE1_LOOP(te, de)\
194   LOOP_BY(1, (t = te, s0 = *q = de)); break
195 
196 #define DECODE_ALIGNED_LOOP(de)\
197   BEGIN\
198     ss->prev[0] = s0;\
199     for (; count >= final; count -= ndone) {\
200 	ndone = min(count, nb);\
201 	for (ci = 0; ci < ndone; ++ci)\
202 	    t = *++p, ss->prev[ci] = *++q = de;\
203     }\
204     s0 = ss->prev[0];\
205   END
206 
207 #define DECODE_UNALIGNED_LOOP(shift, cshift, de)\
208   BEGIN\
209     for (; count >= final; count -= ndone) {\
210 	ndone = min(count, nb);\
211 	for (ci = 1; ci <= ndone; ++ci) {\
212 	    ++p, ++q;\
213 	    t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
214 	    s0 = ss->prev[ci];\
215 	    ss->prev[ci] = *q = de;\
216 	}\
217     }\
218   END
219 
220 	case cDecode + cBits1 + 0:
221 	    if (colors < 8) {	/* 5,6,7 */
222 		int cshift = 8 - colors;
223 
224 		DECODE1_LOOP(*p ^ (s0 << cshift), t ^ (t >> colors));
225 	    } else if (colors & 7) {
226 		int shift = colors & 7;
227 		int cshift = 8 - shift;
228 
229 		DECODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
230 	    } else {
231 		DECODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
232 	    }
233 	    break;
234 
235 	case cDecode + cBits1 + 1:
236 	    DECODE1_LOOP(*p ^ (s0 << 7),
237 			 (t ^= t >> 1, t ^= t >> 2, t ^ (t >> 4)));
238 	case cDecode + cBits1 + 2:
239 	    DECODE1_LOOP(*p ^ (s0 << 6),
240 			 (t ^= (t >> 2), t ^ (t >> 4)));
241 	case cDecode + cBits1 + 3:
242 	    DECODE1_LOOP(*p ^ (s0 << 5),
243 			 t ^ (t >> 3) ^ (t >> 6));
244 	case cDecode + cBits1 + 4:
245 	    DECODE1_LOOP(*p ^ (s0 << 4),
246 			 t ^ (t >> 4));
247 
248 	    /* 2 bits per component */
249 
250 #define ADD4X2(a, b) ( (((a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
251 /* The following computation looks very implausible, but it is correct. */
252 #define SUB4X2(a, b) ( ((~(a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
253 
254 	case cEncode + cBits2 + 0:
255 	    if (colors & 7) {
256 		int shift = (colors & 3) << 1;
257 		int cshift = 8 - shift;
258 
259 		ENCODE_UNALIGNED_LOOP(shift, cshift, SUB4X2(*p, t));
260 	    } else {
261 		ENCODE_ALIGNED_LOOP(SUB4X2(t, ss->prev[ci]));
262 	    }
263 	    break;
264 
265 	case cEncode + cBits2 + 1:
266 	    ENCODE1_LOOP((s0 = (s0 << 6) | (t >> 2), SUB4X2(t, s0)));
267 	case cEncode + cBits2 + 2:
268 	    ENCODE1_LOOP((s0 = (s0 << 4) | (t >> 4), SUB4X2(t, s0)));
269 	case cEncode + cBits2 + 3:
270 	    ENCODE1_LOOP((s0 = (s0 << 2) | (t >> 6), SUB4X2(t, s0)));
271 	case cEncode + cBits2 + 4:
272 	    ENCODE1_LOOP(SUB4X2(t, s0));
273 
274 	case cDecode + cBits2 + 0:
275 	    if (colors & 7) {
276 		int shift = (colors & 3) << 1;
277 		int cshift = 8 - shift;
278 
279 		DECODE_UNALIGNED_LOOP(shift, cshift, ADD4X2(*p, t));
280 	    } else {
281 		DECODE_ALIGNED_LOOP(ADD4X2(t, ss->prev[ci]));
282 	    }
283 	    break;
284 
285 	case cDecode + cBits2 + 1:
286 	    DECODE1_LOOP(*p + (s0 << 6),
287 			 (t = ADD4X2(t >> 2, t), ADD4X2(t >> 4, t)));
288 	case cDecode + cBits2 + 2:
289 	    DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 4), ADD4X2(t >> 4, t)));
290 	case cDecode + cBits2 + 3:
291 	    DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 2), ADD4X2(t >> 6, t)));
292 	case cDecode + cBits2 + 4:
293 	    DECODE1_LOOP(*p, ADD4X2(t, s0));
294 
295 #undef ADD4X2
296 #undef SUB4X2
297 
298 	    /* 4 bits per component */
299 
300 #define ADD2X4(a, b) ( (((a) + (b)) & 0xf) + ((a) & 0xf0) + ((b) & 0xf0) )
301 #define ADD2X4R4(a) ( (((a) + ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
302 #define SUB2X4(a, b) ( (((a) - (b)) & 0xf) + ((a) & 0xf0) - ((b) & 0xf0) )
303 #define SUB2X4R4(a) ( (((a) - ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
304 
305 	case cEncode + cBits4 + 0:
306 	case cEncode + cBits4 + 2:
307     enc4:
308 	    if (colors & 1) {
309 		ENCODE_UNALIGNED_LOOP(4, 4, SUB2X4(*p, t));
310 	    } else {
311 		ENCODE_ALIGNED_LOOP(SUB2X4(t, ss->prev[ci]));
312 	    }
313 	    break;
314 
315 	case cEncode + cBits4 + 1:
316 	    ENCODE1_LOOP(((t - (s0 << 4)) & 0xf0) | ((t - (t >> 4)) & 0xf));
317 
318 	case cEncode + cBits4 + 3: {
319 	    uint s1 = ss->prev[1];
320 
321 	    LOOP_BY(1,
322 		    (t = *p,
323 		     *q = ((t - (s0 << 4)) & 0xf0) | ((t - (s1 >> 4)) & 0xf),
324 		     s0 = s1, s1 = t));
325 	    ss->prev[1] = s1;
326 	} break;
327 
328 	case cEncode + cBits4 + 4: {
329 	    uint s1 = ss->prev[1];
330 
331 	    LOOP_BY(2,
332 		    (t = p[-1], q[-1] = SUB2X4(t, s0), s0 = t,
333 		     t = *p, *q = SUB2X4(t, s1), s1 = t));
334 	    ss->prev[1] = s1;
335 	    goto enc4;		/* handle leftover bytes */
336 	}
337 
338 	case cDecode + cBits4 + 0:
339 	case cDecode + cBits4 + 2:
340     dec4:
341 	    if (colors & 1) {
342 		DECODE_UNALIGNED_LOOP(4, 4, ADD2X4(*p, t));
343 	    } else {
344 		DECODE_ALIGNED_LOOP(ADD2X4(t, ss->prev[ci]));
345 	    }
346 	    break;
347 
348 	case cDecode + cBits4 + 1:
349 	    DECODE1_LOOP(*p + (s0 << 4), ADD2X4R4(t));
350 
351 	case cDecode + cBits4 + 3: {
352 	    uint s1 = ss->prev[1];
353 
354 	    LOOP_BY(1, (t = (s0 << 4) + (s1 >> 4),
355 			s0 = s1, s1 = *q = ADD2X4(*p, t)));
356 	    ss->prev[1] = s1;
357 	} break;
358 
359 	case cDecode + cBits4 + 4: {
360 	    uint s1 = ss->prev[1];
361 
362 	    LOOP_BY(2,
363 		    (t = p[-1], s0 = q[-1] = ADD2X4(s0, t),
364 		     t = *p, s1 = *q = ADD2X4(s1, t)));
365 	    ss->prev[1] = s1;
366 	    goto dec4;		/* handle leftover bytes */
367 	}
368 
369 #undef ADD2X4
370 #undef ADD2X4R4
371 #undef SUB2X4
372 #undef SUB2X4R4
373 
374 	    /* 8 bits per component */
375 
376 #define ENCODE8(s, d) (q[d] = p[d] - s, s = p[d])
377 #define DECODE8(s, d) q[d] = s += p[d]
378 
379 	case cEncode + cBits8 + 0:
380 	case cEncode + cBits8 + 2:
381 	    ss->prev[0] = s0;
382 	    for (; count >= colors; count -= colors)
383 		for (ci = 0; ci < colors; ++ci) {
384 		    *++q = *++p - ss->prev[ci];
385 		    ss->prev[ci] = *p;
386 		}
387 	    s0 = ss->prev[0];
388     enc8:   /* Handle leftover bytes. */
389 	    if (last && !status)
390 		for (ci = 0; ci < count; ++ci)
391 		    *++q = *++p - ss->prev[ci],
392 			ss->prev[ci] = *p;
393 	    break;
394 
395 	case cDecode + cBits8 + 0:
396 	case cDecode + cBits8 + 2:
397 	    ss->prev[0] = s0;
398 	    for (; count >= colors; count -= colors)
399 		for (ci = 0; ci < colors; ++ci)
400 		    *++q = ss->prev[ci] += *++p;
401 	    s0 = ss->prev[0];
402     dec8:   /* Handle leftover bytes. */
403 	    if (last && !status)
404 		for (ci = 0; ci < count; ++ci)
405 		    *++q = ss->prev[ci] += *++p;
406 	    break;
407 
408 	case cEncode + cBits8 + 1:
409 	    LOOP_BY(1, ENCODE8(s0, 0));
410 	    break;
411 
412 	case cDecode + cBits8 + 1:
413 	    LOOP_BY(1, DECODE8(s0, 0));
414 	    break;
415 
416 	case cEncode + cBits8 + 3: {
417 	    uint s1 = ss->prev[1], s2 = ss->prev[2];
418 
419 	    LOOP_BY(3, (ENCODE8(s0, -2), ENCODE8(s1, -1),
420 			ENCODE8(s2, 0)));
421 	    ss->prev[1] = s1, ss->prev[2] = s2;
422 	    goto enc8;
423 	}
424 
425 	case cDecode + cBits8 + 3: {
426 	    uint s1 = ss->prev[1], s2 = ss->prev[2];
427 
428 	    LOOP_BY(3, (DECODE8(s0, -2), DECODE8(s1, -1),
429 			DECODE8(s2, 0)));
430 	    ss->prev[1] = s1, ss->prev[2] = s2;
431 	    goto dec8;
432 	} break;
433 
434 	case cEncode + cBits8 + 4: {
435 	    uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
436 
437 	    LOOP_BY(4, (ENCODE8(s0, -3), ENCODE8(s1, -2),
438 			ENCODE8(s2, -1), ENCODE8(s3, 0)));
439 	    ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
440 	    goto enc8;
441 	} break;
442 
443 	case cDecode + cBits8 + 4: {
444 	    uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
445 
446 	    LOOP_BY(4, (DECODE8(s0, -3), DECODE8(s1, -2),
447 			DECODE8(s2, -1), DECODE8(s3, 0)));
448 	    ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
449 	    goto dec8;
450 	} break;
451 
452 #undef ENCODE8
453 #undef DECODE8
454 
455 	    /* 16 bits per component */
456 
457 #define ENCODE16(s, d) (ti = ((p[d-1] << 8) + p[d]) - s, \
458 	    q[d-1] = ti >> 8, q[d] = t & 0xff, s = ti)
459 #define DECODE16(s, d) (s = 0xffff & (s + ((p[d-1] << 8) + p[d])), \
460 	    q[d-1] = s >> 8, q[d] = s && 0xff)
461 
462 	case cEncode + cBits16 + 0:
463 	case cEncode + cBits16 + 2:
464 	    ss->prev[0] = s0;
465 	    for (; count >= colors; count -= colors)
466 		for (ci = 0; ci < colors; ++ci) {
467 		    ti = (int)*++p << 8;
468 		    ti = (ti + *++p) - ss->prev[ci];
469 		    *++q = ti >> 8; *++q = ti & 0xff;
470 		    ss->prev[ci] = ti;
471 		}
472 	    s0 = ss->prev[0];
473     enc16:   /* Handle leftover bytes. */
474 	    if (last && !status)
475 		for (ci = 0; ci < count; ++ci)
476 		    *++q = *++p - ss->prev[ci],
477 			ss->prev[ci] = *p;
478 	    break;
479 
480 	case cDecode + cBits16 + 0:
481 	case cDecode + cBits16 + 2:
482 	    ss->prev[0] = s0;
483 	    for (; count >= colors; count -= colors)
484 		for (ci = 0; ci < colors; ++ci) {
485 		    ti = *++p >> 8;
486 		    ss->prev[ci] += ti + *++p;
487 		    *++q = ss->prev[ci] >> 8;
488 		    *++q = ss->prev[ci] & 0xff;
489 		}
490 	    s0 = ss->prev[0];
491     dec16:   /* Ignore leftover bytes. */
492 	    break;
493 
494 	case cEncode + cBits16 + 1:
495 	    LOOP_BY(2, ENCODE16(s0, 0));
496 	    break;
497 
498 	case cDecode + cBits16 + 1:
499 	    LOOP_BY(2, DECODE16(s0, 0));
500 	    break;
501 
502 	case cEncode + cBits16 + 3: {
503 	    uint s1 = ss->prev[1], s2 = ss->prev[2];
504 
505 	    LOOP_BY(6, (ENCODE16(s0, -4), ENCODE16(s1, -2),
506 			ENCODE16(s2, 0)));
507 	    ss->prev[1] = s1, ss->prev[2] = s2;
508 	    goto enc16;
509 	}
510 
511 	case cDecode + cBits16 + 3: {
512 	    uint s1 = ss->prev[1], s2 = ss->prev[2];
513 
514 	    LOOP_BY(6, (DECODE16(s0, -4), DECODE16(s1, -2),
515 			DECODE16(s2, 0)));
516 	    ss->prev[1] = s1, ss->prev[2] = s2;
517 	    goto dec16;
518 	} break;
519 
520 	case cEncode + cBits16 + 4: {
521 	    uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
522 
523 	    LOOP_BY(8, (ENCODE16(s0, -6), ENCODE16(s1, -4),
524 			ENCODE16(s2, -2), ENCODE16(s3, 0)));
525 	    ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
526 	    goto enc16;
527 	} break;
528 
529 	case cDecode + cBits16 + 4: {
530 	    uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
531 
532 	    LOOP_BY(8, (DECODE16(s0, -6), DECODE16(s1, -4),
533 			DECODE16(s2, -2), DECODE16(s3, 0)));
534 	    ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
535 	    goto dec16;
536 	} break;
537 
538 #undef ENCODE16
539 #undef DECODE16
540 
541     }
542 #undef LOOP_BY
543 #undef ENCODE1_LOOP
544 #undef DECODE1_LOOP
545     ss->row_left += count;	/* leftover bytes are possible */
546     if (ss->row_left == 0) {
547 	if (end_mask != 0)
548 	    *q = (*q & ~end_mask) | (*p & end_mask);
549 	if (p < pr->limit && q < pw->limit)
550 	    goto row;
551     }
552     ss->prev[0] = s0;
553     pr->ptr = p;
554     pw->ptr = q;
555     return status;
556 }
557 
558 /* Stream templates */
559 const stream_template s_PDiffE_template = {
560     &st_PDiff_state, s_PDiffE_init, s_PDiff_process, 1, 1, NULL,
561     s_PDiff_set_defaults, s_PDiff_reinit
562 };
563 const stream_template s_PDiffD_template = {
564     &st_PDiff_state, s_PDiffD_init, s_PDiff_process, 1, 1, NULL,
565     s_PDiff_set_defaults, s_PDiff_reinit
566 };
567