1 /* Copyright (C) 1994, 2000 Aladdin Enterprises. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under license and may not be copied,
7 modified or distributed except as expressly authorized under the terms
8 of the license contained in the file LICENSE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostscript.com/licensing/. For information on
12 commercial licensing, go to http://www.artifex.com/licensing/ or
13 contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14 San Rafael, CA 94903, U.S.A., +1(415)492-9861.
15 */
16
17 /* $Id: spdiff.c,v 1.9 2005/03/16 14:57:42 igor Exp $ */
18 /* Pixel differencing filters */
19 #include "stdio_.h" /* should be std.h, but needs NULL */
20 #include "memory_.h"
21 #include "strimpl.h"
22 #include "spdiffx.h"
23
24 /* ------ PixelDifferenceEncode/Decode ------ */
25
26 private_st_PDiff_state();
27
28 /* Define values for case dispatch. */
29 #define cBits1 0
30 #define cBits2 5
31 #define cBits4 10
32 #define cBits8 15
33 #define cBits16 20
34 #define cEncode 0
35 #define cDecode 25
36
37 /* Set defaults */
38 private void
s_PDiff_set_defaults(stream_state * st)39 s_PDiff_set_defaults(stream_state * st)
40 {
41 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
42
43 s_PDiff_set_defaults_inline(ss);
44 }
45
46 /* Common (re)initialization. */
47 private int
s_PDiff_reinit(stream_state * st)48 s_PDiff_reinit(stream_state * st)
49 {
50 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
51
52 ss->row_left = 0;
53 return 0;
54 }
55
56 /* Initialize PixelDifferenceEncode filter. */
57 private int
s_PDiffE_init(stream_state * st)58 s_PDiffE_init(stream_state * st)
59 {
60 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
61 int bits_per_row =
62 ss->Colors * ss->BitsPerComponent * ss->Columns;
63 static const byte cb_values[] = {
64 0, cBits1, cBits2, 0, cBits4, 0, 0, 0, cBits8,
65 0, 0, 0, 0, 0, 0, 0, cBits16
66 };
67
68 ss->row_count = (bits_per_row + 7) >> 3;
69 ss->end_mask = (1 << (-bits_per_row & 7)) - 1;
70 ss->case_index =
71 cb_values[ss->BitsPerComponent] +
72 (ss->Colors > 4 ? 0 : ss->Colors) + cEncode;
73 return s_PDiff_reinit(st);
74 }
75
76 /* Initialize PixelDifferenceDecode filter. */
77 private int
s_PDiffD_init(stream_state * st)78 s_PDiffD_init(stream_state * st)
79 {
80 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
81
82 s_PDiffE_init(st);
83 ss->case_index += cDecode - cEncode;
84 return 0;
85 }
86
87 /* Process a buffer. Note that this handles both Encode and Decode. */
88 private int
s_PDiff_process(stream_state * st,stream_cursor_read * pr,stream_cursor_write * pw,bool last)89 s_PDiff_process(stream_state * st, stream_cursor_read * pr,
90 stream_cursor_write * pw, bool last)
91 {
92 stream_PDiff_state *const ss = (stream_PDiff_state *) st;
93 const byte *p = pr->ptr;
94 byte *q = pw->ptr;
95 int count;
96 int status = 0;
97 uint s0 = ss->prev[0];
98 byte t = 0; /* avoid spurious compiler warnings */
99 int ti;
100 const byte end_mask = ss->end_mask;
101 int colors = ss->Colors;
102 int nb = (colors * ss->BitsPerComponent) >> 3;
103 int final;
104 int ndone, ci;
105
106 row:
107 if (ss->row_left == 0) {
108 ss->row_left = ss->row_count;
109 s0 = 0;
110 memset(&ss->prev[1], 0, sizeof(uint) * (s_PDiff_max_Colors - 1));
111 }
112 {
113 int rcount = pr->limit - p;
114 int wcount = pw->limit - q;
115
116 if (ss->row_left < rcount)
117 rcount = ss->row_left;
118 count = (wcount < rcount ? (status = 1, wcount) : rcount);
119 }
120 final = (last && !status ? 1 : nb);
121 ss->row_left -= count;
122
123 /*
124 * Encoding and decoding are fundamentally different.
125 * Encoding computes E[i] = D[i] - D[i-1];
126 * decoding computes D[i] = E[i] + D[i-1].
127 * Nevertheless, the loop structures are similar enough that
128 * we put the code for both functions in the same place.
129 *
130 * We only optimize BitsPerComponent = 1, 3, and 4, which
131 * correspond to the common color spaces. (In some cases, it's still
132 * simpler to provide a separate loop for BPC = 2.)
133 */
134
135 #define LOOP_BY(n, body)\
136 for (; count >= n; count -= n) p += n, q += n, body
137
138 switch (ss->case_index) {
139
140 /* 1 bit per component */
141
142 #define ENCODE1_LOOP(ee)\
143 LOOP_BY(1, (t = *p, *q = ee, s0 = t)); break
144
145 #define ENCODE_ALIGNED_LOOP(ee)\
146 BEGIN\
147 ss->prev[0] = s0;\
148 for (; count >= final; count -= ndone) {\
149 ndone = min(count, nb);\
150 for (ci = 0; ci < ndone; ++ci)\
151 t = *++p, *++q = ee, ss->prev[ci] = t;\
152 }\
153 s0 = ss->prev[0];\
154 END
155
156 #define ENCODE_UNALIGNED_LOOP(shift, cshift, de)\
157 BEGIN\
158 for (; count >= final; count -= ndone) {\
159 ndone = min(count, nb);\
160 for (ci = 1; ci <= ndone; ++ci) {\
161 ++p;\
162 t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
163 *++q = de;\
164 s0 = ss->prev[ci];\
165 ss->prev[ci] = *p;\
166 }\
167 }\
168 END
169
170 case cEncode + cBits1 + 0:
171 case cEncode + cBits1 + 2:
172 if (colors < 8) { /* 2,5,6,7 */
173 int cshift = 8 - colors;
174
175 ENCODE1_LOOP(t ^ ((s0 << cshift) | (t >> colors)));
176 } else if (colors & 7) {
177 int shift = colors & 7;
178 int cshift = 8 - shift;
179
180 ENCODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
181 } else {
182 ENCODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
183 }
184 break;
185
186 case cEncode + cBits1 + 1:
187 ENCODE1_LOOP(t ^ ((s0 << 7) | (t >> 1)));
188 case cEncode + cBits1 + 3:
189 ENCODE1_LOOP(t ^ ((s0 << 5) | (t >> 3)));
190 case cEncode + cBits1 + 4:
191 ENCODE1_LOOP(t ^ ((s0 << 4) | (t >> 4)));
192
193 #define DECODE1_LOOP(te, de)\
194 LOOP_BY(1, (t = te, s0 = *q = de)); break
195
196 #define DECODE_ALIGNED_LOOP(de)\
197 BEGIN\
198 ss->prev[0] = s0;\
199 for (; count >= final; count -= ndone) {\
200 ndone = min(count, nb);\
201 for (ci = 0; ci < ndone; ++ci)\
202 t = *++p, ss->prev[ci] = *++q = de;\
203 }\
204 s0 = ss->prev[0];\
205 END
206
207 #define DECODE_UNALIGNED_LOOP(shift, cshift, de)\
208 BEGIN\
209 for (; count >= final; count -= ndone) {\
210 ndone = min(count, nb);\
211 for (ci = 1; ci <= ndone; ++ci) {\
212 ++p, ++q;\
213 t = (s0 << (cshift)) | (ss->prev[ci] >> (shift));\
214 s0 = ss->prev[ci];\
215 ss->prev[ci] = *q = de;\
216 }\
217 }\
218 END
219
220 case cDecode + cBits1 + 0:
221 if (colors < 8) { /* 5,6,7 */
222 int cshift = 8 - colors;
223
224 DECODE1_LOOP(*p ^ (s0 << cshift), t ^ (t >> colors));
225 } else if (colors & 7) {
226 int shift = colors & 7;
227 int cshift = 8 - shift;
228
229 DECODE_UNALIGNED_LOOP(shift, cshift, *p ^ t);
230 } else {
231 DECODE_ALIGNED_LOOP(t ^ ss->prev[ci]);
232 }
233 break;
234
235 case cDecode + cBits1 + 1:
236 DECODE1_LOOP(*p ^ (s0 << 7),
237 (t ^= t >> 1, t ^= t >> 2, t ^ (t >> 4)));
238 case cDecode + cBits1 + 2:
239 DECODE1_LOOP(*p ^ (s0 << 6),
240 (t ^= (t >> 2), t ^ (t >> 4)));
241 case cDecode + cBits1 + 3:
242 DECODE1_LOOP(*p ^ (s0 << 5),
243 t ^ (t >> 3) ^ (t >> 6));
244 case cDecode + cBits1 + 4:
245 DECODE1_LOOP(*p ^ (s0 << 4),
246 t ^ (t >> 4));
247
248 /* 2 bits per component */
249
250 #define ADD4X2(a, b) ( (((a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
251 /* The following computation looks very implausible, but it is correct. */
252 #define SUB4X2(a, b) ( ((~(a) & (b) & 0x55) << 1) ^ (a) ^ (b) )
253
254 case cEncode + cBits2 + 0:
255 if (colors & 7) {
256 int shift = (colors & 3) << 1;
257 int cshift = 8 - shift;
258
259 ENCODE_UNALIGNED_LOOP(shift, cshift, SUB4X2(*p, t));
260 } else {
261 ENCODE_ALIGNED_LOOP(SUB4X2(t, ss->prev[ci]));
262 }
263 break;
264
265 case cEncode + cBits2 + 1:
266 ENCODE1_LOOP((s0 = (s0 << 6) | (t >> 2), SUB4X2(t, s0)));
267 case cEncode + cBits2 + 2:
268 ENCODE1_LOOP((s0 = (s0 << 4) | (t >> 4), SUB4X2(t, s0)));
269 case cEncode + cBits2 + 3:
270 ENCODE1_LOOP((s0 = (s0 << 2) | (t >> 6), SUB4X2(t, s0)));
271 case cEncode + cBits2 + 4:
272 ENCODE1_LOOP(SUB4X2(t, s0));
273
274 case cDecode + cBits2 + 0:
275 if (colors & 7) {
276 int shift = (colors & 3) << 1;
277 int cshift = 8 - shift;
278
279 DECODE_UNALIGNED_LOOP(shift, cshift, ADD4X2(*p, t));
280 } else {
281 DECODE_ALIGNED_LOOP(ADD4X2(t, ss->prev[ci]));
282 }
283 break;
284
285 case cDecode + cBits2 + 1:
286 DECODE1_LOOP(*p + (s0 << 6),
287 (t = ADD4X2(t >> 2, t), ADD4X2(t >> 4, t)));
288 case cDecode + cBits2 + 2:
289 DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 4), ADD4X2(t >> 4, t)));
290 case cDecode + cBits2 + 3:
291 DECODE1_LOOP(*p, (t = ADD4X2(t, s0 << 2), ADD4X2(t >> 6, t)));
292 case cDecode + cBits2 + 4:
293 DECODE1_LOOP(*p, ADD4X2(t, s0));
294
295 #undef ADD4X2
296 #undef SUB4X2
297
298 /* 4 bits per component */
299
300 #define ADD2X4(a, b) ( (((a) + (b)) & 0xf) + ((a) & 0xf0) + ((b) & 0xf0) )
301 #define ADD2X4R4(a) ( (((a) + ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
302 #define SUB2X4(a, b) ( (((a) - (b)) & 0xf) + ((a) & 0xf0) - ((b) & 0xf0) )
303 #define SUB2X4R4(a) ( (((a) - ((a) >> 4)) & 0xf) + ((a) & 0xf0) )
304
305 case cEncode + cBits4 + 0:
306 case cEncode + cBits4 + 2:
307 enc4:
308 if (colors & 1) {
309 ENCODE_UNALIGNED_LOOP(4, 4, SUB2X4(*p, t));
310 } else {
311 ENCODE_ALIGNED_LOOP(SUB2X4(t, ss->prev[ci]));
312 }
313 break;
314
315 case cEncode + cBits4 + 1:
316 ENCODE1_LOOP(((t - (s0 << 4)) & 0xf0) | ((t - (t >> 4)) & 0xf));
317
318 case cEncode + cBits4 + 3: {
319 uint s1 = ss->prev[1];
320
321 LOOP_BY(1,
322 (t = *p,
323 *q = ((t - (s0 << 4)) & 0xf0) | ((t - (s1 >> 4)) & 0xf),
324 s0 = s1, s1 = t));
325 ss->prev[1] = s1;
326 } break;
327
328 case cEncode + cBits4 + 4: {
329 uint s1 = ss->prev[1];
330
331 LOOP_BY(2,
332 (t = p[-1], q[-1] = SUB2X4(t, s0), s0 = t,
333 t = *p, *q = SUB2X4(t, s1), s1 = t));
334 ss->prev[1] = s1;
335 goto enc4; /* handle leftover bytes */
336 }
337
338 case cDecode + cBits4 + 0:
339 case cDecode + cBits4 + 2:
340 dec4:
341 if (colors & 1) {
342 DECODE_UNALIGNED_LOOP(4, 4, ADD2X4(*p, t));
343 } else {
344 DECODE_ALIGNED_LOOP(ADD2X4(t, ss->prev[ci]));
345 }
346 break;
347
348 case cDecode + cBits4 + 1:
349 DECODE1_LOOP(*p + (s0 << 4), ADD2X4R4(t));
350
351 case cDecode + cBits4 + 3: {
352 uint s1 = ss->prev[1];
353
354 LOOP_BY(1, (t = (s0 << 4) + (s1 >> 4),
355 s0 = s1, s1 = *q = ADD2X4(*p, t)));
356 ss->prev[1] = s1;
357 } break;
358
359 case cDecode + cBits4 + 4: {
360 uint s1 = ss->prev[1];
361
362 LOOP_BY(2,
363 (t = p[-1], s0 = q[-1] = ADD2X4(s0, t),
364 t = *p, s1 = *q = ADD2X4(s1, t)));
365 ss->prev[1] = s1;
366 goto dec4; /* handle leftover bytes */
367 }
368
369 #undef ADD2X4
370 #undef ADD2X4R4
371 #undef SUB2X4
372 #undef SUB2X4R4
373
374 /* 8 bits per component */
375
376 #define ENCODE8(s, d) (q[d] = p[d] - s, s = p[d])
377 #define DECODE8(s, d) q[d] = s += p[d]
378
379 case cEncode + cBits8 + 0:
380 case cEncode + cBits8 + 2:
381 ss->prev[0] = s0;
382 for (; count >= colors; count -= colors)
383 for (ci = 0; ci < colors; ++ci) {
384 *++q = *++p - ss->prev[ci];
385 ss->prev[ci] = *p;
386 }
387 s0 = ss->prev[0];
388 enc8: /* Handle leftover bytes. */
389 if (last && !status)
390 for (ci = 0; ci < count; ++ci)
391 *++q = *++p - ss->prev[ci],
392 ss->prev[ci] = *p;
393 break;
394
395 case cDecode + cBits8 + 0:
396 case cDecode + cBits8 + 2:
397 ss->prev[0] = s0;
398 for (; count >= colors; count -= colors)
399 for (ci = 0; ci < colors; ++ci)
400 *++q = ss->prev[ci] += *++p;
401 s0 = ss->prev[0];
402 dec8: /* Handle leftover bytes. */
403 if (last && !status)
404 for (ci = 0; ci < count; ++ci)
405 *++q = ss->prev[ci] += *++p;
406 break;
407
408 case cEncode + cBits8 + 1:
409 LOOP_BY(1, ENCODE8(s0, 0));
410 break;
411
412 case cDecode + cBits8 + 1:
413 LOOP_BY(1, DECODE8(s0, 0));
414 break;
415
416 case cEncode + cBits8 + 3: {
417 uint s1 = ss->prev[1], s2 = ss->prev[2];
418
419 LOOP_BY(3, (ENCODE8(s0, -2), ENCODE8(s1, -1),
420 ENCODE8(s2, 0)));
421 ss->prev[1] = s1, ss->prev[2] = s2;
422 goto enc8;
423 }
424
425 case cDecode + cBits8 + 3: {
426 uint s1 = ss->prev[1], s2 = ss->prev[2];
427
428 LOOP_BY(3, (DECODE8(s0, -2), DECODE8(s1, -1),
429 DECODE8(s2, 0)));
430 ss->prev[1] = s1, ss->prev[2] = s2;
431 goto dec8;
432 } break;
433
434 case cEncode + cBits8 + 4: {
435 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
436
437 LOOP_BY(4, (ENCODE8(s0, -3), ENCODE8(s1, -2),
438 ENCODE8(s2, -1), ENCODE8(s3, 0)));
439 ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
440 goto enc8;
441 } break;
442
443 case cDecode + cBits8 + 4: {
444 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
445
446 LOOP_BY(4, (DECODE8(s0, -3), DECODE8(s1, -2),
447 DECODE8(s2, -1), DECODE8(s3, 0)));
448 ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
449 goto dec8;
450 } break;
451
452 #undef ENCODE8
453 #undef DECODE8
454
455 /* 16 bits per component */
456
457 #define ENCODE16(s, d) (ti = ((p[d-1] << 8) + p[d]) - s, \
458 q[d-1] = ti >> 8, q[d] = t & 0xff, s = ti)
459 #define DECODE16(s, d) (s = 0xffff & (s + ((p[d-1] << 8) + p[d])), \
460 q[d-1] = s >> 8, q[d] = s && 0xff)
461
462 case cEncode + cBits16 + 0:
463 case cEncode + cBits16 + 2:
464 ss->prev[0] = s0;
465 for (; count >= colors; count -= colors)
466 for (ci = 0; ci < colors; ++ci) {
467 ti = (int)*++p << 8;
468 ti = (ti + *++p) - ss->prev[ci];
469 *++q = ti >> 8; *++q = ti & 0xff;
470 ss->prev[ci] = ti;
471 }
472 s0 = ss->prev[0];
473 enc16: /* Handle leftover bytes. */
474 if (last && !status)
475 for (ci = 0; ci < count; ++ci)
476 *++q = *++p - ss->prev[ci],
477 ss->prev[ci] = *p;
478 break;
479
480 case cDecode + cBits16 + 0:
481 case cDecode + cBits16 + 2:
482 ss->prev[0] = s0;
483 for (; count >= colors; count -= colors)
484 for (ci = 0; ci < colors; ++ci) {
485 ti = *++p >> 8;
486 ss->prev[ci] += ti + *++p;
487 *++q = ss->prev[ci] >> 8;
488 *++q = ss->prev[ci] & 0xff;
489 }
490 s0 = ss->prev[0];
491 dec16: /* Ignore leftover bytes. */
492 break;
493
494 case cEncode + cBits16 + 1:
495 LOOP_BY(2, ENCODE16(s0, 0));
496 break;
497
498 case cDecode + cBits16 + 1:
499 LOOP_BY(2, DECODE16(s0, 0));
500 break;
501
502 case cEncode + cBits16 + 3: {
503 uint s1 = ss->prev[1], s2 = ss->prev[2];
504
505 LOOP_BY(6, (ENCODE16(s0, -4), ENCODE16(s1, -2),
506 ENCODE16(s2, 0)));
507 ss->prev[1] = s1, ss->prev[2] = s2;
508 goto enc16;
509 }
510
511 case cDecode + cBits16 + 3: {
512 uint s1 = ss->prev[1], s2 = ss->prev[2];
513
514 LOOP_BY(6, (DECODE16(s0, -4), DECODE16(s1, -2),
515 DECODE16(s2, 0)));
516 ss->prev[1] = s1, ss->prev[2] = s2;
517 goto dec16;
518 } break;
519
520 case cEncode + cBits16 + 4: {
521 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
522
523 LOOP_BY(8, (ENCODE16(s0, -6), ENCODE16(s1, -4),
524 ENCODE16(s2, -2), ENCODE16(s3, 0)));
525 ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
526 goto enc16;
527 } break;
528
529 case cDecode + cBits16 + 4: {
530 uint s1 = ss->prev[1], s2 = ss->prev[2], s3 = ss->prev[3];
531
532 LOOP_BY(8, (DECODE16(s0, -6), DECODE16(s1, -4),
533 DECODE16(s2, -2), DECODE16(s3, 0)));
534 ss->prev[1] = s1, ss->prev[2] = s2, ss->prev[3] = s3;
535 goto dec16;
536 } break;
537
538 #undef ENCODE16
539 #undef DECODE16
540
541 }
542 #undef LOOP_BY
543 #undef ENCODE1_LOOP
544 #undef DECODE1_LOOP
545 ss->row_left += count; /* leftover bytes are possible */
546 if (ss->row_left == 0) {
547 if (end_mask != 0)
548 *q = (*q & ~end_mask) | (*p & end_mask);
549 if (p < pr->limit && q < pw->limit)
550 goto row;
551 }
552 ss->prev[0] = s0;
553 pr->ptr = p;
554 pw->ptr = q;
555 return status;
556 }
557
558 /* Stream templates */
559 const stream_template s_PDiffE_template = {
560 &st_PDiff_state, s_PDiffE_init, s_PDiff_process, 1, 1, NULL,
561 s_PDiff_set_defaults, s_PDiff_reinit
562 };
563 const stream_template s_PDiffD_template = {
564 &st_PDiff_state, s_PDiffD_init, s_PDiff_process, 1, 1, NULL,
565 s_PDiff_set_defaults, s_PDiff_reinit
566 };
567