1 /* Copyright (C) 1996, 1997, 1998, 1999 Aladdin Enterprises. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under license and may not be copied,
7 modified or distributed except as expressly authorized under the terms
8 of the license contained in the file LICENSE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostscript.com/licensing/. For information on
12 commercial licensing, go to http://www.artifex.com/licensing/ or
13 contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14 San Rafael, CA 94903, U.S.A., +1(415)492-9861.
15 */
16
17 /* $Id: gsflip.c,v 1.5 2002/06/16 05:48:55 lpd Exp $ */
18 /* Routines for "flipping" image data */
19 #include "gx.h"
20 #include "gserrors.h" /* for rangecheck in sample macros */
21 #include "gsbitops.h"
22 #include "gsbittab.h"
23 #include "gsflip.h"
24
/*
 * Controls the #if ARCH_HAS_BYTE_REGS test later in this file, which
 * chooses the type used for byte-sized temporaries.
 */
#define ARCH_HAS_BYTE_REGS 1

/*
 * Transpose a block of bits between registers.
 *
 * For every bit position i that is set in mask, bit i of r is exchanged
 * with bit (i + shift) of s; all other bits of r and s are unchanged.
 *
 * Requirements on the caller:
 *   - an assignable variable named 'temp' must be in scope (it is used
 *     as scratch storage and is overwritten);
 *   - r and s must be lvalues without side effects, since each is
 *     evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. under an unbraced if/else), and every argument
 * is parenthesized so that expression arguments keep their meaning.
 */
#define TRANSPOSE(r,s,mask,shift)\
  do {\
    (r) ^= (temp = (((s) >> (shift)) ^ (r)) & (mask));\
    (s) ^= temp << (shift);\
  } while (0)
31
32 /* Define the size of byte temporaries. On Intel CPUs, this should be */
33 /* byte, but on all other CPUs, it should be uint. */
34 #if ARCH_HAS_BYTE_REGS
35 typedef byte byte_var;
36 #else
37 typedef uint byte_var;
38 #endif
39
/*
 * Shorthand for bit_table_8 with a first argument of 0.  The remaining
 * eight arguments are passed through unchanged, in the same order.
 * (bit_table_8 is defined outside this file; presumably in gsbittab.h.)
 */
#define VTAB(m80, m40, m20, m10, m08, m04, m02, m01)\
  bit_table_8(0, m80, m40, m20, m10, m08, m04, m02, m01)
42
43 /* Convert 3Mx1 to 3x1. */
44 private int
flip3x1(byte * buffer,const byte ** planes,int offset,int nbytes)45 flip3x1(byte * buffer, const byte ** planes, int offset, int nbytes)
46 {
47 byte *out = buffer;
48 const byte *in1 = planes[0] + offset;
49 const byte *in2 = planes[1] + offset;
50 const byte *in3 = planes[2] + offset;
51 int n = nbytes;
52 static const bits32 tab3x1[256] = {
53 VTAB(0x800000, 0x100000, 0x20000, 0x4000, 0x800, 0x100, 0x20, 4)
54 };
55
56 for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
57 bits32 b24 = tab3x1[*in1] | (tab3x1[*in2] >> 1) | (tab3x1[*in3] >> 2);
58
59 out[0] = (byte) (b24 >> 16);
60 out[1] = (byte) (b24 >> 8);
61 out[2] = (byte) b24;
62 }
63 return 0;
64 }
65
66 /* Convert 3Mx2 to 3x2. */
67 private int
flip3x2(byte * buffer,const byte ** planes,int offset,int nbytes)68 flip3x2(byte * buffer, const byte ** planes, int offset, int nbytes)
69 {
70 byte *out = buffer;
71 const byte *in1 = planes[0] + offset;
72 const byte *in2 = planes[1] + offset;
73 const byte *in3 = planes[2] + offset;
74 int n = nbytes;
75 static const bits32 tab3x2[256] = {
76 VTAB(0x800000, 0x400000, 0x20000, 0x10000, 0x800, 0x400, 0x20, 0x10)
77 };
78
79 for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
80 bits32 b24 = tab3x2[*in1] | (tab3x2[*in2] >> 2) | (tab3x2[*in3] >> 4);
81
82 out[0] = (byte) (b24 >> 16);
83 out[1] = (byte) (b24 >> 8);
84 out[2] = (byte) b24;
85 }
86 return 0;
87 }
88
89 /* Convert 3Mx4 to 3x4. */
90 private int
flip3x4(byte * buffer,const byte ** planes,int offset,int nbytes)91 flip3x4(byte * buffer, const byte ** planes, int offset, int nbytes)
92 {
93 byte *out = buffer;
94 const byte *in1 = planes[0] + offset;
95 const byte *in2 = planes[1] + offset;
96 const byte *in3 = planes[2] + offset;
97 int n = nbytes;
98
99 for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
100 byte_var b1 = *in1, b2 = *in2, b3 = *in3;
101
102 out[0] = (b1 & 0xf0) | (b2 >> 4);
103 out[1] = (b3 & 0xf0) | (b1 & 0xf);
104 out[2] = (byte) (b2 << 4) | (b3 & 0xf);
105 }
106 return 0;
107 }
108
109 /* Convert 3Mx8 to 3x8. */
110 private int
flip3x8(byte * buffer,const byte ** planes,int offset,int nbytes)111 flip3x8(byte * buffer, const byte ** planes, int offset, int nbytes)
112 {
113 byte *out = buffer;
114 const byte *in1 = planes[0] + offset;
115 const byte *in2 = planes[1] + offset;
116 const byte *in3 = planes[2] + offset;
117 int n = nbytes;
118
119 for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
120 out[0] = *in1;
121 out[1] = *in2;
122 out[2] = *in3;
123 }
124 return 0;
125 }
126
127 /* Convert 3Mx12 to 3x12. */
128 private int
flip3x12(byte * buffer,const byte ** planes,int offset,int nbytes)129 flip3x12(byte * buffer, const byte ** planes, int offset, int nbytes)
130 {
131 byte *out = buffer;
132 const byte *pa = planes[0] + offset;
133 const byte *pb = planes[1] + offset;
134 const byte *pc = planes[2] + offset;
135 int n = nbytes;
136
137 /*
138 * We assume that the input is an integral number of pixels, and
139 * round up n to a multiple of 3.
140 */
141 for (; n > 0; out += 9, pa += 3, pb += 3, pc += 3, n -= 3) {
142 byte_var a1 = pa[1], b0 = pb[0], b1 = pb[1], b2 = pb[2], c1 = pc[1];
143
144 out[0] = pa[0];
145 out[1] = (a1 & 0xf0) | (b0 >> 4);
146 out[2] = (byte) ((b0 << 4) | (b1 >> 4));
147 out[3] = pc[0];
148 out[4] = (c1 & 0xf0) | (a1 & 0xf);
149 out[5] = pa[2];
150 out[6] = (byte) ((b1 << 4) | (b2 >> 4));
151 out[7] = (byte) ((b2 << 4) | (c1 & 0xf));
152 out[8] = pc[2];
153 }
154 return 0;
155 }
156
157 /* Convert 4Mx1 to 4x1. */
158 private int
flip4x1(byte * buffer,const byte ** planes,int offset,int nbytes)159 flip4x1(byte * buffer, const byte ** planes, int offset, int nbytes)
160 {
161 byte *out = buffer;
162 const byte *in1 = planes[0] + offset;
163 const byte *in2 = planes[1] + offset;
164 const byte *in3 = planes[2] + offset;
165 const byte *in4 = planes[3] + offset;
166 int n = nbytes;
167
168 for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
169 byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4;
170 byte_var temp;
171
172 /* Transpose blocks of 1 */
173 TRANSPOSE(b1, b2, 0x55, 1);
174 TRANSPOSE(b3, b4, 0x55, 1);
175 /* Transpose blocks of 2 */
176 TRANSPOSE(b1, b3, 0x33, 2);
177 TRANSPOSE(b2, b4, 0x33, 2);
178 /* There's probably a faster way to do this.... */
179 out[0] = (b1 & 0xf0) | (b2 >> 4);
180 out[1] = (b3 & 0xf0) | (b4 >> 4);
181 out[2] = (byte) ((b1 << 4) | (b2 & 0xf));
182 out[3] = (byte) ((b3 << 4) | (b4 & 0xf));
183 }
184 return 0;
185 }
186
187 /* Convert 4Mx2 to 4x2. */
188 private int
flip4x2(byte * buffer,const byte ** planes,int offset,int nbytes)189 flip4x2(byte * buffer, const byte ** planes, int offset, int nbytes)
190 {
191 byte *out = buffer;
192 const byte *in1 = planes[0] + offset;
193 const byte *in2 = planes[1] + offset;
194 const byte *in3 = planes[2] + offset;
195 const byte *in4 = planes[3] + offset;
196 int n = nbytes;
197
198 for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
199 byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4;
200 byte_var temp;
201
202 /* Transpose blocks of 4x2 */
203 TRANSPOSE(b1, b3, 0x0f, 4);
204 TRANSPOSE(b2, b4, 0x0f, 4);
205 /* Transpose blocks of 2x1 */
206 TRANSPOSE(b1, b2, 0x33, 2);
207 TRANSPOSE(b3, b4, 0x33, 2);
208 out[0] = b1;
209 out[1] = b2;
210 out[2] = b3;
211 out[3] = b4;
212 }
213 return 0;
214 }
215
216 /* Convert 4Mx4 to 4x4. */
217 private int
flip4x4(byte * buffer,const byte ** planes,int offset,int nbytes)218 flip4x4(byte * buffer, const byte ** planes, int offset, int nbytes)
219 {
220 byte *out = buffer;
221 const byte *in1 = planes[0] + offset;
222 const byte *in2 = planes[1] + offset;
223 const byte *in3 = planes[2] + offset;
224 const byte *in4 = planes[3] + offset;
225 int n = nbytes;
226
227 for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
228 byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4;
229
230 out[0] = (b1 & 0xf0) | (b2 >> 4);
231 out[1] = (b3 & 0xf0) | (b4 >> 4);
232 out[2] = (byte) ((b1 << 4) | (b2 & 0xf));
233 out[3] = (byte) ((b3 << 4) | (b4 & 0xf));
234 }
235 return 0;
236 }
237
238 /* Convert 4Mx8 to 4x8. */
239 private int
flip4x8(byte * buffer,const byte ** planes,int offset,int nbytes)240 flip4x8(byte * buffer, const byte ** planes, int offset, int nbytes)
241 {
242 byte *out = buffer;
243 const byte *in1 = planes[0] + offset;
244 const byte *in2 = planes[1] + offset;
245 const byte *in3 = planes[2] + offset;
246 const byte *in4 = planes[3] + offset;
247 int n = nbytes;
248
249 for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
250 out[0] = *in1;
251 out[1] = *in2;
252 out[2] = *in3;
253 out[3] = *in4;
254 }
255 return 0;
256 }
257
258 /* Convert 4Mx12 to 4x12. */
259 private int
flip4x12(byte * buffer,const byte ** planes,int offset,int nbytes)260 flip4x12(byte * buffer, const byte ** planes, int offset, int nbytes)
261 {
262 byte *out = buffer;
263 const byte *pa = planes[0] + offset;
264 const byte *pb = planes[1] + offset;
265 const byte *pc = planes[2] + offset;
266 const byte *pd = planes[3] + offset;
267 int n = nbytes;
268
269 /*
270 * We assume that the input is an integral number of pixels, and
271 * round up n to a multiple of 3.
272 */
273 for (; n > 0; out += 12, pa += 3, pb += 3, pc += 3, pd += 3, n -= 3) {
274 byte_var a1 = pa[1], b1 = pb[1], c1 = pc[1], d1 = pd[1];
275
276 {
277 byte_var v0;
278
279 out[0] = pa[0];
280 v0 = pb[0];
281 out[1] = (a1 & 0xf0) | (v0 >> 4);
282 out[2] = (byte) ((v0 << 4) | (b1 >> 4));
283 out[3] = pc[0];
284 v0 = pd[0];
285 out[4] = (c1 & 0xf0) | (v0 >> 4);
286 out[5] = (byte) ((v0 << 4) | (d1 >> 4));
287 }
288 {
289 byte_var v2;
290
291 v2 = pa[2];
292 out[6] = (byte) ((a1 << 4) | (v2 >> 4));
293 out[7] = (byte) ((v2 << 4) | (b1 & 0xf));
294 out[8] = pb[2];
295 v2 = pc[2];
296 out[9] = (byte) ((c1 << 4) | (v2 >> 4));
297 out[10] = (byte) ((v2 << 4) | (d1 & 0xf));
298 out[11] = pd[2];
299 }
300 }
301 return 0;
302 }
303
304 /* Convert NMx{1,2,4,8} to Nx{1,2,4,8}. */
305 private int
flipNx1to8(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int bits_per_sample)306 flipNx1to8(byte * buffer, const byte ** planes, int offset, int nbytes,
307 int num_planes, int bits_per_sample)
308 {
309 /* This is only needed for DeviceN colors, so it can be slow. */
310 uint mask = (1 << bits_per_sample) - 1;
311 int bi, pi;
312 sample_store_declare_setup(dptr, dbit, dbbyte, buffer, 0, bits_per_sample);
313
314 for (bi = 0; bi < nbytes * 8; bi += bits_per_sample) {
315 for (pi = 0; pi < num_planes; ++pi) {
316 const byte *sptr = planes[pi] + offset + (bi >> 3);
317 uint value = (*sptr >> (8 - (bi & 7) - bits_per_sample)) & mask;
318
319 sample_store_next8(value, dptr, dbit, bits_per_sample, dbbyte);
320 }
321 }
322 sample_store_flush(dptr, dbit, bits_per_sample, dbbyte);
323 return 0;
324 }
325
326 /* Convert NMx12 to Nx12. */
327 private int
flipNx12(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int ignore_bits_per_sample)328 flipNx12(byte * buffer, const byte ** planes, int offset, int nbytes,
329 int num_planes, int ignore_bits_per_sample)
330 {
331 /* This is only needed for DeviceN colors, so it can be slow. */
332 int bi, pi;
333 sample_store_declare_setup(dptr, dbit, dbbyte, buffer, 0, 12);
334
335 for (bi = 0; bi < nbytes * 8; bi += 12) {
336 for (pi = 0; pi < num_planes; ++pi) {
337 const byte *sptr = planes[pi] + offset + (bi >> 3);
338 uint value =
339 (bi & 4 ? ((*sptr & 0xf) << 8) | sptr[1] :
340 (*sptr << 4) | (sptr[1] >> 4));
341
342 sample_store_next_12(value, dptr, dbit, dbbyte);
343 }
344 }
345 sample_store_flush(dptr, dbit, 12, dbbyte);
346 return 0;
347 }
348
349 /* Flip data given number of planes and bits per pixel. */
350 typedef int (*image_flip_proc) (byte *, const byte **, int, int);
351 private int
flip_fail(byte * buffer,const byte ** planes,int offset,int nbytes)352 flip_fail(byte * buffer, const byte ** planes, int offset, int nbytes)
353 {
354 return -1;
355 }
356 private const image_flip_proc image_flip3_procs[13] = {
357 flip_fail, flip3x1, flip3x2, flip_fail, flip3x4,
358 flip_fail, flip_fail, flip_fail, flip3x8,
359 flip_fail, flip_fail, flip_fail, flip3x12
360 };
361 private const image_flip_proc image_flip4_procs[13] = {
362 flip_fail, flip4x1, flip4x2, flip_fail, flip4x4,
363 flip_fail, flip_fail, flip_fail, flip4x8,
364 flip_fail, flip_fail, flip_fail, flip4x12
365 };
366 typedef int (*image_flipN_proc) (byte *, const byte **, int, int, int, int);
367 private int
flipN_fail(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int bits_per_sample)368 flipN_fail(byte * buffer, const byte ** planes, int offset, int nbytes,
369 int num_planes, int bits_per_sample)
370 {
371 return -1;
372 }
373 private const image_flipN_proc image_flipN_procs[13] = {
374 flipN_fail, flipNx1to8, flipNx1to8, flipN_fail, flipNx1to8,
375 flipN_fail, flipN_fail, flipN_fail, flipNx1to8,
376 flipN_fail, flipN_fail, flipN_fail, flipNx12
377 };
378
379 /* Here is the public interface to all of the above. */
380 int
image_flip_planes(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int bits_per_sample)381 image_flip_planes(byte * buffer, const byte ** planes, int offset, int nbytes,
382 int num_planes, int bits_per_sample)
383 {
384 if (bits_per_sample < 1 || bits_per_sample > 12)
385 return -1;
386 switch (num_planes) {
387
388 case 3:
389 return image_flip3_procs[bits_per_sample]
390 (buffer, planes, offset, nbytes);
391 case 4:
392 return image_flip4_procs[bits_per_sample]
393 (buffer, planes, offset, nbytes);
394 default:
395 if (num_planes < 0)
396 return -1;
397 return image_flipN_procs[bits_per_sample]
398 (buffer, planes, offset, nbytes, num_planes, bits_per_sample);
399 }
400 }
401