xref: /plan9/sys/src/cmd/gs/src/gsflip.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 1996, 1997, 1998, 1999 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: gsflip.c,v 1.5 2002/06/16 05:48:55 lpd Exp $ */
18 /* Routines for "flipping" image data */
19 #include "gx.h"
20 #include "gserrors.h"		/* for rangecheck in sample macros */
21 #include "gsbitops.h"
22 #include "gsbittab.h"
23 #include "gsflip.h"
24 
/*
 * ARCH_HAS_BYTE_REGS selects the type used for byte temporaries in this
 * file (see byte_var).  Default it to 1, but do not override a value that
 * an architecture header or the build has already supplied.
 */
#ifndef ARCH_HAS_BYTE_REGS
#  define ARCH_HAS_BYTE_REGS 1
#endif
26 
/*
 * Transpose a block of bits between registers: the bits of r selected by
 * mask are exchanged with the bits of s selected by (mask << shift).
 * The caller must have a scratch variable named `temp' in scope.
 * The expansion is wrapped in do { } while (0) and every argument is
 * parenthesized, so the macro behaves as a single statement (safe under
 * an unbraced if/else) and accepts arbitrary expressions for mask/shift.
 */
#define TRANSPOSE(r,s,mask,shift)\
  do {\
    temp = (((s) >> (shift)) ^ (r)) & (mask);\
    (r) ^= temp;\
    (s) ^= temp << (shift);\
  } while (0)
31 
32 /* Define the size of byte temporaries.  On Intel CPUs, this should be */
33 /* byte, but on all other CPUs, it should be uint. */
34 #if ARCH_HAS_BYTE_REGS
35 typedef byte byte_var;
36 #else
37 typedef uint byte_var;
38 #endif
39 
/*
 * Shorthand for bit_table_8 with a leading 0 argument.  The eight values
 * are named after the source bits 0x80 down to 0x01; it is used below to
 * initialize the 256-entry lookup tables.
 */
#define VTAB(b80,b40,b20,b10,b08,b04,b02,b01)\
  bit_table_8(0,b80,b40,b20,b10,b08,b04,b02,b01)
42 
43 /* Convert 3Mx1 to 3x1. */
44 private int
flip3x1(byte * buffer,const byte ** planes,int offset,int nbytes)45 flip3x1(byte * buffer, const byte ** planes, int offset, int nbytes)
46 {
47     byte *out = buffer;
48     const byte *in1 = planes[0] + offset;
49     const byte *in2 = planes[1] + offset;
50     const byte *in3 = planes[2] + offset;
51     int n = nbytes;
52     static const bits32 tab3x1[256] = {
53 	VTAB(0x800000, 0x100000, 0x20000, 0x4000, 0x800, 0x100, 0x20, 4)
54     };
55 
56     for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
57 	bits32 b24 = tab3x1[*in1] | (tab3x1[*in2] >> 1) | (tab3x1[*in3] >> 2);
58 
59 	out[0] = (byte) (b24 >> 16);
60 	out[1] = (byte) (b24 >> 8);
61 	out[2] = (byte) b24;
62     }
63     return 0;
64 }
65 
66 /* Convert 3Mx2 to 3x2. */
67 private int
flip3x2(byte * buffer,const byte ** planes,int offset,int nbytes)68 flip3x2(byte * buffer, const byte ** planes, int offset, int nbytes)
69 {
70     byte *out = buffer;
71     const byte *in1 = planes[0] + offset;
72     const byte *in2 = planes[1] + offset;
73     const byte *in3 = planes[2] + offset;
74     int n = nbytes;
75     static const bits32 tab3x2[256] = {
76 	VTAB(0x800000, 0x400000, 0x20000, 0x10000, 0x800, 0x400, 0x20, 0x10)
77     };
78 
79     for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
80 	bits32 b24 = tab3x2[*in1] | (tab3x2[*in2] >> 2) | (tab3x2[*in3] >> 4);
81 
82 	out[0] = (byte) (b24 >> 16);
83 	out[1] = (byte) (b24 >> 8);
84 	out[2] = (byte) b24;
85     }
86     return 0;
87 }
88 
89 /* Convert 3Mx4 to 3x4. */
90 private int
flip3x4(byte * buffer,const byte ** planes,int offset,int nbytes)91 flip3x4(byte * buffer, const byte ** planes, int offset, int nbytes)
92 {
93     byte *out = buffer;
94     const byte *in1 = planes[0] + offset;
95     const byte *in2 = planes[1] + offset;
96     const byte *in3 = planes[2] + offset;
97     int n = nbytes;
98 
99     for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
100 	byte_var b1 = *in1, b2 = *in2, b3 = *in3;
101 
102 	out[0] = (b1 & 0xf0) | (b2 >> 4);
103 	out[1] = (b3 & 0xf0) | (b1 & 0xf);
104 	out[2] = (byte) (b2 << 4) | (b3 & 0xf);
105     }
106     return 0;
107 }
108 
109 /* Convert 3Mx8 to 3x8. */
110 private int
flip3x8(byte * buffer,const byte ** planes,int offset,int nbytes)111 flip3x8(byte * buffer, const byte ** planes, int offset, int nbytes)
112 {
113     byte *out = buffer;
114     const byte *in1 = planes[0] + offset;
115     const byte *in2 = planes[1] + offset;
116     const byte *in3 = planes[2] + offset;
117     int n = nbytes;
118 
119     for (; n > 0; out += 3, ++in1, ++in2, ++in3, --n) {
120 	out[0] = *in1;
121 	out[1] = *in2;
122 	out[2] = *in3;
123     }
124     return 0;
125 }
126 
127 /* Convert 3Mx12 to 3x12. */
128 private int
flip3x12(byte * buffer,const byte ** planes,int offset,int nbytes)129 flip3x12(byte * buffer, const byte ** planes, int offset, int nbytes)
130 {
131     byte *out = buffer;
132     const byte *pa = planes[0] + offset;
133     const byte *pb = planes[1] + offset;
134     const byte *pc = planes[2] + offset;
135     int n = nbytes;
136 
137     /*
138      * We assume that the input is an integral number of pixels, and
139      * round up n to a multiple of 3.
140      */
141     for (; n > 0; out += 9, pa += 3, pb += 3, pc += 3, n -= 3) {
142 	byte_var a1 = pa[1], b0 = pb[0], b1 = pb[1], b2 = pb[2], c1 = pc[1];
143 
144 	out[0] = pa[0];
145 	out[1] = (a1 & 0xf0) | (b0 >> 4);
146 	out[2] = (byte) ((b0 << 4) | (b1 >> 4));
147 	out[3] = pc[0];
148 	out[4] = (c1 & 0xf0) | (a1 & 0xf);
149 	out[5] = pa[2];
150 	out[6] = (byte) ((b1 << 4) | (b2 >> 4));
151 	out[7] = (byte) ((b2 << 4) | (c1 & 0xf));
152 	out[8] = pc[2];
153     }
154     return 0;
155 }
156 
157 /* Convert 4Mx1 to 4x1. */
158 private int
flip4x1(byte * buffer,const byte ** planes,int offset,int nbytes)159 flip4x1(byte * buffer, const byte ** planes, int offset, int nbytes)
160 {
161     byte *out = buffer;
162     const byte *in1 = planes[0] + offset;
163     const byte *in2 = planes[1] + offset;
164     const byte *in3 = planes[2] + offset;
165     const byte *in4 = planes[3] + offset;
166     int n = nbytes;
167 
168     for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
169 	byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4;
170 	byte_var temp;
171 
172 	/* Transpose blocks of 1 */
173 	TRANSPOSE(b1, b2, 0x55, 1);
174 	TRANSPOSE(b3, b4, 0x55, 1);
175 	/* Transpose blocks of 2 */
176 	TRANSPOSE(b1, b3, 0x33, 2);
177 	TRANSPOSE(b2, b4, 0x33, 2);
178 	/* There's probably a faster way to do this.... */
179 	out[0] = (b1 & 0xf0) | (b2 >> 4);
180 	out[1] = (b3 & 0xf0) | (b4 >> 4);
181 	out[2] = (byte) ((b1 << 4) | (b2 & 0xf));
182 	out[3] = (byte) ((b3 << 4) | (b4 & 0xf));
183     }
184     return 0;
185 }
186 
187 /* Convert 4Mx2 to 4x2. */
188 private int
flip4x2(byte * buffer,const byte ** planes,int offset,int nbytes)189 flip4x2(byte * buffer, const byte ** planes, int offset, int nbytes)
190 {
191     byte *out = buffer;
192     const byte *in1 = planes[0] + offset;
193     const byte *in2 = planes[1] + offset;
194     const byte *in3 = planes[2] + offset;
195     const byte *in4 = planes[3] + offset;
196     int n = nbytes;
197 
198     for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
199 	byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4;
200 	byte_var temp;
201 
202 	/* Transpose blocks of 4x2 */
203 	TRANSPOSE(b1, b3, 0x0f, 4);
204 	TRANSPOSE(b2, b4, 0x0f, 4);
205 	/* Transpose blocks of 2x1 */
206 	TRANSPOSE(b1, b2, 0x33, 2);
207 	TRANSPOSE(b3, b4, 0x33, 2);
208 	out[0] = b1;
209 	out[1] = b2;
210 	out[2] = b3;
211 	out[3] = b4;
212     }
213     return 0;
214 }
215 
216 /* Convert 4Mx4 to 4x4. */
217 private int
flip4x4(byte * buffer,const byte ** planes,int offset,int nbytes)218 flip4x4(byte * buffer, const byte ** planes, int offset, int nbytes)
219 {
220     byte *out = buffer;
221     const byte *in1 = planes[0] + offset;
222     const byte *in2 = planes[1] + offset;
223     const byte *in3 = planes[2] + offset;
224     const byte *in4 = planes[3] + offset;
225     int n = nbytes;
226 
227     for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
228 	byte_var b1 = *in1, b2 = *in2, b3 = *in3, b4 = *in4;
229 
230 	out[0] = (b1 & 0xf0) | (b2 >> 4);
231 	out[1] = (b3 & 0xf0) | (b4 >> 4);
232 	out[2] = (byte) ((b1 << 4) | (b2 & 0xf));
233 	out[3] = (byte) ((b3 << 4) | (b4 & 0xf));
234     }
235     return 0;
236 }
237 
238 /* Convert 4Mx8 to 4x8. */
239 private int
flip4x8(byte * buffer,const byte ** planes,int offset,int nbytes)240 flip4x8(byte * buffer, const byte ** planes, int offset, int nbytes)
241 {
242     byte *out = buffer;
243     const byte *in1 = planes[0] + offset;
244     const byte *in2 = planes[1] + offset;
245     const byte *in3 = planes[2] + offset;
246     const byte *in4 = planes[3] + offset;
247     int n = nbytes;
248 
249     for (; n > 0; out += 4, ++in1, ++in2, ++in3, ++in4, --n) {
250 	out[0] = *in1;
251 	out[1] = *in2;
252 	out[2] = *in3;
253 	out[3] = *in4;
254     }
255     return 0;
256 }
257 
258 /* Convert 4Mx12 to 4x12. */
259 private int
flip4x12(byte * buffer,const byte ** planes,int offset,int nbytes)260 flip4x12(byte * buffer, const byte ** planes, int offset, int nbytes)
261 {
262     byte *out = buffer;
263     const byte *pa = planes[0] + offset;
264     const byte *pb = planes[1] + offset;
265     const byte *pc = planes[2] + offset;
266     const byte *pd = planes[3] + offset;
267     int n = nbytes;
268 
269     /*
270      * We assume that the input is an integral number of pixels, and
271      * round up n to a multiple of 3.
272      */
273     for (; n > 0; out += 12, pa += 3, pb += 3, pc += 3, pd += 3, n -= 3) {
274 	byte_var a1 = pa[1], b1 = pb[1], c1 = pc[1], d1 = pd[1];
275 
276 	{
277 	    byte_var v0;
278 
279 	    out[0] = pa[0];
280 	    v0 = pb[0];
281 	    out[1] = (a1 & 0xf0) | (v0 >> 4);
282 	    out[2] = (byte) ((v0 << 4) | (b1 >> 4));
283 	    out[3] = pc[0];
284 	    v0 = pd[0];
285 	    out[4] = (c1 & 0xf0) | (v0 >> 4);
286 	    out[5] = (byte) ((v0 << 4) | (d1 >> 4));
287 	}
288 	{
289 	    byte_var v2;
290 
291 	    v2 = pa[2];
292 	    out[6] = (byte) ((a1 << 4) | (v2 >> 4));
293 	    out[7] = (byte) ((v2 << 4) | (b1 & 0xf));
294 	    out[8] = pb[2];
295 	    v2 = pc[2];
296 	    out[9] = (byte) ((c1 << 4) | (v2 >> 4));
297 	    out[10] = (byte) ((v2 << 4) | (d1 & 0xf));
298 	    out[11] = pd[2];
299 	}
300     }
301     return 0;
302 }
303 
304 /* Convert NMx{1,2,4,8} to Nx{1,2,4,8}. */
305 private int
flipNx1to8(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int bits_per_sample)306 flipNx1to8(byte * buffer, const byte ** planes, int offset, int nbytes,
307 	   int num_planes, int bits_per_sample)
308 {
309     /* This is only needed for DeviceN colors, so it can be slow. */
310     uint mask = (1 << bits_per_sample) - 1;
311     int bi, pi;
312     sample_store_declare_setup(dptr, dbit, dbbyte, buffer, 0, bits_per_sample);
313 
314     for (bi = 0; bi < nbytes * 8; bi += bits_per_sample) {
315 	for (pi = 0; pi < num_planes; ++pi) {
316 	    const byte *sptr = planes[pi] + offset + (bi >> 3);
317 	    uint value = (*sptr >> (8 - (bi & 7) - bits_per_sample)) & mask;
318 
319 	    sample_store_next8(value, dptr, dbit, bits_per_sample, dbbyte);
320 	}
321     }
322     sample_store_flush(dptr, dbit, bits_per_sample, dbbyte);
323     return 0;
324 }
325 
326 /* Convert NMx12 to Nx12. */
327 private int
flipNx12(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int ignore_bits_per_sample)328 flipNx12(byte * buffer, const byte ** planes, int offset, int nbytes,
329 	 int num_planes, int ignore_bits_per_sample)
330 {
331     /* This is only needed for DeviceN colors, so it can be slow. */
332     int bi, pi;
333     sample_store_declare_setup(dptr, dbit, dbbyte, buffer, 0, 12);
334 
335     for (bi = 0; bi < nbytes * 8; bi += 12) {
336 	for (pi = 0; pi < num_planes; ++pi) {
337 	    const byte *sptr = planes[pi] + offset + (bi >> 3);
338 	    uint value =
339 		(bi & 4 ? ((*sptr & 0xf) << 8) | sptr[1] :
340 		 (*sptr << 4) | (sptr[1] >> 4));
341 
342 	    sample_store_next_12(value, dptr, dbit, dbbyte);
343 	}
344     }
345     sample_store_flush(dptr, dbit, 12, dbbyte);
346     return 0;
347 }
348 
349 /* Flip data given number of planes and bits per pixel. */
350 typedef int (*image_flip_proc) (byte *, const byte **, int, int);
351 private int
flip_fail(byte * buffer,const byte ** planes,int offset,int nbytes)352 flip_fail(byte * buffer, const byte ** planes, int offset, int nbytes)
353 {
354     return -1;
355 }
356 private const image_flip_proc image_flip3_procs[13] = {
357     flip_fail, flip3x1, flip3x2, flip_fail, flip3x4,
358     flip_fail, flip_fail, flip_fail, flip3x8,
359     flip_fail, flip_fail, flip_fail, flip3x12
360 };
361 private const image_flip_proc image_flip4_procs[13] = {
362     flip_fail, flip4x1, flip4x2, flip_fail, flip4x4,
363     flip_fail, flip_fail, flip_fail, flip4x8,
364     flip_fail, flip_fail, flip_fail, flip4x12
365 };
366 typedef int (*image_flipN_proc) (byte *, const byte **, int, int, int, int);
367 private int
flipN_fail(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int bits_per_sample)368 flipN_fail(byte * buffer, const byte ** planes, int offset, int nbytes,
369 	   int num_planes, int bits_per_sample)
370 {
371     return -1;
372 }
373 private const image_flipN_proc image_flipN_procs[13] = {
374     flipN_fail, flipNx1to8, flipNx1to8, flipN_fail, flipNx1to8,
375     flipN_fail, flipN_fail, flipN_fail, flipNx1to8,
376     flipN_fail, flipN_fail, flipN_fail, flipNx12
377 };
378 
379 /* Here is the public interface to all of the above. */
380 int
image_flip_planes(byte * buffer,const byte ** planes,int offset,int nbytes,int num_planes,int bits_per_sample)381 image_flip_planes(byte * buffer, const byte ** planes, int offset, int nbytes,
382 		  int num_planes, int bits_per_sample)
383 {
384     if (bits_per_sample < 1 || bits_per_sample > 12)
385 	return -1;
386     switch (num_planes) {
387 
388     case 3:
389 	return image_flip3_procs[bits_per_sample]
390 	    (buffer, planes, offset, nbytes);
391     case 4:
392 	return image_flip4_procs[bits_per_sample]
393 	    (buffer, planes, offset, nbytes);
394     default:
395 	if (num_planes < 0)
396 	    return -1;
397 	return image_flipN_procs[bits_per_sample]
398 	    (buffer, planes, offset, nbytes, num_planes, bits_per_sample);
399     }
400 }
401