xref: /plan9/sys/src/cmd/gs/src/gscencs.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 2002 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: gscencs.c,v 1.8 2004/10/04 17:28:33 igor Exp $ */
18 /* Compact C representation of built-in encodings */
19 
20 #include "memory_.h"
21 #include "gscedata.h"
22 #include "gscencs.h"
23 #include "gserror.h"
24 #include "gserrors.h"
25 
26 /*
27  * The actual encoding data tables in gscedata.c, and the internal
28  * interface definitions in gscedata.h, are generated by toolbin/encs2c.ps,
29  * q.v.
30  *
31  * In the encoding tables in gscedata.c, each glyph is represented by a
32  * ushort (16-bit) value.  A bias of gs_c_min_std_encoding_glyph is added
33  * or subtracted to form a gs_glyph value.
34  */
35 
36 /*
37  * gscedata.[hc] defines the following tables:
38  *	const char gs_c_known_encoding_chars[NUM_CHARS] --
39  *	  the character table.
40  *	const int gs_c_known_encoding_offsets[NUM_INDIRECT_LEN] --
41  *	  the starting offsets of the names of a given length in the
42  *	  character table.
43  *	const ushort *const gs_c_known_encodings[] --
44  *	  pointers to the encodings per se.
45  *	const ushort gs_c_known_encoding_lengths[] --
46  *	  lengths of the encodings.
47  */
48 
/*
 * Bias between the 16-bit values stored in the encoding tables and
 * gs_glyph values.  It is defined as 0x10000 below gs_min_cid_glyph, so
 * adding any 16-bit table entry yields a glyph value that is still
 * below gs_min_cid_glyph.
 */
const gs_glyph gs_c_min_std_encoding_glyph = gs_min_cid_glyph - 0x10000;
50 
51 /*
52  * Encode a character in a known encoding.  The only use for glyph numbers
53  * returned by this procedure is to pass them to gs_c_glyph_name or gs_c_decode.
54  */
55 gs_glyph
gs_c_known_encode(gs_char ch,int ei)56 gs_c_known_encode(gs_char ch, int ei)
57 {
58     if (ei < 0 || ei >= gs_c_known_encoding_count ||
59 	ch >= gs_c_known_encoding_lengths[ei]
60 	)
61 	return gs_no_glyph;
62     return gs_c_min_std_encoding_glyph + gs_c_known_encodings[ei][ch];
63 }
64 
65 
66 /*
67  * Decode a gs_c_glyph_name glyph with a known encoding.
68  */
69 gs_char
gs_c_decode(gs_glyph glyph,int ei)70 gs_c_decode(gs_glyph glyph, int ei)
71 {
72     /* Do a binary search for glyph, using gx_c_known_encodings_reverse */
73     const ushort *const encoding = gs_c_known_encodings[ei];
74     const ushort *const reverse = gs_c_known_encodings_reverse[ei];
75     int first_index = 0;
76     int last_index = gs_c_known_encoding_reverse_lengths[ei];
77     while (first_index < last_index) {
78         const int test_index = (first_index + last_index) / 2;
79         const gs_glyph test_glyph =
80          gs_c_min_std_encoding_glyph + encoding[reverse[test_index]];
81         if (glyph < test_glyph)
82             last_index = test_index;
83         else if (glyph > test_glyph)
84             first_index = test_index + 1;
85         else
86             return reverse[test_index];
87     }
88     return GS_NO_CHAR;
89 }
90 
91 
92 /*
93  * Convert a glyph number returned by gs_c_known_encode to a string.
94  */
95 int
gs_c_glyph_name(gs_glyph glyph,gs_const_string * pstr)96 gs_c_glyph_name(gs_glyph glyph, gs_const_string *pstr)
97 {
98     uint n = (uint)(glyph - gs_c_min_std_encoding_glyph);
99     uint len = N_LEN(n);
100     uint off = N_OFFSET(n);
101 
102 #ifdef DEBUG
103     if (len == 0 || len > gs_c_known_encoding_max_length ||
104 	off >= gs_c_known_encoding_offsets[len + 1] -
105 	  gs_c_known_encoding_offsets[len] ||
106 	off % len != 0
107 	)
108 	return_error(gs_error_rangecheck);
109 #endif
110     pstr->data = (const byte *)
111 	&gs_c_known_encoding_chars[gs_c_known_encoding_offsets[len] + off];
112     pstr->size = len;
113     return 0;
114 }
115 
116 /*
117  * Test whether a string is one that was returned by gs_c_glyph_name.
118  */
119 bool
gs_is_c_glyph_name(const byte * str,uint len)120 gs_is_c_glyph_name(const byte *str, uint len)
121 {
122     return (str >= (const byte *)gs_c_known_encoding_chars &&
123 	    (str - (const byte *)gs_c_known_encoding_chars) <
124 	      gs_c_known_encoding_total_chars);
125 }
126 
127 /*
128  * Return the glyph number corresponding to a string (the inverse of
129  * gs_c_glyph_name), or gs_no_glyph if the glyph name is not known.
130  */
131 gs_glyph
gs_c_name_glyph(const byte * str,uint len)132 gs_c_name_glyph(const byte *str, uint len)
133 {
134     if (len == 0 || len > gs_c_known_encoding_max_length)
135 	return gs_no_glyph;
136     /* Binary search the character table. */
137     {
138 	uint base = gs_c_known_encoding_offsets[len];
139 	const byte *bot = (const byte *)&gs_c_known_encoding_chars[base];
140 	uint count = (gs_c_known_encoding_offsets[len + 1] - base) / len;
141 	uint a = 0, b = count;	/* know b > 0 */
142 	const byte *probe;
143 
144 	while (a < b) {		/* know will execute at least once */
145 	    uint m = (a + b) >> 1;
146 	    int cmp;
147 
148 	    probe = bot + m * len;
149 	    cmp = memcmp(str, probe, len);
150 	    if (cmp == 0)
151 		return gs_c_min_std_encoding_glyph + N(len, probe - bot);
152 	    else if (cmp > 0)
153 		a = m + 1;
154 	    else
155 		b = m;
156 	}
157     }
158 
159     return gs_no_glyph;
160 }
161 
#ifdef TEST

/* NOTE: test values will have to be updated if representation changes. */
#define I_caron N(5,85)
#define I_carriagereturn N(14,154)
#define I_circlemultiply N(14,168)
#define I_numbersign N(10,270)
#define I_copyright N(9,180)
#define I_notdefined N(7, 0)

/* Test */
#include <stdio.h>

/*
 * Stand-alone self-test, built only when TEST is defined.  It exercises
 * gs_c_known_encode, gs_c_glyph_name, gs_c_name_glyph and gs_c_decode
 * and prints each result next to the expected value for comparison by
 * eye.  Always returns 0.
 *
 * Glyph and character values are cast explicitly before being handed to
 * printf so that the arguments match the conversion specifiers whatever
 * the widths of gs_glyph and gs_char are.
 */
int
main(void)
{
    gs_glyph g;
    gs_char c;
    gs_const_string str;

    /* Test with a short name. */
    g = gs_c_known_encode((gs_char)0237, 1); /* caron */
    printf("caron is %lu, should be %lu\n",
	   (unsigned long)(g - gs_c_min_std_encoding_glyph),
	   (unsigned long)I_caron);
    gs_c_glyph_name(g, &str);
    fwrite(str.data, 1, str.size, stdout);
    printf(" should be caron\n");

    /* Test with a long name. */
    g = gs_c_known_encode((gs_char)0277, 2); /* carriagereturn */
    printf("carriagereturn is %lu, should be %lu\n",
	   (unsigned long)(g - gs_c_min_std_encoding_glyph),
	   (unsigned long)I_carriagereturn);
    gs_c_glyph_name(g, &str);
    fwrite(str.data, 1, str.size, stdout);
    printf(" should be carriagereturn\n");

    /* Test lookup with 3 kinds of names. */
    g = gs_c_name_glyph((const byte *)"circlemultiply", 14);
    printf("circlemultiply is %lu, should be %lu\n",
	   (unsigned long)(g - gs_c_min_std_encoding_glyph),
	   (unsigned long)I_circlemultiply);
    g = gs_c_name_glyph((const byte *)"numbersign", 10);
    printf("numbersign is %lu, should be %lu\n",
	   (unsigned long)(g - gs_c_min_std_encoding_glyph),
	   (unsigned long)I_numbersign);
    g = gs_c_name_glyph((const byte *)"copyright", 9);
    printf("copyright is %lu, should be %lu\n",
	   (unsigned long)(g - gs_c_min_std_encoding_glyph),
	   (unsigned long)I_copyright);

    /* Test reverse lookup */
    c = gs_c_decode(I_caron + gs_c_min_std_encoding_glyph, 1);
    printf("%lu (caron) looked up as %lu, should be %lu\n",
	   (unsigned long)I_caron, (unsigned long)c, (unsigned long)0237);
    c = gs_c_decode(I_carriagereturn + gs_c_min_std_encoding_glyph, 2);
    printf("%lu (carriagereturn) looked up as %lu, should be %lu\n",
	   (unsigned long)I_carriagereturn, (unsigned long)c,
	   (unsigned long)0277);
    c = gs_c_decode(I_notdefined + gs_c_min_std_encoding_glyph, 1); /* undef'd */
    /* Printed as signed so that GS_NO_CHAR keeps the signed rendering
       the original %d conversion asked for. */
    printf("%lu (notdefined) looked up as %ld , should be %ld\n",
	   (unsigned long)I_notdefined, (long)c, (long)GS_NO_CHAR);

    /* Returning from main avoids calling exit() without <stdlib.h>. */
    return 0;
}

#endif /* TEST */
222