1 /* Copyright (C) 2002 Aladdin Enterprises. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under license and may not be copied,
7 modified or distributed except as expressly authorized under the terms
8 of the license contained in the file LICENSE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostscript.com/licensing/. For information on
12 commercial licensing, go to http://www.artifex.com/licensing/ or
13 contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14 San Rafael, CA 94903, U.S.A., +1(415)492-9861.
15 */
16
17 /* $Id: gscencs.c,v 1.8 2004/10/04 17:28:33 igor Exp $ */
18 /* Compact C representation of built-in encodings */
19
20 #include "memory_.h"
21 #include "gscedata.h"
22 #include "gscencs.h"
23 #include "gserror.h"
24 #include "gserrors.h"
25
26 /*
27 * The actual encoding data tables in gscedata.c, and the internal
28 * interface definitions in gscedata.h, are generated by toolbin/encs2c.ps,
29 * q.v.
30 *
31 * In the encoding tables in gscedata.c, each glyph is represented by a
32 * ushort (16-bit) value. A bias of gs_c_min_std_encoding_glyph is added
33 * or subtracted to form a gs_glyph value.
34 */
35
36 /*
37 * gscedata.[hc] defines the following tables:
38 * const char gs_c_known_encoding_chars[NUM_CHARS] --
39 * the character table.
40 * const int gs_c_known_encoding_offsets[NUM_INDIRECT_LEN] --
41 * the starting offsets of the names of a given length in the
42 * character table.
43 * const ushort *const gs_c_known_encodings[] --
44 * pointers to the encodings per se.
45 * const ushort gs_c_known_encoding_lengths[] --
46 * lengths of the encodings.
47 */
48
49 const gs_glyph gs_c_min_std_encoding_glyph = gs_min_cid_glyph - 0x10000;
50
51 /*
52 * Encode a character in a known encoding. The only use for glyph numbers
53 * returned by this procedure is to pass them to gs_c_glyph_name or gs_c_decode.
54 */
55 gs_glyph
gs_c_known_encode(gs_char ch,int ei)56 gs_c_known_encode(gs_char ch, int ei)
57 {
58 if (ei < 0 || ei >= gs_c_known_encoding_count ||
59 ch >= gs_c_known_encoding_lengths[ei]
60 )
61 return gs_no_glyph;
62 return gs_c_min_std_encoding_glyph + gs_c_known_encodings[ei][ch];
63 }
64
65
66 /*
67 * Decode a gs_c_glyph_name glyph with a known encoding.
68 */
69 gs_char
gs_c_decode(gs_glyph glyph,int ei)70 gs_c_decode(gs_glyph glyph, int ei)
71 {
72 /* Do a binary search for glyph, using gx_c_known_encodings_reverse */
73 const ushort *const encoding = gs_c_known_encodings[ei];
74 const ushort *const reverse = gs_c_known_encodings_reverse[ei];
75 int first_index = 0;
76 int last_index = gs_c_known_encoding_reverse_lengths[ei];
77 while (first_index < last_index) {
78 const int test_index = (first_index + last_index) / 2;
79 const gs_glyph test_glyph =
80 gs_c_min_std_encoding_glyph + encoding[reverse[test_index]];
81 if (glyph < test_glyph)
82 last_index = test_index;
83 else if (glyph > test_glyph)
84 first_index = test_index + 1;
85 else
86 return reverse[test_index];
87 }
88 return GS_NO_CHAR;
89 }
90
91
92 /*
93 * Convert a glyph number returned by gs_c_known_encode to a string.
94 */
95 int
gs_c_glyph_name(gs_glyph glyph,gs_const_string * pstr)96 gs_c_glyph_name(gs_glyph glyph, gs_const_string *pstr)
97 {
98 uint n = (uint)(glyph - gs_c_min_std_encoding_glyph);
99 uint len = N_LEN(n);
100 uint off = N_OFFSET(n);
101
102 #ifdef DEBUG
103 if (len == 0 || len > gs_c_known_encoding_max_length ||
104 off >= gs_c_known_encoding_offsets[len + 1] -
105 gs_c_known_encoding_offsets[len] ||
106 off % len != 0
107 )
108 return_error(gs_error_rangecheck);
109 #endif
110 pstr->data = (const byte *)
111 &gs_c_known_encoding_chars[gs_c_known_encoding_offsets[len] + off];
112 pstr->size = len;
113 return 0;
114 }
115
116 /*
117 * Test whether a string is one that was returned by gs_c_glyph_name.
118 */
119 bool
gs_is_c_glyph_name(const byte * str,uint len)120 gs_is_c_glyph_name(const byte *str, uint len)
121 {
122 return (str >= (const byte *)gs_c_known_encoding_chars &&
123 (str - (const byte *)gs_c_known_encoding_chars) <
124 gs_c_known_encoding_total_chars);
125 }
126
127 /*
128 * Return the glyph number corresponding to a string (the inverse of
129 * gs_c_glyph_name), or gs_no_glyph if the glyph name is not known.
130 */
131 gs_glyph
gs_c_name_glyph(const byte * str,uint len)132 gs_c_name_glyph(const byte *str, uint len)
133 {
134 if (len == 0 || len > gs_c_known_encoding_max_length)
135 return gs_no_glyph;
136 /* Binary search the character table. */
137 {
138 uint base = gs_c_known_encoding_offsets[len];
139 const byte *bot = (const byte *)&gs_c_known_encoding_chars[base];
140 uint count = (gs_c_known_encoding_offsets[len + 1] - base) / len;
141 uint a = 0, b = count; /* know b > 0 */
142 const byte *probe;
143
144 while (a < b) { /* know will execute at least once */
145 uint m = (a + b) >> 1;
146 int cmp;
147
148 probe = bot + m * len;
149 cmp = memcmp(str, probe, len);
150 if (cmp == 0)
151 return gs_c_min_std_encoding_glyph + N(len, probe - bot);
152 else if (cmp > 0)
153 a = m + 1;
154 else
155 b = m;
156 }
157 }
158
159 return gs_no_glyph;
160 }
161
162 #ifdef TEST
163
164 /* NOTE: test values will have to be updated if representation changes. */
165 #define I_caron N(5,85)
166 #define I_carriagereturn N(14,154)
167 #define I_circlemultiply N(14,168)
168 #define I_numbersign N(10,270)
169 #define I_copyright N(9,180)
170 #define I_notdefined N(7, 0)
171
172 /* Test */
173 #include <stdio.h>
main()174 main()
175 {
176 gs_glyph g;
177 gs_char c;
178 gs_const_string str;
179
180 /* Test with a short name. */
181 g = gs_c_known_encode((gs_char)0237, 1); /* caron */
182 printf("caron is %u, should be %u\n",
183 g - gs_c_min_std_encoding_glyph, I_caron);
184 gs_c_glyph_name(g, &str);
185 fwrite(str.data, 1, str.size, stdout);
186 printf(" should be caron\n");
187
188 /* Test with a long name. */
189 g = gs_c_known_encode((gs_char)0277, 2); /* carriagereturn */
190 printf("carriagereturn is %u, should be %u\n",
191 g - gs_c_min_std_encoding_glyph, I_carriagereturn);
192 gs_c_glyph_name(g, &str);
193 fwrite(str.data, 1, str.size, stdout);
194 printf(" should be carriagereturn\n");
195
196 /* Test lookup with 3 kinds of names. */
197 g = gs_c_name_glyph((const byte *)"circlemultiply", 14);
198 printf("circlemultiply is %u, should be %u\n",
199 g - gs_c_min_std_encoding_glyph, I_circlemultiply);
200 g = gs_c_name_glyph((const byte *)"numbersign", 10);
201 printf("numbersign is %u, should be %u\n",
202 g - gs_c_min_std_encoding_glyph, I_numbersign);
203 g = gs_c_name_glyph((const byte *)"copyright", 9);
204 printf("copyright is %u, should be %u\n",
205 g - gs_c_min_std_encoding_glyph, I_copyright);
206
207 /* Test reverse lookup */
208 c = gs_c_decode(I_caron + gs_c_min_std_encoding_glyph, 1);
209 printf("%u (caron) looked up as %u, should be %u\n",
210 I_caron, c, 0237);
211 c = gs_c_decode(I_carriagereturn + gs_c_min_std_encoding_glyph, 2);
212 printf("%u (carriagereturn) looked up as %u, should be %u\n",
213 I_carriagereturn, c, 0277);
214 c = gs_c_decode(I_notdefined + gs_c_min_std_encoding_glyph, 1); /* undef'd */
215 printf("%u (notdefined) looked up as %d , should be %d\n",
216 I_notdefined, c, GS_NO_CHAR);
217
218 exit(0);
219 }
220
221 #endif /* TEST */
222