xref: /plan9/sys/src/cmd/gs/src/gxfcmap.h (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 1997, 2000 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: gxfcmap.h,v 1.16 2004/08/04 19:36:12 stefan Exp $ */
18 /* Internal CMap structure definitions */
19 
20 /* This file should be called gxcmap.h, except that name is already used. */
21 
22 #ifndef gxfcmap_INCLUDED
23 #  define gxfcmap_INCLUDED
24 
25 #include "gsfcmap.h"
26 #include "gsuid.h"
27 #include "gxcid.h"
28 
29 /*
30  * CMaps are the structures that map (possibly variable-length) characters
31  * appearing in a text string to glyph numbers in some font-specific space.
32  * The structure defined here generally follows Adobe's specifications, but
33  * the actual implementation of the code space and the lookup tables is
34  * virtual, so that the same interface can be used for direct access to the
35  * corresponding "cmap" structure in TrueType fonts, rather than having to
36  * convert that structure to the Adobe-based one.
37  */
38 
39 /*
40  * A CMap conceptually consists of three parts:
41  *
42  *	- The code space, used for parsing the input string into (possibly
43  *	  variable-length) characters.
44  *
45  *	- A 'def' map, which maps defined parsed characters to values.
46  *
47  *	- A 'notdef' map, which maps parsed but undefined characters to
48  *	  values.
49  *
50  * The value of a character may be a string, a name, or a CID.  For more
51  * information, see the Adobe documentation.
52  */
53 
54 /* ---------------- Code space ranges ---------------- */
55 
56 /*
57  * A code space is a non-empty, lexicographically sorted sequence of
58  * code space ranges.  Ranges must not overlap.  In each range,
59  * first[i] <= last[i] for 0 <= i < size.
60  */
61 #define MAX_CMAP_CODE_SIZE 4	/* max bytes in one character code */
62 typedef struct gx_code_space_range_s {
63     byte first[MAX_CMAP_CODE_SIZE];	/* low bound; first[i] <= last[i] */
64     byte last[MAX_CMAP_CODE_SIZE];	/* high bound, compared per byte */
65     int size;			/* bytes in use: 1 .. MAX_CMAP_CODE_SIZE */
66 } gx_code_space_range_t;
67 
68 /* ---------------- Lookup tables ---------------- */
69 
70 /*
71  * A lookup table is a non-empty sequence of lookup ranges.  Each range has
72  * an associated sorted lookup table, indexed by the num_key_bytes low-order
73  * code bytes.  If key_is_range is true, each key is a range (2 x key_size
74  * bytes); if false, each key is a single code (key_size bytes).
75  *
76  * The only difference between CODE_VALUE_CID and CODE_VALUE_NOTDEF is
77  * that after looking up a CID in a table, for CODE_VALUE_CID the result
78  * is incremented by the difference between the input code and the key
79  * (i.e., a single CODE_VALUE_CID entry actually represents a range of
80  * CIDs), whereas for CODE_VALUE_NOTDEF, the result is not incremented.
81  * The defined-character map for a CMap uses the former behavior; the
82  * notdef map uses the latter.
83  *
84  * CODE_VALUE_GLYPH and CODE_VALUE_CHARS are reserved for
85  * rearranged font CMaps, which are not implemented yet.
86  */
87 typedef enum {
88     CODE_VALUE_CID,		/* CIDs; result is offset by (code - key) */
89     CODE_VALUE_GLYPH,		/* glyphs (reserved: rearranged fonts) */
90     CODE_VALUE_CHARS,		/* character(s) (reserved: rearranged fonts) */
91     CODE_VALUE_NOTDEF		/* CID - for notdef(char|range) dst; no offset */
92 #define CODE_VALUE_MAX CODE_VALUE_NOTDEF	/* last enumerator above */
93 } gx_cmap_code_value_type_t;
94 typedef struct gx_cmap_lookup_entry_s {
95     /* Key */
96     byte key[2][MAX_CMAP_CODE_SIZE]; /* [key_is_range + 1][key_size] */
97     int key_size;		/* bytes per key code: 0 .. MAX_CMAP_CODE_SIZE */
98     bool key_is_range;		/* true: key[0]..key[1] is a range; false: only key[0] */
99     /* Value */
100     gx_cmap_code_value_type_t value_type;	/* how to interpret value */
101     gs_const_string value;
102     int font_index;		/* for rearranged fonts */
103 } gx_cmap_lookup_entry_t;
104 
105 /* ---------------- CMaps proper ---------------- */
106 
107 /*
108  * Define the elements common to all CMaps.  Currently we include all
109  * elements from the Adobe specification except for the actual code space
110  * ranges and lookup tables.
111  *
112  * CMapType and id are common to all CMapTypes.  We really only support the
113  * single Adobe standard CMap format.  Note that the only documented values
114  * of CMapType in the PLRM are 0 and 1, which are equivalent; however, in
115  * the second PDF Reference, the CMapType for the example ToUnicode CMap is
116  * 2.
117  *
118  * glyph_name and glyph_name_data are only used if the CMap has lookup
119  * entries of type CODE_VALUE_GLYPH.  We deliberately chose to make
120  * glyph_name a function pointer rather than including it in the procs
121  * virtual functions.  The rationale is that the virtual functions are
122  * dependent on the representation of the CMap, so they should be set by the
123  * code that must work with this structure.  However, glyph_name is not
124  * dependent on the representation of the CMap: it does not need to know
125  * anything about how the CMap is stored.  Rather, it is meant to be used by
126  * the client who constructs the CMap, who decides how stored
127  * CODE_VALUE_GLYPH values correspond to printable glyph names.  The same
128  * glyph_name procedure can, in principle, be used with multiple different
129  * subclasses of gs_cmap_t.
130  */
131 #ifndef gs_cmap_DEFINED		/* skip if gs_cmap_t was already declared */
132 #  define gs_cmap_DEFINED
133 typedef struct gs_cmap_s gs_cmap_t;	/* struct body is defined later in this file */
134 #endif				/* gs_cmap_DEFINED */
135 
136 #define GS_CMAP_COMMON\
137     int CMapType;		/* must be first */\
138     gs_id id;			/* internal ID (no relation to UID) */\
139 	/* End of entries common to all CMapTypes */\
140     gs_const_string CMapName;\
141     gs_cid_system_info_t *CIDSystemInfo; /* [num_fonts] */\
142     int num_fonts;		/* length of the CIDSystemInfo array */\
143     float CMapVersion;\
144     gs_uid uid;			/* XUID or nothing */\
145     long UIDOffset;\
146     int WMode;\
147     bool from_Unicode;		/* if true, characters are Unicode */\
148     bool ToUnicode;             /* if true, it is a ToUnicode CMap */\
149     gs_glyph_name_proc_t glyph_name;  /* glyph name procedure for printing */\
150     void *glyph_name_data;	/* closure data for glyph_name */\
151     const gs_cmap_procs_t *procs	/* representation-dependent virtual procedures */
152 
153 extern_st(st_cmap);		/* see public_st_cmap() below */
154 #define public_st_cmap()	/* in gsfcmap.c */\
155   BASIC_PTRS(cmap_ptrs) {	/* NOTE(review): presumably the pointer members the GC must trace -- confirm */\
156     GC_CONST_STRING_ELT(gs_cmap_t, CMapName),\
157     GC_OBJ_ELT3(gs_cmap_t, CIDSystemInfo, uid.xvalues, glyph_name_data)\
158   };\
159   gs_public_st_basic(st_cmap, gs_cmap_t, "gs_cmap_t", cmap_ptrs, cmap_data)
160 
161 typedef struct gs_cmap_ranges_enum_s gs_cmap_ranges_enum_t;	/* code space range enumerator */
162 typedef struct gs_cmap_lookups_enum_s gs_cmap_lookups_enum_t;	/* lookup/entry enumerator */
163 
164 typedef struct gs_cmap_procs_s {
165 
166     /*
167      * Decode and map a character from a string using a CMap.
168      * See gsfcmap.h for details.
169      */
170 
171     int (*decode_next)(const gs_cmap_t *pcmap, const gs_const_string *str,
172 		       uint *pindex, uint *pfidx,
173 		       gs_char *pchr, gs_glyph *pglyph);
174 
175     /*
176      * Initialize an enumeration of code space ranges.  See below.
177      */
178 
179     void (*enum_ranges)(const gs_cmap_t *pcmap,
180 			gs_cmap_ranges_enum_t *penum);
181 
182     /*
183      * Initialize an enumeration of lookups.  See below.  `which' is
184      * presumably as for gs_cmap_lookups_enum_init (0 = defined, 1 = notdef).  */
185 
186     void (*enum_lookups)(const gs_cmap_t *pcmap, int which,
187 			 gs_cmap_lookups_enum_t *penum);
188 
189     /*
190      * Check if the cmap is identity.
191      * NOTE(review): font_index_only is not explained in this header -- confirm.  */
192 
193     bool (*is_identity)(const gs_cmap_t *pcmap, int font_index_only);
194 
195 } gs_cmap_procs_t;
196 
197 struct gs_cmap_s {
198     GS_CMAP_COMMON;		/* the elements common to all CMaps */
199 };
200 
201 /* ---------------- Enumerators ---------------- */
202 
203 /*
204  * Define enumeration structures for code space ranges and lookup tables.
205  * Since all current and currently envisioned implementations are very
206  * simple, we don't bother to make this fully general, with subclasses
207  * or a "finish" procedure.
208  */
209 typedef struct gs_cmap_ranges_enum_procs_s {
210     int (*next_range)(gs_cmap_ranges_enum_t *penum);	/* presumably stores the result in penum->range -- confirm */
211 } gs_cmap_ranges_enum_procs_t;
212 struct gs_cmap_ranges_enum_s {
213     /*
214      * Return the next code space range here.
215      */
216     gx_code_space_range_t range;
217     /*
218      * The rest of the information is private to the implementation.
219      */
220     const gs_cmap_t *cmap;	/* presumably the CMap being enumerated */
221     const gs_cmap_ranges_enum_procs_t *procs;	/* supplies next_range */
222     uint index;			/* gs_cmap_ranges_enum_setup defaults this to 0 */
223 };
224 
225 typedef struct gs_cmap_lookups_enum_procs_s {
226     int (*next_lookup)(gs_cmap_lookups_enum_t *penum);	/* step to the next lookup */
227     int (*next_entry)(gs_cmap_lookups_enum_t *penum);	/* step to the next entry */
228 } gs_cmap_lookups_enum_procs_t;
229 struct gs_cmap_lookups_enum_s {
230     /*
231      * Return the next lookup and entry here.
232      */
233     gx_cmap_lookup_entry_t entry;
234     /*
235      * The rest of the information is private to the implementation.
236      */
237     const gs_cmap_t *cmap;	/* presumably the CMap being enumerated */
238     const gs_cmap_lookups_enum_procs_t *procs;	/* supplies next_lookup / next_entry */
239     uint index[2];		/* gs_cmap_lookups_enum_setup defaults these to 0 */
240     byte temp_value[max(sizeof(gs_glyph), sizeof(gs_char))];	/* sized for one gs_glyph or one gs_char */
241 };
242 /*
243  * Declare a procedure table with a vacuous next_lookup procedure, useful
244  * for the notdef lookups for CMaps that don't have any.
245  */
246 extern const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs;
247 
248 /* ---------------- Client procedures ---------------- */
249 
250 /*
251  * Initialize the enumeration of the code space ranges, and enumerate
252  * the next range.  gs_cmap_enum_next_range returns 0 if OK, 1 if
253  * finished, <0 if error.  The intended usage is:
254  *
255  *	for (gs_cmap_ranges_enum_init(pcmap, &renum);
256  *	     (code = gs_cmap_enum_next_range(&renum)) == 0; ) {
257  *	    ...
258  *	}
259  *	if (code < 0) <<error>>
260  */
261 void gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap,
262 			      gs_cmap_ranges_enum_t *penum);
263 int gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum); /* 0 OK, 1 done, <0 error */
264 
265 /*
266  * Initialize the enumeration of the lookups, and enumerate
267  * the next lookup or entry.  which = 0 for defined characters,
268  * which = 1 for notdef.  next_xxx returns 0 if OK, 1 if finished,
269  * <0 if error.  The intended usage is:
270  *
271  *	for (gs_cmap_lookups_enum_init(pcmap, which, &lenum);
272  *	     (code = gs_cmap_enum_next_lookup(&lenum)) == 0; ) {
273  *	    while ((code = gs_cmap_enum_next_entry(&lenum)) == 0) {
274  *		...
275  *	    }
276  *	    if (code < 0) <<error>>
277  *	}
278  *	if (code < 0) <<error>>
279  *
280  * Note that next_lookup sets (at least) penum->entry.
281  *	key_size, key_is_range, value_type, font_index
282  * whereas next_entry sets penum->entry.
283  *	key[0][*], key[1][*], value
284  * Clients must not modify any members of the enumerator.
285  * The bytes of the value string may be allocated locally (in the enumerator
286  * itself) and not survive from one call to the next.
287  */
288 void gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which,
289 			       gs_cmap_lookups_enum_t *penum); /* which: 0 = defined, 1 = notdef */
290 int gs_cmap_enum_next_lookup(gs_cmap_lookups_enum_t *penum); /* 0 OK, 1 done, <0 error */
291 int gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum); /* 0 OK, 1 done, <0 error */
292 
293 /* ---------------- Implementation procedures ---------------- */
294 
295 /*
296  * Initialize a just-allocated CMap, to ensure that all pointers are clean
297  * for the GC.  Note that this only initializes the common part (the
298  * GS_CMAP_COMMON members); anything beyond that is left to the caller.  */
299 void gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts);
300 
301 /*
302  * Allocate and initialize (the common part of) a CMap.
303  * NOTE(review): presumably *ppcmap receives it; <0 on error -- confirm.  */
304 int gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype,
305 		  int wmode, const byte *map_name, uint name_size,
306 		  const gs_cid_system_info_t *pcidsi, int num_fonts,
307 		  const gs_cmap_procs_t *procs, gs_memory_t *mem);
308 
309 /*
310  * Initialize an enumerator with convenient defaults (index = 0).
311  * NOTE(review): pcmap and procs are presumably stored in *penum -- confirm.  */
312 void gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum,
313 			       const gs_cmap_t *pcmap,
314 			       const gs_cmap_ranges_enum_procs_t *procs);
315 void gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum,
316 				const gs_cmap_t *pcmap,
317 				const gs_cmap_lookups_enum_procs_t *procs);
318 
319 /*
320  * Check for identity CMap. Uses a fast check for special cases.
321  * NOTE(review): font_index_only is not explained in this header -- confirm.  */
322 bool gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only);
323 
324 /*
325  * For an arbitrary CMap, compute whether it is identity.
326  * It is not applicable to gs_cmap_ToUnicode_t due to
327  * different sizes of domain keys and range values.
328  */
329 bool gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only);
330 
331 #endif /* gxfcmap_INCLUDED */
332