1 /* Copyright (C) 1997, 2000 Aladdin Enterprises. All rights reserved. 2 3 This software is provided AS-IS with no warranty, either express or 4 implied. 5 6 This software is distributed under license and may not be copied, 7 modified or distributed except as expressly authorized under the terms 8 of the license contained in the file LICENSE in this distribution. 9 10 For more information about licensing, please refer to 11 http://www.ghostscript.com/licensing/. For information on 12 commercial licensing, go to http://www.artifex.com/licensing/ or 13 contact Artifex Software, Inc., 101 Lucas Valley Road #110, 14 San Rafael, CA 94903, U.S.A., +1(415)492-9861. 15 */ 16 17 /* $Id: gxfcmap.h,v 1.16 2004/08/04 19:36:12 stefan Exp $ */ 18 /* Internal CMap structure definitions */ 19 20 /* This file should be called gxcmap.h, except that name is already used. */ 21 22 #ifndef gxfcmap_INCLUDED 23 # define gxfcmap_INCLUDED 24 25 #include "gsfcmap.h" 26 #include "gsuid.h" 27 #include "gxcid.h" 28 29 /* 30 * CMaps are the structures that map (possibly variable-length) characters 31 * appearing in a text string to glyph numbers in some font-specific space. 32 * The structure defined here generally follows Adobe's specifications, but 33 * the actual implementation of the code space and the lookup tables is 34 * virtual, so that the same interface can be used for direct access to the 35 * corresponding "cmap" structure in TrueType fonts, rather than having to 36 * convert that structure to the Adobe-based one. 37 */ 38 39 /* 40 * A CMap conceptually consists of three parts: 41 * 42 * - The code space, used for parsing the input string into (possibly 43 * variable-length) characters. 44 * 45 * - A 'def' map, which maps defined parsed characters to values. 46 * 47 * - A 'notdef' map, which maps parsed but undefined characters to 48 * values. 49 * 50 * The value of a character may be a string, a name, or a CID. For more 51 * information, see the Adobe documentation. 52 */ 53 54 /* ---------------- Code space ranges ---------------- */ 55 56 /* 57 * A code space is a non-empty, lexicographically sorted sequence of 58 * code space ranges. Ranges must not overlap. In each range, 59 * first[i] <= last[i] for 0 <= i < size. 60 */ 61 #define MAX_CMAP_CODE_SIZE 4 62 typedef struct gx_code_space_range_s { 63 byte first[MAX_CMAP_CODE_SIZE]; 64 byte last[MAX_CMAP_CODE_SIZE]; 65 int size; /* 1 .. MAX_CMAP_CODE_SIZE */ 66 } gx_code_space_range_t; 67 68 /* ---------------- Lookup tables ---------------- */ 69 70 /* 71 * A lookup table is a non-empty sequence of lookup ranges. Each range has 72 * an associated sorted lookup table, indexed by the num_key_bytes low-order 73 * code bytes. If key_is_range is true, each key is a range (2 x key_size 74 * bytes); if false, each key is a single code (key_size bytes). 75 * 76 * The only difference between CODE_VALUE_CID and CODE_VALUE_NOTDEF is 77 * that after looking up a CID in a table, for CODE_VALUE_CID the result 78 * is incremented by the difference between the input code and the key 79 * (i.e., a single CODE_VALUE_CID entry actually represents a range of 80 * CIDs), whereas for CODE_VALUE_NOTDEF, the result is not incremented. 81 * The defined-character map for a CMap uses the former behavior; the 82 * notdef map uses the latter. 83 * 84 * CODE_VALUE_GLYPH and CODE_VALUE_CHARS are reserved for 85 * rearranged font CMaps, which are not implemented yet. 86 */ 87 typedef enum { 88 CODE_VALUE_CID, /* CIDs */ 89 CODE_VALUE_GLYPH, /* glyphs */ 90 CODE_VALUE_CHARS, /* character(s) */ 91 CODE_VALUE_NOTDEF /* CID - for notdef(char|range) dst */ 92 #define CODE_VALUE_MAX CODE_VALUE_NOTDEF 93 } gx_cmap_code_value_type_t; 94 typedef struct gx_cmap_lookup_entry_s { 95 /* Key */ 96 byte key[2][MAX_CMAP_CODE_SIZE]; /* [key_is_range + 1][key_size] */ 97 int key_size; /* 0 .. MAX_CMAP_CODE_SIZE */ 98 bool key_is_range; 99 /* Value */ 100 gx_cmap_code_value_type_t value_type; 101 gs_const_string value; 102 int font_index; /* for rearranged fonts */ 103 } gx_cmap_lookup_entry_t; 104 105 /* ---------------- CMaps proper ---------------- */ 106 107 /* 108 * Define the elements common to all CMaps. Currently we include all 109 * elements from the Adobe specification except for the actual code space 110 * ranges and lookup tables. 111 * 112 * CMapType and id are common to all CMapTypes. We really only support the 113 * single Adobe standard CMap format. Note that the only documented values 114 * of CMapType in the PLRM are 0 and 1, which are equivalent; however, in 115 * the second PDF Reference, the CMapType for the example ToUnicode CMap is 116 * 2. 117 * 118 * glyph_name and glyph_name_data are only used if the CMap has lookup 119 * entries of type CODE_VALUE_GLYPH. We deliberately chose to make 120 * glyph_name a function pointer rather than including it in the procs 121 * virtual functions. The rationale is that the virtual functions are 122 * dependent on the representation of the CMap, so they should be set by the 123 * code that must work with this structure. However, glyph_name is not 124 * dependent on the representation of the CMap: it does not need to know 125 * anything about how the CMap is stored. Rather, it is meant to be used by 126 * the client who constructs the CMap, who decides how stored 127 * CODE_VALUE_GLYPH values correspond to printable glyph names. The same 128 * glyph_name procedure can, in principle, be used with multiple different 129 * subclasses of gs_cmap_t. 130 */ 131 #ifndef gs_cmap_DEFINED 132 # define gs_cmap_DEFINED 133 typedef struct gs_cmap_s gs_cmap_t; 134 #endif 135 136 #define GS_CMAP_COMMON\ 137 int CMapType; /* must be first */\ 138 gs_id id; /* internal ID (no relation to UID) */\ 139 /* End of entries common to all CMapTypes */\ 140 gs_const_string CMapName;\ 141 gs_cid_system_info_t *CIDSystemInfo; /* [num_fonts] */\ 142 int num_fonts;\ 143 float CMapVersion;\ 144 gs_uid uid; /* XUID or nothing */\ 145 long UIDOffset;\ 146 int WMode;\ 147 bool from_Unicode; /* if true, characters are Unicode */\ 148 bool ToUnicode; /* if true, it is a ToUnicode CMap */\ 149 gs_glyph_name_proc_t glyph_name; /* glyph name procedure for printing */\ 150 void *glyph_name_data; /* closure data */\ 151 const gs_cmap_procs_t *procs 152 153 extern_st(st_cmap); 154 #define public_st_cmap() /* in gsfcmap.c */\ 155 BASIC_PTRS(cmap_ptrs) {\ 156 GC_CONST_STRING_ELT(gs_cmap_t, CMapName),\ 157 GC_OBJ_ELT3(gs_cmap_t, CIDSystemInfo, uid.xvalues, glyph_name_data)\ 158 };\ 159 gs_public_st_basic(st_cmap, gs_cmap_t, "gs_cmap_t", cmap_ptrs, cmap_data) 160 161 typedef struct gs_cmap_ranges_enum_s gs_cmap_ranges_enum_t; 162 typedef struct gs_cmap_lookups_enum_s gs_cmap_lookups_enum_t; 163 164 typedef struct gs_cmap_procs_s { 165 166 /* 167 * Decode and map a character from a string using a CMap. 168 * See gsfcmap.h for details. 169 */ 170 171 int (*decode_next)(const gs_cmap_t *pcmap, const gs_const_string *str, 172 uint *pindex, uint *pfidx, 173 gs_char *pchr, gs_glyph *pglyph); 174 175 /* 176 * Initialize an enumeration of code space ranges. See below. 177 */ 178 179 void (*enum_ranges)(const gs_cmap_t *pcmap, 180 gs_cmap_ranges_enum_t *penum); 181 182 /* 183 * Initialize an enumeration of lookups. See below. 184 */ 185 186 void (*enum_lookups)(const gs_cmap_t *pcmap, int which, 187 gs_cmap_lookups_enum_t *penum); 188 189 /* 190 * Check if the cmap is identity. 191 */ 192 193 bool (*is_identity)(const gs_cmap_t *pcmap, int font_index_only); 194 195 } gs_cmap_procs_t; 196 197 struct gs_cmap_s { 198 GS_CMAP_COMMON; 199 }; 200 201 /* ---------------- Enumerators ---------------- */ 202 203 /* 204 * Define enumeration structures for code space ranges and lookup tables. 205 * Since all current and currently envisioned implementations are very 206 * simple, we don't bother to make this fully general, with subclasses 207 * or a "finish" procedure. 208 */ 209 typedef struct gs_cmap_ranges_enum_procs_s { 210 int (*next_range)(gs_cmap_ranges_enum_t *penum); 211 } gs_cmap_ranges_enum_procs_t; 212 struct gs_cmap_ranges_enum_s { 213 /* 214 * Return the next code space range here. 215 */ 216 gx_code_space_range_t range; 217 /* 218 * The rest of the information is private to the implementation. 219 */ 220 const gs_cmap_t *cmap; 221 const gs_cmap_ranges_enum_procs_t *procs; 222 uint index; 223 }; 224 225 typedef struct gs_cmap_lookups_enum_procs_s { 226 int (*next_lookup)(gs_cmap_lookups_enum_t *penum); 227 int (*next_entry)(gs_cmap_lookups_enum_t *penum); 228 } gs_cmap_lookups_enum_procs_t; 229 struct gs_cmap_lookups_enum_s { 230 /* 231 * Return the next lookup and entry here. 232 */ 233 gx_cmap_lookup_entry_t entry; 234 /* 235 * The rest of the information is private to the implementation. 236 */ 237 const gs_cmap_t *cmap; 238 const gs_cmap_lookups_enum_procs_t *procs; 239 uint index[2]; 240 byte temp_value[max(sizeof(gs_glyph), sizeof(gs_char))]; 241 }; 242 /* 243 * Define a vacuous next_lookup procedure, useful for the notdef lookups 244 * for CMaps that don't have any. 245 */ 246 extern const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs; 247 248 /* ---------------- Client procedures ---------------- */ 249 250 /* 251 * Initialize the enumeration of the code space ranges, and enumerate 252 * the next range. enum_next returns 0 if OK, 1 if finished, <0 if error. 253 * The intended usage is: 254 * 255 * for (gs_cmap_ranges_enum_init(pcmap, &renum); 256 * (code = gs_cmap_enum_next_range(&renum)) == 0; ) { 257 * ... 258 * } 259 * if (code < 0) <<error>> 260 */ 261 void gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap, 262 gs_cmap_ranges_enum_t *penum); 263 int gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum); 264 265 /* 266 * Initialize the enumeration of the lookups, and enumerate the next 267 * the next lookup or entry. which = 0 for defined characters, 268 * which = 1 for notdef. next_xxx returns 0 if OK, 1 if finished, 269 * <0 if error. The intended usage is: 270 * 271 * for (gs_cmap_lookups_enum_init(pcmap, which, &lenum); 272 * (code = gs_cmap_enum_next_lookup(&lenum)) == 0; ) { 273 * while ((code = gs_cmap_enum_next_entry(&lenum)) == 0) { 274 * ... 275 * } 276 * if (code < 0) <<error>> 277 * } 278 * if (code < 0) <<error>> 279 * 280 * Note that next_lookup sets (at least) penum->entry. 281 * key_size, key_is_range, value_type, font_index 282 * whereas next_entry sets penum->entry. 283 * key[0][*], key[1][*], value 284 * Clients must not modify any members of the enumerator. 285 * The bytes of the value string may be allocated locally (in the enumerator 286 * itself) and not survive from one call to the next. 287 */ 288 void gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which, 289 gs_cmap_lookups_enum_t *penum); 290 int gs_cmap_enum_next_lookup(gs_cmap_lookups_enum_t *penum); 291 int gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum); 292 293 /* ---------------- Implementation procedures ---------------- */ 294 295 /* 296 * Initialize a just-allocated CMap, to ensure that all pointers are clean 297 * for the GC. Note that this only initializes the common part. 298 */ 299 void gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts); 300 301 /* 302 * Allocate and initialize (the common part of) a CMap. 303 */ 304 int gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype, 305 int wmode, const byte *map_name, uint name_size, 306 const gs_cid_system_info_t *pcidsi, int num_fonts, 307 const gs_cmap_procs_t *procs, gs_memory_t *mem); 308 309 /* 310 * Initialize an enumerator with convenient defaults (index = 0). 311 */ 312 void gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum, 313 const gs_cmap_t *pcmap, 314 const gs_cmap_ranges_enum_procs_t *procs); 315 void gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum, 316 const gs_cmap_t *pcmap, 317 const gs_cmap_lookups_enum_procs_t *procs); 318 319 /* 320 * Check for identity CMap. Uses a fast check for special cases. 321 */ 322 bool gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only); 323 324 /* 325 * For a random CMap, compute whether it is identity. 326 * It is not applicable to gs_cmap_ToUnicode_t due to 327 * different sizes of domain keys and range values. 328 */ 329 bool gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only); 330 331 #endif /* gxfcmap_INCLUDED */ 332