1 /* Copyright (C) 1997, 2000 Aladdin Enterprises. All rights reserved. 2 3 This software is provided AS-IS with no warranty, either express or 4 implied. 5 6 This software is distributed under license and may not be copied, 7 modified or distributed except as expressly authorized under the terms 8 of the license contained in the file LICENSE in this distribution. 9 10 For more information about licensing, please refer to 11 http://www.ghostscript.com/licensing/. For information on 12 commercial licensing, go to http://www.artifex.com/licensing/ or 13 contact Artifex Software, Inc., 101 Lucas Valley Road #110, 14 San Rafael, CA 94903, U.S.A., +1(415)492-9861. 15 */ 16 17 /* $Id: gsfcmap.c,v 1.27 2004/12/08 21:35:13 stefan Exp $ */ 18 /* CMap character decoding */ 19 #include <assert.h> 20 #include "memory_.h" 21 #include "string_.h" 22 #include "gx.h" 23 #include "gserrors.h" 24 #include "gsstruct.h" 25 #include "gsutil.h" /* for gs_next_ids */ 26 #include "gxfcmap.h" 27 28 typedef struct gs_cmap_identity_s { 29 GS_CMAP_COMMON; 30 int num_bytes; 31 int varying_bytes; 32 int code; /* 0 or num_bytes */ 33 } gs_cmap_identity_t; 34 35 /* GC descriptors */ 36 public_st_cmap(); 37 gs_private_st_suffix_add0_local(st_cmap_identity, gs_cmap_identity_t, 38 "gs_cmap_identity_t", cmap_ptrs, cmap_data, 39 st_cmap); 40 41 /* ---------------- Client procedures ---------------- */ 42 43 /* ------ Initialization/creation ------ */ 44 45 /* 46 * Create an Identity CMap. 47 */ 48 private uint 49 get_integer_bytes(const byte *src, int count) 50 { 51 uint v = 0; 52 int i; 53 54 for (i = 0; i < count; ++i) 55 v = (v << 8) + src[i]; 56 return v; 57 } 58 private int 59 identity_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str, 60 uint *pindex, uint *pfidx, 61 gs_char *pchr, gs_glyph *pglyph) 62 { 63 const gs_cmap_identity_t *const pcimap = 64 (const gs_cmap_identity_t *)pcmap; 65 int num_bytes = pcimap->num_bytes; 66 uint value; 67 68 if (str->size < *pindex + num_bytes) { 69 *pglyph = gs_no_glyph; 70 return (*pindex == str->size ? 2 : -1); 71 } 72 value = get_integer_bytes(str->data + *pindex, num_bytes); 73 *pglyph = gs_min_cid_glyph + value; 74 *pchr = value; 75 *pindex += num_bytes; 76 *pfidx = 0; 77 return pcimap->code; 78 } 79 private int 80 identity_next_range(gs_cmap_ranges_enum_t *penum) 81 { 82 if (penum->index == 0) { 83 const gs_cmap_identity_t *const pcimap = 84 (const gs_cmap_identity_t *)penum->cmap; 85 86 memset(penum->range.first, 0, pcimap->num_bytes); 87 memset(penum->range.last, 0xff, pcimap->num_bytes); 88 penum->index = 1; 89 return 0; 90 } 91 return 1; 92 } 93 private const gs_cmap_ranges_enum_procs_t identity_range_procs = { 94 identity_next_range 95 }; 96 private void 97 identity_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre) 98 { 99 gs_cmap_ranges_enum_setup(pre, pcmap, &identity_range_procs); 100 } 101 private int 102 identity_next_lookup(gs_cmap_lookups_enum_t *penum) 103 { 104 if (penum->index[0] == 0) { 105 const gs_cmap_identity_t *const pcimap = 106 (const gs_cmap_identity_t *)penum->cmap; 107 int num_bytes = pcimap->num_bytes; 108 109 memset(penum->entry.key[0], 0, num_bytes); 110 memset(penum->entry.key[1], 0xff, num_bytes); 111 memset(penum->entry.key[1], 0, num_bytes - pcimap->varying_bytes); 112 penum->entry.key_size = num_bytes; 113 penum->entry.key_is_range = true; 114 penum->entry.value_type = 115 (pcimap->code ? CODE_VALUE_CHARS : CODE_VALUE_CID); 116 penum->entry.value.size = num_bytes; 117 penum->entry.font_index = 0; 118 penum->index[0] = 1; 119 return 0; 120 } 121 return 1; 122 } 123 private int 124 no_next_lookup(gs_cmap_lookups_enum_t *penum) 125 { 126 return 1; 127 } 128 private int 129 identity_next_entry(gs_cmap_lookups_enum_t *penum) 130 { 131 const gs_cmap_identity_t *const pcimap = 132 (const gs_cmap_identity_t *)penum->cmap; 133 int num_bytes = pcimap->num_bytes; 134 int i = num_bytes - pcimap->varying_bytes; 135 136 memcpy(penum->temp_value, penum->entry.key[0], num_bytes); 137 memcpy(penum->entry.key[0], penum->entry.key[1], i); 138 while (--i >= 0) 139 if (++(penum->entry.key[1][i]) != 0) { 140 penum->entry.value.data = penum->temp_value; 141 return 0; 142 } 143 return 1; 144 } 145 146 private const gs_cmap_lookups_enum_procs_t identity_lookup_procs = { 147 identity_next_lookup, identity_next_entry 148 }; 149 const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs = { 150 no_next_lookup, 0 151 }; 152 private void 153 identity_enum_lookups(const gs_cmap_t *pcmap, int which, 154 gs_cmap_lookups_enum_t *pre) 155 { 156 gs_cmap_lookups_enum_setup(pre, pcmap, 157 (which ? &gs_cmap_no_lookups_procs : 158 &identity_lookup_procs)); 159 } 160 private bool 161 identity_is_identity(const gs_cmap_t *pcmap, int font_index_only) 162 { 163 return true; 164 } 165 166 private const gs_cmap_procs_t identity_procs = { 167 identity_decode_next, identity_enum_ranges, identity_enum_lookups, identity_is_identity 168 }; 169 170 private int 171 gs_cmap_identity_alloc(gs_cmap_t **ppcmap, int num_bytes, int varying_bytes, 172 int return_code, const char *cmap_name, int wmode, 173 gs_memory_t *mem) 174 { 175 /* 176 * We could allow any value of num_bytes between 1 and 177 * min(MAX_CMAP_CODE_SIZE, 4), but if num_bytes != 2, we can't name 178 * the result "Identity-[HV]". 179 */ 180 static const gs_cid_system_info_t identity_cidsi = { 181 { (const byte *)"Adobe", 5 }, 182 { (const byte *)"Identity", 8 }, 183 0 184 }; 185 int code; 186 gs_cmap_identity_t *pcimap; 187 188 if (num_bytes != 2) 189 return_error(gs_error_rangecheck); 190 code = gs_cmap_alloc(ppcmap, &st_cmap_identity, wmode, 191 (const byte *)cmap_name, strlen(cmap_name), 192 &identity_cidsi, 1, &identity_procs, mem); 193 if (code < 0) 194 return code; 195 pcimap = (gs_cmap_identity_t *)*ppcmap; 196 pcimap->num_bytes = num_bytes; 197 pcimap->varying_bytes = varying_bytes; 198 pcimap->code = return_code; 199 return 0; 200 } 201 int 202 gs_cmap_create_identity(gs_cmap_t **ppcmap, int num_bytes, int wmode, 203 gs_memory_t *mem) 204 { 205 return gs_cmap_identity_alloc(ppcmap, num_bytes, num_bytes, 0, 206 (wmode ? "Identity-V" : "Identity-H"), 207 wmode, mem); 208 } 209 int 210 gs_cmap_create_char_identity(gs_cmap_t **ppcmap, int num_bytes, int wmode, 211 gs_memory_t *mem) 212 { 213 return gs_cmap_identity_alloc(ppcmap, num_bytes, 1, num_bytes, 214 (wmode ? "Identity-BF-V" : "Identity-BF-H"), 215 wmode, mem); 216 } 217 218 /* ------ Check identity ------ */ 219 220 /* 221 * Check for identity CMap. Uses a fast check for special cases. 222 */ 223 int 224 gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only) 225 { 226 return pcmap->procs->is_identity(pcmap, font_index_only); 227 } 228 229 /* ------ Decoding ------ */ 230 231 /* 232 * Decode and map a character from a string using a CMap. 233 * See gsfcmap.h for details. 234 */ 235 int 236 gs_cmap_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str, 237 uint *pindex, uint *pfidx, 238 gs_char *pchr, gs_glyph *pglyph) 239 { 240 return pcmap->procs->decode_next(pcmap, str, pindex, pfidx, pchr, pglyph); 241 } 242 243 /* ------ Enumeration ------ */ 244 245 /* 246 * Initialize the enumeration of the code space ranges, and enumerate 247 * the next range. See gxfcmap.h for details. 248 */ 249 void 250 gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *penum) 251 { 252 pcmap->procs->enum_ranges(pcmap, penum); 253 } 254 int 255 gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum) 256 { 257 return penum->procs->next_range(penum); 258 } 259 260 /* 261 * Initialize the enumeration of the lookups, and enumerate the next 262 * the next lookup or entry. See gxfcmap.h for details. 263 */ 264 void 265 gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which, 266 gs_cmap_lookups_enum_t *penum) 267 { 268 pcmap->procs->enum_lookups(pcmap, which, penum); 269 } 270 int 271 gs_cmap_enum_next_lookup(gs_cmap_lookups_enum_t *penum) 272 { 273 return penum->procs->next_lookup(penum); 274 } 275 int 276 gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum) 277 { 278 return penum->procs->next_entry(penum); 279 } 280 281 /* ---------------- Implementation procedures ---------------- */ 282 283 /* ------ Initialization/creation ------ */ 284 285 /* 286 * Initialize a just-allocated CMap, to ensure that all pointers are clean 287 * for the GC. Note that this only initializes the common part. 288 */ 289 void 290 gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts) 291 { 292 memset(pcmap, 0, sizeof(*pcmap)); 293 /* We reserve a range of IDs for pdfwrite needs, 294 to allow an identification of submaps for a particular subfont. 295 */ 296 pcmap->id = gs_next_ids(mem, num_fonts); 297 pcmap->num_fonts = num_fonts; 298 uid_set_invalid(&pcmap->uid); 299 } 300 301 /* 302 * Allocate and initialize (the common part of) a CMap. 303 */ 304 int 305 gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype, 306 int wmode, const byte *map_name, uint name_size, 307 const gs_cid_system_info_t *pcidsi_in, int num_fonts, 308 const gs_cmap_procs_t *procs, gs_memory_t *mem) 309 { 310 gs_cmap_t *pcmap = 311 gs_alloc_struct(mem, gs_cmap_t, pstype, "gs_cmap_alloc(CMap)"); 312 gs_cid_system_info_t *pcidsi = 313 gs_alloc_struct_array(mem, num_fonts, gs_cid_system_info_t, 314 &st_cid_system_info_element, 315 "gs_cmap_alloc(CIDSystemInfo)"); 316 317 if (pcmap == 0 || pcidsi == 0) { 318 gs_free_object(mem, pcidsi, "gs_cmap_alloc(CIDSystemInfo)"); 319 gs_free_object(mem, pcmap, "gs_cmap_alloc(CMap)"); 320 return_error(gs_error_VMerror); 321 } 322 gs_cmap_init(mem, pcmap, num_fonts); /* id, uid, num_fonts */ 323 pcmap->CMapType = 1; 324 pcmap->CMapName.data = map_name; 325 pcmap->CMapName.size = name_size; 326 if (pcidsi_in) 327 memcpy(pcidsi, pcidsi_in, sizeof(*pcidsi) * num_fonts); 328 else 329 memset(pcidsi, 0, sizeof(*pcidsi) * num_fonts); 330 pcmap->CIDSystemInfo = pcidsi; 331 pcmap->CMapVersion = 1.0; 332 /* uid = 0, UIDOffset = 0 */ 333 pcmap->WMode = wmode; 334 /* from_Unicode = 0 */ 335 /* not glyph_name, glyph_name_data */ 336 pcmap->procs = procs; 337 *ppcmap = pcmap; 338 return 0; 339 } 340 341 /* 342 * Initialize an enumerator with convenient defaults (index = 0). 343 */ 344 void 345 gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum, 346 const gs_cmap_t *pcmap, 347 const gs_cmap_ranges_enum_procs_t *procs) 348 { 349 penum->cmap = pcmap; 350 penum->procs = procs; 351 penum->index = 0; 352 } 353 void 354 gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum, 355 const gs_cmap_t *pcmap, 356 const gs_cmap_lookups_enum_procs_t *procs) 357 { 358 penum->cmap = pcmap; 359 penum->procs = procs; 360 penum->index[0] = penum->index[1] = 0; 361 } 362 363 /* 364 * For a random CMap, compute whether it is identity. 365 * It is not applicable to gs_cmap_ToUnicode_t due to 366 * different sizes of domain keys and range values. 367 */ 368 bool 369 gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only) 370 { 371 const int which = 0; 372 gs_cmap_lookups_enum_t lenum; 373 int code; 374 375 for (gs_cmap_lookups_enum_init(pcmap, which, &lenum); 376 (code = gs_cmap_enum_next_lookup(&lenum)) == 0; ) { 377 if (font_index_only >= 0 && lenum.entry.font_index != font_index_only) 378 continue; 379 if (font_index_only < 0 && lenum.entry.font_index > 0) 380 return false; 381 while (gs_cmap_enum_next_entry(&lenum) == 0) { 382 switch (lenum.entry.value_type) { 383 case CODE_VALUE_CID: 384 break; 385 case CODE_VALUE_CHARS: 386 return false; /* Not implemented yet. */ 387 case CODE_VALUE_GLYPH: 388 return false; 389 default : 390 return false; /* Must not happen. */ 391 } 392 if (lenum.entry.key_size != lenum.entry.value.size) 393 return false; 394 if (memcmp(lenum.entry.key[0], lenum.entry.value.data, 395 lenum.entry.key_size)) 396 return false; 397 } 398 } 399 return true; 400 } 401 402 /* ================= ToUnicode CMap ========================= */ 403 404 /* 405 * This kind of CMaps keeps character a mapping from a random 406 * PS encoding to Unicode, being defined in PDF reference, "ToUnicode CMaps". 407 * It represents ranges in a closure data, without using 408 * gx_cmap_lookup_range_t. A special function gs_cmap_ToUnicode_set 409 * allows to write code pairs into the closure data. 410 */ 411 412 private const int gs_cmap_ToUnicode_code_bytes = 2; 413 414 typedef struct gs_cmap_ToUnicode_s { 415 GS_CMAP_COMMON; 416 int num_codes; 417 int key_size; 418 bool is_identity; 419 } gs_cmap_ToUnicode_t; 420 421 gs_private_st_suffix_add0(st_cmap_ToUnicode, gs_cmap_ToUnicode_t, 422 "gs_cmap_ToUnicode_t", cmap_ToUnicode_enum_ptrs, cmap_ToUnicode_reloc_ptrs, 423 st_cmap); 424 425 private int 426 gs_cmap_ToUnicode_next_range(gs_cmap_ranges_enum_t *penum) 427 { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap; 428 if (penum->index == 0) { 429 memset(penum->range.first, 0, cmap->key_size); 430 memset(penum->range.last, 0xff, cmap->key_size); 431 penum->range.size = cmap->key_size; 432 penum->index = 1; 433 return 0; 434 } 435 return 1; 436 } 437 438 private const gs_cmap_ranges_enum_procs_t gs_cmap_ToUnicode_range_procs = { 439 gs_cmap_ToUnicode_next_range 440 }; 441 442 private int 443 gs_cmap_ToUnicode_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str, 444 uint *pindex, uint *pfidx, 445 gs_char *pchr, gs_glyph *pglyph) 446 { 447 assert(0); /* Unsupported, because never used. */ 448 return 0; 449 } 450 451 private void 452 gs_cmap_ToUnicode_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre) 453 { 454 gs_cmap_ranges_enum_setup(pre, pcmap, &gs_cmap_ToUnicode_range_procs); 455 } 456 457 private int 458 gs_cmap_ToUnicode_next_lookup(gs_cmap_lookups_enum_t *penum) 459 { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap; 460 461 if (penum->index[0]++ > 0) 462 return 1; 463 penum->entry.value.data = penum->temp_value; 464 penum->entry.value.size = gs_cmap_ToUnicode_code_bytes; 465 penum->index[1] = 0; 466 penum->entry.key_is_range = true; 467 penum->entry.value_type = CODE_VALUE_CHARS; 468 penum->entry.key_size = cmap->key_size; 469 penum->entry.value.size = gs_cmap_ToUnicode_code_bytes; 470 penum->entry.font_index = 0; 471 return 0; 472 } 473 474 private int 475 gs_cmap_ToUnicode_next_entry(gs_cmap_lookups_enum_t *penum) 476 { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap; 477 const uchar *map = cmap->glyph_name_data; 478 const int num_codes = cmap->num_codes; 479 uint index = penum->index[1], i, j; 480 uchar c0, c1, c2; 481 482 /* Warning : this hardcodes gs_cmap_ToUnicode_num_code_bytes = 2 */ 483 for (i = index; i < num_codes; i++) 484 if (map[i + i + 0] != 0 || map[i + i + 1] != 0) 485 break; 486 if (i >= num_codes) 487 return 1; 488 c0 = map[i + i + 0]; 489 c1 = map[i + i + 1]; 490 for (j = i + 1, c2 = c1 + 1; j < num_codes; j++, c2++) { 491 /* Due to PDF spec, *bfrange boundaries may differ 492 in the last byte only. */ 493 if (j % 256 == 0) 494 break; 495 if ((uchar)c2 == 0) 496 break; 497 if (map[j + j + 0] != c0 || map[j + j + 1] != c2) 498 break; 499 } 500 penum->index[1] = j; 501 penum->entry.key[0][0] = (uchar)(i >> 8); 502 penum->entry.key[0][cmap->key_size - 1] = (uchar)(i & 0xFF); 503 penum->entry.key[1][0] = (uchar)(j >> 8); 504 penum->entry.key[1][cmap->key_size - 1] = (uchar)((j - 1) & 0xFF); 505 memcpy(penum->temp_value, map + i * gs_cmap_ToUnicode_code_bytes, 506 gs_cmap_ToUnicode_code_bytes); 507 return 0; 508 } 509 510 private const gs_cmap_lookups_enum_procs_t gs_cmap_ToUnicode_lookup_procs = { 511 gs_cmap_ToUnicode_next_lookup, gs_cmap_ToUnicode_next_entry 512 }; 513 514 private void 515 gs_cmap_ToUnicode_enum_lookups(const gs_cmap_t *pcmap, int which, 516 gs_cmap_lookups_enum_t *pre) 517 { 518 gs_cmap_lookups_enum_setup(pre, pcmap, 519 (which ? &gs_cmap_no_lookups_procs : /* fixme */ 520 &gs_cmap_ToUnicode_lookup_procs)); 521 } 522 523 private bool 524 gs_cmap_ToUnicode_is_identity(const gs_cmap_t *pcmap, int font_index_only) 525 { const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)pcmap; 526 return cmap->is_identity; 527 } 528 529 private const gs_cmap_procs_t gs_cmap_ToUnicode_procs = { 530 gs_cmap_ToUnicode_decode_next, 531 gs_cmap_ToUnicode_enum_ranges, 532 gs_cmap_ToUnicode_enum_lookups, 533 gs_cmap_ToUnicode_is_identity 534 }; 535 536 /* 537 * Allocate and initialize a ToUnicode CMap. 538 */ 539 int 540 gs_cmap_ToUnicode_alloc(gs_memory_t *mem, int id, int num_codes, int key_size, gs_cmap_t **ppcmap) 541 { int code; 542 uchar *map, *cmap_name = NULL; 543 gs_cmap_ToUnicode_t *cmap; 544 int name_len = 0; 545 # if 0 546 /* We don't write a CMap name to ToUnicode CMaps, 547 * becsue (1) there is no conventional method for 548 * generating them, and (2) Acrobat Reader ignores them. 549 * But we'd like to keep this code until beta-testing completes, 550 * and we ensure that other viewers do not need the names. 551 */ 552 char sid[10], *pref = "aux-"; 553 int sid_len, pref_len = strlen(pref); 554 555 sprintf(sid, "%d", id); 556 sid_len = strlen(sid); 557 name_len = pref_len + sid_len; 558 cmap_name = gs_alloc_string(mem, name_len, "gs_cmap_ToUnicode_alloc"); 559 if (cmap_name == 0) 560 return_error(gs_error_VMerror); 561 memcpy(cmap_name, pref, pref_len); 562 memcpy(cmap_name + pref_len, sid, sid_len); 563 # endif 564 code = gs_cmap_alloc(ppcmap, &st_cmap_ToUnicode, 565 0, cmap_name, name_len, NULL, 0, &gs_cmap_ToUnicode_procs, mem); 566 if (code < 0) 567 return code; 568 map = (uchar *)gs_alloc_bytes(mem, num_codes * gs_cmap_ToUnicode_code_bytes, 569 "gs_cmap_ToUnicode_alloc"); 570 if (map == NULL) 571 return_error(gs_error_VMerror); 572 memset(map, 0, num_codes * gs_cmap_ToUnicode_code_bytes); 573 cmap = (gs_cmap_ToUnicode_t *)*ppcmap; 574 cmap->glyph_name_data = map; 575 cmap->CMapType = 2; 576 cmap->num_fonts = 1; 577 cmap->key_size = key_size; 578 cmap->num_codes = num_codes; 579 cmap->ToUnicode = true; 580 cmap->is_identity = true; 581 return 0; 582 } 583 584 /* 585 * Write a code pair to ToUnicode CMap. 586 */ 587 void 588 gs_cmap_ToUnicode_add_pair(gs_cmap_t *pcmap, int code0, int code1) 589 { gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)pcmap; 590 uchar *map = pcmap->glyph_name_data; 591 const int num_codes = ((gs_cmap_ToUnicode_t *)pcmap)->num_codes; 592 593 if (code0 >= num_codes) 594 return; /* must not happen. */ 595 map[code0 * gs_cmap_ToUnicode_code_bytes + 0] = (uchar)(code1 >> 8); 596 map[code0 * gs_cmap_ToUnicode_code_bytes + 1] = (uchar)(code1 & 0xFF); 597 cmap->is_identity &= (code0 == code1); 598 } 599