/* xref: /plan9/sys/src/cmd/gs/src/gschar0.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4) */
/* Copyright (C) 1991, 1992, 1993, 1997, 1998, 1999, 2000 Aladdin Enterprises.  All rights reserved.

  This software is provided AS-IS with no warranty, either express or
  implied.

  This software is distributed under license and may not be copied,
  modified or distributed except as expressly authorized under the terms
  of the license contained in the file LICENSE in this distribution.

  For more information about licensing, please refer to
  http://www.ghostscript.com/licensing/. For information on
  commercial licensing, go to http://www.artifex.com/licensing/ or
  contact Artifex Software, Inc., 101 Lucas Valley Road #110,
  San Rafael, CA  94903, U.S.A., +1(415)492-9861.
*/

/* $Id: gschar0.c,v 1.8 2002/10/31 08:34:51 ray Exp $ */
/* Composite font decoding for Ghostscript library */
19 #include "memory_.h"
20 #include "gx.h"
21 #include "gserrors.h"
22 #include "gsstruct.h"
23 #include "gsfcmap.h"
24 #include "gxfcmap.h"
25 #include "gxfixed.h"
26 #include "gxdevice.h"
27 #include "gxfont.h"
28 #include "gxfont0.h"
29 #include "gxtext.h"
30 
31 /* Stack up modal composite fonts, down to a non-modal or base font. */
32 private int
gs_stack_modal_fonts(gs_text_enum_t * pte)33 gs_stack_modal_fonts(gs_text_enum_t *pte)
34 {
35     int fdepth = pte->fstack.depth;
36     gs_font *cfont = pte->fstack.items[fdepth].font;
37 
38     while (cfont->FontType == ft_composite) {
39 	gs_font_type0 *const cmfont = (gs_font_type0 *) cfont;
40 
41 	if (!fmap_type_is_modal(cmfont->data.FMapType))
42 	    break;
43 	if (fdepth == MAX_FONT_STACK)
44 	    return_error(gs_error_invalidfont);
45 	fdepth++;
46 	cfont = cmfont->data.FDepVector[cmfont->data.Encoding[0]];
47 	pte->fstack.items[fdepth].font = cfont;
48 	pte->fstack.items[fdepth].index = 0;
49 	if_debug2('j', "[j]stacking depth=%d font=0x%lx\n",
50 		  fdepth, (ulong) cfont);
51     }
52     pte->fstack.depth = fdepth;
53     return 0;
54 }
55 /* Initialize the composite font stack for a show enumerator. */
56 /* Return an error if the data is not a byte string. */
57 int
gs_type0_init_fstack(gs_text_enum_t * pte,gs_font * pfont)58 gs_type0_init_fstack(gs_text_enum_t *pte, gs_font * pfont)
59 {
60     if (!(pte->text.operation & (TEXT_FROM_STRING | TEXT_FROM_BYTES)))
61 	return_error(gs_error_invalidfont);
62     if_debug1('j', "[j]stacking depth=0 font=0x%lx\n",
63 	      (ulong) pfont);
64     pte->fstack.depth = 0;
65     pte->fstack.items[0].font = pfont;
66     pte->fstack.items[0].index = 0;
67     return gs_stack_modal_fonts(pte);
68 }
69 
/*
 * Select the appropriate descendant of a font.
 *
 * Uses free variables from the expansion site: pte, orig_depth,
 * orig_index and changed.
 * Reads pdata and fidx, reads and increments fdepth, and sets pfont to
 * pdata->FDepVector[pdata->Encoding[fidx]].
 *
 * Returns from the enclosing function with gs_error_rangecheck if fidx
 * is outside the Encoding array, or gs_error_invalidfont if the font
 * stack is already at MAX_FONT_STACK.  Otherwise the selected font and
 * fidx are recorded at the new stack depth; 'changed' is set to 1 when
 * the new depth exceeds orig_depth, when the font at that depth differs
 * from the one previously stored there, or when fidx differs from
 * orig_index.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement
 * (safe after a label and inside unbraced if/else).  fdepth must be a
 * modifiable lvalue; it is evaluated more than once.
 */
#define select_descendant(pfont, pdata, fidx, fdepth)\
  do {\
    if ((fidx) >= (pdata)->encoding_size)\
      return_error(gs_error_rangecheck);\
    if ((fdepth) == MAX_FONT_STACK)\
      return_error(gs_error_invalidfont);\
    (pfont) = (pdata)->FDepVector[(pdata)->Encoding[fidx]];\
    if (++(fdepth) > orig_depth ||\
        (pfont) != pte->fstack.items[fdepth].font ||\
        orig_index != (fidx)) {\
      pte->fstack.items[fdepth].font = (pfont);\
      changed = 1;\
    }\
    pte->fstack.items[fdepth].index = (fidx);\
  } while (0)
83 
84 /* Get the root EscChar of a composite font, which overrides the EscChar */
85 /* of descendant fonts. */
86 private uint
root_esc_char(const gs_text_enum_t * pte)87 root_esc_char(const gs_text_enum_t *pte)
88 {
89     return ((gs_font_type0 *) (pte->fstack.items[0].font))->data.EscChar;
90 }
91 
92 /* Get the next character or glyph from a composite string. */
93 /* If we run off the end of the string in the middle of a */
94 /* multi-byte sequence, return gs_error_rangecheck. */
95 /* If the string is empty, return 2. */
96 /* If the current (base) font changed, return 1.  Otherwise, return 0. */
97 int
gs_type0_next_char_glyph(gs_text_enum_t * pte,gs_char * pchr,gs_glyph * pglyph)98 gs_type0_next_char_glyph(gs_text_enum_t *pte, gs_char *pchr, gs_glyph *pglyph)
99 {
100     const byte *str = pte->text.data.bytes;
101     const byte *p = str + pte->index;
102     const byte *end = str + pte->text.size;
103     int fdepth = pte->fstack.depth;
104     int orig_depth = fdepth;
105     int orig_index = pte->fstack.items[fdepth].index;
106     gs_font *pfont;
107 
108 #define pfont0 ((gs_font_type0 *)pfont)
109     gs_type0_data *pdata;
110     uint fidx;
111     gs_char chr;
112     gs_glyph glyph = gs_no_glyph;
113     int changed = 0;
114 
115     pte->FontBBox_as_Metrics2.x = pte->FontBBox_as_Metrics2.y = 0;
116 
117 #define need_left(n)\
118   if ( end - p < n ) return_error(gs_error_rangecheck)
119 
120     /*
121      * Although the Adobe documentation doesn't say anything about this,
122      * if the root font is modal and the very first character of the
123      * string being decoded is an escape or shift character, then
124      * font selection via the escape mechanism works down from the root,
125      * rather than up from the lowest modal font.  (This was first
126      * reported by Norio Katayama, and confirmed by someone at Adobe.)
127      */
128 
129     if (pte->index == 0) {
130 	int idepth = 0;
131 
132 	pfont = pte->fstack.items[0].font;
133 	for (; pfont->FontType == ft_composite;) {
134 	    fmap_type fmt = (pdata = &pfont0->data)->FMapType;
135 
136 	    if (p == end)
137 		return 2;
138 	    chr = *p;
139 	    switch (fmt) {
140 		case fmap_escape:
141 		    if (chr != root_esc_char(pte))
142 			break;
143 		    need_left(2);
144 		    fidx = p[1];
145 		    p += 2;
146 		    if_debug1('j', "[j]from root: escape %d\n", fidx);
147 		  rdown:select_descendant(pfont, pdata, fidx, idepth);
148 		    if_debug2('j', "[j]... new depth=%d, new font=0x%lx\n",
149 			      idepth, (ulong) pfont);
150 		    continue;
151 		case fmap_double_escape:
152 		    if (chr != root_esc_char(pte))
153 			break;
154 		    need_left(2);
155 		    fidx = p[1];
156 		    p += 2;
157 		    if (fidx == chr) {
158 			need_left(1);
159 			fidx = *p++ + 256;
160 		    }
161 		    if_debug1('j', "[j]from root: double escape %d\n", fidx);
162 		    goto rdown;
163 		case fmap_shift:
164 		    if (chr == pdata->ShiftIn)
165 			fidx = 0;
166 		    else if (chr == pdata->ShiftOut)
167 			fidx = 1;
168 		    else
169 			break;
170 		    p++;
171 		    if_debug1('j', "[j]from root: shift %d\n", fidx);
172 		    goto rdown;
173 		default:
174 		    break;
175 	    }
176 	    break;
177 	}
178 	/* If we saw any initial escapes or shifts, */
179 	/* compute a new initial base font. */
180 	if (idepth != 0) {
181 	    int code;
182 
183 	    pte->fstack.depth = idepth;
184 	    code = gs_stack_modal_fonts(pte);
185 	    if (code < 0)
186 		return code;
187 	    if (pte->fstack.depth > idepth)
188 		changed = 1;
189 	    orig_depth = fdepth = pte->fstack.depth;
190 	}
191     }
192     /* Handle initial escapes or shifts. */
193 
194   up:if (p == end)
195 	return 2;
196     chr = *p;
197     while (fdepth > 0) {
198 	pfont = pte->fstack.items[fdepth - 1].font;
199 	pdata = &pfont0->data;
200 	switch (pdata->FMapType) {
201 	    default:		/* non-modal */
202 		fdepth--;
203 		continue;
204 
205 	    case fmap_escape:
206 		if (chr != root_esc_char(pte))
207 		    break;
208 		need_left(2);
209 		fidx = *++p;
210 		if_debug1('j', "[j]next: escape %d\n", fidx);
211 		/* Per Adobe, if we get an escape at the root, */
212 		/* treat it as an ordinary character (font index). */
213 		if (fidx == chr && fdepth > 1) {
214 		    fdepth--;
215 		    goto up;
216 		}
217 	      down:if (++p == end)
218 		    return 2;
219 		chr = *p;
220 		fdepth--;
221 		do {
222 		    select_descendant(pfont, pdata, fidx, fdepth);
223 		    if_debug3('j', "[j]down from modal: new depth=%d, index=%d, new font=0x%lx\n",
224 			      fdepth, fidx, (ulong) pfont);
225 		    if (pfont->FontType != ft_composite)
226 			break;
227 		    pdata = &pfont0->data;
228 		    fidx = 0;
229 		}
230 		while (pdata->FMapType == fmap_escape);
231 		continue;
232 
233 	    case fmap_double_escape:
234 		if (chr != root_esc_char(pte))
235 		    break;
236 		need_left(2);
237 		fidx = *++p;
238 		if (fidx == chr) {
239 		    need_left(2);
240 		    fidx = *++p + 256;
241 		}
242 		if_debug1('j', "[j]next: double escape %d\n", fidx);
243 		goto down;
244 
245 	    case fmap_shift:
246 		if (chr == pdata->ShiftIn)
247 		    fidx = 0;
248 		else if (chr == pdata->ShiftOut)
249 		    fidx = 1;
250 		else
251 		    break;
252 		if_debug1('j', "[j]next: shift %d\n", fidx);
253 		goto down;
254 	}
255 	break;
256     }
257     /* At this point, chr == *p. */
258     /* (This is important to know for CMap'ed fonts.) */
259     p++;
260 
261     /*
262      * Now handle non-modal descendants.
263      * The PostScript language manual has some confusing
264      * wording about the parent supplying the "first part"
265      * of the child's decoding information; what this means
266      * is not (as one might imagine) the font index, but
267      * simply the first byte of the data.
268      */
269 
270     while ((pfont = pte->fstack.items[fdepth].font)->FontType == ft_composite) {
271 	pdata = &pfont0->data;
272 	switch (pdata->FMapType) {
273 	    default:		/* can't happen */
274 		return_error(gs_error_invalidfont);
275 
276 	    case fmap_8_8:
277 		need_left(1);
278 		fidx = chr;
279 		chr = *p++;
280 		if_debug2('J', "[J]8/8 index=%d, char=%ld\n",
281 			  fidx, chr);
282 		break;
283 
284 	    case fmap_1_7:
285 		fidx = chr >> 7;
286 		chr &= 0x7f;
287 		if_debug2('J', "[J]1/7 index=%d, char=%ld\n",
288 			  fidx, chr);
289 		break;
290 
291 	    case fmap_9_7:
292 		need_left(1);
293 		fidx = ((uint) chr << 1) + (*p >> 7);
294 		chr = *p & 0x7f;
295 		if_debug2('J', "[J]9/7 index=%d, char=%ld\n",
296 			  fidx, chr);
297 		p++;
298 		break;
299 
300 	    case fmap_SubsVector:
301 		{
302 		    int width = pdata->subs_width;
303 		    uint subs_count = pdata->subs_size;
304 		    const byte *psv = pdata->SubsVector.data;
305 
306 #define subs_loop(subs_elt, width)\
307   while ( subs_count != 0 && tchr >= (schr = subs_elt) )\
308     subs_count--, tchr -= schr, psv += width;\
309   chr = tchr; p += width - 1; break
310 
311 		    switch (width) {
312 			default:	/* can't happen */
313 			    return_error(gs_error_invalidfont);
314 			case 1:
315 			    {
316 				byte tchr = (byte) chr, schr;
317 
318 				subs_loop(*psv, 1);
319 			    }
320 			case 2:
321 			    need_left(1);
322 #define w2(p) (((ushort)*p << 8) + p[1])
323 			    {
324 				ushort tchr = ((ushort) chr << 8) + *p,
325 				       schr;
326 
327 				subs_loop(w2(psv), 2);
328 			    }
329 			case 3:
330 			    need_left(2);
331 #define w3(p) (((ulong)*p << 16) + ((uint)p[1] << 8) + p[2])
332 			    {
333 				ulong tchr = ((ulong) chr << 16) + w2(p),
334 				      schr;
335 
336 				subs_loop(w3(psv), 3);
337 			    }
338 			case 4:
339 			    need_left(3);
340 #define w4(p) (((ulong)*p << 24) + ((ulong)p[1] << 16) + ((uint)p[2] << 8) + p[3])
341 			    {
342 				ulong tchr = ((ulong) chr << 24) + w3(p),
343 				      schr;
344 
345 				subs_loop(w4(psv), 4);
346 			    }
347 #undef w2
348 #undef w3
349 #undef w4
350 #undef subs_loop
351 		    }
352 		    fidx = pdata->subs_size - subs_count;
353 		    if_debug2('J', "[J]SubsVector index=%d, char=%ld\n",
354 			      fidx, chr);
355 		    break;
356 		}
357 
358 	    case fmap_CMap:
359 		{
360 		    gs_const_string cstr;
361 		    uint mindex = p - str - 1;	/* p was incremented */
362 		    int code;
363 
364                     /*
365                      * When decoding an FMapType4 or 5, the value
366                      * of chr is modified; when an FMapType9 (CMap)
367                      * composite font is used as a decendant font,
368                      * we have to pass the text including a modified
369                      * chr. Check whether chr has been modified, and
370                      * if so, construct and pass a modified buffer.
371                      */
372 		    if (*(p - 1) != chr) {
373 			byte substr[MAX_CMAP_CODE_SIZE];
374 			int submindex = 0;
375 			if_debug2('j', "[j] *(p-1) 0x%02x != chr 0x%02x, modified str should be passed\n",
376 				*(p-1), (byte)chr);
377 			memcpy(substr, p - 1,
378 				min(MAX_CMAP_CODE_SIZE, end - p + 1));
379 			substr[0] = chr;
380 			cstr.data = substr;
381 			cstr.size = min(MAX_CMAP_CODE_SIZE, end - p + 1);
382 			if (gs_debug_c('j')) {
383 			    dlprintf("[j] original str(");
384 			    debug_print_string_hex(str, end - str);
385 			    dlprintf(") -> modified substr(");
386 			    debug_print_string_hex(cstr.data, cstr.size);
387 			    dlprintf(")\n");
388 			}
389 			code = gs_cmap_decode_next(pdata->CMap, &cstr,
390 					(uint*) &submindex, &fidx, &chr, &glyph);
391 			mindex += submindex;
392 		    } else {
393 			cstr.data = str;
394 			cstr.size = end - str;
395 			code = gs_cmap_decode_next(pdata->CMap, &cstr, &mindex,
396 					       &fidx, &chr, &glyph);
397 		    }
398 		    if (code < 0)
399 			return code;
400 		    pte->cmap_code = code; /* hack for widthshow */
401 		    p = str + mindex;
402 		    if_debug3('J', "[J]CMap returns %d, chr=0x%lx, glyph=0x%lx\n",
403 			      code, (ulong) chr, (ulong) glyph);
404 		    if (code == 0) {
405 			if (glyph == gs_no_glyph) {
406 			    glyph = gs_min_cid_glyph;
407 			    if_debug0('J', "... undefined\n");
408 			    goto done;
409 			}
410 		    } else
411 			chr = (gs_char) glyph, glyph = gs_no_glyph;
412 		    /****** RESCAN chr IF DESCENDANT IS CMAP'ED ******/
413 		    break;
414 		}
415 	}
416 
417 	select_descendant(pfont, pdata, fidx, fdepth);
418 	if_debug2('J', "... new depth=%d, new font=0x%lx\n",
419 		  fdepth, (ulong) pfont);
420 	/* FontBBox may be used as metrics2 with WMode=1 :
421 	*/
422 	if (pfont->FontType == ft_CID_encrypted ||
423 	    pfont->FontType == ft_CID_TrueType
424 	    ) {
425 	    gs_font_base *pfb = (gs_font_base *)pfont;
426 
427 	    pte->FontBBox_as_Metrics2 = pfb->FontBBox.q;
428 	}
429     }
430 done:
431     *pchr = chr;
432     *pglyph = glyph;
433     /* Update the pointer into the original string, but only if */
434     /* we didn't switch over to parsing a code from a CMap. */
435     if (str == pte->text.data.bytes)
436 	pte->index = p - str;
437     pte->fstack.depth = fdepth;
438     if_debug4('J', "[J]depth=%d font=0x%lx index=%d changed=%d\n",
439 	      fdepth, (ulong) pte->fstack.items[fdepth].font,
440 	      pte->fstack.items[fdepth].index, changed);
441     return changed;
442 }
443 #undef pfont0
444