xref: /plan9/sys/src/cmd/gs/src/gdevpdfr.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 1997, 2000 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: gdevpdfr.c,v 1.9 2005/04/25 12:28:49 igor Exp $ */
18 /* Named object pdfmark processing */
19 #include "memory_.h"
20 #include "string_.h"
21 #include "gx.h"
22 #include "gserrors.h"
23 #include "gsutil.h"		/* for bytes_compare */
24 #include "gdevpdfx.h"
25 #include "gdevpdfo.h"
26 #include "scanchar.h"
27 #include "strimpl.h"
28 #include "sstring.h"
29 
/*
 * Fallback: when gs_error_syntaxerror is not already defined as a macro,
 * make it an alias for gs_error_rangecheck so the scanner below can
 * still report malformed input.
 */
#if !defined(gs_error_syntaxerror)
#define gs_error_syntaxerror gs_error_rangecheck
#endif
33 
34 /* Test whether an object name has valid syntax, {name}. */
35 bool
pdf_objname_is_valid(const byte * data,uint size)36 pdf_objname_is_valid(const byte *data, uint size)
37 {
38     return (size >= 2 && data[0] == '{' &&
39 	    (const byte *)memchr(data, '}', size) == data + size - 1);
40 }
41 
42 /*
43  * Look up a named object.  Return e_rangecheck if the syntax is invalid.
44  * If the object is missing, return e_undefined.
45  */
46 int
pdf_find_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_object_t ** ppco)47 pdf_find_named(gx_device_pdf * pdev, const gs_param_string * pname,
48 	       cos_object_t **ppco)
49 {
50     const cos_value_t *pvalue;
51 
52     if (!pdf_objname_is_valid(pname->data, pname->size))
53 	return_error(gs_error_rangecheck);
54     if ((pvalue = cos_dict_find(pdev->local_named_objects, pname->data,
55 				pname->size)) != 0 ||
56 	(pvalue = cos_dict_find(pdev->global_named_objects, pname->data,
57 				pname->size)) != 0
58 	) {
59 	*ppco = pvalue->contents.object;
60 	return 0;
61     }
62     return_error(gs_error_undefined);
63 }
64 
65 /*
66  * Create a (local) named object.  id = -1L means do not assign an id.
67  * pname = 0 means just create the object, do not name it.  Note that
68  * during initialization, local_named_objects == global_named_objects.
69  */
70 int
pdf_create_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_type_t cotype,cos_object_t ** ppco,long id)71 pdf_create_named(gx_device_pdf *pdev, const gs_param_string *pname,
72 		 cos_type_t cotype, cos_object_t **ppco, long id)
73 {
74     cos_object_t *pco;
75     cos_value_t value;
76 
77     *ppco = pco = cos_object_alloc(pdev, "pdf_create_named");
78     if (pco == 0)
79 	return_error(gs_error_VMerror);
80     pco->id =
81 	(id == -1 ? 0L : id == 0 ? pdf_obj_ref(pdev) : id);
82     if (pname) {
83 	int code = cos_dict_put(pdev->local_named_objects, pname->data,
84 				pname->size, cos_object_value(&value, pco));
85 
86 	if (code < 0)
87 	    return code;
88     }
89     if (cotype != cos_type_generic)
90 	cos_become(pco, cotype);
91     *ppco = pco;
92     return 0;
93 }
94 int
pdf_create_named_dict(gx_device_pdf * pdev,const gs_param_string * pname,cos_dict_t ** ppcd,long id)95 pdf_create_named_dict(gx_device_pdf *pdev, const gs_param_string *pname,
96 		      cos_dict_t **ppcd, long id)
97 {
98     cos_object_t *pco;
99     int code = pdf_create_named(pdev, pname, cos_type_dict, &pco, id);
100 
101     *ppcd = (cos_dict_t *)pco;
102     return code;
103 }
104 
105 /*
106  * Look up a named object as for pdf_find_named.  If the object does not
107  * exist, create it (as a dictionary if it is one of the predefined names
108  * {ThisPage}, {NextPage}, {PrevPage}, or {Page<#>}, otherwise as a
109  * generic object) and return 1.
110  */
111 int
pdf_refer_named(gx_device_pdf * pdev,const gs_param_string * pname_orig,cos_object_t ** ppco)112 pdf_refer_named(gx_device_pdf * pdev, const gs_param_string * pname_orig,
113 		cos_object_t **ppco)
114 {
115     const gs_param_string *pname = pname_orig;
116     int code = pdf_find_named(pdev, pname, ppco);
117     char page_name_chars[6 + 10 + 2]; /* {Page<n>}, enough for an int */
118     gs_param_string pnstr;
119     int page_number;
120 
121     if (code != gs_error_undefined)
122 	return code;
123     /*
124      * Check for a predefined name.  Map ThisPage, PrevPage, and NextPage
125      * to the appropriate Page<#> name.
126      */
127     if (pname->size >= 7 &&
128 	sscanf((const char *)pname->data, "{Page%d}", &page_number) == 1
129 	)
130 	goto cpage;
131     if (pdf_key_eq(pname, "{ThisPage}"))
132 	page_number = pdev->next_page + 1;
133     else if (pdf_key_eq(pname, "{NextPage}"))
134 	page_number = pdev->next_page + 2;
135     else if (pdf_key_eq(pname, "{PrevPage}"))
136 	page_number = pdev->next_page;
137     else {
138 	code = pdf_create_named(pdev, pname, cos_type_generic, ppco, 0L);
139 	return (code < 0 ? code : 1);
140     }
141     if (page_number <= 0)
142 	return code;
143     sprintf(page_name_chars, "{Page%d}", page_number);
144     param_string_from_string(pnstr, page_name_chars);
145     pname = &pnstr;
146     code = pdf_find_named(pdev, pname, ppco);
147     if (code != gs_error_undefined)
148 	return code;
149  cpage:
150     if (pdf_page_id(pdev, page_number) <= 0)
151 	return_error(gs_error_rangecheck);
152     *ppco = COS_OBJECT(pdev->pages[page_number - 1].Page);
153     return 0;
154 }
155 
156 /*
157  * Look up a named object as for pdf_refer_named.  If the object already
158  * exists and is not simply a forward reference, return e_rangecheck;
159  * if it exists as a forward reference, set its type and return 0;
160  * otherwise, create the object with the given type and return 1.
161  */
162 int
pdf_make_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_type_t cotype,cos_object_t ** ppco,bool assign_id)163 pdf_make_named(gx_device_pdf * pdev, const gs_param_string * pname,
164 	       cos_type_t cotype, cos_object_t **ppco, bool assign_id)
165 {
166     if (pname) {
167 	int code = pdf_refer_named(pdev, pname, ppco);
168 	cos_object_t *pco = *ppco;
169 
170 	if (code < 0)
171 	    return code;
172 	if (cos_type(pco) != cos_type_generic)
173 	    return_error(gs_error_rangecheck);
174 	if (assign_id && pco->id == 0)
175 	    pco->id = pdf_obj_ref(pdev);
176 	cos_become(pco, cotype);
177 	return code;
178     } else {
179 	int code = pdf_create_named(pdev, pname, cotype, ppco,
180 				    (assign_id ? 0L : -1L));
181 
182 	return (code < 0 ? code : 1);
183     }
184 }
185 int
pdf_make_named_dict(gx_device_pdf * pdev,const gs_param_string * pname,cos_dict_t ** ppcd,bool assign_id)186 pdf_make_named_dict(gx_device_pdf * pdev, const gs_param_string * pname,
187 		    cos_dict_t **ppcd, bool assign_id)
188 {
189     cos_object_t *pco;
190     int code = pdf_make_named(pdev, pname, cos_type_dict, &pco, assign_id);
191 
192     *ppcd = (cos_dict_t *)pco;
193     return code;
194 }
195 
196 /*
197  * Look up a named object as for pdf_refer_named.  If the object does not
198  * exist, return e_undefined; if the object exists but has the wrong type,
199  * return e_typecheck.
200  */
201 int
pdf_get_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_type_t cotype,cos_object_t ** ppco)202 pdf_get_named(gx_device_pdf * pdev, const gs_param_string * pname,
203 	      cos_type_t cotype, cos_object_t **ppco)
204 {
205     int code = pdf_refer_named(pdev, pname, ppco);
206 
207     if (code < 0)
208 	return code;
209     if (cos_type(*ppco) != cotype)
210 	return_error(gs_error_typecheck);
211     return code;
212 }
213 
214 /*
215  * Push the current local namespace onto the namespace stack, and reset it
216  * to an empty namespace.
217  */
218 int
pdf_push_namespace(gx_device_pdf * pdev)219 pdf_push_namespace(gx_device_pdf *pdev)
220 {
221     int code = cos_array_add_object(pdev->Namespace_stack,
222 				    COS_OBJECT(pdev->local_named_objects));
223     cos_dict_t *pcd =
224 	cos_dict_alloc(pdev, "pdf_push_namespace(local_named_objects)");
225     cos_array_t *pca =
226 	cos_array_alloc(pdev, "pdf_push_namespace(NI_stack)");
227 
228     if (code < 0 ||
229 	(code = cos_array_add_object(pdev->Namespace_stack,
230 				     COS_OBJECT(pdev->NI_stack))) < 0
231 	)
232 	return code;
233     if (pcd == 0 || pca == 0)
234 	return_error(gs_error_VMerror);
235     pdev->local_named_objects = pcd;
236     pdev->NI_stack = pca;
237     return 0;
238 }
239 
240 /*
241  * Pop the top local namespace from the namespace stack.  Return an error if
242  * the stack is empty.
243  */
244 int
pdf_pop_namespace(gx_device_pdf * pdev)245 pdf_pop_namespace(gx_device_pdf *pdev)
246 {
247     cos_value_t nis_value, lno_value;
248     int code = cos_array_unadd(pdev->Namespace_stack, &nis_value);
249 
250     if (code < 0 ||
251 	(code = cos_array_unadd(pdev->Namespace_stack, &lno_value)) < 0
252 	)
253 	return code;
254     COS_FREE(pdev->local_named_objects,
255 	     "pdf_pop_namespace(local_named_objects)");
256     pdev->local_named_objects = (cos_dict_t *)lno_value.contents.object;
257     COS_FREE(pdev->NI_stack, "pdf_pop_namespace(NI_stack)");
258     pdev->NI_stack = (cos_array_t *)nis_value.contents.object;
259     return 0;
260 }
261 
262 /*
263  * Scan a token from a string.  <<, >>, [, and ] are treated as tokens.
264  * Return 1 if a token was scanned, 0 if we reached the end of the string,
265  * or an error.  On a successful return, the token extends from *ptoken up
266  * to but not including *pscan.
267  *
268  * Note that this scanner expects a subset of PostScript syntax, not PDF
269  * syntax.  In particular, it doesn't understand ASCII85 strings,
270  * doesn't process the PDF #-escape syntax within names, and does only
271  * minimal syntax checking.  It also recognizes one extension to PostScript
272  * syntax, to allow gs_pdfwr.ps to pass names that include non-regular
273  * characters: If a name is immediately preceded by two null characters,
274  * the name includes everything up to a following null character.  The only
275  * place that currently generates this convention is the PostScript code
276  * that pre-processes the arguments for pdfmarks, in lib/gs_pdfwr.ps.
277  */
278 int
pdf_scan_token(const byte ** pscan,const byte * end,const byte ** ptoken)279 pdf_scan_token(const byte **pscan, const byte * end, const byte **ptoken)
280 {
281     const byte *p = *pscan;
282 
283     while (p < end && scan_char_decoder[*p] == ctype_space) {
284 	++p;
285 	if (p[-1] == 0 && p + 1 < end && *p == 0 && p[1] == '/') {
286 	/* Special handling for names delimited by a null character. */
287 	    *ptoken = ++p;
288 	    while (*p != 0)
289 		if (++p >= end)
290 		    return_error(gs_error_syntaxerror);	/* no terminator */
291 	    *pscan = p;
292 	    return 1;
293 	}
294     }
295     *ptoken = p;
296     if (p >= end) {
297 	*pscan = p;
298 	return 0;
299     }
300     switch (*p) {
301     case '%':
302     case ')':
303 	return_error(gs_error_syntaxerror);
304     case '(': {
305 	/* Skip over the string. */
306 	byte buf[50];		/* size is arbitrary */
307 	stream_cursor_read r;
308 	stream_cursor_write w;
309 	stream_PSSD_state ss;
310 	int status;
311 
312 	s_PSSD_init((stream_state *)&ss);
313 	r.ptr = p;		/* skip the '(' */
314 	r.limit = end - 1;
315 	w.limit = buf + sizeof(buf) - 1;
316 	do {
317 	    /* One picky compiler complains if we initialize to buf - 1. */
318 	    w.ptr = buf;  w.ptr--;
319 	    status = (*s_PSSD_template.process)
320 		((stream_state *) & ss, &r, &w, true);
321 	}
322 	while (status == 1);
323 	*pscan = r.ptr + 1;
324 	return 1;
325     }
326     case '<':
327 	if (end - p < 2)
328 	    return_error(gs_error_syntaxerror);
329 	if (p[1] != '<') {
330 	    /*
331 	     * We need the cast because some compilers declare memchar as
332 	     * returning a char * rather than a void *.
333 	     */
334 	    p = (const byte *)memchr(p + 1, '>', end - p - 1);
335 	    if (p == 0)
336 		return_error(gs_error_syntaxerror);
337 	}
338 	goto m2;
339     case '>':
340 	if (end - p < 2 || p[1] != '>')
341 	    return_error(gs_error_syntaxerror);
342 m2:	*pscan = p + 2;
343 	return 1;
344     case '[': case ']': case '{': case '}':
345 	*pscan = p + 1;
346 	return 1;
347     case '/':
348 	++p;
349     default:
350 	break;
351     }
352     while (p < end && scan_char_decoder[*p] <= ctype_name)
353 	++p;
354     *pscan = p;
355     if (p == *ptoken)		/* no chars scanned, i.e., not ctype_name */
356 	return_error(gs_error_syntaxerror);
357     return 1;
358 }
359 /*
360  * Scan a possibly composite token: arrays and dictionaries are treated as
361  * single tokens.
362  */
363 int
pdf_scan_token_composite(const byte ** pscan,const byte * end,const byte ** ptoken_orig)364 pdf_scan_token_composite(const byte **pscan, const byte * end,
365 			 const byte **ptoken_orig)
366 {
367     int level = 0;
368     const byte *ignore_token;
369     const byte **ptoken = ptoken_orig;
370     int code;
371 
372     do {
373 	code = pdf_scan_token(pscan, end, ptoken);
374 	if (code <= 0)
375 	    return (code < 0 || level == 0 ? code :
376 		    gs_note_error(gs_error_syntaxerror));
377 	switch (**ptoken) {
378 	case '<': case '[': case '{':
379 	    ++level; break;
380 	case '>': case ']': case '}':
381 	    if (level == 0)
382 		return_error(gs_error_syntaxerror);
383 	    --level; break;
384 	}
385 	ptoken = &ignore_token;
386     } while (level);
387     return code;
388 }
389 
390 /* Replace object names with object references in a (parameter) string. */
391 private const byte *
pdfmark_next_object(const byte * scan,const byte * end,const byte ** pname,cos_object_t ** ppco,gx_device_pdf * pdev)392 pdfmark_next_object(const byte * scan, const byte * end, const byte **pname,
393 		    cos_object_t **ppco, gx_device_pdf * pdev)
394 {
395     /*
396      * Starting at scan, find the next object reference, set *pname
397      * to point to it in the string, store the object at *ppco,
398      * and return a pointer to the first character beyond the
399      * reference.  If there are no more object references, set
400      * *pname = end, *ppco = 0, and return end.
401      */
402     int code;
403 
404     while ((code = pdf_scan_token(&scan, end, pname)) != 0) {
405 	gs_param_string sname;
406 
407 	if (code < 0) {
408 	    ++scan;
409 	    continue;
410 	}
411 	if (**pname != '{')
412 	    continue;
413 	/* Back up over the { and rescan as a single token. */
414 	scan = *pname;
415 	code = pdf_scan_token_composite(&scan, end, pname);
416 	if (code < 0) {
417 	    ++scan;
418 	    continue;
419 	}
420 	sname.data = *pname;
421 	sname.size = scan - sname.data;
422 	/*
423 	 * Forward references are allowed.  If there is an error,
424 	 * simply retain the name as a literal string.
425 	 */
426 	code = pdf_refer_named(pdev, &sname, ppco);
427 	if (code < 0)
428 	    continue;
429 	return scan;
430     }
431     *ppco = 0;
432     return end;
433 }
434 int
pdf_replace_names(gx_device_pdf * pdev,const gs_param_string * from,gs_param_string * to)435 pdf_replace_names(gx_device_pdf * pdev, const gs_param_string * from,
436 		  gs_param_string * to)
437 {
438     const byte *start = from->data;
439     const byte *end = start + from->size;
440     const byte *scan;
441     uint size = 0;
442     cos_object_t *pco;
443     bool any = false;
444     byte *sto;
445     char ref[1 + 10 + 5 + 1];	/* max obj number is 10 digits */
446 
447     /* Do a first pass to compute the length of the result. */
448     for (scan = start; scan < end;) {
449 	const byte *sname;
450 	const byte *next =
451 	    pdfmark_next_object(scan, end, &sname, &pco, pdev);
452 
453 	size += sname - scan;
454 	if (pco) {
455 	    sprintf(ref, " %ld 0 R ", pco->id);
456 	    size += strlen(ref);
457 	}
458 	scan = next;
459 	any |= next != sname;
460     }
461     to->persistent = true;	/* ??? */
462     if (!any) {
463 	to->data = start;
464 	to->size = size;
465 	return 0;
466     }
467     sto = gs_alloc_bytes(pdev->pdf_memory, size, "pdf_replace_names");
468     if (sto == 0)
469 	return_error(gs_error_VMerror);
470     to->data = sto;
471     to->size = size;
472     /* Do a second pass to do the actual substitutions. */
473     for (scan = start; scan < end;) {
474 	const byte *sname;
475 	const byte *next =
476 	    pdfmark_next_object(scan, end, &sname, &pco, pdev);
477 	uint copy = sname - scan;
478 	int rlen;
479 
480 	memcpy(sto, scan, copy);
481 	sto += copy;
482 	if (pco) {
483 	    sprintf(ref, " %ld 0 R ", pco->id);
484 	    rlen = strlen(ref);
485 	    memcpy(sto, ref, rlen);
486 	    sto += rlen;
487 	}
488 	scan = next;
489     }
490     return 0;
491 }
492