1 /* Copyright (C) 1997, 2000 Aladdin Enterprises. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under license and may not be copied,
7 modified or distributed except as expressly authorized under the terms
8 of the license contained in the file LICENSE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostscript.com/licensing/. For information on
12 commercial licensing, go to http://www.artifex.com/licensing/ or
13 contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14 San Rafael, CA 94903, U.S.A., +1(415)492-9861.
15 */
16
17 /* $Id: gdevpdfr.c,v 1.9 2005/04/25 12:28:49 igor Exp $ */
18 /* Named object pdfmark processing */
19 #include "memory_.h"
20 #include "string_.h"
21 #include "gx.h"
22 #include "gserrors.h"
23 #include "gsutil.h" /* for bytes_compare */
24 #include "gdevpdfx.h"
25 #include "gdevpdfo.h"
26 #include "scanchar.h"
27 #include "strimpl.h"
28 #include "sstring.h"
29
30 #ifndef gs_error_syntaxerror
31 # define gs_error_syntaxerror gs_error_rangecheck
32 #endif
33
34 /* Test whether an object name has valid syntax, {name}. */
35 bool
pdf_objname_is_valid(const byte * data,uint size)36 pdf_objname_is_valid(const byte *data, uint size)
37 {
38 return (size >= 2 && data[0] == '{' &&
39 (const byte *)memchr(data, '}', size) == data + size - 1);
40 }
41
42 /*
43 * Look up a named object. Return e_rangecheck if the syntax is invalid.
44 * If the object is missing, return e_undefined.
45 */
46 int
pdf_find_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_object_t ** ppco)47 pdf_find_named(gx_device_pdf * pdev, const gs_param_string * pname,
48 cos_object_t **ppco)
49 {
50 const cos_value_t *pvalue;
51
52 if (!pdf_objname_is_valid(pname->data, pname->size))
53 return_error(gs_error_rangecheck);
54 if ((pvalue = cos_dict_find(pdev->local_named_objects, pname->data,
55 pname->size)) != 0 ||
56 (pvalue = cos_dict_find(pdev->global_named_objects, pname->data,
57 pname->size)) != 0
58 ) {
59 *ppco = pvalue->contents.object;
60 return 0;
61 }
62 return_error(gs_error_undefined);
63 }
64
65 /*
66 * Create a (local) named object. id = -1L means do not assign an id.
67 * pname = 0 means just create the object, do not name it. Note that
68 * during initialization, local_named_objects == global_named_objects.
69 */
70 int
pdf_create_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_type_t cotype,cos_object_t ** ppco,long id)71 pdf_create_named(gx_device_pdf *pdev, const gs_param_string *pname,
72 cos_type_t cotype, cos_object_t **ppco, long id)
73 {
74 cos_object_t *pco;
75 cos_value_t value;
76
77 *ppco = pco = cos_object_alloc(pdev, "pdf_create_named");
78 if (pco == 0)
79 return_error(gs_error_VMerror);
80 pco->id =
81 (id == -1 ? 0L : id == 0 ? pdf_obj_ref(pdev) : id);
82 if (pname) {
83 int code = cos_dict_put(pdev->local_named_objects, pname->data,
84 pname->size, cos_object_value(&value, pco));
85
86 if (code < 0)
87 return code;
88 }
89 if (cotype != cos_type_generic)
90 cos_become(pco, cotype);
91 *ppco = pco;
92 return 0;
93 }
94 int
pdf_create_named_dict(gx_device_pdf * pdev,const gs_param_string * pname,cos_dict_t ** ppcd,long id)95 pdf_create_named_dict(gx_device_pdf *pdev, const gs_param_string *pname,
96 cos_dict_t **ppcd, long id)
97 {
98 cos_object_t *pco;
99 int code = pdf_create_named(pdev, pname, cos_type_dict, &pco, id);
100
101 *ppcd = (cos_dict_t *)pco;
102 return code;
103 }
104
105 /*
106 * Look up a named object as for pdf_find_named. If the object does not
107 * exist, create it (as a dictionary if it is one of the predefined names
108 * {ThisPage}, {NextPage}, {PrevPage}, or {Page<#>}, otherwise as a
109 * generic object) and return 1.
110 */
111 int
pdf_refer_named(gx_device_pdf * pdev,const gs_param_string * pname_orig,cos_object_t ** ppco)112 pdf_refer_named(gx_device_pdf * pdev, const gs_param_string * pname_orig,
113 cos_object_t **ppco)
114 {
115 const gs_param_string *pname = pname_orig;
116 int code = pdf_find_named(pdev, pname, ppco);
117 char page_name_chars[6 + 10 + 2]; /* {Page<n>}, enough for an int */
118 gs_param_string pnstr;
119 int page_number;
120
121 if (code != gs_error_undefined)
122 return code;
123 /*
124 * Check for a predefined name. Map ThisPage, PrevPage, and NextPage
125 * to the appropriate Page<#> name.
126 */
127 if (pname->size >= 7 &&
128 sscanf((const char *)pname->data, "{Page%d}", &page_number) == 1
129 )
130 goto cpage;
131 if (pdf_key_eq(pname, "{ThisPage}"))
132 page_number = pdev->next_page + 1;
133 else if (pdf_key_eq(pname, "{NextPage}"))
134 page_number = pdev->next_page + 2;
135 else if (pdf_key_eq(pname, "{PrevPage}"))
136 page_number = pdev->next_page;
137 else {
138 code = pdf_create_named(pdev, pname, cos_type_generic, ppco, 0L);
139 return (code < 0 ? code : 1);
140 }
141 if (page_number <= 0)
142 return code;
143 sprintf(page_name_chars, "{Page%d}", page_number);
144 param_string_from_string(pnstr, page_name_chars);
145 pname = &pnstr;
146 code = pdf_find_named(pdev, pname, ppco);
147 if (code != gs_error_undefined)
148 return code;
149 cpage:
150 if (pdf_page_id(pdev, page_number) <= 0)
151 return_error(gs_error_rangecheck);
152 *ppco = COS_OBJECT(pdev->pages[page_number - 1].Page);
153 return 0;
154 }
155
156 /*
157 * Look up a named object as for pdf_refer_named. If the object already
158 * exists and is not simply a forward reference, return e_rangecheck;
159 * if it exists as a forward reference, set its type and return 0;
160 * otherwise, create the object with the given type and return 1.
161 */
162 int
pdf_make_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_type_t cotype,cos_object_t ** ppco,bool assign_id)163 pdf_make_named(gx_device_pdf * pdev, const gs_param_string * pname,
164 cos_type_t cotype, cos_object_t **ppco, bool assign_id)
165 {
166 if (pname) {
167 int code = pdf_refer_named(pdev, pname, ppco);
168 cos_object_t *pco = *ppco;
169
170 if (code < 0)
171 return code;
172 if (cos_type(pco) != cos_type_generic)
173 return_error(gs_error_rangecheck);
174 if (assign_id && pco->id == 0)
175 pco->id = pdf_obj_ref(pdev);
176 cos_become(pco, cotype);
177 return code;
178 } else {
179 int code = pdf_create_named(pdev, pname, cotype, ppco,
180 (assign_id ? 0L : -1L));
181
182 return (code < 0 ? code : 1);
183 }
184 }
185 int
pdf_make_named_dict(gx_device_pdf * pdev,const gs_param_string * pname,cos_dict_t ** ppcd,bool assign_id)186 pdf_make_named_dict(gx_device_pdf * pdev, const gs_param_string * pname,
187 cos_dict_t **ppcd, bool assign_id)
188 {
189 cos_object_t *pco;
190 int code = pdf_make_named(pdev, pname, cos_type_dict, &pco, assign_id);
191
192 *ppcd = (cos_dict_t *)pco;
193 return code;
194 }
195
196 /*
197 * Look up a named object as for pdf_refer_named. If the object does not
198 * exist, return e_undefined; if the object exists but has the wrong type,
199 * return e_typecheck.
200 */
201 int
pdf_get_named(gx_device_pdf * pdev,const gs_param_string * pname,cos_type_t cotype,cos_object_t ** ppco)202 pdf_get_named(gx_device_pdf * pdev, const gs_param_string * pname,
203 cos_type_t cotype, cos_object_t **ppco)
204 {
205 int code = pdf_refer_named(pdev, pname, ppco);
206
207 if (code < 0)
208 return code;
209 if (cos_type(*ppco) != cotype)
210 return_error(gs_error_typecheck);
211 return code;
212 }
213
214 /*
215 * Push the current local namespace onto the namespace stack, and reset it
216 * to an empty namespace.
217 */
218 int
pdf_push_namespace(gx_device_pdf * pdev)219 pdf_push_namespace(gx_device_pdf *pdev)
220 {
221 int code = cos_array_add_object(pdev->Namespace_stack,
222 COS_OBJECT(pdev->local_named_objects));
223 cos_dict_t *pcd =
224 cos_dict_alloc(pdev, "pdf_push_namespace(local_named_objects)");
225 cos_array_t *pca =
226 cos_array_alloc(pdev, "pdf_push_namespace(NI_stack)");
227
228 if (code < 0 ||
229 (code = cos_array_add_object(pdev->Namespace_stack,
230 COS_OBJECT(pdev->NI_stack))) < 0
231 )
232 return code;
233 if (pcd == 0 || pca == 0)
234 return_error(gs_error_VMerror);
235 pdev->local_named_objects = pcd;
236 pdev->NI_stack = pca;
237 return 0;
238 }
239
240 /*
241 * Pop the top local namespace from the namespace stack. Return an error if
242 * the stack is empty.
243 */
244 int
pdf_pop_namespace(gx_device_pdf * pdev)245 pdf_pop_namespace(gx_device_pdf *pdev)
246 {
247 cos_value_t nis_value, lno_value;
248 int code = cos_array_unadd(pdev->Namespace_stack, &nis_value);
249
250 if (code < 0 ||
251 (code = cos_array_unadd(pdev->Namespace_stack, &lno_value)) < 0
252 )
253 return code;
254 COS_FREE(pdev->local_named_objects,
255 "pdf_pop_namespace(local_named_objects)");
256 pdev->local_named_objects = (cos_dict_t *)lno_value.contents.object;
257 COS_FREE(pdev->NI_stack, "pdf_pop_namespace(NI_stack)");
258 pdev->NI_stack = (cos_array_t *)nis_value.contents.object;
259 return 0;
260 }
261
262 /*
263 * Scan a token from a string. <<, >>, [, and ] are treated as tokens.
264 * Return 1 if a token was scanned, 0 if we reached the end of the string,
265 * or an error. On a successful return, the token extends from *ptoken up
266 * to but not including *pscan.
267 *
268 * Note that this scanner expects a subset of PostScript syntax, not PDF
269 * syntax. In particular, it doesn't understand ASCII85 strings,
270 * doesn't process the PDF #-escape syntax within names, and does only
271 * minimal syntax checking. It also recognizes one extension to PostScript
272 * syntax, to allow gs_pdfwr.ps to pass names that include non-regular
273 * characters: If a name is immediately preceded by two null characters,
274 * the name includes everything up to a following null character. The only
275 * place that currently generates this convention is the PostScript code
276 * that pre-processes the arguments for pdfmarks, in lib/gs_pdfwr.ps.
277 */
278 int
pdf_scan_token(const byte ** pscan,const byte * end,const byte ** ptoken)279 pdf_scan_token(const byte **pscan, const byte * end, const byte **ptoken)
280 {
281 const byte *p = *pscan;
282
283 while (p < end && scan_char_decoder[*p] == ctype_space) {
284 ++p;
285 if (p[-1] == 0 && p + 1 < end && *p == 0 && p[1] == '/') {
286 /* Special handling for names delimited by a null character. */
287 *ptoken = ++p;
288 while (*p != 0)
289 if (++p >= end)
290 return_error(gs_error_syntaxerror); /* no terminator */
291 *pscan = p;
292 return 1;
293 }
294 }
295 *ptoken = p;
296 if (p >= end) {
297 *pscan = p;
298 return 0;
299 }
300 switch (*p) {
301 case '%':
302 case ')':
303 return_error(gs_error_syntaxerror);
304 case '(': {
305 /* Skip over the string. */
306 byte buf[50]; /* size is arbitrary */
307 stream_cursor_read r;
308 stream_cursor_write w;
309 stream_PSSD_state ss;
310 int status;
311
312 s_PSSD_init((stream_state *)&ss);
313 r.ptr = p; /* skip the '(' */
314 r.limit = end - 1;
315 w.limit = buf + sizeof(buf) - 1;
316 do {
317 /* One picky compiler complains if we initialize to buf - 1. */
318 w.ptr = buf; w.ptr--;
319 status = (*s_PSSD_template.process)
320 ((stream_state *) & ss, &r, &w, true);
321 }
322 while (status == 1);
323 *pscan = r.ptr + 1;
324 return 1;
325 }
326 case '<':
327 if (end - p < 2)
328 return_error(gs_error_syntaxerror);
329 if (p[1] != '<') {
330 /*
331 * We need the cast because some compilers declare memchar as
332 * returning a char * rather than a void *.
333 */
334 p = (const byte *)memchr(p + 1, '>', end - p - 1);
335 if (p == 0)
336 return_error(gs_error_syntaxerror);
337 }
338 goto m2;
339 case '>':
340 if (end - p < 2 || p[1] != '>')
341 return_error(gs_error_syntaxerror);
342 m2: *pscan = p + 2;
343 return 1;
344 case '[': case ']': case '{': case '}':
345 *pscan = p + 1;
346 return 1;
347 case '/':
348 ++p;
349 default:
350 break;
351 }
352 while (p < end && scan_char_decoder[*p] <= ctype_name)
353 ++p;
354 *pscan = p;
355 if (p == *ptoken) /* no chars scanned, i.e., not ctype_name */
356 return_error(gs_error_syntaxerror);
357 return 1;
358 }
359 /*
360 * Scan a possibly composite token: arrays and dictionaries are treated as
361 * single tokens.
362 */
363 int
pdf_scan_token_composite(const byte ** pscan,const byte * end,const byte ** ptoken_orig)364 pdf_scan_token_composite(const byte **pscan, const byte * end,
365 const byte **ptoken_orig)
366 {
367 int level = 0;
368 const byte *ignore_token;
369 const byte **ptoken = ptoken_orig;
370 int code;
371
372 do {
373 code = pdf_scan_token(pscan, end, ptoken);
374 if (code <= 0)
375 return (code < 0 || level == 0 ? code :
376 gs_note_error(gs_error_syntaxerror));
377 switch (**ptoken) {
378 case '<': case '[': case '{':
379 ++level; break;
380 case '>': case ']': case '}':
381 if (level == 0)
382 return_error(gs_error_syntaxerror);
383 --level; break;
384 }
385 ptoken = &ignore_token;
386 } while (level);
387 return code;
388 }
389
390 /* Replace object names with object references in a (parameter) string. */
391 private const byte *
pdfmark_next_object(const byte * scan,const byte * end,const byte ** pname,cos_object_t ** ppco,gx_device_pdf * pdev)392 pdfmark_next_object(const byte * scan, const byte * end, const byte **pname,
393 cos_object_t **ppco, gx_device_pdf * pdev)
394 {
395 /*
396 * Starting at scan, find the next object reference, set *pname
397 * to point to it in the string, store the object at *ppco,
398 * and return a pointer to the first character beyond the
399 * reference. If there are no more object references, set
400 * *pname = end, *ppco = 0, and return end.
401 */
402 int code;
403
404 while ((code = pdf_scan_token(&scan, end, pname)) != 0) {
405 gs_param_string sname;
406
407 if (code < 0) {
408 ++scan;
409 continue;
410 }
411 if (**pname != '{')
412 continue;
413 /* Back up over the { and rescan as a single token. */
414 scan = *pname;
415 code = pdf_scan_token_composite(&scan, end, pname);
416 if (code < 0) {
417 ++scan;
418 continue;
419 }
420 sname.data = *pname;
421 sname.size = scan - sname.data;
422 /*
423 * Forward references are allowed. If there is an error,
424 * simply retain the name as a literal string.
425 */
426 code = pdf_refer_named(pdev, &sname, ppco);
427 if (code < 0)
428 continue;
429 return scan;
430 }
431 *ppco = 0;
432 return end;
433 }
434 int
pdf_replace_names(gx_device_pdf * pdev,const gs_param_string * from,gs_param_string * to)435 pdf_replace_names(gx_device_pdf * pdev, const gs_param_string * from,
436 gs_param_string * to)
437 {
438 const byte *start = from->data;
439 const byte *end = start + from->size;
440 const byte *scan;
441 uint size = 0;
442 cos_object_t *pco;
443 bool any = false;
444 byte *sto;
445 char ref[1 + 10 + 5 + 1]; /* max obj number is 10 digits */
446
447 /* Do a first pass to compute the length of the result. */
448 for (scan = start; scan < end;) {
449 const byte *sname;
450 const byte *next =
451 pdfmark_next_object(scan, end, &sname, &pco, pdev);
452
453 size += sname - scan;
454 if (pco) {
455 sprintf(ref, " %ld 0 R ", pco->id);
456 size += strlen(ref);
457 }
458 scan = next;
459 any |= next != sname;
460 }
461 to->persistent = true; /* ??? */
462 if (!any) {
463 to->data = start;
464 to->size = size;
465 return 0;
466 }
467 sto = gs_alloc_bytes(pdev->pdf_memory, size, "pdf_replace_names");
468 if (sto == 0)
469 return_error(gs_error_VMerror);
470 to->data = sto;
471 to->size = size;
472 /* Do a second pass to do the actual substitutions. */
473 for (scan = start; scan < end;) {
474 const byte *sname;
475 const byte *next =
476 pdfmark_next_object(scan, end, &sname, &pco, pdev);
477 uint copy = sname - scan;
478 int rlen;
479
480 memcpy(sto, scan, copy);
481 sto += copy;
482 if (pco) {
483 sprintf(ref, " %ld 0 R ", pco->id);
484 rlen = strlen(ref);
485 memcpy(sto, ref, rlen);
486 sto += rlen;
487 }
488 scan = next;
489 }
490 return 0;
491 }
492