xref: /plan9/sys/src/cmd/gs/src/iscanbin.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
1 /* Copyright (C) 1989, 2000 Aladdin Enterprises.  All rights reserved.
2 
3   This software is provided AS-IS with no warranty, either express or
4   implied.
5 
6   This software is distributed under license and may not be copied,
7   modified or distributed except as expressly authorized under the terms
8   of the license contained in the file LICENSE in this distribution.
9 
10   For more information about licensing, please refer to
11   http://www.ghostscript.com/licensing/. For information on
12   commercial licensing, go to http://www.artifex.com/licensing/ or
13   contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14   San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15 */
16 
17 /* $Id: iscanbin.c,v 1.14 2004/08/04 19:36:13 stefan Exp $ */
18 /* Ghostscript binary token scanner and writer */
19 #include "math_.h"
20 #include "memory_.h"
21 #include "ghost.h"
22 #include "gsutil.h"
23 #include "gxalloc.h"		/* for names_array in allocator */
24 #include "stream.h"
25 #include "strimpl.h"		/* for sfilter.h */
26 #include "sfilter.h"		/* for iscan.h */
27 #include "ierrors.h"
28 #include "ialloc.h"
29 #include "iddict.h"
30 #include "dstack.h"		/* for immediately evaluated names */
31 #include "ostack.h"		/* must precede iscan.h */
32 #include "iname.h"
33 #include "iscan.h"		/* for scan_Refill */
34 #include "iscanbin.h"
35 #include "iutil.h"
36 #include "ivmspace.h"
37 #include "store.h"
38 #include "btoken.h"
39 #include "ibnum.h"
40 
41 /* Define the binary token types. */
42 typedef enum {
43     BT_SEQ = 128,		/* binary object sequence: */
44     BT_SEQ_IEEE_MSB = 128,	/* IEEE floats, big-endian */
45     BT_SEQ_IEEE_LSB = 129,	/* IEEE float, little-endian */
46     BT_SEQ_NATIVE_MSB = 130,	/* native floats, big-endian */
47     BT_SEQ_NATIVE_LSB = 131,	/* native floats, little-endian */
48     BT_INT32_MSB = 132,
49     BT_INT32_LSB = 133,
50     BT_INT16_MSB = 134,
51     BT_INT16_LSB = 135,
52     BT_INT8 = 136,
53     BT_FIXED = 137,
54     BT_FLOAT_IEEE_MSB = 138,
55     BT_FLOAT_IEEE_LSB = 139,
56     BT_FLOAT_NATIVE = 140,
57     BT_BOOLEAN = 141,
58     BT_STRING_256 = 142,
59     BT_STRING_64K_MSB = 143,
60     BT_STRING_64K_LSB = 144,
61     BT_LITNAME_SYSTEM = 145,
62     BT_EXECNAME_SYSTEM = 146,
63     BT_LITNAME_USER = 147,
64     BT_EXECNAME_USER = 148,
65     BT_NUM_ARRAY = 149
66 } bin_token_type_t;
67 
68 #define MIN_BIN_TOKEN_TYPE 128
69 #define MAX_BIN_TOKEN_TYPE 159
70 #define NUM_BIN_TOKEN_TYPES (MAX_BIN_TOKEN_TYPE - MIN_BIN_TOKEN_TYPE + 1)
71 
72 /* Define the number of required initial bytes for binary tokens. */
73 private const byte bin_token_bytes[NUM_BIN_TOKEN_TYPES] =
74 {
75     4, 4, 4, 4, 5, 5, 3, 3, 2, 2, 5, 5, 5,
76     2, 2, 3, 3, 2, 2, 2, 2, 4,
77     1, 1, 1, 1, 1, 1, 1, 1, 1, 1	/* undefined */
78 };
79 
80 /* Define the number formats for those binary tokens that need them. */
81 private const byte bin_token_num_formats[NUM_BIN_TOKEN_TYPES] =
82 {
83     num_msb + num_float_IEEE,	/* BT_SEQ_IEEE_MSB */
84     num_lsb + num_float_IEEE,	/* BT_SEQ_IEEE_LSB */
85 #if ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS
86     /* Treat native floats like IEEE floats for byte swapping. */
87     num_msb + num_float_IEEE,	/* BT_SEQ_NATIVE_MSB */
88     num_lsb + num_float_IEEE,	/* BT_SEQ_NATIVE_LSB */
89 #else
90     num_msb + num_float_native,	/* BT_SEQ_NATIVE_MSB */
91     num_lsb + num_float_native,	/* BT_SEQ_NATIVE_LSB */
92 #endif
93     num_msb + num_int32,	/* BT_INT32_MSB */
94     num_lsb + num_int32,	/* BT_INT32_LSB */
95     num_msb + num_int16,	/* BT_INT16_MSB */
96     num_lsb + num_int16,	/* BT_INT16_LSB */
97     0,				/* BT_INT8, not used */
98     0,				/* BT_FIXED, not used */
99     num_msb + num_float_IEEE,	/* BT_FLOAT_IEEE_MSB */
100     num_lsb + num_float_IEEE,	/* BT_FLOAT_IEEE_LSB */
101     num_float_native,		/* BT_FLOAT_NATIVE */
102     0,				/* BT_BOOLEAN, not used */
103     0,				/* BT_STRING_256, not used */
104     num_msb,			/* BT_STRING_64K_MSB */
105     num_lsb			/* BT_STRING_64K_LSB */
106     /* rest not used */
107 };
108 
109 /* Binary object sequence element types */
110 typedef enum {
111     BS_TYPE_NULL = 0,
112     BS_TYPE_INTEGER = 1,
113     BS_TYPE_REAL = 2,
114     BS_TYPE_NAME = 3,
115     BS_TYPE_BOOLEAN = 4,
116     BS_TYPE_STRING = 5,
117     BS_TYPE_EVAL_NAME = 6,
118     BS_TYPE_ARRAY = 9,
119     BS_TYPE_MARK = 10,
120     /*
121      * We extend the PostScript language definition by allowing
122      * dictionaries in binary object sequences.  The data for
123      * a dictionary is like that for an array, with the following
124      * changes:
125      *      - If the size is an even number, the value is the index of
126      * the first of a series of alternating keys and values.
127      *      - If the size is 1, the value is the index of another
128      * object (which must also be a dictionary, and must not have
129      * size = 1); this object represents the same object as that one.
130      */
131     BS_TYPE_DICTIONARY = 15
132 } bin_seq_type_t;
133 
134 #define BS_EXECUTABLE 128
135 #define SIZEOF_BIN_SEQ_OBJ ((uint)8)
136 
137 /* Forward references */
138 private int scan_bin_get_name(const gs_memory_t *mem, const ref *, int, ref *);
139 private int scan_bin_num_array_continue(i_ctx_t *, stream *, ref *, scanner_state *);
140 private int scan_bin_string_continue(i_ctx_t *, stream *, ref *, scanner_state *);
141 private int scan_bos_continue(i_ctx_t *, stream *, ref *, scanner_state *);
142 private byte *scan_bos_resize(i_ctx_t *, scanner_state *, uint, uint);
143 private int scan_bos_string_continue(i_ctx_t *, stream *, ref *, scanner_state *);
144 
145 /* Scan a binary token.  Called from the main scanner */
146 /* when it encounters an ASCII code 128-159, */
147 /* if binary tokens are being recognized (object format != 0). */
148 int
scan_binary_token(i_ctx_t * i_ctx_p,stream * s,ref * pref,scanner_state * pstate)149 scan_binary_token(i_ctx_t *i_ctx_p, stream *s, ref *pref,
150 		  scanner_state *pstate)
151 {
152     scan_binary_state *const pbs = &pstate->s_ss.binary;
153 
154     s_declare_inline(s, p, rlimit);
155     int num_format, code;
156     uint arg;
157     uint wanted;
158     uint rcnt;
159 
160     s_begin_inline(s, p, rlimit);
161     wanted = bin_token_bytes[*p - MIN_BIN_TOKEN_TYPE] - 1;
162     rcnt = rlimit - p;
163     if (rcnt < wanted) {
164 	s_end_inline(s, p - 1, rlimit);
165 	pstate->s_scan_type = scanning_none;
166 	return scan_Refill;
167     }
168     num_format = bin_token_num_formats[*p - MIN_BIN_TOKEN_TYPE];
169     switch (*p) {
170 	case BT_SEQ_IEEE_MSB:
171 	case BT_SEQ_IEEE_LSB:
172 	case BT_SEQ_NATIVE_MSB:
173 	case BT_SEQ_NATIVE_LSB:{
174 		uint top_size = p[1];
175 		uint hsize, size;
176 
177 		pbs->num_format = num_format;
178 		if (top_size == 0) {
179 		    /* Extended header (2-byte array size, 4-byte length) */
180 		    ulong lsize;
181 
182 		    if (rcnt < 7) {
183 			s_end_inline(s, p - 1, rlimit);
184 			pstate->s_scan_type = scanning_none;
185 			return scan_Refill;
186 		    }
187 		    if (p[1] != 0) /* reserved, must be 0 */
188 			return_error(e_syntaxerror);
189 		    top_size = sdecodeushort(p + 2, num_format);
190 		    lsize = sdecodelong(p + 4, num_format);
191 		    if ((size = lsize) != lsize)
192 			return_error(e_limitcheck);
193 		    hsize = 8;
194 		} else {
195 		    /* Normal header (1-byte array size, 2-byte length). */
196 		    /* We already checked rcnt >= 3. */
197 		    size = sdecodeushort(p + 2, num_format);
198 		    hsize = 4;
199 		}
200 		if (size < hsize)
201 		    return_error(e_syntaxerror);
202 		/* Preallocate an array large enough for the worst case, */
203 		/* namely, all objects and no strings. */
204 		code = ialloc_ref_array(&pbs->bin_array,
205 				   a_all + a_executable, size / sizeof(ref),
206 					"binary object sequence(objects)");
207 		if (code < 0)
208 		    return code;
209 		p += hsize - 1;
210 		size -= hsize;
211 		s_end_inline(s, p, rlimit);
212 		pbs->max_array_index = pbs->top_size = top_size;
213 		pbs->min_string_index = pbs->size = size;
214 		pbs->index = 0;
215 		pstate->s_da.is_dynamic = false;
216 		pstate->s_da.base = pstate->s_da.next =
217 		    pstate->s_da.limit = pstate->s_da.buf;
218 		code = scan_bos_continue(i_ctx_p, s, pref, pstate);
219 		if (code == scan_Refill || code < 0) {
220 		    /* Clean up array for GC. */
221 		    uint index = pbs->index;
222 
223 		    refset_null(pbs->bin_array.value.refs + index,
224 				r_size(&pbs->bin_array) - index);
225 		}
226 		return code;
227 	    }
228 	case BT_INT8:
229 	    make_int(pref, (p[1] ^ 128) - 128);
230 	    s_end_inline(s, p + 1, rlimit);
231 	    return 0;
232 	case BT_FIXED:
233 	    num_format = p[1];
234 	    if (!num_is_valid(num_format))
235 		return_error(e_syntaxerror);
236 	    wanted = 1 + encoded_number_bytes(num_format);
237 	    if (rcnt < wanted) {
238 		s_end_inline(s, p - 1, rlimit);
239 		pstate->s_scan_type = scanning_none;
240 		return scan_Refill;
241 	    }
242 	    code = sdecode_number(p + 2, num_format, pref);
243 	    goto rnum;
244 	case BT_INT32_MSB:
245 	case BT_INT32_LSB:
246 	case BT_INT16_MSB:
247 	case BT_INT16_LSB:
248 	case BT_FLOAT_IEEE_MSB:
249 	case BT_FLOAT_IEEE_LSB:
250 	case BT_FLOAT_NATIVE:
251 	    code = sdecode_number(p + 1, num_format, pref);
252 	  rnum:
253 	    switch (code) {
254 		case t_integer:
255 		case t_real:
256 		    r_set_type(pref, code);
257 		    break;
258 		case t_null:
259 		    return_error(e_syntaxerror);
260 		default:
261 		    return code;
262 	    }
263 	    s_end_inline(s, p + wanted, rlimit);
264 	    return 0;
265 	case BT_BOOLEAN:
266 	    arg = p[1];
267 	    if (arg & ~1)
268 		return_error(e_syntaxerror);
269 	    make_bool(pref, arg);
270 	    s_end_inline(s, p + 1, rlimit);
271 	    return 0;
272 	case BT_STRING_256:
273 	    arg = *++p;
274 	    goto str;
275 	case BT_STRING_64K_MSB:
276 	case BT_STRING_64K_LSB:
277 	    arg = sdecodeushort(p + 1, num_format);
278 	    p += 2;
279 	  str:
280 	    if (s->foreign && rlimit - p >= arg) {
281 		/*
282 		 * Reference the string directly in the buffer.  It is
283 		 * marked writable for consistency with the non-direct
284 		 * case, but since the "buffer" may be data compiled into
285 		 * the executable, it is probably actually read-only.
286 		 */
287 		s_end_inline(s, p, rlimit);
288 		make_const_string(pref, a_all | avm_foreign, arg, sbufptr(s));
289 		sbufskip(s, arg);
290 		return 0;
291 	    } else {
292 		byte *str = ialloc_string(arg, "string token");
293 
294 		if (str == 0)
295 		    return_error(e_VMerror);
296 		s_end_inline(s, p, rlimit);
297 		pstate->s_da.base = pstate->s_da.next = str;
298 		pstate->s_da.limit = str + arg;
299 		code = scan_bin_string_continue(i_ctx_p, s, pref, pstate);
300 		if (code == scan_Refill || code < 0) {
301 		    pstate->s_da.is_dynamic = true;
302 		    make_null(&pbs->bin_array);		/* clean up for GC */
303 		    pbs->cont = scan_bin_string_continue;
304 		}
305 		return code;
306 	    }
307 	case BT_LITNAME_SYSTEM:
308 	    code = scan_bin_get_name(imemory, system_names_p, p[1], pref);
309 	    goto lname;
310 	case BT_EXECNAME_SYSTEM:
311 	    code = scan_bin_get_name(imemory, system_names_p, p[1], pref);
312 	    goto xname;
313 	case BT_LITNAME_USER:
314 	    code = scan_bin_get_name(imemory, user_names_p, p[1], pref);
315 	  lname:
316 	    if (code < 0)
317 		return code;
318 	    if (!r_has_type(pref, t_name))
319 		return_error(e_undefined);
320 	    s_end_inline(s, p + 1, rlimit);
321 	    return 0;
322 	case BT_EXECNAME_USER:
323 	    code = scan_bin_get_name(imemory, user_names_p, p[1], pref);
324 	  xname:
325 	    if (code < 0)
326 		return code;
327 	    if (!r_has_type(pref, t_name))
328 		return_error(e_undefined);
329 	    r_set_attrs(pref, a_executable);
330 	    s_end_inline(s, p + 1, rlimit);
331 	    return 0;
332 	case BT_NUM_ARRAY:
333 	    num_format = p[1];
334 	    if (!num_is_valid(num_format))
335 		return_error(e_syntaxerror);
336 	    arg = sdecodeushort(p + 2, num_format);
337 	    code = ialloc_ref_array(&pbs->bin_array, a_all, arg,
338 				    "number array token");
339 	    if (code < 0)
340 		return code;
341 	    pbs->num_format = num_format;
342 	    pbs->index = 0;
343 	    p += 3;
344 	    s_end_inline(s, p, rlimit);
345 	    code = scan_bin_num_array_continue(i_ctx_p, s, pref, pstate);
346 	    if (code == scan_Refill || code < 0) {
347 		/* Make sure the array is clean for the GC. */
348 		refset_null(pbs->bin_array.value.refs + pbs->index,
349 			    arg - pbs->index);
350 		pbs->cont = scan_bin_num_array_continue;
351 	    }
352 	    return code;
353     }
354     return_error(e_syntaxerror);
355 }
356 
357 /* Get a system or user name. */
358 private int
scan_bin_get_name(const gs_memory_t * mem,const ref * pnames,int index,ref * pref)359 scan_bin_get_name(const gs_memory_t *mem, const ref *pnames /*t_array*/, int index, ref *pref)
360 {
361     if (pnames == 0)
362 	return_error(e_rangecheck);
363     return array_get(mem, pnames, (long)index, pref);
364 }
365 
366 /* Continue collecting a binary string. */
367 private int
scan_bin_string_continue(i_ctx_t * i_ctx_p,stream * s,ref * pref,scanner_state * pstate)368 scan_bin_string_continue(i_ctx_t *i_ctx_p, stream * s, ref * pref,
369 			 scanner_state * pstate)
370 {
371     byte *q = pstate->s_da.next;
372     uint wanted = pstate->s_da.limit - q;
373     uint rcnt;
374 
375     /* We don't check the return status from 'sgets' here.
376        If there is an error in sgets, the condition rcnt==wanted
377        would be false and this function will return scan_Refill.
378     */
379     sgets(s, q, wanted, &rcnt);
380     if (rcnt == wanted) {
381 	/* Finished collecting the string. */
382 	make_string(pref, a_all | icurrent_space,
383 		    pstate->s_da.limit - pstate->s_da.base,
384 		    pstate->s_da.base);
385 	return 0;
386     }
387     pstate->s_da.next = q + rcnt;
388     pstate->s_scan_type = scanning_binary;
389     return scan_Refill;
390 }
391 
392 /* Continue scanning a binary number array. */
393 private int
scan_bin_num_array_continue(i_ctx_t * i_ctx_p,stream * s,ref * pref,scanner_state * pstate)394 scan_bin_num_array_continue(i_ctx_t *i_ctx_p, stream * s, ref * pref,
395 			    scanner_state * pstate)
396 {
397     scan_binary_state *const pbs = &pstate->s_ss.binary;
398     uint index = pbs->index;
399     ref *np = pbs->bin_array.value.refs + index;
400     uint wanted = encoded_number_bytes(pbs->num_format);
401 
402     for (; index < r_size(&pbs->bin_array); index++, np++) {
403 	int code;
404 
405 	if (sbufavailable(s) < wanted) {
406 	    pbs->index = index;
407 	    pstate->s_scan_type = scanning_binary;
408 	    return scan_Refill;
409 	}
410 	code = sdecode_number(sbufptr(s), pbs->num_format, np);
411 	switch (code) {
412 	    case t_integer:
413 	    case t_real:
414 		r_set_type(np, code);
415 		sbufskip(s, wanted);
416 		break;
417 	    case t_null:
418 		return_error(e_syntaxerror);
419 	    default:
420 		return code;
421 	}
422     }
423     *pref = pbs->bin_array;
424     return 0;
425 }
426 
427 /*
428  * Continue scanning a binary object sequence.  We preallocated space for
429  * the largest possible number of objects, but not for strings, since
430  * the latter would probably be a gross over-estimate.  Instead,
431  * we wait until we see the first string or name, and allocate string space
432  * based on the hope that its string index is the smallest one we will see.
433  * If this turns out to be wrong, we may have to reallocate, and adjust
434  * all the pointers.
435  */
436 private int
scan_bos_continue(i_ctx_t * i_ctx_p,register stream * s,ref * pref,scanner_state * pstate)437 scan_bos_continue(i_ctx_t *i_ctx_p, register stream * s, ref * pref,
438 		  scanner_state * pstate)
439 {
440     scan_binary_state *const pbs = &pstate->s_ss.binary;
441     s_declare_inline(s, p, rlimit);
442     uint max_array_index = pbs->max_array_index;
443     uint min_string_index = pbs->min_string_index;
444     int num_format = pbs->num_format;
445     uint index = pbs->index;
446     uint size = pbs->size;
447     ref *abase = pbs->bin_array.value.refs;
448     int code;
449 
450     pbs->cont = scan_bos_continue;  /* in case of premature return */
451     s_begin_inline(s, p, rlimit);
452     for (; index < max_array_index; p += SIZEOF_BIN_SEQ_OBJ, index++) {
453 	ref *op = abase + index;
454 	uint osize;
455 	long value;
456 	uint atype, attrs;
457 
458 	s_end_inline(s, p, rlimit);	/* in case of error */
459 	if (rlimit - p < SIZEOF_BIN_SEQ_OBJ) {
460 	    pbs->index = index;
461 	    pbs->max_array_index = max_array_index;
462 	    pbs->min_string_index = min_string_index;
463 	    pstate->s_scan_type = scanning_binary;
464 	    return scan_Refill;
465 	}
466 	if (p[2] != 0) /* reserved, must be 0 */
467 	    return_error(e_syntaxerror);
468 	attrs = (p[1] & 128 ? a_executable : 0);
469 	switch (p[1] & 0x7f) {
470 	    case BS_TYPE_NULL:
471 		make_null(op);
472 		break;
473 	    case BS_TYPE_INTEGER:
474 		make_int(op, sdecodelong(p + 5, num_format));
475 		break;
476 	    case BS_TYPE_REAL:{
477 		    float vreal;
478 
479 		    osize = sdecodeushort(p + 3, num_format);
480 		    if (osize != 0) {	/* fixed-point number */
481 			value = sdecodelong(p + 5, num_format);
482 			/* ldexp requires a signed 2nd argument.... */
483 			vreal = (float)ldexp((double)value, -(int)osize);
484 		    } else {
485 			vreal = sdecodefloat(p + 5, num_format);
486 		    }
487 		    make_real(op, vreal);
488 		    break;
489 		}
490 	    case BS_TYPE_BOOLEAN:
491 		make_bool(op, sdecodelong(p + 5, num_format) != 0);
492 		break;
493 	    case BS_TYPE_STRING:
494 		osize = sdecodeushort(p + 3, num_format);
495 		attrs |= a_all;
496 	      str:
497 		if (osize == 0) {
498 		    /* For zero-length strings, the offset */
499 		    /* doesn't matter, and may be zero. */
500 		    make_empty_string(op, attrs);
501 		    break;
502 		}
503 		value = sdecodelong(p + 5, num_format);
504 		if (value < max_array_index * SIZEOF_BIN_SEQ_OBJ ||
505 		    value + osize > size
506 		    )
507 		    return_error(e_syntaxerror);
508 		if (value < min_string_index) {
509 		    /* We have to (re)allocate the strings. */
510 		    uint str_size = size - value;
511 		    byte *sbase;
512 
513 		    if (pstate->s_da.is_dynamic)
514 			sbase = scan_bos_resize(i_ctx_p, pstate, str_size,
515 						index);
516 		    else
517 			sbase = ialloc_string(str_size,
518 					      "bos strings");
519 		    if (sbase == 0)
520 			return_error(e_VMerror);
521 		    pstate->s_da.is_dynamic = true;
522 		    pstate->s_da.base = pstate->s_da.next = sbase;
523 		    pstate->s_da.limit = sbase + str_size;
524 		    min_string_index = value;
525 		}
526 		make_string(op, attrs | icurrent_space, osize,
527 			    pstate->s_da.base +
528 			    (value - min_string_index));
529 		break;
530 	    case BS_TYPE_EVAL_NAME:
531 		attrs |= a_readonly;	/* mark as executable for later */
532 		/* falls through */
533 	    case BS_TYPE_NAME:
534 		osize = sdecodeushort(p + 3, num_format);
535 		value = sdecodelong(p + 5, num_format);
536 		switch (osize) {
537 		    case 0:
538 			code = array_get(imemory, user_names_p, value, op);
539 			goto usn;
540 		    case 0xffff:
541 			code = array_get(imemory, system_names_p, value, op);
542 		      usn:
543 			if (code < 0)
544 			    return code;
545 			if (!r_has_type(op, t_name))
546 			    return_error(e_undefined);
547 			r_set_attrs(op, attrs);
548 			break;
549 		    default:
550 			goto str;
551 		}
552 		break;
553 	    case BS_TYPE_ARRAY:
554 		osize = sdecodeushort(p + 3, num_format);
555 		atype = t_array;
556 	      arr:
557 		value = sdecodelong(p + 5, num_format);
558 		if (value + osize > min_string_index ||
559 		    value & (SIZEOF_BIN_SEQ_OBJ - 1)
560 		    )
561 		    return_error(e_syntaxerror);
562 		{
563 		    uint aindex = value / SIZEOF_BIN_SEQ_OBJ;
564 
565 		    max_array_index =
566 			max(max_array_index, aindex + osize);
567 		    make_tasv_new(op, atype,
568 				  attrs | a_all | icurrent_space,
569 				  osize, refs, abase + aindex);
570 		}
571 		break;
572 	    case BS_TYPE_DICTIONARY:	/* EXTENSION */
573 		osize = sdecodeushort(p + 3, num_format);
574 		if ((osize & 1) != 0 && osize != 1)
575 		    return_error(e_syntaxerror);
576 		atype = t_mixedarray;	/* mark as dictionary */
577 		goto arr;
578 	    case BS_TYPE_MARK:
579 		make_mark(op);
580 		break;
581 	    default:
582 		return_error(e_syntaxerror);
583 	}
584     }
585     s_end_inline(s, p, rlimit);
586     /* Shorten the objects to remove the space that turned out */
587     /* to be used for strings. */
588     pbs->index = max_array_index;
589     iresize_ref_array(&pbs->bin_array, max_array_index,
590 		      "binary object sequence(objects)");
591     code = scan_bos_string_continue(i_ctx_p, s, pref, pstate);
592     if (code == scan_Refill)
593 	pbs->cont = scan_bos_string_continue;
594     return code;
595 }
596 
597 /* Reallocate the strings for a binary object sequence, */
598 /* adjusting all the pointers to them from objects. */
599 private byte *
scan_bos_resize(i_ctx_t * i_ctx_p,scanner_state * pstate,uint new_size,uint index)600 scan_bos_resize(i_ctx_t *i_ctx_p, scanner_state * pstate, uint new_size,
601 		uint index)
602 {
603     scan_binary_state *const pbs = &pstate->s_ss.binary;
604     uint old_size = da_size(&pstate->s_da);
605     byte *old_base = pstate->s_da.base;
606     byte *new_base = iresize_string(old_base, old_size, new_size,
607 				    "scan_bos_resize");
608     byte *relocated_base = new_base + (new_size - old_size);
609     uint i;
610     ref *aptr = pbs->bin_array.value.refs;
611 
612     if (new_base == 0)
613 	return 0;
614     /* Since the allocator normally extends strings downward, */
615     /* it's quite possible that new and old addresses are the same. */
616     if (relocated_base != old_base)
617 	for (i = index; i != 0; i--, aptr++)
618 	    if (r_has_type(aptr, t_string) && r_size(aptr) != 0)
619 		aptr->value.bytes =
620 		    aptr->value.bytes - old_base + relocated_base;
621     return new_base;
622 }
623 
624 /* Continue reading the strings for a binary object sequence. */
625 private int
scan_bos_string_continue(i_ctx_t * i_ctx_p,register stream * s,ref * pref,scanner_state * pstate)626 scan_bos_string_continue(i_ctx_t *i_ctx_p, register stream * s, ref * pref,
627 			 scanner_state * pstate)
628 {
629     scan_binary_state *const pbs = &pstate->s_ss.binary;
630     ref rstr;
631     ref *op;
632     int code = scan_bin_string_continue(i_ctx_p, s, &rstr, pstate);
633     uint space = ialloc_space(idmemory);
634     bool rescan = false;
635     uint i;
636 
637     if (code != 0)
638 	return code;
639 
640     /* Fix up names.  We must do this before creating dictionaries. */
641 
642     for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array);
643 	 i != 0; i--, op++
644 	 )
645 	switch (r_type(op)) {
646 	    case t_string:
647 		if (r_has_attr(op, a_write))	/* a real string */
648 		    break;
649 		/* This is actually a name; look it up now. */
650 		{
651 		    uint attrs =
652 		    (r_has_attr(op, a_executable) ? a_executable : 0);
653 
654 		    code = name_ref(imemory, op->value.bytes, r_size(op), op, 1);
655 		    if (code < 0)
656 			return code;
657 		    r_set_attrs(op, attrs);
658 		}
659 		/* falls through */
660 	    case t_name:
661 		if (r_has_attr(op, a_read)) {	/* BS_TYPE_EVAL_NAME */
662 		    ref *defp = dict_find_name(op);
663 
664 		    if (defp == 0)
665 			return_error(e_undefined);
666 		    store_check_space(space, defp);
667 		    ref_assign(op, defp);
668 		}
669 		break;
670 	    case t_mixedarray:	/* actually a dictionary */
671 		rescan = true;
672 	}
673 
674     /* Create dictionaries, if any. */
675 
676     if (rescan) {
677 	rescan = false;
678 	for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array);
679 	     i != 0; i--, op++
680 	     )
681 	    switch (r_type(op)) {
682 	    case t_mixedarray:	/* actually a dictionary */
683 		{
684 		    uint count = r_size(op);
685 		    ref rdict;
686 
687 		    if (count == 1) {
688 			/* Indirect reference. */
689 			if (op->value.refs < op)
690 			    ref_assign(&rdict, op->value.refs);
691 			else {
692 			    rescan = true;
693 			    continue;
694 			}
695 		    } else {
696 			code = dict_create(count >> 1, &rdict);
697 			if (code < 0)
698 			    return code;
699 			while (count) {
700 			    count -= 2;
701 			    code = idict_put(&rdict,
702 					     &op->value.refs[count],
703 					     &op->value.refs[count + 1]);
704 			    if (code < 0)
705 				return code;
706 			}
707 		    }
708 		    r_set_attrs(&rdict, a_all);
709 		    r_copy_attrs(&rdict, a_executable, op);
710 		    ref_assign(op, &rdict);
711 		}
712 		break;
713 	    }
714     }
715 
716     /* If there were any forward indirect references, fix them up now. */
717 
718     if (rescan)
719 	for (op = pbs->bin_array.value.refs, i = r_size(&pbs->bin_array);
720 	     i != 0; i--, op++
721 	    )
722 	    if (r_has_type(op, t_mixedarray)) {
723 		const ref *piref = op->value.const_refs;
724 		ref rdict;
725 
726 		if (r_has_type(piref, t_mixedarray))	/* ref to indirect */
727 		    return_error(e_syntaxerror);
728 		ref_assign(&rdict, piref);
729 		r_copy_attrs(&rdict, a_executable, op);
730 		ref_assign(op, &rdict);
731 	    }
732 
733     ref_assign(pref, &pbs->bin_array);
734     r_set_size(pref, pbs->top_size);
735     return scan_BOS;
736 }
737 
738 /* ---------------- Writing ---------------- */
739 
740 int
encode_binary_token(i_ctx_t * i_ctx_p,const ref * obj,long * ref_offset,long * char_offset,byte * str)741 encode_binary_token(i_ctx_t *i_ctx_p, const ref *obj, long *ref_offset,
742 		    long *char_offset, byte *str)
743 {
744     bin_seq_type_t type;
745     uint size = 0;
746     int format = (int)ref_binary_object_format.value.intval;
747     long value;
748     ref nstr;
749 
750     switch (r_type(obj)) {
751 	case t_null:
752 	    type = BS_TYPE_NULL;
753 	    goto tx;
754 	case t_mark:
755 	    type = BS_TYPE_MARK;
756 	    goto tx;
757 	case t_integer:
758 	    type = BS_TYPE_INTEGER;
759 	    value = obj->value.intval;
760 	    break;
761 	case t_real:
762 	    type = BS_TYPE_REAL;
763 	    if (sizeof(obj->value.realval) != sizeof(int)) {
764 		/* The PLRM allocates exactly 4 bytes for reals. */
765 		return_error(e_rangecheck);
766 	    }
767 	    value = *(const int *)&obj->value.realval;
768 #if !(ARCH_FLOATS_ARE_IEEE && BYTE_SWAP_IEEE_NATIVE_REALS)
769 	    if (format >= 3) {
770 		/* Never byte-swap native reals -- use native byte order. */
771 		format = 4 - ARCH_IS_BIG_ENDIAN;
772 	    }
773 #endif
774 	    break;
775 	case t_boolean:
776 	    type = BS_TYPE_BOOLEAN;
777 	    value = obj->value.boolval;
778 	    break;
779 	case t_array:
780 	    type = BS_TYPE_ARRAY;
781 	    size = r_size(obj);
782 	    goto aod;
783 	case t_dictionary:	/* EXTENSION */
784 	    type = BS_TYPE_DICTIONARY;
785 	    size = dict_length(obj) << 1;
786 	  aod:value = *ref_offset;
787 	    *ref_offset += size * (ulong) SIZEOF_BIN_SEQ_OBJ;
788 	    break;
789 	case t_string:
790 	    type = BS_TYPE_STRING;
791 nos:
792 	    size = r_size(obj);
793 	    value = *char_offset;
794 	    *char_offset += size;
795 	    break;
796 	case t_name:
797 	    type = BS_TYPE_NAME;
798 	    name_string_ref(imemory, obj, &nstr);
799 	    r_copy_attrs(&nstr, a_executable, obj);
800 	    obj = &nstr;
801 	    goto nos;
802 	default:
803 	    return_error(e_rangecheck);
804     }
805     {
806 	byte s0 = (byte) size, s1 = (byte) (size >> 8);
807 	byte v0 = (byte) value, v1 = (byte) (value >> 8),
808 	    v2 = (byte) (value >> 16), v3 = (byte) (value >> 24);
809 
810 	if (format & 1) {
811 	    /* Store big-endian */
812 	    str[2] = s1, str[3] = s0;
813 	    str[4] = v3, str[5] = v2, str[6] = v1, str[7] = v0;
814 	} else {
815 	    /* Store little-endian */
816 	    str[2] = s0, str[3] = s1;
817 	    str[4] = v0, str[5] = v1, str[6] = v2, str[7] = v3;
818 	}
819     }
820 tx:
821     if (r_has_attr(obj, a_executable))
822 	type += BS_EXECUTABLE;
823     str[0] = (byte) type;
824     return 0;
825 }
826