xref: /plan9/sys/src/cmd/gs/src/iscannum.c (revision 593dc095aefb2a85c828727bbfa9da139a49bdf4)
/* Copyright (C) 1994, 1995, 1997, 1998, 1999 Aladdin Enterprises.  All rights reserved.

  This software is provided AS-IS with no warranty, either express or
  implied.

  This software is distributed under license and may not be copied,
  modified or distributed except as expressly authorized under the terms
  of the license contained in the file LICENSE in this distribution.

  For more information about licensing, please refer to
  http://www.ghostscript.com/licensing/. For information on
  commercial licensing, go to http://www.artifex.com/licensing/ or
  contact Artifex Software, Inc., 101 Lucas Valley Road #110,
  San Rafael, CA  94903, U.S.A., +1(415)492-9861.
*/
16 
/* $Id: iscannum.c,v 1.10 2004/09/15 19:41:01 ray Exp $ */
/* Number scanner for Ghostscript interpreter */
19 #include "math_.h"
20 #include "ghost.h"
21 #include "ierrors.h"
22 #include "scommon.h"
23 #include "iscannum.h"		/* defines interface */
24 #include "scanchar.h"
25 #include "store.h"
26 
/*
 * Warning: this file has a "spaghetti" control structure.  But since this
 * code accounts for over 10% of the execution time of some PostScript
 * files, this is one of the few places we feel this is justified.
 */
33 /*
34  * Scan a number.  If the number consumes the entire string, return 0;
35  * if not, set *psp to the first character beyond the number and return 1.
36  */
37 int
scan_number(const byte * str,const byte * end,int sign,ref * pref,const byte ** psp,const bool PDFScanInvNum)38 scan_number(const byte * str, const byte * end, int sign,
39 	    ref * pref, const byte ** psp, const bool PDFScanInvNum)
40 {
41     const byte *sp = str;
42 #define GET_NEXT(cvar, sp, end_action)\
43   if (sp >= end) { end_action; } else cvar = *sp++
44 
45     /*
46      * Powers of 10 up to 6 can be represented accurately as
47      * a single-precision float.
48      */
49 #define NUM_POWERS_10 6
50     static const float powers_10[NUM_POWERS_10 + 1] = {
51 	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6
52     };
53     static const double neg_powers_10[NUM_POWERS_10 + 1] = {
54 	1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6
55     };
56 
57     int ival;
58     long lval;
59     double dval;
60     int exp10;
61     int code = 0;
62     int c, d;
63     const byte *const decoder = scan_char_decoder;
64 #define IS_DIGIT(d, c)\
65   ((d = decoder[c]) < 10)
66 #define WOULD_OVERFLOW(val, d, maxv)\
67   (val >= maxv / 10 && (val > maxv / 10 || d > (int)(maxv % 10)))
68 
69     GET_NEXT(c, sp, return_error(e_syntaxerror));
70     if (!IS_DIGIT(d, c)) {
71 	if (c != '.')
72 	    return_error(e_syntaxerror);
73 	/* Might be a number starting with '.'. */
74 	GET_NEXT(c, sp, return_error(e_syntaxerror));
75 	if (!IS_DIGIT(d, c))
76 	    return_error(e_syntaxerror);
77 	ival = 0;
78 	goto i2r;
79     }
80     /* Accumulate an integer in ival. */
81     /* Do up to 4 digits without a loop, */
82     /* since we know this can't overflow and since */
83     /* most numbers have 4 (integer) digits or fewer. */
84     ival = d;
85     if (end - sp >= 3) {	/* just check once */
86 	if (!IS_DIGIT(d, (c = *sp))) {
87 	    sp++;
88 	    goto ind;
89 	}
90 	ival = ival * 10 + d;
91 	if (!IS_DIGIT(d, (c = sp[1]))) {
92 	    sp += 2;
93 	    goto ind;
94 	}
95 	ival = ival * 10 + d;
96 	sp += 3;
97 	if (!IS_DIGIT(d, (c = sp[-1])))
98 	    goto ind;
99 	ival = ival * 10 + d;
100     }
101     for (;; ival = ival * 10 + d) {
102 	GET_NEXT(c, sp, goto iret);
103 	if (!IS_DIGIT(d, c))
104 	    break;
105 	if (WOULD_OVERFLOW(ival, d, max_int))
106 	    goto i2l;
107     }
108   ind:				/* We saw a non-digit while accumulating an integer in ival. */
109     switch (c) {
110 	case '.':
111 	    GET_NEXT(c, sp, c = EOFC);
112 	    goto i2r;
113 	default:
114 	    *psp = sp;
115 	    code = 1;
116 	    break;
117 	case 'e':
118 	case 'E':
119 	    if (sign < 0)
120 		ival = -ival;
121 	    dval = ival;
122 	    exp10 = 0;
123 	    goto fe;
124 	case '#':
125 	    {
126 		const uint radix = (uint)ival;
127 		ulong uval = 0, lmax;
128 
129 		if (sign || radix < min_radix || radix > max_radix)
130 		    return_error(e_syntaxerror);
131 		/* Avoid multiplies for power-of-2 radix. */
132 		if (!(radix & (radix - 1))) {
133 		    int shift;
134 
135 		    switch (radix) {
136 			case 2:
137 			    shift = 1, lmax = max_ulong >> 1;
138 			    break;
139 			case 4:
140 			    shift = 2, lmax = max_ulong >> 2;
141 			    break;
142 			case 8:
143 			    shift = 3, lmax = max_ulong >> 3;
144 			    break;
145 			case 16:
146 			    shift = 4, lmax = max_ulong >> 4;
147 			    break;
148 			case 32:
149 			    shift = 5, lmax = max_ulong >> 5;
150 			    break;
151 			default:	/* can't happen */
152 			    return_error(e_rangecheck);
153 		    }
154 		    for (;; uval = (uval << shift) + d) {
155 			GET_NEXT(c, sp, break);
156 			d = decoder[c];
157 			if (d >= radix) {
158 			    *psp = sp;
159 			    code = 1;
160 			    break;
161 			}
162 			if (uval > lmax)
163 			    return_error(e_limitcheck);
164 		    }
165 		} else {
166 		    int lrem = max_ulong % radix;
167 
168 		    lmax = max_ulong / radix;
169 		    for (;; uval = uval * radix + d) {
170 			GET_NEXT(c, sp, break);
171 			d = decoder[c];
172 			if (d >= radix) {
173 			    *psp = sp;
174 			    code = 1;
175 			    break;
176 			}
177 			if (uval >= lmax &&
178 			    (uval > lmax || d > lrem)
179 			    )
180 			    return_error(e_limitcheck);
181 		    }
182 		}
183 		make_int(pref, uval);
184 		return code;
185 	    }
186     }
187 iret:
188     make_int(pref, (sign < 0 ? -ival : ival));
189     return code;
190 
191     /* Accumulate a long in lval. */
192 i2l:
193     for (lval = ival;;) {
194 	if (WOULD_OVERFLOW(lval, d, max_long)) {
195 	    /* Make a special check for entering the smallest */
196 	    /* (most negative) integer. */
197 	    if (lval == max_long / 10 &&
198 		d == (int)(max_long % 10) + 1 && sign < 0
199 		) {
200 		GET_NEXT(c, sp, c = EOFC);
201 		dval = -(double)min_long;
202 		if (c == 'e' || c == 'E') {
203 		    exp10 = 0;
204 		    goto fs;
205 		} else if (c == '.') {
206                     GET_NEXT(c, sp, c = EOFC);
207 		    exp10 = 0;
208 		    goto fd;
209                 } else if (!IS_DIGIT(d, c)) {
210 		    lval = min_long;
211 		    break;
212 		}
213 	    } else
214 		dval = lval;
215 	    goto l2d;
216 	}
217 	lval = lval * 10 + d;
218 	GET_NEXT(c, sp, goto lret);
219 	if (!IS_DIGIT(d, c))
220 	    break;
221     }
222     switch (c) {
223 	case '.':
224 	    GET_NEXT(c, sp, c = EOFC);
225 	    exp10 = 0;
226 	    goto l2r;
227 	case EOFC:
228 	    break;
229 	default:
230 	    *psp = sp;
231 	    code = 1;
232 	    break;
233 	case 'e':
234 	case 'E':
235 	    exp10 = 0;
236 	    goto le;
237 	case '#':
238 	    return_error(e_syntaxerror);
239     }
240 lret:
241     make_int(pref, (sign < 0 ? -lval : lval));
242     return code;
243 
244     /* Accumulate a double in dval. */
245 l2d:
246     exp10 = 0;
247     for (;;) {
248 	dval = dval * 10 + d;
249 	GET_NEXT(c, sp, c = EOFC);
250 	if (!IS_DIGIT(d, c))
251 	    break;
252     }
253     switch (c) {
254 	case '.':
255 	    GET_NEXT(c, sp, c = EOFC);
256 	    exp10 = 0;
257 	    goto fd;
258 	default:
259 	    *psp = sp;
260 	    code = 1;
261 	    /* falls through */
262 	case EOFC:
263 	    if (sign < 0)
264 		dval = -dval;
265 	    goto rret;
266 	case 'e':
267 	case 'E':
268 	    exp10 = 0;
269 	    goto fs;
270 	case '#':
271 	    return_error(e_syntaxerror);
272     }
273 
274     /* We saw a '.' while accumulating an integer in ival. */
275 i2r:
276     exp10 = 0;
277     while (IS_DIGIT(d, c) || c == '-') {
278 	/*
279 	 * PostScript gives an error on numbers with a '-' following a '.'
280 	 * Adobe Acrobat Reader (PDF) apparently doesn't treat this as an
281 	 * error. Experiments show that the numbers following the '-' are
282 	 * ignored, so we swallow the fractional part. PDFScanInvNum enables
283 	 * this compatibility kloodge.
284 	 */
285 	if (c == '-') {
286 	    if (!PDFScanInvNum)
287 		break;
288 	    do {
289 		GET_NEXT(c, sp, c = EOFC);
290 	    } while (IS_DIGIT(d, c));
291 	    break;
292 	}
293 	if (WOULD_OVERFLOW(ival, d, max_int)) {
294 	    lval = ival;
295 	    goto l2r;
296 	}
297 	ival = ival * 10 + d;
298 	exp10--;
299 	GET_NEXT(c, sp, c = EOFC);
300     }
301     if (sign < 0)
302 	ival = -ival;
303     /* Take a shortcut for the common case */
304     if (!(c == 'e' || c == 'E' || exp10 < -NUM_POWERS_10)) {	/* Check for trailing garbage */
305 	if (c != EOFC)
306 	    *psp = sp, code = 1;
307 	make_real(pref, ival * neg_powers_10[-exp10]);
308 	return code;
309     }
310     dval = ival;
311     goto fe;
312 
313     /* We saw a '.' while accumulating a long in lval. */
314 l2r:
315     while (IS_DIGIT(d, c) || c == '-') {
316 	/* Handle bogus '-' following '.' as in i2r above.	*/
317 	if (c == '-') {
318 	    if (!PDFScanInvNum)
319 		break;
320 	    do {
321 		GET_NEXT(c, sp, c = EOFC);
322 	    } while (IS_DIGIT(d, c));
323 	    break;
324 	}
325 	if (WOULD_OVERFLOW(lval, d, max_long)) {
326 	    dval = lval;
327 	    goto fd;
328 	}
329 	lval = lval * 10 + d;
330 	exp10--;
331 	GET_NEXT(c, sp, c = EOFC);
332     }
333 le:
334     if (sign < 0)
335 	lval = -lval;
336     dval = lval;
337     goto fe;
338 
339     /* Now we are accumulating a double in dval. */
340 fd:
341     while (IS_DIGIT(d, c)) {
342 	dval = dval * 10 + d;
343 	exp10--;
344 	GET_NEXT(c, sp, c = EOFC);
345     }
346 fs:
347     if (sign < 0)
348 	dval = -dval;
349 fe:
350     /* Now dval contains the value, negated if necessary. */
351     switch (c) {
352 	case 'e':
353 	case 'E':
354 	    {			/* Check for a following exponent. */
355 		int esign = 0;
356 		int iexp;
357 
358 		GET_NEXT(c, sp, return_error(e_syntaxerror));
359 		switch (c) {
360 		    case '-':
361 			esign = 1;
362 		    case '+':
363 			GET_NEXT(c, sp, return_error(e_syntaxerror));
364 		}
365 		/* Scan the exponent.  We limit it arbitrarily to 999. */
366 		if (!IS_DIGIT(d, c))
367 		    return_error(e_syntaxerror);
368 		iexp = d;
369 		for (;; iexp = iexp * 10 + d) {
370 		    GET_NEXT(c, sp, break);
371 		    if (!IS_DIGIT(d, c)) {
372 			*psp = sp;
373 			code = 1;
374 			break;
375 		    }
376 		    if (iexp > 99)
377 			return_error(e_limitcheck);
378 		}
379 		if (esign)
380 		    exp10 -= iexp;
381 		else
382 		    exp10 += iexp;
383 		break;
384 	    }
385 	default:
386 	    *psp = sp;
387 	    code = 1;
388 	case EOFC:
389 	    ;
390     }
391     /* Compute dval * 10^exp10. */
392     if (exp10 > 0) {
393 	while (exp10 > NUM_POWERS_10)
394 	    dval *= powers_10[NUM_POWERS_10],
395 		exp10 -= NUM_POWERS_10;
396 	if (exp10 > 0)
397 	    dval *= powers_10[exp10];
398     } else if (exp10 < 0) {
399 	while (exp10 < -NUM_POWERS_10)
400 	    dval /= powers_10[NUM_POWERS_10],
401 		exp10 += NUM_POWERS_10;
402 	if (exp10 < 0)
403 	    dval /= powers_10[-exp10];
404     }
405     /*
406      * Check for an out-of-range result.  Currently we don't check for
407      * absurdly large numbers of digits in the accumulation loops,
408      * but we should.
409      */
410     if (dval >= 0) {
411 	if (dval > MAX_FLOAT)
412 	    return_error(e_limitcheck);
413     } else {
414 	if (dval < -MAX_FLOAT)
415 	    return_error(e_limitcheck);
416     }
417 rret:
418     make_real(pref, dval);
419     return code;
420 }
421