1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999 Aladdin Enterprises. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under license and may not be copied,
7 modified or distributed except as expressly authorized under the terms
8 of the license contained in the file LICENSE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostscript.com/licensing/. For information on
12 commercial licensing, go to http://www.artifex.com/licensing/ or
13 contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14 San Rafael, CA 94903, U.S.A., +1(415)492-9861.
15 */
16
17 /* $Id: iscannum.c,v 1.10 2004/09/15 19:41:01 ray Exp $ */
18 /* Number scanner for Ghostscript interpreter */
19 #include "math_.h"
20 #include "ghost.h"
21 #include "ierrors.h"
22 #include "scommon.h"
23 #include "iscannum.h" /* defines interface */
24 #include "scanchar.h"
25 #include "store.h"
26
27 /*
28 * Warning: this file has a "spaghetti" control structure. But since this
29 * code accounts for over 10% of the execution time of some PostScript
30 * files, this is one of the few places we feel this is justified.
31 */
32
33 /*
34 * Scan a number. If the number consumes the entire string, return 0;
35 * if not, set *psp to the first character beyond the number and return 1.
36 */
37 int
scan_number(const byte * str,const byte * end,int sign,ref * pref,const byte ** psp,const bool PDFScanInvNum)38 scan_number(const byte * str, const byte * end, int sign,
39 ref * pref, const byte ** psp, const bool PDFScanInvNum)
40 {
41 const byte *sp = str;
42 #define GET_NEXT(cvar, sp, end_action)\
43 if (sp >= end) { end_action; } else cvar = *sp++
44
45 /*
46 * Powers of 10 up to 6 can be represented accurately as
47 * a single-precision float.
48 */
49 #define NUM_POWERS_10 6
50 static const float powers_10[NUM_POWERS_10 + 1] = {
51 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6
52 };
53 static const double neg_powers_10[NUM_POWERS_10 + 1] = {
54 1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6
55 };
56
57 int ival;
58 long lval;
59 double dval;
60 int exp10;
61 int code = 0;
62 int c, d;
63 const byte *const decoder = scan_char_decoder;
64 #define IS_DIGIT(d, c)\
65 ((d = decoder[c]) < 10)
66 #define WOULD_OVERFLOW(val, d, maxv)\
67 (val >= maxv / 10 && (val > maxv / 10 || d > (int)(maxv % 10)))
68
69 GET_NEXT(c, sp, return_error(e_syntaxerror));
70 if (!IS_DIGIT(d, c)) {
71 if (c != '.')
72 return_error(e_syntaxerror);
73 /* Might be a number starting with '.'. */
74 GET_NEXT(c, sp, return_error(e_syntaxerror));
75 if (!IS_DIGIT(d, c))
76 return_error(e_syntaxerror);
77 ival = 0;
78 goto i2r;
79 }
80 /* Accumulate an integer in ival. */
81 /* Do up to 4 digits without a loop, */
82 /* since we know this can't overflow and since */
83 /* most numbers have 4 (integer) digits or fewer. */
84 ival = d;
85 if (end - sp >= 3) { /* just check once */
86 if (!IS_DIGIT(d, (c = *sp))) {
87 sp++;
88 goto ind;
89 }
90 ival = ival * 10 + d;
91 if (!IS_DIGIT(d, (c = sp[1]))) {
92 sp += 2;
93 goto ind;
94 }
95 ival = ival * 10 + d;
96 sp += 3;
97 if (!IS_DIGIT(d, (c = sp[-1])))
98 goto ind;
99 ival = ival * 10 + d;
100 }
101 for (;; ival = ival * 10 + d) {
102 GET_NEXT(c, sp, goto iret);
103 if (!IS_DIGIT(d, c))
104 break;
105 if (WOULD_OVERFLOW(ival, d, max_int))
106 goto i2l;
107 }
108 ind: /* We saw a non-digit while accumulating an integer in ival. */
109 switch (c) {
110 case '.':
111 GET_NEXT(c, sp, c = EOFC);
112 goto i2r;
113 default:
114 *psp = sp;
115 code = 1;
116 break;
117 case 'e':
118 case 'E':
119 if (sign < 0)
120 ival = -ival;
121 dval = ival;
122 exp10 = 0;
123 goto fe;
124 case '#':
125 {
126 const uint radix = (uint)ival;
127 ulong uval = 0, lmax;
128
129 if (sign || radix < min_radix || radix > max_radix)
130 return_error(e_syntaxerror);
131 /* Avoid multiplies for power-of-2 radix. */
132 if (!(radix & (radix - 1))) {
133 int shift;
134
135 switch (radix) {
136 case 2:
137 shift = 1, lmax = max_ulong >> 1;
138 break;
139 case 4:
140 shift = 2, lmax = max_ulong >> 2;
141 break;
142 case 8:
143 shift = 3, lmax = max_ulong >> 3;
144 break;
145 case 16:
146 shift = 4, lmax = max_ulong >> 4;
147 break;
148 case 32:
149 shift = 5, lmax = max_ulong >> 5;
150 break;
151 default: /* can't happen */
152 return_error(e_rangecheck);
153 }
154 for (;; uval = (uval << shift) + d) {
155 GET_NEXT(c, sp, break);
156 d = decoder[c];
157 if (d >= radix) {
158 *psp = sp;
159 code = 1;
160 break;
161 }
162 if (uval > lmax)
163 return_error(e_limitcheck);
164 }
165 } else {
166 int lrem = max_ulong % radix;
167
168 lmax = max_ulong / radix;
169 for (;; uval = uval * radix + d) {
170 GET_NEXT(c, sp, break);
171 d = decoder[c];
172 if (d >= radix) {
173 *psp = sp;
174 code = 1;
175 break;
176 }
177 if (uval >= lmax &&
178 (uval > lmax || d > lrem)
179 )
180 return_error(e_limitcheck);
181 }
182 }
183 make_int(pref, uval);
184 return code;
185 }
186 }
187 iret:
188 make_int(pref, (sign < 0 ? -ival : ival));
189 return code;
190
191 /* Accumulate a long in lval. */
192 i2l:
193 for (lval = ival;;) {
194 if (WOULD_OVERFLOW(lval, d, max_long)) {
195 /* Make a special check for entering the smallest */
196 /* (most negative) integer. */
197 if (lval == max_long / 10 &&
198 d == (int)(max_long % 10) + 1 && sign < 0
199 ) {
200 GET_NEXT(c, sp, c = EOFC);
201 dval = -(double)min_long;
202 if (c == 'e' || c == 'E') {
203 exp10 = 0;
204 goto fs;
205 } else if (c == '.') {
206 GET_NEXT(c, sp, c = EOFC);
207 exp10 = 0;
208 goto fd;
209 } else if (!IS_DIGIT(d, c)) {
210 lval = min_long;
211 break;
212 }
213 } else
214 dval = lval;
215 goto l2d;
216 }
217 lval = lval * 10 + d;
218 GET_NEXT(c, sp, goto lret);
219 if (!IS_DIGIT(d, c))
220 break;
221 }
222 switch (c) {
223 case '.':
224 GET_NEXT(c, sp, c = EOFC);
225 exp10 = 0;
226 goto l2r;
227 case EOFC:
228 break;
229 default:
230 *psp = sp;
231 code = 1;
232 break;
233 case 'e':
234 case 'E':
235 exp10 = 0;
236 goto le;
237 case '#':
238 return_error(e_syntaxerror);
239 }
240 lret:
241 make_int(pref, (sign < 0 ? -lval : lval));
242 return code;
243
244 /* Accumulate a double in dval. */
245 l2d:
246 exp10 = 0;
247 for (;;) {
248 dval = dval * 10 + d;
249 GET_NEXT(c, sp, c = EOFC);
250 if (!IS_DIGIT(d, c))
251 break;
252 }
253 switch (c) {
254 case '.':
255 GET_NEXT(c, sp, c = EOFC);
256 exp10 = 0;
257 goto fd;
258 default:
259 *psp = sp;
260 code = 1;
261 /* falls through */
262 case EOFC:
263 if (sign < 0)
264 dval = -dval;
265 goto rret;
266 case 'e':
267 case 'E':
268 exp10 = 0;
269 goto fs;
270 case '#':
271 return_error(e_syntaxerror);
272 }
273
274 /* We saw a '.' while accumulating an integer in ival. */
275 i2r:
276 exp10 = 0;
277 while (IS_DIGIT(d, c) || c == '-') {
278 /*
279 * PostScript gives an error on numbers with a '-' following a '.'
280 * Adobe Acrobat Reader (PDF) apparently doesn't treat this as an
281 * error. Experiments show that the numbers following the '-' are
282 * ignored, so we swallow the fractional part. PDFScanInvNum enables
283 * this compatibility kloodge.
284 */
285 if (c == '-') {
286 if (!PDFScanInvNum)
287 break;
288 do {
289 GET_NEXT(c, sp, c = EOFC);
290 } while (IS_DIGIT(d, c));
291 break;
292 }
293 if (WOULD_OVERFLOW(ival, d, max_int)) {
294 lval = ival;
295 goto l2r;
296 }
297 ival = ival * 10 + d;
298 exp10--;
299 GET_NEXT(c, sp, c = EOFC);
300 }
301 if (sign < 0)
302 ival = -ival;
303 /* Take a shortcut for the common case */
304 if (!(c == 'e' || c == 'E' || exp10 < -NUM_POWERS_10)) { /* Check for trailing garbage */
305 if (c != EOFC)
306 *psp = sp, code = 1;
307 make_real(pref, ival * neg_powers_10[-exp10]);
308 return code;
309 }
310 dval = ival;
311 goto fe;
312
313 /* We saw a '.' while accumulating a long in lval. */
314 l2r:
315 while (IS_DIGIT(d, c) || c == '-') {
316 /* Handle bogus '-' following '.' as in i2r above. */
317 if (c == '-') {
318 if (!PDFScanInvNum)
319 break;
320 do {
321 GET_NEXT(c, sp, c = EOFC);
322 } while (IS_DIGIT(d, c));
323 break;
324 }
325 if (WOULD_OVERFLOW(lval, d, max_long)) {
326 dval = lval;
327 goto fd;
328 }
329 lval = lval * 10 + d;
330 exp10--;
331 GET_NEXT(c, sp, c = EOFC);
332 }
333 le:
334 if (sign < 0)
335 lval = -lval;
336 dval = lval;
337 goto fe;
338
339 /* Now we are accumulating a double in dval. */
340 fd:
341 while (IS_DIGIT(d, c)) {
342 dval = dval * 10 + d;
343 exp10--;
344 GET_NEXT(c, sp, c = EOFC);
345 }
346 fs:
347 if (sign < 0)
348 dval = -dval;
349 fe:
350 /* Now dval contains the value, negated if necessary. */
351 switch (c) {
352 case 'e':
353 case 'E':
354 { /* Check for a following exponent. */
355 int esign = 0;
356 int iexp;
357
358 GET_NEXT(c, sp, return_error(e_syntaxerror));
359 switch (c) {
360 case '-':
361 esign = 1;
362 case '+':
363 GET_NEXT(c, sp, return_error(e_syntaxerror));
364 }
365 /* Scan the exponent. We limit it arbitrarily to 999. */
366 if (!IS_DIGIT(d, c))
367 return_error(e_syntaxerror);
368 iexp = d;
369 for (;; iexp = iexp * 10 + d) {
370 GET_NEXT(c, sp, break);
371 if (!IS_DIGIT(d, c)) {
372 *psp = sp;
373 code = 1;
374 break;
375 }
376 if (iexp > 99)
377 return_error(e_limitcheck);
378 }
379 if (esign)
380 exp10 -= iexp;
381 else
382 exp10 += iexp;
383 break;
384 }
385 default:
386 *psp = sp;
387 code = 1;
388 case EOFC:
389 ;
390 }
391 /* Compute dval * 10^exp10. */
392 if (exp10 > 0) {
393 while (exp10 > NUM_POWERS_10)
394 dval *= powers_10[NUM_POWERS_10],
395 exp10 -= NUM_POWERS_10;
396 if (exp10 > 0)
397 dval *= powers_10[exp10];
398 } else if (exp10 < 0) {
399 while (exp10 < -NUM_POWERS_10)
400 dval /= powers_10[NUM_POWERS_10],
401 exp10 += NUM_POWERS_10;
402 if (exp10 < 0)
403 dval /= powers_10[-exp10];
404 }
405 /*
406 * Check for an out-of-range result. Currently we don't check for
407 * absurdly large numbers of digits in the accumulation loops,
408 * but we should.
409 */
410 if (dval >= 0) {
411 if (dval > MAX_FLOAT)
412 return_error(e_limitcheck);
413 } else {
414 if (dval < -MAX_FLOAT)
415 return_error(e_limitcheck);
416 }
417 rret:
418 make_real(pref, dval);
419 return code;
420 }
421