xref: /netbsd-src/external/bsd/file/dist/src/vasprintf.c (revision 6cf6fe02a981b55727c49c3d37b0d8191a98c0ee)
1 /*	$NetBSD: vasprintf.c,v 1.1.1.4 2014/06/13 01:48:21 christos Exp $	*/
2 /*
3  * Copyright (c) Ian F. Darwin 1986-1995.
4  * Software written by Ian F. Darwin and others;
5  * maintained 1995-present by Christos Zoulas and others.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*###########################################################################
30   #                                                                           #
31   #                                vasprintf                                  #
32   #                                                                           #
33   #               Copyright (c) 2002-2005 David TAILLANDIER                   #
34   #                                                                           #
35   ###########################################################################*/
36 
37 /*
38 
39 This software is distributed under the "modified BSD licence".
40 
41 This software is also released with GNU license (GPL) in another file (same
42 source-code, only license differ).
43 
44 
45 
46 Redistribution and use in source and binary forms, with or without
47 modification, are permitted provided that the following conditions are met:
48 
49 Redistributions of source code must retain the above copyright notice, this
50 list of conditions and the following disclaimer. Redistributions in binary
51 form must reproduce the above copyright notice, this list of conditions and
52 the following disclaimer in the documentation and/or other materials
53 provided with the distribution. The name of the author may not be used to
54 endorse or promote products derived from this software without specific
55 prior written permission.
56 
57 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
58 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
59 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
60 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
62 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
63 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
64 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
65 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
66 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67 
68 ====================
69 
70 Hacked from xnprintf version of 26th February 2005 to provide only
71 vasprintf by Reuben Thomas <rrt@sc3d.org>.
72 
73 ====================
74 
75 
76 'printf' function family use the following format string:
77 
78 %[flag][width][.prec][modifier]type
79 
80 %% is the escape sequence to print a '%'
81 %  followed by an unknown format will print the characters without
82 trying to do any interpretation
83 
84 flag:   none   +     -     #     (blank)
85 width:  n    0n    *
86 prec:   none   .0    .n     .*
87 modifier:    F N L h l ll z t    ('F' and 'N' are ms-dos/16-bit specific)
88 type:  d i o u x X f e g E G c s p n
89 
90 
The function needs to allocate memory to store the full text before
actually writing it, i.e. if you want to fnprintf() 1000 characters, the
function will allocate at least 1000 bytes.
This behaviour can be modified: you have to customise the code to flush the
internal buffer (writing to screen or file) when it reaches a given size. Then
the buffer can have a shorter length. But why bother? If you really need to
write a HUGE string, don't use printf!
98 During the process, some other memory is allocated (1024 bytes minimum)
99 to handle the output of partial sprintf() calls. If you have only 10000 bytes
100 free in memory, you *may* not be able to nprintf() a 8000 bytes-long text.
101 
note: if a buffer overflow is detected, the program is aborted through
assert() (the check disappears when compiled with NDEBUG). This situation
should never appear ... but if you want to be *really* sure, you have to
modify the code to handle those situations (only one place to modify).
A buffer overflow can only occur if your sprintf() does strange things or when
you use strange formats.
107 
108 */
109 #include "file.h"
110 
111 #ifndef	lint
112 #if 0
113 FILE_RCSID("@(#)$File: vasprintf.c,v 1.12 2014/05/14 23:09:21 christos Exp $")
114 #else
115 __RCSID("$NetBSD: vasprintf.c,v 1.1.1.4 2014/06/13 01:48:21 christos Exp $");
116 #endif
117 #endif	/* lint */
118 
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>
#include <float.h>
#ifdef HAVE_LIMITS_H
#include <limits.h>
#endif
#ifdef HAVE_STDDEF_H
#include <stddef.h>
#endif
130 
/*
 *  Output buffer sizing, both used by realloc_buff():
 *    ALLOC_CHUNK            extra bytes added on top of the needed size every
 *                           time the buffer is reallocated, so that it does
 *                           not have to grow on every call.
 *    ALLOC_SECURITY_MARGIN  free bytes kept available beyond the length
 *                           requested by the caller, to absorb conversions
 *                           whose output length is only estimated.
 */
#define ALLOC_CHUNK 2048
#define ALLOC_SECURITY_MARGIN 1024   /* big value because some platforms have very big 'G' exponent */
#if ALLOC_CHUNK < ALLOC_SECURITY_MARGIN
#    error  !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
/* note: to be of any use, ALLOC_CHUNK should be much greater than ALLOC_SECURITY_MARGIN */
137 
/*
 *  To save a lot of push/pop, every variable is stored in this
 *  structure, which is passed to nearly every sub-function.
 *  vasprintf() fills in src_string, vargs and maxlen; core() initialises
 *  the buffer and length fields.
 */
typedef struct {
  const char * src_string;        /* current position in the input (format) string */
  char *       buffer_base;       /* output buffer (heap, managed with realloc) */
  char *       dest_string;       /* current write position inside the output buffer */
  size_t       buffer_len;        /* allocated length of output buffer */
  size_t       real_len;          /* real current length of output text */
  size_t       pseudo_len;        /* total length of output text if it were not limited in size */
  size_t       maxlen;            /* output size limit; core() decrements it once to exclude the final NUL */
  va_list      vargs;             /* pointer to current position into vargs */
  char *       sprintf_string;    /* not referenced by the code in this file - presumably an xnprintf leftover */
  FILE *       fprintf_file;      /* not referenced by the code in this file - presumably an xnprintf leftover */
} xprintf_struct;
154 
155 /*
156  *  Realloc buffer if needed
157  *  Return value:  0 = ok
158  *               EOF = not enought memory
159  */
160 static int realloc_buff(xprintf_struct *s, size_t len)
161 {
162   char * ptr;
163 
164   if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
165     len += s->real_len + ALLOC_CHUNK;
166     ptr = (char *)realloc((void *)(s->buffer_base), len);
167     if (ptr == NULL) {
168       s->buffer_base = NULL;
169       return EOF;
170     }
171 
172     s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
173     s->buffer_base = ptr;
174     s->buffer_len = len;
175 
176     (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
177   }
178 
179   return 0;
180 }
181 
182 /*
183  *  Prints 'usual' characters    up to next '%'
184  *                            or up to end of text
185  */
186 static int usual_char(xprintf_struct * s)
187 {
188   size_t len;
189 
190   len = strcspn(s->src_string, "%");     /* reachs the next '%' or end of input string */
191   /* note: 'len' is never 0 because the presence of '%' */
192   /* or end-of-line is checked in the calling function  */
193 
194   if (realloc_buff(s,len) == EOF)
195     return EOF;
196 
197   memcpy(s->dest_string, s->src_string, len);
198   s->src_string += len;
199   s->dest_string += len;
200   s->real_len += len;
201   s->pseudo_len += len;
202 
203   return 0;
204 }
205 
206 /*
207  *  Return value: 0 = ok
208  *                EOF = error
209  */
210 static int print_it(xprintf_struct *s, size_t approx_len,
211                     const char *format_string, ...)
212 {
213   va_list varg;
214   int vsprintf_len;
215   size_t len;
216 
217   if (realloc_buff(s,approx_len) == EOF)
218     return EOF;
219 
220   va_start(varg, format_string);
221   vsprintf_len = vsprintf(s->dest_string, format_string, varg);
222   va_end(varg);
223 
224   /* Check for overflow */
225   assert((s->buffer_base)[s->buffer_len - 1] == 1);
226 
227   if (vsprintf_len == EOF) /* must be done *after* overflow-check */
228     return EOF;
229 
230   s->pseudo_len += vsprintf_len;
231   len = strlen(s->dest_string);
232   s->real_len += len;
233   s->dest_string += len;
234 
235   return 0;
236 }
237 
238 /*
239  *  Prints a string (%s)
240  *  We need special handling because:
241  *     a: the length of the string is unknown
242  *     b: when .prec is used, we must not access any extra byte of the
243  *        string (of course, if the original sprintf() does... what the
244  *        hell, not my problem)
245  *
246  *  Return value: 0 = ok
247  *                EOF = error
248  */
249 static int type_s(xprintf_struct *s, int width, int prec,
250                   const char *format_string, const char *arg_string)
251 {
252   size_t string_len;
253 
254   if (arg_string == NULL)
255     return print_it(s, (size_t)6, "(null)", 0);
256 
257   /* hand-made strlen() whitch stops when 'prec' is reached. */
258   /* if 'prec' is -1 then it is never reached. */
259   string_len = 0;
260   while (arg_string[string_len] != 0 && (size_t)prec != string_len)
261     string_len++;
262 
263   if (width != -1 && string_len < (size_t)width)
264     string_len = (size_t)width;
265 
266   return print_it(s, string_len, format_string, arg_string);
267 }
268 
/*
 *  Read a series of decimal digits. Stop when a non-digit is found.
 *  On return *string points to the first character that is not a digit.
 *
 *  Return value: the value read, between 0 and 32767.  Numbers larger than
 *  32767 saturate at 32767; every digit is still consumed.  The running
 *  value is never allowed to exceed 32767, so the computation cannot
 *  overflow an 'int' however long the digit sequence is.
 */
static int getint(const char **string)
{
  enum { GETINT_MAX = 32767 };
  const char *p = *string;
  int value = 0;

  for (; isdigit((unsigned char)*p) != 0; p++) {
    int digit = *p - '0';

    /* would value * 10 + digit exceed the limit?  Once saturated the
     * test stays true, so the value remains at GETINT_MAX. */
    if (value > (GETINT_MAX - digit) / 10)
      value = GETINT_MAX;
    else
      value = value * 10 + digit;
  }

  *string = p;
  return value;
}
290 
291 /*
292  *  Read a part of the format string. A part is 'usual characters' (ie "blabla")
293  *  or '%%' escape sequence (to print a single '%') or any combination of
294  *  format specifier (ie "%i" or "%10.2d").
295  *  After the current part is managed, the function returns to caller with
296  *  everything ready to manage the following part.
297  *  The caller must ensure than the string is not empty, i.e. the first byte
298  *  is not zero.
299  *
300  *  Return value:  0 = ok
301  *                 EOF = error
302  */
303 static int dispatch(xprintf_struct *s)
304 {
305   const char *initial_ptr;
306   char format_string[24]; /* max length may be something like  "% +-#032768.32768Ld" */
307   char *format_ptr;
308   int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
309   int width, prec, modifier, approx_width;
310   char type;
311   /* most of those variables are here to rewrite the format string */
312 
313 #define SRCTXT  (s->src_string)
314 #define DESTTXT (s->dest_string)
315 
316   /* incoherent format string. Characters after the '%' will be printed with the next call */
317 #define INCOHERENT()         do {SRCTXT=initial_ptr; return 0;} while (0)     /* do/while to avoid */
318 #define INCOHERENT_TEST()    do {if(*SRCTXT==0)   INCOHERENT();} while (0)    /* a null statement  */
319 
320   /* 'normal' text */
321   if (*SRCTXT != '%')
322     return usual_char(s);
323 
324   /* we then have a '%' */
325   SRCTXT++;
326   /* don't check for end-of-string ; this is done later */
327 
328   /* '%%' escape sequence */
329   if (*SRCTXT == '%') {
330     if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
331       return EOF;
332     *DESTTXT = '%';
333     DESTTXT++;
334     SRCTXT++;
335     (s->real_len)++;
336     (s->pseudo_len)++;
337     return 0;
338   }
339 
340   /* '%' managing */
341   initial_ptr = SRCTXT;   /* save current pointer in case of incorrect */
342   /* 'decoding'. Points just after the '%' so the '%' */
343   /* won't be printed in any case, as required. */
344 
345   /* flag */
346   flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
347 
348   for (;; SRCTXT++) {
349     if (*SRCTXT == ' ')
350       flag_space = 1;
351     else if (*SRCTXT == '+')
352       flag_plus = 1;
353     else if (*SRCTXT == '-')
354       flag_minus = 1;
355     else if (*SRCTXT == '#')
356       flag_sharp = 1;
357     else if (*SRCTXT == '0')
358       flag_zero = 1;
359     else
360       break;
361   }
362 
363   INCOHERENT_TEST();    /* here is the first test for end of string */
364 
365   /* width */
366   if (*SRCTXT == '*') {         /* width given by next argument */
367     SRCTXT++;
368     width = va_arg(s->vargs, int);
369     if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
370       width = 0x3fff;
371   } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
372     width = getint(&SRCTXT);
373   else
374     width = -1;                 /* no width specified */
375 
376   INCOHERENT_TEST();
377 
378   /* .prec */
379   if (*SRCTXT == '.') {
380     SRCTXT++;
381     if (*SRCTXT == '*') {       /* .prec given by next argument */
382       SRCTXT++;
383       prec = va_arg(s->vargs, int);
384       if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
385         prec = 0x3fff;
386     } else {                    /* .prec given as ASCII number */
387       if (isdigit((unsigned char)*SRCTXT) == 0)
388         INCOHERENT();
389       prec = getint(&SRCTXT);
390     }
391     INCOHERENT_TEST();
392   } else
393     prec = -1;                  /* no .prec specified */
394 
395   /* modifier */
396   switch (*SRCTXT) {
397   case 'L':
398   case 'h':
399   case 'l':
400   case 'z':
401   case 't':
402     modifier = *SRCTXT;
403     SRCTXT++;
404     if (modifier=='l' && *SRCTXT=='l') {
405       SRCTXT++;
406       modifier = 'L';  /* 'll' == 'L'      long long == long double */
407     } /* only for compatibility ; not portable */
408     INCOHERENT_TEST();
409     break;
410   default:
411     modifier = -1;              /* no modifier specified */
412     break;
413   }
414 
415   /* type */
416   type = *SRCTXT;
417   if (strchr("diouxXfegEGcspn",type) == NULL)
418     INCOHERENT();               /* unknown type */
419   SRCTXT++;
420 
421   /* rewrite format-string */
422   format_string[0] = '%';
423   format_ptr = &(format_string[1]);
424 
425   if (flag_plus) {
426     *format_ptr = '+';
427     format_ptr++;
428   }
429   if (flag_minus) {
430     *format_ptr = '-';
431     format_ptr++;
432   }
433   if (flag_space) {
434     *format_ptr = ' ';
435     format_ptr++;
436   }
437   if (flag_sharp) {
438     *format_ptr = '#';
439     format_ptr++;
440   }
441   if (flag_zero) {
442     *format_ptr = '0';
443     format_ptr++;
444   } /* '0' *must* be the last one */
445 
446   if (width != -1) {
447     sprintf(format_ptr, "%i", width);
448     format_ptr += strlen(format_ptr);
449   }
450 
451   if (prec != -1) {
452     *format_ptr = '.';
453     format_ptr++;
454     sprintf(format_ptr, "%i", prec);
455     format_ptr += strlen(format_ptr);
456   }
457 
458   if (modifier != -1) {
459     if (modifier == 'L' && strchr("diouxX",type) != NULL) {
460       *format_ptr = 'l';
461       format_ptr++;
462       *format_ptr = 'l';
463       format_ptr++;
464     } else {
465       *format_ptr = modifier;
466       format_ptr++;
467     }
468   }
469 
470   *format_ptr = type;
471   format_ptr++;
472   *format_ptr = 0;
473 
474   /* vague approximation of minimal length if width or prec are specified */
475   approx_width = width + prec;
476   if (approx_width < 0) /* because width == -1 and/or prec == -1 */
477     approx_width = 0;
478 
479   switch (type) {
480     /* int */
481   case 'd':
482   case 'i':
483   case 'o':
484   case 'u':
485   case 'x':
486   case 'X':
487     switch (modifier) {
488     case -1 :
489       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
490     case 'L':
491       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
492     case 'l':
493       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
494     case 'h':
495       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
496     case 'z':
497       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
498     case 't':
499       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
500       /* 'int' instead of 'short int' because default promotion is 'int' */
501     default:
502       INCOHERENT();
503     }
504 
505     /* char */
506   case 'c':
507     if (modifier != -1)
508       INCOHERENT();
509     return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
510     /* 'int' instead of 'char' because default promotion is 'int' */
511 
512     /* math */
513   case 'e':
514   case 'f':
515   case 'g':
516   case 'E':
517   case 'G':
518     switch (modifier) {
519     case -1 : /* because of default promotion, no modifier means 'l' */
520     case 'l':
521       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
522     case 'L':
523       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
524     default:
525       INCOHERENT();
526     }
527 
528     /* string */
529   case 's':
530     return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
531 
532     /* pointer */
533   case 'p':
534     if (modifier == -1)
535       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
536     INCOHERENT();
537 
538     /* store */
539   case 'n':
540     if (modifier == -1) {
541       int * p;
542       p = va_arg(s->vargs, int *);
543       if (p != NULL) {
544         *p = s->pseudo_len;
545         return 0;
546       }
547       return EOF;
548     }
549     INCOHERENT();
550 
551   } /* switch */
552 
553   INCOHERENT();                 /* unknown type */
554 
555 #undef INCOHERENT
556 #undef INCOHERENT_TEST
557 #undef SRCTXT
558 #undef DESTTXT
559 }
560 
561 /*
562  *  Return value: number of *virtually* written characters
563  *                EOF = error
564  */
565 static int core(xprintf_struct *s)
566 {
567   size_t save_len;
568   char *dummy_base;
569 
570   /* basic checks */
571   if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
572     return EOF;           /* error for example if value is (int)-10 */
573   s->maxlen--;      /* because initial maxlen counts final 0 */
574   /* note: now 'maxlen' _can_ be zero */
575 
576   if (s->src_string == NULL)
577     s->src_string = "(null)";
578 
579   /* struct init and memory allocation */
580   s->buffer_base = NULL;
581   s->buffer_len = 0;
582   s->real_len = 0;
583   s->pseudo_len = 0;
584   if (realloc_buff(s, (size_t)0) == EOF)
585     return EOF;
586   s->dest_string = s->buffer_base;
587 
588   /* process source string */
589   for (;;) {
590     /* up to end of source string */
591     if (*(s->src_string) == 0) {
592       *(s->dest_string) = '\0';    /* final NUL */
593       break;
594     }
595 
596     if (dispatch(s) == EOF)
597       goto free_EOF;
598 
599     /* up to end of dest string */
600     if (s->real_len >= s->maxlen) {
601       (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
602       break;
603     }
604   }
605 
606   /* for (v)asnprintf */
607   dummy_base = s->buffer_base;
608 
609   dummy_base = s->buffer_base + s->real_len;
610   save_len = s->real_len;
611 
612   /* process the remaining of source string to compute 'pseudo_len'. We
613    * overwrite again and again, starting at 'dummy_base' because we don't
614    * need the text, only char count. */
615   while(*(s->src_string) != 0) { /* up to end of source string */
616     s->real_len = 0;
617     s->dest_string = dummy_base;
618     if (dispatch(s) == EOF)
619       goto free_EOF;
620   }
621 
622   s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
623   if (s->buffer_base == NULL)
624     return EOF; /* should rarely happen because we shrink the buffer */
625   return s->pseudo_len;
626 
627  free_EOF:
628   free(s->buffer_base);
629   return EOF;
630 }
631 
632 int vasprintf(char **ptr, const char *format_string, va_list vargs)
633 {
634   xprintf_struct s;
635   int retval;
636 
637   s.src_string = format_string;
638 #ifdef va_copy
639   va_copy (s.vargs, vargs);
640 #else
641 #ifdef __va_copy
642   __va_copy (s.vargs, vargs);
643 #else
644   memcpy (&s.vargs, &vargs, sizeof (s.va_args));
645 #endif /* __va_copy */
646 #endif /* va_copy */
647   s.maxlen = (size_t)INT_MAX;
648 
649   retval = core(&s);
650   va_end(s.vargs);
651   if (retval == EOF) {
652     *ptr = NULL;
653     return EOF;
654   }
655 
656   *ptr = s.buffer_base;
657   return retval;
658 }
659