xref: /netbsd-src/external/bsd/file/dist/src/vasprintf.c (revision 4e00368f12e7278a94903a082dfe31dfebb70415)
1 /*	$NetBSD: vasprintf.c,v 1.1.1.5 2015/01/02 20:34:27 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*###########################################################################
31   #                                                                           #
32   #                                vasprintf                                  #
33   #                                                                           #
34   #               Copyright (c) 2002-2005 David TAILLANDIER                   #
35   #                                                                           #
36   ###########################################################################*/
37 
38 /*
39 
40 This software is distributed under the "modified BSD licence".
41 
42 This software is also released with GNU license (GPL) in another file (same
43 source-code, only license differ).
44 
45 
46 
47 Redistribution and use in source and binary forms, with or without
48 modification, are permitted provided that the following conditions are met:
49 
50 Redistributions of source code must retain the above copyright notice, this
51 list of conditions and the following disclaimer. Redistributions in binary
52 form must reproduce the above copyright notice, this list of conditions and
53 the following disclaimer in the documentation and/or other materials
54 provided with the distribution. The name of the author may not be used to
55 endorse or promote products derived from this software without specific
56 prior written permission.
57 
58 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
59 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
60 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
61 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
62 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
64 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
65 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
66 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
67 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 
69 ====================
70 
71 Hacked from xnprintf version of 26th February 2005 to provide only
72 vasprintf by Reuben Thomas <rrt@sc3d.org>.
73 
74 ====================
75 
76 
77 'printf' function family use the following format string:
78 
79 %[flag][width][.prec][modifier]type
80 
81 %% is the escape sequence to print a '%'
82 %  followed by an unknown format will print the characters without
83 trying to do any interpretation
84 
85 flag:   none   +     -     #     (blank)
86 width:  n    0n    *
87 prec:   none   .0    .n     .*
88 modifier:    F N L h l ll z t    ('F' and 'N' are ms-dos/16-bit specific)
89 type:  d i o u x X f e g E G c s p n
90 
91 
The function needs to allocate memory to hold the full text before
actually writing it, i.e. if you want to fnprintf() 1000 characters, the
function will allocate 1000 bytes.
This behaviour can be modified: you have to customise the code to flush the
internal buffer (writing to screen or file) when it reaches a given size. Then
the buffer can be shorter. But why bother? If you really need to write a
HUGE string, don't use printf!
During the process, some extra memory is allocated (1024 bytes minimum)
to handle the output of partial sprintf() calls. If you have only 10000 bytes
of free memory, you *may* not be able to nprintf() an 8000-byte-long text.
102 
Note: if a buffer overflow is detected, the program is aborted by an assert().
This situation should never arise ... but if you want to be *really* sure,
you have to modify the code to handle it (there is only one place to modify).
A buffer overflow can only occur if your sprintf() does strange things or when
you use strange formats.
108 
109 */
110 #include "file.h"
111 
112 #ifndef	lint
113 #if 0
114 FILE_RCSID("@(#)$File: vasprintf.c,v 1.13 2014/12/04 15:56:46 christos Exp $")
115 #else
116 __RCSID("$NetBSD: vasprintf.c,v 1.1.1.5 2015/01/02 20:34:27 christos Exp $");
117 #endif
118 #endif	/* lint */
119 
120 #include <assert.h>
121 #include <string.h>
122 #include <stdlib.h>
123 #include <stdarg.h>
124 #include <ctype.h>
125 #ifdef HAVE_LIMITS_H
126 #include <limits.h>
127 #endif
128 #ifdef HAVE_STDDEF_H
129 #include <stddef.h>
130 #endif
131 
/*
 * Buffer growth policy.
 *
 * ALLOC_SECURITY_MARGIN  slack kept free on top of the estimated size of a
 *                        write; it is big because some platforms have a very
 *                        big 'G' exponent.
 * ALLOC_CHUNK            extra bytes requested whenever the buffer is grown.
 *
 * ALLOC_CHUNK must not be smaller than ALLOC_SECURITY_MARGIN and, to be of
 * any interest, should be much greater than it.
 */
#define ALLOC_SECURITY_MARGIN 1024
#define ALLOC_CHUNK 2048
#if ALLOC_SECURITY_MARGIN > ALLOC_CHUNK
#    error  !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
138 
/*
 *  State of one formatting run.  Everything lives in this single structure
 *  so that one pointer can be handed to nearly every helper instead of a
 *  long list of arguments.
 */
typedef struct {
  /* read position in the format string being processed */
  const char *src_string;
  /* start of the output buffer */
  char *buffer_base;
  /* write position inside the output buffer */
  char *dest_string;
  /* allocated size of the output buffer */
  size_t buffer_len;
  /* length of the text really stored in the buffer so far */
  size_t real_len;
  /* length the text would have if the output were not limited in size */
  size_t pseudo_len;
  /* size limit of the output; the caller's value counts the final 0 */
  size_t maxlen;
  /* current position in the caller's argument list */
  va_list vargs;
  /* the two fields below are not referenced by the code in this file;
     presumably left over from the xnprintf original -- TODO confirm */
  char *sprintf_string;
  FILE *fprintf_file;
} xprintf_struct;
155 
156 /*
157  *  Realloc buffer if needed
158  *  Return value:  0 = ok
159  *               EOF = not enought memory
160  */
161 static int realloc_buff(xprintf_struct *s, size_t len)
162 {
163   char * ptr;
164 
165   if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
166     len += s->real_len + ALLOC_CHUNK;
167     ptr = (char *)realloc((void *)(s->buffer_base), len);
168     if (ptr == NULL) {
169       s->buffer_base = NULL;
170       return EOF;
171     }
172 
173     s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
174     s->buffer_base = ptr;
175     s->buffer_len = len;
176 
177     (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
178   }
179 
180   return 0;
181 }
182 
183 /*
184  *  Prints 'usual' characters    up to next '%'
185  *                            or up to end of text
186  */
187 static int usual_char(xprintf_struct * s)
188 {
189   size_t len;
190 
191   len = strcspn(s->src_string, "%");     /* reachs the next '%' or end of input string */
192   /* note: 'len' is never 0 because the presence of '%' */
193   /* or end-of-line is checked in the calling function  */
194 
195   if (realloc_buff(s,len) == EOF)
196     return EOF;
197 
198   memcpy(s->dest_string, s->src_string, len);
199   s->src_string += len;
200   s->dest_string += len;
201   s->real_len += len;
202   s->pseudo_len += len;
203 
204   return 0;
205 }
206 
207 /*
208  *  Return value: 0 = ok
209  *                EOF = error
210  */
211 static int print_it(xprintf_struct *s, size_t approx_len,
212                     const char *format_string, ...)
213 {
214   va_list varg;
215   int vsprintf_len;
216   size_t len;
217 
218   if (realloc_buff(s,approx_len) == EOF)
219     return EOF;
220 
221   va_start(varg, format_string);
222   vsprintf_len = vsprintf(s->dest_string, format_string, varg);
223   va_end(varg);
224 
225   /* Check for overflow */
226   assert((s->buffer_base)[s->buffer_len - 1] == 1);
227 
228   if (vsprintf_len == EOF) /* must be done *after* overflow-check */
229     return EOF;
230 
231   s->pseudo_len += vsprintf_len;
232   len = strlen(s->dest_string);
233   s->real_len += len;
234   s->dest_string += len;
235 
236   return 0;
237 }
238 
239 /*
240  *  Prints a string (%s)
241  *  We need special handling because:
242  *     a: the length of the string is unknown
243  *     b: when .prec is used, we must not access any extra byte of the
244  *        string (of course, if the original sprintf() does... what the
245  *        hell, not my problem)
246  *
247  *  Return value: 0 = ok
248  *                EOF = error
249  */
250 static int type_s(xprintf_struct *s, int width, int prec,
251                   const char *format_string, const char *arg_string)
252 {
253   size_t string_len;
254 
255   if (arg_string == NULL)
256     return print_it(s, (size_t)6, "(null)", 0);
257 
258   /* hand-made strlen() whitch stops when 'prec' is reached. */
259   /* if 'prec' is -1 then it is never reached. */
260   string_len = 0;
261   while (arg_string[string_len] != 0 && (size_t)prec != string_len)
262     string_len++;
263 
264   if (width != -1 && string_len < (size_t)width)
265     string_len = (size_t)width;
266 
267   return print_it(s, string_len, format_string, arg_string);
268 }
269 
/*
 *  Read a series of decimal digits. Stop when a non-digit is found.
 *
 *  string  in: address of a pointer to the text to read
 *          out: the pointer is moved to the first non-digit character
 *
 *  Return value: the value read, between 0 and 32767. A number bigger than
 *  32767 (however long it is) is returned as 32767; all its digits are
 *  still consumed. 0 is returned when the text does not start with a digit.
 */
static int getint(const char **string)
{
  enum { GETINT_MAX = 32767 };
  const char *p = *string;
  int value = 0;
  int digit;

  for (; isdigit((unsigned char)*p) != 0; p++) {
    digit = *p - '0';
    /* saturate instead of letting the signed multiplication overflow; once
       the cap is reached this test keeps the value there */
    if (value > (GETINT_MAX - digit) / 10)
      value = GETINT_MAX;
    else
      value = value * 10 + digit;
  }

  *string = p;
  return value;
}
291 
292 /*
293  *  Read a part of the format string. A part is 'usual characters' (ie "blabla")
294  *  or '%%' escape sequence (to print a single '%') or any combination of
295  *  format specifier (ie "%i" or "%10.2d").
296  *  After the current part is managed, the function returns to caller with
297  *  everything ready to manage the following part.
298  *  The caller must ensure than the string is not empty, i.e. the first byte
299  *  is not zero.
300  *
301  *  Return value:  0 = ok
302  *                 EOF = error
303  */
304 static int dispatch(xprintf_struct *s)
305 {
306   const char *initial_ptr;
307   char format_string[24]; /* max length may be something like  "% +-#032768.32768Ld" */
308   char *format_ptr;
309   int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
310   int width, prec, modifier, approx_width;
311   char type;
312   /* most of those variables are here to rewrite the format string */
313 
314 #define SRCTXT  (s->src_string)
315 #define DESTTXT (s->dest_string)
316 
317   /* incoherent format string. Characters after the '%' will be printed with the next call */
318 #define INCOHERENT()         do {SRCTXT=initial_ptr; return 0;} while (0)     /* do/while to avoid */
319 #define INCOHERENT_TEST()    do {if(*SRCTXT==0)   INCOHERENT();} while (0)    /* a null statement  */
320 
321   /* 'normal' text */
322   if (*SRCTXT != '%')
323     return usual_char(s);
324 
325   /* we then have a '%' */
326   SRCTXT++;
327   /* don't check for end-of-string ; this is done later */
328 
329   /* '%%' escape sequence */
330   if (*SRCTXT == '%') {
331     if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
332       return EOF;
333     *DESTTXT = '%';
334     DESTTXT++;
335     SRCTXT++;
336     (s->real_len)++;
337     (s->pseudo_len)++;
338     return 0;
339   }
340 
341   /* '%' managing */
342   initial_ptr = SRCTXT;   /* save current pointer in case of incorrect */
343   /* 'decoding'. Points just after the '%' so the '%' */
344   /* won't be printed in any case, as required. */
345 
346   /* flag */
347   flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
348 
349   for (;; SRCTXT++) {
350     if (*SRCTXT == ' ')
351       flag_space = 1;
352     else if (*SRCTXT == '+')
353       flag_plus = 1;
354     else if (*SRCTXT == '-')
355       flag_minus = 1;
356     else if (*SRCTXT == '#')
357       flag_sharp = 1;
358     else if (*SRCTXT == '0')
359       flag_zero = 1;
360     else
361       break;
362   }
363 
364   INCOHERENT_TEST();    /* here is the first test for end of string */
365 
366   /* width */
367   if (*SRCTXT == '*') {         /* width given by next argument */
368     SRCTXT++;
369     width = va_arg(s->vargs, int);
370     if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
371       width = 0x3fff;
372   } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
373     width = getint(&SRCTXT);
374   else
375     width = -1;                 /* no width specified */
376 
377   INCOHERENT_TEST();
378 
379   /* .prec */
380   if (*SRCTXT == '.') {
381     SRCTXT++;
382     if (*SRCTXT == '*') {       /* .prec given by next argument */
383       SRCTXT++;
384       prec = va_arg(s->vargs, int);
385       if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
386         prec = 0x3fff;
387     } else {                    /* .prec given as ASCII number */
388       if (isdigit((unsigned char)*SRCTXT) == 0)
389         INCOHERENT();
390       prec = getint(&SRCTXT);
391     }
392     INCOHERENT_TEST();
393   } else
394     prec = -1;                  /* no .prec specified */
395 
396   /* modifier */
397   switch (*SRCTXT) {
398   case 'L':
399   case 'h':
400   case 'l':
401   case 'z':
402   case 't':
403     modifier = *SRCTXT;
404     SRCTXT++;
405     if (modifier=='l' && *SRCTXT=='l') {
406       SRCTXT++;
407       modifier = 'L';  /* 'll' == 'L'      long long == long double */
408     } /* only for compatibility ; not portable */
409     INCOHERENT_TEST();
410     break;
411   default:
412     modifier = -1;              /* no modifier specified */
413     break;
414   }
415 
416   /* type */
417   type = *SRCTXT;
418   if (strchr("diouxXfegEGcspn",type) == NULL)
419     INCOHERENT();               /* unknown type */
420   SRCTXT++;
421 
422   /* rewrite format-string */
423   format_string[0] = '%';
424   format_ptr = &(format_string[1]);
425 
426   if (flag_plus) {
427     *format_ptr = '+';
428     format_ptr++;
429   }
430   if (flag_minus) {
431     *format_ptr = '-';
432     format_ptr++;
433   }
434   if (flag_space) {
435     *format_ptr = ' ';
436     format_ptr++;
437   }
438   if (flag_sharp) {
439     *format_ptr = '#';
440     format_ptr++;
441   }
442   if (flag_zero) {
443     *format_ptr = '0';
444     format_ptr++;
445   } /* '0' *must* be the last one */
446 
447   if (width != -1) {
448     sprintf(format_ptr, "%i", width);
449     format_ptr += strlen(format_ptr);
450   }
451 
452   if (prec != -1) {
453     *format_ptr = '.';
454     format_ptr++;
455     sprintf(format_ptr, "%i", prec);
456     format_ptr += strlen(format_ptr);
457   }
458 
459   if (modifier != -1) {
460     if (modifier == 'L' && strchr("diouxX",type) != NULL) {
461       *format_ptr = 'l';
462       format_ptr++;
463       *format_ptr = 'l';
464       format_ptr++;
465     } else {
466       *format_ptr = modifier;
467       format_ptr++;
468     }
469   }
470 
471   *format_ptr = type;
472   format_ptr++;
473   *format_ptr = 0;
474 
475   /* vague approximation of minimal length if width or prec are specified */
476   approx_width = width + prec;
477   if (approx_width < 0) /* because width == -1 and/or prec == -1 */
478     approx_width = 0;
479 
480   switch (type) {
481     /* int */
482   case 'd':
483   case 'i':
484   case 'o':
485   case 'u':
486   case 'x':
487   case 'X':
488     switch (modifier) {
489     case -1 :
490       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
491     case 'L':
492       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
493     case 'l':
494       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
495     case 'h':
496       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
497     case 'z':
498       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
499     case 't':
500       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
501       /* 'int' instead of 'short int' because default promotion is 'int' */
502     default:
503       INCOHERENT();
504     }
505 
506     /* char */
507   case 'c':
508     if (modifier != -1)
509       INCOHERENT();
510     return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
511     /* 'int' instead of 'char' because default promotion is 'int' */
512 
513     /* math */
514   case 'e':
515   case 'f':
516   case 'g':
517   case 'E':
518   case 'G':
519     switch (modifier) {
520     case -1 : /* because of default promotion, no modifier means 'l' */
521     case 'l':
522       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
523     case 'L':
524       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
525     default:
526       INCOHERENT();
527     }
528 
529     /* string */
530   case 's':
531     return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
532 
533     /* pointer */
534   case 'p':
535     if (modifier == -1)
536       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
537     INCOHERENT();
538 
539     /* store */
540   case 'n':
541     if (modifier == -1) {
542       int * p;
543       p = va_arg(s->vargs, int *);
544       if (p != NULL) {
545         *p = s->pseudo_len;
546         return 0;
547       }
548       return EOF;
549     }
550     INCOHERENT();
551 
552   } /* switch */
553 
554   INCOHERENT();                 /* unknown type */
555 
556 #undef INCOHERENT
557 #undef INCOHERENT_TEST
558 #undef SRCTXT
559 #undef DESTTXT
560 }
561 
562 /*
563  *  Return value: number of *virtually* written characters
564  *                EOF = error
565  */
566 static int core(xprintf_struct *s)
567 {
568   size_t save_len;
569   char *dummy_base;
570 
571   /* basic checks */
572   if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
573     return EOF;           /* error for example if value is (int)-10 */
574   s->maxlen--;      /* because initial maxlen counts final 0 */
575   /* note: now 'maxlen' _can_ be zero */
576 
577   if (s->src_string == NULL)
578     s->src_string = "(null)";
579 
580   /* struct init and memory allocation */
581   s->buffer_base = NULL;
582   s->buffer_len = 0;
583   s->real_len = 0;
584   s->pseudo_len = 0;
585   if (realloc_buff(s, (size_t)0) == EOF)
586     return EOF;
587   s->dest_string = s->buffer_base;
588 
589   /* process source string */
590   for (;;) {
591     /* up to end of source string */
592     if (*(s->src_string) == 0) {
593       *(s->dest_string) = '\0';    /* final NUL */
594       break;
595     }
596 
597     if (dispatch(s) == EOF)
598       goto free_EOF;
599 
600     /* up to end of dest string */
601     if (s->real_len >= s->maxlen) {
602       (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
603       break;
604     }
605   }
606 
607   /* for (v)asnprintf */
608   dummy_base = s->buffer_base;
609 
610   dummy_base = s->buffer_base + s->real_len;
611   save_len = s->real_len;
612 
613   /* process the remaining of source string to compute 'pseudo_len'. We
614    * overwrite again and again, starting at 'dummy_base' because we don't
615    * need the text, only char count. */
616   while(*(s->src_string) != 0) { /* up to end of source string */
617     s->real_len = 0;
618     s->dest_string = dummy_base;
619     if (dispatch(s) == EOF)
620       goto free_EOF;
621   }
622 
623   s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
624   if (s->buffer_base == NULL)
625     return EOF; /* should rarely happen because we shrink the buffer */
626   return s->pseudo_len;
627 
628  free_EOF:
629   free(s->buffer_base);
630   return EOF;
631 }
632 
633 int vasprintf(char **ptr, const char *format_string, va_list vargs)
634 {
635   xprintf_struct s;
636   int retval;
637 
638   s.src_string = format_string;
639 #ifdef va_copy
640   va_copy (s.vargs, vargs);
641 #else
642 # ifdef __va_copy
643   __va_copy (s.vargs, vargs);
644 # else
645 #  ifdef WIN32
646   s.vargs = vargs;
647 #  else
648   memcpy (&s.vargs, &vargs, sizeof (s.va_args));
649 #  endif /* WIN32 */
650 # endif /* __va_copy */
651 #endif /* va_copy */
652   s.maxlen = (size_t)INT_MAX;
653 
654   retval = core(&s);
655   va_end(s.vargs);
656   if (retval == EOF) {
657     *ptr = NULL;
658     return EOF;
659   }
660 
661   *ptr = s.buffer_base;
662   return retval;
663 }
664