xref: /netbsd-src/external/bsd/file/dist/src/vasprintf.c (revision deb6f0161a9109e7de9b519dc8dfb9478668dcdd)
1 /*	$NetBSD: vasprintf.c,v 1.1.1.7 2018/10/18 23:54:09 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*###########################################################################
31   #                                                                           #
32   #                                vasprintf                                  #
33   #                                                                           #
34   #               Copyright (c) 2002-2005 David TAILLANDIER                   #
35   #                                                                           #
36   ###########################################################################*/
37 
38 /*
39 
40 This software is distributed under the "modified BSD licence".
41 
42 This software is also released with GNU license (GPL) in another file (same
43 source-code, only license differ).
44 
45 
46 
47 Redistribution and use in source and binary forms, with or without
48 modification, are permitted provided that the following conditions are met:
49 
50 Redistributions of source code must retain the above copyright notice, this
51 list of conditions and the following disclaimer. Redistributions in binary
52 form must reproduce the above copyright notice, this list of conditions and
53 the following disclaimer in the documentation and/or other materials
54 provided with the distribution. The name of the author may not be used to
55 endorse or promote products derived from this software without specific
56 prior written permission.
57 
58 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
59 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
60 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
61 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
62 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
63 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
64 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
65 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
66 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
67 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
68 
69 ====================
70 
71 Hacked from xnprintf version of 26th February 2005 to provide only
72 vasprintf by Reuben Thomas <rrt@sc3d.org>.
73 
74 ====================
75 
76 
77 'printf' function family use the following format string:
78 
79 %[flag][width][.prec][modifier]type
80 
81 %% is the escape sequence to print a '%'
82 %  followed by an unknown format will print the characters without
83 trying to do any interpretation
84 
85 flag:   none   +     -     #     (blank)
86 width:  n    0n    *
87 prec:   none   .0    .n     .*
88 modifier:    F N L h l ll z t    ('F' and 'N' are ms-dos/16-bit specific)
89 type:  d i o u x X f e g E G c s p n
90 
91 
92 The function needs to allocate memory to store the full text before
93 actually writing it, i.e. if you want to fnprintf() 1000 characters, the
94 function will allocate 1000 bytes.
95 This behaviour can be modified: you have to customise the code to flush the
96 internal buffer (writing to screen or file) when it reaches a given size. Then
97 the buffer can be shorter. But why bother? If you really need to write a
98 HUGE string, don't use printf!
99 During the process, some extra memory is allocated (1024 bytes minimum)
100 to handle the output of partial sprintf() calls. If you have only 10000 bytes
101 of free memory, you *may* not be able to nprintf() an 8000-byte-long text.
102 
103 note: if a buffer overflow is detected, the program is aborted. This should
104 never happen ... but if you want to be *really* sure, you have to modify the
105 code to handle those situations (only one place to modify).
106 A buffer overflow can only occur if your sprintf() does strange things or when
107 you use strange formats.
108 
109 */
110 #include "file.h"
111 
112 #ifndef	lint
113 #if 0
114 FILE_RCSID("@(#)$File: vasprintf.c,v 1.16 2018/10/01 18:45:39 christos Exp $")
115 #else
116 __RCSID("$NetBSD: vasprintf.c,v 1.1.1.7 2018/10/18 23:54:09 christos Exp $");
117 #endif
118 #endif	/* lint */
119 
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <limits.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
127 
/* Extra bytes requested, on top of what is needed, each time the output buffer grows. */
#define ALLOC_CHUNK 2048
/* Slack always kept free after the requested length; large because some
 * platforms have very big 'G' exponents. */
#define ALLOC_SECURITY_MARGIN 1024
/* For the chunking to be of any use, ALLOC_CHUNK should be much greater
 * than ALLOC_SECURITY_MARGIN; at the very least it must not be smaller. */
#if ALLOC_CHUNK < ALLOC_SECURITY_MARGIN
#    error  !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!!
#endif
134 
/*
 *  Formatting state shared by the helpers in this file.  Instead of
 *  passing many separate arguments around, nearly every sub-function
 *  receives a pointer to one of these.
 */
typedef struct {
  const char *src_string;     /* read position in the input (format) string */
  char *buffer_base;          /* start of the output buffer */
  char *dest_string;          /* write position in the output buffer */
  size_t buffer_len;          /* allocated size of the output buffer */
  size_t real_len;            /* length of the text actually stored so far */
  size_t pseudo_len;          /* length the text would have if it were not limited in size */
  size_t maxlen;              /* output size limit; on entry to core() it counts the final NUL */
  va_list vargs;              /* arguments not yet consumed */
  char *sprintf_string;       /* not used by the code below; presumably left over from xnprintf */
  FILE *fprintf_file;         /* not used by the code below; presumably left over from xnprintf */
} xprintf_struct;
151 
152 /*
153  *  Realloc buffer if needed
154  *  Return value:  0 = ok
155  *               EOF = not enought memory
156  */
157 static int realloc_buff(xprintf_struct *s, size_t len)
158 {
159   char * ptr;
160 
161   if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) {
162     len += s->real_len + ALLOC_CHUNK;
163     ptr = (char *)realloc((void *)(s->buffer_base), len);
164     if (ptr == NULL) {
165       s->buffer_base = NULL;
166       return EOF;
167     }
168 
169     s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base);
170     s->buffer_base = ptr;
171     s->buffer_len = len;
172 
173     (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */
174   }
175 
176   return 0;
177 }
178 
179 /*
180  *  Prints 'usual' characters    up to next '%'
181  *                            or up to end of text
182  */
183 static int usual_char(xprintf_struct * s)
184 {
185   size_t len;
186 
187   len = strcspn(s->src_string, "%");     /* reachs the next '%' or end of input string */
188   /* note: 'len' is never 0 because the presence of '%' */
189   /* or end-of-line is checked in the calling function  */
190 
191   if (realloc_buff(s,len) == EOF)
192     return EOF;
193 
194   memcpy(s->dest_string, s->src_string, len);
195   s->src_string += len;
196   s->dest_string += len;
197   s->real_len += len;
198   s->pseudo_len += len;
199 
200   return 0;
201 }
202 
203 /*
204  *  Return value: 0 = ok
205  *                EOF = error
206  */
207 static int print_it(xprintf_struct *s, size_t approx_len,
208                     const char *format_string, ...)
209 {
210   va_list varg;
211   int vsprintf_len;
212   size_t len;
213 
214   if (realloc_buff(s,approx_len) == EOF)
215     return EOF;
216 
217   va_start(varg, format_string);
218   vsprintf_len = vsprintf(s->dest_string, format_string, varg);
219   va_end(varg);
220 
221   /* Check for overflow */
222   assert((s->buffer_base)[s->buffer_len - 1] == 1);
223 
224   if (vsprintf_len == EOF) /* must be done *after* overflow-check */
225     return EOF;
226 
227   s->pseudo_len += vsprintf_len;
228   len = strlen(s->dest_string);
229   s->real_len += len;
230   s->dest_string += len;
231 
232   return 0;
233 }
234 
235 /*
236  *  Prints a string (%s)
237  *  We need special handling because:
238  *     a: the length of the string is unknown
239  *     b: when .prec is used, we must not access any extra byte of the
240  *        string (of course, if the original sprintf() does... what the
241  *        hell, not my problem)
242  *
243  *  Return value: 0 = ok
244  *                EOF = error
245  */
246 static int type_s(xprintf_struct *s, int width, int prec,
247                   const char *format_string, const char *arg_string)
248 {
249   size_t string_len;
250 
251   if (arg_string == NULL)
252     return print_it(s, (size_t)6, "(null)", 0);
253 
254   /* hand-made strlen() whitch stops when 'prec' is reached. */
255   /* if 'prec' is -1 then it is never reached. */
256   string_len = 0;
257   while (arg_string[string_len] != 0 && (size_t)prec != string_len)
258     string_len++;
259 
260   if (width != -1 && string_len < (size_t)width)
261     string_len = (size_t)width;
262 
263   return print_it(s, string_len, format_string, arg_string);
264 }
265 
/*
 *  Read a series of decimal digits and advance '*string' past all of them.
 *  Stops at the first non-digit.
 *  Return value: the value read, between 0 and 32767.  Numbers larger than
 *  32767 saturate at 32767; the accumulator never exceeds that value, so no
 *  signed overflow can occur however long the digit sequence is.
 */
static int getint(const char **string)
{
  const int limit = 32767;
  int i = 0;

  while (isdigit((unsigned char)**string) != 0) {
    int digit = **string - '0';

    if (i > (limit - digit) / 10)
      i = limit;              /* would exceed the limit: saturate */
    else
      i = i * 10 + digit;
    (*string)++;              /* the remaining digits are still consumed */
  }

  return i;
}
287 
288 /*
289  *  Read a part of the format string. A part is 'usual characters' (ie "blabla")
290  *  or '%%' escape sequence (to print a single '%') or any combination of
291  *  format specifier (ie "%i" or "%10.2d").
292  *  After the current part is managed, the function returns to caller with
293  *  everything ready to manage the following part.
294  *  The caller must ensure than the string is not empty, i.e. the first byte
295  *  is not zero.
296  *
297  *  Return value:  0 = ok
298  *                 EOF = error
299  */
300 static int dispatch(xprintf_struct *s)
301 {
302   const char *initial_ptr;
303   char format_string[24]; /* max length may be something like  "% +-#032768.32768Ld" */
304   char *format_ptr;
305   int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero;
306   int width, prec, modifier, approx_width;
307   char type;
308   /* most of those variables are here to rewrite the format string */
309 
310 #define SRCTXT  (s->src_string)
311 #define DESTTXT (s->dest_string)
312 
313   /* incoherent format string. Characters after the '%' will be printed with the next call */
314 #define INCOHERENT()         do {SRCTXT=initial_ptr; return 0;} while (0)     /* do/while to avoid */
315 #define INCOHERENT_TEST()    do {if(*SRCTXT==0)   INCOHERENT();} while (0)    /* a null statement  */
316 
317   /* 'normal' text */
318   if (*SRCTXT != '%')
319     return usual_char(s);
320 
321   /* we then have a '%' */
322   SRCTXT++;
323   /* don't check for end-of-string ; this is done later */
324 
325   /* '%%' escape sequence */
326   if (*SRCTXT == '%') {
327     if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */
328       return EOF;
329     *DESTTXT = '%';
330     DESTTXT++;
331     SRCTXT++;
332     (s->real_len)++;
333     (s->pseudo_len)++;
334     return 0;
335   }
336 
337   /* '%' managing */
338   initial_ptr = SRCTXT;   /* save current pointer in case of incorrect */
339   /* 'decoding'. Points just after the '%' so the '%' */
340   /* won't be printed in any case, as required. */
341 
342   /* flag */
343   flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0;
344 
345   for (;; SRCTXT++) {
346     if (*SRCTXT == ' ')
347       flag_space = 1;
348     else if (*SRCTXT == '+')
349       flag_plus = 1;
350     else if (*SRCTXT == '-')
351       flag_minus = 1;
352     else if (*SRCTXT == '#')
353       flag_sharp = 1;
354     else if (*SRCTXT == '0')
355       flag_zero = 1;
356     else
357       break;
358   }
359 
360   INCOHERENT_TEST();    /* here is the first test for end of string */
361 
362   /* width */
363   if (*SRCTXT == '*') {         /* width given by next argument */
364     SRCTXT++;
365     width = va_arg(s->vargs, int);
366     if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */
367       width = 0x3fff;
368   } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */
369     width = getint(&SRCTXT);
370   else
371     width = -1;                 /* no width specified */
372 
373   INCOHERENT_TEST();
374 
375   /* .prec */
376   if (*SRCTXT == '.') {
377     SRCTXT++;
378     if (*SRCTXT == '*') {       /* .prec given by next argument */
379       SRCTXT++;
380       prec = va_arg(s->vargs, int);
381       if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */
382         prec = 0x3fff;
383     } else {                    /* .prec given as ASCII number */
384       if (isdigit((unsigned char)*SRCTXT) == 0)
385         INCOHERENT();
386       prec = getint(&SRCTXT);
387     }
388     INCOHERENT_TEST();
389   } else
390     prec = -1;                  /* no .prec specified */
391 
392   /* modifier */
393   switch (*SRCTXT) {
394   case 'L':
395   case 'h':
396   case 'l':
397   case 'z':
398   case 't':
399     modifier = *SRCTXT;
400     SRCTXT++;
401     if (modifier=='l' && *SRCTXT=='l') {
402       SRCTXT++;
403       modifier = 'L';  /* 'll' == 'L'      long long == long double */
404     } /* only for compatibility ; not portable */
405     INCOHERENT_TEST();
406     break;
407   default:
408     modifier = -1;              /* no modifier specified */
409     break;
410   }
411 
412   /* type */
413   type = *SRCTXT;
414   if (strchr("diouxXfegEGcspn",type) == NULL)
415     INCOHERENT();               /* unknown type */
416   SRCTXT++;
417 
418   /* rewrite format-string */
419   format_string[0] = '%';
420   format_ptr = &(format_string[1]);
421 
422   if (flag_plus) {
423     *format_ptr = '+';
424     format_ptr++;
425   }
426   if (flag_minus) {
427     *format_ptr = '-';
428     format_ptr++;
429   }
430   if (flag_space) {
431     *format_ptr = ' ';
432     format_ptr++;
433   }
434   if (flag_sharp) {
435     *format_ptr = '#';
436     format_ptr++;
437   }
438   if (flag_zero) {
439     *format_ptr = '0';
440     format_ptr++;
441   } /* '0' *must* be the last one */
442 
443   if (width != -1) {
444     sprintf(format_ptr, "%i", width);
445     format_ptr += strlen(format_ptr);
446   }
447 
448   if (prec != -1) {
449     *format_ptr = '.';
450     format_ptr++;
451     sprintf(format_ptr, "%i", prec);
452     format_ptr += strlen(format_ptr);
453   }
454 
455   if (modifier != -1) {
456     if (modifier == 'L' && strchr("diouxX",type) != NULL) {
457       *format_ptr = 'l';
458       format_ptr++;
459       *format_ptr = 'l';
460       format_ptr++;
461     } else {
462       *format_ptr = modifier;
463       format_ptr++;
464     }
465   }
466 
467   *format_ptr = type;
468   format_ptr++;
469   *format_ptr = 0;
470 
471   /* vague approximation of minimal length if width or prec are specified */
472   approx_width = width + prec;
473   if (approx_width < 0) /* because width == -1 and/or prec == -1 */
474     approx_width = 0;
475 
476   switch (type) {
477     /* int */
478   case 'd':
479   case 'i':
480   case 'o':
481   case 'u':
482   case 'x':
483   case 'X':
484     switch (modifier) {
485     case -1 :
486       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
487     case 'L':
488       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int));
489     case 'l':
490       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int));
491     case 'h':
492       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
493     case 'z':
494       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t));
495     case 't':
496       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t));
497       /* 'int' instead of 'short int' because default promotion is 'int' */
498     default:
499       INCOHERENT();
500     }
501 
502     /* char */
503   case 'c':
504     if (modifier != -1)
505       INCOHERENT();
506     return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int));
507     /* 'int' instead of 'char' because default promotion is 'int' */
508 
509     /* math */
510   case 'e':
511   case 'f':
512   case 'g':
513   case 'E':
514   case 'G':
515     switch (modifier) {
516     case -1 : /* because of default promotion, no modifier means 'l' */
517     case 'l':
518       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double));
519     case 'L':
520       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double));
521     default:
522       INCOHERENT();
523     }
524 
525     /* string */
526   case 's':
527     return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*));
528 
529     /* pointer */
530   case 'p':
531     if (modifier == -1)
532       return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *));
533     INCOHERENT();
534 
535     /* store */
536   case 'n':
537     if (modifier == -1) {
538       int * p;
539       p = va_arg(s->vargs, int *);
540       if (p != NULL) {
541         *p = s->pseudo_len;
542         return 0;
543       }
544       return EOF;
545     }
546     INCOHERENT();
547 
548   } /* switch */
549 
550   INCOHERENT();                 /* unknown type */
551 
552 #undef INCOHERENT
553 #undef INCOHERENT_TEST
554 #undef SRCTXT
555 #undef DESTTXT
556 }
557 
558 /*
559  *  Return value: number of *virtually* written characters
560  *                EOF = error
561  */
562 static int core(xprintf_struct *s)
563 {
564   size_t save_len;
565   char *dummy_base;
566 
567   /* basic checks */
568   if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */
569     return EOF;           /* error for example if value is (int)-10 */
570   s->maxlen--;      /* because initial maxlen counts final 0 */
571   /* note: now 'maxlen' _can_ be zero */
572 
573   if (s->src_string == NULL)
574     s->src_string = "(null)";
575 
576   /* struct init and memory allocation */
577   s->buffer_base = NULL;
578   s->buffer_len = 0;
579   s->real_len = 0;
580   s->pseudo_len = 0;
581   if (realloc_buff(s, (size_t)0) == EOF)
582     return EOF;
583   s->dest_string = s->buffer_base;
584 
585   /* process source string */
586   for (;;) {
587     /* up to end of source string */
588     if (*(s->src_string) == 0) {
589       *(s->dest_string) = '\0';    /* final NUL */
590       break;
591     }
592 
593     if (dispatch(s) == EOF)
594       goto free_EOF;
595 
596     /* up to end of dest string */
597     if (s->real_len >= s->maxlen) {
598       (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */
599       break;
600     }
601   }
602 
603   /* for (v)asnprintf */
604   dummy_base = s->buffer_base;
605 
606   dummy_base = s->buffer_base + s->real_len;
607   save_len = s->real_len;
608 
609   /* process the remaining of source string to compute 'pseudo_len'. We
610    * overwrite again and again, starting at 'dummy_base' because we don't
611    * need the text, only char count. */
612   while(*(s->src_string) != 0) { /* up to end of source string */
613     s->real_len = 0;
614     s->dest_string = dummy_base;
615     if (dispatch(s) == EOF)
616       goto free_EOF;
617   }
618 
619   s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1);
620   if (s->buffer_base == NULL)
621     return EOF; /* should rarely happen because we shrink the buffer */
622   return s->pseudo_len;
623 
624  free_EOF:
625   free(s->buffer_base);
626   return EOF;
627 }
628 
629 int vasprintf(char **ptr, const char *format_string, va_list vargs)
630 {
631   xprintf_struct s;
632   int retval;
633 
634   s.src_string = format_string;
635 #ifdef va_copy
636   va_copy (s.vargs, vargs);
637 #else
638 # ifdef __va_copy
639   __va_copy (s.vargs, vargs);
640 # else
641 #  ifdef WIN32
642   s.vargs = vargs;
643 #  else
644   memcpy (&s.vargs, &vargs, sizeof (s.va_args));
645 #  endif /* WIN32 */
646 # endif /* __va_copy */
647 #endif /* va_copy */
648   s.maxlen = (size_t)INT_MAX;
649 
650   retval = core(&s);
651   va_end(s.vargs);
652   if (retval == EOF) {
653     *ptr = NULL;
654     return EOF;
655   }
656 
657   *ptr = s.buffer_base;
658   return retval;
659 }
660