1 /* $NetBSD: vasprintf.c,v 1.1.1.7 2018/10/18 23:54:09 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /*########################################################################### 31 # # 32 # vasprintf # 33 # # 34 # Copyright (c) 2002-2005 David TAILLANDIER # 35 # # 36 ###########################################################################*/ 37 38 /* 39 40 This software is distributed under the "modified BSD licence". 41 42 This software is also released with GNU license (GPL) in another file (same 43 source-code, only license differ). 44 45 46 47 Redistribution and use in source and binary forms, with or without 48 modification, are permitted provided that the following conditions are met: 49 50 Redistributions of source code must retain the above copyright notice, this 51 list of conditions and the following disclaimer. Redistributions in binary 52 form must reproduce the above copyright notice, this list of conditions and 53 the following disclaimer in the documentation and/or other materials 54 provided with the distribution. The name of the author may not be used to 55 endorse or promote products derived from this software without specific 56 prior written permission. 57 58 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 59 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 60 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 61 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 62 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 63 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 64 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 65 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 66 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 67 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 68 69 ==================== 70 71 Hacked from xnprintf version of 26th February 2005 to provide only 72 vasprintf by Reuben Thomas <rrt@sc3d.org>. 73 74 ==================== 75 76 77 'printf' function family use the following format string: 78 79 %[flag][width][.prec][modifier]type 80 81 %% is the escape sequence to print a '%' 82 % followed by an unknown format will print the characters without 83 trying to do any interpretation 84 85 flag: none + - # (blank) 86 width: n 0n * 87 prec: none .0 .n .* 88 modifier: F N L h l ll z t ('F' and 'N' are ms-dos/16-bit specific) 89 type: d i o u x X f e g E G c s p n 90 91 92 The function needs to allocate memory to store the full text before to 93 actually writing it. i.e if you want to fnprintf() 1000 characters, the 94 functions will allocate 1000 bytes. 95 This behaviour can be modified: you have to customise the code to flush the 96 internal buffer (writing to screen or file) when it reach a given size. Then 97 the buffer can have a shorter length. But what? If you really need to write 98 HUGE string, don't use printf! 99 During the process, some other memory is allocated (1024 bytes minimum) 100 to handle the output of partial sprintf() calls. If you have only 10000 bytes 101 free in memory, you *may* not be able to nprintf() a 8000 bytes-long text. 102 103 note: if a buffer overflow occurs, exit() is called. This situation should 104 never appear ... but if you want to be *really* sure, you have to modify the 105 code to handle those situations (only one place to modify). 106 A buffer overflow can only occur if your sprintf() do strange things or when 107 you use strange formats. 108 109 */ 110 #include "file.h" 111 112 #ifndef lint 113 #if 0 114 FILE_RCSID("@(#)$File: vasprintf.c,v 1.16 2018/10/01 18:45:39 christos Exp $") 115 #else 116 __RCSID("$NetBSD: vasprintf.c,v 1.1.1.7 2018/10/18 23:54:09 christos Exp $"); 117 #endif 118 #endif /* lint */ 119 120 #include <assert.h> 121 #include <string.h> 122 #include <stdlib.h> 123 #include <stdarg.h> 124 #include <ctype.h> 125 #include <limits.h> 126 #include <stddef.h> 127 128 #define ALLOC_CHUNK 2048 129 #define ALLOC_SECURITY_MARGIN 1024 /* big value because some platforms have very big 'G' exponent */ 130 #if ALLOC_CHUNK < ALLOC_SECURITY_MARGIN 131 # error !!! ALLOC_CHUNK < ALLOC_SECURITY_MARGIN !!! 132 #endif 133 /* note: to have some interest, ALLOC_CHUNK should be much greater than ALLOC_SECURITY_MARGIN */ 134 135 /* 136 * To save a lot of push/pop, every variable are stored into this 137 * structure, which is passed among nearly every sub-functions. 138 */ 139 typedef struct { 140 const char * src_string; /* current position into intput string */ 141 char * buffer_base; /* output buffer */ 142 char * dest_string; /* current position into output string */ 143 size_t buffer_len; /* length of output buffer */ 144 size_t real_len; /* real current length of output text */ 145 size_t pseudo_len; /* total length of output text if it were not limited in size */ 146 size_t maxlen; 147 va_list vargs; /* pointer to current position into vargs */ 148 char * sprintf_string; 149 FILE * fprintf_file; 150 } xprintf_struct; 151 152 /* 153 * Realloc buffer if needed 154 * Return value: 0 = ok 155 * EOF = not enought memory 156 */ 157 static int realloc_buff(xprintf_struct *s, size_t len) 158 { 159 char * ptr; 160 161 if (len + ALLOC_SECURITY_MARGIN + s->real_len > s->buffer_len) { 162 len += s->real_len + ALLOC_CHUNK; 163 ptr = (char *)realloc((void *)(s->buffer_base), len); 164 if (ptr == NULL) { 165 s->buffer_base = NULL; 166 return EOF; 167 } 168 169 s->dest_string = ptr + (size_t)(s->dest_string - s->buffer_base); 170 s->buffer_base = ptr; 171 s->buffer_len = len; 172 173 (s->buffer_base)[s->buffer_len - 1] = 1; /* overflow marker */ 174 } 175 176 return 0; 177 } 178 179 /* 180 * Prints 'usual' characters up to next '%' 181 * or up to end of text 182 */ 183 static int usual_char(xprintf_struct * s) 184 { 185 size_t len; 186 187 len = strcspn(s->src_string, "%"); /* reachs the next '%' or end of input string */ 188 /* note: 'len' is never 0 because the presence of '%' */ 189 /* or end-of-line is checked in the calling function */ 190 191 if (realloc_buff(s,len) == EOF) 192 return EOF; 193 194 memcpy(s->dest_string, s->src_string, len); 195 s->src_string += len; 196 s->dest_string += len; 197 s->real_len += len; 198 s->pseudo_len += len; 199 200 return 0; 201 } 202 203 /* 204 * Return value: 0 = ok 205 * EOF = error 206 */ 207 static int print_it(xprintf_struct *s, size_t approx_len, 208 const char *format_string, ...) 209 { 210 va_list varg; 211 int vsprintf_len; 212 size_t len; 213 214 if (realloc_buff(s,approx_len) == EOF) 215 return EOF; 216 217 va_start(varg, format_string); 218 vsprintf_len = vsprintf(s->dest_string, format_string, varg); 219 va_end(varg); 220 221 /* Check for overflow */ 222 assert((s->buffer_base)[s->buffer_len - 1] == 1); 223 224 if (vsprintf_len == EOF) /* must be done *after* overflow-check */ 225 return EOF; 226 227 s->pseudo_len += vsprintf_len; 228 len = strlen(s->dest_string); 229 s->real_len += len; 230 s->dest_string += len; 231 232 return 0; 233 } 234 235 /* 236 * Prints a string (%s) 237 * We need special handling because: 238 * a: the length of the string is unknown 239 * b: when .prec is used, we must not access any extra byte of the 240 * string (of course, if the original sprintf() does... what the 241 * hell, not my problem) 242 * 243 * Return value: 0 = ok 244 * EOF = error 245 */ 246 static int type_s(xprintf_struct *s, int width, int prec, 247 const char *format_string, const char *arg_string) 248 { 249 size_t string_len; 250 251 if (arg_string == NULL) 252 return print_it(s, (size_t)6, "(null)", 0); 253 254 /* hand-made strlen() whitch stops when 'prec' is reached. */ 255 /* if 'prec' is -1 then it is never reached. */ 256 string_len = 0; 257 while (arg_string[string_len] != 0 && (size_t)prec != string_len) 258 string_len++; 259 260 if (width != -1 && string_len < (size_t)width) 261 string_len = (size_t)width; 262 263 return print_it(s, string_len, format_string, arg_string); 264 } 265 266 /* 267 * Read a serie of digits. Stop when non-digit is found. 268 * Return value: the value read (between 0 and 32767). 269 * Note: no checks are made against overflow. If the string contain a big 270 * number, then the return value won't be what we want (but, in this case, 271 * the programmer don't know whatr he wants, then no problem). 272 */ 273 static int getint(const char **string) 274 { 275 int i = 0; 276 277 while (isdigit((unsigned char)**string) != 0) { 278 i = i * 10 + (**string - '0'); 279 (*string)++; 280 } 281 282 if (i < 0 || i > 32767) 283 i = 32767; /* if we have i==-10 this is not because the number is */ 284 /* negative; this is because the number is big */ 285 return i; 286 } 287 288 /* 289 * Read a part of the format string. A part is 'usual characters' (ie "blabla") 290 * or '%%' escape sequence (to print a single '%') or any combination of 291 * format specifier (ie "%i" or "%10.2d"). 292 * After the current part is managed, the function returns to caller with 293 * everything ready to manage the following part. 294 * The caller must ensure than the string is not empty, i.e. the first byte 295 * is not zero. 296 * 297 * Return value: 0 = ok 298 * EOF = error 299 */ 300 static int dispatch(xprintf_struct *s) 301 { 302 const char *initial_ptr; 303 char format_string[24]; /* max length may be something like "% +-#032768.32768Ld" */ 304 char *format_ptr; 305 int flag_plus, flag_minus, flag_space, flag_sharp, flag_zero; 306 int width, prec, modifier, approx_width; 307 char type; 308 /* most of those variables are here to rewrite the format string */ 309 310 #define SRCTXT (s->src_string) 311 #define DESTTXT (s->dest_string) 312 313 /* incoherent format string. Characters after the '%' will be printed with the next call */ 314 #define INCOHERENT() do {SRCTXT=initial_ptr; return 0;} while (0) /* do/while to avoid */ 315 #define INCOHERENT_TEST() do {if(*SRCTXT==0) INCOHERENT();} while (0) /* a null statement */ 316 317 /* 'normal' text */ 318 if (*SRCTXT != '%') 319 return usual_char(s); 320 321 /* we then have a '%' */ 322 SRCTXT++; 323 /* don't check for end-of-string ; this is done later */ 324 325 /* '%%' escape sequence */ 326 if (*SRCTXT == '%') { 327 if (realloc_buff(s, (size_t)1) == EOF) /* because we can have "%%%%%%%%..." */ 328 return EOF; 329 *DESTTXT = '%'; 330 DESTTXT++; 331 SRCTXT++; 332 (s->real_len)++; 333 (s->pseudo_len)++; 334 return 0; 335 } 336 337 /* '%' managing */ 338 initial_ptr = SRCTXT; /* save current pointer in case of incorrect */ 339 /* 'decoding'. Points just after the '%' so the '%' */ 340 /* won't be printed in any case, as required. */ 341 342 /* flag */ 343 flag_plus = flag_minus = flag_space = flag_sharp = flag_zero = 0; 344 345 for (;; SRCTXT++) { 346 if (*SRCTXT == ' ') 347 flag_space = 1; 348 else if (*SRCTXT == '+') 349 flag_plus = 1; 350 else if (*SRCTXT == '-') 351 flag_minus = 1; 352 else if (*SRCTXT == '#') 353 flag_sharp = 1; 354 else if (*SRCTXT == '0') 355 flag_zero = 1; 356 else 357 break; 358 } 359 360 INCOHERENT_TEST(); /* here is the first test for end of string */ 361 362 /* width */ 363 if (*SRCTXT == '*') { /* width given by next argument */ 364 SRCTXT++; 365 width = va_arg(s->vargs, int); 366 if ((size_t)width > 0x3fffU) /* 'size_t' to check against negative values too */ 367 width = 0x3fff; 368 } else if (isdigit((unsigned char)*SRCTXT)) /* width given as ASCII number */ 369 width = getint(&SRCTXT); 370 else 371 width = -1; /* no width specified */ 372 373 INCOHERENT_TEST(); 374 375 /* .prec */ 376 if (*SRCTXT == '.') { 377 SRCTXT++; 378 if (*SRCTXT == '*') { /* .prec given by next argument */ 379 SRCTXT++; 380 prec = va_arg(s->vargs, int); 381 if ((size_t)prec >= 0x3fffU) /* 'size_t' to check against negative values too */ 382 prec = 0x3fff; 383 } else { /* .prec given as ASCII number */ 384 if (isdigit((unsigned char)*SRCTXT) == 0) 385 INCOHERENT(); 386 prec = getint(&SRCTXT); 387 } 388 INCOHERENT_TEST(); 389 } else 390 prec = -1; /* no .prec specified */ 391 392 /* modifier */ 393 switch (*SRCTXT) { 394 case 'L': 395 case 'h': 396 case 'l': 397 case 'z': 398 case 't': 399 modifier = *SRCTXT; 400 SRCTXT++; 401 if (modifier=='l' && *SRCTXT=='l') { 402 SRCTXT++; 403 modifier = 'L'; /* 'll' == 'L' long long == long double */ 404 } /* only for compatibility ; not portable */ 405 INCOHERENT_TEST(); 406 break; 407 default: 408 modifier = -1; /* no modifier specified */ 409 break; 410 } 411 412 /* type */ 413 type = *SRCTXT; 414 if (strchr("diouxXfegEGcspn",type) == NULL) 415 INCOHERENT(); /* unknown type */ 416 SRCTXT++; 417 418 /* rewrite format-string */ 419 format_string[0] = '%'; 420 format_ptr = &(format_string[1]); 421 422 if (flag_plus) { 423 *format_ptr = '+'; 424 format_ptr++; 425 } 426 if (flag_minus) { 427 *format_ptr = '-'; 428 format_ptr++; 429 } 430 if (flag_space) { 431 *format_ptr = ' '; 432 format_ptr++; 433 } 434 if (flag_sharp) { 435 *format_ptr = '#'; 436 format_ptr++; 437 } 438 if (flag_zero) { 439 *format_ptr = '0'; 440 format_ptr++; 441 } /* '0' *must* be the last one */ 442 443 if (width != -1) { 444 sprintf(format_ptr, "%i", width); 445 format_ptr += strlen(format_ptr); 446 } 447 448 if (prec != -1) { 449 *format_ptr = '.'; 450 format_ptr++; 451 sprintf(format_ptr, "%i", prec); 452 format_ptr += strlen(format_ptr); 453 } 454 455 if (modifier != -1) { 456 if (modifier == 'L' && strchr("diouxX",type) != NULL) { 457 *format_ptr = 'l'; 458 format_ptr++; 459 *format_ptr = 'l'; 460 format_ptr++; 461 } else { 462 *format_ptr = modifier; 463 format_ptr++; 464 } 465 } 466 467 *format_ptr = type; 468 format_ptr++; 469 *format_ptr = 0; 470 471 /* vague approximation of minimal length if width or prec are specified */ 472 approx_width = width + prec; 473 if (approx_width < 0) /* because width == -1 and/or prec == -1 */ 474 approx_width = 0; 475 476 switch (type) { 477 /* int */ 478 case 'd': 479 case 'i': 480 case 'o': 481 case 'u': 482 case 'x': 483 case 'X': 484 switch (modifier) { 485 case -1 : 486 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int)); 487 case 'L': 488 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long long int)); 489 case 'l': 490 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long int)); 491 case 'h': 492 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int)); 493 case 'z': 494 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, size_t)); 495 case 't': 496 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, ptrdiff_t)); 497 /* 'int' instead of 'short int' because default promotion is 'int' */ 498 default: 499 INCOHERENT(); 500 } 501 502 /* char */ 503 case 'c': 504 if (modifier != -1) 505 INCOHERENT(); 506 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, int)); 507 /* 'int' instead of 'char' because default promotion is 'int' */ 508 509 /* math */ 510 case 'e': 511 case 'f': 512 case 'g': 513 case 'E': 514 case 'G': 515 switch (modifier) { 516 case -1 : /* because of default promotion, no modifier means 'l' */ 517 case 'l': 518 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, double)); 519 case 'L': 520 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, long double)); 521 default: 522 INCOHERENT(); 523 } 524 525 /* string */ 526 case 's': 527 return type_s(s, width, prec, format_string, va_arg(s->vargs, const char*)); 528 529 /* pointer */ 530 case 'p': 531 if (modifier == -1) 532 return print_it(s, (size_t)approx_width, format_string, va_arg(s->vargs, void *)); 533 INCOHERENT(); 534 535 /* store */ 536 case 'n': 537 if (modifier == -1) { 538 int * p; 539 p = va_arg(s->vargs, int *); 540 if (p != NULL) { 541 *p = s->pseudo_len; 542 return 0; 543 } 544 return EOF; 545 } 546 INCOHERENT(); 547 548 } /* switch */ 549 550 INCOHERENT(); /* unknown type */ 551 552 #undef INCOHERENT 553 #undef INCOHERENT_TEST 554 #undef SRCTXT 555 #undef DESTTXT 556 } 557 558 /* 559 * Return value: number of *virtually* written characters 560 * EOF = error 561 */ 562 static int core(xprintf_struct *s) 563 { 564 size_t save_len; 565 char *dummy_base; 566 567 /* basic checks */ 568 if ((int)(s->maxlen) <= 0) /* 'int' to check against some conversion */ 569 return EOF; /* error for example if value is (int)-10 */ 570 s->maxlen--; /* because initial maxlen counts final 0 */ 571 /* note: now 'maxlen' _can_ be zero */ 572 573 if (s->src_string == NULL) 574 s->src_string = "(null)"; 575 576 /* struct init and memory allocation */ 577 s->buffer_base = NULL; 578 s->buffer_len = 0; 579 s->real_len = 0; 580 s->pseudo_len = 0; 581 if (realloc_buff(s, (size_t)0) == EOF) 582 return EOF; 583 s->dest_string = s->buffer_base; 584 585 /* process source string */ 586 for (;;) { 587 /* up to end of source string */ 588 if (*(s->src_string) == 0) { 589 *(s->dest_string) = '\0'; /* final NUL */ 590 break; 591 } 592 593 if (dispatch(s) == EOF) 594 goto free_EOF; 595 596 /* up to end of dest string */ 597 if (s->real_len >= s->maxlen) { 598 (s->buffer_base)[s->maxlen] = '\0'; /* final NUL */ 599 break; 600 } 601 } 602 603 /* for (v)asnprintf */ 604 dummy_base = s->buffer_base; 605 606 dummy_base = s->buffer_base + s->real_len; 607 save_len = s->real_len; 608 609 /* process the remaining of source string to compute 'pseudo_len'. We 610 * overwrite again and again, starting at 'dummy_base' because we don't 611 * need the text, only char count. */ 612 while(*(s->src_string) != 0) { /* up to end of source string */ 613 s->real_len = 0; 614 s->dest_string = dummy_base; 615 if (dispatch(s) == EOF) 616 goto free_EOF; 617 } 618 619 s->buffer_base = (char *)realloc((void *)(s->buffer_base), save_len + 1); 620 if (s->buffer_base == NULL) 621 return EOF; /* should rarely happen because we shrink the buffer */ 622 return s->pseudo_len; 623 624 free_EOF: 625 free(s->buffer_base); 626 return EOF; 627 } 628 629 int vasprintf(char **ptr, const char *format_string, va_list vargs) 630 { 631 xprintf_struct s; 632 int retval; 633 634 s.src_string = format_string; 635 #ifdef va_copy 636 va_copy (s.vargs, vargs); 637 #else 638 # ifdef __va_copy 639 __va_copy (s.vargs, vargs); 640 # else 641 # ifdef WIN32 642 s.vargs = vargs; 643 # else 644 memcpy (&s.vargs, &vargs, sizeof (s.va_args)); 645 # endif /* WIN32 */ 646 # endif /* __va_copy */ 647 #endif /* va_copy */ 648 s.maxlen = (size_t)INT_MAX; 649 650 retval = core(&s); 651 va_end(s.vargs); 652 if (retval == EOF) { 653 *ptr = NULL; 654 return EOF; 655 } 656 657 *ptr = s.buffer_base; 658 return retval; 659 } 660