109ade360Sjoerg /*- 240b1a6e6Sjoerg * Copyright (c) 2003-2010 Tim Kientzle 309ade360Sjoerg * All rights reserved. 409ade360Sjoerg * 509ade360Sjoerg * Redistribution and use in source and binary forms, with or without 609ade360Sjoerg * modification, are permitted provided that the following conditions 709ade360Sjoerg * are met: 809ade360Sjoerg * 1. Redistributions of source code must retain the above copyright 909ade360Sjoerg * notice, this list of conditions and the following disclaimer. 1009ade360Sjoerg * 2. Redistributions in binary form must reproduce the above copyright 1109ade360Sjoerg * notice, this list of conditions and the following disclaimer in the 1209ade360Sjoerg * documentation and/or other materials provided with the distribution. 1309ade360Sjoerg * 1409ade360Sjoerg * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 1509ade360Sjoerg * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 1609ade360Sjoerg * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 1709ade360Sjoerg * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 1809ade360Sjoerg * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 1909ade360Sjoerg * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 2009ade360Sjoerg * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 2109ade360Sjoerg * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 2209ade360Sjoerg * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 2309ade360Sjoerg * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 2409ade360Sjoerg */ 2509ade360Sjoerg 26*65e637abSchristos #ifndef ARCHIVE_STRING_H_INCLUDED 27*65e637abSchristos #define ARCHIVE_STRING_H_INCLUDED 28*65e637abSchristos 299fde5391Sjoerg #ifndef __LIBARCHIVE_BUILD 3040b1a6e6Sjoerg #ifndef __LIBARCHIVE_TEST 319fde5391Sjoerg #error This header is only to be used internally to libarchive. 329fde5391Sjoerg #endif 3340b1a6e6Sjoerg #endif 349fde5391Sjoerg 3509ade360Sjoerg #include <stdarg.h> 3609ade360Sjoerg #ifdef HAVE_STDLIB_H 3709ade360Sjoerg #include <stdlib.h> /* required for wchar_t on some systems */ 3809ade360Sjoerg #endif 3909ade360Sjoerg #ifdef HAVE_STRING_H 4009ade360Sjoerg #include <string.h> 4109ade360Sjoerg #endif 4209ade360Sjoerg #ifdef HAVE_WCHAR_H 4309ade360Sjoerg #include <wchar.h> 4409ade360Sjoerg #endif 4509ade360Sjoerg 4640b1a6e6Sjoerg #include "archive.h" 4740b1a6e6Sjoerg 4809ade360Sjoerg /* 4940b1a6e6Sjoerg * Basic resizable/reusable string support similar to Java's "StringBuffer." 5009ade360Sjoerg * 5109ade360Sjoerg * Unlike sbuf(9), the buffers here are fully reusable and track the 5209ade360Sjoerg * length throughout. 5309ade360Sjoerg */ 5409ade360Sjoerg 5509ade360Sjoerg struct archive_string { 5609ade360Sjoerg char *s; /* Pointer to the storage */ 5740b1a6e6Sjoerg size_t length; /* Length of 's' in characters */ 5840b1a6e6Sjoerg size_t buffer_length; /* Length of malloc-ed storage in bytes. */ 5909ade360Sjoerg }; 6009ade360Sjoerg 6140b1a6e6Sjoerg struct archive_wstring { 6240b1a6e6Sjoerg wchar_t *s; /* Pointer to the storage */ 6340b1a6e6Sjoerg size_t length; /* Length of 's' in characters */ 6440b1a6e6Sjoerg size_t buffer_length; /* Length of malloc-ed storage in bytes. */ 6540b1a6e6Sjoerg }; 6640b1a6e6Sjoerg 6740b1a6e6Sjoerg struct archive_string_conv; 6840b1a6e6Sjoerg 6909ade360Sjoerg /* Initialize an archive_string object on the stack or elsewhere. */ 7009ade360Sjoerg #define archive_string_init(a) \ 7109ade360Sjoerg do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0) 7209ade360Sjoerg 7309ade360Sjoerg /* Append a C char to an archive_string, resizing as necessary. */ 7409ade360Sjoerg struct archive_string * 7540b1a6e6Sjoerg archive_strappend_char(struct archive_string *, char); 7609ade360Sjoerg 7740b1a6e6Sjoerg /* Ditto for a wchar_t and an archive_wstring. */ 7840b1a6e6Sjoerg struct archive_wstring * 7940b1a6e6Sjoerg archive_wstrappend_wchar(struct archive_wstring *, wchar_t); 8009ade360Sjoerg 8140b1a6e6Sjoerg /* Append a raw array to an archive_string, resizing as necessary */ 8209ade360Sjoerg struct archive_string * 8340b1a6e6Sjoerg archive_array_append(struct archive_string *, const char *, size_t); 8409ade360Sjoerg 8540b1a6e6Sjoerg /* Convert a Unicode string to current locale and append the result. */ 8640b1a6e6Sjoerg /* Returns -1 if conversion fails. */ 8740b1a6e6Sjoerg int 8840b1a6e6Sjoerg archive_string_append_from_wcs(struct archive_string *, const wchar_t *, size_t); 8940b1a6e6Sjoerg 9040b1a6e6Sjoerg 9140b1a6e6Sjoerg /* Create a string conversion object. 9240b1a6e6Sjoerg * Return NULL and set a error message if the conversion is not supported 9340b1a6e6Sjoerg * on the platform. */ 9440b1a6e6Sjoerg struct archive_string_conv * 9540b1a6e6Sjoerg archive_string_conversion_to_charset(struct archive *, const char *, int); 9640b1a6e6Sjoerg struct archive_string_conv * 9740b1a6e6Sjoerg archive_string_conversion_from_charset(struct archive *, const char *, int); 9840b1a6e6Sjoerg /* Create the default string conversion object for reading/writing an archive. 9940b1a6e6Sjoerg * Return NULL if the conversion is unneeded. 10040b1a6e6Sjoerg * Note: On non Windows platform this always returns NULL. 10140b1a6e6Sjoerg */ 10240b1a6e6Sjoerg struct archive_string_conv * 10340b1a6e6Sjoerg archive_string_default_conversion_for_read(struct archive *); 10440b1a6e6Sjoerg struct archive_string_conv * 10540b1a6e6Sjoerg archive_string_default_conversion_for_write(struct archive *); 10640b1a6e6Sjoerg /* Dispose of a string conversion object. */ 10740b1a6e6Sjoerg void 10840b1a6e6Sjoerg archive_string_conversion_free(struct archive *); 10940b1a6e6Sjoerg const char * 11040b1a6e6Sjoerg archive_string_conversion_charset_name(struct archive_string_conv *); 11140b1a6e6Sjoerg void 11240b1a6e6Sjoerg archive_string_conversion_set_opt(struct archive_string_conv *, int); 11340b1a6e6Sjoerg #define SCONV_SET_OPT_UTF8_LIBARCHIVE2X 1 11440b1a6e6Sjoerg #define SCONV_SET_OPT_NORMALIZATION_C 2 11540b1a6e6Sjoerg #define SCONV_SET_OPT_NORMALIZATION_D 4 11640b1a6e6Sjoerg 11740b1a6e6Sjoerg 11840b1a6e6Sjoerg /* Copy one archive_string to another in locale conversion. 11940b1a6e6Sjoerg * Return -1 if conversion fails. */ 12040b1a6e6Sjoerg int 12140b1a6e6Sjoerg archive_strncpy_l(struct archive_string *, const void *, size_t, 12240b1a6e6Sjoerg struct archive_string_conv *); 12340b1a6e6Sjoerg 12440b1a6e6Sjoerg /* Copy one archive_string to another in locale conversion. 12540b1a6e6Sjoerg * Return -1 if conversion fails. */ 12640b1a6e6Sjoerg int 12740b1a6e6Sjoerg archive_strncat_l(struct archive_string *, const void *, size_t, 12840b1a6e6Sjoerg struct archive_string_conv *); 12940b1a6e6Sjoerg 13009ade360Sjoerg 13109ade360Sjoerg /* Copy one archive_string to another */ 13209ade360Sjoerg #define archive_string_copy(dest, src) \ 13340b1a6e6Sjoerg ((dest)->length = 0, archive_string_concat((dest), (src))) 13440b1a6e6Sjoerg #define archive_wstring_copy(dest, src) \ 13540b1a6e6Sjoerg ((dest)->length = 0, archive_wstring_concat((dest), (src))) 13609ade360Sjoerg 1379fde5391Sjoerg /* Concatenate one archive_string to another */ 13840b1a6e6Sjoerg void archive_string_concat(struct archive_string *dest, struct archive_string *src); 13940b1a6e6Sjoerg void archive_wstring_concat(struct archive_wstring *dest, struct archive_wstring *src); 1409fde5391Sjoerg 14109ade360Sjoerg /* Ensure that the underlying buffer is at least as large as the request. */ 14209ade360Sjoerg struct archive_string * 14340b1a6e6Sjoerg archive_string_ensure(struct archive_string *, size_t); 14440b1a6e6Sjoerg struct archive_wstring * 14540b1a6e6Sjoerg archive_wstring_ensure(struct archive_wstring *, size_t); 14609ade360Sjoerg 14709ade360Sjoerg /* Append C string, which may lack trailing \0. */ 1489fde5391Sjoerg /* The source is declared void * here because this gets used with 1499fde5391Sjoerg * "signed char *", "unsigned char *" and "char *" arguments. 1509fde5391Sjoerg * Declaring it "char *" as with some of the other functions just 1519fde5391Sjoerg * leads to a lot of extra casts. */ 15209ade360Sjoerg struct archive_string * 15340b1a6e6Sjoerg archive_strncat(struct archive_string *, const void *, size_t); 15440b1a6e6Sjoerg struct archive_wstring * 15540b1a6e6Sjoerg archive_wstrncat(struct archive_wstring *, const wchar_t *, size_t); 15609ade360Sjoerg 15709ade360Sjoerg /* Append a C string to an archive_string, resizing as necessary. */ 15840b1a6e6Sjoerg struct archive_string * 15940b1a6e6Sjoerg archive_strcat(struct archive_string *, const void *); 16040b1a6e6Sjoerg struct archive_wstring * 16140b1a6e6Sjoerg archive_wstrcat(struct archive_wstring *, const wchar_t *); 16209ade360Sjoerg 16309ade360Sjoerg /* Copy a C string to an archive_string, resizing as necessary. */ 16409ade360Sjoerg #define archive_strcpy(as,p) \ 16540b1a6e6Sjoerg archive_strncpy((as), (p), ((p) == NULL ? 0 : strlen(p))) 16640b1a6e6Sjoerg #define archive_wstrcpy(as,p) \ 16740b1a6e6Sjoerg archive_wstrncpy((as), (p), ((p) == NULL ? 0 : wcslen(p))) 16840b1a6e6Sjoerg #define archive_strcpy_l(as,p,lo) \ 16940b1a6e6Sjoerg archive_strncpy_l((as), (p), ((p) == NULL ? 0 : strlen(p)), (lo)) 17009ade360Sjoerg 17109ade360Sjoerg /* Copy a C string to an archive_string with limit, resizing as necessary. */ 17209ade360Sjoerg #define archive_strncpy(as,p,l) \ 17309ade360Sjoerg ((as)->length=0, archive_strncat((as), (p), (l))) 17440b1a6e6Sjoerg #define archive_wstrncpy(as,p,l) \ 17540b1a6e6Sjoerg ((as)->length = 0, archive_wstrncat((as), (p), (l))) 17609ade360Sjoerg 17709ade360Sjoerg /* Return length of string. */ 17809ade360Sjoerg #define archive_strlen(a) ((a)->length) 17909ade360Sjoerg 18009ade360Sjoerg /* Set string length to zero. */ 18109ade360Sjoerg #define archive_string_empty(a) ((a)->length = 0) 18240b1a6e6Sjoerg #define archive_wstring_empty(a) ((a)->length = 0) 18309ade360Sjoerg 18409ade360Sjoerg /* Release any allocated storage resources. */ 18540b1a6e6Sjoerg void archive_string_free(struct archive_string *); 18640b1a6e6Sjoerg void archive_wstring_free(struct archive_wstring *); 18709ade360Sjoerg 18809ade360Sjoerg /* Like 'vsprintf', but resizes the underlying string as necessary. */ 18940b1a6e6Sjoerg /* Note: This only implements a small subset of standard printf functionality. */ 19040b1a6e6Sjoerg void archive_string_vsprintf(struct archive_string *, const char *, 19140b1a6e6Sjoerg va_list) __LA_PRINTF(2, 0); 19240b1a6e6Sjoerg void archive_string_sprintf(struct archive_string *, const char *, ...) 19340b1a6e6Sjoerg __LA_PRINTF(2, 3); 19409ade360Sjoerg 19540b1a6e6Sjoerg /* Translates from MBS to Unicode. */ 19640b1a6e6Sjoerg /* Returns non-zero if conversion failed in any way. */ 19740b1a6e6Sjoerg int archive_wstring_append_from_mbs(struct archive_wstring *dest, 19840b1a6e6Sjoerg const char *, size_t); 19909ade360Sjoerg 20040b1a6e6Sjoerg 20140b1a6e6Sjoerg /* A "multistring" can hold Unicode, UTF8, or MBS versions of 20240b1a6e6Sjoerg * the string. If you set and read the same version, no translation 20340b1a6e6Sjoerg * is done. If you set and read different versions, the library 20440b1a6e6Sjoerg * will attempt to transparently convert. 20540b1a6e6Sjoerg */ 20640b1a6e6Sjoerg struct archive_mstring { 20740b1a6e6Sjoerg struct archive_string aes_mbs; 20840b1a6e6Sjoerg struct archive_string aes_utf8; 20940b1a6e6Sjoerg struct archive_wstring aes_wcs; 21040b1a6e6Sjoerg struct archive_string aes_mbs_in_locale; 21140b1a6e6Sjoerg /* Bitmap of which of the above are valid. Because we're lazy 21240b1a6e6Sjoerg * about malloc-ing and reusing the underlying storage, we 21340b1a6e6Sjoerg * can't rely on NULL pointers to indicate whether a string 21440b1a6e6Sjoerg * has been set. */ 21540b1a6e6Sjoerg int aes_set; 21640b1a6e6Sjoerg #define AES_SET_MBS 1 21740b1a6e6Sjoerg #define AES_SET_UTF8 2 21840b1a6e6Sjoerg #define AES_SET_WCS 4 21940b1a6e6Sjoerg }; 22040b1a6e6Sjoerg 22140b1a6e6Sjoerg void archive_mstring_clean(struct archive_mstring *); 22240b1a6e6Sjoerg void archive_mstring_copy(struct archive_mstring *dest, struct archive_mstring *src); 22340b1a6e6Sjoerg int archive_mstring_get_mbs(struct archive *, struct archive_mstring *, const char **); 22440b1a6e6Sjoerg int archive_mstring_get_utf8(struct archive *, struct archive_mstring *, const char **); 22540b1a6e6Sjoerg int archive_mstring_get_wcs(struct archive *, struct archive_mstring *, const wchar_t **); 226*65e637abSchristos int archive_mstring_get_mbs_l(struct archive *, struct archive_mstring *, const char **, 22740b1a6e6Sjoerg size_t *, struct archive_string_conv *); 22840b1a6e6Sjoerg int archive_mstring_copy_mbs(struct archive_mstring *, const char *mbs); 22940b1a6e6Sjoerg int archive_mstring_copy_mbs_len(struct archive_mstring *, const char *mbs, 23040b1a6e6Sjoerg size_t); 23140b1a6e6Sjoerg int archive_mstring_copy_utf8(struct archive_mstring *, const char *utf8); 23240b1a6e6Sjoerg int archive_mstring_copy_wcs(struct archive_mstring *, const wchar_t *wcs); 23340b1a6e6Sjoerg int archive_mstring_copy_wcs_len(struct archive_mstring *, 23440b1a6e6Sjoerg const wchar_t *wcs, size_t); 23540b1a6e6Sjoerg int archive_mstring_copy_mbs_len_l(struct archive_mstring *, 23640b1a6e6Sjoerg const char *mbs, size_t, struct archive_string_conv *); 23740b1a6e6Sjoerg int archive_mstring_update_utf8(struct archive *, struct archive_mstring *aes, const char *utf8); 23809ade360Sjoerg 23909ade360Sjoerg 24009ade360Sjoerg #endif 241