xref: /netbsd-src/external/bsd/libarchive/dist/libarchive/archive_string.h (revision 65e637ab3a9cc7c3e7749c941a1011ecd65517e6)
/*-
 * Copyright (c) 2003-2010 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
2509ade360Sjoerg 
26*65e637abSchristos #ifndef ARCHIVE_STRING_H_INCLUDED
27*65e637abSchristos #define ARCHIVE_STRING_H_INCLUDED
28*65e637abSchristos 
299fde5391Sjoerg #ifndef __LIBARCHIVE_BUILD
3040b1a6e6Sjoerg #ifndef __LIBARCHIVE_TEST
319fde5391Sjoerg #error This header is only to be used internally to libarchive.
329fde5391Sjoerg #endif
3340b1a6e6Sjoerg #endif
349fde5391Sjoerg 
3509ade360Sjoerg #include <stdarg.h>
3609ade360Sjoerg #ifdef HAVE_STDLIB_H
3709ade360Sjoerg #include <stdlib.h>  /* required for wchar_t on some systems */
3809ade360Sjoerg #endif
3909ade360Sjoerg #ifdef HAVE_STRING_H
4009ade360Sjoerg #include <string.h>
4109ade360Sjoerg #endif
4209ade360Sjoerg #ifdef HAVE_WCHAR_H
4309ade360Sjoerg #include <wchar.h>
4409ade360Sjoerg #endif
4509ade360Sjoerg 
4640b1a6e6Sjoerg #include "archive.h"
4740b1a6e6Sjoerg 
/*
 * Basic resizable/reusable string support similar to Java's "StringBuffer."
 *
 * Unlike sbuf(9), the buffers here are fully reusable and track the
 * length throughout.
 */

/*
 * Resizable buffer of 'char'.  The current content length and the size
 * of the underlying storage are tracked separately, so a string can be
 * emptied (length set to 0) without giving up its buffer.
 */
struct archive_string {
	char	*s;		/* Pointer to the storage */
	size_t	 length;	/* Length of 's' in characters */
	size_t	 buffer_length;	/* Length of malloc-ed storage in bytes. */
};
6009ade360Sjoerg 
/*
 * Wide-character counterpart of struct archive_string: same three
 * fields in the same order, with 'wchar_t' storage.
 */
struct archive_wstring {
	wchar_t	*s;		/* Pointer to the storage */
	size_t	 length;	/* Length of 's' in characters */
	size_t	 buffer_length;	/* Length of malloc-ed storage in bytes. */
};
6640b1a6e6Sjoerg 
/* String conversion object.  Only forward-declared in this header, so
 * code including it can hold and pass pointers but not look inside. */
struct archive_string_conv;
6840b1a6e6Sjoerg 
/*
 * Initialize an archive_string object on the stack or elsewhere.
 *
 * Sets the storage pointer to NULL and both length fields to zero.  The
 * previous contents are never read, so this is usable on uninitialized
 * memory; it also does not release anything the object may already
 * point to.  'a' is evaluated three times, so pass an expression
 * without side effects.
 */
#define	archive_string_init(a)	\
	do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0)
7209ade360Sjoerg 
/* Append a C char to an archive_string, resizing as necessary. */
struct archive_string *
archive_strappend_char(struct archive_string *, char);

/* Ditto for a wchar_t and an archive_wstring. */
struct archive_wstring *
archive_wstrappend_wchar(struct archive_wstring *, wchar_t);

/* Append a raw array to an archive_string, resizing as necessary */
struct archive_string *
archive_array_append(struct archive_string *, const char *, size_t);

/* Convert a Unicode string to current locale and append the result. */
/* Returns -1 if conversion fails. */
int
archive_string_append_from_wcs(struct archive_string *, const wchar_t *, size_t);
8940b1a6e6Sjoerg 
9040b1a6e6Sjoerg 
/*
 * Create a string conversion object.
 * Return NULL and set an error message if the conversion is not supported
 * on the platform.
 */
struct archive_string_conv *
archive_string_conversion_to_charset(struct archive *, const char *, int);
struct archive_string_conv *
archive_string_conversion_from_charset(struct archive *, const char *, int);

/*
 * Create the default string conversion object for reading/writing an archive.
 * Return NULL if the conversion is unneeded.
 * Note: On non-Windows platforms this always returns NULL.
 */
struct archive_string_conv *
archive_string_default_conversion_for_read(struct archive *);
struct archive_string_conv *
archive_string_default_conversion_for_write(struct archive *);

/*
 * Dispose of string conversion objects.
 * NOTE(review): the parameter is the archive, not a single conversion
 * object -- presumably this releases the conversions associated with
 * that archive; confirm against the implementation.
 */
void
archive_string_conversion_free(struct archive *);

/* NOTE(review): by its name, reports the charset name tied to the given
 * conversion object; which side (source or destination) is not visible
 * in this header -- confirm against the implementation. */
const char *
archive_string_conversion_charset_name(struct archive_string_conv *);

/* Set an option on a conversion object.  The int argument presumably
 * takes one of the SCONV_SET_OPT_* values below -- TODO confirm. */
void
archive_string_conversion_set_opt(struct archive_string_conv *, int);
#define SCONV_SET_OPT_UTF8_LIBARCHIVE2X	1
#define SCONV_SET_OPT_NORMALIZATION_C	2
#define SCONV_SET_OPT_NORMALIZATION_D	4
11640b1a6e6Sjoerg 
11740b1a6e6Sjoerg 
/* Copy a string of the given length into an archive_string, applying
 * the supplied string conversion.
 * Return -1 if conversion fails. */
int
archive_strncpy_l(struct archive_string *, const void *, size_t,
    struct archive_string_conv *);

/* Append counterpart of archive_strncpy_l: same arguments, but (going
 * by the strncat naming used throughout this header) the converted
 * string is added to the existing contents instead of replacing them.
 * Return -1 if conversion fails. */
int
archive_strncat_l(struct archive_string *, const void *, size_t,
    struct archive_string_conv *);
12940b1a6e6Sjoerg 
13009ade360Sjoerg 
/*
 * Copy one archive_string (or archive_wstring) to another.
 *
 * The destination length is reset to zero and the source is then
 * appended with the matching concat function, so the destination's
 * existing buffer is reused.  'dest' is evaluated twice.
 */
#define	archive_string_copy(dest, src) \
	((dest)->length = 0, archive_string_concat((dest), (src)))
#define	archive_wstring_copy(dest, src) \
	((dest)->length = 0, archive_wstring_concat((dest), (src)))
13609ade360Sjoerg 
/* Concatenate one archive_string to another */
void archive_string_concat(struct archive_string *dest, struct archive_string *src);
/* Same, for wide-character strings. */
void archive_wstring_concat(struct archive_wstring *dest, struct archive_wstring *src);
1409fde5391Sjoerg 
/* Ensure that the underlying buffer is at least as large as the request. */
/* NOTE(review): the unit of the size argument for the wide variant
 * (bytes vs. wchar_t elements) is not visible in this header. */
struct archive_string *
archive_string_ensure(struct archive_string *, size_t);
struct archive_wstring *
archive_wstring_ensure(struct archive_wstring *, size_t);
14609ade360Sjoerg 
/* Append C string, which may lack trailing \0. */
/* The source is declared void * here because this gets used with
 * "signed char *", "unsigned char *" and "char *" arguments.
 * Declaring it "char *" as with some of the other functions just
 * leads to a lot of extra casts. */
struct archive_string *
archive_strncat(struct archive_string *, const void *, size_t);
/* Wide-character variant; the source is typed as wchar_t here. */
struct archive_wstring *
archive_wstrncat(struct archive_wstring *, const wchar_t *, size_t);
15609ade360Sjoerg 
/* Append a C string to an archive_string, resizing as necessary. */
struct archive_string *
archive_strcat(struct archive_string *, const void *);
/* Same, appending a wide C string to an archive_wstring. */
struct archive_wstring *
archive_wstrcat(struct archive_wstring *, const wchar_t *);
16209ade360Sjoerg 
/*
 * Copy a C string to an archive_string, resizing as necessary.
 *
 * A NULL source is accepted and treated as a zero-length string (the
 * length handed on is 0, so strlen/wcslen is never called on NULL).
 * 'p' appears up to three times in each expansion; pass an expression
 * without side effects.
 * archive_strcpy_l additionally forwards the conversion object 'lo'
 * to archive_strncpy_l.
 */
#define	archive_strcpy(as,p) \
	archive_strncpy((as), (p), ((p) == NULL ? 0 : strlen(p)))
#define	archive_wstrcpy(as,p) \
	archive_wstrncpy((as), (p), ((p) == NULL ? 0 : wcslen(p)))
#define	archive_strcpy_l(as,p,lo) \
	archive_strncpy_l((as), (p), ((p) == NULL ? 0 : strlen(p)), (lo))
17009ade360Sjoerg 
/*
 * Copy a C string to an archive_string with limit, resizing as necessary.
 *
 * Implemented as "reset the length to zero, then append": the result
 * of the expression is whatever the underlying strncat function
 * returns.  'as' is evaluated twice.
 */
#define	archive_strncpy(as,p,l) \
	((as)->length=0, archive_strncat((as), (p), (l)))
#define	archive_wstrncpy(as,p,l) \
	((as)->length = 0, archive_wstrncat((as), (p), (l)))
17609ade360Sjoerg 
/* Return length of string (the tracked 'length' field; nothing is scanned). */
#define	archive_strlen(a) ((a)->length)

/* Set string length to zero.  Only 'length' is written; the storage
 * pointer and buffer_length are left alone, so the buffer stays
 * available for reuse. */
#define	archive_string_empty(a) ((a)->length = 0)
#define	archive_wstring_empty(a) ((a)->length = 0)
18309ade360Sjoerg 
/* Release any allocated storage resources. */
void	archive_string_free(struct archive_string *);
void	archive_wstring_free(struct archive_wstring *);
18709ade360Sjoerg 
18809ade360Sjoerg /* Like 'vsprintf', but resizes the underlying string as necessary. */
18940b1a6e6Sjoerg /* Note: This only implements a small subset of standard printf functionality. */
19040b1a6e6Sjoerg void	archive_string_vsprintf(struct archive_string *, const char *,
19140b1a6e6Sjoerg 	    va_list) __LA_PRINTF(2, 0);
19240b1a6e6Sjoerg void	archive_string_sprintf(struct archive_string *, const char *, ...)
19340b1a6e6Sjoerg 	    __LA_PRINTF(2, 3);
19409ade360Sjoerg 
/* Translates from MBS to Unicode and appends the result to 'dest'. */
/* Returns non-zero if conversion failed in any way. */
int archive_wstring_append_from_mbs(struct archive_wstring *dest,
    const char *, size_t);
19909ade360Sjoerg 
20040b1a6e6Sjoerg 
20140b1a6e6Sjoerg /* A "multistring" can hold Unicode, UTF8, or MBS versions of
20240b1a6e6Sjoerg  * the string.  If you set and read the same version, no translation
20340b1a6e6Sjoerg  * is done.  If you set and read different versions, the library
20440b1a6e6Sjoerg  * will attempt to transparently convert.
20540b1a6e6Sjoerg  */
20640b1a6e6Sjoerg struct archive_mstring {
20740b1a6e6Sjoerg 	struct archive_string aes_mbs;
20840b1a6e6Sjoerg 	struct archive_string aes_utf8;
20940b1a6e6Sjoerg 	struct archive_wstring aes_wcs;
21040b1a6e6Sjoerg 	struct archive_string aes_mbs_in_locale;
21140b1a6e6Sjoerg 	/* Bitmap of which of the above are valid.  Because we're lazy
21240b1a6e6Sjoerg 	 * about malloc-ing and reusing the underlying storage, we
21340b1a6e6Sjoerg 	 * can't rely on NULL pointers to indicate whether a string
21440b1a6e6Sjoerg 	 * has been set. */
21540b1a6e6Sjoerg 	int aes_set;
21640b1a6e6Sjoerg #define	AES_SET_MBS 1
21740b1a6e6Sjoerg #define	AES_SET_UTF8 2
21840b1a6e6Sjoerg #define	AES_SET_WCS 4
21940b1a6e6Sjoerg };
22040b1a6e6Sjoerg 
/*
 * Operations on struct archive_mstring.
 * NOTE(review): the meaning of the int return values is not stated in
 * this header; check the implementation before relying on a convention.
 */
void	archive_mstring_clean(struct archive_mstring *);
void	archive_mstring_copy(struct archive_mstring *dest, struct archive_mstring *src);

/* Getters: each hands back one representation through the trailing
 * out-pointer.  The _l variant also takes a length out-pointer and a
 * conversion object. */
int	archive_mstring_get_mbs(struct archive *, struct archive_mstring *, const char **);
int	archive_mstring_get_utf8(struct archive *, struct archive_mstring *, const char **);
int	archive_mstring_get_wcs(struct archive *, struct archive_mstring *, const wchar_t **);
int	archive_mstring_get_mbs_l(struct archive *, struct archive_mstring *, const char **,
	    size_t *, struct archive_string_conv *);

/* Setters: store a string given in one representation.  The _len
 * variants take an explicit length; the _l variant also takes a
 * conversion object. */
int	archive_mstring_copy_mbs(struct archive_mstring *, const char *mbs);
int	archive_mstring_copy_mbs_len(struct archive_mstring *, const char *mbs,
	    size_t);
int	archive_mstring_copy_utf8(struct archive_mstring *, const char *utf8);
int	archive_mstring_copy_wcs(struct archive_mstring *, const wchar_t *wcs);
int	archive_mstring_copy_wcs_len(struct archive_mstring *,
	    const wchar_t *wcs, size_t);
int	archive_mstring_copy_mbs_len_l(struct archive_mstring *,
	    const char *mbs, size_t, struct archive_string_conv *);
int	archive_mstring_update_utf8(struct archive *, struct archive_mstring *aes, const char *utf8);
23809ade360Sjoerg 
23909ade360Sjoerg 
24009ade360Sjoerg #endif
241