1*0a6a1f1dSLionel Sambuc /* $NetBSD: chartype.h,v 1.15 2015/05/17 13:14:41 christos Exp $ */
23e1db26aSLionel Sambuc
33e1db26aSLionel Sambuc /*-
43e1db26aSLionel Sambuc * Copyright (c) 2009 The NetBSD Foundation, Inc.
53e1db26aSLionel Sambuc * All rights reserved.
63e1db26aSLionel Sambuc *
73e1db26aSLionel Sambuc * Redistribution and use in source and binary forms, with or without
83e1db26aSLionel Sambuc * modification, are permitted provided that the following conditions
93e1db26aSLionel Sambuc * are met:
103e1db26aSLionel Sambuc * 1. Redistributions of source code must retain the above copyright
113e1db26aSLionel Sambuc * notice, this list of conditions and the following disclaimer.
123e1db26aSLionel Sambuc * 2. Redistributions in binary form must reproduce the above copyright
133e1db26aSLionel Sambuc * notice, this list of conditions and the following disclaimer in the
143e1db26aSLionel Sambuc * documentation and/or other materials provided with the distribution.
153e1db26aSLionel Sambuc * 3. All advertising materials mentioning features or use of this software
163e1db26aSLionel Sambuc * must display the following acknowledgement:
173e1db26aSLionel Sambuc * This product includes software developed by the NetBSD
183e1db26aSLionel Sambuc * Foundation, Inc. and its contributors.
193e1db26aSLionel Sambuc * 4. Neither the name of The NetBSD Foundation nor the names of its
203e1db26aSLionel Sambuc * contributors may be used to endorse or promote products derived
213e1db26aSLionel Sambuc * from this software without specific prior written permission.
223e1db26aSLionel Sambuc *
233e1db26aSLionel Sambuc * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
243e1db26aSLionel Sambuc * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
253e1db26aSLionel Sambuc * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
263e1db26aSLionel Sambuc * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
273e1db26aSLionel Sambuc * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
283e1db26aSLionel Sambuc * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
293e1db26aSLionel Sambuc * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
303e1db26aSLionel Sambuc * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
313e1db26aSLionel Sambuc * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
323e1db26aSLionel Sambuc * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
333e1db26aSLionel Sambuc * POSSIBILITY OF SUCH DAMAGE.
343e1db26aSLionel Sambuc */
353e1db26aSLionel Sambuc
363e1db26aSLionel Sambuc #ifndef _h_chartype_f
373e1db26aSLionel Sambuc #define _h_chartype_f
383e1db26aSLionel Sambuc
393e1db26aSLionel Sambuc
403e1db26aSLionel Sambuc
413e1db26aSLionel Sambuc #ifdef WIDECHAR
423e1db26aSLionel Sambuc
433e1db26aSLionel Sambuc /* Ideally we should also test the value of the define to see if it
443e1db26aSLionel Sambuc * supports non-BMP code points without requiring UTF-16, but nothing
453e1db26aSLionel Sambuc * seems to actually advertise this properly, despite Unicode 3.1 having
463e1db26aSLionel Sambuc * been around since 2001... */
/*
 * Refuse to build when the compiler does not define __STDC_ISO_10646__,
 * except on the platforms exempted by name below.
 * In many places it is assumed that the first 127 code points are ASCII
 * compatible, so ensure wchar_t indeed does ISO 10646 and not some other
 * funky encoding that could break us in weird and wonderful ways.
 */
#if !defined(__minix) && \
    !defined(__NetBSD__) && !defined(__sun) && \
    !(defined(__APPLE__) && defined(__MACH__)) && \
    !defined(__OpenBSD__) && !defined(__FreeBSD__) && \
    !defined(__STDC_ISO_10646__)
#error wchar_t must store ISO 10646 characters
#endif

/* Oh for a <uchar.h> with char32_t and __STDC_UTF_32__ in it...
 * ref: ISO/IEC DTR 19769
 *
 * A wchar_t whose maximum is below INT32_MAX only earns a warning; the
 * build continues.
 */
#if INT32_MAX > WCHAR_MAX
#warning Build environment does not support non-BMP characters
#endif
643e1db26aSLionel Sambuc
/*
 * Multibyte <-> wide conversion, wide build: the ct_* names are plain
 * aliases for the C library routines.  The *_reset forms expand to the
 * corresponding call with null pointer arguments.
 */
#define ct_mbtowc		mbtowc
#define ct_mbtowc_reset		mbtowc(NULL, NULL, (size_t)0)
#define ct_wctomb		wctomb
#define ct_wctomb_reset		wctomb(NULL, L'\0')
#define ct_wcstombs		wcstombs
#define ct_mbstowcs		mbstowcs
713e1db26aSLionel Sambuc
/*
 * Character abstraction, wide build.
 *   Char / Int    wchar_t / wint_t
 *   FUN(p, r)     pastes p, "_w" and r into one identifier (p_wr)
 *   FUNW(t)       pastes "_w" onto t
 *   TYPE(t)       pastes "W" onto t
 *   FCHAR / FSTR  printf conversions "%lc" / "%ls"
 *   STR(x)        prefixes the literal x with L
 *   UC(c)         expands to c unchanged
 */
#define Char			wchar_t
#define Int			wint_t
#define FUN(prefix,rest)	prefix ## _w ## rest
#define FUNW(type)		type ## _w
#define TYPE(type)		type ## W
#define FCHAR			"%lc"
#define FSTR			"%ls"
#define STR(x)			L ## x
#define UC(c)			c
/*
 * Character classification and case mapping, wide build: thin aliases for
 * the <wctype.h> routines.
 */
#define Isalpha(x)	iswalpha(x)
#define Isalnum(x)	iswalnum(x)
#define Isgraph(x)	iswgraph(x)
#define Isspace(x)	iswspace(x)
#define Isdigit(x)	iswdigit(x)
#define Iscntrl(x)	iswcntrl(x)
#define Isprint(x)	iswprint(x)

#define Isupper(x)	iswupper(x)
#define Islower(x)	iswlower(x)
#define Toupper(x)	towupper(x)
#define Tolower(x)	towlower(x)

/*
 * Non-zero when x is below 0x100.  Note that, despite the name, the
 * threshold is 0x100 and not 0x80.  The argument is parenthesized so that
 * an expression such as IsASCII(a | b) is compared as a whole rather than
 * being re-associated by operator precedence.
 */
#define IsASCII(x)	((x) < 0x100)
953e1db26aSLionel Sambuc
/*
 * String handling, wide build: each Str* name forwards its arguments
 * unchanged to the matching wcs* routine.
 */

/* length, searching, duplication and copying */
#define Strlen(x)		wcslen(x)
#define Strchr(s,c)		wcschr(s,c)
#define Strrchr(s,c)		wcsrchr(s,c)
#define Strstr(s,v)		wcsstr(s,v)
#define Strdup(x)		wcsdup(x)
#define Strcpy(d,s)		wcscpy(d,s)
#define Strncpy(d,s,n)		wcsncpy(d,s,n)
#define Strncat(d,s,n)		wcsncat(d,s,n)

/* comparison and span */
#define Strcmp(s,v)		wcscmp(s,v)
#define Strncmp(s,v,n)		wcsncmp(s,v,n)
#define Strcspn(s,r)		wcscspn(s,r)

/* numeric conversion */
#define Strtol(p,e,b)		wcstol(p,e,b)
1103e1db26aSLionel Sambuc
/*
 * Column width of c as reported by wcwidth(), with any negative answer
 * clamped to 0 so that callers never see a negative width.
 */
static inline int
Width(wchar_t c)
{
	const int cols = wcwidth(c);

	if (cols < 0)
		return 0;
	return cols;
}
1173e1db26aSLionel Sambuc
1183e1db26aSLionel Sambuc #else /* NARROW */
1193e1db26aSLionel Sambuc
/*
 * Multibyte <-> wide conversion, narrow build.
 *
 * ct_mbtowc and ct_wctomb expand to the bare identifier `error', and the
 * *_reset forms expand to nothing.
 * NOTE(review): presumably `error' is meant to make any narrow-build use of
 * the per-character converters fail to compile -- confirm.
 */
#define ct_mbtowc		error
#define ct_mbtowc_reset
#define ct_wctomb		error
#define ct_wctomb_reset

/*
 * String "conversion" is a bounded copy of b into a.  The value is the
 * length of the copied string, or c when all c bytes were filled and
 * strncpy() therefore wrote no terminating NUL.  The memchr() test keeps
 * strlen() from running off the end of an unterminated destination.
 * The arguments a and c are evaluated more than once.
 */
#define ct_wcstombs(a, b, c)	\
	(strncpy((a), (b), (c)), \
	 memchr((a), '\0', (c)) != NULL ? strlen((a)) : (size_t)(c))
#define ct_mbstowcs(a, b, c)	\
	(strncpy((a), (b), (c)), \
	 memchr((a), '\0', (c)) != NULL ? strlen((a)) : (size_t)(c))
1263e1db26aSLionel Sambuc
/*
 * Character abstraction, narrow build.
 *   Char / Int    char / int
 *   FUN(p, r)     pastes p, "_" and r into one identifier (p_r)
 *   FUNW(t)       expands to t unchanged
 *   TYPE(t)       expands to t unchanged
 *   FCHAR / FSTR  printf conversions "%c" / "%s"
 *   STR(x)        expands to x unchanged
 *   UC(c)         casts c to unsigned char
 */
#define Char			char
#define Int			int
#define FUN(prefix,rest)	prefix ## _ ## rest
#define FUNW(type)		type
#define TYPE(type)		type
#define FCHAR			"%c"
#define FSTR			"%s"
#define STR(x)			x
#define UC(c)			(unsigned char)(c)
1363e1db26aSLionel Sambuc
/*
 * Character classification and case mapping, narrow build.
 *
 * Every argument is converted to unsigned char before it reaches the
 * <ctype.h> routine.  The macro argument is parenthesized so that the cast
 * applies to the whole expression passed in, not only to its first operand
 * (e.g. Isdigit(v >> 8) must classify the shifted value).
 */
#define Isalpha(x)	isalpha((unsigned char)(x))
#define Isalnum(x)	isalnum((unsigned char)(x))
#define Isgraph(x)	isgraph((unsigned char)(x))
#define Isspace(x)	isspace((unsigned char)(x))
#define Isdigit(x)	isdigit((unsigned char)(x))
#define Iscntrl(x)	iscntrl((unsigned char)(x))
#define Isprint(x)	isprint((unsigned char)(x))

#define Isupper(x)	isupper((unsigned char)(x))
#define Islower(x)	islower((unsigned char)(x))
#define Toupper(x)	toupper((unsigned char)(x))
#define Tolower(x)	tolower((unsigned char)(x))

#define IsASCII(x)	isascii((unsigned char)(x))
1513e1db26aSLionel Sambuc
/*
 * String handling, narrow build: each Str* name forwards its arguments
 * unchanged to the matching str* routine.
 */

/* length, searching, duplication and copying */
#define Strlen(x)		strlen(x)
#define Strchr(s,c)		strchr(s,c)
#define Strrchr(s,c)		strrchr(s,c)
#define Strstr(s,v)		strstr(s,v)
#define Strdup(x)		strdup(x)
#define Strcpy(d,s)		strcpy(d,s)
#define Strncpy(d,s,n)		strncpy(d,s,n)
#define Strncat(d,s,n)		strncat(d,s,n)

/* comparison and span */
#define Strcmp(s,v)		strcmp(s,v)
#define Strncmp(s,v,n)		strncmp(s,v,n)
#define Strcspn(s,r)		strcspn(s,r)

/* numeric conversion */
#define Strtol(p,e,b)		strtol(p,e,b)

/* In the narrow build the width is the constant 1; c is not evaluated. */
#define Width(c)		1
1683e1db26aSLionel Sambuc
1693e1db26aSLionel Sambuc #endif
1703e1db26aSLionel Sambuc
1713e1db26aSLionel Sambuc
1723e1db26aSLionel Sambuc #ifdef WIDECHAR
1733e1db26aSLionel Sambuc /*
1743e1db26aSLionel Sambuc * Conversion buffer
1753e1db26aSLionel Sambuc */
1763e1db26aSLionel Sambuc typedef struct ct_buffer_t {
1773e1db26aSLionel Sambuc char *cbuff;
1783e1db26aSLionel Sambuc size_t csize;
1793e1db26aSLionel Sambuc Char *wbuff;
1803e1db26aSLionel Sambuc size_t wsize;
1813e1db26aSLionel Sambuc } ct_buffer_t;
1823e1db26aSLionel Sambuc
1833e1db26aSLionel Sambuc #define ct_encode_string __ct_encode_string
1843e1db26aSLionel Sambuc /* Encode a wide-character string and return the UTF-8 encoded result. */
1853e1db26aSLionel Sambuc public char *ct_encode_string(const Char *, ct_buffer_t *);
1863e1db26aSLionel Sambuc
1873e1db26aSLionel Sambuc #define ct_decode_string __ct_decode_string
1883e1db26aSLionel Sambuc /* Decode a (multi)?byte string and return the wide-character string result. */
1893e1db26aSLionel Sambuc public Char *ct_decode_string(const char *, ct_buffer_t *);
1903e1db26aSLionel Sambuc
1913e1db26aSLionel Sambuc /* Decode a (multi)?byte argv string array.
1923e1db26aSLionel Sambuc * The pointer returned must be free()d when done. */
1933e1db26aSLionel Sambuc protected Char **ct_decode_argv(int, const char *[], ct_buffer_t *);
1943e1db26aSLionel Sambuc
1953e1db26aSLionel Sambuc /* Resizes the conversion buffer(s) if needed. */
196*0a6a1f1dSLionel Sambuc protected int ct_conv_cbuff_resize(ct_buffer_t *, size_t);
197*0a6a1f1dSLionel Sambuc protected int ct_conv_wbuff_resize(ct_buffer_t *, size_t);
1983e1db26aSLionel Sambuc protected ssize_t ct_encode_char(char *, size_t, Char);
1993e1db26aSLionel Sambuc protected size_t ct_enc_width(Char);
2003e1db26aSLionel Sambuc
2013e1db26aSLionel Sambuc #define ct_free_argv(s) el_free(s)
2023e1db26aSLionel Sambuc
2033e1db26aSLionel Sambuc #else
/*
 * Narrow-build stand-ins for the conversion API.
 *
 * The string/argv converters simply yield their input (s); the buffer
 * argument is ignored.  The resize macros evaluate to 1 when the requested
 * size is 0 and to 0 otherwise.  ct_free_argv() expands to nothing.
 */
#define ct_encode_string(s, b)		(s)
#define ct_decode_string(s, b)		(s)
#define ct_decode_argv(l, s, b)		(s)
#define ct_conv_cbuff_resize(b, s)	((s) == (0))
#define ct_conv_wbuff_resize(b, s)	((s) == (0))
/*
 * Stores s through d and evaluates to 1; the length l is not consulted.
 * d and s are parenthesized so that expression arguments such as
 * ct_encode_char(p + 1, n, c) dereference and assign the intended operands.
 */
#define ct_encode_char(d, l, s)		(*(d) = (s), 1)
#define ct_free_argv(s)
2113e1db26aSLionel Sambuc #endif
2123e1db26aSLionel Sambuc
2133e1db26aSLionel Sambuc #ifndef NARROWCHAR
/* Encode a character into the destination buffer, provided there is sufficient
 * buffer space available. Returns the number of bytes used up (zero if the
 * character cannot be encoded, -1 if there was not enough space available). */
2173e1db26aSLionel Sambuc
/*
 * Upper bound on the buffer needed for the most unwieldy visual
 * representation of a single character, which is the \U+nnnnn form.
 */
#define VISUAL_WIDTH_MAX	((size_t)8)

/*
 * The terminal is modelled as X columns by Y lines.  When a wide character
 * occupies more than one column, the additional column entries it covers
 * hold this placeholder value instead of a real character.
 */
#define MB_FILL_CHAR		((Char)-1)
2263e1db26aSLionel Sambuc
2273e1db26aSLionel Sambuc /* Visual width of character c, taking into account ^? , \0177 and \U+nnnnn
2283e1db26aSLionel Sambuc * style visual expansions. */
2293e1db26aSLionel Sambuc protected int ct_visual_width(Char);
2303e1db26aSLionel Sambuc
2313e1db26aSLionel Sambuc /* Turn the given character into the appropriate visual format, matching
2323e1db26aSLionel Sambuc * the width given by ct_visual_width(). Returns the number of characters used
2333e1db26aSLionel Sambuc * up, or -1 if insufficient space. Buffer length is in count of Char's. */
2343e1db26aSLionel Sambuc protected ssize_t ct_visual_char(Char *, size_t, Char);
2353e1db26aSLionel Sambuc
2363e1db26aSLionel Sambuc /* Convert the given string into visual format, using the ct_visual_char()
2373e1db26aSLionel Sambuc * function. Uses a static buffer, so not threadsafe. */
2383e1db26aSLionel Sambuc protected const Char *ct_visual_string(const Char *);
2393e1db26aSLionel Sambuc
2403e1db26aSLionel Sambuc
/*
 * Character classes.  CHTYPE_PRINT is zero; every other class is a
 * distinct negative value.
 */
#define CHTYPE_PRINT		(0)	/* printable character, use
					 * ct_visual_width() to find out
					 * display width */
#define CHTYPE_ASCIICTL		(-1)	/* control character found inside the
					 * ASCII portion of the charset */
#define CHTYPE_TAB		(-2)	/* a \t */
#define CHTYPE_NL		(-3)	/* a \n */
#define CHTYPE_NONPRINT		(-4)	/* non-printable character */
2513e1db26aSLionel Sambuc /* classification of character c, as one of the above defines */
2523e1db26aSLionel Sambuc protected int ct_chr_class(Char c);
2533e1db26aSLionel Sambuc #endif
2543e1db26aSLionel Sambuc
2553e1db26aSLionel Sambuc
#endif /* _h_chartype_f */
257