1*60ecde0cSDaniel Fojt /* $NetBSD: chartype.c,v 1.35 2019/07/23 10:18:52 christos Exp $ */
232fe07f8SJohn Marino
332fe07f8SJohn Marino /*-
432fe07f8SJohn Marino * Copyright (c) 2009 The NetBSD Foundation, Inc.
532fe07f8SJohn Marino * All rights reserved.
632fe07f8SJohn Marino *
732fe07f8SJohn Marino * Redistribution and use in source and binary forms, with or without
832fe07f8SJohn Marino * modification, are permitted provided that the following conditions
932fe07f8SJohn Marino * are met:
1032fe07f8SJohn Marino * 1. Redistributions of source code must retain the above copyright
1132fe07f8SJohn Marino * notice, this list of conditions and the following disclaimer.
1232fe07f8SJohn Marino * 2. Redistributions in binary form must reproduce the above copyright
1332fe07f8SJohn Marino * notice, this list of conditions and the following disclaimer in the
1432fe07f8SJohn Marino * documentation and/or other materials provided with the distribution.
1532fe07f8SJohn Marino *
1632fe07f8SJohn Marino * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
1732fe07f8SJohn Marino * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
1832fe07f8SJohn Marino * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1932fe07f8SJohn Marino * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2032fe07f8SJohn Marino * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2132fe07f8SJohn Marino * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2232fe07f8SJohn Marino * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2332fe07f8SJohn Marino * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2432fe07f8SJohn Marino * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2532fe07f8SJohn Marino * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2632fe07f8SJohn Marino * POSSIBILITY OF SUCH DAMAGE.
2732fe07f8SJohn Marino */
2832fe07f8SJohn Marino
2932fe07f8SJohn Marino /*
3032fe07f8SJohn Marino * chartype.c: character classification and meta information
3132fe07f8SJohn Marino */
3232fe07f8SJohn Marino #include "config.h"
3332fe07f8SJohn Marino #if !defined(lint) && !defined(SCCSID)
34*60ecde0cSDaniel Fojt __RCSID("$NetBSD: chartype.c,v 1.35 2019/07/23 10:18:52 christos Exp $");
3532fe07f8SJohn Marino #endif /* not lint && not SCCSID */
3612db70c8Szrj
3712db70c8Szrj #include <ctype.h>
38ae19eda8Szrj #include <limits.h>
3932fe07f8SJohn Marino #include <stdlib.h>
4012db70c8Szrj #include <string.h>
4112db70c8Szrj
4212db70c8Szrj #include "el.h"
4332fe07f8SJohn Marino
4432fe07f8SJohn Marino #define CT_BUFSIZ ((size_t)1024)
4532fe07f8SJohn Marino
4612db70c8Szrj static int ct_conv_cbuff_resize(ct_buffer_t *, size_t);
4712db70c8Szrj static int ct_conv_wbuff_resize(ct_buffer_t *, size_t);
4812db70c8Szrj
4912db70c8Szrj static int
ct_conv_cbuff_resize(ct_buffer_t * conv,size_t csize)50a0c9eb18SJohn Marino ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize)
5132fe07f8SJohn Marino {
5232fe07f8SJohn Marino void *p;
53a0c9eb18SJohn Marino
54a0c9eb18SJohn Marino if (csize <= conv->csize)
55a0c9eb18SJohn Marino return 0;
56a0c9eb18SJohn Marino
57a0c9eb18SJohn Marino conv->csize = csize;
58a0c9eb18SJohn Marino
5932fe07f8SJohn Marino p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff));
6032fe07f8SJohn Marino if (p == NULL) {
6132fe07f8SJohn Marino conv->csize = 0;
6232fe07f8SJohn Marino el_free(conv->cbuff);
6332fe07f8SJohn Marino conv->cbuff = NULL;
64a0c9eb18SJohn Marino return -1;
65a0c9eb18SJohn Marino }
6632fe07f8SJohn Marino conv->cbuff = p;
67a0c9eb18SJohn Marino return 0;
6832fe07f8SJohn Marino }
6932fe07f8SJohn Marino
7012db70c8Szrj static int
ct_conv_wbuff_resize(ct_buffer_t * conv,size_t wsize)71a0c9eb18SJohn Marino ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize)
72a0c9eb18SJohn Marino {
73a0c9eb18SJohn Marino void *p;
74a0c9eb18SJohn Marino
75a0c9eb18SJohn Marino if (wsize <= conv->wsize)
76a0c9eb18SJohn Marino return 0;
77a0c9eb18SJohn Marino
78a0c9eb18SJohn Marino conv->wsize = wsize;
79a0c9eb18SJohn Marino
8032fe07f8SJohn Marino p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff));
8132fe07f8SJohn Marino if (p == NULL) {
8232fe07f8SJohn Marino conv->wsize = 0;
8332fe07f8SJohn Marino el_free(conv->wbuff);
8432fe07f8SJohn Marino conv->wbuff = NULL;
85a0c9eb18SJohn Marino return -1;
8632fe07f8SJohn Marino }
87a0c9eb18SJohn Marino conv->wbuff = p;
88a0c9eb18SJohn Marino return 0;
8932fe07f8SJohn Marino }
9032fe07f8SJohn Marino
9132fe07f8SJohn Marino
9212db70c8Szrj char *
ct_encode_string(const wchar_t * s,ct_buffer_t * conv)9312db70c8Szrj ct_encode_string(const wchar_t *s, ct_buffer_t *conv)
9432fe07f8SJohn Marino {
9532fe07f8SJohn Marino char *dst;
96a0c9eb18SJohn Marino ssize_t used;
9732fe07f8SJohn Marino
9832fe07f8SJohn Marino if (!s)
9932fe07f8SJohn Marino return NULL;
10032fe07f8SJohn Marino
10132fe07f8SJohn Marino dst = conv->cbuff;
102a0c9eb18SJohn Marino for (;;) {
103a0c9eb18SJohn Marino used = (ssize_t)(dst - conv->cbuff);
104a0c9eb18SJohn Marino if ((conv->csize - (size_t)used) < 5) {
105a0c9eb18SJohn Marino if (ct_conv_cbuff_resize(conv,
106a0c9eb18SJohn Marino conv->csize + CT_BUFSIZ) == -1)
10732fe07f8SJohn Marino return NULL;
10832fe07f8SJohn Marino dst = conv->cbuff + used;
10932fe07f8SJohn Marino }
110a0c9eb18SJohn Marino if (!*s)
111a0c9eb18SJohn Marino break;
11232fe07f8SJohn Marino used = ct_encode_char(dst, (size_t)5, *s);
11332fe07f8SJohn Marino if (used == -1) /* failed to encode, need more buffer space */
11432fe07f8SJohn Marino abort();
11532fe07f8SJohn Marino ++s;
11632fe07f8SJohn Marino dst += used;
11732fe07f8SJohn Marino }
11832fe07f8SJohn Marino *dst = '\0';
11932fe07f8SJohn Marino return conv->cbuff;
12032fe07f8SJohn Marino }
12132fe07f8SJohn Marino
12212db70c8Szrj wchar_t *
ct_decode_string(const char * s,ct_buffer_t * conv)12332fe07f8SJohn Marino ct_decode_string(const char *s, ct_buffer_t *conv)
12432fe07f8SJohn Marino {
125a0c9eb18SJohn Marino size_t len;
12632fe07f8SJohn Marino
12732fe07f8SJohn Marino if (!s)
12832fe07f8SJohn Marino return NULL;
12932fe07f8SJohn Marino
13012db70c8Szrj len = mbstowcs(NULL, s, (size_t)0);
13132fe07f8SJohn Marino if (len == (size_t)-1)
13232fe07f8SJohn Marino return NULL;
133a0c9eb18SJohn Marino
134a0c9eb18SJohn Marino if (conv->wsize < ++len)
135a0c9eb18SJohn Marino if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1)
13632fe07f8SJohn Marino return NULL;
137a0c9eb18SJohn Marino
13812db70c8Szrj mbstowcs(conv->wbuff, s, conv->wsize);
13932fe07f8SJohn Marino return conv->wbuff;
14032fe07f8SJohn Marino }
14132fe07f8SJohn Marino
14232fe07f8SJohn Marino
14312db70c8Szrj libedit_private wchar_t **
ct_decode_argv(int argc,const char * argv[],ct_buffer_t * conv)14432fe07f8SJohn Marino ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
14532fe07f8SJohn Marino {
14632fe07f8SJohn Marino size_t bufspace;
14732fe07f8SJohn Marino int i;
14812db70c8Szrj wchar_t *p;
14912db70c8Szrj wchar_t **wargv;
15032fe07f8SJohn Marino ssize_t bytes;
15132fe07f8SJohn Marino
15232fe07f8SJohn Marino /* Make sure we have enough space in the conversion buffer to store all
15332fe07f8SJohn Marino * the argv strings. */
15432fe07f8SJohn Marino for (i = 0, bufspace = 0; i < argc; ++i)
15532fe07f8SJohn Marino bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
156a0c9eb18SJohn Marino if (conv->wsize < ++bufspace)
157a0c9eb18SJohn Marino if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1)
15832fe07f8SJohn Marino return NULL;
15932fe07f8SJohn Marino
160*60ecde0cSDaniel Fojt wargv = el_calloc((size_t)(argc + 1), sizeof(*wargv));
16132fe07f8SJohn Marino
16232fe07f8SJohn Marino for (i = 0, p = conv->wbuff; i < argc; ++i) {
16332fe07f8SJohn Marino if (!argv[i]) { /* don't pass null pointers to mbstowcs */
16432fe07f8SJohn Marino wargv[i] = NULL;
16532fe07f8SJohn Marino continue;
16632fe07f8SJohn Marino } else {
16732fe07f8SJohn Marino wargv[i] = p;
16832fe07f8SJohn Marino bytes = (ssize_t)mbstowcs(p, argv[i], bufspace);
16932fe07f8SJohn Marino }
17032fe07f8SJohn Marino if (bytes == -1) {
17132fe07f8SJohn Marino el_free(wargv);
17232fe07f8SJohn Marino return NULL;
17332fe07f8SJohn Marino } else
17432fe07f8SJohn Marino bytes++; /* include '\0' in the count */
17532fe07f8SJohn Marino bufspace -= (size_t)bytes;
17632fe07f8SJohn Marino p += bytes;
17732fe07f8SJohn Marino }
17812db70c8Szrj wargv[i] = NULL;
17932fe07f8SJohn Marino
18032fe07f8SJohn Marino return wargv;
18132fe07f8SJohn Marino }
18232fe07f8SJohn Marino
18332fe07f8SJohn Marino
18412db70c8Szrj libedit_private size_t
ct_enc_width(wchar_t c)18512db70c8Szrj ct_enc_width(wchar_t c)
18632fe07f8SJohn Marino {
187ae19eda8Szrj mbstate_t mbs;
188ae19eda8Szrj char buf[MB_LEN_MAX];
189ae19eda8Szrj size_t size;
190ae19eda8Szrj memset(&mbs, 0, sizeof(mbs));
191ae19eda8Szrj
192ae19eda8Szrj if ((size = wcrtomb(buf, c, &mbs)) == (size_t)-1)
193ae19eda8Szrj return 0;
194ae19eda8Szrj return size;
19532fe07f8SJohn Marino }
19632fe07f8SJohn Marino
19712db70c8Szrj libedit_private ssize_t
ct_encode_char(char * dst,size_t len,wchar_t c)19812db70c8Szrj ct_encode_char(char *dst, size_t len, wchar_t c)
19932fe07f8SJohn Marino {
20032fe07f8SJohn Marino ssize_t l = 0;
20132fe07f8SJohn Marino if (len < ct_enc_width(c))
20232fe07f8SJohn Marino return -1;
20312db70c8Szrj l = wctomb(dst, c);
20432fe07f8SJohn Marino
20532fe07f8SJohn Marino if (l < 0) {
20612db70c8Szrj wctomb(NULL, L'\0');
20732fe07f8SJohn Marino l = 0;
20832fe07f8SJohn Marino }
20932fe07f8SJohn Marino return l;
21032fe07f8SJohn Marino }
21132fe07f8SJohn Marino
21212db70c8Szrj libedit_private const wchar_t *
ct_visual_string(const wchar_t * s,ct_buffer_t * conv)21312db70c8Szrj ct_visual_string(const wchar_t *s, ct_buffer_t *conv)
21432fe07f8SJohn Marino {
21512db70c8Szrj wchar_t *dst;
21612db70c8Szrj ssize_t used;
21732fe07f8SJohn Marino
21832fe07f8SJohn Marino if (!s)
21932fe07f8SJohn Marino return NULL;
22012db70c8Szrj
22112db70c8Szrj if (ct_conv_wbuff_resize(conv, CT_BUFSIZ) == -1)
22212db70c8Szrj return NULL;
22312db70c8Szrj
22412db70c8Szrj used = 0;
22512db70c8Szrj dst = conv->wbuff;
22632fe07f8SJohn Marino while (*s) {
22712db70c8Szrj used = ct_visual_char(dst,
22812db70c8Szrj conv->wsize - (size_t)(dst - conv->wbuff), *s);
22912db70c8Szrj if (used != -1) {
23032fe07f8SJohn Marino ++s;
23132fe07f8SJohn Marino dst += used;
23212db70c8Szrj continue;
23332fe07f8SJohn Marino }
23412db70c8Szrj
23512db70c8Szrj /* failed to encode, need more buffer space */
23612db70c8Szrj used = dst - conv->wbuff;
23712db70c8Szrj if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
23832fe07f8SJohn Marino return NULL;
23912db70c8Szrj dst = conv->wbuff + used;
24012db70c8Szrj }
24112db70c8Szrj
24212db70c8Szrj if (dst >= (conv->wbuff + conv->wsize)) { /* sigh */
24312db70c8Szrj used = dst - conv->wbuff;
24412db70c8Szrj if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
24512db70c8Szrj return NULL;
24612db70c8Szrj dst = conv->wbuff + used;
24712db70c8Szrj }
24812db70c8Szrj
24912db70c8Szrj *dst = L'\0';
25012db70c8Szrj return conv->wbuff;
25132fe07f8SJohn Marino }
25232fe07f8SJohn Marino
25332fe07f8SJohn Marino
25432fe07f8SJohn Marino
25512db70c8Szrj libedit_private int
ct_visual_width(wchar_t c)25612db70c8Szrj ct_visual_width(wchar_t c)
25732fe07f8SJohn Marino {
25832fe07f8SJohn Marino int t = ct_chr_class(c);
25932fe07f8SJohn Marino switch (t) {
26032fe07f8SJohn Marino case CHTYPE_ASCIICTL:
26132fe07f8SJohn Marino return 2; /* ^@ ^? etc. */
26232fe07f8SJohn Marino case CHTYPE_TAB:
26332fe07f8SJohn Marino return 1; /* Hmm, this really need to be handled outside! */
26432fe07f8SJohn Marino case CHTYPE_NL:
26532fe07f8SJohn Marino return 0; /* Should this be 1 instead? */
26632fe07f8SJohn Marino case CHTYPE_PRINT:
26732fe07f8SJohn Marino return wcwidth(c);
26832fe07f8SJohn Marino case CHTYPE_NONPRINT:
26932fe07f8SJohn Marino if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
27032fe07f8SJohn Marino return 8; /* \U+12345 */
27132fe07f8SJohn Marino else
27232fe07f8SJohn Marino return 7; /* \U+1234 */
27332fe07f8SJohn Marino default:
27432fe07f8SJohn Marino return 0; /* should not happen */
27532fe07f8SJohn Marino }
27632fe07f8SJohn Marino }
27732fe07f8SJohn Marino
27832fe07f8SJohn Marino
27912db70c8Szrj libedit_private ssize_t
ct_visual_char(wchar_t * dst,size_t len,wchar_t c)28012db70c8Szrj ct_visual_char(wchar_t *dst, size_t len, wchar_t c)
28132fe07f8SJohn Marino {
28232fe07f8SJohn Marino int t = ct_chr_class(c);
28332fe07f8SJohn Marino switch (t) {
28432fe07f8SJohn Marino case CHTYPE_TAB:
28532fe07f8SJohn Marino case CHTYPE_NL:
28632fe07f8SJohn Marino case CHTYPE_ASCIICTL:
28732fe07f8SJohn Marino if (len < 2)
28832fe07f8SJohn Marino return -1; /* insufficient space */
28932fe07f8SJohn Marino *dst++ = '^';
29032fe07f8SJohn Marino if (c == '\177')
29132fe07f8SJohn Marino *dst = '?'; /* DEL -> ^? */
29232fe07f8SJohn Marino else
29332fe07f8SJohn Marino *dst = c | 0100; /* uncontrolify it */
29432fe07f8SJohn Marino return 2;
29532fe07f8SJohn Marino case CHTYPE_PRINT:
29632fe07f8SJohn Marino if (len < 1)
29732fe07f8SJohn Marino return -1; /* insufficient space */
29832fe07f8SJohn Marino *dst = c;
29932fe07f8SJohn Marino return 1;
30032fe07f8SJohn Marino case CHTYPE_NONPRINT:
30132fe07f8SJohn Marino /* we only use single-width glyphs for display,
30232fe07f8SJohn Marino * so this is right */
30332fe07f8SJohn Marino if ((ssize_t)len < ct_visual_width(c))
30432fe07f8SJohn Marino return -1; /* insufficient space */
30532fe07f8SJohn Marino *dst++ = '\\';
30632fe07f8SJohn Marino *dst++ = 'U';
30732fe07f8SJohn Marino *dst++ = '+';
30832fe07f8SJohn Marino #define tohexdigit(v) "0123456789ABCDEF"[v]
30932fe07f8SJohn Marino if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
31032fe07f8SJohn Marino *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
31132fe07f8SJohn Marino *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
31232fe07f8SJohn Marino *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf);
31332fe07f8SJohn Marino *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf);
31432fe07f8SJohn Marino *dst = tohexdigit(((unsigned int) c ) & 0xf);
31532fe07f8SJohn Marino return c > 0xffff ? 8 : 7;
31632fe07f8SJohn Marino /*FALLTHROUGH*/
31732fe07f8SJohn Marino /* these two should be handled outside this function */
31832fe07f8SJohn Marino default: /* we should never hit the default */
31932fe07f8SJohn Marino return 0;
32032fe07f8SJohn Marino }
32132fe07f8SJohn Marino }
32232fe07f8SJohn Marino
32332fe07f8SJohn Marino
32432fe07f8SJohn Marino
32532fe07f8SJohn Marino
32612db70c8Szrj libedit_private int
ct_chr_class(wchar_t c)32712db70c8Szrj ct_chr_class(wchar_t c)
32832fe07f8SJohn Marino {
32932fe07f8SJohn Marino if (c == '\t')
33032fe07f8SJohn Marino return CHTYPE_TAB;
33132fe07f8SJohn Marino else if (c == '\n')
33232fe07f8SJohn Marino return CHTYPE_NL;
33312db70c8Szrj else if (c < 0x100 && iswcntrl(c))
33432fe07f8SJohn Marino return CHTYPE_ASCIICTL;
33512db70c8Szrj else if (iswprint(c))
33632fe07f8SJohn Marino return CHTYPE_PRINT;
33732fe07f8SJohn Marino else
33832fe07f8SJohn Marino return CHTYPE_NONPRINT;
33932fe07f8SJohn Marino }
340