xref: /dflybsd-src/contrib/libedit/src/chartype.c (revision 60ecde0cdffa503f18fd5ad9437b34567263aecf)
1*60ecde0cSDaniel Fojt /*	$NetBSD: chartype.c,v 1.35 2019/07/23 10:18:52 christos Exp $	*/
232fe07f8SJohn Marino 
332fe07f8SJohn Marino /*-
432fe07f8SJohn Marino  * Copyright (c) 2009 The NetBSD Foundation, Inc.
532fe07f8SJohn Marino  * All rights reserved.
632fe07f8SJohn Marino  *
732fe07f8SJohn Marino  * Redistribution and use in source and binary forms, with or without
832fe07f8SJohn Marino  * modification, are permitted provided that the following conditions
932fe07f8SJohn Marino  * are met:
1032fe07f8SJohn Marino  * 1. Redistributions of source code must retain the above copyright
1132fe07f8SJohn Marino  *    notice, this list of conditions and the following disclaimer.
1232fe07f8SJohn Marino  * 2. Redistributions in binary form must reproduce the above copyright
1332fe07f8SJohn Marino  *    notice, this list of conditions and the following disclaimer in the
1432fe07f8SJohn Marino  *    documentation and/or other materials provided with the distribution.
1532fe07f8SJohn Marino  *
1632fe07f8SJohn Marino  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
1732fe07f8SJohn Marino  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
1832fe07f8SJohn Marino  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
1932fe07f8SJohn Marino  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2032fe07f8SJohn Marino  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2132fe07f8SJohn Marino  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2232fe07f8SJohn Marino  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2332fe07f8SJohn Marino  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2432fe07f8SJohn Marino  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2532fe07f8SJohn Marino  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2632fe07f8SJohn Marino  * POSSIBILITY OF SUCH DAMAGE.
2732fe07f8SJohn Marino  */
2832fe07f8SJohn Marino 
2932fe07f8SJohn Marino /*
3032fe07f8SJohn Marino  * chartype.c: character classification and meta information
3132fe07f8SJohn Marino  */
3232fe07f8SJohn Marino #include "config.h"
3332fe07f8SJohn Marino #if !defined(lint) && !defined(SCCSID)
34*60ecde0cSDaniel Fojt __RCSID("$NetBSD: chartype.c,v 1.35 2019/07/23 10:18:52 christos Exp $");
3532fe07f8SJohn Marino #endif /* not lint && not SCCSID */
3612db70c8Szrj 
3712db70c8Szrj #include <ctype.h>
38ae19eda8Szrj #include <limits.h>
3932fe07f8SJohn Marino #include <stdlib.h>
4012db70c8Szrj #include <string.h>
4112db70c8Szrj 
4212db70c8Szrj #include "el.h"
4332fe07f8SJohn Marino 
4432fe07f8SJohn Marino #define CT_BUFSIZ ((size_t)1024)
4532fe07f8SJohn Marino 
4612db70c8Szrj static int ct_conv_cbuff_resize(ct_buffer_t *, size_t);
4712db70c8Szrj static int ct_conv_wbuff_resize(ct_buffer_t *, size_t);
4812db70c8Szrj 
4912db70c8Szrj static int
ct_conv_cbuff_resize(ct_buffer_t * conv,size_t csize)50a0c9eb18SJohn Marino ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize)
5132fe07f8SJohn Marino {
5232fe07f8SJohn Marino 	void *p;
53a0c9eb18SJohn Marino 
54a0c9eb18SJohn Marino 	if (csize <= conv->csize)
55a0c9eb18SJohn Marino 		return 0;
56a0c9eb18SJohn Marino 
57a0c9eb18SJohn Marino 	conv->csize = csize;
58a0c9eb18SJohn Marino 
5932fe07f8SJohn Marino 	p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff));
6032fe07f8SJohn Marino 	if (p == NULL) {
6132fe07f8SJohn Marino 		conv->csize = 0;
6232fe07f8SJohn Marino 		el_free(conv->cbuff);
6332fe07f8SJohn Marino 		conv->cbuff = NULL;
64a0c9eb18SJohn Marino 		return -1;
65a0c9eb18SJohn Marino 	}
6632fe07f8SJohn Marino 	conv->cbuff = p;
67a0c9eb18SJohn Marino 	return 0;
6832fe07f8SJohn Marino }
6932fe07f8SJohn Marino 
7012db70c8Szrj static int
ct_conv_wbuff_resize(ct_buffer_t * conv,size_t wsize)71a0c9eb18SJohn Marino ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize)
72a0c9eb18SJohn Marino {
73a0c9eb18SJohn Marino 	void *p;
74a0c9eb18SJohn Marino 
75a0c9eb18SJohn Marino 	if (wsize <= conv->wsize)
76a0c9eb18SJohn Marino 		return 0;
77a0c9eb18SJohn Marino 
78a0c9eb18SJohn Marino 	conv->wsize = wsize;
79a0c9eb18SJohn Marino 
8032fe07f8SJohn Marino 	p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff));
8132fe07f8SJohn Marino 	if (p == NULL) {
8232fe07f8SJohn Marino 		conv->wsize = 0;
8332fe07f8SJohn Marino 		el_free(conv->wbuff);
8432fe07f8SJohn Marino 		conv->wbuff = NULL;
85a0c9eb18SJohn Marino 		return -1;
8632fe07f8SJohn Marino 	}
87a0c9eb18SJohn Marino 	conv->wbuff = p;
88a0c9eb18SJohn Marino 	return 0;
8932fe07f8SJohn Marino }
9032fe07f8SJohn Marino 
9132fe07f8SJohn Marino 
9212db70c8Szrj char *
ct_encode_string(const wchar_t * s,ct_buffer_t * conv)9312db70c8Szrj ct_encode_string(const wchar_t *s, ct_buffer_t *conv)
9432fe07f8SJohn Marino {
9532fe07f8SJohn Marino 	char *dst;
96a0c9eb18SJohn Marino 	ssize_t used;
9732fe07f8SJohn Marino 
9832fe07f8SJohn Marino 	if (!s)
9932fe07f8SJohn Marino 		return NULL;
10032fe07f8SJohn Marino 
10132fe07f8SJohn Marino 	dst = conv->cbuff;
102a0c9eb18SJohn Marino 	for (;;) {
103a0c9eb18SJohn Marino 		used = (ssize_t)(dst - conv->cbuff);
104a0c9eb18SJohn Marino 		if ((conv->csize - (size_t)used) < 5) {
105a0c9eb18SJohn Marino 			if (ct_conv_cbuff_resize(conv,
106a0c9eb18SJohn Marino 			    conv->csize + CT_BUFSIZ) == -1)
10732fe07f8SJohn Marino 				return NULL;
10832fe07f8SJohn Marino 			dst = conv->cbuff + used;
10932fe07f8SJohn Marino 		}
110a0c9eb18SJohn Marino 		if (!*s)
111a0c9eb18SJohn Marino 			break;
11232fe07f8SJohn Marino 		used = ct_encode_char(dst, (size_t)5, *s);
11332fe07f8SJohn Marino 		if (used == -1) /* failed to encode, need more buffer space */
11432fe07f8SJohn Marino 			abort();
11532fe07f8SJohn Marino 		++s;
11632fe07f8SJohn Marino 		dst += used;
11732fe07f8SJohn Marino 	}
11832fe07f8SJohn Marino 	*dst = '\0';
11932fe07f8SJohn Marino 	return conv->cbuff;
12032fe07f8SJohn Marino }
12132fe07f8SJohn Marino 
12212db70c8Szrj wchar_t *
ct_decode_string(const char * s,ct_buffer_t * conv)12332fe07f8SJohn Marino ct_decode_string(const char *s, ct_buffer_t *conv)
12432fe07f8SJohn Marino {
125a0c9eb18SJohn Marino 	size_t len;
12632fe07f8SJohn Marino 
12732fe07f8SJohn Marino 	if (!s)
12832fe07f8SJohn Marino 		return NULL;
12932fe07f8SJohn Marino 
13012db70c8Szrj 	len = mbstowcs(NULL, s, (size_t)0);
13132fe07f8SJohn Marino 	if (len == (size_t)-1)
13232fe07f8SJohn Marino 		return NULL;
133a0c9eb18SJohn Marino 
134a0c9eb18SJohn Marino 	if (conv->wsize < ++len)
135a0c9eb18SJohn Marino 		if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1)
13632fe07f8SJohn Marino 			return NULL;
137a0c9eb18SJohn Marino 
13812db70c8Szrj 	mbstowcs(conv->wbuff, s, conv->wsize);
13932fe07f8SJohn Marino 	return conv->wbuff;
14032fe07f8SJohn Marino }
14132fe07f8SJohn Marino 
14232fe07f8SJohn Marino 
14312db70c8Szrj libedit_private wchar_t **
ct_decode_argv(int argc,const char * argv[],ct_buffer_t * conv)14432fe07f8SJohn Marino ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
14532fe07f8SJohn Marino {
14632fe07f8SJohn Marino 	size_t bufspace;
14732fe07f8SJohn Marino 	int i;
14812db70c8Szrj 	wchar_t *p;
14912db70c8Szrj 	wchar_t **wargv;
15032fe07f8SJohn Marino 	ssize_t bytes;
15132fe07f8SJohn Marino 
15232fe07f8SJohn Marino 	/* Make sure we have enough space in the conversion buffer to store all
15332fe07f8SJohn Marino 	 * the argv strings. */
15432fe07f8SJohn Marino 	for (i = 0, bufspace = 0; i < argc; ++i)
15532fe07f8SJohn Marino 		bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
156a0c9eb18SJohn Marino 	if (conv->wsize < ++bufspace)
157a0c9eb18SJohn Marino 		if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1)
15832fe07f8SJohn Marino 			return NULL;
15932fe07f8SJohn Marino 
160*60ecde0cSDaniel Fojt 	wargv = el_calloc((size_t)(argc + 1), sizeof(*wargv));
16132fe07f8SJohn Marino 
16232fe07f8SJohn Marino 	for (i = 0, p = conv->wbuff; i < argc; ++i) {
16332fe07f8SJohn Marino 		if (!argv[i]) {   /* don't pass null pointers to mbstowcs */
16432fe07f8SJohn Marino 			wargv[i] = NULL;
16532fe07f8SJohn Marino 			continue;
16632fe07f8SJohn Marino 		} else {
16732fe07f8SJohn Marino 			wargv[i] = p;
16832fe07f8SJohn Marino 			bytes = (ssize_t)mbstowcs(p, argv[i], bufspace);
16932fe07f8SJohn Marino 		}
17032fe07f8SJohn Marino 		if (bytes == -1) {
17132fe07f8SJohn Marino 			el_free(wargv);
17232fe07f8SJohn Marino 			return NULL;
17332fe07f8SJohn Marino 		} else
17432fe07f8SJohn Marino 			bytes++;  /* include '\0' in the count */
17532fe07f8SJohn Marino 		bufspace -= (size_t)bytes;
17632fe07f8SJohn Marino 		p += bytes;
17732fe07f8SJohn Marino 	}
17812db70c8Szrj 	wargv[i] = NULL;
17932fe07f8SJohn Marino 
18032fe07f8SJohn Marino 	return wargv;
18132fe07f8SJohn Marino }
18232fe07f8SJohn Marino 
18332fe07f8SJohn Marino 
18412db70c8Szrj libedit_private size_t
ct_enc_width(wchar_t c)18512db70c8Szrj ct_enc_width(wchar_t c)
18632fe07f8SJohn Marino {
187ae19eda8Szrj 	mbstate_t mbs;
188ae19eda8Szrj 	char buf[MB_LEN_MAX];
189ae19eda8Szrj 	size_t size;
190ae19eda8Szrj 	memset(&mbs, 0, sizeof(mbs));
191ae19eda8Szrj 
192ae19eda8Szrj 	if ((size = wcrtomb(buf, c, &mbs)) == (size_t)-1)
193ae19eda8Szrj 		return 0;
194ae19eda8Szrj 	return size;
19532fe07f8SJohn Marino }
19632fe07f8SJohn Marino 
19712db70c8Szrj libedit_private ssize_t
ct_encode_char(char * dst,size_t len,wchar_t c)19812db70c8Szrj ct_encode_char(char *dst, size_t len, wchar_t c)
19932fe07f8SJohn Marino {
20032fe07f8SJohn Marino 	ssize_t l = 0;
20132fe07f8SJohn Marino 	if (len < ct_enc_width(c))
20232fe07f8SJohn Marino 		return -1;
20312db70c8Szrj 	l = wctomb(dst, c);
20432fe07f8SJohn Marino 
20532fe07f8SJohn Marino 	if (l < 0) {
20612db70c8Szrj 		wctomb(NULL, L'\0');
20732fe07f8SJohn Marino 		l = 0;
20832fe07f8SJohn Marino 	}
20932fe07f8SJohn Marino 	return l;
21032fe07f8SJohn Marino }
21132fe07f8SJohn Marino 
21212db70c8Szrj libedit_private const wchar_t *
ct_visual_string(const wchar_t * s,ct_buffer_t * conv)21312db70c8Szrj ct_visual_string(const wchar_t *s, ct_buffer_t *conv)
21432fe07f8SJohn Marino {
21512db70c8Szrj 	wchar_t *dst;
21612db70c8Szrj 	ssize_t used;
21732fe07f8SJohn Marino 
21832fe07f8SJohn Marino 	if (!s)
21932fe07f8SJohn Marino 		return NULL;
22012db70c8Szrj 
22112db70c8Szrj 	if (ct_conv_wbuff_resize(conv, CT_BUFSIZ) == -1)
22212db70c8Szrj 		return NULL;
22312db70c8Szrj 
22412db70c8Szrj 	used = 0;
22512db70c8Szrj 	dst = conv->wbuff;
22632fe07f8SJohn Marino 	while (*s) {
22712db70c8Szrj 		used = ct_visual_char(dst,
22812db70c8Szrj 		    conv->wsize - (size_t)(dst - conv->wbuff), *s);
22912db70c8Szrj 		if (used != -1) {
23032fe07f8SJohn Marino 			++s;
23132fe07f8SJohn Marino 			dst += used;
23212db70c8Szrj 			continue;
23332fe07f8SJohn Marino 		}
23412db70c8Szrj 
23512db70c8Szrj 		/* failed to encode, need more buffer space */
23612db70c8Szrj 		used = dst - conv->wbuff;
23712db70c8Szrj 		if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
23832fe07f8SJohn Marino 			return NULL;
23912db70c8Szrj 		dst = conv->wbuff + used;
24012db70c8Szrj 	}
24112db70c8Szrj 
24212db70c8Szrj 	if (dst >= (conv->wbuff + conv->wsize)) { /* sigh */
24312db70c8Szrj 		used = dst - conv->wbuff;
24412db70c8Szrj 		if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
24512db70c8Szrj 			return NULL;
24612db70c8Szrj 		dst = conv->wbuff + used;
24712db70c8Szrj 	}
24812db70c8Szrj 
24912db70c8Szrj 	*dst = L'\0';
25012db70c8Szrj 	return conv->wbuff;
25132fe07f8SJohn Marino }
25232fe07f8SJohn Marino 
25332fe07f8SJohn Marino 
25432fe07f8SJohn Marino 
25512db70c8Szrj libedit_private int
ct_visual_width(wchar_t c)25612db70c8Szrj ct_visual_width(wchar_t c)
25732fe07f8SJohn Marino {
25832fe07f8SJohn Marino 	int t = ct_chr_class(c);
25932fe07f8SJohn Marino 	switch (t) {
26032fe07f8SJohn Marino 	case CHTYPE_ASCIICTL:
26132fe07f8SJohn Marino 		return 2; /* ^@ ^? etc. */
26232fe07f8SJohn Marino 	case CHTYPE_TAB:
26332fe07f8SJohn Marino 		return 1; /* Hmm, this really need to be handled outside! */
26432fe07f8SJohn Marino 	case CHTYPE_NL:
26532fe07f8SJohn Marino 		return 0; /* Should this be 1 instead? */
26632fe07f8SJohn Marino 	case CHTYPE_PRINT:
26732fe07f8SJohn Marino 		return wcwidth(c);
26832fe07f8SJohn Marino 	case CHTYPE_NONPRINT:
26932fe07f8SJohn Marino 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
27032fe07f8SJohn Marino 			return 8; /* \U+12345 */
27132fe07f8SJohn Marino 		else
27232fe07f8SJohn Marino 			return 7; /* \U+1234 */
27332fe07f8SJohn Marino 	default:
27432fe07f8SJohn Marino 		return 0; /* should not happen */
27532fe07f8SJohn Marino 	}
27632fe07f8SJohn Marino }
27732fe07f8SJohn Marino 
27832fe07f8SJohn Marino 
27912db70c8Szrj libedit_private ssize_t
ct_visual_char(wchar_t * dst,size_t len,wchar_t c)28012db70c8Szrj ct_visual_char(wchar_t *dst, size_t len, wchar_t c)
28132fe07f8SJohn Marino {
28232fe07f8SJohn Marino 	int t = ct_chr_class(c);
28332fe07f8SJohn Marino 	switch (t) {
28432fe07f8SJohn Marino 	case CHTYPE_TAB:
28532fe07f8SJohn Marino 	case CHTYPE_NL:
28632fe07f8SJohn Marino 	case CHTYPE_ASCIICTL:
28732fe07f8SJohn Marino 		if (len < 2)
28832fe07f8SJohn Marino 			return -1;   /* insufficient space */
28932fe07f8SJohn Marino 		*dst++ = '^';
29032fe07f8SJohn Marino 		if (c == '\177')
29132fe07f8SJohn Marino 			*dst = '?'; /* DEL -> ^? */
29232fe07f8SJohn Marino 		else
29332fe07f8SJohn Marino 			*dst = c | 0100;    /* uncontrolify it */
29432fe07f8SJohn Marino 		return 2;
29532fe07f8SJohn Marino 	case CHTYPE_PRINT:
29632fe07f8SJohn Marino 		if (len < 1)
29732fe07f8SJohn Marino 			return -1;  /* insufficient space */
29832fe07f8SJohn Marino 		*dst = c;
29932fe07f8SJohn Marino 		return 1;
30032fe07f8SJohn Marino 	case CHTYPE_NONPRINT:
30132fe07f8SJohn Marino 		/* we only use single-width glyphs for display,
30232fe07f8SJohn Marino 		 * so this is right */
30332fe07f8SJohn Marino 		if ((ssize_t)len < ct_visual_width(c))
30432fe07f8SJohn Marino 			return -1;   /* insufficient space */
30532fe07f8SJohn Marino 		*dst++ = '\\';
30632fe07f8SJohn Marino 		*dst++ = 'U';
30732fe07f8SJohn Marino 		*dst++ = '+';
30832fe07f8SJohn Marino #define tohexdigit(v) "0123456789ABCDEF"[v]
30932fe07f8SJohn Marino 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
31032fe07f8SJohn Marino 			*dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
31132fe07f8SJohn Marino 		*dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
31232fe07f8SJohn Marino 		*dst++ = tohexdigit(((unsigned int) c >>  8) & 0xf);
31332fe07f8SJohn Marino 		*dst++ = tohexdigit(((unsigned int) c >>  4) & 0xf);
31432fe07f8SJohn Marino 		*dst   = tohexdigit(((unsigned int) c      ) & 0xf);
31532fe07f8SJohn Marino 		return c > 0xffff ? 8 : 7;
31632fe07f8SJohn Marino 		/*FALLTHROUGH*/
31732fe07f8SJohn Marino 	/* these two should be handled outside this function */
31832fe07f8SJohn Marino 	default:            /* we should never hit the default */
31932fe07f8SJohn Marino 		return 0;
32032fe07f8SJohn Marino 	}
32132fe07f8SJohn Marino }
32232fe07f8SJohn Marino 
32332fe07f8SJohn Marino 
32432fe07f8SJohn Marino 
32532fe07f8SJohn Marino 
32612db70c8Szrj libedit_private int
ct_chr_class(wchar_t c)32712db70c8Szrj ct_chr_class(wchar_t c)
32832fe07f8SJohn Marino {
32932fe07f8SJohn Marino 	if (c == '\t')
33032fe07f8SJohn Marino 		return CHTYPE_TAB;
33132fe07f8SJohn Marino 	else if (c == '\n')
33232fe07f8SJohn Marino 		return CHTYPE_NL;
33312db70c8Szrj 	else if (c < 0x100 && iswcntrl(c))
33432fe07f8SJohn Marino 		return CHTYPE_ASCIICTL;
33512db70c8Szrj 	else if (iswprint(c))
33632fe07f8SJohn Marino 		return CHTYPE_PRINT;
33732fe07f8SJohn Marino 	else
33832fe07f8SJohn Marino 		return CHTYPE_NONPRINT;
33932fe07f8SJohn Marino }
340