xref: /openbsd-src/lib/libedit/chartype.c (revision 45e13dcbc13f9325c6eae482b9d8d72f5b38f2ac)
1*45e13dcbSyasuoka /*	$OpenBSD: chartype.c,v 1.16 2019/01/29 09:47:00 yasuoka Exp $	*/
2c046fa78Syasuoka /*	$NetBSD: chartype.c,v 1.6 2011/07/28 00:48:21 christos Exp $	*/
3aed0ee81Snicm 
4aed0ee81Snicm /*-
5aed0ee81Snicm  * Copyright (c) 2009 The NetBSD Foundation, Inc.
6aed0ee81Snicm  * All rights reserved.
7aed0ee81Snicm  *
8aed0ee81Snicm  * Redistribution and use in source and binary forms, with or without
9aed0ee81Snicm  * modification, are permitted provided that the following conditions
10aed0ee81Snicm  * are met:
11aed0ee81Snicm  * 1. Redistributions of source code must retain the above copyright
12aed0ee81Snicm  *    notice, this list of conditions and the following disclaimer.
13aed0ee81Snicm  * 2. Redistributions in binary form must reproduce the above copyright
14aed0ee81Snicm  *    notice, this list of conditions and the following disclaimer in the
15aed0ee81Snicm  *    documentation and/or other materials provided with the distribution.
16aed0ee81Snicm  *
17aed0ee81Snicm  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18aed0ee81Snicm  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19aed0ee81Snicm  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20aed0ee81Snicm  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21aed0ee81Snicm  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22aed0ee81Snicm  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23aed0ee81Snicm  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24aed0ee81Snicm  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25aed0ee81Snicm  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26aed0ee81Snicm  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27aed0ee81Snicm  * POSSIBILITY OF SUCH DAMAGE.
28aed0ee81Snicm  */
29aed0ee81Snicm 
30aed0ee81Snicm /*
31aed0ee81Snicm  * chartype.c: character classification and meta information
32aed0ee81Snicm  */
33aed0ee81Snicm #include "config.h"
347ccfa089Sschwarze 
357ccfa089Sschwarze #include <ctype.h>
365f805b19Sokan #include <stdlib.h>
377ccfa089Sschwarze #include <string.h>
387ccfa089Sschwarze 
397ccfa089Sschwarze #include "el.h"
40aed0ee81Snicm 
41aed0ee81Snicm #define CT_BUFSIZ 1024
42aed0ee81Snicm 
43ddc81437Sschwarze static void ct_conv_buff_resize(ct_buffer_t *, size_t, size_t);
448dc8c690Sschwarze 
/*
 * ct_conv_buff_resize: grow the conversion buffers held in conv.
 *
 * mincsize is the wanted capacity of conv->cbuff in chars and minwsize the
 * wanted capacity of conv->wbuff in wide characters.  A buffer is only
 * touched when the request exceeds its current size; buffers never shrink.
 *
 * Nothing is returned.  If a reallocation fails, the affected buffer is
 * freed, its pointer is set to NULL and its recorded size to 0, so callers
 * detect failure by inspecting conv->cbuff / conv->wbuff (or the sizes).
 */
static void
ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize)
{
	void *grown;

	/* Narrow (multibyte) buffer. */
	if (conv->csize < mincsize) {
		conv->csize = mincsize;
		grown = reallocarray(conv->cbuff, conv->csize, sizeof(char));
		if (grown != NULL) {
			conv->cbuff = grown;
		} else {
			/* Drop the old storage rather than keep a short buffer. */
			free(conv->cbuff);
			conv->cbuff = NULL;
			conv->csize = 0;
		}
	}

	/* Wide character buffer. */
	if (conv->wsize < minwsize) {
		conv->wsize = minwsize;
		grown = reallocarray(conv->wbuff, conv->wsize, sizeof(wchar_t));
		if (grown != NULL) {
			conv->wbuff = grown;
		} else {
			/* Drop the old storage rather than keep a short buffer. */
			free(conv->wbuff);
			conv->wbuff = NULL;
			conv->wsize = 0;
		}
	}
}
71aed0ee81Snicm 
72aed0ee81Snicm 
73ddc81437Sschwarze char *
ct_encode_string(const wchar_t * s,ct_buffer_t * conv)74e3191321Sschwarze ct_encode_string(const wchar_t *s, ct_buffer_t *conv)
75aed0ee81Snicm {
76aed0ee81Snicm 	char *dst;
77aed0ee81Snicm 	ssize_t used = 0;
78aed0ee81Snicm 
79aed0ee81Snicm 	if (!s)
80aed0ee81Snicm 		return NULL;
81aed0ee81Snicm 	if (!conv->cbuff)
82aed0ee81Snicm 		ct_conv_buff_resize(conv, CT_BUFSIZ, 0);
83aed0ee81Snicm 	if (!conv->cbuff)
84aed0ee81Snicm 		return NULL;
85aed0ee81Snicm 
86aed0ee81Snicm 	dst = conv->cbuff;
87aed0ee81Snicm 	while (*s) {
88c046fa78Syasuoka 		used = conv->csize - (dst - conv->cbuff);
89c046fa78Syasuoka 		if (used < 5) {
90aed0ee81Snicm 			used = dst - conv->cbuff;
91aed0ee81Snicm 			ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ, 0);
92aed0ee81Snicm 			if (!conv->cbuff)
93aed0ee81Snicm 				return NULL;
94aed0ee81Snicm 			dst = conv->cbuff + used;
95aed0ee81Snicm 		}
96c046fa78Syasuoka 		used = ct_encode_char(dst, 5, *s);
97c046fa78Syasuoka 		if (used == -1) /* failed to encode, need more buffer space */
98c046fa78Syasuoka 			abort();
99aed0ee81Snicm 		++s;
100aed0ee81Snicm 		dst += used;
101aed0ee81Snicm 	}
102aed0ee81Snicm 	*dst = '\0';
103aed0ee81Snicm 	return conv->cbuff;
104aed0ee81Snicm }
105aed0ee81Snicm 
106ddc81437Sschwarze wchar_t *
ct_decode_string(const char * s,ct_buffer_t * conv)107aed0ee81Snicm ct_decode_string(const char *s, ct_buffer_t *conv)
108aed0ee81Snicm {
109aed0ee81Snicm 	size_t len = 0;
110aed0ee81Snicm 
111aed0ee81Snicm 	if (!s)
112aed0ee81Snicm 		return NULL;
113aed0ee81Snicm 	if (!conv->wbuff)
114aed0ee81Snicm 		ct_conv_buff_resize(conv, 0, CT_BUFSIZ);
115aed0ee81Snicm 	if (!conv->wbuff)
116aed0ee81Snicm 		return NULL;
117aed0ee81Snicm 
118565aa7e8Sschwarze 	len = mbstowcs(NULL, s, 0);
119c046fa78Syasuoka 	if (len == (size_t)-1)
120c046fa78Syasuoka 		return NULL;
121aed0ee81Snicm 	if (len > conv->wsize)
122aed0ee81Snicm 		ct_conv_buff_resize(conv, 0, len + 1);
123aed0ee81Snicm 	if (!conv->wbuff)
124aed0ee81Snicm 		return NULL;
125565aa7e8Sschwarze 
126565aa7e8Sschwarze 	mbstowcs(conv->wbuff, s, conv->wsize);
127aed0ee81Snicm 	return conv->wbuff;
128aed0ee81Snicm }
129aed0ee81Snicm 
130aed0ee81Snicm 
/*
 * ct_decode_argv: convert an argv-style array of multibyte strings to wide
 * strings.
 *
 * The converted strings are packed back to back into conv->wbuff; the
 * returned array holds pointers into that buffer, one per input argument,
 * followed by a terminating NULL entry.  NULL entries in argv are passed
 * through as NULL.
 *
 * The pointer array itself is freshly allocated on every call and is not
 * recorded anywhere in this function, so the caller presumably has to
 * free() it (only the array, not the strings) - confirm against callers.
 *
 * Returns NULL when argc is negative, when no wide buffer is available,
 * when the pointer array cannot be allocated, or when an argument contains
 * an invalid multibyte sequence.
 */
protected wchar_t **
ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
{
	size_t bufspace;
	int i;
	wchar_t *p;
	wchar_t **wargv;
	size_t wlen;

	if (argc < 0)
		return NULL;

	/*
	 * Make sure we have enough space in the conversion buffer to store all
	 * the argv strings.  One wide character per input byte (including each
	 * NUL) is an upper bound; the request below is scaled by
	 * sizeof(*p) on top of that, which over-allocates but is harmless.
	 */
	for (i = 0, bufspace = 0; i < argc; ++i)
		bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
	ct_conv_buff_resize(conv, 0, bufspace * sizeof(*p));
	if (!conv->wsize)
		return NULL;

	wargv = reallocarray(NULL, (size_t)argc + 1, sizeof(*wargv));
	if (wargv == NULL)
		return NULL;

	for (i = 0, p = conv->wbuff; i < argc; ++i) {
		if (!argv[i]) {   /* don't pass null pointers to mbstowcs */
			wargv[i] = NULL;
			continue;
		}

		wargv[i] = p;
		wlen = mbstowcs(p, argv[i], bufspace);
		if (wlen == (size_t)-1 || wlen == bufspace) {
			/* Encoding error or not enough room for NUL. */
			free(wargv);
			return NULL;
		}

		wlen++; /* include NUL in the count */
		bufspace -= wlen;
		p += wlen;
	}
	wargv[i] = NULL;

	return wargv;
}
171aed0ee81Snicm 
172aed0ee81Snicm 
/*
 * ct_enc_width: number of bytes needed to encode c, using UTF-8 specific
 * thresholds.
 *
 * Returns 1 to 4 for values below 0x110000 and 0 for anything at or above
 * it, which is not a valid codepoint.
 */
protected size_t
ct_enc_width(wchar_t c)
{
	/* Checked from the top range downwards. */
	if (c >= 0x110000)
		return 0; /* not a valid codepoint */
	if (c >= 0x10000)
		return 4;
	if (c >= 0x0800)
		return 3;
	if (c >= 0x80)
		return 2;
	return 1;
}
188aed0ee81Snicm 
/*
 * ct_encode_char: encode the wide character c as a multibyte sequence
 * at dst, where len is the space available in bytes.
 *
 * Returns -1 when len is smaller than ct_enc_width(c); otherwise the
 * number of bytes wctomb() stored.  If wctomb() itself fails, the
 * conversion state is reset and 0 is returned, i.e. the character is
 * silently skipped.
 */
protected ssize_t
ct_encode_char(char *dst, size_t len, wchar_t c)
{
	ssize_t nbytes;

	if (ct_enc_width(c) > len)
		return -1;

	nbytes = wctomb(dst, c);
	if (nbytes >= 0)
		return nbytes;

	/* Unencodable character: return wctomb() to its initial state. */
	wctomb(NULL, L'\0');
	return 0;
}
2033a40234dSschwarze 
/*
 * ct_visual_string: build the visual (printable) form of the wide string
 * s by passing every character through ct_visual_char().
 *
 * The result is kept in a function-local static buffer that grows in
 * CT_BUFSIZ steps and is reused by every call, so the returned pointer is
 * only valid until the next call and must not be freed by the caller.
 *
 * Returns the NUL terminated visual string, or NULL when s is NULL or
 * memory cannot be allocated.  After an allocation failure the static
 * buffer is released and reset, so a later call starts from scratch.
 */
protected const wchar_t *
ct_visual_string(const wchar_t *s)
{
	static wchar_t *buff = NULL;
	static size_t buffsize = 0;
	void *p;
	wchar_t *dst;
	ssize_t used;

	if (!s)
		return NULL;
	if (!buff) {
		buff = reallocarray(NULL, CT_BUFSIZ, sizeof(*buff));
		if (buff == NULL) {
			buffsize = 0;
			return NULL;
		}
		buffsize = CT_BUFSIZ;
	}

	dst = buff;
	while (*s) {
		used = ct_visual_char(dst, buffsize - (size_t)(dst - buff), *s);
		if (used != -1) {
			++s;
			dst += used;
			continue;
		}

		/*
		 * Not enough room for this character: grow the buffer and
		 * retry the same character.  Only the write offset is
		 * restored here; dst must not be advanced again, otherwise
		 * the offset would be doubled and run past the buffer.
		 */
		used = dst - buff;
		p = reallocarray(buff, buffsize + CT_BUFSIZ, sizeof(*buff));
		if (p == NULL)
			goto out;
		buff = p;
		buffsize += CT_BUFSIZ;
		dst = buff + used;
	}

	/* The buffer may be exactly full; make room for the terminator. */
	if (dst >= (buff + buffsize)) {
		used = dst - buff;
		p = reallocarray(buff, buffsize + 1, sizeof(*buff));
		if (p == NULL)
			goto out;
		buff = p;
		buffsize += 1;
		dst = buff + used;
	}
	*dst = 0;
	return buff;

out:
	/* Reset the static state so the next call does not reuse freed memory. */
	free(buff);
	buff = NULL;
	buffsize = 0;
	return NULL;
}
251aed0ee81Snicm 
252aed0ee81Snicm 
253aed0ee81Snicm 
/*
 * ct_visual_width: number of display columns used by the visual form
 * of c, selected by its ct_chr_class() class:
 *
 *   CHTYPE_ASCIICTL  2  (^@ ^? etc.)
 *   CHTYPE_TAB       1  (really needs to be handled outside)
 *   CHTYPE_NL        0  (should this be 1 instead?)
 *   CHTYPE_PRINT     wcwidth(c), with -1 mapped to 0
 *   CHTYPE_NONPRINT  8 above 0xffff (\U+12345), otherwise 7 (\U+1234)
 *
 * Any other class yields 0; that should not happen.
 */
protected int
ct_visual_width(wchar_t c)
{
	const int cls = ct_chr_class(c);
	int cols;

	if (cls == CHTYPE_ASCIICTL)
		return 2;
	if (cls == CHTYPE_TAB)
		return 1;
	if (cls == CHTYPE_NL)
		return 0;
	if (cls == CHTYPE_PRINT) {
		cols = wcwidth(c);
		if (cols == -1)
			return 0;
		return cols;
	}
	if (cls == CHTYPE_NONPRINT) {
		/* prefer standard 4-byte display over 5-byte */
		return (c > 0xffff) ? 8 : 7;
	}

	return 0; /* should not happen */
}
278aed0ee81Snicm 
279aed0ee81Snicm 
/*
 * ct_visual_char: write the visual form of c to dst, which has room for
 * len wide characters.  No terminating NUL is written.
 *
 * Tab, newline and ASCII control characters become a two character
 * caret sequence (DEL becomes ^?), printable characters are copied
 * unchanged, and non-printable characters become \U+ followed by four
 * hex digits, or five when c is above 0xffff.
 *
 * Returns the number of wide characters written, -1 when len is too
 * small for the sequence, or 0 for an unexpected character class.
 */
protected ssize_t
ct_visual_char(wchar_t *dst, size_t len, wchar_t c)
{
	static const char hexdigits[] = "0123456789ABCDEF";
	ssize_t n;
	int shift;

	switch (ct_chr_class(c)) {
	case CHTYPE_TAB:
	case CHTYPE_NL:
	case CHTYPE_ASCIICTL:
		if (len < 2)
			return -1;   /* insufficient space */
		dst[0] = '^';
		/* DEL -> ^?, everything else is uncontrolified */
		dst[1] = (c == '\177') ? '?' : (c | 0100);
		return 2;

	case CHTYPE_PRINT:
		if (len < 1)
			return -1;  /* insufficient space */
		dst[0] = c;
		return 1;

	case CHTYPE_NONPRINT:
		/* we only use single-width glyphs for display,
		 * so comparing against the visual width is right */
		if ((ssize_t)len < ct_visual_width(c))
			return -1;   /* insufficient space */
		n = 0;
		dst[n++] = '\\';
		dst[n++] = 'U';
		dst[n++] = '+';
		/*
		 * prefer standard 4-digit display over 5-digit.
		 * NOTE(review): at most five hex digits are emitted, so the
		 * bits above bit 19 of c are not shown.
		 */
		shift = (c > 0xffff) ? 16 : 12;
		for (; shift >= 0; shift -= 4)
			dst[n++] = hexdigits[((unsigned int) c >> shift) & 0xf];
		return n;	/* 8 when c > 0xffff, otherwise 7 */

	default:            /* we should never hit the default */
		return 0;
	}
}
323aed0ee81Snicm 
324aed0ee81Snicm 
325aed0ee81Snicm 
326aed0ee81Snicm 
/*
 * ct_chr_class: classify c for display purposes.
 *
 * Returns CHTYPE_TAB for a tab, CHTYPE_NL for a newline, CHTYPE_ASCIICTL
 * for other control characters below 0x100, CHTYPE_PRINT for anything
 * iswprint() accepts, and CHTYPE_NONPRINT for everything else.
 */
protected int
ct_chr_class(wchar_t c)
{
	/* Tab and newline get classes of their own. */
	switch (c) {
	case '\t':
		return CHTYPE_TAB;
	case '\n':
		return CHTYPE_NL;
	default:
		break;
	}

	if (c < 0x100 && iswcntrl(c))
		return CHTYPE_ASCIICTL;

	return iswprint(c) ? CHTYPE_PRINT : CHTYPE_NONPRINT;
}
341