1*45e13dcbSyasuoka /* $OpenBSD: chartype.c,v 1.16 2019/01/29 09:47:00 yasuoka Exp $ */
2c046fa78Syasuoka /* $NetBSD: chartype.c,v 1.6 2011/07/28 00:48:21 christos Exp $ */
3aed0ee81Snicm
4aed0ee81Snicm /*-
5aed0ee81Snicm * Copyright (c) 2009 The NetBSD Foundation, Inc.
6aed0ee81Snicm * All rights reserved.
7aed0ee81Snicm *
8aed0ee81Snicm * Redistribution and use in source and binary forms, with or without
9aed0ee81Snicm * modification, are permitted provided that the following conditions
10aed0ee81Snicm * are met:
11aed0ee81Snicm * 1. Redistributions of source code must retain the above copyright
12aed0ee81Snicm * notice, this list of conditions and the following disclaimer.
13aed0ee81Snicm * 2. Redistributions in binary form must reproduce the above copyright
14aed0ee81Snicm * notice, this list of conditions and the following disclaimer in the
15aed0ee81Snicm * documentation and/or other materials provided with the distribution.
16aed0ee81Snicm *
17aed0ee81Snicm * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18aed0ee81Snicm * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19aed0ee81Snicm * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20aed0ee81Snicm * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21aed0ee81Snicm * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22aed0ee81Snicm * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23aed0ee81Snicm * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24aed0ee81Snicm * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25aed0ee81Snicm * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26aed0ee81Snicm * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27aed0ee81Snicm * POSSIBILITY OF SUCH DAMAGE.
28aed0ee81Snicm */
29aed0ee81Snicm
30aed0ee81Snicm /*
31aed0ee81Snicm * chartype.c: character classification and meta information
32aed0ee81Snicm */
33aed0ee81Snicm #include "config.h"
347ccfa089Sschwarze
357ccfa089Sschwarze #include <ctype.h>
365f805b19Sokan #include <stdlib.h>
377ccfa089Sschwarze #include <string.h>
387ccfa089Sschwarze
397ccfa089Sschwarze #include "el.h"
40aed0ee81Snicm
41aed0ee81Snicm #define CT_BUFSIZ 1024
42aed0ee81Snicm
43ddc81437Sschwarze static void ct_conv_buff_resize(ct_buffer_t *, size_t, size_t);
448dc8c690Sschwarze
45ddc81437Sschwarze static void
ct_conv_buff_resize(ct_buffer_t * conv,size_t mincsize,size_t minwsize)46aed0ee81Snicm ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize)
47aed0ee81Snicm {
48aed0ee81Snicm void *p;
49aed0ee81Snicm if (mincsize > conv->csize) {
50aed0ee81Snicm conv->csize = mincsize;
51014b1be8Sderaadt p = reallocarray(conv->cbuff, conv->csize, sizeof(char));
52aed0ee81Snicm if (p == NULL) {
53aed0ee81Snicm conv->csize = 0;
54014b1be8Sderaadt free(conv->cbuff);
55aed0ee81Snicm conv->cbuff = NULL;
56aed0ee81Snicm } else
57aed0ee81Snicm conv->cbuff = p;
58aed0ee81Snicm }
59aed0ee81Snicm
60aed0ee81Snicm if (minwsize > conv->wsize) {
61aed0ee81Snicm conv->wsize = minwsize;
62e3191321Sschwarze p = reallocarray(conv->wbuff, conv->wsize, sizeof(wchar_t));
63aed0ee81Snicm if (p == NULL) {
64aed0ee81Snicm conv->wsize = 0;
65014b1be8Sderaadt free(conv->wbuff);
66aed0ee81Snicm conv->wbuff = NULL;
67aed0ee81Snicm } else
68aed0ee81Snicm conv->wbuff = p;
69aed0ee81Snicm }
70aed0ee81Snicm }
71aed0ee81Snicm
72aed0ee81Snicm
73ddc81437Sschwarze char *
ct_encode_string(const wchar_t * s,ct_buffer_t * conv)74e3191321Sschwarze ct_encode_string(const wchar_t *s, ct_buffer_t *conv)
75aed0ee81Snicm {
76aed0ee81Snicm char *dst;
77aed0ee81Snicm ssize_t used = 0;
78aed0ee81Snicm
79aed0ee81Snicm if (!s)
80aed0ee81Snicm return NULL;
81aed0ee81Snicm if (!conv->cbuff)
82aed0ee81Snicm ct_conv_buff_resize(conv, CT_BUFSIZ, 0);
83aed0ee81Snicm if (!conv->cbuff)
84aed0ee81Snicm return NULL;
85aed0ee81Snicm
86aed0ee81Snicm dst = conv->cbuff;
87aed0ee81Snicm while (*s) {
88c046fa78Syasuoka used = conv->csize - (dst - conv->cbuff);
89c046fa78Syasuoka if (used < 5) {
90aed0ee81Snicm used = dst - conv->cbuff;
91aed0ee81Snicm ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ, 0);
92aed0ee81Snicm if (!conv->cbuff)
93aed0ee81Snicm return NULL;
94aed0ee81Snicm dst = conv->cbuff + used;
95aed0ee81Snicm }
96c046fa78Syasuoka used = ct_encode_char(dst, 5, *s);
97c046fa78Syasuoka if (used == -1) /* failed to encode, need more buffer space */
98c046fa78Syasuoka abort();
99aed0ee81Snicm ++s;
100aed0ee81Snicm dst += used;
101aed0ee81Snicm }
102aed0ee81Snicm *dst = '\0';
103aed0ee81Snicm return conv->cbuff;
104aed0ee81Snicm }
105aed0ee81Snicm
106ddc81437Sschwarze wchar_t *
ct_decode_string(const char * s,ct_buffer_t * conv)107aed0ee81Snicm ct_decode_string(const char *s, ct_buffer_t *conv)
108aed0ee81Snicm {
109aed0ee81Snicm size_t len = 0;
110aed0ee81Snicm
111aed0ee81Snicm if (!s)
112aed0ee81Snicm return NULL;
113aed0ee81Snicm if (!conv->wbuff)
114aed0ee81Snicm ct_conv_buff_resize(conv, 0, CT_BUFSIZ);
115aed0ee81Snicm if (!conv->wbuff)
116aed0ee81Snicm return NULL;
117aed0ee81Snicm
118565aa7e8Sschwarze len = mbstowcs(NULL, s, 0);
119c046fa78Syasuoka if (len == (size_t)-1)
120c046fa78Syasuoka return NULL;
121aed0ee81Snicm if (len > conv->wsize)
122aed0ee81Snicm ct_conv_buff_resize(conv, 0, len + 1);
123aed0ee81Snicm if (!conv->wbuff)
124aed0ee81Snicm return NULL;
125565aa7e8Sschwarze
126565aa7e8Sschwarze mbstowcs(conv->wbuff, s, conv->wsize);
127aed0ee81Snicm return conv->wbuff;
128aed0ee81Snicm }
129aed0ee81Snicm
130aed0ee81Snicm
131e3191321Sschwarze protected wchar_t **
ct_decode_argv(int argc,const char * argv[],ct_buffer_t * conv)132aed0ee81Snicm ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
133aed0ee81Snicm {
134aed0ee81Snicm size_t bufspace;
135aed0ee81Snicm int i;
136e3191321Sschwarze wchar_t *p;
137e3191321Sschwarze wchar_t **wargv;
13856494721Snicm size_t wlen;
139aed0ee81Snicm
140aed0ee81Snicm /* Make sure we have enough space in the conversion buffer to store all
141aed0ee81Snicm * the argv strings. */
142aed0ee81Snicm for (i = 0, bufspace = 0; i < argc; ++i)
143aed0ee81Snicm bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
14456494721Snicm ct_conv_buff_resize(conv, 0, bufspace * sizeof(*p));
145aed0ee81Snicm if (!conv->wsize)
146aed0ee81Snicm return NULL;
147aed0ee81Snicm
148*45e13dcbSyasuoka wargv = reallocarray(NULL, argc + 1, sizeof(*wargv));
149aed0ee81Snicm
150aed0ee81Snicm for (i = 0, p = conv->wbuff; i < argc; ++i) {
151aed0ee81Snicm if (!argv[i]) { /* don't pass null pointers to mbstowcs */
152aed0ee81Snicm wargv[i] = NULL;
153aed0ee81Snicm continue;
154aed0ee81Snicm } else {
155aed0ee81Snicm wargv[i] = p;
15656494721Snicm wlen = mbstowcs(p, argv[i], bufspace);
157aed0ee81Snicm }
15856494721Snicm if (wlen == (size_t)-1 || wlen == bufspace) {
15956494721Snicm /* Encoding error or not enough room for NUL. */
160014b1be8Sderaadt free(wargv);
161aed0ee81Snicm return NULL;
162aed0ee81Snicm } else
16356494721Snicm wlen++; /* include NUL in the count */
16456494721Snicm bufspace -= wlen;
16556494721Snicm p += wlen;
166aed0ee81Snicm }
167*45e13dcbSyasuoka wargv[i] = NULL;
168aed0ee81Snicm
169aed0ee81Snicm return wargv;
170aed0ee81Snicm }
171aed0ee81Snicm
172aed0ee81Snicm
173aed0ee81Snicm protected size_t
ct_enc_width(wchar_t c)174e3191321Sschwarze ct_enc_width(wchar_t c)
175aed0ee81Snicm {
176aed0ee81Snicm /* UTF-8 encoding specific values */
177aed0ee81Snicm if (c < 0x80)
178aed0ee81Snicm return 1;
179aed0ee81Snicm else if (c < 0x0800)
180aed0ee81Snicm return 2;
181aed0ee81Snicm else if (c < 0x10000)
182aed0ee81Snicm return 3;
183aed0ee81Snicm else if (c < 0x110000)
184aed0ee81Snicm return 4;
185aed0ee81Snicm else
186aed0ee81Snicm return 0; /* not a valid codepoint */
187aed0ee81Snicm }
188aed0ee81Snicm
189aed0ee81Snicm protected ssize_t
ct_encode_char(char * dst,size_t len,wchar_t c)190e3191321Sschwarze ct_encode_char(char *dst, size_t len, wchar_t c)
191aed0ee81Snicm {
192aed0ee81Snicm ssize_t l = 0;
193aed0ee81Snicm if (len < ct_enc_width(c))
194aed0ee81Snicm return -1;
195565aa7e8Sschwarze l = wctomb(dst, c);
196aed0ee81Snicm
197aed0ee81Snicm if (l < 0) {
198565aa7e8Sschwarze wctomb(NULL, L'\0');
199aed0ee81Snicm l = 0;
200aed0ee81Snicm }
201aed0ee81Snicm return l;
202aed0ee81Snicm }
2033a40234dSschwarze
204e3191321Sschwarze protected const wchar_t *
ct_visual_string(const wchar_t * s)205e3191321Sschwarze ct_visual_string(const wchar_t *s)
206aed0ee81Snicm {
207e3191321Sschwarze static wchar_t *buff = NULL;
208aed0ee81Snicm static size_t buffsize = 0;
209aed0ee81Snicm void *p;
210e3191321Sschwarze wchar_t *dst;
211aed0ee81Snicm ssize_t used = 0;
212aed0ee81Snicm
213aed0ee81Snicm if (!s)
214aed0ee81Snicm return NULL;
215aed0ee81Snicm if (!buff) {
216aed0ee81Snicm buffsize = CT_BUFSIZ;
217014b1be8Sderaadt buff = reallocarray(NULL, buffsize, sizeof(*buff));
218aed0ee81Snicm }
219aed0ee81Snicm dst = buff;
220aed0ee81Snicm while (*s) {
221aed0ee81Snicm used = ct_visual_char(dst, buffsize - (dst - buff), *s);
222aed0ee81Snicm if (used == -1) { /* failed to encode, need more buffer space */
223aed0ee81Snicm used = dst - buff;
224aed0ee81Snicm buffsize += CT_BUFSIZ;
225014b1be8Sderaadt p = reallocarray(buff, buffsize, sizeof(*buff));
226aed0ee81Snicm if (p == NULL)
227aed0ee81Snicm goto out;
228aed0ee81Snicm buff = p;
229aed0ee81Snicm dst = buff + used;
230aed0ee81Snicm /* don't increment s here - we want to retry it! */
231aed0ee81Snicm }
232aed0ee81Snicm else
233aed0ee81Snicm ++s;
234aed0ee81Snicm dst += used;
235aed0ee81Snicm }
236aed0ee81Snicm if (dst >= (buff + buffsize)) { /* sigh */
237aed0ee81Snicm buffsize += 1;
238014b1be8Sderaadt p = reallocarray(buff, buffsize, sizeof(*buff));
239aed0ee81Snicm if (p == NULL)
240aed0ee81Snicm goto out;
241aed0ee81Snicm buff = p;
242aed0ee81Snicm dst = buff + buffsize - 1;
243aed0ee81Snicm }
244aed0ee81Snicm *dst = 0;
245aed0ee81Snicm return buff;
246aed0ee81Snicm out:
247014b1be8Sderaadt free(buff);
248aed0ee81Snicm buffsize = 0;
249aed0ee81Snicm return NULL;
250aed0ee81Snicm }
251aed0ee81Snicm
252aed0ee81Snicm
253aed0ee81Snicm
254aed0ee81Snicm protected int
ct_visual_width(wchar_t c)255e3191321Sschwarze ct_visual_width(wchar_t c)
256aed0ee81Snicm {
257aed0ee81Snicm int t = ct_chr_class(c);
258cf7973e1Sstsp int w;
259aed0ee81Snicm switch (t) {
260aed0ee81Snicm case CHTYPE_ASCIICTL:
261aed0ee81Snicm return 2; /* ^@ ^? etc. */
262aed0ee81Snicm case CHTYPE_TAB:
263aed0ee81Snicm return 1; /* Hmm, this really need to be handled outside! */
264aed0ee81Snicm case CHTYPE_NL:
265aed0ee81Snicm return 0; /* Should this be 1 instead? */
266aed0ee81Snicm case CHTYPE_PRINT:
267cf7973e1Sstsp w = wcwidth(c);
268cf7973e1Sstsp return (w == -1 ? 0 : w);
269aed0ee81Snicm case CHTYPE_NONPRINT:
270aed0ee81Snicm if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
271aed0ee81Snicm return 8; /* \U+12345 */
272aed0ee81Snicm else
273aed0ee81Snicm return 7; /* \U+1234 */
274aed0ee81Snicm default:
275aed0ee81Snicm return 0; /* should not happen */
276aed0ee81Snicm }
277aed0ee81Snicm }
278aed0ee81Snicm
279aed0ee81Snicm
280aed0ee81Snicm protected ssize_t
ct_visual_char(wchar_t * dst,size_t len,wchar_t c)281e3191321Sschwarze ct_visual_char(wchar_t *dst, size_t len, wchar_t c)
282aed0ee81Snicm {
283aed0ee81Snicm int t = ct_chr_class(c);
284aed0ee81Snicm switch (t) {
285aed0ee81Snicm case CHTYPE_TAB:
286aed0ee81Snicm case CHTYPE_NL:
287aed0ee81Snicm case CHTYPE_ASCIICTL:
288aed0ee81Snicm if (len < 2)
289aed0ee81Snicm return -1; /* insufficient space */
290aed0ee81Snicm *dst++ = '^';
291aed0ee81Snicm if (c == '\177')
292aed0ee81Snicm *dst = '?'; /* DEL -> ^? */
293aed0ee81Snicm else
294aed0ee81Snicm *dst = c | 0100; /* uncontrolify it */
295aed0ee81Snicm return 2;
296aed0ee81Snicm case CHTYPE_PRINT:
297aed0ee81Snicm if (len < 1)
298aed0ee81Snicm return -1; /* insufficient space */
299aed0ee81Snicm *dst = c;
300aed0ee81Snicm return 1;
301aed0ee81Snicm case CHTYPE_NONPRINT:
302aed0ee81Snicm /* we only use single-width glyphs for display,
303aed0ee81Snicm * so this is right */
304aed0ee81Snicm if ((ssize_t)len < ct_visual_width(c))
305aed0ee81Snicm return -1; /* insufficient space */
306aed0ee81Snicm *dst++ = '\\';
307aed0ee81Snicm *dst++ = 'U';
308aed0ee81Snicm *dst++ = '+';
309aed0ee81Snicm #define tohexdigit(v) "0123456789ABCDEF"[v]
310aed0ee81Snicm if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
311aed0ee81Snicm *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
312aed0ee81Snicm *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
313aed0ee81Snicm *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf);
314aed0ee81Snicm *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf);
315aed0ee81Snicm *dst = tohexdigit(((unsigned int) c ) & 0xf);
316aed0ee81Snicm return (c > 0xffff) ? 8 : 7;
317aed0ee81Snicm /*FALLTHROUGH*/
318aed0ee81Snicm /* these two should be handled outside this function */
319aed0ee81Snicm default: /* we should never hit the default */
320aed0ee81Snicm return 0;
321aed0ee81Snicm }
322aed0ee81Snicm }
323aed0ee81Snicm
324aed0ee81Snicm
325aed0ee81Snicm
326aed0ee81Snicm
327aed0ee81Snicm protected int
ct_chr_class(wchar_t c)328e3191321Sschwarze ct_chr_class(wchar_t c)
329aed0ee81Snicm {
330aed0ee81Snicm if (c == '\t')
331aed0ee81Snicm return CHTYPE_TAB;
332aed0ee81Snicm else if (c == '\n')
333aed0ee81Snicm return CHTYPE_NL;
334565aa7e8Sschwarze else if (c < 0x100 && iswcntrl(c))
335aed0ee81Snicm return CHTYPE_ASCIICTL;
336565aa7e8Sschwarze else if (iswprint(c))
337aed0ee81Snicm return CHTYPE_PRINT;
338aed0ee81Snicm else
339aed0ee81Snicm return CHTYPE_NONPRINT;
340aed0ee81Snicm }
341