1 /* $OpenBSD: chartype.c,v 1.5 2013/05/22 00:31:38 yasuoka Exp $ */ 2 /* $NetBSD: chartype.c,v 1.6 2011/07/28 00:48:21 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 2009 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the NetBSD 19 * Foundation, Inc. and its contributors. 20 * 4. Neither the name of The NetBSD Foundation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 /* 38 * chartype.c: character classification and meta information 39 */ 40 #include "config.h" 41 #include "el.h" 42 #include <stdlib.h> 43 44 #define CT_BUFSIZ 1024 45 46 #ifdef WIDECHAR 47 protected void 48 ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize) 49 { 50 void *p; 51 if (mincsize > conv->csize) { 52 conv->csize = mincsize; 53 p = el_realloc(conv->cbuff, conv->csize * sizeof(char)); 54 if (p == NULL) { 55 conv->csize = 0; 56 el_free(conv->cbuff); 57 conv->cbuff = NULL; 58 } else 59 conv->cbuff = p; 60 } 61 62 if (minwsize > conv->wsize) { 63 conv->wsize = minwsize; 64 p = el_realloc(conv->wbuff, conv->wsize * sizeof(Char)); 65 if (p == NULL) { 66 conv->wsize = 0; 67 el_free(conv->wbuff); 68 conv->wbuff = NULL; 69 } else 70 conv->wbuff = p; 71 } 72 } 73 74 75 public char * 76 ct_encode_string(const Char *s, ct_buffer_t *conv) 77 { 78 char *dst; 79 ssize_t used = 0; 80 81 if (!s) 82 return NULL; 83 if (!conv->cbuff) 84 ct_conv_buff_resize(conv, CT_BUFSIZ, 0); 85 if (!conv->cbuff) 86 return NULL; 87 88 dst = conv->cbuff; 89 while (*s) { 90 used = conv->csize - (dst - conv->cbuff); 91 if (used < 5) { 92 used = dst - conv->cbuff; 93 ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ, 0); 94 if (!conv->cbuff) 95 return NULL; 96 dst = conv->cbuff + used; 97 } 98 used = ct_encode_char(dst, 5, *s); 99 if (used == -1) /* failed to encode, need more buffer space */ 100 abort(); 101 ++s; 102 dst += used; 103 } 104 *dst = '\0'; 105 return conv->cbuff; 106 } 107 108 public Char * 109 ct_decode_string(const char *s, ct_buffer_t *conv) 110 { 111 size_t len = 0; 112 113 if (!s) 114 return NULL; 115 if (!conv->wbuff) 116 ct_conv_buff_resize(conv, 0, CT_BUFSIZ); 117 if (!conv->wbuff) 118 return NULL; 119 120 len = ct_mbstowcs(NULL, s, 0); 121 if (len == (size_t)-1) 122 return NULL; 123 if (len > conv->wsize) 124 ct_conv_buff_resize(conv, 0, len + 1); 125 if (!conv->wbuff) 126 return NULL; 127 ct_mbstowcs(conv->wbuff, s, conv->wsize); 128 return conv->wbuff; 129 } 130 131 132 protected Char ** 133 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) 134 { 135 size_t bufspace; 136 int i; 137 Char *p; 138 Char **wargv; 139 size_t wlen; 140 141 /* Make sure we have enough space in the conversion buffer to store all 142 * the argv strings. */ 143 for (i = 0, bufspace = 0; i < argc; ++i) 144 bufspace += argv[i] ? strlen(argv[i]) + 1 : 0; 145 ct_conv_buff_resize(conv, 0, bufspace * sizeof(*p)); 146 if (!conv->wsize) 147 return NULL; 148 149 wargv = el_malloc(argc * sizeof(*wargv)); 150 151 for (i = 0, p = conv->wbuff; i < argc; ++i) { 152 if (!argv[i]) { /* don't pass null pointers to mbstowcs */ 153 wargv[i] = NULL; 154 continue; 155 } else { 156 wargv[i] = p; 157 wlen = mbstowcs(p, argv[i], bufspace); 158 } 159 if (wlen == (size_t)-1 || wlen == bufspace) { 160 /* Encoding error or not enough room for NUL. */ 161 el_free(wargv); 162 return NULL; 163 } else 164 wlen++; /* include NUL in the count */ 165 bufspace -= wlen; 166 p += wlen; 167 } 168 169 return wargv; 170 } 171 172 173 protected size_t 174 ct_enc_width(Char c) 175 { 176 /* UTF-8 encoding specific values */ 177 if (c < 0x80) 178 return 1; 179 else if (c < 0x0800) 180 return 2; 181 else if (c < 0x10000) 182 return 3; 183 else if (c < 0x110000) 184 return 4; 185 else 186 return 0; /* not a valid codepoint */ 187 } 188 189 protected ssize_t 190 ct_encode_char(char *dst, size_t len, Char c) 191 { 192 ssize_t l = 0; 193 if (len < ct_enc_width(c)) 194 return -1; 195 l = ct_wctomb(dst, c); 196 197 if (l < 0) { 198 ct_wctomb_reset; 199 l = 0; 200 } 201 return l; 202 } 203 #endif 204 205 protected const Char * 206 ct_visual_string(const Char *s) 207 { 208 static Char *buff = NULL; 209 static size_t buffsize = 0; 210 void *p; 211 Char *dst; 212 ssize_t used = 0; 213 214 if (!s) 215 return NULL; 216 if (!buff) { 217 buffsize = CT_BUFSIZ; 218 buff = el_malloc(buffsize * sizeof(*buff)); 219 } 220 dst = buff; 221 while (*s) { 222 used = ct_visual_char(dst, buffsize - (dst - buff), *s); 223 if (used == -1) { /* failed to encode, need more buffer space */ 224 used = dst - buff; 225 buffsize += CT_BUFSIZ; 226 p = el_realloc(buff, buffsize * sizeof(*buff)); 227 if (p == NULL) 228 goto out; 229 buff = p; 230 dst = buff + used; 231 /* don't increment s here - we want to retry it! */ 232 } 233 else 234 ++s; 235 dst += used; 236 } 237 if (dst >= (buff + buffsize)) { /* sigh */ 238 buffsize += 1; 239 p = el_realloc(buff, buffsize * sizeof(*buff)); 240 if (p == NULL) 241 goto out; 242 buff = p; 243 dst = buff + buffsize - 1; 244 } 245 *dst = 0; 246 return buff; 247 out: 248 el_free(buff); 249 buffsize = 0; 250 return NULL; 251 } 252 253 254 255 protected int 256 ct_visual_width(Char c) 257 { 258 int t = ct_chr_class(c); 259 #ifdef WIDECHAR 260 int w; 261 #endif 262 switch (t) { 263 case CHTYPE_ASCIICTL: 264 return 2; /* ^@ ^? etc. */ 265 case CHTYPE_TAB: 266 return 1; /* Hmm, this really need to be handled outside! */ 267 case CHTYPE_NL: 268 return 0; /* Should this be 1 instead? */ 269 #ifdef WIDECHAR 270 case CHTYPE_PRINT: 271 w = wcwidth(c); 272 return (w == -1 ? 0 : w); 273 case CHTYPE_NONPRINT: 274 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 275 return 8; /* \U+12345 */ 276 else 277 return 7; /* \U+1234 */ 278 #else 279 case CHTYPE_PRINT: 280 return 1; 281 case CHTYPE_NONPRINT: 282 return 4; /* \123 */ 283 #endif 284 default: 285 return 0; /* should not happen */ 286 } 287 } 288 289 290 protected ssize_t 291 ct_visual_char(Char *dst, size_t len, Char c) 292 { 293 int t = ct_chr_class(c); 294 switch (t) { 295 case CHTYPE_TAB: 296 case CHTYPE_NL: 297 case CHTYPE_ASCIICTL: 298 if (len < 2) 299 return -1; /* insufficient space */ 300 *dst++ = '^'; 301 if (c == '\177') 302 *dst = '?'; /* DEL -> ^? */ 303 else 304 *dst = c | 0100; /* uncontrolify it */ 305 return 2; 306 case CHTYPE_PRINT: 307 if (len < 1) 308 return -1; /* insufficient space */ 309 *dst = c; 310 return 1; 311 case CHTYPE_NONPRINT: 312 /* we only use single-width glyphs for display, 313 * so this is right */ 314 if ((ssize_t)len < ct_visual_width(c)) 315 return -1; /* insufficient space */ 316 #ifdef WIDECHAR 317 *dst++ = '\\'; 318 *dst++ = 'U'; 319 *dst++ = '+'; 320 #define tohexdigit(v) "0123456789ABCDEF"[v] 321 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 322 *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); 323 *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); 324 *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); 325 *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); 326 *dst = tohexdigit(((unsigned int) c ) & 0xf); 327 return (c > 0xffff) ? 8 : 7; 328 #else 329 *dst++ = '\\'; 330 #define tooctaldigit(v) ((v) + '0') 331 *dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7); 332 *dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7); 333 *dst++ = tooctaldigit(((unsigned int) c ) & 0x7); 334 #endif 335 /*FALLTHROUGH*/ 336 /* these two should be handled outside this function */ 337 default: /* we should never hit the default */ 338 return 0; 339 } 340 } 341 342 343 344 345 protected int 346 ct_chr_class(Char c) 347 { 348 if (c == '\t') 349 return CHTYPE_TAB; 350 else if (c == '\n') 351 return CHTYPE_NL; 352 else if (IsASCII(c) && Iscntrl(c)) 353 return CHTYPE_ASCIICTL; 354 else if (Isprint(c)) 355 return CHTYPE_PRINT; 356 else 357 return CHTYPE_NONPRINT; 358 } 359