1 /* $OpenBSD: chartype.c,v 1.4 2011/11/17 20:14:24 nicm Exp $ */ 2 /* $NetBSD: chartype.c,v 1.4 2010/04/15 00:55:57 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 2009 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the NetBSD 19 * Foundation, Inc. and its contributors. 20 * 4. Neither the name of The NetBSD Foundation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 /* 38 * chartype.c: character classification and meta information 39 */ 40 #include "config.h" 41 #include "el.h" 42 #include <stdlib.h> 43 44 #define CT_BUFSIZ 1024 45 46 #ifdef WIDECHAR 47 protected void 48 ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize) 49 { 50 void *p; 51 if (mincsize > conv->csize) { 52 conv->csize = mincsize; 53 p = el_realloc(conv->cbuff, conv->csize); 54 if (p == NULL) { 55 conv->csize = 0; 56 el_free(conv->cbuff); 57 conv->cbuff = NULL; 58 } else 59 conv->cbuff = p; 60 } 61 62 if (minwsize > conv->wsize) { 63 conv->wsize = minwsize; 64 p = el_realloc(conv->wbuff, conv->wsize); 65 if (p == NULL) { 66 conv->wsize = 0; 67 el_free(conv->wbuff); 68 conv->wbuff = NULL; 69 } else 70 conv->wbuff = p; 71 } 72 } 73 74 75 public char * 76 ct_encode_string(const Char *s, ct_buffer_t *conv) 77 { 78 char *dst; 79 ssize_t used = 0; 80 81 if (!s) 82 return NULL; 83 if (!conv->cbuff) 84 ct_conv_buff_resize(conv, CT_BUFSIZ, 0); 85 if (!conv->cbuff) 86 return NULL; 87 88 dst = conv->cbuff; 89 while (*s) { 90 used = ct_encode_char(dst, (int)(conv->csize - 91 (dst - conv->cbuff)), *s); 92 if (used == -1) { /* failed to encode, need more buffer space */ 93 used = dst - conv->cbuff; 94 ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ, 0); 95 if (!conv->cbuff) 96 return NULL; 97 dst = conv->cbuff + used; 98 /* don't increment s here - we want to retry it! */ 99 } 100 else 101 ++s; 102 dst += used; 103 } 104 if (dst >= (conv->cbuff + conv->csize)) { 105 used = dst - conv->cbuff; 106 ct_conv_buff_resize(conv, conv->csize + 1, 0); 107 if (!conv->cbuff) 108 return NULL; 109 dst = conv->cbuff + used; 110 } 111 *dst = '\0'; 112 return conv->cbuff; 113 } 114 115 public Char * 116 ct_decode_string(const char *s, ct_buffer_t *conv) 117 { 118 size_t len = 0; 119 120 if (!s) 121 return NULL; 122 if (!conv->wbuff) 123 ct_conv_buff_resize(conv, 0, CT_BUFSIZ); 124 if (!conv->wbuff) 125 return NULL; 126 127 len = ct_mbstowcs(0, s, 0); 128 if (len > conv->wsize) 129 ct_conv_buff_resize(conv, 0, len + 1); 130 if (!conv->wbuff) 131 return NULL; 132 ct_mbstowcs(conv->wbuff, s, conv->wsize); 133 return conv->wbuff; 134 } 135 136 137 protected Char ** 138 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) 139 { 140 size_t bufspace; 141 int i; 142 Char *p; 143 Char **wargv; 144 size_t wlen; 145 146 /* Make sure we have enough space in the conversion buffer to store all 147 * the argv strings. */ 148 for (i = 0, bufspace = 0; i < argc; ++i) 149 bufspace += argv[i] ? strlen(argv[i]) + 1 : 0; 150 ct_conv_buff_resize(conv, 0, bufspace * sizeof(*p)); 151 if (!conv->wsize) 152 return NULL; 153 154 wargv = el_malloc(argc * sizeof(*wargv)); 155 156 for (i = 0, p = conv->wbuff; i < argc; ++i) { 157 if (!argv[i]) { /* don't pass null pointers to mbstowcs */ 158 wargv[i] = NULL; 159 continue; 160 } else { 161 wargv[i] = p; 162 wlen = mbstowcs(p, argv[i], bufspace); 163 } 164 if (wlen == (size_t)-1 || wlen == bufspace) { 165 /* Encoding error or not enough room for NUL. */ 166 el_free(wargv); 167 return NULL; 168 } else 169 wlen++; /* include NUL in the count */ 170 bufspace -= wlen; 171 p += wlen; 172 } 173 174 return wargv; 175 } 176 177 178 protected size_t 179 ct_enc_width(Char c) 180 { 181 /* UTF-8 encoding specific values */ 182 if (c < 0x80) 183 return 1; 184 else if (c < 0x0800) 185 return 2; 186 else if (c < 0x10000) 187 return 3; 188 else if (c < 0x110000) 189 return 4; 190 else 191 return 0; /* not a valid codepoint */ 192 } 193 194 protected ssize_t 195 ct_encode_char(char *dst, size_t len, Char c) 196 { 197 ssize_t l = 0; 198 if (len < ct_enc_width(c)) 199 return -1; 200 l = ct_wctomb(dst, c); 201 202 if (l < 0) { 203 ct_wctomb_reset; 204 l = 0; 205 } 206 return l; 207 } 208 #endif 209 210 protected const Char * 211 ct_visual_string(const Char *s) 212 { 213 static Char *buff = NULL; 214 static size_t buffsize = 0; 215 void *p; 216 Char *dst; 217 ssize_t used = 0; 218 219 if (!s) 220 return NULL; 221 if (!buff) { 222 buffsize = CT_BUFSIZ; 223 buff = el_malloc(buffsize * sizeof(*buff)); 224 } 225 dst = buff; 226 while (*s) { 227 used = ct_visual_char(dst, buffsize - (dst - buff), *s); 228 if (used == -1) { /* failed to encode, need more buffer space */ 229 used = dst - buff; 230 buffsize += CT_BUFSIZ; 231 p = el_realloc(buff, buffsize * sizeof(*buff)); 232 if (p == NULL) 233 goto out; 234 buff = p; 235 dst = buff + used; 236 /* don't increment s here - we want to retry it! */ 237 } 238 else 239 ++s; 240 dst += used; 241 } 242 if (dst >= (buff + buffsize)) { /* sigh */ 243 buffsize += 1; 244 p = el_realloc(buff, buffsize * sizeof(*buff)); 245 if (p == NULL) 246 goto out; 247 buff = p; 248 dst = buff + buffsize - 1; 249 } 250 *dst = 0; 251 return buff; 252 out: 253 el_free(buff); 254 buffsize = 0; 255 return NULL; 256 } 257 258 259 260 protected int 261 ct_visual_width(Char c) 262 { 263 int t = ct_chr_class(c); 264 #ifdef WIDECHAR 265 int w; 266 #endif 267 switch (t) { 268 case CHTYPE_ASCIICTL: 269 return 2; /* ^@ ^? etc. */ 270 case CHTYPE_TAB: 271 return 1; /* Hmm, this really need to be handled outside! */ 272 case CHTYPE_NL: 273 return 0; /* Should this be 1 instead? */ 274 #ifdef WIDECHAR 275 case CHTYPE_PRINT: 276 w = wcwidth(c); 277 return (w == -1 ? 0 : w); 278 case CHTYPE_NONPRINT: 279 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 280 return 8; /* \U+12345 */ 281 else 282 return 7; /* \U+1234 */ 283 #else 284 case CHTYPE_PRINT: 285 return 1; 286 case CHTYPE_NONPRINT: 287 return 4; /* \123 */ 288 #endif 289 default: 290 return 0; /* should not happen */ 291 } 292 } 293 294 295 protected ssize_t 296 ct_visual_char(Char *dst, size_t len, Char c) 297 { 298 int t = ct_chr_class(c); 299 switch (t) { 300 case CHTYPE_TAB: 301 case CHTYPE_NL: 302 case CHTYPE_ASCIICTL: 303 if (len < 2) 304 return -1; /* insufficient space */ 305 *dst++ = '^'; 306 if (c == '\177') 307 *dst = '?'; /* DEL -> ^? */ 308 else 309 *dst = c | 0100; /* uncontrolify it */ 310 return 2; 311 case CHTYPE_PRINT: 312 if (len < 1) 313 return -1; /* insufficient space */ 314 *dst = c; 315 return 1; 316 case CHTYPE_NONPRINT: 317 /* we only use single-width glyphs for display, 318 * so this is right */ 319 if ((ssize_t)len < ct_visual_width(c)) 320 return -1; /* insufficient space */ 321 #ifdef WIDECHAR 322 *dst++ = '\\'; 323 *dst++ = 'U'; 324 *dst++ = '+'; 325 #define tohexdigit(v) "0123456789ABCDEF"[v] 326 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 327 *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); 328 *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); 329 *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); 330 *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); 331 *dst = tohexdigit(((unsigned int) c ) & 0xf); 332 return (c > 0xffff) ? 8 : 7; 333 #else 334 *dst++ = '\\'; 335 #define tooctaldigit(v) ((v) + '0') 336 *dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7); 337 *dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7); 338 *dst++ = tooctaldigit(((unsigned int) c ) & 0x7); 339 #endif 340 /*FALLTHROUGH*/ 341 /* these two should be handled outside this function */ 342 default: /* we should never hit the default */ 343 return 0; 344 } 345 } 346 347 348 349 350 protected int 351 ct_chr_class(Char c) 352 { 353 if (c == '\t') 354 return CHTYPE_TAB; 355 else if (c == '\n') 356 return CHTYPE_NL; 357 else if (IsASCII(c) && Iscntrl(c)) 358 return CHTYPE_ASCIICTL; 359 else if (Isprint(c)) 360 return CHTYPE_PRINT; 361 else 362 return CHTYPE_NONPRINT; 363 } 364