1 /* $OpenBSD: chartype.c,v 1.3 2011/07/07 05:40:42 okan Exp $ */ 2 /* $NetBSD: chartype.c,v 1.4 2010/04/15 00:55:57 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 2009 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the NetBSD 19 * Foundation, Inc. and its contributors. 20 * 4. Neither the name of The NetBSD Foundation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 37 /* 38 * chartype.c: character classification and meta information 39 */ 40 #include "config.h" 41 #include "el.h" 42 #include <stdlib.h> 43 44 #define CT_BUFSIZ 1024 45 46 #ifdef WIDECHAR 47 protected void 48 ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize) 49 { 50 void *p; 51 if (mincsize > conv->csize) { 52 conv->csize = mincsize; 53 p = el_realloc(conv->cbuff, conv->csize); 54 if (p == NULL) { 55 conv->csize = 0; 56 el_free(conv->cbuff); 57 conv->cbuff = NULL; 58 } else 59 conv->cbuff = p; 60 } 61 62 if (minwsize > conv->wsize) { 63 conv->wsize = minwsize; 64 p = el_realloc(conv->wbuff, conv->wsize); 65 if (p == NULL) { 66 conv->wsize = 0; 67 el_free(conv->wbuff); 68 conv->wbuff = NULL; 69 } else 70 conv->wbuff = p; 71 } 72 } 73 74 75 public char * 76 ct_encode_string(const Char *s, ct_buffer_t *conv) 77 { 78 char *dst; 79 ssize_t used = 0; 80 81 if (!s) 82 return NULL; 83 if (!conv->cbuff) 84 ct_conv_buff_resize(conv, CT_BUFSIZ, 0); 85 if (!conv->cbuff) 86 return NULL; 87 88 dst = conv->cbuff; 89 while (*s) { 90 used = ct_encode_char(dst, (int)(conv->csize - 91 (dst - conv->cbuff)), *s); 92 if (used == -1) { /* failed to encode, need more buffer space */ 93 used = dst - conv->cbuff; 94 ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ, 0); 95 if (!conv->cbuff) 96 return NULL; 97 dst = conv->cbuff + used; 98 /* don't increment s here - we want to retry it! */ 99 } 100 else 101 ++s; 102 dst += used; 103 } 104 if (dst >= (conv->cbuff + conv->csize)) { 105 used = dst - conv->cbuff; 106 ct_conv_buff_resize(conv, conv->csize + 1, 0); 107 if (!conv->cbuff) 108 return NULL; 109 dst = conv->cbuff + used; 110 } 111 *dst = '\0'; 112 return conv->cbuff; 113 } 114 115 public Char * 116 ct_decode_string(const char *s, ct_buffer_t *conv) 117 { 118 size_t len = 0; 119 120 if (!s) 121 return NULL; 122 if (!conv->wbuff) 123 ct_conv_buff_resize(conv, 0, CT_BUFSIZ); 124 if (!conv->wbuff) 125 return NULL; 126 127 len = ct_mbstowcs(0, s, 0); 128 if (len > conv->wsize) 129 ct_conv_buff_resize(conv, 0, len + 1); 130 if (!conv->wbuff) 131 return NULL; 132 ct_mbstowcs(conv->wbuff, s, conv->wsize); 133 return conv->wbuff; 134 } 135 136 137 protected Char ** 138 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv) 139 { 140 size_t bufspace; 141 int i; 142 Char *p; 143 Char **wargv; 144 ssize_t bytes; 145 146 /* Make sure we have enough space in the conversion buffer to store all 147 * the argv strings. */ 148 for (i = 0, bufspace = 0; i < argc; ++i) 149 bufspace += argv[i] ? strlen(argv[i]) + 1 : 0; 150 ct_conv_buff_resize(conv, 0, bufspace); 151 if (!conv->wsize) 152 return NULL; 153 154 wargv = el_malloc(argc * sizeof(*wargv)); 155 156 for (i = 0, p = conv->wbuff; i < argc; ++i) { 157 if (!argv[i]) { /* don't pass null pointers to mbstowcs */ 158 wargv[i] = NULL; 159 continue; 160 } else { 161 wargv[i] = p; 162 bytes = mbstowcs(p, argv[i], bufspace); 163 } 164 if (bytes == -1) { 165 el_free(wargv); 166 return NULL; 167 } else 168 bytes++; /* include '\0' in the count */ 169 bufspace -= bytes; 170 p += bytes; 171 } 172 173 return wargv; 174 } 175 176 177 protected size_t 178 ct_enc_width(Char c) 179 { 180 /* UTF-8 encoding specific values */ 181 if (c < 0x80) 182 return 1; 183 else if (c < 0x0800) 184 return 2; 185 else if (c < 0x10000) 186 return 3; 187 else if (c < 0x110000) 188 return 4; 189 else 190 return 0; /* not a valid codepoint */ 191 } 192 193 protected ssize_t 194 ct_encode_char(char *dst, size_t len, Char c) 195 { 196 ssize_t l = 0; 197 if (len < ct_enc_width(c)) 198 return -1; 199 l = ct_wctomb(dst, c); 200 201 if (l < 0) { 202 ct_wctomb_reset; 203 l = 0; 204 } 205 return l; 206 } 207 #endif 208 209 protected const Char * 210 ct_visual_string(const Char *s) 211 { 212 static Char *buff = NULL; 213 static size_t buffsize = 0; 214 void *p; 215 Char *dst; 216 ssize_t used = 0; 217 218 if (!s) 219 return NULL; 220 if (!buff) { 221 buffsize = CT_BUFSIZ; 222 buff = el_malloc(buffsize * sizeof(*buff)); 223 } 224 dst = buff; 225 while (*s) { 226 used = ct_visual_char(dst, buffsize - (dst - buff), *s); 227 if (used == -1) { /* failed to encode, need more buffer space */ 228 used = dst - buff; 229 buffsize += CT_BUFSIZ; 230 p = el_realloc(buff, buffsize * sizeof(*buff)); 231 if (p == NULL) 232 goto out; 233 buff = p; 234 dst = buff + used; 235 /* don't increment s here - we want to retry it! */ 236 } 237 else 238 ++s; 239 dst += used; 240 } 241 if (dst >= (buff + buffsize)) { /* sigh */ 242 buffsize += 1; 243 p = el_realloc(buff, buffsize * sizeof(*buff)); 244 if (p == NULL) 245 goto out; 246 buff = p; 247 dst = buff + buffsize - 1; 248 } 249 *dst = 0; 250 return buff; 251 out: 252 el_free(buff); 253 buffsize = 0; 254 return NULL; 255 } 256 257 258 259 protected int 260 ct_visual_width(Char c) 261 { 262 int t = ct_chr_class(c); 263 #ifdef WIDECHAR 264 int w; 265 #endif 266 switch (t) { 267 case CHTYPE_ASCIICTL: 268 return 2; /* ^@ ^? etc. */ 269 case CHTYPE_TAB: 270 return 1; /* Hmm, this really need to be handled outside! */ 271 case CHTYPE_NL: 272 return 0; /* Should this be 1 instead? */ 273 #ifdef WIDECHAR 274 case CHTYPE_PRINT: 275 w = wcwidth(c); 276 return (w == -1 ? 0 : w); 277 case CHTYPE_NONPRINT: 278 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 279 return 8; /* \U+12345 */ 280 else 281 return 7; /* \U+1234 */ 282 #else 283 case CHTYPE_PRINT: 284 return 1; 285 case CHTYPE_NONPRINT: 286 return 4; /* \123 */ 287 #endif 288 default: 289 return 0; /* should not happen */ 290 } 291 } 292 293 294 protected ssize_t 295 ct_visual_char(Char *dst, size_t len, Char c) 296 { 297 int t = ct_chr_class(c); 298 switch (t) { 299 case CHTYPE_TAB: 300 case CHTYPE_NL: 301 case CHTYPE_ASCIICTL: 302 if (len < 2) 303 return -1; /* insufficient space */ 304 *dst++ = '^'; 305 if (c == '\177') 306 *dst = '?'; /* DEL -> ^? */ 307 else 308 *dst = c | 0100; /* uncontrolify it */ 309 return 2; 310 case CHTYPE_PRINT: 311 if (len < 1) 312 return -1; /* insufficient space */ 313 *dst = c; 314 return 1; 315 case CHTYPE_NONPRINT: 316 /* we only use single-width glyphs for display, 317 * so this is right */ 318 if ((ssize_t)len < ct_visual_width(c)) 319 return -1; /* insufficient space */ 320 #ifdef WIDECHAR 321 *dst++ = '\\'; 322 *dst++ = 'U'; 323 *dst++ = '+'; 324 #define tohexdigit(v) "0123456789ABCDEF"[v] 325 if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */ 326 *dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf); 327 *dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf); 328 *dst++ = tohexdigit(((unsigned int) c >> 8) & 0xf); 329 *dst++ = tohexdigit(((unsigned int) c >> 4) & 0xf); 330 *dst = tohexdigit(((unsigned int) c ) & 0xf); 331 return (c > 0xffff) ? 8 : 7; 332 #else 333 *dst++ = '\\'; 334 #define tooctaldigit(v) ((v) + '0') 335 *dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7); 336 *dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7); 337 *dst++ = tooctaldigit(((unsigned int) c ) & 0x7); 338 #endif 339 /*FALLTHROUGH*/ 340 /* these two should be handled outside this function */ 341 default: /* we should never hit the default */ 342 return 0; 343 } 344 } 345 346 347 348 349 protected int 350 ct_chr_class(Char c) 351 { 352 if (c == '\t') 353 return CHTYPE_TAB; 354 else if (c == '\n') 355 return CHTYPE_NL; 356 else if (IsASCII(c) && Iscntrl(c)) 357 return CHTYPE_ASCIICTL; 358 else if (Isprint(c)) 359 return CHTYPE_PRINT; 360 else 361 return CHTYPE_NONPRINT; 362 } 363