1 /* $NetBSD: conv.c,v 1.2 2013/11/22 15:52:05 christos Exp $ */ 2 /*- 3 * Copyright (c) 1993, 1994 4 * The Regents of the University of California. All rights reserved. 5 * Copyright (c) 1993, 1994, 1995, 1996 6 * Keith Bostic. All rights reserved. 7 * 8 * See the LICENSE file for redistribution information. 9 */ 10 11 #include "config.h" 12 13 #ifndef lint 14 static const char sccsid[] = "Id: conv.c,v 1.27 2001/08/18 21:41:41 skimo Exp (Berkeley) Date: 2001/08/18 21:41:41 "; 15 #endif /* not lint */ 16 17 #include <sys/types.h> 18 #include <sys/queue.h> 19 #include <sys/time.h> 20 21 #include <bitstring.h> 22 #include <errno.h> 23 #include <limits.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <unistd.h> 28 29 #include "common.h" 30 31 #ifdef USE_ICONV 32 #include <langinfo.h> 33 #include <iconv.h> 34 35 #define LANGCODESET nl_langinfo(CODESET) 36 #else 37 typedef int iconv_t; 38 39 #define LANGCODESET "" 40 #endif 41 42 #include <locale.h> 43 44 #ifdef USE_WIDECHAR 45 static int 46 raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, size_t *tolen, 47 const CHAR_T **dst) 48 { 49 int i; 50 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1; 51 size_t *blen = &cw->blen1; 52 53 BINC_RETW(NULL, *tostr, *blen, len); 54 55 *tolen = len; 56 for (i = 0; i < len; ++i) 57 (*tostr)[i] = (u_char) str[i]; 58 59 *dst = cw->bp1; 60 61 return 0; 62 } 63 64 #ifndef ERROR_ON_CONVERT 65 #define HANDLE_ICONV_ERROR(o, i, ol, il) do { \ 66 *o++ = *i++; \ 67 ol--; il--; \ 68 } while (/*CONSTCOND*/0) 69 #define HANDLE_MBR_ERROR(n, mbs, d, s) do { \ 70 d = s; \ 71 MEMSET(&mbs, 0, 1); \ 72 n = 1; \ 73 } while (/*CONSTCOND*/0) 74 #else 75 #define HANDLE_ICONV_ERROR goto err 76 #define HANDLE_MBR_ERROR goto err 77 #endif 78 79 #define CONV_BUFFER_SIZE 512 80 /* fill the buffer with codeset encoding of string pointed to by str 81 * left has the number of bytes left in str and is adjusted 82 * len contains the number of bytes put in the buffer 83 */ 84 #ifdef USE_ICONV 85 #define CONVERT(str, left, src, len) \ 86 do { \ 87 size_t outleft; \ 88 char *bp = buffer; \ 89 outleft = CONV_BUFFER_SIZE; \ 90 errno = 0; \ 91 if (iconv(id, (const char **)&str, &left, &bp, &outleft) \ 92 == (size_t)-1 /* && errno != E2BIG */) \ 93 HANDLE_ICONV_ERROR(bp, str, outleft, left); \ 94 if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ 95 error = -left; \ 96 goto err; \ 97 } \ 98 src = buffer; \ 99 } while (0) 100 #else 101 #define CONVERT(str, left, src, len) 102 #endif 103 104 static int 105 default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 106 size_t *tolen, const CHAR_T **dst, const char *enc) 107 { 108 int j; 109 size_t i = 0; 110 CHAR_T **tostr = (CHAR_T **)(void *)&cw->bp1; 111 size_t *blen = &cw->blen1; 112 mbstate_t mbs; 113 size_t n; 114 ssize_t nlen = len; 115 const char *src = (const char *)str; 116 iconv_t id = (iconv_t)-1; 117 char buffer[CONV_BUFFER_SIZE]; 118 size_t left = len; 119 int error = 1; 120 121 MEMSET(&mbs, 0, 1); 122 BINC_RETW(NULL, *tostr, *blen, nlen); 123 124 #ifdef USE_ICONV 125 if (strcmp(nl_langinfo(CODESET), enc)) { 126 id = iconv_open(nl_langinfo(CODESET), enc); 127 if (id == (iconv_t)-1) 128 goto err; 129 CONVERT(str, left, src, len); 130 } 131 #endif 132 133 for (i = 0, j = 0; j < len; ) { 134 n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); 135 /* NULL character converted */ 136 if (n == (size_t)-2) error = -(len-j); 137 if (n == (size_t)-1 || n == (size_t)-2) 138 HANDLE_MBR_ERROR(n, mbs, (*tostr)[i], src[j]); 139 if (n == 0) n = 1; 140 j += n; 141 if (++i >= *blen) { 142 nlen += 256; 143 BINC_RETW(NULL, *tostr, *blen, nlen); 144 } 145 if (id != (iconv_t)-1 && j == len && left) { 146 CONVERT(str, left, src, len); 147 j = 0; 148 } 149 } 150 *tolen = i; 151 152 if (id != (iconv_t)-1) 153 iconv_close(id); 154 155 *dst = cw->bp1; 156 157 return 0; 158 err: 159 *tolen = i; 160 if (id != (iconv_t)-1) 161 iconv_close(id); 162 *dst = cw->bp1; 163 164 return error; 165 } 166 167 static int 168 fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 169 size_t *tolen, const CHAR_T **dst) 170 { 171 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING)); 172 } 173 174 static int 175 ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 176 size_t *tolen, const CHAR_T **dst) 177 { 178 return default_char2int(sp, str, len, cw, tolen, dst, O_STR(sp, O_INPUTENCODING)); 179 } 180 181 static int 182 cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 183 size_t *tolen, const CHAR_T **dst) 184 { 185 return default_char2int(sp, str, len, cw, tolen, dst, LANGCODESET); 186 } 187 188 static int 189 CHAR_T_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 190 size_t *tolen, const char **dst) 191 { 192 *tolen = len * sizeof(CHAR_T); 193 *dst = (const char *)(const void *)str; 194 195 return 0; 196 } 197 198 static int 199 CHAR_T_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, 200 size_t *tolen, const CHAR_T **dst) 201 { 202 *tolen = len / sizeof(CHAR_T); 203 *dst = (const CHAR_T*) str; 204 205 return 0; 206 } 207 208 static int 209 int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, size_t *tolen, 210 const char **dst) 211 { 212 int i; 213 char **tostr = (char **)(void *)&cw->bp1; 214 size_t *blen = &cw->blen1; 215 216 BINC_RETC(NULL, *tostr, *blen, len); 217 218 *tolen = len; 219 for (i = 0; i < len; ++i) 220 (*tostr)[i] = str[i]; 221 222 *dst = cw->bp1; 223 224 return 0; 225 } 226 227 static int 228 default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 229 size_t *tolen, const char **pdst, const char *enc) 230 { 231 size_t i, j; 232 int offset = 0; 233 char **tostr = (char **)(void *)&cw->bp1; 234 size_t *blen = &cw->blen1; 235 mbstate_t mbs; 236 size_t n; 237 ssize_t nlen = len + MB_CUR_MAX; 238 char *dst; 239 size_t buflen; 240 char buffer[CONV_BUFFER_SIZE]; 241 iconv_t id = (iconv_t)-1; 242 243 /* convert first len bytes of buffer and append it to cw->bp 244 * len is adjusted => 0 245 * offset contains the offset in cw->bp and is adjusted 246 * cw->bp is grown as required 247 */ 248 #ifdef USE_ICONV 249 #define CONVERT2(len, cw, offset) \ 250 do { \ 251 const char *bp = buffer; \ 252 while (len != 0) { \ 253 size_t outleft = cw->blen1 - offset; \ 254 char *obp = (char *)cw->bp1 + offset; \ 255 if (cw->blen1 < offset + MB_CUR_MAX) { \ 256 nlen += 256; \ 257 BINC_RETC(NULL, cw->bp1, cw->blen1, nlen); \ 258 } \ 259 errno = 0; \ 260 if (iconv(id, &bp, &len, &obp, &outleft) == (size_t)-1 && \ 261 errno != E2BIG) \ 262 HANDLE_ICONV_ERROR(obp, bp, outleft, len); \ 263 offset = cw->blen1 - outleft; \ 264 } \ 265 } while (0) 266 #else 267 #define CONVERT2(len, cw, offset) 268 #endif 269 270 271 MEMSET(&mbs, 0, 1); 272 BINC_RETC(NULL, *tostr, *blen, nlen); 273 dst = *tostr; buflen = *blen; 274 275 #ifdef USE_ICONV 276 if (strcmp(nl_langinfo(CODESET), enc)) { 277 id = iconv_open(enc, nl_langinfo(CODESET)); 278 if (id == (iconv_t)-1) 279 goto err; 280 dst = buffer; buflen = CONV_BUFFER_SIZE; 281 } 282 #endif 283 284 for (i = 0, j = 0; i < (size_t)len; ++i) { 285 n = wcrtomb(dst+j, str[i], &mbs); 286 if (n == (size_t)-1) 287 HANDLE_MBR_ERROR(n, mbs, dst[j], str[i]); 288 j += n; 289 if (buflen < j + MB_CUR_MAX) { 290 if (id != (iconv_t)-1) { 291 CONVERT2(j, cw, offset); 292 } else { 293 nlen += 256; 294 BINC_RETC(NULL, *tostr, *blen, nlen); 295 dst = *tostr; buflen = *blen; 296 } 297 } 298 } 299 300 n = wcrtomb(dst+j, L'\0', &mbs); 301 j += n - 1; /* don't count NUL at the end */ 302 *tolen = j; 303 304 if (id != (iconv_t)-1) { 305 CONVERT2(j, cw, offset); 306 *tolen = offset; 307 } 308 309 *pdst = cw->bp1; 310 311 return 0; 312 err: 313 *tolen = j; 314 315 *pdst = cw->bp1; 316 317 return 1; 318 } 319 320 static int 321 fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 322 size_t *tolen, const char **dst) 323 { 324 return default_int2char(sp, str, len, cw, tolen, dst, O_STR(sp, O_FILEENCODING)); 325 } 326 327 static int 328 cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, 329 size_t *tolen, const char **dst) 330 { 331 return default_int2char(sp, str, len, cw, tolen, dst, LANGCODESET); 332 } 333 334 #endif 335 336 337 void 338 conv_init (SCR *orig, SCR *sp) 339 { 340 if (orig != NULL) 341 MEMCPY(&sp->conv, &orig->conv, 1); 342 else { 343 setlocale(LC_ALL, ""); 344 #ifdef USE_WIDECHAR 345 sp->conv.sys2int = cs_char2int; 346 sp->conv.int2sys = cs_int2char; 347 sp->conv.file2int = fe_char2int; 348 sp->conv.int2file = fe_int2char; 349 sp->conv.input2int = ie_char2int; 350 #endif 351 #ifdef USE_ICONV 352 o_set(sp, O_FILEENCODING, OS_STRDUP, nl_langinfo(CODESET), 0); 353 o_set(sp, O_INPUTENCODING, OS_STRDUP, nl_langinfo(CODESET), 0); 354 #endif 355 } 356 } 357 358 int 359 conv_enc (SCR *sp, int option, const char *enc) 360 { 361 #if defined(USE_WIDECHAR) && defined(USE_ICONV) 362 iconv_t id; 363 char2wchar_t *c2w; 364 wchar2char_t *w2c; 365 366 switch (option) { 367 case O_FILEENCODING: 368 c2w = &sp->conv.file2int; 369 w2c = &sp->conv.int2file; 370 break; 371 case O_INPUTENCODING: 372 c2w = &sp->conv.input2int; 373 w2c = NULL; 374 break; 375 default: 376 c2w = NULL; 377 w2c = NULL; 378 break; 379 } 380 381 if (!*enc) { 382 if (c2w) *c2w = raw2int; 383 if (w2c) *w2c = int2raw; 384 return 0; 385 } 386 387 if (!strcmp(enc, "WCHAR_T")) { 388 if (c2w) *c2w = CHAR_T_char2int; 389 if (w2c) *w2c = CHAR_T_int2char; 390 return 0; 391 } 392 393 id = iconv_open(enc, nl_langinfo(CODESET)); 394 if (id == (iconv_t)-1) 395 goto err; 396 iconv_close(id); 397 id = iconv_open(nl_langinfo(CODESET), enc); 398 if (id == (iconv_t)-1) 399 goto err; 400 iconv_close(id); 401 402 switch (option) { 403 case O_FILEENCODING: 404 *c2w = fe_char2int; 405 *w2c = fe_int2char; 406 break; 407 case O_INPUTENCODING: 408 *c2w = ie_char2int; 409 break; 410 } 411 412 F_CLR(sp, SC_CONV_ERROR); 413 F_SET(sp, SC_SCR_REFORMAT); 414 415 return 0; 416 err: 417 switch (option) { 418 case O_FILEENCODING: 419 msgq(sp, M_ERR, 420 "321|File encoding conversion not supported"); 421 break; 422 case O_INPUTENCODING: 423 msgq(sp, M_ERR, 424 "322|Input encoding conversion not supported"); 425 break; 426 } 427 #endif 428 return 1; 429 } 430 431