1 /* $NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /*- 33 * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 46 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 47 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 49 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 50 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 51 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 52 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 53 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 54 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 55 * POSSIBILITY OF SUCH DAMAGE. 56 */ 57 58 #include <sys/cdefs.h> 59 #if defined(LIBC_SCCS) && !defined(lint) 60 __RCSID("$NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $"); 61 #endif /* LIBC_SCCS and not lint */ 62 #ifdef __FBSDID 63 __FBSDID("$FreeBSD$"); 64 #define _DIAGASSERT(x) assert(x) 65 #endif 66 67 #include "namespace.h" 68 #include <sys/types.h> 69 70 #include <assert.h> 71 #include <vis.h> 72 #include <errno.h> 73 #include <stdlib.h> 74 #include <wchar.h> 75 #include <wctype.h> 76 77 #ifdef __weak_alias 78 __weak_alias(strvisx,_strvisx) 79 #endif 80 81 #if !HAVE_VIS || !HAVE_SVIS 82 #include <ctype.h> 83 #include <limits.h> 84 #include <stdio.h> 85 #include <string.h> 86 87 /* 88 * The reason for going through the trouble to deal with character encodings 89 * in vis(3), is that we use this to safe encode output of commands. This 90 * safe encoding varies depending on the character set. For example if we 91 * display ps output in French, we don't want to display French characters 92 * as M-foo. 93 */ 94 95 static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *); 96 97 #undef BELL 98 #define BELL L'\a' 99 100 #define iswoctal(c) (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7') 101 #define iswwhite(c) (c == L' ' || c == L'\t' || c == L'\n') 102 #define iswsafe(c) (c == L'\b' || c == BELL || c == L'\r') 103 #define xtoa(c) L"0123456789abcdef"[c] 104 #define XTOA(c) L"0123456789ABCDEF"[c] 105 106 #define MAXEXTRAS 9 107 108 #define MAKEEXTRALIST(flag, extra, orig_str) \ 109 do { \ 110 const wchar_t *orig = orig_str; \ 111 const wchar_t *o = orig; \ 112 wchar_t *e; \ 113 while (*o++) \ 114 continue; \ 115 extra = calloc((size_t)((o - orig) + MAXEXTRAS), sizeof(*extra)); \ 116 if (!extra) break; \ 117 for (o = orig, e = extra; (*e++ = *o++) != L'\0';) \ 118 continue; \ 119 e--; \ 120 if (flag & VIS_GLOB) { \ 121 *e++ = L'*'; \ 122 *e++ = L'?'; \ 123 *e++ = L'['; \ 124 *e++ = L'#'; \ 125 } \ 126 if (flag & VIS_SP) *e++ = L' '; \ 127 if (flag & VIS_TAB) *e++ = L'\t'; \ 128 if (flag & VIS_NL) *e++ = L'\n'; \ 129 if ((flag & VIS_NOSLASH) == 0) *e++ = L'\\'; \ 130 *e = L'\0'; \ 131 } while (/*CONSTCOND*/0) 132 133 /* 134 * This is do_hvis, for HTTP style (RFC 1808) 135 */ 136 static wchar_t * 137 do_hvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra) 138 { 139 if (iswalnum(c) 140 /* safe */ 141 || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+' 142 /* extra */ 143 || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')' 144 || c == L',') 145 dst = do_svis(dst, c, flag, nextc, extra); 146 else { 147 *dst++ = L'%'; 148 *dst++ = xtoa(((unsigned int)c >> 4) & 0xf); 149 *dst++ = xtoa((unsigned int)c & 0xf); 150 } 151 152 return dst; 153 } 154 155 /* 156 * This is do_mvis, for Quoted-Printable MIME (RFC 2045) 157 * NB: No handling of long lines or CRLF. 158 */ 159 static wchar_t * 160 do_mvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra) 161 { 162 if ((c != L'\n') && 163 /* Space at the end of the line */ 164 ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) || 165 /* Out of range */ 166 (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) || 167 /* Specific char to be escaped */ 168 wcschr(L"#$@[\\]^`{|}~", c) != NULL)) { 169 *dst++ = L'='; 170 *dst++ = XTOA(((unsigned int)c >> 4) & 0xf); 171 *dst++ = XTOA((unsigned int)c & 0xf); 172 } else 173 dst = do_svis(dst, c, flag, nextc, extra); 174 return dst; 175 } 176 177 /* 178 * This is do_vis, the central code of vis. 179 * dst: Pointer to the destination buffer 180 * c: Character to encode 181 * flag: Flag word 182 * nextc: The character following 'c' 183 * extra: Pointer to the list of extra characters to be 184 * backslash-protected. 185 */ 186 static wchar_t * 187 do_svis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra) 188 { 189 int iswextra; 190 191 iswextra = wcschr(extra, c) != NULL; 192 if (!iswextra && (iswgraph(c) || iswwhite(c) || 193 ((flag & VIS_SAFE) && iswsafe(c)))) { 194 *dst++ = c; 195 return dst; 196 } 197 if (flag & VIS_CSTYLE) { 198 switch (c) { 199 case L'\n': 200 *dst++ = L'\\'; *dst++ = L'n'; 201 return dst; 202 case L'\r': 203 *dst++ = L'\\'; *dst++ = L'r'; 204 return dst; 205 case L'\b': 206 *dst++ = L'\\'; *dst++ = L'b'; 207 return dst; 208 case BELL: 209 *dst++ = L'\\'; *dst++ = L'a'; 210 return dst; 211 case L'\v': 212 *dst++ = L'\\'; *dst++ = L'v'; 213 return dst; 214 case L'\t': 215 *dst++ = L'\\'; *dst++ = L't'; 216 return dst; 217 case L'\f': 218 *dst++ = L'\\'; *dst++ = L'f'; 219 return dst; 220 case L' ': 221 *dst++ = L'\\'; *dst++ = L's'; 222 return dst; 223 case L'\0': 224 *dst++ = L'\\'; *dst++ = L'0'; 225 if (iswoctal(nextc)) { 226 *dst++ = L'0'; 227 *dst++ = L'0'; 228 } 229 return dst; 230 default: 231 if (iswgraph(c)) { 232 *dst++ = L'\\'; 233 *dst++ = c; 234 return dst; 235 } 236 } 237 } 238 if (iswextra || ((c & 0177) == L' ') || (flag & VIS_OCTAL)) { 239 *dst++ = L'\\'; 240 *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0'; 241 *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0'; 242 *dst++ = (c & 07) + L'0'; 243 } else { 244 if ((flag & VIS_NOSLASH) == 0) 245 *dst++ = L'\\'; 246 247 if (c & 0200) { 248 c &= 0177; 249 *dst++ = L'M'; 250 } 251 252 if (iswcntrl(c)) { 253 *dst++ = L'^'; 254 if (c == 0177) 255 *dst++ = L'?'; 256 else 257 *dst++ = c + L'@'; 258 } else { 259 *dst++ = L'-'; 260 *dst++ = c; 261 } 262 } 263 return dst; 264 } 265 266 typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *); 267 268 /* 269 * Return the appropriate encoding function depending on the flags given. 270 */ 271 static visfun_t 272 getvisfun(int flag) 273 { 274 if (flag & VIS_HTTPSTYLE) 275 return do_hvis; 276 if (flag & VIS_MIMESTYLE) 277 return do_mvis; 278 return do_svis; 279 } 280 281 /* 282 * istrsnvisx() 283 * The main internal function. 284 * All user-visible functions call this one. 285 */ 286 static int 287 istrsnvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength, 288 int flag, const char *mbextra) 289 { 290 wchar_t *dst, *src, *pdst, *psrc, *start, *extra, *nextra; 291 size_t len, olen; 292 wint_t c; 293 visfun_t f; 294 int clen, error = -1; 295 ssize_t mbslength; 296 297 _DIAGASSERT(mbdst != NULL); 298 _DIAGASSERT(mbsrc != NULL); 299 _DIAGASSERT(mbextra != NULL); 300 301 /* 302 * Input (mbsrc) is a char string considered to be multibyte 303 * characters. The input loop will read this string pulling 304 * one character, possibly multiple bytes, from mbsrc and 305 * converting each to wchar_t in src. 306 * 307 * The vis conversion will be done using the wide char 308 * wchar_t string. 309 * 310 * This will then be converted back to a multibyte string to 311 * return to the caller. 312 */ 313 314 /* Allocate space for the wide char strings */ 315 psrc = pdst = extra = nextra = NULL; 316 if (!mblength) 317 mblength = strlen(mbsrc); 318 319 if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL) 320 return -1; 321 if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL) 322 goto out; 323 if ((extra = calloc((strlen(mbextra) + 1), sizeof(*extra))) == NULL) 324 goto out; 325 326 dst = pdst; 327 src = psrc; 328 329 /* 330 * Input loop. 331 * Handle up to mblength characters (not bytes). We do not 332 * stop at NULs because we may be processing a block of data 333 * that includes NULs. We process one more than the character 334 * count so that we also get the next character of input which 335 * is needed under some circumstances as a look-ahead character. 336 */ 337 mbslength = (ssize_t)mblength; 338 /* 339 * When inputing a single character, must also read in the 340 * next character for nextc, the look-ahead character. 341 */ 342 if (mbslength == 1) 343 mbslength++; 344 while (mbslength > 0) { 345 /* Convert one multibyte character to wchar_t. */ 346 clen = mbtowc(src, mbsrc, MB_LEN_MAX); 347 if (clen < 0) { 348 /* Conversion error, process as a byte instead. */ 349 *src = (wint_t)*mbsrc; 350 clen = 1; 351 } 352 if (clen == 0) 353 /* 354 * NUL in input gives 0 return value. process 355 * as single NUL byte. 356 */ 357 clen = 1; 358 /* Advance output pointer if we still have input left. */ 359 src++; 360 /* Advance input pointer by number of bytes read. */ 361 mbsrc += clen; 362 /* Decrement input count */ 363 mbslength -= clen; 364 } 365 len = src - psrc; 366 src = psrc; 367 /* 368 * In the single character input case, we will have actually 369 * processed two characters, c and nextc. Reset len back to 370 * just a single character. 371 */ 372 if (mblength < len) 373 len = mblength; 374 375 /* Convert extra argument to list of characters for this mode. */ 376 mbstowcs(extra, mbextra, strlen(mbextra)); 377 MAKEEXTRALIST(flag, nextra, extra); 378 if (!nextra) { 379 if (dlen && *dlen == 0) { 380 errno = ENOSPC; 381 goto out; 382 } 383 *mbdst = '\0'; /* can't create nextra, return "" */ 384 error = 0; 385 goto out; 386 } 387 388 /* Look up which processing function to call. */ 389 f = getvisfun(flag); 390 391 /* 392 * Main processing loop. 393 * Call do_Xvis processing function one character at a time 394 * with next character available for look-ahead. 395 */ 396 for (start = dst; len > 0; len--) { 397 c = *src++; 398 dst = (*f)(dst, c, flag, len >= 1 ? *src : L'\0', nextra); 399 if (dst == NULL) { 400 errno = ENOSPC; 401 goto out; 402 } 403 } 404 405 /* Terminate the output string. */ 406 *dst = L'\0'; 407 408 /* Convert wchar_t string back to multibyte output string. */ 409 len = dlen ? *dlen : ((wcslen(start) + 1) * MB_LEN_MAX); 410 olen = wcstombs(mbdst, start, len * sizeof(*mbdst)); 411 412 free(nextra); 413 free(extra); 414 free(pdst); 415 free(psrc); 416 417 return (int)olen; 418 out: 419 free(nextra); 420 free(extra); 421 free(pdst); 422 free(psrc); 423 return error; 424 } 425 #endif 426 427 #if !HAVE_SVIS 428 /* 429 * The "svis" variants all take an "extra" arg that is a pointer 430 * to a NUL-terminated list of characters to be encoded, too. 431 * These functions are useful e. g. to encode strings in such a 432 * way so that they are not interpreted by a shell. 433 */ 434 435 char * 436 svis(char *mbdst, int c, int flag, int nextc, const char *mbextra) 437 { 438 char cc[2]; 439 int ret; 440 441 cc[0] = c; 442 cc[1] = nextc; 443 444 ret = istrsnvisx(mbdst, NULL, cc, 1, flag, mbextra); 445 if (ret < 0) 446 return NULL; 447 return mbdst + ret; 448 } 449 450 char * 451 snvis(char *mbdst, size_t dlen, int c, int flag, int nextc, const char *mbextra) 452 { 453 char cc[2]; 454 int ret; 455 456 cc[0] = c; 457 cc[1] = nextc; 458 459 ret = istrsnvisx(mbdst, &dlen, cc, 1, flag, mbextra); 460 if (ret < 0) 461 return NULL; 462 return mbdst + ret; 463 } 464 465 int 466 strsvis(char *mbdst, const char *mbsrc, int flag, const char *mbextra) 467 { 468 return istrsnvisx(mbdst, NULL, mbsrc, 0, flag, mbextra); 469 } 470 471 int 472 strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag, const char *mbextra) 473 { 474 return istrsnvisx(mbdst, &dlen, mbsrc, 0, flag, mbextra); 475 } 476 477 int 478 strsvisx(char *mbdst, const char *mbsrc, size_t len, int flag, const char *mbextra) 479 { 480 return istrsnvisx(mbdst, NULL, mbsrc, len, flag, mbextra); 481 } 482 483 int 484 strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag, 485 const char *mbextra) 486 { 487 return istrsnvisx(mbdst, &dlen, mbsrc, len, flag, mbextra); 488 } 489 #endif 490 491 #if !HAVE_VIS 492 /* 493 * vis - visually encode characters 494 */ 495 char * 496 vis(char *mbdst, int c, int flag, int nextc) 497 { 498 char cc[2]; 499 int ret; 500 501 cc[0] = c; 502 cc[1] = nextc; 503 504 ret = istrsnvisx(mbdst, NULL, cc, 1, flag, ""); 505 if (ret < 0) 506 return NULL; 507 return mbdst + ret; 508 } 509 510 char * 511 nvis(char *mbdst, size_t dlen, int c, int flag, int nextc) 512 { 513 char cc[2]; 514 int ret; 515 516 cc[0] = c; 517 cc[1] = nextc; 518 519 ret = istrsnvisx(mbdst, &dlen, cc, 1, flag, ""); 520 if (ret < 0) 521 return NULL; 522 return mbdst + ret; 523 } 524 525 /* 526 * strvis - visually encode characters from src into dst 527 * 528 * Dst must be 4 times the size of src to account for possible 529 * expansion. The length of dst, not including the trailing NULL, 530 * is returned. 531 */ 532 533 int 534 strvis(char *mbdst, const char *mbsrc, int flag) 535 { 536 return istrsnvisx(mbdst, NULL, mbsrc, 0, flag, ""); 537 } 538 539 int 540 strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag) 541 { 542 return istrsnvisx(mbdst, &dlen, mbsrc, 0, flag, ""); 543 } 544 545 /* 546 * strvisx - visually encode characters from src into dst 547 * 548 * Dst must be 4 times the size of src to account for possible 549 * expansion. The length of dst, not including the trailing NULL, 550 * is returned. 551 * 552 * Strvisx encodes exactly len characters from src into dst. 553 * This is useful for encoding a block of data. 554 */ 555 556 int 557 strvisx(char *mbdst, const char *mbsrc, size_t len, int flag) 558 { 559 return istrsnvisx(mbdst, NULL, mbsrc, len, flag, ""); 560 } 561 562 int 563 strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag) 564 { 565 return istrsnvisx(mbdst, &dlen, mbsrc, len, flag, ""); 566 } 567 #endif 568