/*	$NetBSD: mime_codecs.c,v 1.10 2012/11/24 21:40:02 christos Exp $	*/

/*-
 * Copyright (c) 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Anon Ymous.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This module contains all mime related codecs.  Typically there are
 * two versions: one operating on buffers and one operating on files.
 * All exported routines have a "mime_" prefix.  The file oriented
 * routines have a "mime_f" prefix replacing the "mime_" prefix of the
 * equivalent buffer based version.
 *
 * The file based API should be:
 *
 *   mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
 *
 * XXX - currently this naming convention has not been adhered to.
 *
 * where the cookie is a generic way to pass arguments to the routine.
 * This way these routines can be run by run_function() in mime.c.
 *
 * The buffer based API is not as rigid.
 */

#ifdef MIME_SUPPORT

#include <sys/cdefs.h>
#ifndef __lint__
__RCSID("$NetBSD: mime_codecs.c,v 1.10 2012/11/24 21:40:02 christos Exp $");
#endif /* not __lint__ */

#include <assert.h>
#include <iconv.h>
#include <stdio.h>
#include <stdlib.h>
#include <util.h>

#include "def.h"
#include "extern.h"
#include "mime_codecs.h"


#ifdef CHARSET_SUPPORT
/************************************************************************
 * Core character set conversion routines.
 *
 */

/*
 * Fault-tolerant iconv() function.
 *
 * This routine was borrowed from nail-11.25/mime.c and modified.  It
 * tries to handle errno == EILSEQ by emitting a '?' and restarting at
 * the next input byte (is this a good idea?).  All other errors are
 * handled by the caller.
 *
 * Arguments are exactly those of iconv(3) and are updated in the same
 * way.
 *
 * Return: whatever the last iconv() call returned, with one exception:
 * if an illegal sequence is hit while the output buffer is already
 * full, the value E2BIG is returned (and errno is set to E2BIG).
 *
 * NOTE(review): returning E2BIG instead of (size_t)-1 does not follow
 * the iconv() convention.  The value is preserved here because callers
 * outside this file may depend on it; mime_ficonv() below copes with
 * it by flushing its output buffer and calling again.
 */
PUBLIC size_t
mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
{
	size_t sz = 0;

	while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
			&& errno == EILSEQ) {
		if (*outbleft == 0) {
			/*
			 * No room for the '?' substitute.  Do not touch
			 * **outb here: it points one past the end of the
			 * caller's buffer.
			 */
			errno = E2BIG;
			return E2BIG;
		}
		*(*outb)++ = '?';
		(*outbleft)--;

		if (*inbleft == 0) {
			/* Terminate the output only if there is room. */
			if (*outbleft > 0)
				**outb = '\0';
			break;
		}
		/* Skip the offending input byte and try again. */
		(*inb)++;
		(*inbleft)--;
	}
	return sz;
}

/*
 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
 * We don't care about the invalid character count, so don't bother
 * with __iconv().  We do care about robustness, so call mime_iconv()
 * above to try to recover from errors.
 */
#define INBUFSIZE 1024
#define OUTBUFSIZE (INBUFSIZE * 2)

/*
 * Convert the character set of everything readable from 'fi' and write
 * the result to 'fo'.
 *
 * fi:     input stream.
 * fo:     output stream.
 * cookie: the iconv_t conversion descriptor to use, passed as void *.
 *
 * On a conversion or read failure a bracketed diagnostic is written to
 * 'fo' and the routine returns early.
 */
PUBLIC void
mime_ficonv(FILE *fi, FILE *fo, void *cookie)
{
	char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
	const char *in;
	size_t inbytes, outbytes, ret;
	iconv_t cd;

	/*
	 * NOTE: iconv_t is actually a pointer typedef, so this
	 * conversion is not what it appears to be!
	 */
	cd = (iconv_t)cookie;

	while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
		in = inbuf;
		while (inbytes > 0) {
			out = outbuf;
			outbytes = OUTBUFSIZE;
			ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
			if (ret == (size_t)-1 && errno != E2BIG) {
				if (errno != EINVAL || in == inbuf) {
					/* XXX - what is proper here?
					 * Just copy out the remains? */
					(void)fprintf(fo,
					    "\n\t[ iconv truncated message: %s ]\n\n",
					    strerror(errno));
					return;
				}
				/*
				 * If here: errno == EINVAL && in != inbuf
				 */
				/*
				 * Incomplete input character: move the
				 * unconverted tail to the front of inbuf
				 * and top the buffer up from the stream.
				 */
				(void)memmove(inbuf, in, inbytes);
				ret = fread(inbuf + inbytes, 1,
				    INBUFSIZE - inbytes, fi);
				if (ret == 0) {
					if (feof(fi)) {
						(void)fprintf(fo,
						    "\n\t[ unexpected end of file; "
						    "the last character is "
						    "incomplete. ]\n\n");
						return;
					}
					(void)fprintf(fo,
					    "\n\t[ fread(): %s ]\n\n",
					    strerror(errno));
					return;
				}
				in = inbuf;
				inbytes += ret;

			}
			/* Flush whatever this pass converted. */
			if (outbytes < OUTBUFSIZE)
				(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
		}
	}
	/* reset the shift state of the output buffer */
	outbytes = OUTBUFSIZE;
	out = outbuf;
	ret = iconv(cd, NULL, NULL, &out, &outbytes);
	if (ret == (size_t)-1) {
		(void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
		    strerror(errno));
		return;
	}
	if (outbytes < OUTBUFSIZE)
		(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
}

#endif	/* CHARSET_SUPPORT */



/************************************************************************
 * Core base64 routines
 *
 * Defined in sec 6.8 of RFC 2045.
 */

/*
 * Decode a base64 buffer.
 *
 * bin:	buffer to hold the decoded (binary) result (see note 1).
 * b64:	buffer holding the encoded (base64) source.
 * cnt:	number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 * NOTES:
 * 1) It is the caller's responsibility to ensure that bin is large
 *    enough to hold the result (3 bytes for every 4 input bytes).
 * 2) The b64 buffer should always contain a multiple of 4 bytes of
 *    data!  A truncated final group is reported as an error (-1)
 *    rather than being read past the end of the buffer.  Anything
 *    following a group that ends in '=' padding is ignored.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)  ((c) >= sizeof(b64index) ? BAD : (unsigned)b64index[(c)])

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a, b, c, d;

		/* Never read beyond the caller's buffer. */
		if (end - q < 4)
			return -1;

		a = uchar64(q[0]);
		b = uchar64(q[1]);
		c = uchar64(q[2]);
		d = uchar64(q[3]);

		/* '=' is only legal in the last two positions. */
		if (a == BAD || a == EQU || b == BAD || b == EQU ||
		    c == BAD || d == BAD)
			return -1;

		*p++ = ((a << 2) | ((b & 0x30) >> 4));
		if (c == EQU)	{ /* got '=' */
			if (d != EQU)
				return -1;
			break;
		}
		*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
		if (d == EQU) { /* got '=' */
			break;
		}
		*p++ = (((c & 0x03) << 6) | d);
	}

#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char*)bin;
}

/*
 * Encode a buffer as a base64 result.
 *
 * b64:	buffer to hold the encoded (base64) result (see note).
 * bin:	buffer holding the binary source.
 * cnt:	number of bytes in the bin buffer to encode.
 *
 * NOTE: it is the caller's responsibility to ensure that 'b64' is
 * large enough to hold the result: 4 bytes for every (possibly
 * partial) group of 3 input bytes.  The result is not NUL terminated.
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char*)bin;
	size_t left = cnt;

	while (left > 0) {
		/* Number of real input bytes in this group (1..3). */
		size_t n = left < 3 ? left : 3;
		/* Only touch input bytes that exist; missing ones are 0. */
		unsigned a = p[0];
		unsigned b = n > 1 ? p[1] : 0;
		unsigned c = n > 2 ? p[2] : 0;

		b64[0] = b64table[a >> 2];
		b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
		b64[2] = n > 1 ?
		    b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)] : '=';
		b64[3] = n > 2 ? b64table[c & 0x3f] : '=';

		p += n;
		b64 += 4;
		left -= n;
	}
}


#define MIME_BASE64_LINE_MAX	(4 * 19)  /* max line length is 76: see RFC2045 sec 6.8 */

/*
 * Base64 encode everything readable from 'fi', writing newline
 * terminated lines of encoded text to 'fo'.
 *
 * The encoded line length defaults to MIME_BASE64_LINE_MAX and may be
 * lowered with the string obtained from value(ENAME_MIME_B64_LINE_MAX)
 * (value() is declared elsewhere in the project).  'cookie' is unused.
 */
static void
mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
{
	static char b64[MIME_BASE64_LINE_MAX];
	static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
	size_t cnt;
	char *cp;
	size_t limit;
#ifdef __lint__
	cookie = cookie;
#endif
	limit = 0;
	if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
		limit = (size_t)atoi(cp);
	if (limit == 0 || limit > sizeof(b64))
		limit = sizeof(b64);

	/* Convert the encoded line length into input bytes per line. */
	limit = 3 * roundup(limit, 4) / 4;
	if (limit < 3)
		limit = 3;

	while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
		mime_bintob64(b64, mem, (size_t)cnt);
		(void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
		(void)putc('\n', fo);
	}
}

/*
 * Base64 decode 'fi' line by line, writing the binary result to 'fo'.
 *
 * Leading and trailing white space on each line is ignored.  Decoding
 * stops at the first empty line or at the first line that fails to
 * decode; in the latter case a warning line is written to 'fo'.
 *
 * add_lf: if non-NULL, a newline is appended to the output.
 */
static void
mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
{
	char *line;
	size_t len;
	char *buf;
	size_t buflen;

	buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
	buf = emalloc(buflen);

	while ((line = fgetln(fi, &len)) != NULL) {
		ssize_t binlen;
		if (len > 0 && line[len-1] == '\n') /* forget the trailing newline */
			len--;

		/* trash trailing white space */
		for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--)
			continue;

		/* skip leading white space */
		for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++)
			continue;

		if (len == 0)
			break;

		/*
		 * Every 4 input bytes yield at most 3 output bytes.
		 * Input lines may be arbitrarily long, so keep doubling
		 * until the whole line is guaranteed to fit.
		 */
		if (3 * len > 4 * buflen) {
			do
				buflen *= 2;
			while (3 * len > 4 * buflen);
			buf = erealloc(buf, buflen);
		}

		binlen = mime_b64tobin(buf, line, len);

		if (binlen <= 0) {
			(void)fprintf(fo, "WARN: invalid base64 encoding\n");
			break;
		}
		(void)fwrite(buf, 1, (size_t)binlen, fo);
	}

	free(buf);

	if (add_lf)
		(void)fputc('\n', fo);
}


/************************************************************************
 * Core quoted-printable routines.
 *
 * Note: the header QP routines are slightly different and buried
 * inside mime_header.c
 */

/*
 * Decide whether the byte at 'p' has to be written in "=XX" form.
 *
 * p:   the byte under consideration.
 * end: one past the last byte of the input line.
 * l:   length of the current output line so far; 0 means that 'p'
 *      would be the first character on the output line.
 *
 * Return: non-zero if the byte must be quoted, 0 otherwise.
 */
static int
mustquote(const unsigned char *p, const unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* Anything with the high bit set is always quoted. */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	if (flag == N)
		return 0;
	if (flag == Q)
		return 1;
	if (flag == SP)
		return p + 1 < end && p[1] == '\n'; /* trailing white space */

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';

	if (flag == XD)	/* line may consist of a single dot */
		return p + 1 < end && p[1] == '\n';

	errx(EXIT_FAILURE,
	    "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}


#define MIME_QUOTED_LINE_MAX	76  /* QP max length: see RFC2045 sec 6.7 */

/*
 * Write one input line to 'fo' in quoted-printable form.
 *
 * fo:    output stream.
 * line:  the input line; not NUL terminated, may end in '\n'.
 * len:   number of bytes in 'line'.
 * limit: maximum output line length; a soft line break ("=\n") is
 *        inserted whenever the next item would not fit.
 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
	size_t l;	/* length of current output line */
	unsigned char *beg;
	unsigned char *end;
	unsigned char *p;

	assert(limit <= MIME_QUOTED_LINE_MAX);

	beg = (unsigned char*)line;
	end = beg + len;
	l = 0;
	for (p = (unsigned char*)line; p < end; p++) {
		if (mustquote(p, end, l)) {
			/* "=XX" plus room for a soft line break '='. */
			if (l + 4 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)fprintf(fo, "=%02X", *p);
			l += 3;
		}
		else {
			if (*p == '\n') {
				/*
				 * The '\r' of a CRLF pair was already
				 * written as "=0D": write the '\n' quoted
				 * too and follow it with a soft break.
				 */
				if (p > beg && p[-1] == '\r')
					(void)fputs("=0A=", fo);
				/* the l++ below wraps this back to 0 */
				l = (size_t)-1;
			}
			else if (l + 2 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)putc(*p, fo);
			l++;
		}
	}
	/*
	 * Lines ending in a blank must escape the newline.
	 */
	if (len && is_WSP(p[-1]))
		(void)fputs("=\n", fo);
}

/*
 * Quoted-printable encode 'fi' line by line onto 'fo'.
 *
 * The output line length defaults to MIME_QUOTED_LINE_MAX and may be
 * lowered (but not below 4) with the string obtained from
 * value(ENAME_MIME_QP_LINE_MAX).  'cookie' is unused.
 */
static void
mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
{
	char *line;
	size_t len;
	char *cp;
	size_t limit;

#ifdef __lint__
	cookie = cookie;
#endif
	limit = 0;
	if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
		limit = (size_t)atoi(cp);
	if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
		limit = MIME_QUOTED_LINE_MAX;
	if (limit < 4)
		limit = 4;

	while ((line = fgetln(fi, &len)) != NULL)
		fput_quoted_line(fo, line, len, limit);
}

/*
 * Quoted-printable decode 'fi' line by line onto 'fo'.
 *
 * An '=' followed (after optional white space) by a newline is a soft
 * line break and produces no output.  Otherwise the two bytes after
 * the '=' are converted with strtol(..., 16) and the resulting byte is
 * written.  All other bytes are copied unchanged.  'cookie' is unused.
 */
static void
mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
{
	char *line;
	size_t len;

#ifdef __lint__
	cookie = cookie;
#endif
	while ((line = fgetln(fi, &len)) != NULL) {
		char *p;
		char *end;

		end = line + len;
		for (p = line; p < end; p++) {
			if (*p != '=') {
				(void)fputc(*p, fo);
				continue;
			}
			p++;
			while (p < end && is_WSP(*p))
				p++;
			/*
			 * The line ended right after the '=' (possible
			 * on a final line without a newline): there is
			 * nothing left to look at.
			 */
			if (p >= end)
				break;
			if (p + 1 < end && *p != '\n') {
				int c;
				char buf[3];

				buf[0] = *p++;
				buf[1] = *p;
				buf[2] = '\0';
				c = (int)strtol(buf, NULL, 16);
				(void)fputc(c, fo);
			}
		}
	}
}


/************************************************************************
 * Routines to select the codec by name.
 */

/*
 * The identity codec: copy 'fi' to 'fo' byte for byte and flush 'fo'.
 *
 * A read error is reported with warn() and 'fi' is rewound.  A write
 * error is reported with warn(), 'fo' is closed with Fclose() and 'fi'
 * is rewound.  'cookie' is unused.
 */
PUBLIC void
mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
{
	int c;

#ifdef __lint__
	cookie = cookie;
#endif
	while ((c = getc(fi)) != EOF)
		(void)putc(c, fo);

	(void)fflush(fo);
	if (ferror(fi)) {
		warn("read");
		rewind(fi);
		return;
	}
	if (ferror(fo)) {
		warn("write");
		(void)Fclose(fo);
		rewind(fi);
		return;
	}
}


/*
 * Table mapping a Content-Transfer-Encoding name to its file based
 * encoder and decoder.  Terminated by an all-NULL entry.
 */
static const struct transfer_encoding_s {
	const char 	*name;
	mime_codec_t	enc;
	mime_codec_t	dec;
} transfer_encoding_tbl[] = {
	{ MIME_TRANSFER_7BIT, 	mime_fio_copy,    mime_fio_copy },
	{ MIME_TRANSFER_8BIT, 	mime_fio_copy,    mime_fio_copy },
	{ MIME_TRANSFER_BINARY,	mime_fio_copy,    mime_fio_copy },
	{ MIME_TRANSFER_QUOTED, mime_fQP_encode,  mime_fQP_decode },
	{ MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode },
	{ NULL,			NULL,		  NULL },
};

/*
 * Find the table entry whose name matches 'ename', ignoring case.
 * Returns the terminating all-NULL entry if there is no match, so the
 * result is never NULL.  'ename' must not be NULL.
 */
static const struct transfer_encoding_s *
transfer_encoding_lookup(const char *ename)
{
	const struct transfer_encoding_s *tep;

	for (tep = transfer_encoding_tbl; tep->name != NULL; tep++)
		if (strcasecmp(tep->name, ename) == 0)
			break;
	return tep;
}

/*
 * Return the file based encoder for the transfer encoding 'ename', or
 * NULL if 'ename' is NULL or unknown.
 */
PUBLIC mime_codec_t
mime_fio_encoder(const char *ename)
{
	if (ename == NULL)
		return NULL;

	return transfer_encoding_lookup(ename)->enc;
}

/*
 * Return the file based decoder for the transfer encoding 'ename', or
 * NULL if 'ename' is NULL or unknown.
 */
PUBLIC mime_codec_t
mime_fio_decoder(const char *ename)
{
	if (ename == NULL)
		return NULL;

	return transfer_encoding_lookup(ename)->dec;
}

/*
 * This is for use in complete.c and mime.c to get the list of
 * encoding names without exposing the transfer_encoding_tbl[].  The
 * first name is returned if called with a pointer to a NULL pointer.
 * Subsequent calls with the same cookie give successive names.  A
 * NULL return indicates the end of the list.
 */
PUBLIC const char *
mime_next_encoding_name(const void **cookie)
{
	const struct transfer_encoding_s *tep;

	tep = *cookie;
	if (tep == NULL)
		tep = transfer_encoding_tbl;

	/* Advance the cookie; reset it once the terminator is reached. */
	*cookie = tep->name ? &tep[1] : NULL;

	return tep->name;
}

#endif /* MIME_SUPPORT */