1 /* $NetBSD: mime_codecs.c,v 1.6 2007/10/23 14:58:44 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Anon Ymous. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * This module contains all mime related codecs. Typically there are 41 * two versions: one operating on buffers and one operating on files. 42 * All exported routines have a "mime_" prefix. The file oriented 43 * routines have a "mime_f" prefix replacing the "mime_" prefix of the 44 * equivalent buffer based version. 45 * 46 * The file based API should be: 47 * 48 * mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie) 49 * 50 * XXX - currently this naming convention has not been adheared to. 51 * 52 * where the cookie is a generic way to pass arguments to the routine. 53 * This way these routines can be run by run_function() in mime.c. 54 * 55 * The buffer based API is not as rigid. 56 */ 57 58 #ifdef MIME_SUPPORT 59 60 #include <sys/cdefs.h> 61 #ifndef __lint__ 62 __RCSID("$NetBSD: mime_codecs.c,v 1.6 2007/10/23 14:58:44 christos Exp $"); 63 #endif /* not __lint__ */ 64 65 #include <assert.h> 66 #include <iconv.h> 67 #include <stdio.h> 68 #include <stdlib.h> 69 #include <util.h> 70 71 #include "def.h" 72 #include "extern.h" 73 #include "mime_codecs.h" 74 75 76 #ifdef CHARSET_SUPPORT 77 /************************************************************************ 78 * Core character set conversion routines. 79 * 80 */ 81 82 /* 83 * Fault-tolerant iconv() function. 84 * 85 * This routine was borrowed from nail-11.25/mime.c and modified. 
It 86 * tries to handle errno == EILSEQ by restarting at the next input 87 * byte (is this a good idea?). All other errors are handled by the 88 * caller. 89 */ 90 PUBLIC size_t 91 mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft) 92 { 93 size_t sz = 0; 94 95 while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1 96 && errno == EILSEQ) { 97 if (*outbleft > 0) { 98 *(*outb)++ = '?'; 99 (*outbleft)--; 100 } else { 101 **outb = '\0'; 102 return E2BIG; 103 } 104 if (*inbleft > 0) { 105 (*inb)++; 106 (*inbleft)--; 107 } else { 108 **outb = '\0'; 109 break; 110 } 111 } 112 return sz; 113 } 114 115 /* 116 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c. 117 * We don't care about the invalid character count, so don't bother 118 * with __iconv(). We do care about robustness, so call iconv_ft() 119 * above to try to recover from errors. 120 */ 121 #define INBUFSIZE 1024 122 #define OUTBUFSIZE (INBUFSIZE * 2) 123 124 PUBLIC void 125 mime_ficonv(FILE *fi, FILE *fo, void *cookie) 126 { 127 char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out; 128 const char *in; 129 size_t inbytes, outbytes, ret; 130 iconv_t cd; 131 132 /* 133 * NOTE: iconv_t is actually a pointer typedef, so this 134 * conversion is not what it appears to be! 135 */ 136 cd = (iconv_t)cookie; 137 138 while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) { 139 in = inbuf; 140 while (inbytes > 0) { 141 out = outbuf; 142 outbytes = OUTBUFSIZE; 143 ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes); 144 if (ret == (size_t)-1 && errno != E2BIG) { 145 if (errno != EINVAL || in == inbuf) { 146 /* XXX - what is proper here? 147 * Just copy out the remains? 
*/ 148 (void)fprintf(fo, 149 "\n\t[ iconv truncated message: %s ]\n\n", 150 strerror(errno)); 151 return; 152 } 153 /* 154 * If here: errno == EINVAL && in != inbuf 155 */ 156 /* incomplete input character */ 157 (void)memmove(inbuf, in, inbytes); 158 ret = fread(inbuf + inbytes, 1, 159 INBUFSIZE - inbytes, fi); 160 if (ret == 0) { 161 if (feof(fi)) { 162 (void)fprintf(fo, 163 "\n\t[ unexpected end of file; " 164 "the last character is " 165 "incomplete. ]\n\n"); 166 return; 167 } 168 (void)fprintf(fo, 169 "\n\t[ fread(): %s ]\n\n", 170 strerror(errno)); 171 return; 172 } 173 in = inbuf; 174 inbytes += ret; 175 176 } 177 if (outbytes < OUTBUFSIZE) 178 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo); 179 } 180 } 181 /* reset the shift state of the output buffer */ 182 outbytes = OUTBUFSIZE; 183 out = outbuf; 184 ret = iconv(cd, NULL, NULL, &out, &outbytes); 185 if (ret == (size_t)-1) { 186 (void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n", 187 strerror(errno)); 188 return; 189 } 190 if (outbytes < OUTBUFSIZE) 191 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo); 192 } 193 194 #endif /* CHARSET_SUPPORT */ 195 196 197 198 /************************************************************************ 199 * Core base64 routines 200 * 201 * Defined in sec 6.8 of RFC 2045. 202 */ 203 204 /* 205 * Decode a base64 buffer. 206 * 207 * bin: buffer to hold the decoded (binary) result (see note 1). 208 * b64: buffer holding the encoded (base64) source. 209 * cnt: number of bytes in the b64 buffer to decode (see note 2). 210 * 211 * Return: the number of bytes written to the 'bin' buffer or -1 on 212 * error. 213 * NOTES: 214 * 1) It is the callers responsibility to ensure that bin is large 215 * enough to hold the result. 216 * 2) The b64 buffer should always contain a multiple of 4 bytes of 217 * data! 
/*
 * Decode a base64 buffer.
 *
 * bin: buffer to hold the decoded (binary) result (see note 1).
 * b64: buffer holding the encoded (base64) source.
 * cnt: number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 * NOTES:
 *   1) It is the callers responsibility to ensure that bin is large
 *      enough to hold the result (3 bytes for every 4 bytes of b64).
 *   2) The b64 buffer should always contain a multiple of 4 bytes of
 *      data!  Any other count is rejected with -1 rather than read
 *      past the end of b64.
 *   3) Decoding stops after the first group that ends in '=' padding;
 *      anything that follows it is ignored.
 *   4) When -1 is returned the contents of bin are unspecified.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	/* Maps an ASCII code to its 6-bit value; -1 is invalid, -2 is '='. */
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	((unsigned)-2)
#define BAD	((unsigned)-1)
#define uchar64(c)  (unsigned)((c) >= sizeof(b64index) ? BAD : b64index[(c)])

	/* A partial trailing group would make q[1..3] read out of bounds. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/*
		 * Validate the whole group before emitting anything, so
		 * that a bad character next to the padding is not missed.
		 */
		if (a == BAD || b == BAD || c == BAD || d == BAD)
			return -1;
		/* Padding may only occupy the last two positions. */
		if (a == EQU || b == EQU)
			return -1;

		*p++ = ((a << 2) | ((b & 0x30) >> 4));
		if (c == EQU) {	/* got '=' */
			if (d != EQU)
				return -1;
			break;
		}
		*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
		if (d == EQU) {	/* got '=' */
			break;
		}
		*p++ = (((c & 0x03) << 6) | d);
	}

#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char *)bin;
}
279 */ 280 PUBLIC void 281 mime_bintob64(char *b64, const char *bin, size_t cnt) 282 { 283 static const char b64table[] = 284 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 285 const unsigned char *p = (const unsigned char*)bin; 286 int i; 287 288 for (i = cnt; i > 0; i -= 3) { 289 unsigned a = p[0]; 290 unsigned b = p[1]; 291 unsigned c = p[2]; 292 293 b64[0] = b64table[a >> 2]; 294 switch(i) { 295 case 1: 296 b64[1] = b64table[((a & 0x3) << 4)]; 297 b64[2] = '='; 298 b64[3] = '='; 299 break; 300 case 2: 301 b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)]; 302 b64[2] = b64table[((b & 0xf) << 2)]; 303 b64[3] = '='; 304 break; 305 default: 306 b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)]; 307 b64[2] = b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)]; 308 b64[3] = b64table[c & 0x3f]; 309 break; 310 } 311 p += 3; 312 b64 += 4; 313 } 314 } 315 316 317 #define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */ 318 319 static void 320 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused) 321 { 322 static char b64[MIME_BASE64_LINE_MAX]; 323 static char mem[3 * (MIME_BASE64_LINE_MAX / 4)]; 324 int cnt; 325 char *cp; 326 size_t limit; 327 #ifdef __lint__ 328 cookie = cookie; 329 #endif 330 limit = 0; 331 if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL) 332 limit = (size_t)atoi(cp); 333 if (limit == 0 || limit > sizeof(b64)) 334 limit = sizeof(b64); 335 336 limit = 3 * roundup(limit, 4) / 4; 337 if (limit < 3) 338 limit = 3; 339 340 while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) { 341 mime_bintob64(b64, mem, (size_t)cnt); 342 (void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo); 343 (void)putc('\n', fo); 344 } 345 } 346 347 static void 348 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf) 349 { 350 char *line; 351 size_t len; 352 char *buf; 353 size_t buflen; 354 355 buflen = 3 * (MIME_BASE64_LINE_MAX / 4); 356 buf = emalloc(buflen); 357 358 while ((line = fgetln(fi, &len)) != 
NULL) { 359 ssize_t binlen; 360 if (line[len-1] == '\n') /* forget the trailing newline */ 361 len--; 362 363 /* trash trailing white space */ 364 for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--) 365 continue; 366 367 /* skip leading white space */ 368 for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++) 369 continue; 370 371 if (len == 0) 372 break; 373 374 if (3 * len > 4 * buflen) { 375 buflen *= 2; 376 buf = erealloc(buf, buflen); 377 } 378 379 binlen = mime_b64tobin(buf, line, len); 380 381 if (binlen <= 0) { 382 (void)fprintf(fo, "WARN: invalid base64 encoding\n"); 383 break; 384 } 385 (void)fwrite(buf, 1, (size_t)binlen, fo); 386 } 387 388 free(buf); 389 390 if (add_lf) 391 (void)fputc('\n', fo); 392 } 393 394 395 /************************************************************************ 396 * Core quoted-printable routines. 397 * 398 * Note: the header QP routines are slightly different and burried 399 * inside mime_header.c 400 */ 401 402 static int 403 mustquote(unsigned char *p, unsigned char *end, size_t l) 404 { 405 #define N 0 /* do not quote */ 406 #define Q 1 /* must quote */ 407 #define SP 2 /* white space */ 408 #define XF 3 /* special character 'F' - maybe quoted */ 409 #define XD 4 /* special character '.' 
- maybe quoted */ 410 #define EQ Q /* '=' must be quoted */ 411 #define TB SP /* treat '\t' as a space */ 412 #define NL N /* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */ 413 #define CR Q /* always quote a '\r' (CR) - it occurs only in a CRLF combo */ 414 415 static const signed char quotetab[] = { 416 Q, Q, Q, Q, Q, Q, Q, Q, Q,TB,NL, Q, Q,CR, Q, Q, 417 Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, 418 SP, N, N, N, N, N, N, N, N, N, N, N, N, N,XD, N, 419 N, N, N, N, N, N, N, N, N, N, N, N, N,EQ, N, N, 420 421 N, N, N, N, N, N,XF, N, N, N, N, N, N, N, N, N, 422 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 423 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 424 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q, 425 }; 426 int flag = *p > 0x7f ? Q : quotetab[*p]; 427 428 if (flag == N) 429 return 0; 430 if (flag == Q) 431 return 1; 432 if (flag == SP) 433 return p + 1 < end && p[1] == '\n'; /* trailing white space */ 434 435 /* The remainder are special start-of-line cases. 
*/ 436 if (l != 0) 437 return 0; 438 439 if (flag == XF) /* line may start with "From" */ 440 return p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm'; 441 442 if (flag == XD) /* line may consist of a single dot */ 443 return p + 1 < end && p[1] == '\n'; 444 445 errx(EXIT_FAILURE, 446 "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n", 447 *p, *p, flag, l); 448 /* NOT REACHED */ 449 return 0; /* appease GCC */ 450 451 #undef N 452 #undef Q 453 #undef SP 454 #undef XX 455 #undef EQ 456 #undef TB 457 #undef NL 458 #undef CR 459 } 460 461 462 #define MIME_QUOTED_LINE_MAX 76 /* QP max length: see RFC2045 sec 6.7 */ 463 464 static void 465 fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit) 466 { 467 size_t l; /* length of current output line */ 468 unsigned char *beg; 469 unsigned char *end; 470 unsigned char *p; 471 472 assert(limit <= MIME_QUOTED_LINE_MAX); 473 474 beg = (unsigned char*)line; 475 end = beg + len; 476 l = 0; 477 for (p = (unsigned char*)line; p < end; p++) { 478 if (mustquote(p, end, l)) { 479 if (l + 4 > limit) { 480 (void)fputs("=\n", fo); 481 l = 0; 482 } 483 (void)fprintf(fo, "=%02X", *p); 484 l += 3; 485 } 486 else { 487 if (*p == '\n') { 488 if (p > beg && p[-1] == '\r') 489 (void)fputs("=0A=", fo); 490 l = (size_t)-1; 491 } 492 else if (l + 2 > limit) { 493 (void)fputs("=\n", fo); 494 l = 0; 495 } 496 (void)putc(*p, fo); 497 l++; 498 } 499 } 500 /* 501 * Lines ending in a blank must escape the newline. 
502 */ 503 if (len && is_WSP(p[-1])) 504 (void)fputs("=\n", fo); 505 } 506 507 static void 508 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused) 509 { 510 char *line; 511 size_t len; 512 char *cp; 513 size_t limit; 514 515 #ifdef __lint__ 516 cookie = cookie; 517 #endif 518 limit = 0; 519 if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL) 520 limit = (size_t)atoi(cp); 521 if (limit == 0 || limit > MIME_QUOTED_LINE_MAX) 522 limit = MIME_QUOTED_LINE_MAX; 523 if (limit < 4) 524 limit = 4; 525 526 while ((line = fgetln(fi, &len)) != NULL) 527 fput_quoted_line(fo, line, len, limit); 528 } 529 530 static void 531 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused) 532 { 533 char *line; 534 size_t len; 535 536 #ifdef __lint__ 537 cookie = cookie; 538 #endif 539 while ((line = fgetln(fi, &len)) != NULL) { 540 int c; 541 char *p; 542 char *end; 543 end = line + len; 544 for (p = line; p < end; p++) { 545 if (*p == '=') { 546 p++; 547 while (p < end && is_WSP(*p)) 548 p++; 549 if (*p != '\n' && p + 1 < end) { 550 char buf[3]; 551 buf[0] = *p++; 552 buf[1] = *p; 553 buf[2] = '\0'; 554 c = strtol(buf, NULL, 16); 555 (void)fputc(c, fo); 556 } 557 } 558 else 559 (void)fputc(*p, fo); 560 } 561 } 562 } 563 564 565 /************************************************************************ 566 * Routines to select the codec by name. 
567 */ 568 569 PUBLIC void 570 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused) 571 { 572 int c; 573 574 #ifdef __lint__ 575 cookie = cookie; 576 #endif 577 while ((c = getc(fi)) != EOF) 578 (void)putc(c, fo); 579 580 (void)fflush(fo); 581 if (ferror(fi)) { 582 warn("read"); 583 rewind(fi); 584 return; 585 } 586 if (ferror(fo)) { 587 warn("write"); 588 (void)Fclose(fo); 589 rewind(fi); 590 return; 591 } 592 } 593 594 595 static const struct transfer_encoding_s { 596 const char *name; 597 mime_codec_t enc; 598 mime_codec_t dec; 599 } transfer_encoding_tbl[] = { 600 { MIME_TRANSFER_7BIT, mime_fio_copy, mime_fio_copy }, 601 { MIME_TRANSFER_8BIT, mime_fio_copy, mime_fio_copy }, 602 { MIME_TRANSFER_BINARY, mime_fio_copy, mime_fio_copy }, 603 { MIME_TRANSFER_QUOTED, mime_fQP_encode, mime_fQP_decode }, 604 { MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode }, 605 { NULL, NULL, NULL }, 606 }; 607 608 609 PUBLIC mime_codec_t 610 mime_fio_encoder(const char *ename) 611 { 612 const struct transfer_encoding_s *tep = NULL; 613 614 if (ename == NULL) 615 return NULL; 616 617 for (tep = transfer_encoding_tbl; tep->name; tep++) 618 if (strcasecmp(tep->name, ename) == 0) 619 break; 620 return tep->enc; 621 } 622 623 PUBLIC mime_codec_t 624 mime_fio_decoder(const char *ename) 625 { 626 const struct transfer_encoding_s *tep = NULL; 627 628 if (ename == NULL) 629 return NULL; 630 631 for (tep = transfer_encoding_tbl; tep->name; tep++) 632 if (strcasecmp(tep->name, ename) == 0) 633 break; 634 return tep->dec; 635 } 636 637 /* 638 * This is for use in complete.c and mime.c to get the list of 639 * encoding names without exposing the transfer_encoding_tbl[]. The 640 * first name is returned if called with a pointer to a NULL pointer. 641 * Subsequent calls with the same cookie give successive names. A 642 * NULL return indicates the end of the list. 
643 */ 644 PUBLIC const char * 645 mime_next_encoding_name(const void **cookie) 646 { 647 const struct transfer_encoding_s *tep; 648 649 tep = *cookie; 650 if (tep == NULL) 651 tep = transfer_encoding_tbl; 652 653 *cookie = tep->name ? &tep[1] : NULL; 654 655 return tep->name; 656 } 657 658 #endif /* MIME_SUPPORT */ 659