1 /* $NetBSD: mime_codecs.c,v 1.4 2006/10/24 19:57:05 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Anon Ymous. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * This module contains all mime related codecs. Typically there are 41 * two versions: one operating on buffers and one operating on files. 42 * All exported routines have a "mime_" prefix. The file oriented 43 * routines have a "mime_f" prefix replacing the "mime_" prefix of the 44 * equivalent buffer based version. 45 * 46 * The file based API should be: 47 * 48 * mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie) 49 * 50 * XXX - currently this naming convention has not been adheared to. 51 * 52 * where the cookie is a generic way to pass arguments to the routine. 53 * This way these routines can be run by run_function() in mime.c. 54 * 55 * The buffer based API is not as rigid. 56 */ 57 58 #ifdef MIME_SUPPORT 59 60 #include <sys/cdefs.h> 61 #ifndef __lint__ 62 __RCSID("$NetBSD: mime_codecs.c,v 1.4 2006/10/24 19:57:05 christos Exp $"); 63 #endif /* not __lint__ */ 64 65 #include <assert.h> 66 #include <iconv.h> 67 #include <stdio.h> 68 #include <stdlib.h> 69 #include <util.h> 70 71 #include "def.h" 72 #include "extern.h" 73 #include "mime_codecs.h" 74 75 76 #ifdef CHARSET_SUPPORT 77 /************************************************************************ 78 * Core character set conversion routines. 79 * 80 */ 81 82 /* 83 * Fault-tolerant iconv() function. 84 * 85 * This routine was borrowed from nail-11.25/mime.c and modified. 
It 86 * tries to handle errno == EILSEQ by restarting at the next input 87 * byte (is this a good idea?). All other errors are handled by the 88 * caller. 89 */ 90 PUBLIC size_t 91 mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft) 92 { 93 size_t sz = 0; 94 95 while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1 96 && errno == EILSEQ) { 97 if (*outbleft > 0) { 98 *(*outb)++ = '?'; 99 (*outbleft)--; 100 } else { 101 **outb = '\0'; 102 return E2BIG; 103 } 104 if (*inbleft > 0) { 105 (*inb)++; 106 (*inbleft)--; 107 } else { 108 **outb = '\0'; 109 break; 110 } 111 } 112 return sz; 113 } 114 115 /* 116 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c. 117 * We don't care about the invalid character count, so don't bother 118 * with __iconv(). We do care about robustness, so call iconv_ft() 119 * above to try to recover from errors. 120 */ 121 #define INBUFSIZE 1024 122 #define OUTBUFSIZE (INBUFSIZE * 2) 123 124 PUBLIC void 125 mime_ficonv(FILE *fi, FILE *fo, void *cookie) 126 { 127 char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out; 128 const char *in; 129 size_t inbytes, outbytes, ret; 130 iconv_t cd; 131 132 /* 133 * NOTE: iconv_t is actually a pointer typedef, so this 134 * conversion is not what it appears to be! 135 */ 136 cd = (iconv_t)cookie; 137 138 while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) { 139 in = inbuf; 140 while (inbytes > 0) { 141 out = outbuf; 142 outbytes = OUTBUFSIZE; 143 ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes); 144 if (ret == (size_t)-1 && errno != E2BIG) { 145 if (errno != EINVAL || in == inbuf) { 146 /* XXX - what is proper here? 147 * Just copy out the remains? 
*/ 148 (void)fprintf(fo, 149 "\n\t[ iconv truncated message: %s ]\n\n", 150 strerror(errno)); 151 return; 152 } 153 /* 154 * If here: errno == EINVAL && in != inbuf 155 */ 156 /* incomplete input character */ 157 (void)memmove(inbuf, in, inbytes); 158 ret = fread(inbuf + inbytes, 1, 159 INBUFSIZE - inbytes, fi); 160 if (ret == 0) { 161 if (feof(fi)) { 162 (void)fprintf(fo, 163 "\n\t[ unexpected end of file; " 164 "the last character is " 165 "incomplete. ]\n\n"); 166 return; 167 } 168 (void)fprintf(fo, 169 "\n\t[ fread(): %s ]\n\n", 170 strerror(errno)); 171 return; 172 } 173 in = inbuf; 174 inbytes += ret; 175 176 } 177 if (outbytes < OUTBUFSIZE) 178 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo); 179 } 180 } 181 /* reset the shift state of the output buffer */ 182 outbytes = OUTBUFSIZE; 183 out = outbuf; 184 ret = iconv(cd, NULL, NULL, &out, &outbytes); 185 if (ret == (size_t)-1) { 186 (void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n", 187 strerror(errno)); 188 return; 189 } 190 if (outbytes < OUTBUFSIZE) 191 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo); 192 } 193 194 #endif /* CHARSET_SUPPORT */ 195 196 197 198 /************************************************************************ 199 * Core base64 routines 200 * 201 * Defined in sec 6.8 of RFC 2045. 202 */ 203 204 /* 205 * Decode a base64 buffer. 206 * 207 * bin: buffer to hold the decoded (binary) result (see note 1). 208 * b64: buffer holding the encoded (base64) source. 209 * cnt: number of bytes in the b64 buffer to decode (see note 2). 210 * 211 * Return: the number of bytes written to the 'bin' buffer or -1 on 212 * error. 213 * NOTES: 214 * 1) It is the callers responsibility to ensure that bin is large 215 * enough to hold the result. 216 * 2) The b64 buffer should always contain a multiple of 4 bytes of 217 * data! 
/*
 * Decode a base64 buffer.
 *
 *   bin:  buffer to hold the decoded (binary) result (see note 1).
 *   b64:  buffer holding the encoded (base64) source.
 *   cnt:  number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 * NOTES:
 *   1) It is the callers responsibility to ensure that bin is large
 *      enough to hold the result (3 bytes per 4 input bytes).
 *   2) The b64 buffer should always contain a multiple of 4 bytes of
 *      data!  Any other count is rejected with -1 before any input
 *      is examined, so nothing beyond b64[cnt - 1] is ever read.
 *   3) Decoding stops after the first group that ends in '='
 *      padding; input following such a group is ignored.
 *   4) Every group is validated before anything from it is stored.
 *      When -1 is returned, 'bin' may still hold the bytes decoded
 *      from earlier, valid groups.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	/* ASCII -> 6 bit value; -1 marks an illegal character, -2 is '='. */
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)  (unsigned)((c) >= sizeof(b64index) ? BAD : b64index[(c)])

	/* A trailing partial group cannot be decoded. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/*
		 * The first two characters must be real base64 digits
		 * (BAD and EQU are both far above 63); the last two may
		 * also be '=' padding.
		 */
		if (a > 63 || b > 63 || c == BAD || d == BAD)
			return -1;

		*p++ = (unsigned char)((a << 2) | ((b & 0x30) >> 4));
		if (c == EQU) {	/* got '=' */
			if (d != EQU)	/* "x=y" padding is malformed */
				return -1;
			break;
		}
		*p++ = (unsigned char)(((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
		if (d == EQU) {	/* got '=' */
			break;
		}
		*p++ = (unsigned char)(((c & 0x03) << 6) | d);
	}

#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char *)bin;
}

279 */ 280 PUBLIC void 281 mime_bintob64(char *b64, const char *bin, size_t cnt) 282 { 283 static const char b64table[] = 284 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 285 const unsigned char *p = (const unsigned char*)bin; 286 int i; 287 288 for (i = cnt; i > 0; i -= 3) { 289 unsigned a = p[0]; 290 unsigned b = p[1]; 291 unsigned c = p[2]; 292 293 b64[0] = b64table[a >> 2]; 294 switch(i) { 295 case 1: 296 b64[1] = b64table[((a & 0x3) << 4)]; 297 b64[2] = '='; 298 b64[3] = '='; 299 break; 300 case 2: 301 b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)]; 302 b64[2] = b64table[((b & 0xf) << 2)]; 303 b64[3] = '='; 304 break; 305 default: 306 b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)]; 307 b64[2] = b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)]; 308 b64[3] = b64table[c & 0x3f]; 309 break; 310 } 311 p += 3; 312 b64 += 4; 313 } 314 } 315 316 317 #define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */ 318 319 static void 320 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused) 321 { 322 static char b64[MIME_BASE64_LINE_MAX]; 323 static char mem[3 * (MIME_BASE64_LINE_MAX / 4)]; 324 int cnt; 325 char *cp; 326 size_t limit; 327 #ifdef __lint__ 328 cookie = cookie; 329 #endif 330 limit = 0; 331 if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL) 332 limit = (size_t)atoi(cp); 333 if (limit == 0 || limit > sizeof(b64)) 334 limit = sizeof(b64); 335 336 limit = 3 * roundup(limit, 4) / 4; 337 if (limit < 3) 338 limit = 3; 339 340 while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) { 341 mime_bintob64(b64, mem, (size_t)cnt); 342 (void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo); 343 (void)putc('\n', fo); 344 } 345 } 346 347 static void 348 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf) 349 { 350 char *line; 351 size_t len; 352 char *buf; 353 size_t buflen; 354 355 buflen = 3 * (MIME_BASE64_LINE_MAX / 4); 356 buf = emalloc(buflen); 357 358 while ((line = fgetln(fi, &len)) != 
NULL) { 359 ssize_t binlen; 360 if (line[len-1] == '\n') /* forget the trailing newline */ 361 len--; 362 363 /* trash trailing white space */ 364 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[len-1]); len--) 365 continue; 366 367 /* skip leading white space */ 368 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[0]); len--, line++) 369 continue; 370 371 if (len == 0) 372 break; 373 374 if (3 * len > 4 * buflen) { 375 buflen *= 2; 376 buf = erealloc(buf, buflen); 377 } 378 379 binlen = mime_b64tobin(buf, line, len); 380 381 if (binlen <= 0) { 382 (void)fprintf(fo, "WARN: invalid base64 encoding\n"); 383 break; 384 } 385 (void)fwrite(buf, 1, (size_t)binlen, fo); 386 } 387 388 free(buf); 389 390 if (add_lf) 391 (void)fputc('\n', fo); 392 } 393 394 395 /************************************************************************ 396 * Core quoted-printable routines. 397 * 398 * Note: the header QP routines are slightly different and burried 399 * inside mime_header.c 400 */ 401 402 static int 403 mustquote(unsigned char *p, unsigned char *end, size_t l) 404 { 405 #define N 0 /* do not quote */ 406 #define Q 1 /* must quote */ 407 #define SP 2 /* white space */ 408 #define XF 3 /* special character 'F' - maybe quoted */ 409 #define XD 4 /* special character '.' 
- maybe quoted */ 410 #define EQ Q /* '=' must be quoted */ 411 #define TB SP /* treat '\t' as a space */ 412 #define NL N /* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */ 413 #define CR Q /* always quote a '\r' (CR) - it occurs only in a CRLF combo */ 414 415 static const signed char quotetab[] = { 416 Q, Q, Q, Q, Q, Q, Q, Q, Q,TB,NL, Q, Q,CR, Q, Q, 417 Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, Q, 418 SP, N, N, N, N, N, N, N, N, N, N, N, N, N,XD, N, 419 N, N, N, N, N, N, N, N, N, N, N, N, N,EQ, N, N, 420 421 N, N, N, N, N, N,XF, N, N, N, N, N, N, N, N, N, 422 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 423 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, 424 N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, Q, 425 }; 426 int flag = *p > 0x7f ? Q : quotetab[*p]; 427 428 if (flag == N) 429 return 0; 430 if (flag == Q) 431 return 1; 432 if (flag == SP) 433 return (p + 1 < end && p[1] == '\n'); /* trailing white space */ 434 435 /* The remainder are special start-of-line cases. 
*/ 436 if (l != 0) 437 return 0; 438 439 if (flag == XF) /* line may start with "From" */ 440 return (p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm'); 441 442 if (flag == XD) /* line may consist of a single dot */ 443 return (p + 1 < end && p[1] == '\n'); 444 445 errx(EXIT_FAILURE, "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n", 446 *p, *p, flag, l); 447 /* NOT REACHED */ 448 return 0; /* appease GCC */ 449 450 #undef N 451 #undef Q 452 #undef SP 453 #undef XX 454 #undef EQ 455 #undef TB 456 #undef NL 457 #undef CR 458 } 459 460 461 #define MIME_QUOTED_LINE_MAX 76 /* QP max length: see RFC2045 sec 6.7 */ 462 463 static void 464 fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit) 465 { 466 size_t l; /* length of current output line */ 467 unsigned char *beg; 468 unsigned char *end; 469 unsigned char *p; 470 471 assert(limit <= MIME_QUOTED_LINE_MAX); 472 473 beg = (unsigned char*)line; 474 end = beg + len; 475 l = 0; 476 for (p = (unsigned char*)line; p < end; p++) { 477 if (mustquote(p, end, l)) { 478 if (l + 4 > limit) { 479 (void)fputs("=\n", fo); 480 l = 0; 481 } 482 (void)fprintf(fo, "=%02X", *p); 483 l += 3; 484 } 485 else { 486 if (*p == '\n') { 487 if (p > beg && p[-1] == '\r') 488 (void)fputs("=0A=", fo); 489 l = (size_t)-1; 490 } 491 else if (l + 2 > limit) { 492 (void)fputs("=\n", fo); 493 l = 0; 494 } 495 (void)putc(*p, fo); 496 l++; 497 } 498 } 499 /* 500 * Lines ending in a blank must escape the newline. 
501 */ 502 if (len && isblank((unsigned char)p[-1])) 503 (void)fputs("=\n", fo); 504 } 505 506 static void 507 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused) 508 { 509 char *line; 510 size_t len; 511 char *cp; 512 size_t limit; 513 514 #ifdef __lint__ 515 cookie = cookie; 516 #endif 517 limit = 0; 518 if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL) 519 limit = (size_t)atoi(cp); 520 if (limit == 0 || limit > MIME_QUOTED_LINE_MAX) 521 limit = MIME_QUOTED_LINE_MAX; 522 if (limit < 4) 523 limit = 4; 524 525 while ((line = fgetln(fi, &len)) != NULL) 526 fput_quoted_line(fo, line, len, limit); 527 } 528 529 static void 530 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused) 531 { 532 char *line; 533 size_t len; 534 535 #ifdef __lint__ 536 cookie = cookie; 537 #endif 538 while ((line = fgetln(fi, &len)) != NULL) { 539 int c; 540 char *p; 541 char *end; 542 end = line + len; 543 for (p = line; p < end; p++) { 544 if (*p == '=') { 545 p++; 546 while (p < end && isblank((unsigned char)*p)) 547 p++; 548 if (*p != '\n' && p + 1 < end) { 549 char buf[3]; 550 buf[0] = *p++; 551 buf[1] = *p; 552 buf[2] = '\0'; 553 c = strtol(buf, NULL, 16); 554 (void)fputc(c, fo); 555 } 556 } 557 else 558 (void)fputc(*p, fo); 559 } 560 } 561 } 562 563 564 /************************************************************************ 565 * Routines to select the codec by name. 
566 */ 567 568 PUBLIC void 569 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused) 570 { 571 int c; 572 573 #ifdef __lint__ 574 cookie = cookie; 575 #endif 576 while ((c = getc(fi)) != EOF) 577 (void)putc(c, fo); 578 579 (void)fflush(fo); 580 if (ferror(fi)) { 581 warn("read"); 582 rewind(fi); 583 return; 584 } 585 if (ferror(fo)) { 586 warn("write"); 587 (void)Fclose(fo); 588 rewind(fi); 589 return; 590 } 591 } 592 593 594 static const struct transfer_encoding_s { 595 const char *name; 596 mime_codec_t enc; 597 mime_codec_t dec; 598 } transfer_encoding_tbl[] = { 599 { MIME_TRANSFER_7BIT, mime_fio_copy, mime_fio_copy }, 600 { MIME_TRANSFER_8BIT, mime_fio_copy, mime_fio_copy }, 601 { MIME_TRANSFER_BINARY, mime_fio_copy, mime_fio_copy }, 602 { MIME_TRANSFER_QUOTED, mime_fQP_encode, mime_fQP_decode }, 603 { MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode }, 604 { NULL, NULL, NULL }, 605 }; 606 607 608 PUBLIC mime_codec_t 609 mime_fio_encoder(const char *ename) 610 { 611 const struct transfer_encoding_s *tep = NULL; 612 613 if (ename == NULL) 614 return NULL; 615 616 for (tep = transfer_encoding_tbl; tep->name; tep++) 617 if (strcasecmp(tep->name, ename) == 0) 618 break; 619 return tep->enc; 620 } 621 622 PUBLIC mime_codec_t 623 mime_fio_decoder(const char *ename) 624 { 625 const struct transfer_encoding_s *tep = NULL; 626 627 if (ename == NULL) 628 return NULL; 629 630 for (tep = transfer_encoding_tbl; tep->name; tep++) 631 if (strcasecmp(tep->name, ename) == 0) 632 break; 633 return tep->dec; 634 } 635 636 /* 637 * This is for use in complete.c and mime.c to get the list of 638 * encoding names without exposing the transfer_encoding_tbl[]. The 639 * first name is returned if called with a pointer to a NULL pointer. 640 * Subsequent calls with the same cookie give successive names. A 641 * NULL return indicates the end of the list. 
642 */ 643 PUBLIC const char * 644 mime_next_encoding_name(const void **cookie) 645 { 646 const struct transfer_encoding_s *tep; 647 648 tep = *cookie; 649 if (tep == NULL) 650 tep = transfer_encoding_tbl; 651 652 *cookie = tep->name ? &tep[1] : NULL; 653 654 return tep->name; 655 } 656 657 658 #endif /* MIME_SUPPORT */ 659