1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.14 (Berkeley) 04/03/95"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static char *MimeBoundaryNames[] = 45 { 46 "SYNTAX", "NOTSEP", "INTERMED", "FINAL" 47 }; 48 /* 49 ** MIME8TO7 -- output 8 bit body in 7 bit format 50 ** 51 ** The header has already been output -- this has to do the 52 ** 8 to 7 bit conversion. It would be easy if we didn't have 53 ** to deal with nested formats (multipart/xxx and message/rfc822). 54 ** 55 ** We won't be called if we don't have to do a conversion, and 56 ** appropriate MIME-Version: and Content-Type: fields have been 57 ** output. Any Content-Transfer-Encoding: field has not been 58 ** output, and we can add it here. 59 ** 60 ** Parameters: 61 ** mci -- mailer connection information. 62 ** header -- the header for this body part. 63 ** e -- envelope. 64 ** boundaries -- the currently pending message boundaries. 65 ** NULL if we are processing the outer portion. 66 ** flags -- to tweak processing. 67 ** 68 ** Returns: 69 ** An indicator of what terminated the message part: 70 ** MBT_FINAL -- the final boundary 71 ** MBT_INTERMED -- an intermediate boundary 72 ** MBT_NOTSEP -- an end of file 73 */ 74 75 struct args 76 { 77 char *field; /* name of field */ 78 char *value; /* value of that field */ 79 }; 80 81 int 82 mime8to7(mci, header, e, boundaries, flags) 83 register MCI *mci; 84 HDR *header; 85 register ENVELOPE *e; 86 char **boundaries; 87 int flags; 88 { 89 register char *p; 90 int linelen; 91 int bt; 92 off_t offset; 93 size_t sectionsize, sectionhighbits; 94 int i; 95 char *type; 96 char *subtype; 97 char **pvp; 98 int argc = 0; 99 struct args argv[MAXMIMEARGS]; 100 char bbuf[128]; 101 char buf[MAXLINE]; 102 char pvpbuf[MAXLINE]; 103 extern char MimeTokenTab[256]; 104 105 if (tTd(43, 1)) 106 { 107 printf("mime8to7: flags = %x, boundaries =", flags); 108 if (boundaries[0] == NULL) 109 printf(" <none>"); 110 else 111 { 112 for (i = 0; boundaries[i] != NULL; i++) 113 printf(" %s", boundaries[i]); 114 } 115 printf("\n"); 116 } 117 type = subtype = "-none-"; 118 p = hvalue("Content-Type", header); 119 if (p != NULL && 120 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 121 MimeTokenTab)) != NULL && 122 pvp[0] != NULL) 123 { 124 if (tTd(43, 40)) 125 { 126 for (i = 0; pvp[i] != NULL; i++) 127 printf("pvp[%d] = \"%s\"\n", i, pvp[i]); 128 } 129 type = *pvp++; 130 if (*pvp != NULL && strcmp(*pvp, "/") == 0 && 131 *++pvp != NULL) 132 { 133 subtype = *pvp++; 134 } 135 136 /* break out parameters */ 137 while (*pvp != NULL && argc < MAXMIMEARGS) 138 { 139 /* skip to semicolon separator */ 140 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 141 pvp++; 142 if (*pvp++ == NULL || *pvp == NULL) 143 break; 144 145 /* extract field name */ 146 argv[argc].field = *pvp++; 147 148 /* see if there is a value */ 149 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 150 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 151 { 152 argv[argc].value = *pvp; 153 argc++; 154 } 155 } 156 } 157 158 /* handle types that cannot have 8-bit data internally */ 159 sprintf(buf, "%s/%s", type, subtype); 160 if (wordinclass(buf, 'n')) 161 flags |= M87F_NO8BIT; 162 163 /* 164 ** Multipart requires special processing. 165 ** 166 ** Do a recursive descent into the message. 167 */ 168 169 if (strcasecmp(type, "multipart") == 0) 170 { 171 register char *q; 172 173 for (i = 0; i < argc; i++) 174 { 175 if (strcasecmp(argv[i].field, "boundary") == 0) 176 break; 177 } 178 if (i >= argc) 179 { 180 syserr("mime8to7: Content-Type: %s missing boundary", p); 181 p = "---"; 182 } 183 else 184 p = argv[i].value; 185 if (*p == '"') 186 q = strchr(++p, '"'); 187 else 188 q = p + strlen(p); 189 if (q - p > sizeof bbuf - 1) 190 { 191 syserr("mime8to7: multipart boundary \"%.*s\" too long", 192 q - p, p); 193 q = p + sizeof bbuf - 1; 194 } 195 strncpy(bbuf, p, q - p); 196 bbuf[q - p] = '\0'; 197 if (tTd(43, 1)) 198 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 199 for (i = 0; i < MAXMIMENESTING; i++) 200 if (boundaries[i] == NULL) 201 break; 202 if (i >= MAXMIMENESTING) 203 syserr("mime8to7: multipart nesting boundary too deep"); 204 else 205 { 206 boundaries[i] = bbuf; 207 boundaries[i + 1] = NULL; 208 } 209 210 /* skip the early "comment" prologue */ 211 putline("", mci); 212 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 213 { 214 bt = mimeboundary(buf, boundaries); 215 if (bt != MBT_NOTSEP) 216 break; 217 putline(buf, mci); 218 if (tTd(43, 99)) 219 printf(" ...%s", buf); 220 } 221 if (feof(e->e_dfp)) 222 bt = MBT_FINAL; 223 while (bt != MBT_FINAL) 224 { 225 auto HDR *hdr = NULL; 226 227 sprintf(buf, "--%s", bbuf); 228 putline(buf, mci); 229 if (tTd(43, 35)) 230 printf(" ...%s\n", buf); 231 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 232 if (tTd(43, 101)) 233 putline("+++after collect", mci); 234 putheader(mci, hdr, e, 0); 235 if (tTd(43, 101)) 236 putline("+++after putheader", mci); 237 bt = mime8to7(mci, hdr, e, boundaries, flags); 238 } 239 sprintf(buf, "--%s--", bbuf); 240 putline(buf, mci); 241 if (tTd(43, 35)) 242 printf(" ...%s\n", buf); 243 boundaries[i] = NULL; 244 245 /* skip the late "comment" epilogue */ 246 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 247 { 248 bt = mimeboundary(buf, boundaries); 249 if (bt != MBT_NOTSEP) 250 break; 251 putline(buf, mci); 252 if (tTd(43, 99)) 253 printf(" ...%s", buf); 254 } 255 if (feof(e->e_dfp)) 256 bt = MBT_FINAL; 257 if (tTd(43, 3)) 258 printf("\t\t\tmime8to7=>%s (multipart)\n", 259 MimeBoundaryNames[bt]); 260 return bt; 261 } 262 263 /* 264 ** Non-compound body type 265 ** 266 ** Compute the ratio of seven to eight bit characters; 267 ** use that as a heuristic to decide how to do the 268 ** encoding. 269 */ 270 271 sectionsize = sectionhighbits = 0; 272 if (!bitset(M87F_NO8BIT, flags)) 273 { 274 /* remember where we were */ 275 offset = ftell(e->e_dfp); 276 if (offset == -1) 277 syserr("mime8to7: cannot ftell on df%s", e->e_id); 278 279 /* do a scan of this body type to count character types */ 280 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 281 { 282 if (mimeboundary(buf, boundaries) != MBT_NOTSEP) 283 break; 284 for (p = buf; *p != '\0'; p++) 285 { 286 /* count bytes with the high bit set */ 287 sectionsize++; 288 if (bitset(0200, *p)) 289 sectionhighbits++; 290 } 291 292 /* 293 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 294 ** assume base64. This heuristic avoids double-reading 295 ** large graphics or video files. 296 */ 297 298 if (sectionsize >= 4096 && 299 sectionhighbits > sectionsize / 4) 300 break; 301 } 302 303 /* return to the original offset for processing */ 304 /* XXX use relative seeks to handle >31 bit file sizes? */ 305 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 306 syserr("mime8to7: cannot fseek on df%s", e->e_id); 307 else 308 clearerr(e->e_dfp); 309 } 310 311 /* 312 ** Heuristically determine encoding method. 313 ** If more than 1/8 of the total characters have the 314 ** eighth bit set, use base64; else use quoted-printable. 315 */ 316 317 if (tTd(43, 8)) 318 { 319 printf("mime8to7: %ld high bit(s) in %ld byte(s)\n", 320 sectionhighbits, sectionsize); 321 } 322 linelen = 0; 323 if (sectionhighbits == 0) 324 { 325 /* no encoding necessary */ 326 p = hvalue("content-transfer-encoding", header); 327 if (p != NULL) 328 { 329 sprintf(buf, "Content-Transfer-Encoding: %s", p); 330 putline(buf, mci); 331 if (tTd(43, 36)) 332 printf(" ...%s\n", buf); 333 } 334 putline("", mci); 335 mci->mci_flags &= ~MCIF_INHEADER; 336 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 337 { 338 bt = mimeboundary(buf, boundaries); 339 if (bt != MBT_NOTSEP) 340 break; 341 if (buf[0] == 'F' && 342 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 343 strncmp(buf, "From ", 5) == 0) 344 (void) putc('>', mci->mci_out); 345 putline(buf, mci); 346 } 347 if (feof(e->e_dfp)) 348 bt = MBT_FINAL; 349 } 350 else if (sectionsize / 8 < sectionhighbits) 351 { 352 /* use base64 encoding */ 353 int c1, c2; 354 355 putline("Content-Transfer-Encoding: base64", mci); 356 if (tTd(43, 36)) 357 printf(" ...Content-Transfer-Encoding: base64\n"); 358 putline("", mci); 359 mci->mci_flags &= ~MCIF_INHEADER; 360 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != EOF) 361 { 362 if (linelen > 71) 363 { 364 fputs(mci->mci_mailer->m_eol, mci->mci_out); 365 linelen = 0; 366 } 367 linelen += 4; 368 fputc(Base64Code[c1 >> 2], mci->mci_out); 369 c1 = (c1 & 0x03) << 4; 370 c2 = mime_getchar(e->e_dfp, boundaries, &bt); 371 if (c2 == EOF) 372 { 373 fputc(Base64Code[c1], mci->mci_out); 374 fputc('=', mci->mci_out); 375 fputc('=', mci->mci_out); 376 break; 377 } 378 c1 |= (c2 >> 4) & 0x0f; 379 fputc(Base64Code[c1], mci->mci_out); 380 c1 = (c2 & 0x0f) << 2; 381 c2 = mime_getchar(e->e_dfp, boundaries, &bt); 382 if (c2 == EOF) 383 { 384 fputc(Base64Code[c1], mci->mci_out); 385 fputc('=', mci->mci_out); 386 break; 387 } 388 c1 |= (c2 >> 6) & 0x03; 389 fputc(Base64Code[c1], mci->mci_out); 390 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 391 } 392 } 393 else 394 { 395 /* use quoted-printable encoding */ 396 int c1, c2; 397 int fromstate; 398 399 putline("Content-Transfer-Encoding: quoted-printable", mci); 400 if (tTd(43, 36)) 401 printf(" ...Content-Transfer-Encoding: quoted-printable\n"); 402 putline("", mci); 403 mci->mci_flags &= ~MCIF_INHEADER; 404 fromstate = 0; 405 c2 = '\n'; 406 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != EOF) 407 { 408 if (c1 == '\n') 409 { 410 if (c2 == ' ' || c2 == '\t') 411 { 412 fputc('=', mci->mci_out); 413 fputc(Base16Code[(c2 >> 4) & 0x0f], 414 mci->mci_out); 415 fputc(Base16Code[c2 & 0x0f], 416 mci->mci_out); 417 fputs(mci->mci_mailer->m_eol, 418 mci->mci_out); 419 } 420 fputs(mci->mci_mailer->m_eol, mci->mci_out); 421 linelen = fromstate = 0; 422 c2 = c1; 423 continue; 424 } 425 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 426 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 427 { 428 fputs("=20", mci->mci_out); 429 linelen += 3; 430 } 431 else if (c2 == ' ' || c2 == '\t') 432 { 433 fputc(c2, mci->mci_out); 434 linelen++; 435 } 436 if (linelen > 72) 437 { 438 fputc('=', mci->mci_out); 439 fputs(mci->mci_mailer->m_eol, mci->mci_out); 440 linelen = fromstate = 0; 441 c2 = '\n'; 442 } 443 if (c2 == '\n' && c1 == '.' && 444 bitnset(M_XDOT, mci->mci_mailer->m_flags)) 445 { 446 fputc('.', mci->mci_out); 447 linelen++; 448 } 449 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 450 { 451 fputc('=', mci->mci_out); 452 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 453 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 454 linelen += 3; 455 } 456 else if (c1 != ' ' && c1 != '\t') 457 { 458 if (linelen < 4 && c1 == "From"[linelen]) 459 fromstate++; 460 fputc(c1, mci->mci_out); 461 linelen++; 462 } 463 c2 = c1; 464 } 465 466 /* output any saved character */ 467 if (c2 == ' ' || c2 == '\t') 468 { 469 fputc('=', mci->mci_out); 470 fputc(Base16Code[(c2 >> 4) & 0x0f], mci->mci_out); 471 fputc(Base16Code[c2 & 0x0f], mci->mci_out); 472 linelen += 3; 473 } 474 } 475 if (linelen > 0) 476 fputs(mci->mci_mailer->m_eol, mci->mci_out); 477 if (tTd(43, 3)) 478 printf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]); 479 return bt; 480 } 481 /* 482 ** MIME_GETCHAR -- get a character for MIME processing 483 ** 484 ** Treats boundaries as EOF. 485 ** 486 ** Parameters: 487 ** fp -- the input file. 488 ** boundaries -- the current MIME boundaries. 489 ** btp -- if the return value is EOF, *btp is set to 490 ** the type of the boundary. 491 ** 492 ** Returns: 493 ** The next character in the input stream. 494 */ 495 496 int 497 mime_getchar(fp, boundaries, btp) 498 register FILE *fp; 499 char **boundaries; 500 int *btp; 501 { 502 int c; 503 static char *bp = NULL; 504 static int buflen = 0; 505 static bool atbol = TRUE; /* at beginning of line */ 506 static int bt = MBT_SYNTAX; /* boundary type of next EOF */ 507 static char buf[128]; /* need not be a full line */ 508 509 if (buflen > 0) 510 { 511 buflen--; 512 return *bp++; 513 } 514 bp = buf; 515 buflen = 0; 516 c = fgetc(fp); 517 if (c == '\n') 518 { 519 /* might be part of a MIME boundary */ 520 *bp++ = c; 521 atbol = TRUE; 522 c = fgetc(fp); 523 } 524 if (c != EOF) 525 *bp++ = c; 526 else 527 bt = MBT_FINAL; 528 if (atbol && c == '-') 529 { 530 /* check for a message boundary */ 531 c = fgetc(fp); 532 if (c != '-') 533 { 534 if (c != EOF) 535 *bp++ = c; 536 else 537 bt = MBT_FINAL; 538 buflen = bp - buf - 1; 539 bp = buf; 540 return *bp++; 541 } 542 543 /* got "--", now check for rest of separator */ 544 *bp++ = '-'; 545 while (bp < &buf[sizeof buf - 1] && 546 (c = fgetc(fp)) != EOF && c != '\n') 547 { 548 *bp++ = c; 549 } 550 *bp = '\0'; 551 bt = mimeboundary(&buf[1], boundaries); 552 switch (bt) 553 { 554 case MBT_FINAL: 555 case MBT_INTERMED: 556 /* we have a message boundary */ 557 buflen = 0; 558 *btp = bt; 559 return EOF; 560 } 561 562 atbol = c == '\n'; 563 if (c != EOF) 564 *bp++ = c; 565 } 566 567 buflen = bp - buf - 1; 568 if (buflen < 0) 569 { 570 *btp = bt; 571 return EOF; 572 } 573 bp = buf; 574 return *bp++; 575 } 576 /* 577 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 578 ** 579 ** Parameters: 580 ** line -- the input line. 581 ** boundaries -- the set of currently pending boundaries. 582 ** 583 ** Returns: 584 ** MBT_NOTSEP -- if this is not a separator line 585 ** MBT_INTERMED -- if this is an intermediate separator 586 ** MBT_FINAL -- if this is a final boundary 587 ** MBT_SYNTAX -- if this is a boundary for the wrong 588 ** enclosure -- i.e., a syntax error. 589 */ 590 591 int 592 mimeboundary(line, boundaries) 593 register char *line; 594 char **boundaries; 595 { 596 int type; 597 int i; 598 int savec; 599 600 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 601 return MBT_NOTSEP; 602 i = strlen(line); 603 if (line[i - 1] == '\n') 604 i--; 605 if (tTd(43, 5)) 606 printf("mimeboundary: line=\"%.*s\"... ", i, line); 607 while (line[i - 1] == ' ' || line[i - 1] == '\t') 608 i--; 609 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 610 { 611 type = MBT_FINAL; 612 i -= 2; 613 } 614 else 615 type = MBT_INTERMED; 616 617 savec = line[i]; 618 line[i] = '\0'; 619 /* XXX should check for improper nesting here */ 620 if (isboundary(&line[2], boundaries) < 0) 621 type = MBT_NOTSEP; 622 line[i] = savec; 623 if (tTd(43, 5)) 624 printf("%s\n", MimeBoundaryNames[type]); 625 return type; 626 } 627 /* 628 ** DEFCHARSET -- return default character set for message 629 ** 630 ** The first choice for character set is for the mailer 631 ** corresponding to the envelope sender. If neither that 632 ** nor the global configuration file has a default character 633 ** set defined, return "unknown-8bit" as recommended by 634 ** RFC 1428 section 3. 635 ** 636 ** Parameters: 637 ** e -- the envelope for this message. 638 ** 639 ** Returns: 640 ** The default character set for that mailer. 641 */ 642 643 char * 644 defcharset(e) 645 register ENVELOPE *e; 646 { 647 if (e != NULL && e->e_from.q_mailer != NULL && 648 e->e_from.q_mailer->m_defcharset != NULL) 649 return e->e_from.q_mailer->m_defcharset; 650 if (DefaultCharSet != NULL) 651 return DefaultCharSet; 652 return "unknown-8bit"; 653 } 654 /* 655 ** ISBOUNDARY -- is a given string a currently valid boundary? 656 ** 657 ** Parameters: 658 ** line -- the current input line. 659 ** boundaries -- the list of valid boundaries. 660 ** 661 ** Returns: 662 ** The index number in boundaries if the line is found. 663 ** -1 -- otherwise. 664 ** 665 */ 666 667 int 668 isboundary(line, boundaries) 669 char *line; 670 char **boundaries; 671 { 672 register int i; 673 674 for (i = 0; boundaries[i] != NULL; i++) 675 { 676 if (strcmp(line, boundaries[i]) == 0) 677 return i; 678 } 679 return -1; 680 } 681