1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.13 (Berkeley) 04/02/95"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static int MimeBoundaryType; /* internal linkage */ 45 /* 46 ** MIME8TO7 -- output 8 bit body in 7 bit format 47 ** 48 ** The header has already been output -- this has to do the 49 ** 8 to 7 bit conversion. It would be easy if we didn't have 50 ** to deal with nested formats (multipart/xxx and message/rfc822). 51 ** 52 ** We won't be called if we don't have to do a conversion, and 53 ** appropriate MIME-Version: and Content-Type: fields have been 54 ** output. 
Any Content-Transfer-Encoding: field has not been 55 ** output, and we can add it here. 56 ** 57 ** Parameters: 58 ** mci -- mailer connection information. 59 ** header -- the header for this body part. 60 ** e -- envelope. 61 ** boundaries -- the currently pending message boundaries. 62 ** NULL if we are processing the outer portion. 63 ** flags -- to tweak processing. 64 ** 65 ** Returns: 66 ** An indicator of what terminated the message part: 67 ** MBT_FINAL -- the final boundary 68 ** MBT_INTERMED -- an intermediate boundary 69 ** MBT_NOTSEP -- an end of file 70 */ 71 72 struct args 73 { 74 char *field; /* name of field */ 75 char *value; /* value of that field */ 76 }; 77 78 int 79 mime8to7(mci, header, e, boundaries, flags) 80 register MCI *mci; 81 HDR *header; register ENVELOPE *e; 82 char **boundaries; 83 int flags; 84 { 85 register char *p; 86 int linelen; 87 int bt; 88 off_t offset; 89 size_t sectionsize, sectionhighbits; 90 int i; 91 char *type; 92 char *subtype; 93 char **pvp; 94 int argc = 0; 95 struct args argv[MAXMIMEARGS]; 96 char bbuf[128]; 97 char buf[MAXLINE]; 98 char pvpbuf[MAXLINE]; 99 extern char MimeTokenTab[256]; 100 101 if (tTd(43, 1)) 102 { 103 printf("mime8to7: boundary=%s\n", 104 boundaries[0] == NULL ? 
"<none>" : boundaries[0]); 105 for (i = 1; boundaries[i] != NULL; i++) 106 printf("\t%s\n", boundaries[i]); 107 } 108 type = subtype = "-none-"; 109 p = hvalue("Content-Type", header); 110 if (p != NULL && 111 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 112 MimeTokenTab)) != NULL && 113 pvp[0] != NULL) 114 { 115 type = *pvp++; 116 if (*pvp != NULL && strcmp(*pvp, "/") == 0 && 117 *++pvp != NULL) 118 { 119 subtype = *pvp++; 120 } 121 122 /* break out parameters */ 123 while (*pvp != NULL && argc < MAXMIMEARGS) 124 { 125 /* skip to semicolon separator */ 126 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 127 pvp++; 128 if (*pvp++ == NULL || *pvp == NULL) 129 break; 130 131 /* extract field name */ 132 argv[argc].field = *pvp++; 133 134 /* see if there is a value */ 135 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 136 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 137 { 138 argv[argc].value = *pvp; 139 argc++; 140 } 141 } 142 } 143 if (strcasecmp(type, "multipart") == 0) 144 { 145 register char *q; 146 147 for (i = 0; i < argc; i++) 148 { 149 if (strcasecmp(argv[i].field, "boundary") == 0) 150 break; 151 } 152 if (i >= argc) 153 { 154 syserr("mime8to7: Content-Type: %s missing boundary", p); 155 p = "---"; 156 } 157 else 158 p = argv[i].value; 159 if (*p == '"') 160 q = strchr(++p, '"'); 161 else 162 q = p + strlen(p); 163 if (q - p > sizeof bbuf - 1) 164 { 165 syserr("mime8to7: multipart boundary \"%.*s\" too long", 166 q - p, p); 167 q = p + sizeof bbuf - 1; 168 } 169 strncpy(bbuf, p, q - p); 170 bbuf[q - p] = '\0'; 171 if (tTd(43, 1)) 172 { 173 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 174 } 175 for (i = 0; i < MAXMIMENESTING; i++) 176 if (boundaries[i] == NULL) 177 break; 178 if (i >= MAXMIMENESTING) 179 syserr("mime8to7: multipart nesting boundary too deep"); 180 else 181 { 182 boundaries[i] = bbuf; 183 boundaries[i + 1] = NULL; 184 } 185 186 /* flag subtypes that can't have any 8-bit data */ 187 if (strcasecmp(subtype, "signed") == 0) 188 
flags |= M87F_NO8BIT; 189 190 /* skip the early "comment" prologue */ 191 bt = MBT_FINAL; 192 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 193 { 194 bt = mimeboundary(buf, boundaries); 195 if (bt != MBT_NOTSEP) 196 break; 197 putline(buf, mci); 198 } 199 while (bt != MBT_FINAL) 200 { 201 auto HDR *hdr = NULL; 202 203 sprintf(buf, "--%s", bbuf); 204 putline(buf, mci); 205 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 206 putheader(mci, hdr, e, 0); 207 bt = mime8to7(mci, hdr, e, boundaries, flags); 208 } 209 sprintf(buf, "--%s--", bbuf); 210 putline(buf, mci); 211 212 /* skip the late "comment" epilogue */ 213 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 214 { 215 putline(buf, mci); 216 bt = mimeboundary(buf, boundaries); 217 if (bt != MBT_NOTSEP) 218 break; 219 } 220 boundaries[i] = NULL; 221 return bt; 222 } 223 224 /* 225 ** Non-compound body type 226 ** 227 ** Compute the ratio of seven to eight bit characters; 228 ** use that as a heuristic to decide how to do the 229 ** encoding. 230 */ 231 232 /* handle types that cannot have 8-bit data internally */ 233 sprintf(buf, "%s/%s", type, subtype); 234 if (wordinclass(buf, 'n')) 235 flags |= M87F_NO8BIT; 236 237 sectionsize = sectionhighbits = 0; 238 if (!bitset(M87F_NO8BIT, flags)) 239 { 240 /* remember where we were */ 241 offset = ftell(e->e_dfp); 242 if (offset == -1) 243 syserr("mime8to7: cannot ftell on df%s", e->e_id); 244 245 /* do a scan of this body type to count character types */ 246 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 247 { 248 bt = mimeboundary(buf, boundaries); 249 if (bt != MBT_NOTSEP) 250 break; 251 for (p = buf; *p != '\0'; p++) 252 { 253 /* count bytes with the high bit set */ 254 sectionsize++; 255 if (bitset(0200, *p)) 256 sectionhighbits++; 257 } 258 259 /* 260 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 261 ** assume base64. This heuristic avoids double-reading 262 ** large graphics or video files. 
263 */ 264 265 if (sectionsize >= 4096 && 266 sectionhighbits > sectionsize / 4) 267 break; 268 } 269 if (feof(e->e_dfp)) 270 bt = MBT_FINAL; 271 272 /* return to the original offset for processing */ 273 /* XXX use relative seeks to handle >31 bit file sizes? */ 274 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 275 syserr("mime8to7: cannot fseek on df%s", e->e_id); 276 } 277 278 /* 279 ** Heuristically determine encoding method. 280 ** If more than 1/8 of the total characters have the 281 ** eighth bit set, use base64; else use quoted-printable. 282 */ 283 284 if (tTd(43, 8)) 285 { 286 printf("mime8to7: %ld high bits in %ld bytes\n", 287 sectionhighbits, sectionsize); 288 } 289 if (sectionhighbits == 0) 290 { 291 /* no encoding necessary */ 292 p = hvalue("content-transfer-encoding", header); 293 if (p != NULL) 294 { 295 sprintf(buf, "Content-Transfer-Encoding: %s", p); 296 putline(buf, mci); 297 } 298 putline("", mci); 299 mci->mci_flags &= ~MCIF_INHEADER; 300 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 301 { 302 bt = mimeboundary(buf, boundaries); 303 if (bt != MBT_NOTSEP) 304 break; 305 if (buf[0] == 'F' && 306 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 307 strncmp(buf, "From ", 5) == 0) 308 (void) putc('>', mci->mci_out); 309 putline(buf, mci); 310 } 311 } 312 else if (sectionsize / 8 < sectionhighbits) 313 { 314 /* use base64 encoding */ 315 int c1, c2; 316 317 putline("Content-Transfer-Encoding: base64", mci); 318 putline("", mci); 319 mci->mci_flags &= ~MCIF_INHEADER; 320 linelen = 0; 321 while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF) 322 { 323 if (linelen > 71) 324 { 325 fputs(mci->mci_mailer->m_eol, mci->mci_out); 326 linelen = 0; 327 } 328 linelen += 4; 329 fputc(Base64Code[c1 >> 2], mci->mci_out); 330 c1 = (c1 & 0x03) << 4; 331 c2 = mime_getchar(e->e_dfp, boundaries); 332 if (c2 == EOF) 333 { 334 fputc(Base64Code[c1], mci->mci_out); 335 fputc('=', mci->mci_out); 336 fputc('=', mci->mci_out); 337 break; 338 } 339 c1 |= (c2 >> 4) & 0x0f; 
340 fputc(Base64Code[c1], mci->mci_out); 341 c1 = (c2 & 0x0f) << 2; 342 c2 = mime_getchar(e->e_dfp, boundaries); 343 if (c2 == EOF) 344 { 345 fputc(Base64Code[c1], mci->mci_out); 346 fputc('=', mci->mci_out); 347 break; 348 } 349 c1 |= (c2 >> 6) & 0x03; 350 fputc(Base64Code[c1], mci->mci_out); 351 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 352 } 353 } 354 else 355 { 356 /* use quoted-printable encoding */ 357 int c1, c2; 358 int fromstate; 359 360 putline("Content-Transfer-Encoding: quoted-printable", mci); 361 putline("", mci); 362 mci->mci_flags &= ~MCIF_INHEADER; 363 linelen = fromstate = 0; 364 c2 = '\n'; 365 while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF) 366 { 367 if (c1 == '\n') 368 { 369 if (c2 == ' ' || c2 == '\t') 370 { 371 fputc('=', mci->mci_out); 372 fputc(Base16Code[(c2 >> 4) & 0x0f], 373 mci->mci_out); 374 fputc(Base16Code[c2 & 0x0f], 375 mci->mci_out); 376 fputs(mci->mci_mailer->m_eol, 377 mci->mci_out); 378 } 379 fputs(mci->mci_mailer->m_eol, mci->mci_out); 380 linelen = fromstate = 0; 381 c2 = c1; 382 continue; 383 } 384 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 385 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 386 { 387 fputs("=20", mci->mci_out); 388 linelen += 3; 389 } 390 else if (c2 == ' ' || c2 == '\t') 391 { 392 fputc(c2, mci->mci_out); 393 linelen++; 394 } 395 if (linelen > 72) 396 { 397 fputc('=', mci->mci_out); 398 fputs(mci->mci_mailer->m_eol, mci->mci_out); 399 linelen = fromstate = 0; 400 c2 = '\n'; 401 } 402 if (c2 == '\n' && c1 == '.' 
&& 403 bitnset(M_XDOT, mci->mci_mailer->m_flags)) 404 { 405 fputc('.', mci->mci_out); 406 linelen++; 407 } 408 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 409 { 410 fputc('=', mci->mci_out); 411 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 412 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 413 linelen += 3; 414 } 415 else if (c1 != ' ' && c1 != '\t') 416 { 417 if (linelen < 4 && c1 == "From"[linelen]) 418 fromstate++; 419 fputc(c1, mci->mci_out); 420 linelen++; 421 } 422 c2 = c1; 423 } 424 425 /* output any saved character */ 426 if (c2 == ' ' || c2 == '\t') 427 { 428 fputc('=', mci->mci_out); 429 fputc(Base16Code[(c2 >> 4) & 0x0f], mci->mci_out); 430 fputc(Base16Code[c2 & 0x0f], mci->mci_out); 431 linelen += 3; 432 } 433 } 434 if (linelen > 0) 435 fputs(mci->mci_mailer->m_eol, mci->mci_out); 436 return MimeBoundaryType; 437 } 438 /* 439 ** MIME_GETCHAR -- get a character for MIME processing 440 ** 441 ** Treats boundaries as EOF. 442 ** 443 ** Parameters: 444 ** fp -- the input file. 445 ** boundaries -- the current MIME boundaries. 446 ** 447 ** Returns: 448 ** The next character in the input stream. 
449 */ 450 451 int 452 mime_getchar(fp, boundaries) 453 register FILE *fp; 454 char **boundaries; 455 { 456 int c; 457 static char *bp = NULL; 458 static int buflen = 0; 459 static bool atbol = TRUE; /* at beginning of line */ 460 static char buf[128]; /* need not be a full line */ 461 462 if (buflen > 0) 463 { 464 buflen--; 465 return *bp++; 466 } 467 bp = buf; 468 buflen = 0; 469 c = fgetc(fp); 470 if (c == '\n') 471 { 472 /* might be part of a MIME boundary */ 473 *bp++ = c; 474 atbol = TRUE; 475 c = fgetc(fp); 476 } 477 if (c != EOF) 478 *bp++ = c; 479 if (atbol && c == '-') 480 { 481 /* check for a message boundary */ 482 c = fgetc(fp); 483 if (c != '-') 484 { 485 if (c != EOF) 486 *bp++ = c; 487 buflen = bp - buf - 1; 488 bp = buf; 489 return *bp++; 490 } 491 492 /* got "--", now check for rest of separator */ 493 *bp++ = '-'; 494 while (bp < &buf[sizeof buf - 1] && 495 (c = fgetc(fp)) != EOF && c != '\n') 496 { 497 *bp++ = c; 498 } 499 *bp = '\0'; 500 MimeBoundaryType = mimeboundary(buf, boundaries); 501 switch (MimeBoundaryType) 502 { 503 case MBT_FINAL: 504 case MBT_INTERMED: 505 /* we have a message boundary */ 506 buflen = 0; 507 return EOF; 508 } 509 510 atbol = c == '\n'; 511 if (c != EOF) 512 *bp++ = c; 513 } 514 515 buflen = bp - buf - 1; 516 if (buflen < 0) 517 return EOF; 518 bp = buf; 519 return *bp++; 520 } 521 /* 522 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 523 ** 524 ** Parameters: 525 ** line -- the input line. 526 ** boundaries -- the set of currently pending boundaries. 527 ** 528 ** Returns: 529 ** MBT_NOTSEP -- if this is not a separator line 530 ** MBT_INTERMED -- if this is an intermediate separator 531 ** MBT_FINAL -- if this is a final boundary 532 ** MBT_SYNTAX -- if this is a boundary for the wrong 533 ** enclosure -- i.e., a syntax error. 
534 */ 535 536 int 537 mimeboundary(line, boundaries) 538 register char *line; 539 char **boundaries; 540 { 541 int type; 542 int i; 543 int savec; 544 545 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 546 return MBT_NOTSEP; 547 if (tTd(43, 5)) 548 printf("mimeboundary: line=\"%s\"... ", line); 549 i = strlen(line); 550 if (line[i - 1] == '\n') 551 i--; 552 while (line[i - 1] == ' ' || line[i - 1] == '\t') 553 i--; 554 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 555 { 556 type = MBT_FINAL; 557 i -= 2; 558 } 559 else 560 type = MBT_INTERMED; 561 562 savec = line[i]; 563 line[i] = '\0'; 564 /* XXX should check for improper nesting here */ 565 if (isboundary(&line[2], boundaries) < 0) 566 type = MBT_NOTSEP; 567 line[i] = savec; 568 if (tTd(43, 5)) 569 printf("%d\n", type); 570 return type; 571 } 572 /* 573 ** DEFCHARSET -- return default character set for message 574 ** 575 ** The first choice for character set is for the mailer 576 ** corresponding to the envelope sender. If neither that 577 ** nor the global configuration file has a default character 578 ** set defined, return "unknown-8bit" as recommended by 579 ** RFC 1428 section 3. 580 ** 581 ** Parameters: 582 ** e -- the envelope for this message. 583 ** 584 ** Returns: 585 ** The default character set for that mailer. 586 */ 587 588 char * 589 defcharset(e) 590 register ENVELOPE *e; 591 { 592 if (e != NULL && e->e_from.q_mailer != NULL && 593 e->e_from.q_mailer->m_defcharset != NULL) 594 return e->e_from.q_mailer->m_defcharset; 595 if (DefaultCharSet != NULL) 596 return DefaultCharSet; 597 return "unknown-8bit"; 598 } 599 /* 600 ** ISBOUNDARY -- is a given string a currently valid boundary? 601 ** 602 ** Parameters: 603 ** line -- the current input line. 604 ** boundaries -- the list of valid boundaries. 605 ** 606 ** Returns: 607 ** The index number in boundaries if the line is found. 608 ** -1 -- otherwise. 
/*
**  ISBOUNDARY -- is a given string a currently valid boundary?
**
**	The comparison is exact and case sensitive.
**
**	Parameters:
**		line -- the current input line, without the leading
**			"--" and without any trailing decoration.
**		boundaries -- the list of valid boundaries, terminated
**			by a NULL entry.
**
**	Returns:
**		The index number in boundaries if the line is found.
**		-1 -- otherwise, including when line or boundaries
**			is NULL.
*/

int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register int i;

	/* no line or no pending boundaries: nothing can match */
	if (line == NULL || boundaries == NULL)
		return -1;

	for (i = 0; boundaries[i] != NULL; i++)
	{
		if (strcmp(line, boundaries[i]) == 0)
			return i;
	}
	return -1;
}