167545Seric /* 267545Seric * Copyright (c) 1994 Eric P. Allman 367545Seric * Copyright (c) 1994 467545Seric * The Regents of the University of California. All rights reserved. 567545Seric * 667545Seric * %sccs.include.redist.c% 767545Seric */ 867545Seric 967545Seric # include "sendmail.h" 1067545Seric # include <string.h> 1167545Seric 1267545Seric #ifndef lint 13*67936Seric static char sccsid[] = "@(#)mime.c 8.9 (Berkeley) 11/19/94"; 1467545Seric #endif /* not lint */ 1567545Seric 1667545Seric /* 1767545Seric ** MIME support. 1867545Seric ** 1967545Seric ** I am indebted to John Beck of Hewlett-Packard, who contributed 2067545Seric ** his code to me for inclusion. As it turns out, I did not use 2167545Seric ** his code since he used a "minimum change" approach that used 2267545Seric ** several temp files, and I wanted a "minimum impact" approach 2367545Seric ** that would avoid copying. However, looking over his code 2467545Seric ** helped me cement my understanding of the problem. 2567545Seric ** 2667545Seric ** I also looked at, but did not directly use, Nathaniel 2767545Seric ** Borenstein's "code.c" module. Again, it functioned as 2867545Seric ** a file-to-file translator, which did not fit within my 2967545Seric ** design bounds, but it was a useful base for understanding 3067545Seric ** the problem. 3167545Seric */ 3267545Seric 3367545Seric 3467545Seric /* character set for hex and base64 encoding */ 3567545Seric char Base16Code[] = "0123456789ABCDEF"; 3667545Seric char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 3767545Seric 3867545Seric /* types of MIME boundaries */ 3967545Seric #define MBT_SYNTAX 0 /* syntax error */ 4067545Seric #define MBT_NOTSEP 1 /* not a boundary */ 4167545Seric #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 4267545Seric #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 4367547Seric 4467547Seric static int MimeBoundaryType; /* internal linkage */ 4567545Seric /* 4667545Seric ** MIME8TO7 -- output 8 bit body in 7 bit format 4767545Seric ** 4867545Seric ** The header has already been output -- this has to do the 4967545Seric ** 8 to 7 bit conversion. It would be easy if we didn't have 5067545Seric ** to deal with nested formats (multipart/xxx and message/rfc822). 5167545Seric ** 5267545Seric ** We won't be called if we don't have to do a conversion, and 5367545Seric ** appropriate MIME-Version: and Content-Type: fields have been 5467545Seric ** output. Any Content-Transfer-Encoding: field has not been 5567545Seric ** output, and we can add it here. 5667545Seric ** 5767545Seric ** Parameters: 5867545Seric ** mci -- mailer connection information. 5967545Seric ** header -- the header for this body part. 6067545Seric ** e -- envelope. 6167545Seric ** boundary -- the message boundary -- NULL if we are 6267545Seric ** processing the outer portion. 6367545Seric ** 6467545Seric ** Returns: 6567545Seric ** An indicator of what terminated the message part: 6667545Seric ** MBT_FINAL -- the final boundary 6767545Seric ** MBT_INTERMED -- an intermediate boundary 6867545Seric ** MBT_NOTSEP -- an end of file 6967545Seric */ 7067545Seric 7167545Seric int 7267545Seric mime8to7(mci, header, e, boundary) 7367545Seric register MCI *mci; 7467545Seric HDR *header; 7567545Seric register ENVELOPE *e; 7667545Seric char *boundary; 7767545Seric { 7867545Seric register char *p; 7967545Seric int linelen; 8067545Seric int bt; 8167545Seric off_t offset; 8267545Seric size_t sectionsize, sectionhighbits; 8367545Seric char bbuf[128]; 8467545Seric char buf[MAXLINE]; 8567545Seric 8667545Seric if (tTd(43, 1)) 8767545Seric { 8867545Seric printf("mime8to7: boundary=%s\n", 8967545Seric boundary == NULL ? "<none>" : boundary); 9067545Seric } 9167545Seric p = hvalue("Content-Type", header); 9267545Seric if (p != NULL && strncasecmp(p, "multipart/", 10) == 0) 9367545Seric { 9467545Seric register char *q; 9567545Seric 9667545Seric /* oh dear -- this part is hard */ 9767545Seric p = strstr(p, "boundary="); /*XXX*/ 9867545Seric if (p == NULL) 9967545Seric { 10067545Seric syserr("mime8to7: Content-Type: %s missing boundary", p); 10167545Seric p = "---"; 10267545Seric } 10367545Seric else 10467545Seric p += 9; 10567545Seric if (*p == '"') 10667545Seric q = strchr(p, '"'); 10767545Seric else 10867545Seric q = strchr(p, ','); 10967545Seric if (q == NULL) 11067545Seric q = p + strlen(p); 11167545Seric if (q - p > sizeof bbuf - 1) 11267545Seric { 11367545Seric syserr("mime8to7: multipart boundary \"%.*s\" too long", 11467545Seric q - p, p); 11567545Seric q = p + sizeof bbuf - 1; 11667545Seric } 11767545Seric strncpy(bbuf, p, q - p); 11867545Seric bbuf[q - p] = '\0'; 11967545Seric if (tTd(43, 1)) 12067545Seric { 12167545Seric printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 12267545Seric } 12367545Seric 12467545Seric /* skip the early "comment" prologue */ 12567545Seric bt = MBT_FINAL; 12667545Seric while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 12767545Seric { 12867545Seric bt = mimeboundary(buf, bbuf); 12967545Seric if (bt != MBT_NOTSEP) 13067545Seric break; 13167545Seric putline(buf, mci); 13267545Seric } 13367545Seric while (bt != MBT_FINAL) 13467545Seric { 13567545Seric auto HDR *hdr = NULL; 13667545Seric 13767545Seric sprintf(buf, "--%s", bbuf); 13867545Seric putline(buf, mci); 13967545Seric collect(e->e_dfp, FALSE, FALSE, &hdr, e); 140*67936Seric putheader(mci, hdr, e, 0); 14167545Seric bt = mime8to7(mci, hdr, e, bbuf); 14267545Seric } 14367545Seric sprintf(buf, "--%s--", bbuf); 14467545Seric putline(buf, mci); 14567545Seric 14667545Seric /* skip the late "comment" epilogue */ 14767545Seric while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 14867545Seric { 14967545Seric putline(buf, mci); 15067545Seric bt = mimeboundary(buf, boundary); 15167545Seric if (bt != MBT_NOTSEP) 15267545Seric break; 15367545Seric } 15467545Seric return bt; 15567545Seric } 15667545Seric 15767545Seric /* 15867545Seric ** Non-compound body type 15967545Seric ** 16067545Seric ** Compute the ratio of seven to eight bit characters; 16167545Seric ** use that as a heuristic to decide how to do the 16267545Seric ** encoding. 16367545Seric */ 16467545Seric 16567545Seric /* remember where we were */ 16667545Seric offset = ftell(e->e_dfp); 16767545Seric if (offset == -1) 16867545Seric syserr("mime8to7: cannot ftell on %s", e->e_df); 16967545Seric 17067545Seric /* do a scan of this body type to count character types */ 17167545Seric sectionsize = sectionhighbits = 0; 17267545Seric while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 17367545Seric { 17467545Seric bt = mimeboundary(buf, boundary); 17567545Seric if (bt != MBT_NOTSEP) 17667545Seric break; 17767545Seric for (p = buf; *p != '\0'; p++) 17867545Seric { 17967547Seric /* count bytes with the high bit set */ 18067545Seric sectionsize++; 18167545Seric if (bitset(0200, *p)) 18267545Seric sectionhighbits++; 18367545Seric } 18467547Seric 18567547Seric /* 18667547Seric ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 18767547Seric ** assume base64. This heuristic avoids double-reading 18867547Seric ** large graphics or video files. 18967547Seric */ 19067547Seric 19167547Seric if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4) 19267547Seric break; 19367545Seric } 19467545Seric if (feof(e->e_dfp)) 19567545Seric bt = MBT_FINAL; 19667545Seric 19767545Seric /* return to the original offset for processing */ 19867547Seric /* XXX use relative seeks to handle >31 bit file sizes? */ 19967545Seric if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 20067545Seric syserr("mime8to7: cannot fseek on %s", e->e_df); 20167545Seric 20267547Seric /* 20367547Seric ** Heuristically determine encoding method. 20467547Seric ** If more than 1/8 of the total characters have the 20567547Seric ** eighth bit set, use base64; else use quoted-printable. 20667547Seric */ 20767547Seric 20867545Seric if (tTd(43, 8)) 20967545Seric { 21067545Seric printf("mime8to7: %ld high bits in %ld bytes\n", 21167545Seric sectionhighbits, sectionsize); 21267545Seric } 21367554Seric if (sectionhighbits == 0) 21467545Seric { 21567554Seric /* no encoding necessary */ 21667695Seric p = hvalue("content-transfer-encoding", header); 21767695Seric if (p != NULL) 21867695Seric { 21967695Seric sprintf(buf, "Content-Transfer-Encoding: %s", p); 22067695Seric putline(buf, mci); 22167695Seric } 22267554Seric putline("", mci); 22367554Seric mci->mci_flags &= ~MCIF_INHEADER; 22467554Seric while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 22567554Seric { 22667554Seric bt = mimeboundary(buf, boundary); 22767554Seric if (bt != MBT_NOTSEP) 22867554Seric break; 22967554Seric if (buf[0] == 'F' && 23067554Seric bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 23167554Seric strncmp(buf, "From ", 5) == 0) 23267554Seric (void) putc('>', mci->mci_out); 23367554Seric putline(buf, mci); 23467554Seric } 23567554Seric } 23667554Seric else if (sectionsize / 8 < sectionhighbits) 23767554Seric { 23867545Seric /* use base64 encoding */ 23967545Seric int c1, c2; 24067545Seric 24167545Seric putline("Content-Transfer-Encoding: base64", mci); 24267545Seric putline("", mci); 24367545Seric mci->mci_flags &= ~MCIF_INHEADER; 24467545Seric linelen = 0; 24567545Seric while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 24667545Seric { 24767545Seric if (linelen > 71) 24867545Seric { 24967545Seric fputs(mci->mci_mailer->m_eol, mci->mci_out); 25067545Seric linelen = 0; 25167545Seric } 25267545Seric linelen += 4; 25367545Seric fputc(Base64Code[c1 >> 2], mci->mci_out); 25467545Seric c1 = (c1 & 0x03) << 4; 25567545Seric c2 = mime_getchar(e->e_dfp, boundary); 25667545Seric if (c2 == EOF) 25767545Seric { 25867545Seric fputc(Base64Code[c1], mci->mci_out); 25967545Seric fputc('=', mci->mci_out); 26067545Seric fputc('=', mci->mci_out); 26167545Seric break; 26267545Seric } 26367545Seric c1 |= (c2 >> 4) & 0x0f; 26467545Seric fputc(Base64Code[c1], mci->mci_out); 26567545Seric c1 = (c2 & 0x0f) << 2; 26667545Seric c2 = mime_getchar(e->e_dfp, boundary); 26767545Seric if (c2 == EOF) 26867545Seric { 26967545Seric fputc(Base64Code[c1], mci->mci_out); 27067545Seric fputc('=', mci->mci_out); 27167545Seric break; 27267545Seric } 27367545Seric c1 |= (c2 >> 6) & 0x03; 27467545Seric fputc(Base64Code[c1], mci->mci_out); 27567545Seric fputc(Base64Code[c2 & 0x3f], mci->mci_out); 27667545Seric } 27767545Seric } 27867545Seric else 27967545Seric { 28067545Seric /* use quoted-printable encoding */ 28167545Seric int c1, c2; 28267545Seric 28367545Seric putline("Content-Transfer-Encoding: quoted-printable", mci); 28467545Seric putline("", mci); 28567545Seric mci->mci_flags &= ~MCIF_INHEADER; 28667545Seric linelen = 0; 28767554Seric c2 = '\n'; 28867545Seric while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 28967545Seric { 29067545Seric if (c1 == '\n') 29167545Seric { 29267545Seric if (c2 == ' ' || c2 == '\t') 29367545Seric { 29467545Seric fputc('=', mci->mci_out); 29567840Seric fputc(Base16Code[(c2 >> 4) & 0x0f], 29667840Seric mci->mci_out); 29767840Seric fputc(Base16Code[c2 & 0x0f], 29867840Seric mci->mci_out); 29967840Seric fputs(mci->mci_mailer->m_eol, 30067840Seric mci->mci_out); 30167545Seric } 30267545Seric fputs(mci->mci_mailer->m_eol, mci->mci_out); 30367545Seric linelen = 0; 30467545Seric c2 = c1; 30567545Seric continue; 30667545Seric } 30767840Seric if (c2 == ' ' || c2 == '\t') 30867840Seric { 30967840Seric fputc(c2, mci->mci_out); 31067840Seric linelen++; 31167840Seric } 31267545Seric if (linelen > 72) 31367545Seric { 31467545Seric fputc('=', mci->mci_out); 31567545Seric fputs(mci->mci_mailer->m_eol, mci->mci_out); 31667545Seric linelen = 0; 31767554Seric c2 = '\n'; 31867545Seric } 31967761Seric if (c2 == '\n' && c1 == '.' && 32067761Seric bitnset(M_XDOT, mci->mci_mailer->m_flags)) 32167761Seric { 32267761Seric fputc('.', mci->mci_out); 32367761Seric linelen++; 32467761Seric } 32567547Seric if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 32667545Seric { 32767545Seric fputc('=', mci->mci_out); 32867545Seric fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 32967545Seric fputc(Base16Code[c1 & 0x0f], mci->mci_out); 33067545Seric linelen += 3; 33167545Seric } 33267840Seric else if (c1 != ' ' && c1 != '\t') 33367545Seric { 33467545Seric fputc(c1, mci->mci_out); 33567545Seric linelen++; 33667545Seric } 33767545Seric c2 = c1; 33867545Seric } 33967840Seric 34067840Seric /* output any saved character */ 34167840Seric if (c2 == ' ' || c2 == '\t') 34267840Seric { 34367840Seric fputc(c2, mci->mci_out); 34467840Seric linelen++; 34567840Seric } 34667545Seric } 34767545Seric if (linelen > 0) 34867545Seric fputs(mci->mci_mailer->m_eol, mci->mci_out); 34967547Seric return MimeBoundaryType; 35067545Seric } 35167545Seric 35267545Seric 35367545Seric int 35467545Seric mime_getchar(fp, boundary) 35567545Seric register FILE *fp; 35667545Seric char *boundary; 35767545Seric { 35867545Seric int c; 35967545Seric static char *bp = NULL; 36067545Seric static int buflen = 0; 36167545Seric static bool atbol = TRUE; /* at beginning of line */ 36267545Seric static char buf[128]; /* need not be a full line */ 36367545Seric 36467545Seric if (buflen > 0) 36567545Seric { 36667545Seric buflen--; 36767545Seric return *bp++; 36867545Seric } 36967545Seric c = fgetc(fp); 37067545Seric if (atbol && c == '-' && boundary != NULL) 37167545Seric { 37267545Seric /* check for a message boundary */ 37367545Seric bp = buf; 37467545Seric c = fgetc(fp); 37567545Seric if (c != '-') 37667545Seric { 37767545Seric if (c != EOF) 37867545Seric { 37967545Seric *bp = c; 38067545Seric buflen++; 38167545Seric } 38267545Seric return '-'; 38367545Seric } 38467545Seric 38567545Seric /* got "--", now check for rest of separator */ 38667545Seric *bp++ = '-'; 38767545Seric *bp++ = '-'; 38867545Seric while (bp < &buf[sizeof buf - 1] && 38967545Seric (c = fgetc(fp)) != EOF && c != '\n') 39067545Seric { 39167545Seric *bp++ = c; 39267545Seric } 39367545Seric *bp = '\0'; 39467547Seric MimeBoundaryType = mimeboundary(buf, boundary); 39567547Seric switch (MimeBoundaryType) 39667545Seric { 39767545Seric case MBT_FINAL: 39867545Seric case MBT_INTERMED: 39967545Seric /* we have a message boundary */ 40067545Seric buflen = 0; 40167545Seric return EOF; 40267545Seric } 40367545Seric 40467545Seric atbol = c == '\n'; 40567545Seric if (c != EOF) 40667545Seric *bp++ = c; 40767545Seric buflen = bp - buf - 1; 40867545Seric bp = buf; 40967545Seric return *bp++; 41067545Seric } 41167545Seric 41267545Seric atbol = c == '\n'; 41367545Seric return c; 41467545Seric } 41567545Seric /* 41667545Seric ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 41767545Seric ** 41867545Seric ** Parameters: 41967545Seric ** line -- the input line. 42067545Seric ** boundary -- the expected boundary. 42167545Seric ** 42267545Seric ** Returns: 42367545Seric ** MBT_NOTSEP -- if this is not a separator line 42467545Seric ** MBT_INTERMED -- if this is an intermediate separator 42567545Seric ** MBT_FINAL -- if this is a final boundary 42667545Seric ** MBT_SYNTAX -- if this is a boundary for the wrong 42767545Seric ** enclosure -- i.e., a syntax error. 42867545Seric */ 42967545Seric 43067545Seric int 43167545Seric mimeboundary(line, boundary) 43267545Seric register char *line; 43367545Seric char *boundary; 43467545Seric { 43567545Seric int type; 43667545Seric int i; 43767545Seric 43867545Seric if (line[0] != '-' || line[1] != '-' || boundary == NULL) 43967545Seric return MBT_NOTSEP; 44067545Seric if (tTd(43, 5)) 44167545Seric printf("mimeboundary: bound=\"%s\", line=\"%s\"... ", 44267545Seric boundary, line); 44367545Seric i = strlen(line); 44467545Seric if (line[i - 1] == '\n') 44567545Seric i--; 44667545Seric if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 44767545Seric { 44867545Seric type = MBT_FINAL; 44967545Seric i -= 2; 45067545Seric } 45167545Seric else 45267545Seric type = MBT_INTERMED; 45367545Seric 45467545Seric /* XXX should check for improper nesting here */ 45567545Seric if (strncmp(boundary, &line[2], i - 2) != 0 || 45667545Seric strlen(boundary) != i - 2) 45767545Seric type = MBT_NOTSEP; 45867545Seric if (tTd(43, 5)) 45967545Seric printf("%d\n", type); 46067545Seric return type; 46167545Seric } 46267896Seric /* 46367896Seric ** DEFCHARSET -- return default character set for message 46467896Seric ** 46567896Seric ** The first choice for character set is for the mailer 46667896Seric ** corresponding to the envelope sender. If neither that 46767896Seric ** nor the global configuration file has a default character 46867896Seric ** set defined, return "unknown-8bit" as recommended by 46967896Seric ** RFC 1428 section 3. 47067896Seric ** 47167896Seric ** Parameters: 47267896Seric ** e -- the envelope for this message. 47367896Seric ** 47467896Seric ** Returns: 47567896Seric ** The default character set for that mailer. 47667896Seric */ 47767896Seric 47867896Seric char * 47967896Seric defcharset(e) 48067896Seric register ENVELOPE *e; 48167896Seric { 48267896Seric if (e != NULL && e->e_from.q_mailer != NULL && 48367896Seric e->e_from.q_mailer->m_defcharset != NULL) 48467896Seric return e->e_from.q_mailer->m_defcharset; 48567896Seric if (DefaultCharSet != NULL) 48667896Seric return DefaultCharSet; 48767896Seric return "unknown-8bit"; 48867896Seric } 489