xref: /csrg-svn/usr.sbin/sendmail/src/mime.c (revision 67936)
167545Seric /*
267545Seric  * Copyright (c) 1994 Eric P. Allman
367545Seric  * Copyright (c) 1994
467545Seric  *	The Regents of the University of California.  All rights reserved.
567545Seric  *
667545Seric  * %sccs.include.redist.c%
767545Seric  */
867545Seric 
967545Seric # include "sendmail.h"
1067545Seric # include <string.h>
1167545Seric 
1267545Seric #ifndef lint
13*67936Seric static char sccsid[] = "@(#)mime.c	8.9 (Berkeley) 11/19/94";
1467545Seric #endif /* not lint */
1567545Seric 
1667545Seric /*
1767545Seric **  MIME support.
1867545Seric **
1967545Seric **	I am indebted to John Beck of Hewlett-Packard, who contributed
2067545Seric **	his code to me for inclusion.  As it turns out, I did not use
2167545Seric **	his code since he used a "minimum change" approach that used
2267545Seric **	several temp files, and I wanted a "minimum impact" approach
2367545Seric **	that would avoid copying.  However, looking over his code
2467545Seric **	helped me cement my understanding of the problem.
2567545Seric **
2667545Seric **	I also looked at, but did not directly use, Nathaniel
2767545Seric **	Borenstein's "code.c" module.  Again, it functioned as
2867545Seric **	a file-to-file translator, which did not fit within my
2967545Seric **	design bounds, but it was a useful base for understanding
3067545Seric **	the problem.
3167545Seric */
3267545Seric 
3367545Seric 
3467545Seric /* digit alphabets indexed by value: 4-bit (hex) and 6-bit (base64) */
3567545Seric char	Base16Code[] =	"0123456789ABCDEF";
3667545Seric char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
3767545Seric 
3867545Seric /* types of MIME boundaries, as returned by mimeboundary() */
3967545Seric #define MBT_SYNTAX	0	/* syntax error */
4067545Seric #define MBT_NOTSEP	1	/* not a boundary */
4167545Seric #define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
4267545Seric #define MBT_FINAL	3	/* final boundary (trailing -- included) */
4367547Seric 
4467547Seric static int	MimeBoundaryType;	/* set by mime_getchar(), read by mime8to7() */
4567545Seric /*
4667545Seric **  MIME8TO7 -- output 8 bit body in 7 bit format
4767545Seric **
4867545Seric **	The header has already been output -- this has to do the
4967545Seric **	8 to 7 bit conversion.  It would be easy if we didn't have
5067545Seric **	to deal with nested formats (multipart/xxx and message/rfc822).
5167545Seric **
5267545Seric **	We won't be called if we don't have to do a conversion, and
5367545Seric **	appropriate MIME-Version: and Content-Type: fields have been
5467545Seric **	output.  Any Content-Transfer-Encoding: field has not been
5567545Seric **	output, and we can add it here.
5667545Seric **
5767545Seric **	Parameters:
5867545Seric **		mci -- mailer connection information.
5967545Seric **		header -- the header for this body part.
6067545Seric **		e -- envelope.
6167545Seric **		boundary -- the message boundary -- NULL if we are
6267545Seric **			processing the outer portion.
6367545Seric **
6467545Seric **	Returns:
6567545Seric **		An indicator of what terminated the message part:
6667545Seric **		  MBT_FINAL -- the final boundary
6767545Seric **		  MBT_INTERMED -- an intermediate boundary
6867545Seric **		  MBT_NOTSEP -- an end of file
6967545Seric */
7067545Seric 
7167545Seric int
7267545Seric mime8to7(mci, header, e, boundary)
7367545Seric 	register MCI *mci;
7467545Seric 	HDR *header;
7567545Seric 	register ENVELOPE *e;
7667545Seric 	char *boundary;
7767545Seric {
7867545Seric 	register char *p;
7967545Seric 	int linelen;
8067545Seric 	int bt;
8167545Seric 	off_t offset;
8267545Seric 	size_t sectionsize, sectionhighbits;
8367545Seric 	char bbuf[128];
8467545Seric 	char buf[MAXLINE];
8567545Seric 
8667545Seric 	if (tTd(43, 1))
8767545Seric 	{
8867545Seric 		printf("mime8to7: boundary=%s\n",
8967545Seric 			boundary == NULL ? "<none>" : boundary);
9067545Seric 	}
9167545Seric 	p = hvalue("Content-Type", header);
9267545Seric 	if (p != NULL && strncasecmp(p, "multipart/", 10) == 0)
9367545Seric 	{
9467545Seric 		register char *q;
9567545Seric 
9667545Seric 		/* oh dear -- this part is hard */
9767545Seric 		p = strstr(p, "boundary=");		/*XXX*/
9867545Seric 		if (p == NULL)
9967545Seric 		{
10067545Seric 			syserr("mime8to7: Content-Type: %s missing boundary", p);
10167545Seric 			p = "---";
10267545Seric 		}
10367545Seric 		else
10467545Seric 			p += 9;
10567545Seric 		if (*p == '"')
10667545Seric 			q = strchr(p, '"');
10767545Seric 		else
10867545Seric 			q = strchr(p, ',');
10967545Seric 		if (q == NULL)
11067545Seric 			q = p + strlen(p);
11167545Seric 		if (q - p > sizeof bbuf - 1)
11267545Seric 		{
11367545Seric 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
11467545Seric 				q - p, p);
11567545Seric 			q = p + sizeof bbuf - 1;
11667545Seric 		}
11767545Seric 		strncpy(bbuf, p, q - p);
11867545Seric 		bbuf[q - p] = '\0';
11967545Seric 		if (tTd(43, 1))
12067545Seric 		{
12167545Seric 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
12267545Seric 		}
12367545Seric 
12467545Seric 		/* skip the early "comment" prologue */
12567545Seric 		bt = MBT_FINAL;
12667545Seric 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
12767545Seric 		{
12867545Seric 			bt = mimeboundary(buf, bbuf);
12967545Seric 			if (bt != MBT_NOTSEP)
13067545Seric 				break;
13167545Seric 			putline(buf, mci);
13267545Seric 		}
13367545Seric 		while (bt != MBT_FINAL)
13467545Seric 		{
13567545Seric 			auto HDR *hdr = NULL;
13667545Seric 
13767545Seric 			sprintf(buf, "--%s", bbuf);
13867545Seric 			putline(buf, mci);
13967545Seric 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
140*67936Seric 			putheader(mci, hdr, e, 0);
14167545Seric 			bt = mime8to7(mci, hdr, e, bbuf);
14267545Seric 		}
14367545Seric 		sprintf(buf, "--%s--", bbuf);
14467545Seric 		putline(buf, mci);
14567545Seric 
14667545Seric 		/* skip the late "comment" epilogue */
14767545Seric 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
14867545Seric 		{
14967545Seric 			putline(buf, mci);
15067545Seric 			bt = mimeboundary(buf, boundary);
15167545Seric 			if (bt != MBT_NOTSEP)
15267545Seric 				break;
15367545Seric 		}
15467545Seric 		return bt;
15567545Seric 	}
15667545Seric 
15767545Seric 	/*
15867545Seric 	**  Non-compound body type
15967545Seric 	**
16067545Seric 	**	Compute the ratio of seven to eight bit characters;
16167545Seric 	**	use that as a heuristic to decide how to do the
16267545Seric 	**	encoding.
16367545Seric 	*/
16467545Seric 
16567545Seric 	/* remember where we were */
16667545Seric 	offset = ftell(e->e_dfp);
16767545Seric 	if (offset == -1)
16867545Seric 		syserr("mime8to7: cannot ftell on %s", e->e_df);
16967545Seric 
17067545Seric 	/* do a scan of this body type to count character types */
17167545Seric 	sectionsize = sectionhighbits = 0;
17267545Seric 	while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
17367545Seric 	{
17467545Seric 		bt = mimeboundary(buf, boundary);
17567545Seric 		if (bt != MBT_NOTSEP)
17667545Seric 			break;
17767545Seric 		for (p = buf; *p != '\0'; p++)
17867545Seric 		{
17967547Seric 			/* count bytes with the high bit set */
18067545Seric 			sectionsize++;
18167545Seric 			if (bitset(0200, *p))
18267545Seric 				sectionhighbits++;
18367545Seric 		}
18467547Seric 
18567547Seric 		/*
18667547Seric 		**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
18767547Seric 		**  assume base64.  This heuristic avoids double-reading
18867547Seric 		**  large graphics or video files.
18967547Seric 		*/
19067547Seric 
19167547Seric 		if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4)
19267547Seric 			break;
19367545Seric 	}
19467545Seric 	if (feof(e->e_dfp))
19567545Seric 		bt = MBT_FINAL;
19667545Seric 
19767545Seric 	/* return to the original offset for processing */
19867547Seric 	/* XXX use relative seeks to handle >31 bit file sizes? */
19967545Seric 	if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
20067545Seric 		syserr("mime8to7: cannot fseek on %s", e->e_df);
20167545Seric 
20267547Seric 	/*
20367547Seric 	**  Heuristically determine encoding method.
20467547Seric 	**	If more than 1/8 of the total characters have the
20567547Seric 	**	eighth bit set, use base64; else use quoted-printable.
20667547Seric 	*/
20767547Seric 
20867545Seric 	if (tTd(43, 8))
20967545Seric 	{
21067545Seric 		printf("mime8to7: %ld high bits in %ld bytes\n",
21167545Seric 			sectionhighbits, sectionsize);
21267545Seric 	}
21367554Seric 	if (sectionhighbits == 0)
21467545Seric 	{
21567554Seric 		/* no encoding necessary */
21667695Seric 		p = hvalue("content-transfer-encoding", header);
21767695Seric 		if (p != NULL)
21867695Seric 		{
21967695Seric 			sprintf(buf, "Content-Transfer-Encoding: %s", p);
22067695Seric 			putline(buf, mci);
22167695Seric 		}
22267554Seric 		putline("", mci);
22367554Seric 		mci->mci_flags &= ~MCIF_INHEADER;
22467554Seric 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
22567554Seric 		{
22667554Seric 			bt = mimeboundary(buf, boundary);
22767554Seric 			if (bt != MBT_NOTSEP)
22867554Seric 				break;
22967554Seric 			if (buf[0] == 'F' &&
23067554Seric 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
23167554Seric 			    strncmp(buf, "From ", 5) == 0)
23267554Seric 				(void) putc('>', mci->mci_out);
23367554Seric 			putline(buf, mci);
23467554Seric 		}
23567554Seric 	}
23667554Seric 	else if (sectionsize / 8 < sectionhighbits)
23767554Seric 	{
23867545Seric 		/* use base64 encoding */
23967545Seric 		int c1, c2;
24067545Seric 
24167545Seric 		putline("Content-Transfer-Encoding: base64", mci);
24267545Seric 		putline("", mci);
24367545Seric 		mci->mci_flags &= ~MCIF_INHEADER;
24467545Seric 		linelen = 0;
24567545Seric 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
24667545Seric 		{
24767545Seric 			if (linelen > 71)
24867545Seric 			{
24967545Seric 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
25067545Seric 				linelen = 0;
25167545Seric 			}
25267545Seric 			linelen += 4;
25367545Seric 			fputc(Base64Code[c1 >> 2], mci->mci_out);
25467545Seric 			c1 = (c1 & 0x03) << 4;
25567545Seric 			c2 = mime_getchar(e->e_dfp, boundary);
25667545Seric 			if (c2 == EOF)
25767545Seric 			{
25867545Seric 				fputc(Base64Code[c1], mci->mci_out);
25967545Seric 				fputc('=', mci->mci_out);
26067545Seric 				fputc('=', mci->mci_out);
26167545Seric 				break;
26267545Seric 			}
26367545Seric 			c1 |= (c2 >> 4) & 0x0f;
26467545Seric 			fputc(Base64Code[c1], mci->mci_out);
26567545Seric 			c1 = (c2 & 0x0f) << 2;
26667545Seric 			c2 = mime_getchar(e->e_dfp, boundary);
26767545Seric 			if (c2 == EOF)
26867545Seric 			{
26967545Seric 				fputc(Base64Code[c1], mci->mci_out);
27067545Seric 				fputc('=', mci->mci_out);
27167545Seric 				break;
27267545Seric 			}
27367545Seric 			c1 |= (c2 >> 6) & 0x03;
27467545Seric 			fputc(Base64Code[c1], mci->mci_out);
27567545Seric 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
27667545Seric 		}
27767545Seric 	}
27867545Seric 	else
27967545Seric 	{
28067545Seric 		/* use quoted-printable encoding */
28167545Seric 		int c1, c2;
28267545Seric 
28367545Seric 		putline("Content-Transfer-Encoding: quoted-printable", mci);
28467545Seric 		putline("", mci);
28567545Seric 		mci->mci_flags &= ~MCIF_INHEADER;
28667545Seric 		linelen = 0;
28767554Seric 		c2 = '\n';
28867545Seric 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
28967545Seric 		{
29067545Seric 			if (c1 == '\n')
29167545Seric 			{
29267545Seric 				if (c2 == ' ' || c2 == '\t')
29367545Seric 				{
29467545Seric 					fputc('=', mci->mci_out);
29567840Seric 					fputc(Base16Code[(c2 >> 4) & 0x0f],
29667840Seric 								mci->mci_out);
29767840Seric 					fputc(Base16Code[c2 & 0x0f],
29867840Seric 								mci->mci_out);
29967840Seric 					fputs(mci->mci_mailer->m_eol,
30067840Seric 								mci->mci_out);
30167545Seric 				}
30267545Seric 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
30367545Seric 				linelen = 0;
30467545Seric 				c2 = c1;
30567545Seric 				continue;
30667545Seric 			}
30767840Seric 			if (c2 == ' ' || c2 == '\t')
30867840Seric 			{
30967840Seric 				fputc(c2, mci->mci_out);
31067840Seric 				linelen++;
31167840Seric 			}
31267545Seric 			if (linelen > 72)
31367545Seric 			{
31467545Seric 				fputc('=', mci->mci_out);
31567545Seric 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
31667545Seric 				linelen = 0;
31767554Seric 				c2 = '\n';
31867545Seric 			}
31967761Seric 			if (c2 == '\n' && c1 == '.' &&
32067761Seric 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
32167761Seric 			{
32267761Seric 				fputc('.', mci->mci_out);
32367761Seric 				linelen++;
32467761Seric 			}
32567547Seric 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
32667545Seric 			{
32767545Seric 				fputc('=', mci->mci_out);
32867545Seric 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
32967545Seric 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
33067545Seric 				linelen += 3;
33167545Seric 			}
33267840Seric 			else if (c1 != ' ' && c1 != '\t')
33367545Seric 			{
33467545Seric 				fputc(c1, mci->mci_out);
33567545Seric 				linelen++;
33667545Seric 			}
33767545Seric 			c2 = c1;
33867545Seric 		}
33967840Seric 
34067840Seric 		/* output any saved character */
34167840Seric 		if (c2 == ' ' || c2 == '\t')
34267840Seric 		{
34367840Seric 			fputc(c2, mci->mci_out);
34467840Seric 			linelen++;
34567840Seric 		}
34667545Seric 	}
34767545Seric 	if (linelen > 0)
34867545Seric 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
34967547Seric 	return MimeBoundaryType;
35067545Seric }
35167545Seric 
35267545Seric 
35367545Seric int
35467545Seric mime_getchar(fp, boundary)
35567545Seric 	register FILE *fp;
35667545Seric 	char *boundary;
35767545Seric {
35867545Seric 	int c;
35967545Seric 	static char *bp = NULL;
36067545Seric 	static int buflen = 0;
36167545Seric 	static bool atbol = TRUE;	/* at beginning of line */
36267545Seric 	static char buf[128];		/* need not be a full line */
36367545Seric 
36467545Seric 	if (buflen > 0)
36567545Seric 	{
36667545Seric 		buflen--;
36767545Seric 		return *bp++;
36867545Seric 	}
36967545Seric 	c = fgetc(fp);
37067545Seric 	if (atbol && c == '-' && boundary != NULL)
37167545Seric 	{
37267545Seric 		/* check for a message boundary */
37367545Seric 		bp = buf;
37467545Seric 		c = fgetc(fp);
37567545Seric 		if (c != '-')
37667545Seric 		{
37767545Seric 			if (c != EOF)
37867545Seric 			{
37967545Seric 				*bp = c;
38067545Seric 				buflen++;
38167545Seric 			}
38267545Seric 			return '-';
38367545Seric 		}
38467545Seric 
38567545Seric 		/* got "--", now check for rest of separator */
38667545Seric 		*bp++ = '-';
38767545Seric 		*bp++ = '-';
38867545Seric 		while (bp < &buf[sizeof buf - 1] &&
38967545Seric 		       (c = fgetc(fp)) != EOF && c != '\n')
39067545Seric 		{
39167545Seric 			*bp++ = c;
39267545Seric 		}
39367545Seric 		*bp = '\0';
39467547Seric 		MimeBoundaryType = mimeboundary(buf, boundary);
39567547Seric 		switch (MimeBoundaryType)
39667545Seric 		{
39767545Seric 		  case MBT_FINAL:
39867545Seric 		  case MBT_INTERMED:
39967545Seric 			/* we have a message boundary */
40067545Seric 			buflen = 0;
40167545Seric 			return EOF;
40267545Seric 		}
40367545Seric 
40467545Seric 		atbol = c == '\n';
40567545Seric 		if (c != EOF)
40667545Seric 			*bp++ = c;
40767545Seric 		buflen = bp - buf - 1;
40867545Seric 		bp = buf;
40967545Seric 		return *bp++;
41067545Seric 	}
41167545Seric 
41267545Seric 	atbol = c == '\n';
41367545Seric 	return c;
41467545Seric }
41567545Seric /*
41667545Seric **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
41767545Seric **
41867545Seric **	Parameters:
41967545Seric **		line -- the input line.
42067545Seric **		boundary -- the expected boundary.
42167545Seric **
42267545Seric **	Returns:
42367545Seric **		MBT_NOTSEP -- if this is not a separator line
42467545Seric **		MBT_INTERMED -- if this is an intermediate separator
42567545Seric **		MBT_FINAL -- if this is a final boundary
42667545Seric **		MBT_SYNTAX -- if this is a boundary for the wrong
42767545Seric **			enclosure -- i.e., a syntax error.
42867545Seric */
42967545Seric 
43067545Seric int
43167545Seric mimeboundary(line, boundary)
43267545Seric 	register char *line;
43367545Seric 	char *boundary;
43467545Seric {
43567545Seric 	int type;
43667545Seric 	int i;
43767545Seric 
43867545Seric 	if (line[0] != '-' || line[1] != '-' || boundary == NULL)
43967545Seric 		return MBT_NOTSEP;
44067545Seric 	if (tTd(43, 5))
44167545Seric 		printf("mimeboundary: bound=\"%s\", line=\"%s\"... ",
44267545Seric 			boundary, line);
44367545Seric 	i = strlen(line);
44467545Seric 	if (line[i - 1] == '\n')
44567545Seric 		i--;
44667545Seric 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
44767545Seric 	{
44867545Seric 		type = MBT_FINAL;
44967545Seric 		i -= 2;
45067545Seric 	}
45167545Seric 	else
45267545Seric 		type = MBT_INTERMED;
45367545Seric 
45467545Seric 	/* XXX should check for improper nesting here */
45567545Seric 	if (strncmp(boundary, &line[2], i - 2) != 0 ||
45667545Seric 	    strlen(boundary) != i - 2)
45767545Seric 		type = MBT_NOTSEP;
45867545Seric 	if (tTd(43, 5))
45967545Seric 		printf("%d\n", type);
46067545Seric 	return type;
46167545Seric }
46267896Seric /*
46367896Seric **  DEFCHARSET -- return default character set for message
46467896Seric **
46567896Seric **	The first choice for character set is for the mailer
46667896Seric **	corresponding to the envelope sender.  If neither that
46767896Seric **	nor the global configuration file has a default character
46867896Seric **	set defined, return "unknown-8bit" as recommended by
46967896Seric **	RFC 1428 section 3.
47067896Seric **
47167896Seric **	Parameters:
47267896Seric **		e -- the envelope for this message.
47367896Seric **
47467896Seric **	Returns:
47567896Seric **		The default character set for that mailer.
47667896Seric */
47767896Seric 
47867896Seric char *
47967896Seric defcharset(e)
48067896Seric 	register ENVELOPE *e;
48167896Seric {
48267896Seric 	if (e != NULL && e->e_from.q_mailer != NULL &&
48367896Seric 	    e->e_from.q_mailer->m_defcharset != NULL)
48467896Seric 		return e->e_from.q_mailer->m_defcharset;
48567896Seric 	if (DefaultCharSet != NULL)
48667896Seric 		return DefaultCharSet;
48767896Seric 	return "unknown-8bit";
48867896Seric }
489