xref: /csrg-svn/usr.sbin/sendmail/src/mime.c (revision 67683)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.4 (Berkeley) 08/15/94";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 */
32 
33 
/*
**  Encoding alphabets.
**
**	Base16Code maps a 4-bit value to its upper-case hex digit (used
**	for the "=XX" escapes of quoted-printable); Base64Code maps a
**	6-bit value to its base64 digit.
*/

char	Base16Code[] =	"0123456789ABCDEF";
char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries, as returned by mimeboundary() */
#define MBT_SYNTAX	0	/* syntax error */
#define MBT_NOTSEP	1	/* not a boundary */
#define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
#define MBT_FINAL	3	/* final boundary (trailing -- included) */

/*
**  Type of the most recent "--" line examined by mime_getchar();
**  this is how mime_getchar() tells mime8to7() which kind of
**  boundary made it return EOF.
*/

static int	MimeBoundaryType;	/* internal linkage */
45 /*
46 **  MIME8TO7 -- output 8 bit body in 7 bit format
47 **
48 **	The header has already been output -- this has to do the
49 **	8 to 7 bit conversion.  It would be easy if we didn't have
50 **	to deal with nested formats (multipart/xxx and message/rfc822).
51 **
52 **	We won't be called if we don't have to do a conversion, and
53 **	appropriate MIME-Version: and Content-Type: fields have been
54 **	output.  Any Content-Transfer-Encoding: field has not been
55 **	output, and we can add it here.
56 **
57 **	Parameters:
58 **		mci -- mailer connection information.
59 **		header -- the header for this body part.
60 **		e -- envelope.
61 **		boundary -- the message boundary -- NULL if we are
62 **			processing the outer portion.
63 **
64 **	Returns:
65 **		An indicator of what terminated the message part:
66 **		  MBT_FINAL -- the final boundary
67 **		  MBT_INTERMED -- an intermediate boundary
68 **		  MBT_NOTSEP -- an end of file
69 */
70 
71 int
72 mime8to7(mci, header, e, boundary)
73 	register MCI *mci;
74 	HDR *header;
75 	register ENVELOPE *e;
76 	char *boundary;
77 {
78 	register char *p;
79 	int linelen;
80 	int bt;
81 	off_t offset;
82 	size_t sectionsize, sectionhighbits;
83 	char bbuf[128];
84 	char buf[MAXLINE];
85 
86 	if (tTd(43, 1))
87 	{
88 		printf("mime8to7: boundary=%s\n",
89 			boundary == NULL ? "<none>" : boundary);
90 	}
91 	p = hvalue("Content-Type", header);
92 	if (p != NULL && strncasecmp(p, "multipart/", 10) == 0)
93 	{
94 		register char *q;
95 
96 		/* oh dear -- this part is hard */
97 		p = strstr(p, "boundary=");		/*XXX*/
98 		if (p == NULL)
99 		{
100 			syserr("mime8to7: Content-Type: %s missing boundary", p);
101 			p = "---";
102 		}
103 		else
104 			p += 9;
105 		if (*p == '"')
106 			q = strchr(p, '"');
107 		else
108 			q = strchr(p, ',');
109 		if (q == NULL)
110 			q = p + strlen(p);
111 		if (q - p > sizeof bbuf - 1)
112 		{
113 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
114 				q - p, p);
115 			q = p + sizeof bbuf - 1;
116 		}
117 		strncpy(bbuf, p, q - p);
118 		bbuf[q - p] = '\0';
119 		if (tTd(43, 1))
120 		{
121 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
122 		}
123 
124 		/* skip the early "comment" prologue */
125 		bt = MBT_FINAL;
126 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
127 		{
128 			bt = mimeboundary(buf, bbuf);
129 			if (bt != MBT_NOTSEP)
130 				break;
131 			putline(buf, mci);
132 		}
133 		while (bt != MBT_FINAL)
134 		{
135 			auto HDR *hdr = NULL;
136 
137 			sprintf(buf, "--%s", bbuf);
138 			putline(buf, mci);
139 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
140 			putheader(mci, hdr, e);
141 			bt = mime8to7(mci, hdr, e, bbuf);
142 		}
143 		sprintf(buf, "--%s--", bbuf);
144 		putline(buf, mci);
145 
146 		/* skip the late "comment" epilogue */
147 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
148 		{
149 			putline(buf, mci);
150 			bt = mimeboundary(buf, boundary);
151 			if (bt != MBT_NOTSEP)
152 				break;
153 		}
154 		return bt;
155 	}
156 
157 	/*
158 	**  Non-compound body type
159 	**
160 	**	Compute the ratio of seven to eight bit characters;
161 	**	use that as a heuristic to decide how to do the
162 	**	encoding.
163 	*/
164 
165 	/* remember where we were */
166 	offset = ftell(e->e_dfp);
167 	if (offset == -1)
168 		syserr("mime8to7: cannot ftell on %s", e->e_df);
169 
170 	/* do a scan of this body type to count character types */
171 	sectionsize = sectionhighbits = 0;
172 	while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
173 	{
174 		bt = mimeboundary(buf, boundary);
175 		if (bt != MBT_NOTSEP)
176 			break;
177 		for (p = buf; *p != '\0'; p++)
178 		{
179 			/* count bytes with the high bit set */
180 			sectionsize++;
181 			if (bitset(0200, *p))
182 				sectionhighbits++;
183 		}
184 
185 		/*
186 		**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
187 		**  assume base64.  This heuristic avoids double-reading
188 		**  large graphics or video files.
189 		*/
190 
191 		if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4)
192 			break;
193 	}
194 	if (feof(e->e_dfp))
195 		bt = MBT_FINAL;
196 
197 	/* return to the original offset for processing */
198 	/* XXX use relative seeks to handle >31 bit file sizes? */
199 	if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
200 		syserr("mime8to7: cannot fseek on %s", e->e_df);
201 
202 	/*
203 	**  Heuristically determine encoding method.
204 	**	If more than 1/8 of the total characters have the
205 	**	eighth bit set, use base64; else use quoted-printable.
206 	*/
207 
208 	if (tTd(43, 8))
209 	{
210 		printf("mime8to7: %ld high bits in %ld bytes\n",
211 			sectionhighbits, sectionsize);
212 	}
213 	if (sectionhighbits == 0)
214 	{
215 		/* no encoding necessary */
216 		putline("", mci);
217 		mci->mci_flags &= ~MCIF_INHEADER;
218 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
219 		{
220 			bt = mimeboundary(buf, boundary);
221 			if (bt != MBT_NOTSEP)
222 				break;
223 			if (buf[0] == 'F' &&
224 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
225 			    strncmp(buf, "From ", 5) == 0)
226 				(void) putc('>', mci->mci_out);
227 			putline(buf, mci);
228 		}
229 	}
230 	else if (sectionsize / 8 < sectionhighbits)
231 	{
232 		/* use base64 encoding */
233 		int c1, c2;
234 
235 		putline("Content-Transfer-Encoding: base64", mci);
236 		putline("", mci);
237 		mci->mci_flags &= ~MCIF_INHEADER;
238 		linelen = 0;
239 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
240 		{
241 			if (linelen > 71)
242 			{
243 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
244 				linelen = 0;
245 			}
246 			linelen += 4;
247 			fputc(Base64Code[c1 >> 2], mci->mci_out);
248 			c1 = (c1 & 0x03) << 4;
249 			c2 = mime_getchar(e->e_dfp, boundary);
250 			if (c2 == EOF)
251 			{
252 				fputc(Base64Code[c1], mci->mci_out);
253 				fputc('=', mci->mci_out);
254 				fputc('=', mci->mci_out);
255 				break;
256 			}
257 			c1 |= (c2 >> 4) & 0x0f;
258 			fputc(Base64Code[c1], mci->mci_out);
259 			c1 = (c2 & 0x0f) << 2;
260 			c2 = mime_getchar(e->e_dfp, boundary);
261 			if (c2 == EOF)
262 			{
263 				fputc(Base64Code[c1], mci->mci_out);
264 				fputc('=', mci->mci_out);
265 				break;
266 			}
267 			c1 |= (c2 >> 6) & 0x03;
268 			fputc(Base64Code[c1], mci->mci_out);
269 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
270 		}
271 	}
272 	else
273 	{
274 		/* use quoted-printable encoding */
275 		int c1, c2;
276 
277 		putline("Content-Transfer-Encoding: quoted-printable", mci);
278 		putline("", mci);
279 		mci->mci_flags &= ~MCIF_INHEADER;
280 		linelen = 0;
281 		c2 = '\n';
282 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
283 		{
284 			if (c1 == '\n')
285 			{
286 				if (c2 == ' ' || c2 == '\t')
287 				{
288 					fputc('=', mci->mci_out);
289 					fputs(mci->mci_mailer->m_eol, mci->mci_out);
290 				}
291 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
292 				linelen = 0;
293 				c2 = c1;
294 				continue;
295 			}
296 			else if (c2 == '\n' && c1 == '.' &&
297 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
298 			{
299 				fputc('.', mci->mci_out);
300 				linelen++;
301 			}
302 			if (linelen > 72)
303 			{
304 				fputc('=', mci->mci_out);
305 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
306 				linelen = 0;
307 				c2 = '\n';
308 			}
309 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
310 			{
311 				fputc('=', mci->mci_out);
312 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
313 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
314 				linelen += 3;
315 			}
316 			else
317 			{
318 				fputc(c1, mci->mci_out);
319 				linelen++;
320 			}
321 			c2 = c1;
322 		}
323 	}
324 	if (linelen > 0)
325 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
326 	return MimeBoundaryType;
327 }
328 
329 
330 int
331 mime_getchar(fp, boundary)
332 	register FILE *fp;
333 	char *boundary;
334 {
335 	int c;
336 	static char *bp = NULL;
337 	static int buflen = 0;
338 	static bool atbol = TRUE;	/* at beginning of line */
339 	static char buf[128];		/* need not be a full line */
340 
341 	if (buflen > 0)
342 	{
343 		buflen--;
344 		return *bp++;
345 	}
346 	c = fgetc(fp);
347 	if (atbol && c == '-' && boundary != NULL)
348 	{
349 		/* check for a message boundary */
350 		bp = buf;
351 		c = fgetc(fp);
352 		if (c != '-')
353 		{
354 			if (c != EOF)
355 			{
356 				*bp = c;
357 				buflen++;
358 			}
359 			return '-';
360 		}
361 
362 		/* got "--", now check for rest of separator */
363 		*bp++ = '-';
364 		*bp++ = '-';
365 		while (bp < &buf[sizeof buf - 1] &&
366 		       (c = fgetc(fp)) != EOF && c != '\n')
367 		{
368 			*bp++ = c;
369 		}
370 		*bp = '\0';
371 		MimeBoundaryType = mimeboundary(buf, boundary);
372 		switch (MimeBoundaryType)
373 		{
374 		  case MBT_FINAL:
375 		  case MBT_INTERMED:
376 			/* we have a message boundary */
377 			buflen = 0;
378 			return EOF;
379 		}
380 
381 		atbol = c == '\n';
382 		if (c != EOF)
383 			*bp++ = c;
384 		buflen = bp - buf - 1;
385 		bp = buf;
386 		return *bp++;
387 	}
388 
389 	atbol = c == '\n';
390 	return c;
391 }
392 /*
393 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
394 **
395 **	Parameters:
396 **		line -- the input line.
397 **		boundary -- the expected boundary.
398 **
399 **	Returns:
400 **		MBT_NOTSEP -- if this is not a separator line
401 **		MBT_INTERMED -- if this is an intermediate separator
402 **		MBT_FINAL -- if this is a final boundary
403 **		MBT_SYNTAX -- if this is a boundary for the wrong
404 **			enclosure -- i.e., a syntax error.
405 */
406 
407 int
408 mimeboundary(line, boundary)
409 	register char *line;
410 	char *boundary;
411 {
412 	int type;
413 	int i;
414 
415 	if (line[0] != '-' || line[1] != '-' || boundary == NULL)
416 		return MBT_NOTSEP;
417 	if (tTd(43, 5))
418 		printf("mimeboundary: bound=\"%s\", line=\"%s\"... ",
419 			boundary, line);
420 	i = strlen(line);
421 	if (line[i - 1] == '\n')
422 		i--;
423 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
424 	{
425 		type = MBT_FINAL;
426 		i -= 2;
427 	}
428 	else
429 		type = MBT_INTERMED;
430 
431 	/* XXX should check for improper nesting here */
432 	if (strncmp(boundary, &line[2], i - 2) != 0 ||
433 	    strlen(boundary) != i - 2)
434 		type = MBT_NOTSEP;
435 	if (tTd(43, 5))
436 		printf("%d\n", type);
437 	return type;
438 }
439