xref: /csrg-svn/usr.sbin/sendmail/src/mime.c (revision 67547)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.2 (Berkeley) 07/23/94";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 */
32 
33 
/*
**  Encoding alphabets.  Each table is indexed by digit value and
**  yields the character that represents that digit.
*/

char	Base16Code[] =	"0123456789ABCDEF";
char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
**  Classification of an input line with respect to a MIME boundary.
*/

#define MBT_SYNTAX	0	/* syntax error */
#define MBT_NOTSEP	1	/* not a boundary */
#define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
#define MBT_FINAL	3	/* final boundary (trailing -- included) */

/*
**  Result of the most recent boundary check made by mime_getchar;
**  mime8to7 hands it back to its caller.
*/

static int	MimeBoundaryType;	/* internal linkage */
45 /*
46 **  MIME8TO7 -- output 8 bit body in 7 bit format
47 **
48 **	The header has already been output -- this has to do the
49 **	8 to 7 bit conversion.  It would be easy if we didn't have
50 **	to deal with nested formats (multipart/xxx and message/rfc822).
51 **
52 **	We won't be called if we don't have to do a conversion, and
53 **	appropriate MIME-Version: and Content-Type: fields have been
54 **	output.  Any Content-Transfer-Encoding: field has not been
55 **	output, and we can add it here.
56 **
57 **	Parameters:
58 **		mci -- mailer connection information.
59 **		header -- the header for this body part.
60 **		e -- envelope.
61 **		boundary -- the message boundary -- NULL if we are
62 **			processing the outer portion.
63 **
64 **	Returns:
65 **		An indicator of what terminated the message part:
66 **		  MBT_FINAL -- the final boundary
67 **		  MBT_INTERMED -- an intermediate boundary
68 **		  MBT_NOTSEP -- an end of file
69 */
70 
71 int
72 mime8to7(mci, header, e, boundary)
73 	register MCI *mci;
74 	HDR *header;
75 	register ENVELOPE *e;
76 	char *boundary;
77 {
78 	register char *p;
79 	int linelen;
80 	int bt;
81 	off_t offset;
82 	size_t sectionsize, sectionhighbits;
83 	char bbuf[128];
84 	char buf[MAXLINE];
85 	extern char *hvalue();
86 
87 	if (tTd(43, 1))
88 	{
89 		printf("mime8to7: boundary=%s\n",
90 			boundary == NULL ? "<none>" : boundary);
91 	}
92 	p = hvalue("Content-Type", header);
93 	if (p != NULL && strncasecmp(p, "multipart/", 10) == 0)
94 	{
95 		register char *q;
96 
97 		/* oh dear -- this part is hard */
98 		p = strstr(p, "boundary=");		/*XXX*/
99 		if (p == NULL)
100 		{
101 			syserr("mime8to7: Content-Type: %s missing boundary", p);
102 			p = "---";
103 		}
104 		else
105 			p += 9;
106 		if (*p == '"')
107 			q = strchr(p, '"');
108 		else
109 			q = strchr(p, ',');
110 		if (q == NULL)
111 			q = p + strlen(p);
112 		if (q - p > sizeof bbuf - 1)
113 		{
114 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
115 				q - p, p);
116 			q = p + sizeof bbuf - 1;
117 		}
118 		strncpy(bbuf, p, q - p);
119 		bbuf[q - p] = '\0';
120 		if (tTd(43, 1))
121 		{
122 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
123 		}
124 
125 		/* skip the early "comment" prologue */
126 		bt = MBT_FINAL;
127 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
128 		{
129 			bt = mimeboundary(buf, bbuf);
130 			if (bt != MBT_NOTSEP)
131 				break;
132 			putline(buf, mci);
133 		}
134 		while (bt != MBT_FINAL)
135 		{
136 			auto HDR *hdr = NULL;
137 
138 			sprintf(buf, "--%s", bbuf);
139 			putline(buf, mci);
140 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
141 			putheader(mci, hdr, e);
142 			bt = mime8to7(mci, hdr, e, bbuf);
143 		}
144 		sprintf(buf, "--%s--", bbuf);
145 		putline(buf, mci);
146 
147 		/* skip the late "comment" epilogue */
148 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
149 		{
150 			putline(buf, mci);
151 			bt = mimeboundary(buf, boundary);
152 			if (bt != MBT_NOTSEP)
153 				break;
154 		}
155 		return bt;
156 	}
157 
158 	/*
159 	**  Non-compound body type
160 	**
161 	**	Compute the ratio of seven to eight bit characters;
162 	**	use that as a heuristic to decide how to do the
163 	**	encoding.
164 	*/
165 
166 	/* remember where we were */
167 	offset = ftell(e->e_dfp);
168 	if (offset == -1)
169 		syserr("mime8to7: cannot ftell on %s", e->e_df);
170 
171 	/* do a scan of this body type to count character types */
172 	sectionsize = sectionhighbits = 0;
173 	while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
174 	{
175 		bt = mimeboundary(buf, boundary);
176 		if (bt != MBT_NOTSEP)
177 			break;
178 		for (p = buf; *p != '\0'; p++)
179 		{
180 			/* count bytes with the high bit set */
181 			/* XXX should this count any character that will */
182 			/* XXX have to be encoded in quoted-printable? */
183 			sectionsize++;
184 			if (bitset(0200, *p))
185 				sectionhighbits++;
186 		}
187 
188 		/*
189 		**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
190 		**  assume base64.  This heuristic avoids double-reading
191 		**  large graphics or video files.
192 		*/
193 
194 		if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4)
195 			break;
196 	}
197 	if (feof(e->e_dfp))
198 		bt = MBT_FINAL;
199 
200 	/* return to the original offset for processing */
201 	/* XXX use relative seeks to handle >31 bit file sizes? */
202 	if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
203 		syserr("mime8to7: cannot fseek on %s", e->e_df);
204 
205 	/*
206 	**  Heuristically determine encoding method.
207 	**	If more than 1/8 of the total characters have the
208 	**	eighth bit set, use base64; else use quoted-printable.
209 	*/
210 
211 	if (tTd(43, 8))
212 	{
213 		printf("mime8to7: %ld high bits in %ld bytes\n",
214 			sectionhighbits, sectionsize);
215 	}
216 	if (sectionsize / 8 < sectionhighbits)
217 	{
218 		/* use base64 encoding */
219 		int c1, c2;
220 
221 		putline("Content-Transfer-Encoding: base64", mci);
222 		putline("", mci);
223 		mci->mci_flags &= ~MCIF_INHEADER;
224 		linelen = 0;
225 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
226 		{
227 			if (linelen > 71)
228 			{
229 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
230 				linelen = 0;
231 			}
232 			linelen += 4;
233 			fputc(Base64Code[c1 >> 2], mci->mci_out);
234 			c1 = (c1 & 0x03) << 4;
235 			c2 = mime_getchar(e->e_dfp, boundary);
236 			if (c2 == EOF)
237 			{
238 				fputc(Base64Code[c1], mci->mci_out);
239 				fputc('=', mci->mci_out);
240 				fputc('=', mci->mci_out);
241 				break;
242 			}
243 			c1 |= (c2 >> 4) & 0x0f;
244 			fputc(Base64Code[c1], mci->mci_out);
245 			c1 = (c2 & 0x0f) << 2;
246 			c2 = mime_getchar(e->e_dfp, boundary);
247 			if (c2 == EOF)
248 			{
249 				fputc(Base64Code[c1], mci->mci_out);
250 				fputc('=', mci->mci_out);
251 				break;
252 			}
253 			c1 |= (c2 >> 6) & 0x03;
254 			fputc(Base64Code[c1], mci->mci_out);
255 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
256 		}
257 	}
258 	else
259 	{
260 		/* use quoted-printable encoding */
261 		int c1, c2;
262 
263 		putline("Content-Transfer-Encoding: quoted-printable", mci);
264 		putline("", mci);
265 		mci->mci_flags &= ~MCIF_INHEADER;
266 		linelen = 0;
267 		c2 = EOF;
268 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
269 		{
270 			if (c1 == '\n')
271 			{
272 				if (c2 == ' ' || c2 == '\t')
273 				{
274 					fputc('=', mci->mci_out);
275 					fputs(mci->mci_mailer->m_eol, mci->mci_out);
276 				}
277 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
278 				linelen = 0;
279 				c2 = c1;
280 				continue;
281 			}
282 			if (linelen > 72)
283 			{
284 				fputc('=', mci->mci_out);
285 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
286 				linelen = 0;
287 			}
288 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
289 			{
290 				fputc('=', mci->mci_out);
291 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
292 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
293 				linelen += 3;
294 			}
295 			else
296 			{
297 				fputc(c1, mci->mci_out);
298 				linelen++;
299 			}
300 			c2 = c1;
301 		}
302 	}
303 	if (linelen > 0)
304 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
305 	return MimeBoundaryType;
306 }
307 
308 
309 int
310 mime_getchar(fp, boundary)
311 	register FILE *fp;
312 	char *boundary;
313 {
314 	int c;
315 	static char *bp = NULL;
316 	static int buflen = 0;
317 	static bool atbol = TRUE;	/* at beginning of line */
318 	static char buf[128];		/* need not be a full line */
319 
320 	if (buflen > 0)
321 	{
322 		buflen--;
323 		return *bp++;
324 	}
325 	c = fgetc(fp);
326 	if (atbol && c == '-' && boundary != NULL)
327 	{
328 		/* check for a message boundary */
329 		bp = buf;
330 		c = fgetc(fp);
331 		if (c != '-')
332 		{
333 			if (c != EOF)
334 			{
335 				*bp = c;
336 				buflen++;
337 			}
338 			return '-';
339 		}
340 
341 		/* got "--", now check for rest of separator */
342 		*bp++ = '-';
343 		*bp++ = '-';
344 		while (bp < &buf[sizeof buf - 1] &&
345 		       (c = fgetc(fp)) != EOF && c != '\n')
346 		{
347 			*bp++ = c;
348 		}
349 		*bp = '\0';
350 		MimeBoundaryType = mimeboundary(buf, boundary);
351 		switch (MimeBoundaryType)
352 		{
353 		  case MBT_FINAL:
354 		  case MBT_INTERMED:
355 			/* we have a message boundary */
356 			buflen = 0;
357 			return EOF;
358 		}
359 
360 		atbol = c == '\n';
361 		if (c != EOF)
362 			*bp++ = c;
363 		buflen = bp - buf - 1;
364 		bp = buf;
365 		return *bp++;
366 	}
367 	else if (atbol && c == '.')
368 	{
369 		/* implement hidden dot algorithm */
370 		bp = buf;
371 		*bp = c;
372 		buflen = 1;
373 		c = fgetc(fp);
374 		if (c != '\n')
375 			return '.';
376 		atbol = TRUE;
377 		buf[0] = '.';
378 		buf[1] = '\n';
379 		buflen = 2;
380 		return '.';
381 	}
382 
383 	atbol = c == '\n';
384 	return c;
385 }
386 /*
387 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
388 **
389 **	Parameters:
390 **		line -- the input line.
391 **		boundary -- the expected boundary.
392 **
393 **	Returns:
394 **		MBT_NOTSEP -- if this is not a separator line
395 **		MBT_INTERMED -- if this is an intermediate separator
396 **		MBT_FINAL -- if this is a final boundary
397 **		MBT_SYNTAX -- if this is a boundary for the wrong
398 **			enclosure -- i.e., a syntax error.
399 */
400 
401 int
402 mimeboundary(line, boundary)
403 	register char *line;
404 	char *boundary;
405 {
406 	int type;
407 	int i;
408 
409 	if (line[0] != '-' || line[1] != '-' || boundary == NULL)
410 		return MBT_NOTSEP;
411 	if (tTd(43, 5))
412 		printf("mimeboundary: bound=\"%s\", line=\"%s\"... ",
413 			boundary, line);
414 	i = strlen(line);
415 	if (line[i - 1] == '\n')
416 		i--;
417 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
418 	{
419 		type = MBT_FINAL;
420 		i -= 2;
421 	}
422 	else
423 		type = MBT_INTERMED;
424 
425 	/* XXX should check for improper nesting here */
426 	if (strncmp(boundary, &line[2], i - 2) != 0 ||
427 	    strlen(boundary) != i - 2)
428 		type = MBT_NOTSEP;
429 	if (tTd(43, 5))
430 		printf("%d\n", type);
431 	return type;
432 }
433