xref: /csrg-svn/usr.sbin/sendmail/src/mime.c (revision 68711)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.13 (Berkeley) 04/02/95";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 */
32 
33 
/*
**  Digit alphabets used by the encoders below.
**
**	Base16Code maps a 4-bit value to its upper-case hex digit
**	(used for the "=XX" escapes of quoted-printable).
**	Base64Code maps a 6-bit value to its base64 digit.
*/

char	Base16Code[] =	"0123456789" "ABCDEF";
char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
			"abcdefghijklmnopqrstuvwxyz"
			"0123456789"
			"+/";
37 
/*
**  Classification of an input line with respect to the MIME
**  boundaries that are currently pending.
*/

# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* type of the boundary most recently seen by mime_getchar (file local) */
static int	MimeBoundaryType = 0;
45 /*
46 **  MIME8TO7 -- output 8 bit body in 7 bit format
47 **
48 **	The header has already been output -- this has to do the
49 **	8 to 7 bit conversion.  It would be easy if we didn't have
50 **	to deal with nested formats (multipart/xxx and message/rfc822).
51 **
52 **	We won't be called if we don't have to do a conversion, and
53 **	appropriate MIME-Version: and Content-Type: fields have been
54 **	output.  Any Content-Transfer-Encoding: field has not been
55 **	output, and we can add it here.
56 **
57 **	Parameters:
58 **		mci -- mailer connection information.
59 **		header -- the header for this body part.
60 **		e -- envelope.
61 **		boundaries -- the currently pending message boundaries.
62 **			NULL if we are processing the outer portion.
63 **		flags -- to tweak processing.
64 **
65 **	Returns:
66 **		An indicator of what terminated the message part:
67 **		  MBT_FINAL -- the final boundary
68 **		  MBT_INTERMED -- an intermediate boundary
69 **		  MBT_NOTSEP -- an end of file
70 */
71 
/*
**  One "name=value" parameter taken from a Content-Type: header.
**  Both members point into the token vector produced by prescan;
**  nothing is copied.
*/

struct args
{
	char	*field;		/* parameter name, e.g. "boundary" */
	char	*value;		/* the token following the "=" */
};
77 
78 int
79 mime8to7(mci, header, e, boundaries, flags)
80 	register MCI *mci;
81 	HDR *header; register ENVELOPE *e;
82 	char **boundaries;
83 	int flags;
84 {
85 	register char *p;
86 	int linelen;
87 	int bt;
88 	off_t offset;
89 	size_t sectionsize, sectionhighbits;
90 	int i;
91 	char *type;
92 	char *subtype;
93 	char **pvp;
94 	int argc = 0;
95 	struct args argv[MAXMIMEARGS];
96 	char bbuf[128];
97 	char buf[MAXLINE];
98 	char pvpbuf[MAXLINE];
99 	extern char MimeTokenTab[256];
100 
101 	if (tTd(43, 1))
102 	{
103 		printf("mime8to7: boundary=%s\n",
104 			boundaries[0] == NULL ? "<none>" : boundaries[0]);
105 		for (i = 1; boundaries[i] != NULL; i++)
106 			printf("\t%s\n", boundaries[i]);
107 	}
108 	type = subtype = "-none-";
109 	p = hvalue("Content-Type", header);
110 	if (p != NULL &&
111 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
112 			   MimeTokenTab)) != NULL &&
113 	    pvp[0] != NULL)
114 	{
115 		type = *pvp++;
116 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
117 		    *++pvp != NULL)
118 		{
119 			subtype = *pvp++;
120 		}
121 
122 		/* break out parameters */
123 		while (*pvp != NULL && argc < MAXMIMEARGS)
124 		{
125 			/* skip to semicolon separator */
126 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
127 				pvp++;
128 			if (*pvp++ == NULL || *pvp == NULL)
129 				break;
130 
131 			/* extract field name */
132 			argv[argc].field = *pvp++;
133 
134 			/* see if there is a value */
135 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
136 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
137 			{
138 				argv[argc].value = *pvp;
139 				argc++;
140 			}
141 		}
142 	}
143 	if (strcasecmp(type, "multipart") == 0)
144 	{
145 		register char *q;
146 
147 		for (i = 0; i < argc; i++)
148 		{
149 			if (strcasecmp(argv[i].field, "boundary") == 0)
150 				break;
151 		}
152 		if (i >= argc)
153 		{
154 			syserr("mime8to7: Content-Type: %s missing boundary", p);
155 			p = "---";
156 		}
157 		else
158 			p = argv[i].value;
159 		if (*p == '"')
160 			q = strchr(++p, '"');
161 		else
162 			q = p + strlen(p);
163 		if (q - p > sizeof bbuf - 1)
164 		{
165 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
166 				q - p, p);
167 			q = p + sizeof bbuf - 1;
168 		}
169 		strncpy(bbuf, p, q - p);
170 		bbuf[q - p] = '\0';
171 		if (tTd(43, 1))
172 		{
173 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
174 		}
175 		for (i = 0; i < MAXMIMENESTING; i++)
176 			if (boundaries[i] == NULL)
177 				break;
178 		if (i >= MAXMIMENESTING)
179 			syserr("mime8to7: multipart nesting boundary too deep");
180 		else
181 		{
182 			boundaries[i] = bbuf;
183 			boundaries[i + 1] = NULL;
184 		}
185 
186 		/* flag subtypes that can't have any 8-bit data */
187 		if (strcasecmp(subtype, "signed") == 0)
188 			flags |= M87F_NO8BIT;
189 
190 		/* skip the early "comment" prologue */
191 		bt = MBT_FINAL;
192 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
193 		{
194 			bt = mimeboundary(buf, boundaries);
195 			if (bt != MBT_NOTSEP)
196 				break;
197 			putline(buf, mci);
198 		}
199 		while (bt != MBT_FINAL)
200 		{
201 			auto HDR *hdr = NULL;
202 
203 			sprintf(buf, "--%s", bbuf);
204 			putline(buf, mci);
205 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
206 			putheader(mci, hdr, e, 0);
207 			bt = mime8to7(mci, hdr, e, boundaries, flags);
208 		}
209 		sprintf(buf, "--%s--", bbuf);
210 		putline(buf, mci);
211 
212 		/* skip the late "comment" epilogue */
213 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
214 		{
215 			putline(buf, mci);
216 			bt = mimeboundary(buf, boundaries);
217 			if (bt != MBT_NOTSEP)
218 				break;
219 		}
220 		boundaries[i] = NULL;
221 		return bt;
222 	}
223 
224 	/*
225 	**  Non-compound body type
226 	**
227 	**	Compute the ratio of seven to eight bit characters;
228 	**	use that as a heuristic to decide how to do the
229 	**	encoding.
230 	*/
231 
232 	/* handle types that cannot have 8-bit data internally */
233 	sprintf(buf, "%s/%s", type, subtype);
234 	if (wordinclass(buf, 'n'))
235 		flags |= M87F_NO8BIT;
236 
237 	sectionsize = sectionhighbits = 0;
238 	if (!bitset(M87F_NO8BIT, flags))
239 	{
240 		/* remember where we were */
241 		offset = ftell(e->e_dfp);
242 		if (offset == -1)
243 			syserr("mime8to7: cannot ftell on df%s", e->e_id);
244 
245 		/* do a scan of this body type to count character types */
246 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
247 		{
248 			bt = mimeboundary(buf, boundaries);
249 			if (bt != MBT_NOTSEP)
250 				break;
251 			for (p = buf; *p != '\0'; p++)
252 			{
253 				/* count bytes with the high bit set */
254 				sectionsize++;
255 				if (bitset(0200, *p))
256 					sectionhighbits++;
257 			}
258 
259 			/*
260 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
261 			**  assume base64.  This heuristic avoids double-reading
262 			**  large graphics or video files.
263 			*/
264 
265 			if (sectionsize >= 4096 &&
266 			    sectionhighbits > sectionsize / 4)
267 				break;
268 		}
269 		if (feof(e->e_dfp))
270 			bt = MBT_FINAL;
271 
272 		/* return to the original offset for processing */
273 		/* XXX use relative seeks to handle >31 bit file sizes? */
274 		if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
275 			syserr("mime8to7: cannot fseek on df%s", e->e_id);
276 	}
277 
278 	/*
279 	**  Heuristically determine encoding method.
280 	**	If more than 1/8 of the total characters have the
281 	**	eighth bit set, use base64; else use quoted-printable.
282 	*/
283 
284 	if (tTd(43, 8))
285 	{
286 		printf("mime8to7: %ld high bits in %ld bytes\n",
287 			sectionhighbits, sectionsize);
288 	}
289 	if (sectionhighbits == 0)
290 	{
291 		/* no encoding necessary */
292 		p = hvalue("content-transfer-encoding", header);
293 		if (p != NULL)
294 		{
295 			sprintf(buf, "Content-Transfer-Encoding: %s", p);
296 			putline(buf, mci);
297 		}
298 		putline("", mci);
299 		mci->mci_flags &= ~MCIF_INHEADER;
300 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
301 		{
302 			bt = mimeboundary(buf, boundaries);
303 			if (bt != MBT_NOTSEP)
304 				break;
305 			if (buf[0] == 'F' &&
306 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
307 			    strncmp(buf, "From ", 5) == 0)
308 				(void) putc('>', mci->mci_out);
309 			putline(buf, mci);
310 		}
311 	}
312 	else if (sectionsize / 8 < sectionhighbits)
313 	{
314 		/* use base64 encoding */
315 		int c1, c2;
316 
317 		putline("Content-Transfer-Encoding: base64", mci);
318 		putline("", mci);
319 		mci->mci_flags &= ~MCIF_INHEADER;
320 		linelen = 0;
321 		while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF)
322 		{
323 			if (linelen > 71)
324 			{
325 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
326 				linelen = 0;
327 			}
328 			linelen += 4;
329 			fputc(Base64Code[c1 >> 2], mci->mci_out);
330 			c1 = (c1 & 0x03) << 4;
331 			c2 = mime_getchar(e->e_dfp, boundaries);
332 			if (c2 == EOF)
333 			{
334 				fputc(Base64Code[c1], mci->mci_out);
335 				fputc('=', mci->mci_out);
336 				fputc('=', mci->mci_out);
337 				break;
338 			}
339 			c1 |= (c2 >> 4) & 0x0f;
340 			fputc(Base64Code[c1], mci->mci_out);
341 			c1 = (c2 & 0x0f) << 2;
342 			c2 = mime_getchar(e->e_dfp, boundaries);
343 			if (c2 == EOF)
344 			{
345 				fputc(Base64Code[c1], mci->mci_out);
346 				fputc('=', mci->mci_out);
347 				break;
348 			}
349 			c1 |= (c2 >> 6) & 0x03;
350 			fputc(Base64Code[c1], mci->mci_out);
351 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
352 		}
353 	}
354 	else
355 	{
356 		/* use quoted-printable encoding */
357 		int c1, c2;
358 		int fromstate;
359 
360 		putline("Content-Transfer-Encoding: quoted-printable", mci);
361 		putline("", mci);
362 		mci->mci_flags &= ~MCIF_INHEADER;
363 		linelen = fromstate = 0;
364 		c2 = '\n';
365 		while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF)
366 		{
367 			if (c1 == '\n')
368 			{
369 				if (c2 == ' ' || c2 == '\t')
370 				{
371 					fputc('=', mci->mci_out);
372 					fputc(Base16Code[(c2 >> 4) & 0x0f],
373 								mci->mci_out);
374 					fputc(Base16Code[c2 & 0x0f],
375 								mci->mci_out);
376 					fputs(mci->mci_mailer->m_eol,
377 								mci->mci_out);
378 				}
379 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
380 				linelen = fromstate = 0;
381 				c2 = c1;
382 				continue;
383 			}
384 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
385 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
386 			{
387 				fputs("=20", mci->mci_out);
388 				linelen += 3;
389 			}
390 			else if (c2 == ' ' || c2 == '\t')
391 			{
392 				fputc(c2, mci->mci_out);
393 				linelen++;
394 			}
395 			if (linelen > 72)
396 			{
397 				fputc('=', mci->mci_out);
398 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
399 				linelen = fromstate = 0;
400 				c2 = '\n';
401 			}
402 			if (c2 == '\n' && c1 == '.' &&
403 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
404 			{
405 				fputc('.', mci->mci_out);
406 				linelen++;
407 			}
408 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
409 			{
410 				fputc('=', mci->mci_out);
411 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
412 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
413 				linelen += 3;
414 			}
415 			else if (c1 != ' ' && c1 != '\t')
416 			{
417 				if (linelen < 4 && c1 == "From"[linelen])
418 					fromstate++;
419 				fputc(c1, mci->mci_out);
420 				linelen++;
421 			}
422 			c2 = c1;
423 		}
424 
425 		/* output any saved character */
426 		if (c2 == ' ' || c2 == '\t')
427 		{
428 			fputc('=', mci->mci_out);
429 			fputc(Base16Code[(c2 >> 4) & 0x0f], mci->mci_out);
430 			fputc(Base16Code[c2 & 0x0f], mci->mci_out);
431 			linelen += 3;
432 		}
433 	}
434 	if (linelen > 0)
435 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
436 	return MimeBoundaryType;
437 }
438 /*
439 **  MIME_GETCHAR -- get a character for MIME processing
440 **
441 **	Treats boundaries as EOF.
442 **
443 **	Parameters:
444 **		fp -- the input file.
445 **		boundaries -- the current MIME boundaries.
446 **
447 **	Returns:
448 **		The next character in the input stream.
449 */
450 
451 int
452 mime_getchar(fp, boundaries)
453 	register FILE *fp;
454 	char **boundaries;
455 {
456 	int c;
457 	static char *bp = NULL;
458 	static int buflen = 0;
459 	static bool atbol = TRUE;	/* at beginning of line */
460 	static char buf[128];		/* need not be a full line */
461 
462 	if (buflen > 0)
463 	{
464 		buflen--;
465 		return *bp++;
466 	}
467 	bp = buf;
468 	buflen = 0;
469 	c = fgetc(fp);
470 	if (c == '\n')
471 	{
472 		/* might be part of a MIME boundary */
473 		*bp++ = c;
474 		atbol = TRUE;
475 		c = fgetc(fp);
476 	}
477 	if (c != EOF)
478 		*bp++ = c;
479 	if (atbol && c == '-')
480 	{
481 		/* check for a message boundary */
482 		c = fgetc(fp);
483 		if (c != '-')
484 		{
485 			if (c != EOF)
486 				*bp++ = c;
487 			buflen = bp - buf - 1;
488 			bp = buf;
489 			return *bp++;
490 		}
491 
492 		/* got "--", now check for rest of separator */
493 		*bp++ = '-';
494 		while (bp < &buf[sizeof buf - 1] &&
495 		       (c = fgetc(fp)) != EOF && c != '\n')
496 		{
497 			*bp++ = c;
498 		}
499 		*bp = '\0';
500 		MimeBoundaryType = mimeboundary(buf, boundaries);
501 		switch (MimeBoundaryType)
502 		{
503 		  case MBT_FINAL:
504 		  case MBT_INTERMED:
505 			/* we have a message boundary */
506 			buflen = 0;
507 			return EOF;
508 		}
509 
510 		atbol = c == '\n';
511 		if (c != EOF)
512 			*bp++ = c;
513 	}
514 
515 	buflen = bp - buf - 1;
516 	if (buflen < 0)
517 		return EOF;
518 	bp = buf;
519 	return *bp++;
520 }
521 /*
522 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
523 **
524 **	Parameters:
525 **		line -- the input line.
526 **		boundaries -- the set of currently pending boundaries.
527 **
528 **	Returns:
529 **		MBT_NOTSEP -- if this is not a separator line
530 **		MBT_INTERMED -- if this is an intermediate separator
531 **		MBT_FINAL -- if this is a final boundary
532 **		MBT_SYNTAX -- if this is a boundary for the wrong
533 **			enclosure -- i.e., a syntax error.
534 */
535 
536 int
537 mimeboundary(line, boundaries)
538 	register char *line;
539 	char **boundaries;
540 {
541 	int type;
542 	int i;
543 	int savec;
544 
545 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
546 		return MBT_NOTSEP;
547 	if (tTd(43, 5))
548 		printf("mimeboundary: line=\"%s\"... ", line);
549 	i = strlen(line);
550 	if (line[i - 1] == '\n')
551 		i--;
552 	while (line[i - 1] == ' ' || line[i - 1] == '\t')
553 		i--;
554 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
555 	{
556 		type = MBT_FINAL;
557 		i -= 2;
558 	}
559 	else
560 		type = MBT_INTERMED;
561 
562 	savec = line[i];
563 	line[i] = '\0';
564 	/* XXX should check for improper nesting here */
565 	if (isboundary(&line[2], boundaries) < 0)
566 		type = MBT_NOTSEP;
567 	line[i] = savec;
568 	if (tTd(43, 5))
569 		printf("%d\n", type);
570 	return type;
571 }
572 /*
573 **  DEFCHARSET -- return default character set for message
574 **
575 **	The first choice for character set is for the mailer
576 **	corresponding to the envelope sender.  If neither that
577 **	nor the global configuration file has a default character
578 **	set defined, return "unknown-8bit" as recommended by
579 **	RFC 1428 section 3.
580 **
581 **	Parameters:
582 **		e -- the envelope for this message.
583 **
584 **	Returns:
585 **		The default character set for that mailer.
586 */
587 
588 char *
589 defcharset(e)
590 	register ENVELOPE *e;
591 {
592 	if (e != NULL && e->e_from.q_mailer != NULL &&
593 	    e->e_from.q_mailer->m_defcharset != NULL)
594 		return e->e_from.q_mailer->m_defcharset;
595 	if (DefaultCharSet != NULL)
596 		return DefaultCharSet;
597 	return "unknown-8bit";
598 }
599 /*
600 **  ISBOUNDARY -- is a given string a currently valid boundary?
601 **
602 **	Parameters:
603 **		line -- the current input line.
604 **		boundaries -- the list of valid boundaries.
605 **
606 **	Returns:
607 **		The index number in boundaries if the line is found.
608 **		-1 -- otherwise.
609 **
610 */
611 
/*
**  Look "line" up in the NULL-terminated list "boundaries" using
**  an exact string comparison.  Returns the index of the first
**  matching entry, or -1 if there is none.  A NULL line or a
**  NULL list matches nothing.
*/

int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register char **bp;

	/* nothing to compare, or nothing to compare against */
	if (line == NULL || boundaries == NULL)
		return -1;

	for (bp = boundaries; *bp != NULL; bp++)
	{
		if (strcmp(line, *bp) == 0)
			return (int) (bp - boundaries);
	}
	return -1;
}
626