xref: /csrg-svn/usr.sbin/sendmail/src/mime.c (revision 68717)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.14 (Berkeley) 04/03/95";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 */
32 
33 
/* character set for hex and base64 encoding */
/* (each table is indexed by the numeric value of the digit to be emitted) */
char	Base16Code[] =	"0123456789ABCDEF";
char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries */
#define MBT_SYNTAX	0	/* syntax error */
#define MBT_NOTSEP	1	/* not a boundary */
#define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
#define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names for debug output; indexed by the MBT_* values above */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};
48 /*
49 **  MIME8TO7 -- output 8 bit body in 7 bit format
50 **
51 **	The header has already been output -- this has to do the
52 **	8 to 7 bit conversion.  It would be easy if we didn't have
53 **	to deal with nested formats (multipart/xxx and message/rfc822).
54 **
55 **	We won't be called if we don't have to do a conversion, and
56 **	appropriate MIME-Version: and Content-Type: fields have been
57 **	output.  Any Content-Transfer-Encoding: field has not been
58 **	output, and we can add it here.
59 **
60 **	Parameters:
61 **		mci -- mailer connection information.
62 **		header -- the header for this body part.
63 **		e -- envelope.
64 **		boundaries -- the currently pending message boundaries.
65 **			NULL if we are processing the outer portion.
66 **		flags -- to tweak processing.
67 **
68 **	Returns:
69 **		An indicator of what terminated the message part:
**		  MBT_FINAL -- the final boundary, or end of file
**		  MBT_INTERMED -- an intermediate boundary
**		  MBT_NOTSEP -- reading stopped without a boundary or end of file
73 */
74 
/*
**  One "field=value" parameter taken from a Content-Type: header.
**	Both members point at tokens produced by prescan(); nothing is
**	copied.
*/

struct args
{
	char	*field;		/* name of field */
	char	*value;		/* value of that field */
};
80 
81 int
82 mime8to7(mci, header, e, boundaries, flags)
83 	register MCI *mci;
84 	HDR *header;
85 	register ENVELOPE *e;
86 	char **boundaries;
87 	int flags;
88 {
89 	register char *p;
90 	int linelen;
91 	int bt;
92 	off_t offset;
93 	size_t sectionsize, sectionhighbits;
94 	int i;
95 	char *type;
96 	char *subtype;
97 	char **pvp;
98 	int argc = 0;
99 	struct args argv[MAXMIMEARGS];
100 	char bbuf[128];
101 	char buf[MAXLINE];
102 	char pvpbuf[MAXLINE];
103 	extern char MimeTokenTab[256];
104 
105 	if (tTd(43, 1))
106 	{
107 		printf("mime8to7: flags = %x, boundaries =", flags);
108 		if (boundaries[0] == NULL)
109 			printf(" <none>");
110 		else
111 		{
112 			for (i = 0; boundaries[i] != NULL; i++)
113 				printf(" %s", boundaries[i]);
114 		}
115 		printf("\n");
116 	}
117 	type = subtype = "-none-";
118 	p = hvalue("Content-Type", header);
119 	if (p != NULL &&
120 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
121 			   MimeTokenTab)) != NULL &&
122 	    pvp[0] != NULL)
123 	{
124 		if (tTd(43, 40))
125 		{
126 			for (i = 0; pvp[i] != NULL; i++)
127 				printf("pvp[%d] = \"%s\"\n", i, pvp[i]);
128 		}
129 		type = *pvp++;
130 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
131 		    *++pvp != NULL)
132 		{
133 			subtype = *pvp++;
134 		}
135 
136 		/* break out parameters */
137 		while (*pvp != NULL && argc < MAXMIMEARGS)
138 		{
139 			/* skip to semicolon separator */
140 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
141 				pvp++;
142 			if (*pvp++ == NULL || *pvp == NULL)
143 				break;
144 
145 			/* extract field name */
146 			argv[argc].field = *pvp++;
147 
148 			/* see if there is a value */
149 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
150 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
151 			{
152 				argv[argc].value = *pvp;
153 				argc++;
154 			}
155 		}
156 	}
157 
158 	/* handle types that cannot have 8-bit data internally */
159 	sprintf(buf, "%s/%s", type, subtype);
160 	if (wordinclass(buf, 'n'))
161 		flags |= M87F_NO8BIT;
162 
163 	/*
164 	**  Multipart requires special processing.
165 	**
166 	**	Do a recursive descent into the message.
167 	*/
168 
169 	if (strcasecmp(type, "multipart") == 0)
170 	{
171 		register char *q;
172 
173 		for (i = 0; i < argc; i++)
174 		{
175 			if (strcasecmp(argv[i].field, "boundary") == 0)
176 				break;
177 		}
178 		if (i >= argc)
179 		{
180 			syserr("mime8to7: Content-Type: %s missing boundary", p);
181 			p = "---";
182 		}
183 		else
184 			p = argv[i].value;
185 		if (*p == '"')
186 			q = strchr(++p, '"');
187 		else
188 			q = p + strlen(p);
189 		if (q - p > sizeof bbuf - 1)
190 		{
191 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
192 				q - p, p);
193 			q = p + sizeof bbuf - 1;
194 		}
195 		strncpy(bbuf, p, q - p);
196 		bbuf[q - p] = '\0';
197 		if (tTd(43, 1))
198 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
199 		for (i = 0; i < MAXMIMENESTING; i++)
200 			if (boundaries[i] == NULL)
201 				break;
202 		if (i >= MAXMIMENESTING)
203 			syserr("mime8to7: multipart nesting boundary too deep");
204 		else
205 		{
206 			boundaries[i] = bbuf;
207 			boundaries[i + 1] = NULL;
208 		}
209 
210 		/* skip the early "comment" prologue */
211 		putline("", mci);
212 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
213 		{
214 			bt = mimeboundary(buf, boundaries);
215 			if (bt != MBT_NOTSEP)
216 				break;
217 			putline(buf, mci);
218 			if (tTd(43, 99))
219 				printf("  ...%s", buf);
220 		}
221 		if (feof(e->e_dfp))
222 			bt = MBT_FINAL;
223 		while (bt != MBT_FINAL)
224 		{
225 			auto HDR *hdr = NULL;
226 
227 			sprintf(buf, "--%s", bbuf);
228 			putline(buf, mci);
229 			if (tTd(43, 35))
230 				printf("  ...%s\n", buf);
231 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
232 			if (tTd(43, 101))
233 				putline("+++after collect", mci);
234 			putheader(mci, hdr, e, 0);
235 			if (tTd(43, 101))
236 				putline("+++after putheader", mci);
237 			bt = mime8to7(mci, hdr, e, boundaries, flags);
238 		}
239 		sprintf(buf, "--%s--", bbuf);
240 		putline(buf, mci);
241 		if (tTd(43, 35))
242 			printf("  ...%s\n", buf);
243 		boundaries[i] = NULL;
244 
245 		/* skip the late "comment" epilogue */
246 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
247 		{
248 			bt = mimeboundary(buf, boundaries);
249 			if (bt != MBT_NOTSEP)
250 				break;
251 			putline(buf, mci);
252 			if (tTd(43, 99))
253 				printf("  ...%s", buf);
254 		}
255 		if (feof(e->e_dfp))
256 			bt = MBT_FINAL;
257 		if (tTd(43, 3))
258 			printf("\t\t\tmime8to7=>%s (multipart)\n",
259 				MimeBoundaryNames[bt]);
260 		return bt;
261 	}
262 
263 	/*
264 	**  Non-compound body type
265 	**
266 	**	Compute the ratio of seven to eight bit characters;
267 	**	use that as a heuristic to decide how to do the
268 	**	encoding.
269 	*/
270 
271 	sectionsize = sectionhighbits = 0;
272 	if (!bitset(M87F_NO8BIT, flags))
273 	{
274 		/* remember where we were */
275 		offset = ftell(e->e_dfp);
276 		if (offset == -1)
277 			syserr("mime8to7: cannot ftell on df%s", e->e_id);
278 
279 		/* do a scan of this body type to count character types */
280 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
281 		{
282 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
283 				break;
284 			for (p = buf; *p != '\0'; p++)
285 			{
286 				/* count bytes with the high bit set */
287 				sectionsize++;
288 				if (bitset(0200, *p))
289 					sectionhighbits++;
290 			}
291 
292 			/*
293 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
294 			**  assume base64.  This heuristic avoids double-reading
295 			**  large graphics or video files.
296 			*/
297 
298 			if (sectionsize >= 4096 &&
299 			    sectionhighbits > sectionsize / 4)
300 				break;
301 		}
302 
303 		/* return to the original offset for processing */
304 		/* XXX use relative seeks to handle >31 bit file sizes? */
305 		if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
306 			syserr("mime8to7: cannot fseek on df%s", e->e_id);
307 		else
308 			clearerr(e->e_dfp);
309 	}
310 
311 	/*
312 	**  Heuristically determine encoding method.
313 	**	If more than 1/8 of the total characters have the
314 	**	eighth bit set, use base64; else use quoted-printable.
315 	*/
316 
317 	if (tTd(43, 8))
318 	{
319 		printf("mime8to7: %ld high bit(s) in %ld byte(s)\n",
320 			sectionhighbits, sectionsize);
321 	}
322 	linelen = 0;
323 	if (sectionhighbits == 0)
324 	{
325 		/* no encoding necessary */
326 		p = hvalue("content-transfer-encoding", header);
327 		if (p != NULL)
328 		{
329 			sprintf(buf, "Content-Transfer-Encoding: %s", p);
330 			putline(buf, mci);
331 			if (tTd(43, 36))
332 				printf("  ...%s\n", buf);
333 		}
334 		putline("", mci);
335 		mci->mci_flags &= ~MCIF_INHEADER;
336 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
337 		{
338 			bt = mimeboundary(buf, boundaries);
339 			if (bt != MBT_NOTSEP)
340 				break;
341 			if (buf[0] == 'F' &&
342 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
343 			    strncmp(buf, "From ", 5) == 0)
344 				(void) putc('>', mci->mci_out);
345 			putline(buf, mci);
346 		}
347 		if (feof(e->e_dfp))
348 			bt = MBT_FINAL;
349 	}
350 	else if (sectionsize / 8 < sectionhighbits)
351 	{
352 		/* use base64 encoding */
353 		int c1, c2;
354 
355 		putline("Content-Transfer-Encoding: base64", mci);
356 		if (tTd(43, 36))
357 			printf("  ...Content-Transfer-Encoding: base64\n");
358 		putline("", mci);
359 		mci->mci_flags &= ~MCIF_INHEADER;
360 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != EOF)
361 		{
362 			if (linelen > 71)
363 			{
364 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
365 				linelen = 0;
366 			}
367 			linelen += 4;
368 			fputc(Base64Code[c1 >> 2], mci->mci_out);
369 			c1 = (c1 & 0x03) << 4;
370 			c2 = mime_getchar(e->e_dfp, boundaries, &bt);
371 			if (c2 == EOF)
372 			{
373 				fputc(Base64Code[c1], mci->mci_out);
374 				fputc('=', mci->mci_out);
375 				fputc('=', mci->mci_out);
376 				break;
377 			}
378 			c1 |= (c2 >> 4) & 0x0f;
379 			fputc(Base64Code[c1], mci->mci_out);
380 			c1 = (c2 & 0x0f) << 2;
381 			c2 = mime_getchar(e->e_dfp, boundaries, &bt);
382 			if (c2 == EOF)
383 			{
384 				fputc(Base64Code[c1], mci->mci_out);
385 				fputc('=', mci->mci_out);
386 				break;
387 			}
388 			c1 |= (c2 >> 6) & 0x03;
389 			fputc(Base64Code[c1], mci->mci_out);
390 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
391 		}
392 	}
393 	else
394 	{
395 		/* use quoted-printable encoding */
396 		int c1, c2;
397 		int fromstate;
398 
399 		putline("Content-Transfer-Encoding: quoted-printable", mci);
400 		if (tTd(43, 36))
401 			printf("  ...Content-Transfer-Encoding: quoted-printable\n");
402 		putline("", mci);
403 		mci->mci_flags &= ~MCIF_INHEADER;
404 		fromstate = 0;
405 		c2 = '\n';
406 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != EOF)
407 		{
408 			if (c1 == '\n')
409 			{
410 				if (c2 == ' ' || c2 == '\t')
411 				{
412 					fputc('=', mci->mci_out);
413 					fputc(Base16Code[(c2 >> 4) & 0x0f],
414 								mci->mci_out);
415 					fputc(Base16Code[c2 & 0x0f],
416 								mci->mci_out);
417 					fputs(mci->mci_mailer->m_eol,
418 								mci->mci_out);
419 				}
420 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
421 				linelen = fromstate = 0;
422 				c2 = c1;
423 				continue;
424 			}
425 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
426 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
427 			{
428 				fputs("=20", mci->mci_out);
429 				linelen += 3;
430 			}
431 			else if (c2 == ' ' || c2 == '\t')
432 			{
433 				fputc(c2, mci->mci_out);
434 				linelen++;
435 			}
436 			if (linelen > 72)
437 			{
438 				fputc('=', mci->mci_out);
439 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
440 				linelen = fromstate = 0;
441 				c2 = '\n';
442 			}
443 			if (c2 == '\n' && c1 == '.' &&
444 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
445 			{
446 				fputc('.', mci->mci_out);
447 				linelen++;
448 			}
449 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
450 			{
451 				fputc('=', mci->mci_out);
452 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
453 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
454 				linelen += 3;
455 			}
456 			else if (c1 != ' ' && c1 != '\t')
457 			{
458 				if (linelen < 4 && c1 == "From"[linelen])
459 					fromstate++;
460 				fputc(c1, mci->mci_out);
461 				linelen++;
462 			}
463 			c2 = c1;
464 		}
465 
466 		/* output any saved character */
467 		if (c2 == ' ' || c2 == '\t')
468 		{
469 			fputc('=', mci->mci_out);
470 			fputc(Base16Code[(c2 >> 4) & 0x0f], mci->mci_out);
471 			fputc(Base16Code[c2 & 0x0f], mci->mci_out);
472 			linelen += 3;
473 		}
474 	}
475 	if (linelen > 0)
476 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
477 	if (tTd(43, 3))
478 		printf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
479 	return bt;
480 }
481 /*
482 **  MIME_GETCHAR -- get a character for MIME processing
483 **
484 **	Treats boundaries as EOF.
485 **
486 **	Parameters:
487 **		fp -- the input file.
488 **		boundaries -- the current MIME boundaries.
489 **		btp -- if the return value is EOF, *btp is set to
490 **			the type of the boundary.
491 **
492 **	Returns:
493 **		The next character in the input stream.
494 */
495 
496 int
497 mime_getchar(fp, boundaries, btp)
498 	register FILE *fp;
499 	char **boundaries;
500 	int *btp;
501 {
502 	int c;
503 	static char *bp = NULL;
504 	static int buflen = 0;
505 	static bool atbol = TRUE;	/* at beginning of line */
506 	static int bt = MBT_SYNTAX;	/* boundary type of next EOF */
507 	static char buf[128];		/* need not be a full line */
508 
509 	if (buflen > 0)
510 	{
511 		buflen--;
512 		return *bp++;
513 	}
514 	bp = buf;
515 	buflen = 0;
516 	c = fgetc(fp);
517 	if (c == '\n')
518 	{
519 		/* might be part of a MIME boundary */
520 		*bp++ = c;
521 		atbol = TRUE;
522 		c = fgetc(fp);
523 	}
524 	if (c != EOF)
525 		*bp++ = c;
526 	else
527 		bt = MBT_FINAL;
528 	if (atbol && c == '-')
529 	{
530 		/* check for a message boundary */
531 		c = fgetc(fp);
532 		if (c != '-')
533 		{
534 			if (c != EOF)
535 				*bp++ = c;
536 			else
537 				bt = MBT_FINAL;
538 			buflen = bp - buf - 1;
539 			bp = buf;
540 			return *bp++;
541 		}
542 
543 		/* got "--", now check for rest of separator */
544 		*bp++ = '-';
545 		while (bp < &buf[sizeof buf - 1] &&
546 		       (c = fgetc(fp)) != EOF && c != '\n')
547 		{
548 			*bp++ = c;
549 		}
550 		*bp = '\0';
551 		bt = mimeboundary(&buf[1], boundaries);
552 		switch (bt)
553 		{
554 		  case MBT_FINAL:
555 		  case MBT_INTERMED:
556 			/* we have a message boundary */
557 			buflen = 0;
558 			*btp = bt;
559 			return EOF;
560 		}
561 
562 		atbol = c == '\n';
563 		if (c != EOF)
564 			*bp++ = c;
565 	}
566 
567 	buflen = bp - buf - 1;
568 	if (buflen < 0)
569 	{
570 		*btp = bt;
571 		return EOF;
572 	}
573 	bp = buf;
574 	return *bp++;
575 }
576 /*
577 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
578 **
579 **	Parameters:
580 **		line -- the input line.
581 **		boundaries -- the set of currently pending boundaries.
582 **
583 **	Returns:
584 **		MBT_NOTSEP -- if this is not a separator line
585 **		MBT_INTERMED -- if this is an intermediate separator
586 **		MBT_FINAL -- if this is a final boundary
587 **		MBT_SYNTAX -- if this is a boundary for the wrong
588 **			enclosure -- i.e., a syntax error.
589 */
590 
591 int
592 mimeboundary(line, boundaries)
593 	register char *line;
594 	char **boundaries;
595 {
596 	int type;
597 	int i;
598 	int savec;
599 
600 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
601 		return MBT_NOTSEP;
602 	i = strlen(line);
603 	if (line[i - 1] == '\n')
604 		i--;
605 	if (tTd(43, 5))
606 		printf("mimeboundary: line=\"%.*s\"... ", i, line);
607 	while (line[i - 1] == ' ' || line[i - 1] == '\t')
608 		i--;
609 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
610 	{
611 		type = MBT_FINAL;
612 		i -= 2;
613 	}
614 	else
615 		type = MBT_INTERMED;
616 
617 	savec = line[i];
618 	line[i] = '\0';
619 	/* XXX should check for improper nesting here */
620 	if (isboundary(&line[2], boundaries) < 0)
621 		type = MBT_NOTSEP;
622 	line[i] = savec;
623 	if (tTd(43, 5))
624 		printf("%s\n", MimeBoundaryNames[type]);
625 	return type;
626 }
627 /*
628 **  DEFCHARSET -- return default character set for message
629 **
630 **	The first choice for character set is for the mailer
631 **	corresponding to the envelope sender.  If neither that
632 **	nor the global configuration file has a default character
633 **	set defined, return "unknown-8bit" as recommended by
634 **	RFC 1428 section 3.
635 **
636 **	Parameters:
637 **		e -- the envelope for this message.
638 **
639 **	Returns:
640 **		The default character set for that mailer.
641 */
642 
643 char *
644 defcharset(e)
645 	register ENVELOPE *e;
646 {
647 	if (e != NULL && e->e_from.q_mailer != NULL &&
648 	    e->e_from.q_mailer->m_defcharset != NULL)
649 		return e->e_from.q_mailer->m_defcharset;
650 	if (DefaultCharSet != NULL)
651 		return DefaultCharSet;
652 	return "unknown-8bit";
653 }
/*
**  ISBOUNDARY -- is a given string a currently valid boundary?
**
**	The comparison is exact and case sensitive.
**
**	Parameters:
**		line -- the candidate boundary text.
**		boundaries -- the NULL-terminated list of valid
**			boundaries.
**
**	Returns:
**		The position of the first matching entry in
**			boundaries, counting from zero.
**		-1 -- if no entry matches.
*/

int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register char **cur;

	/* walk the list until the terminating NULL entry */
	for (cur = boundaries; *cur != NULL; cur++)
	{
		if (strcmp(line, *cur) != 0)
			continue;
		return (int) (cur - boundaries);
	}
	return -1;
}
681