xref: /netbsd-src/usr.bin/mail/mime_codecs.c (revision 8b0f9554ff8762542c4defc4f70e1eb76fb508fa)
1 /*	$NetBSD: mime_codecs.c,v 1.6 2007/10/23 14:58:44 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2006 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Anon Ymous.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * This module contains all mime related codecs.  Typically there are
41  * two versions: one operating on buffers and one operating on files.
42  * All exported routines have a "mime_" prefix.  The file oriented
43  * routines have a "mime_f" prefix replacing the "mime_" prefix of the
44  * equivalent buffer based version.
45  *
46  * The file based API should be:
47  *
48  *   mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
49  *
50  * XXX - currently this naming convention has not been adhered to.
51  *
52  * where the cookie is a generic way to pass arguments to the routine.
53  * This way these routines can be run by run_function() in mime.c.
54  *
55  * The buffer based API is not as rigid.
56  */
57 
58 #ifdef MIME_SUPPORT
59 
60 #include <sys/cdefs.h>
61 #ifndef __lint__
62 __RCSID("$NetBSD: mime_codecs.c,v 1.6 2007/10/23 14:58:44 christos Exp $");
63 #endif /* not __lint__ */
64 
65 #include <assert.h>
66 #include <iconv.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <util.h>
70 
71 #include "def.h"
72 #include "extern.h"
73 #include "mime_codecs.h"
74 
75 
76 #ifdef CHARSET_SUPPORT
77 /************************************************************************
78  * Core character set conversion routines.
79  *
80  */
81 
/*
 * Fault-tolerant iconv() function.
 *
 * This routine was borrowed from nail-11.25/mime.c and modified.  It
 * behaves like iconv(3), except that an illegal input sequence
 * (errno == EILSEQ) does not stop the conversion: a '?' is stored in
 * the output in its place, one input byte is skipped, and the
 * conversion is restarted at the next input byte (is this a good
 * idea?).  All other errors are handled by the caller.
 *
 *   cd:        conversion descriptor.
 *   inb:       address of the input pointer; advanced past consumed input.
 *   inbleft:   address of the remaining input byte count.
 *   outb:      address of the output pointer; advanced past stored output.
 *   outbleft:  address of the remaining output byte count.
 *
 * Return: the value of the last iconv() call.  If there is no room
 *         left to store a replacement '?', (size_t)-1 is returned
 *         with errno set to E2BIG, exactly as iconv() itself reports
 *         a full output buffer.
 */
PUBLIC size_t
mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
{
	size_t sz;

	while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
			&& errno == EILSEQ) {
		if (*outbleft == 0) {
			/*
			 * No room for the replacement character.  Never
			 * store anything here: *outb points one past the
			 * end of the caller's buffer.
			 */
			errno = E2BIG;
			return (size_t)-1;
		}
		*(*outb)++ = '?';
		(*outbleft)--;

		if (*inbleft == 0) {
			/*
			 * Nothing left to skip.  Terminate the output
			 * only if there is still room for the NUL.
			 */
			if (*outbleft > 0)
				**outb = '\0';
			break;
		}
		/* skip the offending byte and try again */
		(*inb)++;
		(*inbleft)--;
	}
	return sz;
}
114 
115 /*
116  * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
117  * We don't care about the invalid character count, so don't bother
118  * with __iconv().  We do care about robustness, so call iconv_ft()
119  * above to try to recover from errors.
120  */
121 #define INBUFSIZE 1024
122 #define OUTBUFSIZE (INBUFSIZE * 2)
123 
124 PUBLIC void
125 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
126 {
127 	char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
128 	const char *in;
129 	size_t inbytes, outbytes, ret;
130 	iconv_t cd;
131 
132 	/*
133 	 * NOTE: iconv_t is actually a pointer typedef, so this
134 	 * conversion is not what it appears to be!
135 	 */
136 	cd = (iconv_t)cookie;
137 
138 	while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
139 		in = inbuf;
140 		while (inbytes > 0) {
141 			out = outbuf;
142 			outbytes = OUTBUFSIZE;
143 			ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
144 			if (ret == (size_t)-1 && errno != E2BIG) {
145 				if (errno != EINVAL || in == inbuf) {
146 					/* XXX - what is proper here?
147 					 * Just copy out the remains? */
148 					(void)fprintf(fo,
149 					    "\n\t[ iconv truncated message: %s ]\n\n",
150 					    strerror(errno));
151 					return;
152 				}
153 				/*
154 				 * If here: errno == EINVAL && in != inbuf
155 				 */
156 				/* incomplete input character */
157 				(void)memmove(inbuf, in, inbytes);
158 				ret = fread(inbuf + inbytes, 1,
159 				    INBUFSIZE - inbytes, fi);
160 				if (ret == 0) {
161 					if (feof(fi)) {
162 						(void)fprintf(fo,
163 						    "\n\t[ unexpected end of file; "
164 						    "the last character is "
165 						    "incomplete. ]\n\n");
166 						return;
167 					}
168 					(void)fprintf(fo,
169 					    "\n\t[ fread(): %s ]\n\n",
170 					    strerror(errno));
171 					return;
172 				}
173 				in = inbuf;
174 				inbytes += ret;
175 
176 			}
177 			if (outbytes < OUTBUFSIZE)
178 				(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
179 		}
180 	}
181 	/* reset the shift state of the output buffer */
182 	outbytes = OUTBUFSIZE;
183 	out = outbuf;
184 	ret = iconv(cd, NULL, NULL, &out, &outbytes);
185 	if (ret == (size_t)-1) {
186 		(void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
187 		    strerror(errno));
188 		return;
189 	}
190 	if (outbytes < OUTBUFSIZE)
191 		(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
192 }
193 
194 #endif	/* CHARSET_SUPPORT */
195 
196 
197 
198 /************************************************************************
199  * Core base64 routines
200  *
201  * Defined in sec 6.8 of RFC 2045.
202  */
203 
/*
 * Decode a base64 buffer.
 *
 *   bin:  buffer to hold the decoded (binary) result (see note 1).
 *   b64:  buffer holding the encoded (base64) source.
 *   cnt:  number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.  The contents of 'bin' are unspecified on error.
 * NOTES:
 *   1) It is the callers responsibility to ensure that bin is large
 *      enough to hold the result (3 bytes for every 4 bytes of b64).
 *   2) The b64 buffer must contain a multiple of 4 bytes of data;
 *      anything else is rejected with -1 rather than read past the
 *      end of the buffer.
 *   3) Decoding stops at the first quad that ends in '=' padding; any
 *      data after it is ignored.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)  (unsigned)((c) >= sizeof(b64index) ? BAD : b64index[(c)])
#define isdigit64(v)	((v) < 64)	/* neither EQU nor BAD */

	/* A partial quad would make the loop below read past 'end'. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/* The first two characters can never be padding. */
		if (!isdigit64(a) || !isdigit64(b))
			return -1;
		*p++ = (unsigned char)((a << 2) | (b >> 4));

		if (c == EQU) {	/* got "==": one byte in this quad */
			if (d != EQU)
				return -1;
			break;
		}
		if (!isdigit64(c))
			return -1;
		*p++ = (unsigned char)(((b & 0x0f) << 4) | (c >> 2));

		if (d == EQU)	/* got '=': two bytes in this quad */
			break;
		if (!isdigit64(d))
			return -1;
		*p++ = (unsigned char)(((c & 0x03) << 6) | d);
	}

#undef isdigit64
#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char*)bin;
}
269 
270 /*
271  * Encode a buffer as a base64 result.
272  *
273  *   b64:  buffer to hold the encoded (base64) result (see note).
274  *   bin:  buffer holding the binary source.
275  *   cnt:  number of bytes in the bin buffer to encode.
276  *
277  * NOTE: it is the callers responsibility to ensure that 'b64' is
278  *       large enough to hold the result.
279  */
280 PUBLIC void
281 mime_bintob64(char *b64, const char *bin, size_t cnt)
282 {
283 	static const char b64table[] =
284 	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
285 	const unsigned char *p = (const unsigned char*)bin;
286 	int i;
287 
288 	for (i = cnt; i > 0; i -= 3) {
289 		unsigned a = p[0];
290 		unsigned b = p[1];
291 		unsigned c = p[2];
292 
293 		b64[0] = b64table[a >> 2];
294 		switch(i) {
295 		case 1:
296 			b64[1] = b64table[((a & 0x3) << 4)];
297 			b64[2] = '=';
298 			b64[3] = '=';
299 			break;
300 		case 2:
301 			b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
302 			b64[2] = b64table[((b & 0xf) << 2)];
303 			b64[3] = '=';
304 			break;
305 		default:
306 			b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
307 			b64[2] = b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)];
308 			b64[3] = b64table[c & 0x3f];
309 			break;
310 		}
311 		p   += 3;
312 		b64 += 4;
313 	}
314 }
315 
316 
317 #define MIME_BASE64_LINE_MAX	(4 * 19)  /* max line length is 76: see RFC2045 sec 6.8 */
318 
319 static void
320 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
321 {
322 	static char b64[MIME_BASE64_LINE_MAX];
323 	static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
324 	int cnt;
325 	char *cp;
326 	size_t limit;
327 #ifdef __lint__
328 	cookie = cookie;
329 #endif
330 	limit = 0;
331 	if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
332 		limit = (size_t)atoi(cp);
333 	if (limit == 0 || limit > sizeof(b64))
334 		limit = sizeof(b64);
335 
336 	limit = 3 * roundup(limit, 4) / 4;
337 	if (limit < 3)
338 		limit = 3;
339 
340 	while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
341 		mime_bintob64(b64, mem, (size_t)cnt);
342 		(void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
343 		(void)putc('\n', fo);
344 	}
345 }
346 
347 static void
348 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
349 {
350 	char *line;
351 	size_t len;
352 	char *buf;
353 	size_t buflen;
354 
355 	buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
356 	buf = emalloc(buflen);
357 
358 	while ((line = fgetln(fi, &len)) != NULL) {
359 		ssize_t binlen;
360 		if (line[len-1] == '\n') /* forget the trailing newline */
361 			len--;
362 
363 		/* trash trailing white space */
364 		for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--)
365 			continue;
366 
367 		/* skip leading white space */
368 		for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++)
369 			continue;
370 
371 		if (len == 0)
372 			break;
373 
374 		if (3 * len > 4 * buflen) {
375 			buflen *= 2;
376 			buf = erealloc(buf, buflen);
377 		}
378 
379 		binlen = mime_b64tobin(buf, line, len);
380 
381 		if (binlen <= 0) {
382 			(void)fprintf(fo, "WARN: invalid base64 encoding\n");
383 			break;
384 		}
385 		(void)fwrite(buf, 1, (size_t)binlen, fo);
386 	}
387 
388 	free(buf);
389 
390 	if (add_lf)
391 		(void)fputc('\n', fo);
392 }
393 
394 
395 /************************************************************************
396  * Core quoted-printable routines.
397  *
398  * Note: the header QP routines are slightly different and buried
399  * inside mime_header.c
400  */
401 
/*
 * Decide whether a character must be quoted in quoted-printable
 * output.
 *
 *   p:    the character in question.
 *   end:  one past the last character of the input line.
 *   l:    length of the current output line so far.
 *
 * Return: non-zero if *p must be written as "=XX", zero if it can be
 *         copied as is.
 *
 * Control characters (other than tab and newline), '=', DEL and
 * everything above 0x7f are always quoted.  A space or tab is quoted
 * only when it is immediately followed by a newline.  At the start
 * of an output line (l == 0), 'F' is quoted when it starts "From"
 * with at least one more character following, and '.' is quoted when
 * it is immediately followed by a newline.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	int flag = *p > 0x7f ? Q : quotetab[*p];

	switch (flag) {
	case N:
		return 0;
	case Q:
		return 1;
	case SP:	/* trailing white space */
		return p + 1 < end && p[1] == '\n';
	case XF:	/* line may start with "From" */
		return l == 0 &&
		    p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';
	case XD:	/* line may consist of a single dot */
		return l == 0 && p + 1 < end && p[1] == '\n';
	default:
		break;
	}

	errx(EXIT_FAILURE,
	    "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

	/* Drop every helper macro so none leaks into the rest of the file. */
#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
460 
461 
#define MIME_QUOTED_LINE_MAX	76  /* QP max length: see RFC2045 sec 6.7 */

/*
 * Write one input line to a stream in quoted-printable encoding.
 *
 *   fo:     stream to write to.
 *   line:   the input line (not NUL terminated).
 *   len:    number of bytes in 'line'.
 *   limit:  maximum output line length, at most MIME_QUOTED_LINE_MAX.
 *
 * Characters are quoted as decided by mustquote().  Output lines
 * that would grow past 'limit' are split with a soft line break
 * ("=\n"); one column is always kept free for the '=' of that break.
 * A CRLF in the input is written as "=0D=0A" followed by a soft line
 * break.
 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
	size_t l;	/* length of current output line */
	unsigned char *beg;
	unsigned char *end;
	unsigned char *p;

	assert(limit <= MIME_QUOTED_LINE_MAX);

	beg = (unsigned char*)line;
	end = beg + len;
	l = 0;
	for (p = beg; p < end; p++) {
		int quote = mustquote(p, end, l);

		if (!quote && *p != '\n' && l + 2 > limit) {
			(void)fputs("=\n", fo);
			l = 0;
			/*
			 * This character now starts an output line, so
			 * the start-of-line rules ("From", lone '.')
			 * have to be checked again.
			 */
			quote = mustquote(p, end, l);
		}

		if (quote) {
			if (l + 4 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)fprintf(fo, "=%02X", *p);
			l += 3;
		}
		else if (*p == '\n') {
			if (p > beg && p[-1] == '\r') {
				/*
				 * "=0A=" is 4 columns wide: break first
				 * if it would overrun the limit.
				 */
				if (l + 4 > limit)
					(void)fputs("=\n", fo);
				(void)fputs("=0A=", fo);
			}
			(void)putc('\n', fo);
			l = 0;
		}
		else {
			(void)putc(*p, fo);
			l++;
		}
	}
	/*
	 * Lines ending in a blank must escape the newline.
	 */
	if (len && is_WSP(p[-1]))
		(void)fputs("=\n", fo);
}
506 
507 static void
508 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
509 {
510 	char *line;
511 	size_t len;
512 	char *cp;
513 	size_t limit;
514 
515 #ifdef __lint__
516 	cookie = cookie;
517 #endif
518 	limit = 0;
519 	if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
520 		limit = (size_t)atoi(cp);
521 	if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
522 		limit = MIME_QUOTED_LINE_MAX;
523 	if (limit < 4)
524 		limit = 4;
525 
526 	while ((line = fgetln(fi, &len)) != NULL)
527 		fput_quoted_line(fo, line, len, limit);
528 }
529 
530 static void
531 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
532 {
533 	char *line;
534 	size_t len;
535 
536 #ifdef __lint__
537 	cookie = cookie;
538 #endif
539 	while ((line = fgetln(fi, &len)) != NULL) {
540 		int c;
541 		char *p;
542 		char *end;
543 		end = line + len;
544 		for (p = line; p < end; p++) {
545 			if (*p == '=') {
546 				p++;
547 				while (p < end && is_WSP(*p))
548 					p++;
549 				if (*p != '\n' && p + 1 < end) {
550 					char buf[3];
551 					buf[0] = *p++;
552 					buf[1] = *p;
553 					buf[2] = '\0';
554 					c = strtol(buf, NULL, 16);
555 					(void)fputc(c, fo);
556 				}
557 			}
558 			else
559 				(void)fputc(*p, fo);
560 		}
561 	}
562 }
563 
564 
565 /************************************************************************
566  * Routines to select the codec by name.
567  */
568 
569 PUBLIC void
570 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
571 {
572 	int c;
573 
574 #ifdef __lint__
575 	cookie = cookie;
576 #endif
577 	while ((c = getc(fi)) != EOF)
578 		(void)putc(c, fo);
579 
580 	(void)fflush(fo);
581 	if (ferror(fi)) {
582 		warn("read");
583 		rewind(fi);
584 		return;
585 	}
586 	if (ferror(fo)) {
587 		warn("write");
588 		(void)Fclose(fo);
589 		rewind(fi);
590 		return;
591 	}
592 }
593 
594 
595 static const struct transfer_encoding_s {
596 	const char 	*name;
597 	mime_codec_t	enc;
598 	mime_codec_t	dec;
599 } transfer_encoding_tbl[] = {
600 	{ MIME_TRANSFER_7BIT,	mime_fio_copy,	    mime_fio_copy },
601 	{ MIME_TRANSFER_8BIT, 	mime_fio_copy,	    mime_fio_copy },
602 	{ MIME_TRANSFER_BINARY,	mime_fio_copy,	    mime_fio_copy },
603 	{ MIME_TRANSFER_QUOTED, mime_fQP_encode,    mime_fQP_decode },
604 	{ MIME_TRANSFER_BASE64, mime_fB64_encode,   mime_fB64_decode },
605 	{ NULL,			NULL,		    NULL },
606 };
607 
608 
609 PUBLIC mime_codec_t
610 mime_fio_encoder(const char *ename)
611 {
612 	const struct transfer_encoding_s *tep = NULL;
613 
614 	if (ename == NULL)
615 		return NULL;
616 
617 	for (tep = transfer_encoding_tbl; tep->name; tep++)
618 		if (strcasecmp(tep->name, ename) == 0)
619 			break;
620 	return tep->enc;
621 }
622 
623 PUBLIC mime_codec_t
624 mime_fio_decoder(const char *ename)
625 {
626 	const struct transfer_encoding_s *tep = NULL;
627 
628 	if (ename == NULL)
629 		return NULL;
630 
631 	for (tep = transfer_encoding_tbl; tep->name; tep++)
632 		if (strcasecmp(tep->name, ename) == 0)
633 			break;
634 	return tep->dec;
635 }
636 
637 /*
638  * This is for use in complete.c and mime.c to get the list of
639  * encoding names without exposing the transfer_encoding_tbl[].  The
640  * first name is returned if called with a pointer to a NULL pointer.
641  * Subsequent calls with the same cookie give successive names.  A
642  * NULL return indicates the end of the list.
643  */
644 PUBLIC const char *
645 mime_next_encoding_name(const void **cookie)
646 {
647 	const struct transfer_encoding_s *tep;
648 
649 	tep = *cookie;
650 	if (tep == NULL)
651 		tep = transfer_encoding_tbl;
652 
653 	*cookie = tep->name ? &tep[1] : NULL;
654 
655 	return tep->name;
656 }
657 
658 #endif /* MIME_SUPPORT */
659