xref: /netbsd-src/usr.bin/mail/mime_codecs.c (revision 96fc3e30a7c3f7bba53384bf41dad5f78306fac4)
1 /*	$NetBSD: mime_codecs.c,v 1.10 2012/11/24 21:40:02 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2006 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Anon Ymous.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * This module contains all mime related codecs.  Typically there are
34  * two versions: one operating on buffers and one operating on files.
35  * All exported routines have a "mime_" prefix.  The file oriented
36  * routines have a "mime_f" prefix replacing the "mime_" prefix of the
37  * equivalent buffer based version.
38  *
39  * The file based API should be:
40  *
41  *   mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
42  *
 * XXX - currently this naming convention has not been adhered to.
44  *
45  * where the cookie is a generic way to pass arguments to the routine.
46  * This way these routines can be run by run_function() in mime.c.
47  *
48  * The buffer based API is not as rigid.
49  */
50 
51 #ifdef MIME_SUPPORT
52 
53 #include <sys/cdefs.h>
54 #ifndef __lint__
55 __RCSID("$NetBSD: mime_codecs.c,v 1.10 2012/11/24 21:40:02 christos Exp $");
56 #endif /* not __lint__ */
57 
58 #include <assert.h>
59 #include <iconv.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <util.h>
63 
64 #include "def.h"
65 #include "extern.h"
66 #include "mime_codecs.h"
67 
68 
69 #ifdef CHARSET_SUPPORT
70 /************************************************************************
71  * Core character set conversion routines.
72  *
73  */
74 
/*
 * Fault-tolerant iconv() function.
 *
 * This routine was borrowed from nail-11.25/mime.c and modified.  It
 * handles errno == EILSEQ by emitting a '?' in place of the offending
 * input byte and restarting the conversion at the next input byte
 * (is this a good idea?).  All other errors are handled by the
 * caller.
 *
 *   cd:        conversion descriptor, passed straight to iconv().
 *   inb:       in/out pointer to the unconverted input.
 *   inbleft:   in/out count of input bytes remaining.
 *   outb:      in/out pointer to the free part of the output buffer.
 *   outbleft:  in/out count of free output bytes remaining.
 *
 * Return: the result of the last iconv() call.  When (size_t)-1 is
 *         returned, errno describes the failure; errno is E2BIG if
 *         there was no room left to store the '?' replacement.
 *
 * NOTE: nothing is ever stored beyond the *outbleft bytes available
 *       at *outb.  If the input runs out right after an invalid
 *       byte, the output is NUL terminated only when there is room.
 */
PUBLIC size_t
mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
{
	size_t sz;

	while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
			&& errno == EILSEQ) {
		if (*outbleft == 0) {
			/*
			 * No room for the replacement character.  Report
			 * this the same way iconv() reports a full output
			 * buffer so the caller can drain it and retry.
			 */
			errno = E2BIG;
			return (size_t)-1;
		}
		*(*outb)++ = '?';
		(*outbleft)--;

		if (*inbleft == 0) {
			/* Nothing left to skip: give up with EILSEQ. */
			if (*outbleft > 0)
				**outb = '\0';
			break;
		}
		/* Step over the offending byte and try again. */
		(*inb)++;
		(*inbleft)--;
	}
	return sz;
}
107 
108 /*
109  * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
110  * We don't care about the invalid character count, so don't bother
111  * with __iconv().  We do care about robustness, so call iconv_ft()
112  * above to try to recover from errors.
113  */
114 #define INBUFSIZE 1024
115 #define OUTBUFSIZE (INBUFSIZE * 2)
116 
117 PUBLIC void
118 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
119 {
120 	char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
121 	const char *in;
122 	size_t inbytes, outbytes, ret;
123 	iconv_t cd;
124 
125 	/*
126 	 * NOTE: iconv_t is actually a pointer typedef, so this
127 	 * conversion is not what it appears to be!
128 	 */
129 	cd = (iconv_t)cookie;
130 
131 	while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
132 		in = inbuf;
133 		while (inbytes > 0) {
134 			out = outbuf;
135 			outbytes = OUTBUFSIZE;
136 			ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
137 			if (ret == (size_t)-1 && errno != E2BIG) {
138 				if (errno != EINVAL || in == inbuf) {
139 					/* XXX - what is proper here?
140 					 * Just copy out the remains? */
141 					(void)fprintf(fo,
142 					    "\n\t[ iconv truncated message: %s ]\n\n",
143 					    strerror(errno));
144 					return;
145 				}
146 				/*
147 				 * If here: errno == EINVAL && in != inbuf
148 				 */
149 				/* incomplete input character */
150 				(void)memmove(inbuf, in, inbytes);
151 				ret = fread(inbuf + inbytes, 1,
152 				    INBUFSIZE - inbytes, fi);
153 				if (ret == 0) {
154 					if (feof(fi)) {
155 						(void)fprintf(fo,
156 						    "\n\t[ unexpected end of file; "
157 						    "the last character is "
158 						    "incomplete. ]\n\n");
159 						return;
160 					}
161 					(void)fprintf(fo,
162 					    "\n\t[ fread(): %s ]\n\n",
163 					    strerror(errno));
164 					return;
165 				}
166 				in = inbuf;
167 				inbytes += ret;
168 
169 			}
170 			if (outbytes < OUTBUFSIZE)
171 				(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
172 		}
173 	}
174 	/* reset the shift state of the output buffer */
175 	outbytes = OUTBUFSIZE;
176 	out = outbuf;
177 	ret = iconv(cd, NULL, NULL, &out, &outbytes);
178 	if (ret == (size_t)-1) {
179 		(void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
180 		    strerror(errno));
181 		return;
182 	}
183 	if (outbytes < OUTBUFSIZE)
184 		(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
185 }
186 
187 #endif	/* CHARSET_SUPPORT */
188 
189 
190 
191 /************************************************************************
192  * Core base64 routines
193  *
194  * Defined in sec 6.8 of RFC 2045.
195  */
196 
/*
 * Decode a base64 buffer.
 *
 *   bin:  buffer to hold the decoded (binary) result (see note 1).
 *   b64:  buffer holding the encoded (base64) source.
 *   cnt:  number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 * NOTES:
 *   1) It is the callers responsibility to ensure that bin is large
 *      enough to hold the result (3 bytes for every 4 bytes of b64).
 *   2) The b64 buffer must contain a multiple of 4 bytes of data.
 *      Any other count is rejected with -1 before anything is read,
 *      so the decoder never looks beyond b64[cnt - 1].
 *   3) Decoding stops at the first group that contains '=' padding;
 *      any data following that group is ignored.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)  ((c) >= sizeof(b64index) ? BAD : (unsigned)b64index[(c)])

	/* A trailing partial group would make the loop read past 'end'. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/* '=' is only legal in the last two positions of a group */
		if (a == BAD || a == EQU || b == BAD || b == EQU ||
		    c == BAD || d == BAD)
			return -1;

		*p++ = ((a << 2) | ((b & 0x30) >> 4));
		if (c == EQU)	{ /* got '=' */
			if (d != EQU)	/* "x=y" is malformed padding */
				return -1;
			break;
		}
		*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
		if (d == EQU) { /* got '=' */
			break;
		}
		*p++ = (((c & 0x03) << 6) | d);
	}

#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char*)bin;
}
263 
/*
 * Encode a buffer as a base64 result.
 *
 *   b64:  buffer to hold the encoded (base64) result (see note).
 *   bin:  buffer holding the binary source.
 *   cnt:  number of bytes in the bin buffer to encode.
 *
 * Every group of up to 3 input bytes produces exactly 4 output
 * characters; a final short group is padded with '='.  The result is
 * not NUL terminated.
 *
 * NOTE: it is the callers responsibility to ensure that 'b64' is
 *       large enough to hold the result (4 bytes for every started
 *       group of 3 input bytes).
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char*)bin;

	while (cnt > 0) {
		/* number of real input bytes in this group: 1, 2 or 3 */
		size_t n = cnt < 3 ? cnt : 3;
		/*
		 * Only touch the bytes that actually exist; missing ones
		 * contribute zero bits, as the padding rules require.
		 */
		unsigned a = p[0];
		unsigned b = n > 1 ? p[1] : 0;
		unsigned c = n > 2 ? p[2] : 0;

		b64[0] = b64table[a >> 2];
		b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
		b64[2] = n > 1 ?
		    b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)] : '=';
		b64[3] = n > 2 ? b64table[c & 0x3f] : '=';

		p   += n;
		b64 += 4;
		cnt -= n;
	}
}
309 
310 
311 #define MIME_BASE64_LINE_MAX	(4 * 19)  /* max line length is 76: see RFC2045 sec 6.8 */
312 
313 static void
314 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
315 {
316 	static char b64[MIME_BASE64_LINE_MAX];
317 	static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
318 	size_t cnt;
319 	char *cp;
320 	size_t limit;
321 #ifdef __lint__
322 	cookie = cookie;
323 #endif
324 	limit = 0;
325 	if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
326 		limit = (size_t)atoi(cp);
327 	if (limit == 0 || limit > sizeof(b64))
328 		limit = sizeof(b64);
329 
330 	limit = 3 * roundup(limit, 4) / 4;
331 	if (limit < 3)
332 		limit = 3;
333 
334 	while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
335 		mime_bintob64(b64, mem, (size_t)cnt);
336 		(void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
337 		(void)putc('\n', fo);
338 	}
339 }
340 
341 static void
342 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
343 {
344 	char *line;
345 	size_t len;
346 	char *buf;
347 	size_t buflen;
348 
349 	buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
350 	buf = emalloc(buflen);
351 
352 	while ((line = fgetln(fi, &len)) != NULL) {
353 		ssize_t binlen;
354 		if (line[len-1] == '\n') /* forget the trailing newline */
355 			len--;
356 
357 		/* trash trailing white space */
358 		for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--)
359 			continue;
360 
361 		/* skip leading white space */
362 		for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++)
363 			continue;
364 
365 		if (len == 0)
366 			break;
367 
368 		if (3 * len > 4 * buflen) {
369 			buflen *= 2;
370 			buf = erealloc(buf, buflen);
371 		}
372 
373 		binlen = mime_b64tobin(buf, line, len);
374 
375 		if (binlen <= 0) {
376 			(void)fprintf(fo, "WARN: invalid base64 encoding\n");
377 			break;
378 		}
379 		(void)fwrite(buf, 1, (size_t)binlen, fo);
380 	}
381 
382 	free(buf);
383 
384 	if (add_lf)
385 		(void)fputc('\n', fo);
386 }
387 
388 
389 /************************************************************************
390  * Core quoted-printable routines.
391  *
 * Note: the header QP routines are slightly different and buried
 * inside mime_header.c
394  */
395 
/*
 * Decide whether the character at 'p' must be written in its "=XX"
 * quoted form.
 *
 *   p:    the character under consideration.
 *   end:  one past the last character of the input line.
 *   l:    length of the current output line; 0 means 'p' would be
 *         the first character on the output line.
 *
 * Return: non-zero if the character must be quoted, 0 otherwise.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
  		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* everything outside 7-bit ASCII is always quoted */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	switch (flag) {
	case N:
		return 0;
	case Q:
		return 1;
	case SP:
		/* white space is only quoted when it ends the line */
		return p + 1 < end && p[1] == '\n';
	case XF:
		/* a line may start with "From" */
		return l == 0 &&
		    p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';
	case XD:
		/* a line may consist of a single dot */
		return l == 0 && p + 1 < end && p[1] == '\n';
	default:
		break;
	}

	errx(EXIT_FAILURE,
	    "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
454 
455 
456 #define MIME_QUOTED_LINE_MAX	76  /* QP max length: see RFC2045 sec 6.7 */
457 
458 static void
459 fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
460 {
461 	size_t l;	/* length of current output line */
462 	unsigned char *beg;
463 	unsigned char *end;
464 	unsigned char *p;
465 
466 	assert(limit <= MIME_QUOTED_LINE_MAX);
467 
468 	beg = (unsigned char*)line;
469 	end = beg + len;
470 	l = 0;
471 	for (p = (unsigned char*)line; p < end; p++) {
472 		if (mustquote(p, end, l)) {
473 			if (l + 4 > limit) {
474 				(void)fputs("=\n", fo);
475 				l = 0;
476 			}
477 			(void)fprintf(fo, "=%02X", *p);
478 			l += 3;
479 		}
480 		else {
481 			if (*p == '\n') {
482 				if (p > beg && p[-1] == '\r')
483 					(void)fputs("=0A=", fo);
484 				l = (size_t)-1;
485 			}
486 			else if (l + 2 > limit) {
487 				(void)fputs("=\n", fo);
488 				l = 0;
489 			}
490 			(void)putc(*p, fo);
491 			l++;
492 		}
493 	}
494 	/*
495 	 * Lines ending in a blank must escape the newline.
496 	 */
497 	if (len && is_WSP(p[-1]))
498 		(void)fputs("=\n", fo);
499 }
500 
501 static void
502 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
503 {
504 	char *line;
505 	size_t len;
506 	char *cp;
507 	size_t limit;
508 
509 #ifdef __lint__
510 	cookie = cookie;
511 #endif
512 	limit = 0;
513 	if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
514 		limit = (size_t)atoi(cp);
515 	if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
516 		limit = MIME_QUOTED_LINE_MAX;
517 	if (limit < 4)
518 		limit = 4;
519 
520 	while ((line = fgetln(fi, &len)) != NULL)
521 		fput_quoted_line(fo, line, len, limit);
522 }
523 
524 static void
525 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
526 {
527 	char *line;
528 	size_t len;
529 
530 #ifdef __lint__
531 	cookie = cookie;
532 #endif
533 	while ((line = fgetln(fi, &len)) != NULL) {
534 		char *p;
535 		char *end;
536 
537 		end = line + len;
538 		for (p = line; p < end; p++) {
539 			if (*p == '=') {
540 				p++;
541 				while (p < end && is_WSP(*p))
542 					p++;
543 				if (*p != '\n' && p + 1 < end) {
544 					int c;
545 					char buf[3];
546 
547 					buf[0] = *p++;
548 					buf[1] = *p;
549 					buf[2] = '\0';
550 					c = (int)strtol(buf, NULL, 16);
551 					(void)fputc(c, fo);
552 				}
553 			}
554 			else
555 				(void)fputc(*p, fo);
556 		}
557 	}
558 }
559 
560 
561 /************************************************************************
562  * Routines to select the codec by name.
563  */
564 
565 PUBLIC void
566 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
567 {
568 	int c;
569 
570 #ifdef __lint__
571 	cookie = cookie;
572 #endif
573 	while ((c = getc(fi)) != EOF)
574 		(void)putc(c, fo);
575 
576 	(void)fflush(fo);
577 	if (ferror(fi)) {
578 		warn("read");
579 		rewind(fi);
580 		return;
581 	}
582 	if (ferror(fo)) {
583 		warn("write");
584 		(void)Fclose(fo);
585 		rewind(fi);
586 		return;
587 	}
588 }
589 
590 
591 static const struct transfer_encoding_s {
592 	const char 	*name;
593 	mime_codec_t	enc;
594 	mime_codec_t	dec;
595 } transfer_encoding_tbl[] = {
596 	{ MIME_TRANSFER_7BIT,	mime_fio_copy,	    mime_fio_copy },
597 	{ MIME_TRANSFER_8BIT, 	mime_fio_copy,	    mime_fio_copy },
598 	{ MIME_TRANSFER_BINARY,	mime_fio_copy,	    mime_fio_copy },
599 	{ MIME_TRANSFER_QUOTED, mime_fQP_encode,    mime_fQP_decode },
600 	{ MIME_TRANSFER_BASE64, mime_fB64_encode,   mime_fB64_decode },
601 	{ NULL,			NULL,		    NULL },
602 };
603 
604 
605 PUBLIC mime_codec_t
606 mime_fio_encoder(const char *ename)
607 {
608 	const struct transfer_encoding_s *tep = NULL;
609 
610 	if (ename == NULL)
611 		return NULL;
612 
613 	for (tep = transfer_encoding_tbl; tep->name; tep++)
614 		if (strcasecmp(tep->name, ename) == 0)
615 			break;
616 	return tep->enc;
617 }
618 
619 PUBLIC mime_codec_t
620 mime_fio_decoder(const char *ename)
621 {
622 	const struct transfer_encoding_s *tep = NULL;
623 
624 	if (ename == NULL)
625 		return NULL;
626 
627 	for (tep = transfer_encoding_tbl; tep->name; tep++)
628 		if (strcasecmp(tep->name, ename) == 0)
629 			break;
630 	return tep->dec;
631 }
632 
633 /*
634  * This is for use in complete.c and mime.c to get the list of
635  * encoding names without exposing the transfer_encoding_tbl[].  The
636  * first name is returned if called with a pointer to a NULL pointer.
637  * Subsequent calls with the same cookie give successive names.  A
638  * NULL return indicates the end of the list.
639  */
640 PUBLIC const char *
641 mime_next_encoding_name(const void **cookie)
642 {
643 	const struct transfer_encoding_s *tep;
644 
645 	tep = *cookie;
646 	if (tep == NULL)
647 		tep = transfer_encoding_tbl;
648 
649 	*cookie = tep->name ? &tep[1] : NULL;
650 
651 	return tep->name;
652 }
653 
654 #endif /* MIME_SUPPORT */
655