xref: /netbsd-src/usr.bin/mail/mime_codecs.c (revision b5677b36047b601b9addaaa494a58ceae82c2a6c)
1 /*	$NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2006 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Anon Ymous.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * This module contains all mime related codecs.  Typically there are
34  * two versions: one operating on buffers and one operating on files.
35  * All exported routines have a "mime_" prefix.  The file oriented
36  * routines have a "mime_f" prefix replacing the "mime_" prefix of the
37  * equivalent buffer based version.
38  *
39  * The file based API should be:
40  *
41  *   mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
42  *
 * XXX - currently this naming convention has not been adhered to.
44  *
45  * where the cookie is a generic way to pass arguments to the routine.
46  * This way these routines can be run by run_function() in mime.c.
47  *
48  * The buffer based API is not as rigid.
49  */
50 
51 #ifdef MIME_SUPPORT
52 
53 #include <sys/cdefs.h>
54 #ifndef __lint__
55 __RCSID("$NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $");
56 #endif /* not __lint__ */
57 
58 #include <assert.h>
59 #include <iconv.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <util.h>
63 
64 #include "def.h"
65 #include "extern.h"
66 #include "mime_codecs.h"
67 
68 
69 #ifdef CHARSET_SUPPORT
70 /************************************************************************
71  * Core character set conversion routines.
72  *
73  */
74 
75 /*
76  * Fault-tolerant iconv() function.
77  *
78  * This routine was borrowed from nail-11.25/mime.c and modified.  It
79  * tries to handle errno == EILSEQ by restarting at the next input
80  * byte (is this a good idea?).  All other errors are handled by the
81  * caller.
82  */
83 PUBLIC size_t
84 mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
85 {
86 	size_t sz = 0;
87 
88 	while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
89 			&& errno == EILSEQ) {
90 		if (*outbleft > 0) {
91 			*(*outb)++ = '?';
92 			(*outbleft)--;
93 		} else {
94 			**outb = '\0';
95 			return E2BIG;
96 		}
97 		if (*inbleft > 0) {
98 			(*inb)++;
99 			(*inbleft)--;
100 		} else {
101 			**outb = '\0';
102 			break;
103 		}
104 	}
105 	return sz;
106 }
107 
108 /*
109  * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
110  * We don't care about the invalid character count, so don't bother
111  * with __iconv().  We do care about robustness, so call iconv_ft()
112  * above to try to recover from errors.
113  */
114 #define INBUFSIZE 1024
115 #define OUTBUFSIZE (INBUFSIZE * 2)
116 
117 PUBLIC void
118 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
119 {
120 	char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
121 	const char *in;
122 	size_t inbytes, outbytes, ret;
123 	iconv_t cd;
124 
125 	/*
126 	 * NOTE: iconv_t is actually a pointer typedef, so this
127 	 * conversion is not what it appears to be!
128 	 */
129 	cd = (iconv_t)cookie;
130 
131 	while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
132 		in = inbuf;
133 		while (inbytes > 0) {
134 			out = outbuf;
135 			outbytes = OUTBUFSIZE;
136 			ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
137 			if (ret == (size_t)-1 && errno != E2BIG) {
138 				if (errno != EINVAL || in == inbuf) {
139 					/* XXX - what is proper here?
140 					 * Just copy out the remains? */
141 					(void)fprintf(fo,
142 					    "\n\t[ iconv truncated message: %s ]\n\n",
143 					    strerror(errno));
144 					return;
145 				}
146 				/*
147 				 * If here: errno == EINVAL && in != inbuf
148 				 */
149 				/* incomplete input character */
150 				(void)memmove(inbuf, in, inbytes);
151 				ret = fread(inbuf + inbytes, 1,
152 				    INBUFSIZE - inbytes, fi);
153 				if (ret == 0) {
154 					if (feof(fi)) {
155 						(void)fprintf(fo,
156 						    "\n\t[ unexpected end of file; "
157 						    "the last character is "
158 						    "incomplete. ]\n\n");
159 						return;
160 					}
161 					(void)fprintf(fo,
162 					    "\n\t[ fread(): %s ]\n\n",
163 					    strerror(errno));
164 					return;
165 				}
166 				in = inbuf;
167 				inbytes += ret;
168 
169 			}
170 			if (outbytes < OUTBUFSIZE)
171 				(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
172 		}
173 	}
174 	/* reset the shift state of the output buffer */
175 	outbytes = OUTBUFSIZE;
176 	out = outbuf;
177 	ret = iconv(cd, NULL, NULL, &out, &outbytes);
178 	if (ret == (size_t)-1) {
179 		(void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
180 		    strerror(errno));
181 		return;
182 	}
183 	if (outbytes < OUTBUFSIZE)
184 		(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
185 }
186 
187 #endif	/* CHARSET_SUPPORT */
188 
189 
190 
191 /************************************************************************
192  * Core base64 routines
193  *
194  * Defined in sec 6.8 of RFC 2045.
195  */
196 
197 /*
198  * Decode a base64 buffer.
199  *
200  *   bin:  buffer to hold the decoded (binary) result (see note 1).
201  *   b64:  buffer holding the encoded (base64) source.
202  *   cnt:  number of bytes in the b64 buffer to decode (see note 2).
203  *
204  * Return: the number of bytes written to the 'bin' buffer or -1 on
205  *         error.
206  * NOTES:
207  *   1) It is the callers responsibility to ensure that bin is large
208  *      enough to hold the result.
209  *   2) The b64 buffer should always contain a multiple of 4 bytes of
210  *      data!
211  */
212 PUBLIC ssize_t
213 mime_b64tobin(char *bin, const char *b64, size_t cnt)
214 {
215 	static const signed char b64index[] = {
216 		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
217 		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
218 		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
219 		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
220 		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
221 		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
222 		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
223 		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
224 	};
225 	unsigned char *p;
226 	const unsigned char *q, *end;
227 
228 #define EQU	(unsigned)-2
229 #define BAD	(unsigned)-1
230 #define uchar64(c)  ((c) >= sizeof(b64index) ? BAD : (unsigned)b64index[(c)])
231 
232 	p = (unsigned char *)bin;
233 	q = (const unsigned char *)b64;
234 	for (end = q + cnt; q < end; q += 4) {
235 		unsigned a = uchar64(q[0]);
236 		unsigned b = uchar64(q[1]);
237 		unsigned c = uchar64(q[2]);
238 		unsigned d = uchar64(q[3]);
239 
240 		*p++ = ((a << 2) | ((b & 0x30) >> 4));
241 		if (c == EQU)	{ /* got '=' */
242 			if (d != EQU)
243 				return -1;
244 			break;
245 		}
246 		*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
247 		if (d == EQU) { /* got '=' */
248 			break;
249 		}
250 		*p++ = (((c & 0x03) << 6) | d);
251 
252 		if (a == BAD || b == BAD || c == BAD || d == BAD)
253 			return -1;
254 	}
255 
256 #undef uchar64
257 #undef EQU
258 #undef BAD
259 
260 	return p - (unsigned char*)bin;
261 }
262 
263 /*
264  * Encode a buffer as a base64 result.
265  *
266  *   b64:  buffer to hold the encoded (base64) result (see note).
267  *   bin:  buffer holding the binary source.
268  *   cnt:  number of bytes in the bin buffer to encode.
269  *
270  * NOTE: it is the callers responsibility to ensure that 'b64' is
271  *       large enough to hold the result.
272  */
273 PUBLIC void
274 mime_bintob64(char *b64, const char *bin, size_t cnt)
275 {
276 	static const char b64table[] =
277 	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
278 	const unsigned char *p = (const unsigned char*)bin;
279 	ssize_t i;
280 
281 	for (i = cnt; i > 0; i -= 3) {
282 		unsigned a = p[0];
283 		unsigned b = p[1];
284 		unsigned c = p[2];
285 
286 		b64[0] = b64table[a >> 2];
287 		switch(i) {
288 		case 1:
289 			b64[1] = b64table[((a & 0x3) << 4)];
290 			b64[2] = '=';
291 			b64[3] = '=';
292 			break;
293 		case 2:
294 			b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
295 			b64[2] = b64table[((b & 0xf) << 2)];
296 			b64[3] = '=';
297 			break;
298 		default:
299 			b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
300 			b64[2] = b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)];
301 			b64[3] = b64table[c & 0x3f];
302 			break;
303 		}
304 		p   += 3;
305 		b64 += 4;
306 	}
307 }
308 
309 
310 #define MIME_BASE64_LINE_MAX	(4 * 19)  /* max line length is 76: see RFC2045 sec 6.8 */
311 
312 static void
313 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
314 {
315 	static char b64[MIME_BASE64_LINE_MAX];
316 	static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
317 	size_t cnt;
318 	char *cp;
319 	size_t limit;
320 #ifdef __lint__
321 	cookie = cookie;
322 #endif
323 	limit = 0;
324 	if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
325 		limit = (size_t)atoi(cp);
326 	if (limit == 0 || limit > sizeof(b64))
327 		limit = sizeof(b64);
328 
329 	limit = 3 * roundup(limit, 4) / 4;
330 	if (limit < 3)
331 		limit = 3;
332 
333 	while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
334 		mime_bintob64(b64, mem, (size_t)cnt);
335 		(void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
336 		(void)putc('\n', fo);
337 	}
338 }
339 
340 static void
341 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
342 {
343 	char *line;
344 	size_t len;
345 	char *buf;
346 	size_t buflen;
347 
348 	buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
349 	buf = emalloc(buflen);
350 
351 	while ((line = fgetln(fi, &len)) != NULL) {
352 		ssize_t binlen;
353 		if (line[len-1] == '\n') /* forget the trailing newline */
354 			len--;
355 
356 		/* trash trailing white space */
357 		for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--)
358 			continue;
359 
360 		/* skip leading white space */
361 		for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++)
362 			continue;
363 
364 		if (len == 0)
365 			break;
366 
367 		if (3 * len > 4 * buflen) {
368 			buflen *= 2;
369 			buf = erealloc(buf, buflen);
370 		}
371 
372 		binlen = mime_b64tobin(buf, line, len);
373 
374 		if (binlen <= 0) {
375 			(void)fprintf(fo, "WARN: invalid base64 encoding\n");
376 			break;
377 		}
378 		(void)fwrite(buf, 1, (size_t)binlen, fo);
379 	}
380 
381 	free(buf);
382 
383 	if (add_lf)
384 		(void)fputc('\n', fo);
385 }
386 
387 
388 /************************************************************************
389  * Core quoted-printable routines.
390  *
 * Note: the header QP routines are slightly different and buried
392  * inside mime_header.c
393  */
394 
/*
 * Decide whether the character at 'p' has to be written as a
 * quoted-printable "=XX" escape.
 *
 *   p:    the character under consideration.
 *   end:  one past the last character of the input line.
 *   l:    length of the current output line; 0 means 'p' would be
 *         the first character on it.
 *
 * Return: non-zero if the character must be quoted, 0 otherwise.
 *
 * Quoted are: bytes above 0x7f, control characters other than tab
 * and newline (so CR is quoted), DEL, '=', a space or tab directly
 * followed by a newline, and, only at the start of an output line,
 * the 'F' of "From" and a '.' directly followed by a newline.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* The table only covers 7-bit values; everything above is quoted. */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	if (flag == N)
		return 0;
	if (flag == Q)
		return 1;
	if (flag == SP)
		return p + 1 < end && p[1] == '\n'; /* trailing white space */

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';

	if (flag == XD)	/* line may consist of a single dot */
		return p + 1 < end && p[1] == '\n';

	errx(EXIT_FAILURE,
	    "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

	/* Drop every helper macro defined above so none leaks out. */
#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
453 
454 
#define MIME_QUOTED_LINE_MAX	76  /* QP max length: see RFC2045 sec 6.7 */

/*
 * Write one input line to 'fo' in quoted-printable form.
 *
 *   fo:     output stream.
 *   line:   the input line; it need not be NUL terminated.
 *   len:    number of bytes in 'line'.
 *   limit:  maximum output line length, at most MIME_QUOTED_LINE_MAX.
 *
 * Characters selected by mustquote() are written as "=XX".  A soft
 * line break ("=\n") is inserted whenever the next item, plus room
 * for a following soft break, would not fit within 'limit'.  A
 * newline preceded by a CR is written as "=0A" followed by a soft
 * break.  Input that ends in white space gets a final soft break so
 * that the blank is not the last thing on the output line.
 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
	size_t l;	/* length of current output line */
	unsigned char *beg;
	unsigned char *end;
	unsigned char *p;

	assert(limit <= MIME_QUOTED_LINE_MAX);

	beg = (unsigned char*)line;
	end = beg + len;
	l = 0;
	for (p = (unsigned char*)line; p < end; p++) {
		if (mustquote(p, end, l)) {
			/* "=XX" plus a possible '=' must still fit. */
			if (l + 4 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)fprintf(fo, "=%02X", *p);
			l += 3;
		}
		else {
			if (*p == '\n') {
				if (p > beg && p[-1] == '\r') {
					/*
					 * "=0A=" is four more columns; break
					 * the line first if they do not fit,
					 * otherwise the output line would
					 * exceed the limit.
					 */
					if (l + 4 > limit)
						(void)fputs("=\n", fo);
					(void)fputs("=0A=", fo);
				}
				/* wraps to 0 with the l++ below */
				l = (size_t)-1;
			}
			else if (l + 2 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)putc(*p, fo);
			l++;
		}
	}
	/*
	 * Lines ending in a blank must escape the newline.
	 */
	if (len && is_WSP(p[-1]))
		(void)fputs("=\n", fo);
}
499 
500 static void
501 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
502 {
503 	char *line;
504 	size_t len;
505 	char *cp;
506 	size_t limit;
507 
508 #ifdef __lint__
509 	cookie = cookie;
510 #endif
511 	limit = 0;
512 	if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
513 		limit = (size_t)atoi(cp);
514 	if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
515 		limit = MIME_QUOTED_LINE_MAX;
516 	if (limit < 4)
517 		limit = 4;
518 
519 	while ((line = fgetln(fi, &len)) != NULL)
520 		fput_quoted_line(fo, line, len, limit);
521 }
522 
523 static void
524 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
525 {
526 	char *line;
527 	size_t len;
528 
529 #ifdef __lint__
530 	cookie = cookie;
531 #endif
532 	while ((line = fgetln(fi, &len)) != NULL) {
533 		char *p;
534 		char *end;
535 
536 		end = line + len;
537 		for (p = line; p < end; p++) {
538 			if (*p == '=') {
539 				p++;
540 				while (p < end && is_WSP(*p))
541 					p++;
542 				if (*p != '\n' && p + 1 < end) {
543 					int c;
544 					char buf[3];
545 
546 					buf[0] = *p++;
547 					buf[1] = *p;
548 					buf[2] = '\0';
549 					c = (int)strtol(buf, NULL, 16);
550 					(void)fputc(c, fo);
551 				}
552 			}
553 			else
554 				(void)fputc(*p, fo);
555 		}
556 	}
557 }
558 
559 
560 /************************************************************************
561  * Routines to select the codec by name.
562  */
563 
564 PUBLIC void
565 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
566 {
567 	int c;
568 
569 #ifdef __lint__
570 	cookie = cookie;
571 #endif
572 	while ((c = getc(fi)) != EOF)
573 		(void)putc(c, fo);
574 
575 	(void)fflush(fo);
576 	if (ferror(fi)) {
577 		warn("read");
578 		rewind(fi);
579 		return;
580 	}
581 	if (ferror(fo)) {
582 		warn("write");
583 		(void)Fclose(fo);
584 		rewind(fi);
585 		return;
586 	}
587 }
588 
589 
590 static const struct transfer_encoding_s {
591 	const char 	*name;
592 	mime_codec_t	enc;
593 	mime_codec_t	dec;
594 } transfer_encoding_tbl[] = {
595 	{ MIME_TRANSFER_7BIT,	mime_fio_copy,	    mime_fio_copy },
596 	{ MIME_TRANSFER_8BIT, 	mime_fio_copy,	    mime_fio_copy },
597 	{ MIME_TRANSFER_BINARY,	mime_fio_copy,	    mime_fio_copy },
598 	{ MIME_TRANSFER_QUOTED, mime_fQP_encode,    mime_fQP_decode },
599 	{ MIME_TRANSFER_BASE64, mime_fB64_encode,   mime_fB64_decode },
600 	{ NULL,			NULL,		    NULL },
601 };
602 
603 
604 PUBLIC mime_codec_t
605 mime_fio_encoder(const char *ename)
606 {
607 	const struct transfer_encoding_s *tep = NULL;
608 
609 	if (ename == NULL)
610 		return NULL;
611 
612 	for (tep = transfer_encoding_tbl; tep->name; tep++)
613 		if (strcasecmp(tep->name, ename) == 0)
614 			break;
615 	return tep->enc;
616 }
617 
618 PUBLIC mime_codec_t
619 mime_fio_decoder(const char *ename)
620 {
621 	const struct transfer_encoding_s *tep = NULL;
622 
623 	if (ename == NULL)
624 		return NULL;
625 
626 	for (tep = transfer_encoding_tbl; tep->name; tep++)
627 		if (strcasecmp(tep->name, ename) == 0)
628 			break;
629 	return tep->dec;
630 }
631 
632 /*
633  * This is for use in complete.c and mime.c to get the list of
634  * encoding names without exposing the transfer_encoding_tbl[].  The
635  * first name is returned if called with a pointer to a NULL pointer.
636  * Subsequent calls with the same cookie give successive names.  A
637  * NULL return indicates the end of the list.
638  */
639 PUBLIC const char *
640 mime_next_encoding_name(const void **cookie)
641 {
642 	const struct transfer_encoding_s *tep;
643 
644 	tep = *cookie;
645 	if (tep == NULL)
646 		tep = transfer_encoding_tbl;
647 
648 	*cookie = tep->name ? &tep[1] : NULL;
649 
650 	return tep->name;
651 }
652 
653 #endif /* MIME_SUPPORT */
654