xref: /netbsd-src/usr.bin/mail/mime_codecs.c (revision c0179c282a5968435315a82f4128c61372c68fc3)
1 /*	$NetBSD: mime_codecs.c,v 1.4 2006/10/24 19:57:05 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2006 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Anon Ymous.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *        This product includes software developed by the NetBSD
21  *        Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * This module contains all mime related codecs.  Typically there are
41  * two versions: one operating on buffers and one operating on files.
42  * All exported routines have a "mime_" prefix.  The file oriented
43  * routines have a "mime_f" prefix replacing the "mime_" prefix of the
44  * equivalent buffer based version.
45  *
46  * The file based API should be:
47  *
48  *   mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
49  *
50  * XXX - currently this naming convention has not been adhered to.
51  *
52  * where the cookie is a generic way to pass arguments to the routine.
53  * This way these routines can be run by run_function() in mime.c.
54  *
55  * The buffer based API is not as rigid.
56  */
57 
58 #ifdef MIME_SUPPORT
59 
60 #include <sys/cdefs.h>
61 #ifndef __lint__
62 __RCSID("$NetBSD: mime_codecs.c,v 1.4 2006/10/24 19:57:05 christos Exp $");
63 #endif /* not __lint__ */
64 
65 #include <assert.h>
66 #include <iconv.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <util.h>
70 
71 #include "def.h"
72 #include "extern.h"
73 #include "mime_codecs.h"
74 
75 
76 #ifdef CHARSET_SUPPORT
77 /************************************************************************
78  * Core character set conversion routines.
79  *
80  */
81 
82 /*
83  * Fault-tolerant iconv() function.
84  *
85  * This routine was borrowed from nail-11.25/mime.c and modified.  It
86  * tries to handle errno == EILSEQ by restarting at the next input
87  * byte (is this a good idea?).  All other errors are handled by the
88  * caller.
89  */
90 PUBLIC size_t
91 mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
92 {
93 	size_t sz = 0;
94 
95 	while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
96 			&& errno == EILSEQ) {
97 		if (*outbleft > 0) {
98 			*(*outb)++ = '?';
99 			(*outbleft)--;
100 		} else {
101 			**outb = '\0';
102 			return E2BIG;
103 		}
104 		if (*inbleft > 0) {
105 			(*inb)++;
106 			(*inbleft)--;
107 		} else {
108 			**outb = '\0';
109 			break;
110 		}
111 	}
112 	return sz;
113 }
114 
115 /*
116  * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
117  * We don't care about the invalid character count, so don't bother
118  * with __iconv().  We do care about robustness, so call iconv_ft()
119  * above to try to recover from errors.
120  */
121 #define INBUFSIZE 1024
122 #define OUTBUFSIZE (INBUFSIZE * 2)
123 
124 PUBLIC void
125 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
126 {
127 	char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
128 	const char *in;
129 	size_t inbytes, outbytes, ret;
130 	iconv_t cd;
131 
132 	/*
133 	 * NOTE: iconv_t is actually a pointer typedef, so this
134 	 * conversion is not what it appears to be!
135 	 */
136 	cd = (iconv_t)cookie;
137 
138 	while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
139 		in = inbuf;
140 		while (inbytes > 0) {
141 			out = outbuf;
142 			outbytes = OUTBUFSIZE;
143 			ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
144 			if (ret == (size_t)-1 && errno != E2BIG) {
145 				if (errno != EINVAL || in == inbuf) {
146 					/* XXX - what is proper here?
147 					 * Just copy out the remains? */
148 					(void)fprintf(fo,
149 					    "\n\t[ iconv truncated message: %s ]\n\n",
150 					    strerror(errno));
151 					return;
152 				}
153 				/*
154 				 * If here: errno == EINVAL && in != inbuf
155 				 */
156 				/* incomplete input character */
157 				(void)memmove(inbuf, in, inbytes);
158 				ret = fread(inbuf + inbytes, 1,
159 				    INBUFSIZE - inbytes, fi);
160 				if (ret == 0) {
161 					if (feof(fi)) {
162 						(void)fprintf(fo,
163 						    "\n\t[ unexpected end of file; "
164 						    "the last character is "
165 						    "incomplete. ]\n\n");
166 						return;
167 					}
168 					(void)fprintf(fo,
169 					    "\n\t[ fread(): %s ]\n\n",
170 					    strerror(errno));
171 					return;
172 				}
173 				in = inbuf;
174 				inbytes += ret;
175 
176 			}
177 			if (outbytes < OUTBUFSIZE)
178 				(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
179 		}
180 	}
181 	/* reset the shift state of the output buffer */
182 	outbytes = OUTBUFSIZE;
183 	out = outbuf;
184 	ret = iconv(cd, NULL, NULL, &out, &outbytes);
185 	if (ret == (size_t)-1) {
186 		(void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
187 		    strerror(errno));
188 		return;
189 	}
190 	if (outbytes < OUTBUFSIZE)
191 		(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
192 }
193 
194 #endif	/* CHARSET_SUPPORT */
195 
196 
197 
198 /************************************************************************
199  * Core base64 routines
200  *
201  * Defined in sec 6.8 of RFC 2045.
202  */
203 
/*
 * Decode a base64 buffer.
 *
 *   bin:  buffer to hold the decoded (binary) result (see note 1).
 *   b64:  buffer holding the encoded (base64) source.
 *   cnt:  number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 * NOTES:
 *   1) It is the callers responsibility to ensure that bin is large
 *      enough to hold the result (3 bytes for every 4 bytes of b64).
 *   2) The b64 buffer must contain a multiple of 4 bytes of data;
 *      any other count is rejected with -1 rather than read past the
 *      end of the buffer.
 *   3) Decoding stops at the first group that ends in '=' padding;
 *      anything following that group is ignored.
 *   4) On a -1 return the contents of 'bin' are unspecified.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)  (unsigned)((c) >= sizeof(b64index) ? BAD : b64index[(c)])
#define isdigit64(v) ((v) < 64)	/* neither BAD nor EQU */

	/* Each pass below consumes exactly four source bytes. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/* The first two characters can never be padding. */
		if (!isdigit64(a) || !isdigit64(b))
			return -1;
		*p++ = ((a << 2) | ((b & 0x30) >> 4));

		if (c == EQU)	{ /* got '=' */
			if (d != EQU)
				return -1;
			break;
		}
		if (!isdigit64(c))
			return -1;
		*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));

		if (d == EQU) { /* got '=' */
			break;
		}
		if (!isdigit64(d))
			return -1;
		*p++ = (((c & 0x03) << 6) | d);
	}

#undef isdigit64
#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char*)bin;
}
269 
/*
 * Encode a buffer as a base64 result.
 *
 *   b64:  buffer to hold the encoded (base64) result (see note).
 *   bin:  buffer holding the binary source.
 *   cnt:  number of bytes in the bin buffer to encode.
 *
 * Every group of three source bytes yields four output characters; a
 * final group of one or two bytes is padded with '=' to four
 * characters.  The result is NOT NUL terminated.
 *
 * NOTE: it is the callers responsibility to ensure that 'b64' is
 *       large enough to hold the result.
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char*)bin;

	/* Complete three byte groups. */
	for (/* EMPTY */; cnt >= 3; cnt -= 3, p += 3, b64 += 4) {
		b64[0] = b64table[p[0] >> 2];
		b64[1] = b64table[((p[0] & 0x3) << 4) | ((p[1] & 0xf0) >> 4)];
		b64[2] = b64table[((p[1] & 0xf) << 2) | ((p[2] & 0xc0) >> 6)];
		b64[3] = b64table[p[2] & 0x3f];
	}

	/*
	 * Trailing partial group.  Only the bytes that really exist are
	 * read; the source is never accessed beyond 'cnt' bytes.
	 */
	switch (cnt) {
	case 1:
		b64[0] = b64table[p[0] >> 2];
		b64[1] = b64table[((p[0] & 0x3) << 4)];
		b64[2] = '=';
		b64[3] = '=';
		break;
	case 2:
		b64[0] = b64table[p[0] >> 2];
		b64[1] = b64table[((p[0] & 0x3) << 4) | ((p[1] & 0xf0) >> 4)];
		b64[2] = b64table[((p[1] & 0xf) << 2)];
		b64[3] = '=';
		break;
	default:
		break;
	}
}
315 
316 
317 #define MIME_BASE64_LINE_MAX	(4 * 19)  /* max line length is 76: see RFC2045 sec 6.8 */
318 
319 static void
320 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
321 {
322 	static char b64[MIME_BASE64_LINE_MAX];
323 	static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
324 	int cnt;
325 	char *cp;
326 	size_t limit;
327 #ifdef __lint__
328 	cookie = cookie;
329 #endif
330 	limit = 0;
331 	if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
332 		limit = (size_t)atoi(cp);
333 	if (limit == 0 || limit > sizeof(b64))
334 		limit = sizeof(b64);
335 
336 	limit = 3 * roundup(limit, 4) / 4;
337 	if (limit < 3)
338 		limit = 3;
339 
340 	while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
341 		mime_bintob64(b64, mem, (size_t)cnt);
342 		(void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
343 		(void)putc('\n', fo);
344 	}
345 }
346 
347 static void
348 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
349 {
350 	char *line;
351 	size_t len;
352 	char *buf;
353 	size_t buflen;
354 
355 	buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
356 	buf = emalloc(buflen);
357 
358 	while ((line = fgetln(fi, &len)) != NULL) {
359 		ssize_t binlen;
360 		if (line[len-1] == '\n') /* forget the trailing newline */
361 			len--;
362 
363 		/* trash trailing white space */
364 		for (/* EMPTY */; len > 0 && isblank((unsigned char)line[len-1]); len--)
365 			continue;
366 
367 		/* skip leading white space */
368 		for (/* EMPTY */; len > 0 && isblank((unsigned char)line[0]); len--, line++)
369 			continue;
370 
371 		if (len == 0)
372 			break;
373 
374 		if (3 * len > 4 * buflen) {
375 			buflen *= 2;
376 			buf = erealloc(buf, buflen);
377 		}
378 
379 		binlen = mime_b64tobin(buf, line, len);
380 
381 		if (binlen <= 0) {
382 			(void)fprintf(fo, "WARN: invalid base64 encoding\n");
383 			break;
384 		}
385 		(void)fwrite(buf, 1, (size_t)binlen, fo);
386 	}
387 
388 	free(buf);
389 
390 	if (add_lf)
391 		(void)fputc('\n', fo);
392 }
393 
394 
395 /************************************************************************
396  * Core quoted-printable routines.
397  *
398  * Note: the header QP routines are slightly different and buried
399  * inside mime_header.c
400  */
401 
/*
 * Decide whether the character at 'p' has to be written in its
 * quoted ("=XX") form.
 *
 *   p:    the character to examine.
 *   end:  one past the last character of the input; used to look at
 *         the characters following 'p'.
 *   l:    length of the current output line so far; 0 means that 'p'
 *         would be the first character of an output line.
 *
 * Return: non-zero if the character must be quoted, 0 otherwise.
 *
 * Characters above 0x7f, control characters other than tab and
 * newline, '=' and DEL are always quoted.  A space or tab is quoted
 * only when a newline follows it.  An 'F' starting a line is quoted
 * when "rom" and at least one more character follow, and a '.'
 * starting a line is quoted when a newline follows it.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* The table only covers 7 bit characters; all others are quoted. */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	if (flag == N)
		return 0;
	if (flag == Q)
		return 1;
	if (flag == SP)
		return (p + 1 < end && p[1] == '\n');	/* trailing white space */

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return (p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm');

	if (flag == XD)	/* line may consist of a single dot */
		return (p + 1 < end && p[1] == '\n');

	errx(EXIT_FAILURE, "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

	/* Drop every helper macro defined above so none leaks out. */
#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
459 
460 
#define MIME_QUOTED_LINE_MAX	76  /* QP max length: see RFC2045 sec 6.7 */

/*
 * Write one input line to 'fo' in quoted-printable form.
 *
 *   fo:     stream to write to.
 *   line:   the input; 'len' bytes, not NUL terminated.
 *   len:    number of bytes in 'line'.
 *   limit:  maximum length of an output line, including the '=' of a
 *           soft line break; must not exceed MIME_QUOTED_LINE_MAX.
 *
 * Output lines that would become longer than 'limit' are split with
 * soft line breaks ("=\n").  A newline preceded by a carriage return
 * is written as "=0A" followed by a soft line break.
 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
	size_t l;	/* length of current output line */
	unsigned char *beg;
	unsigned char *end;
	unsigned char *p;

	assert(limit <= MIME_QUOTED_LINE_MAX);

	beg = (unsigned char*)line;
	end = beg + len;
	l = 0;
	for (p = beg; p < end; p++) {
		if (mustquote(p, end, l)) {
			/* need room for "=XX" plus a soft break '=' */
			if (l + 4 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)fprintf(fo, "=%02X", *p);
			l += 3;
			continue;
		}

		if (*p == '\n') {
			if (p > beg && p[-1] == '\r') {
				/*
				 * "=0A=" takes four columns as well; break
				 * first if it does not fit on this line.
				 */
				if (l + 4 > limit)
					(void)fputs("=\n", fo);
				(void)fputs("=0A=", fo);
			}
			(void)putc('\n', fo);
			l = 0;		/* a new output line starts here */
			continue;
		}

		/* need room for the character plus a soft break '=' */
		if (l + 2 > limit) {
			(void)fputs("=\n", fo);
			l = 0;
		}
		(void)putc(*p, fo);
		l++;
	}
	/*
	 * Lines ending in a blank must escape the newline.
	 */
	if (len && isblank((unsigned char)end[-1]))
		(void)fputs("=\n", fo);
}
505 
506 static void
507 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
508 {
509 	char *line;
510 	size_t len;
511 	char *cp;
512 	size_t limit;
513 
514 #ifdef __lint__
515 	cookie = cookie;
516 #endif
517 	limit = 0;
518 	if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
519 		limit = (size_t)atoi(cp);
520 	if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
521 		limit = MIME_QUOTED_LINE_MAX;
522 	if (limit < 4)
523 		limit = 4;
524 
525 	while ((line = fgetln(fi, &len)) != NULL)
526 		fput_quoted_line(fo, line, len, limit);
527 }
528 
529 static void
530 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
531 {
532 	char *line;
533 	size_t len;
534 
535 #ifdef __lint__
536 	cookie = cookie;
537 #endif
538 	while ((line = fgetln(fi, &len)) != NULL) {
539 		int c;
540 		char *p;
541 		char *end;
542 		end = line + len;
543 		for (p = line; p < end; p++) {
544 			if (*p == '=') {
545 				p++;
546 				while (p < end && isblank((unsigned char)*p))
547 					p++;
548 				if (*p != '\n' && p + 1 < end) {
549 					char buf[3];
550 					buf[0] = *p++;
551 					buf[1] = *p;
552 					buf[2] = '\0';
553 					c = strtol(buf, NULL, 16);
554 					(void)fputc(c, fo);
555 				}
556 			}
557 			else
558 				(void)fputc(*p, fo);
559 		}
560 	}
561 }
562 
563 
564 /************************************************************************
565  * Routines to select the codec by name.
566  */
567 
568 PUBLIC void
569 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
570 {
571 	int c;
572 
573 #ifdef __lint__
574 	cookie = cookie;
575 #endif
576 	while ((c = getc(fi)) != EOF)
577 		(void)putc(c, fo);
578 
579 	(void)fflush(fo);
580 	if (ferror(fi)) {
581 		warn("read");
582 		rewind(fi);
583 		return;
584 	}
585 	if (ferror(fo)) {
586 		warn("write");
587 		(void)Fclose(fo);
588 		rewind(fi);
589 		return;
590 	}
591 }
592 
593 
594 static const struct transfer_encoding_s {
595 	const char 	*name;
596 	mime_codec_t	enc;
597 	mime_codec_t	dec;
598 } transfer_encoding_tbl[] = {
599 	{ MIME_TRANSFER_7BIT,	mime_fio_copy,	    mime_fio_copy },
600 	{ MIME_TRANSFER_8BIT, 	mime_fio_copy,	    mime_fio_copy },
601 	{ MIME_TRANSFER_BINARY,	mime_fio_copy,	    mime_fio_copy },
602 	{ MIME_TRANSFER_QUOTED, mime_fQP_encode,    mime_fQP_decode },
603 	{ MIME_TRANSFER_BASE64, mime_fB64_encode,   mime_fB64_decode },
604 	{ NULL,			NULL,		    NULL },
605 };
606 
607 
608 PUBLIC mime_codec_t
609 mime_fio_encoder(const char *ename)
610 {
611 	const struct transfer_encoding_s *tep = NULL;
612 
613 	if (ename == NULL)
614 		return NULL;
615 
616 	for (tep = transfer_encoding_tbl; tep->name; tep++)
617 		if (strcasecmp(tep->name, ename) == 0)
618 			break;
619 	return tep->enc;
620 }
621 
622 PUBLIC mime_codec_t
623 mime_fio_decoder(const char *ename)
624 {
625 	const struct transfer_encoding_s *tep = NULL;
626 
627 	if (ename == NULL)
628 		return NULL;
629 
630 	for (tep = transfer_encoding_tbl; tep->name; tep++)
631 		if (strcasecmp(tep->name, ename) == 0)
632 			break;
633 	return tep->dec;
634 }
635 
636 /*
637  * This is for use in complete.c and mime.c to get the list of
638  * encoding names without exposing the transfer_encoding_tbl[].  The
639  * first name is returned if called with a pointer to a NULL pointer.
640  * Subsequent calls with the same cookie give successive names.  A
641  * NULL return indicates the end of the list.
642  */
643 PUBLIC const char *
644 mime_next_encoding_name(const void **cookie)
645 {
646 	const struct transfer_encoding_s *tep;
647 
648 	tep = *cookie;
649 	if (tep == NULL)
650 		tep = transfer_encoding_tbl;
651 
652 	*cookie = tep->name ? &tep[1] : NULL;
653 
654 	return tep->name;
655 }
656 
657 
658 #endif /* MIME_SUPPORT */
659