xref: /netbsd-src/usr.bin/mail/mime_header.c (revision ba2b5111fb00c84327d206476c903e46045b42f9)
1*ba2b5111Schristos /*	$NetBSD: mime_header.c,v 1.9 2013/02/14 18:23:45 christos Exp $	*/
28207b28aSchristos 
38207b28aSchristos /*-
48207b28aSchristos  * Copyright (c) 2006 The NetBSD Foundation, Inc.
58207b28aSchristos  * All rights reserved.
68207b28aSchristos  *
78207b28aSchristos  * This code is derived from software contributed to The NetBSD Foundation
88207b28aSchristos  * by Anon Ymous.
98207b28aSchristos  *
108207b28aSchristos  * Redistribution and use in source and binary forms, with or without
118207b28aSchristos  * modification, are permitted provided that the following conditions
128207b28aSchristos  * are met:
138207b28aSchristos  * 1. Redistributions of source code must retain the above copyright
148207b28aSchristos  *    notice, this list of conditions and the following disclaimer.
158207b28aSchristos  * 2. Redistributions in binary form must reproduce the above copyright
168207b28aSchristos  *    notice, this list of conditions and the following disclaimer in the
178207b28aSchristos  *    documentation and/or other materials provided with the distribution.
188207b28aSchristos  *
198207b28aSchristos  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
208207b28aSchristos  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
218207b28aSchristos  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
228207b28aSchristos  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
238207b28aSchristos  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
248207b28aSchristos  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
258207b28aSchristos  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
268207b28aSchristos  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
278207b28aSchristos  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
288207b28aSchristos  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
298207b28aSchristos  * POSSIBILITY OF SUCH DAMAGE.
308207b28aSchristos  */
318207b28aSchristos 
328207b28aSchristos 
338207b28aSchristos /*
348207b28aSchristos  * This module contains the core MIME header decoding routines.
358207b28aSchristos  * Please refer to RFC 2047 and RFC 2822.
368207b28aSchristos  */
378207b28aSchristos 
388207b28aSchristos #ifdef MIME_SUPPORT
398207b28aSchristos 
408207b28aSchristos #include <sys/cdefs.h>
418207b28aSchristos #ifndef __lint__
42*ba2b5111Schristos __RCSID("$NetBSD: mime_header.c,v 1.9 2013/02/14 18:23:45 christos Exp $");
438207b28aSchristos #endif /* not __lint__ */
448207b28aSchristos 
45ca13337dSchristos #include <assert.h>
468207b28aSchristos #include <stdio.h>
478207b28aSchristos #include <stdlib.h>
488207b28aSchristos #include <string.h>
498207b28aSchristos 
508207b28aSchristos #include "def.h"
518207b28aSchristos #include "extern.h"
528207b28aSchristos #include "mime.h"
538207b28aSchristos #include "mime_header.h"
548207b28aSchristos #include "mime_codecs.h"
558207b28aSchristos 
/*
 * Copy the charset name of an encoded-word into from_cs.
 *
 * p points just past the leading "=?" of the encoded-word.  Characters
 * are copied up to (but not including) the next '?', and the copy is
 * NUL terminated.
 *
 * from_cs     - destination buffer.
 * from_cs_len - size of from_cs in bytes, including room for the NUL.
 *
 * Returns a pointer to the character following the terminating '?', or
 * NULL if the string ends before a '?' is seen or the name does not fit
 * in from_cs.  On failure the contents of from_cs are unspecified.
 */
static const char *
grab_charset(char *from_cs, size_t from_cs_len, const char *p)
{
	char *q, *qlast;

	/* A zero-length buffer cannot even hold the terminating NUL. */
	if (from_cs_len == 0)
		return NULL;

	qlast = from_cs + from_cs_len - 1;	/* slot reserved for the NUL */
	for (q = from_cs; *p != '?'; p++) {
		if (*p == '\0' || q >= qlast)
			return NULL;
		*q++ = *p;
	}
	*q = '\0';
	return p + 1;	/* if here, then we got the '?' */
}
698207b28aSchristos 
708207b28aSchristos /*
718207b28aSchristos  * An encoded word is a string of at most 75 non-white space
728207b28aSchristos  * characters of the following form:
738207b28aSchristos  *
748207b28aSchristos  *  =?charset?X?encoding?=
758207b28aSchristos  *
768207b28aSchristos  * where:
778207b28aSchristos  *   'charset'	is the original character set of the unencoded string.
788207b28aSchristos  *
798207b28aSchristos  *   'X'	is the encoding type 'B' or 'Q' for "base64" or
808207b28aSchristos  *              "quoted-printable", respectively,
818207b28aSchristos  *   'encoding'	is the encoded string.
828207b28aSchristos  *
838207b28aSchristos  * Both 'charset' and 'X' are case independent and 'encoding' cannot
848207b28aSchristos  * contain any whitespace or '?' characters.  The 'encoding' must also
858207b28aSchristos  * be fully contained within the encoded words, i.e., it cannot be
868207b28aSchristos  * split between encoded words.
878207b28aSchristos  *
888207b28aSchristos  * Note: the 'B' encoding is a slightly modified "quoted-printable"
898207b28aSchristos  * encoding.  In particular, spaces (' ') may be encoded as '_' to
908207b28aSchristos  * improve undecoded readability.
918207b28aSchristos  */
928207b28aSchristos static int
decode_word(const char ** ibuf,char ** obuf,char * oend,const char * to_cs)938207b28aSchristos decode_word(const char **ibuf, char **obuf, char *oend, const char *to_cs)
948207b28aSchristos {
958207b28aSchristos 	ssize_t declen;
968207b28aSchristos 	size_t enclen, dstlen;
978207b28aSchristos 	char decword[LINESIZE];
988207b28aSchristos 	char from_cs[LINESIZE];
998207b28aSchristos 	const char *encword, *iend, *p;
1008207b28aSchristos 	char *dstend;
1018207b28aSchristos 	char enctype;
1028207b28aSchristos 
1038207b28aSchristos 	p = *ibuf;
1048207b28aSchristos 	if (p[0] != '=' && p[1] != '?')
1058207b28aSchristos 		return -1;
1068207b28aSchristos 	if (strlen(p) <  2 + 1 + 3 + 1 + 2)
1078207b28aSchristos 		return -1;
1088207b28aSchristos 	p = grab_charset(from_cs, sizeof(from_cs), p + 2);
1098207b28aSchristos 	if (p == NULL)
1108207b28aSchristos 		return -1;
1118207b28aSchristos 	enctype = *p++;
1128207b28aSchristos 	if (*p++ != '?')
1138207b28aSchristos 		return -1;
1148207b28aSchristos 	encword = p;
1158207b28aSchristos 	p = strchr(p, '?');
1168207b28aSchristos 	if (p == NULL || p[1] != '=')
1178207b28aSchristos 		return -1;
1188207b28aSchristos 	enclen = p - encword;	/* length of encoded substring */
1198207b28aSchristos 	iend = p + 2;
1208207b28aSchristos 	/* encoded words are at most 75 characters (RFC 2047, sec 2) */
1218207b28aSchristos 	if (iend > *ibuf + 75)
1228207b28aSchristos 		return -1;
1238207b28aSchristos 
124ca13337dSchristos 	if (oend < *obuf + 1) {
125ca13337dSchristos 		assert(/*CONSTCOND*/ 0);	/* We have a coding error! */
126ca13337dSchristos 		return -1;
127ca13337dSchristos 	}
1288207b28aSchristos 	dstend = to_cs ? decword : *obuf;
129c172e3b9Slukem 	dstlen = (to_cs ? sizeof(decword) : (size_t)(oend - *obuf)) - 1;
1308207b28aSchristos 
131*ba2b5111Schristos 	declen = mime_rfc2047_decode(enctype, dstend, dstlen, encword, enclen);
1328207b28aSchristos 	if (declen == -1)
1338207b28aSchristos 		return -1;
1348207b28aSchristos 
1358207b28aSchristos 	dstend += declen;
1368207b28aSchristos #ifdef CHARSET_SUPPORT
1378207b28aSchristos 	if (to_cs != NULL) {
1388207b28aSchristos 		iconv_t cd;
1398207b28aSchristos 		const char *src;
1408207b28aSchristos 		size_t srclen;
1418207b28aSchristos 		size_t cnt;
1428207b28aSchristos 
1438207b28aSchristos 		cd = iconv_open(to_cs, from_cs);
1448207b28aSchristos 		if (cd == (iconv_t)-1)
1458207b28aSchristos 			return -1;
1468207b28aSchristos 
1478207b28aSchristos 		src = decword;
1488207b28aSchristos 		srclen = declen;
1498207b28aSchristos 		dstend = *obuf;
1508207b28aSchristos 		dstlen = oend - *obuf - 1;
1518207b28aSchristos 		cnt = mime_iconv(cd, &src, &srclen, &dstend, &dstlen);
1528207b28aSchristos 
1538207b28aSchristos 		(void)iconv_close(cd);
1548207b28aSchristos 		if (cnt == (size_t)-1)
1558207b28aSchristos 			return -1;
1568207b28aSchristos 	}
1578207b28aSchristos #endif /* CHARSET_SUPPORT */
1588207b28aSchristos 	*dstend = '\0';
1598207b28aSchristos 	*ibuf = iend;
1608207b28aSchristos 	*obuf = dstend;
1618207b28aSchristos 	return 0;
1628207b28aSchristos }
1638207b28aSchristos 
1648207b28aSchristos 
/*
 * Folding White Space (FWS), as described in RFC 2822.
 *
 * RFC 2822 only permits '\r' and '\n' as part of a CRLF pair ("\r\n").
 * By the time mail(1) looks at a message, however, every CRLF pair has
 * already been reduced to a single '\n', so a bare '\n' is what counts
 * as the line-break part of FWS here.
 *
 * XXX - pull is_FWS() and skip_FWS() up to def.h?
 */

/*
 * Return 1 if c is a space, a tab or a newline, and 0 otherwise.
 */
static inline int
is_FWS(int c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
1808207b28aSchristos 
/*
 * Return a pointer to the first character at or after p that is not
 * folding white space.  The terminating NUL is not FWS, so the scan
 * never runs off the end of a NUL terminated string.
 */
static inline const char *
skip_FWS(const char *p)
{
	for (/*EMPTY*/; is_FWS(*p); p++)
		continue;
	return p;
}
1888207b28aSchristos 
/*
 * Flush white space that was skipped (but remembered) after an
 * encoded word.
 *
 * If *src is not NULL, copy the bytes in [*src, srcend) to *dst,
 * stopping early when dstend is reached.  Afterwards *dst points just
 * past the last byte written and *src is reset to NULL to mark the
 * white space as dealt with.  If *src is NULL nothing is changed.
 */
static inline void
copy_skipped_FWS(char **dst, char *dstend, const char **src, const char *srcend)
{
	const char *in;
	char *out;

	if (*src == NULL)
		return;		/* nothing pending */

	out = *dst;
	for (in = *src; in < srcend && out < dstend; in++)
		*out++ = *in;

	*dst = out;
	*src = NULL;
}
2078207b28aSchristos 
2088207b28aSchristos /*
2098207b28aSchristos  * Decode an unstructured field.
2108207b28aSchristos  *
2118207b28aSchristos  * See RFC 2822 Sec 2.2.1 and 3.6.5.
2128207b28aSchristos  * Encoded words may occur anywhere in unstructured fields provided
2138207b28aSchristos  * they are separated from any other text or encoded words by at least
2148207b28aSchristos  * one linear-white-space character. (See RFC 2047 sec 5.1.)  If two
2158207b28aSchristos  * encoded words occur sequentially (separated by only FWS) then the
2168207b28aSchristos  * separating FWS is removed.
2178207b28aSchristos  *
2188207b28aSchristos  * NOTE: unstructured fields cannot contain 'quoted-pairs' (see
2198207b28aSchristos  * RFC2822 sec 3.2.6 and RFC 2047), but that is no problem as a '\\'
2208207b28aSchristos  * (or any non-whitespace character) immediately before an
2218207b28aSchristos  * encoded-word will prevent it from being decoded.
2228207b28aSchristos  *
2238207b28aSchristos  * hstring should be a NULL terminated string.
2248207b28aSchristos  * outbuf should be sufficiently large to hold the result.
2258207b28aSchristos  */
2268207b28aSchristos static void
mime_decode_usfield(char * outbuf,size_t outsize,const char * hstring)2278207b28aSchristos mime_decode_usfield(char *outbuf, size_t outsize, const char *hstring)
2288207b28aSchristos {
2298207b28aSchristos 	const char *p, *p0;
2308207b28aSchristos 	char *q, *qend;
2318207b28aSchristos 	int lastc;
2328207b28aSchristos 	const char *charset;
2338207b28aSchristos 
2348207b28aSchristos 	charset = value(ENAME_MIME_CHARSET);
2358207b28aSchristos 	qend = outbuf + outsize - 1; /* Make sure there is room for the trailing NULL! */
2368207b28aSchristos 	q = outbuf;
2378207b28aSchristos 	p = hstring;
2388207b28aSchristos 	p0 = NULL;
2398207b28aSchristos 	lastc = (unsigned char)' ';
2408207b28aSchristos 	while (*p && q < qend) {
2418207b28aSchristos 		const char *p1;
2428207b28aSchristos 		char *q1;
2438207b28aSchristos 		if (is_FWS(lastc) && p[0] == '=' && p[1] == '?' &&
2448207b28aSchristos 		    decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
245d727506fSchristos 		    (*p1 == '\0' || is_FWS(*p1))) {
2468207b28aSchristos 			p0 = p1;  /* pointer to first character after encoded word */
2478207b28aSchristos 			q = q1;
2488207b28aSchristos 			p = skip_FWS(p1);
2498207b28aSchristos 			lastc = (unsigned char)*p0;
2508207b28aSchristos 		}
2518207b28aSchristos 		else {
2528207b28aSchristos 			copy_skipped_FWS(&q, qend, &p0, p);
2538207b28aSchristos 			lastc = (unsigned char)*p;
2548207b28aSchristos 			if (q < qend)
2558207b28aSchristos 				*q++ = *p++;
2568207b28aSchristos 		}
2578207b28aSchristos 	}
2588207b28aSchristos 	copy_skipped_FWS(&q, qend, &p0, p);
2598207b28aSchristos 	*q = '\0';
2608207b28aSchristos }
2618207b28aSchristos 
/*
 * Decode a field comment.
 *
 * Comments appear only in structured fields.  They may nest (rfc 2822,
 * sec 3.2.3) and may contain both 'encoded-words' and 'quoted-pairs';
 * apart from that they behave like unstructured text enclosed in '('
 * and ')'.
 *
 * On entry *ibuf points just after the opening '(' (which the caller
 * has already copied) and *obuf at the next free output byte.  On
 * return both have been advanced; if a closing ')' was found it has
 * been copied and consumed.  Nested comments are handled recursively.
 *
 * Returns 0, or -1 if a nested call reports failure (in which case
 * *ibuf and *obuf are not updated).
 */
static int
decode_comment(char **obuf, char *oend, const char **ibuf, const char *iend, const char *charset)
{
	const char *in = *ibuf;
	char *out = *obuf;
	const char *pending = NULL;	/* skipped FWS not yet copied */
	int prev = ' ';			/* character preceding *in */

	while (in < iend && out < oend) {
		const char *after = in;
		char *wend = out;

		if (is_FWS(prev) && in[0] == '=' && in[1] == '?' &&
		    decode_word(&after, &wend, oend, charset) == 0 &&
		    (*after == ')' || is_FWS(*after))) {
			prev = (unsigned char)*after;
			pending = after;
			out = wend;
			in = skip_FWS(after);
			/*
			 * XXX - should not be needed: *iend is expected to
			 * be '\0', which already stops skip_FWS().
			 */
			if (in > iend)
				in = iend;
			continue;
		}

		copy_skipped_FWS(&out, oend, &pending, in);
		if (out >= oend)	/* XXX - out > oend cannot happen */
			break;

		if (*in == ')') {
			*out++ = *in++;	/* copy the closing ')' */
			break;		/* this comment is finished */
		}

		if (*in == '(') {
			*out++ = *in++;	/* copy the opening '(' */
			if (decode_comment(&out, oend, &in, iend, charset) == -1)
				return -1;	/* is this right or should we update? */
			prev = ')';
			continue;
		}

		if (*in == '\\' && in + 1 < iend) {	/* quoted-pair */
			/* keep the '\\' only where it is still required */
			if (in[1] == '(' || in[1] == ')' || in[1] == '\\')
				*out++ = *in;
			in++;
			prev = (unsigned char)*in;
			if (out < oend)
				*out++ = *in++;
			continue;
		}

		prev = (unsigned char)*in;
		*out++ = *in++;
	}
	*ibuf = in;
	*obuf = out;
	return 0;
}
3358207b28aSchristos 
/*
 * Decode a quoted-string or no-fold-quote.
 *
 * Encoded words are not recognized inside these.  The only special
 * character is '\\', which introduces a quoted-pair.  See RFC 2822
 * sec 3.2.5 and 3.6.4.
 *
 * On entry *ibuf points just after the opening '"'.  Text is copied to
 * *obuf up to and including the closing '"', or until iend or oend is
 * reached.  The '\\' of a quoted-pair is kept only in front of '"' and
 * '\\'; otherwise it is dropped.  *ibuf and *obuf are advanced past
 * what was consumed and produced.
 */
static void
decode_quoted_string(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *in = *ibuf;
	char *out = *obuf;

	while (in < iend && out < oend) {
		const char c = *in;

		if (c == '"') {
			*out++ = c;	/* copy the closing '"' */
			in++;
			break;
		}
		if (c == '\\' && iend - in > 1) { /* quoted-pair */
			const char next = in[1];

			if (next == '"' || next == '\\') {
				*out++ = c;	/* the '\\' is still needed */
				if (out >= oend)
					break;
			}
			in++;
		}
		*out++ = *in++;
	}
	*ibuf = in;
	*obuf = out;
}
3718207b28aSchristos 
/*
 * Decode a domain-literal or no-fold-literal.
 *
 * Encoded words are not recognized inside these.  They are delimited by
 * '[' and ']' and may contain quoted-pairs, so '\\', '[' and ']' are
 * the special characters.  See RFC 2822 sec 3.4.1 and 3.6.4.
 *
 * On entry *ibuf points just after the opening '['.  Text is copied to
 * *obuf up to and including the closing ']', or until iend or oend is
 * reached.  The '\\' of a quoted-pair is kept only in front of '[',
 * ']' and '\\'; otherwise it is dropped.  *ibuf and *obuf are advanced
 * past what was consumed and produced.
 */
static void
decode_domain_literal(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *in = *ibuf;
	char *out = *obuf;

	while (in < iend && out < oend) {
		const char c = *in;

		if (c == ']') {
			*out++ = c;	/* copy the closing ']' */
			in++;
			break;
		}
		if (c == '\\' && iend - in > 1) { /* quoted-pair */
			const char next = in[1];

			if (next == '[' || next == ']' || next == '\\') {
				*out++ = c;	/* the '\\' is still needed */
				if (out >= oend)
					break;
			}
			in++;
		}
		*out++ = *in++;
	}
	*ibuf = in;
	*obuf = out;
}
4078207b28aSchristos 
/*
 * Specials: see RFC 2822 sec 3.2.1.
 *
 * Return 1 if c is one of the characters
 *	( ) < > [ ] : ; @ \ , . "
 * and 0 for every other value, including values outside 0..127.
 */
static inline int
is_specials(int c)
{
	switch (c) {
	case '"':
	case '(':
	case ')':
	case ',':
	case '.':
	case ':':
	case ';':
	case '<':
	case '>':
	case '@':
	case '[':
	case '\\':
	case ']':
		return 1;
	default:
		return 0;
	}
}
4278207b28aSchristos 
4288207b28aSchristos /*
4298207b28aSchristos  * Decode a structured field.
4308207b28aSchristos  *
4318207b28aSchristos  * At the top level, structured fields can only contain encoded-words
4328207b28aSchristos  * via 'phrases' and 'comments'.  See RFC 2047 sec 5.
4338207b28aSchristos  */
4348207b28aSchristos static void
mime_decode_sfield(char * linebuf,size_t bufsize,const char * hstring)4358207b28aSchristos mime_decode_sfield(char *linebuf, size_t bufsize, const char *hstring)
4368207b28aSchristos {
4378207b28aSchristos 	const char *p, *pend, *p0;
4388207b28aSchristos 	char *q, *qend;
4398207b28aSchristos 	const char *charset;
4408207b28aSchristos 	int lastc;
4418207b28aSchristos 
4428207b28aSchristos 	charset = value(ENAME_MIME_CHARSET);
4438207b28aSchristos 
4448207b28aSchristos 	p = hstring;
4458207b28aSchristos 	q = linebuf;
4468207b28aSchristos 	pend = hstring + strlen(hstring);
4478207b28aSchristos 	qend = linebuf + bufsize - 1;	/* save room for the NULL terminator */
4488207b28aSchristos 	lastc = (unsigned char)' ';
4498207b28aSchristos 	p0 = NULL;
4508207b28aSchristos 	while (p < pend && q < qend) {
4518207b28aSchristos 		const char *p1;
4528207b28aSchristos 		char *q1;
4538207b28aSchristos 
4548207b28aSchristos 		if (*p != '=') {
4558207b28aSchristos 			copy_skipped_FWS(&q, qend, &p0, p);
4568207b28aSchristos 			if (q >= qend)
4578207b28aSchristos 				break;
4588207b28aSchristos 		}
4598207b28aSchristos 
4608207b28aSchristos 		switch (*p) {
4618207b28aSchristos 		case '(':	/* start of comment */
4628207b28aSchristos 			*q++ = *p++;	/* copy the opening '(' */
4638207b28aSchristos 			(void)decode_comment(&q, qend, &p, pend, charset);
4648207b28aSchristos 			lastc = (unsigned char)p[-1];
4658207b28aSchristos 			break;
4668207b28aSchristos 
4678207b28aSchristos 		case '"':	/* start of quoted-string or no-fold-quote */
4688207b28aSchristos 			*q++ = *p++;	/* copy the opening '"' */
4698207b28aSchristos 			decode_quoted_string(&q, qend, &p, pend);
4708207b28aSchristos 			lastc = (unsigned char)p[-1];
4718207b28aSchristos 			break;
4728207b28aSchristos 
4738207b28aSchristos 		case '[':	/* start of domain-literal or no-fold-literal */
4748207b28aSchristos 			*q++ = *p++;	/* copy the opening '[' */
4758207b28aSchristos 			decode_domain_literal(&q, qend, &p, pend);
4768207b28aSchristos 			lastc = (unsigned char)p[-1];
4778207b28aSchristos 			break;
4788207b28aSchristos 
4798207b28aSchristos 		case '\\':	/* start of quoted-pair */
4808207b28aSchristos 			if (p + 1 < pend) {		/* quoted pair */
4818207b28aSchristos 				if (is_specials(p[1])) {
4828207b28aSchristos 					*q++ = *p;
4838207b28aSchristos 					if (q >= qend)
4848207b28aSchristos 						break;
4858207b28aSchristos 				}
4868207b28aSchristos 				p++;	/* skip the '\\' */
4878207b28aSchristos 			}
4888207b28aSchristos 			goto copy_char;
4898207b28aSchristos 
4908207b28aSchristos 		case '=':
4918207b28aSchristos 			/*
4928207b28aSchristos 			 * At this level encoded words can appear via
4938207b28aSchristos 			 * 'phrases' (possibly delimited by ',' as in
4948207b28aSchristos 			 * 'keywords').  Thus we handle them as such.
4958207b28aSchristos 			 * Hopefully this is sufficient.
4968207b28aSchristos 			 */
4978207b28aSchristos 			if ((lastc == ',' || is_FWS(lastc)) && p[1] == '?' &&
4988207b28aSchristos 			    decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
499d727506fSchristos 			    (*p1 == '\0' || *p1 == ',' || is_FWS(*p1))) {
5008207b28aSchristos 				lastc = (unsigned char)*p1;
5018207b28aSchristos 				p0 = p1;
5028207b28aSchristos 				q = q1;
5038207b28aSchristos 				p = skip_FWS(p1);
5048207b28aSchristos 				/*
5058207b28aSchristos 				 * XXX - this check should be
5068207b28aSchristos 				 * unnecessary as *pend should be '\0'
5078207b28aSchristos 				 * which will stop skip_FWS()
5088207b28aSchristos 				 */
5098207b28aSchristos 				if (p > pend)
5108207b28aSchristos 					p = pend;
5118207b28aSchristos 				break;
5128207b28aSchristos 			}
5138207b28aSchristos 			else {
5148207b28aSchristos 				copy_skipped_FWS(&q, qend, &p0, p);
5158207b28aSchristos 				if (q >= qend)
5168207b28aSchristos 					break;
5178207b28aSchristos 				goto copy_char;
5188207b28aSchristos 			}
5198207b28aSchristos 
5208207b28aSchristos 		case '<':	/* start of angle-addr, msg-id, or path. */
5218207b28aSchristos 			/*
5228207b28aSchristos 			 * A msg-id cannot contain encoded-pairs or
5238207b28aSchristos 			 * encoded-words, but angle-addr and path can.
5248207b28aSchristos 			 * Distinguishing between them seems to be
5258207b28aSchristos 			 * unnecessary, so let's be loose and just
5268207b28aSchristos 			 * decode them as if they were all the same.
5278207b28aSchristos 			 */
5288207b28aSchristos 		default:
5298207b28aSchristos 	copy_char:
5308207b28aSchristos 			lastc = (unsigned char)*p;
5318207b28aSchristos 			*q++ = *p++;
5328207b28aSchristos 			break;
5338207b28aSchristos 		}
5348207b28aSchristos 	}
5358207b28aSchristos 	copy_skipped_FWS(&q, qend, &p0, p);
5368207b28aSchristos 	*q = '\0';	/* null terminate the result! */
5378207b28aSchristos }
5388207b28aSchristos 
5398207b28aSchristos /*
5408207b28aSchristos  * Returns the correct hfield decoder, or NULL if none.
5418207b28aSchristos  * Info extracted from RFC 2822.
542a2fe0ba0Schristos  *
543a2fe0ba0Schristos  * name - pointer to field name of header line (with colon).
5448207b28aSchristos  */
5458207b28aSchristos PUBLIC hfield_decoder_t
mime_hfield_decoder(const char * name)546a2fe0ba0Schristos mime_hfield_decoder(const char *name)
5478207b28aSchristos {
5488207b28aSchristos 	static const struct field_decoder_tbl_s {
5498207b28aSchristos 		const char *field_name;
550a2fe0ba0Schristos 		size_t field_len;
5518207b28aSchristos 		hfield_decoder_t decoder;
5528207b28aSchristos 	} field_decoder_tbl[] = {
553a2fe0ba0Schristos #define X(s)	s, sizeof(s) - 1
554a2fe0ba0Schristos 		{ X("Received:"),			NULL },
555a2fe0ba0Schristos 
556a2fe0ba0Schristos 		{ X("Content-Type:"),			NULL },
557a2fe0ba0Schristos 		{ X("Content-Disposition:"),		NULL },
558a2fe0ba0Schristos 		{ X("Content-Transfer-Encoding:"),	NULL },
559a2fe0ba0Schristos 		{ X("Content-Description:"),		mime_decode_sfield },
560a2fe0ba0Schristos 		{ X("Content-ID:"),			mime_decode_sfield },
561a2fe0ba0Schristos 		{ X("MIME-Version:"),			mime_decode_sfield },
562a2fe0ba0Schristos 
563a2fe0ba0Schristos 		{ X("Bcc:"),				mime_decode_sfield },
564a2fe0ba0Schristos 		{ X("Cc:"),				mime_decode_sfield },
565a2fe0ba0Schristos 		{ X("Date:"),				mime_decode_sfield },
566a2fe0ba0Schristos 		{ X("From:"),				mime_decode_sfield },
567a2fe0ba0Schristos 		{ X("In-Reply-To:"),			mime_decode_sfield },
568a2fe0ba0Schristos 		{ X("Keywords:"),			mime_decode_sfield },
569a2fe0ba0Schristos 		{ X("Message-ID:"),			mime_decode_sfield },
570a2fe0ba0Schristos 		{ X("References:"),			mime_decode_sfield },
571a2fe0ba0Schristos 		{ X("Reply-To:"),			mime_decode_sfield },
572a2fe0ba0Schristos 		{ X("Return-Path:"),			mime_decode_sfield },
573a2fe0ba0Schristos 		{ X("Sender:"),				mime_decode_sfield },
574a2fe0ba0Schristos 		{ X("To:"),				mime_decode_sfield },
575a2fe0ba0Schristos 		{ X("Subject:"),			mime_decode_usfield },
576a2fe0ba0Schristos 		{ X("Comments:"),			mime_decode_usfield },
577a2fe0ba0Schristos 		{ X("X-"),				mime_decode_usfield },
578a2fe0ba0Schristos 		{ NULL, 0,				mime_decode_usfield },	/* optional-fields */
579a2fe0ba0Schristos #undef X
5808207b28aSchristos 	};
5818207b28aSchristos 	const struct field_decoder_tbl_s *fp;
5828207b28aSchristos 
5838207b28aSchristos 	/* XXX - this begs for a hash table! */
5848207b28aSchristos 	for (fp = field_decoder_tbl; fp->field_name; fp++)
585a2fe0ba0Schristos 		if (strncasecmp(name, fp->field_name, fp->field_len) == 0)
586a2fe0ba0Schristos 			break;
5878207b28aSchristos 	return fp->decoder;
5888207b28aSchristos }
5898207b28aSchristos 
5908207b28aSchristos #endif /* MIME_SUPPORT */
591