1*ba2b5111Schristos /* $NetBSD: mime_header.c,v 1.9 2013/02/14 18:23:45 christos Exp $ */
28207b28aSchristos
38207b28aSchristos /*-
48207b28aSchristos * Copyright (c) 2006 The NetBSD Foundation, Inc.
58207b28aSchristos * All rights reserved.
68207b28aSchristos *
78207b28aSchristos * This code is derived from software contributed to The NetBSD Foundation
88207b28aSchristos * by Anon Ymous.
98207b28aSchristos *
108207b28aSchristos * Redistribution and use in source and binary forms, with or without
118207b28aSchristos * modification, are permitted provided that the following conditions
128207b28aSchristos * are met:
138207b28aSchristos * 1. Redistributions of source code must retain the above copyright
148207b28aSchristos * notice, this list of conditions and the following disclaimer.
158207b28aSchristos * 2. Redistributions in binary form must reproduce the above copyright
168207b28aSchristos * notice, this list of conditions and the following disclaimer in the
178207b28aSchristos * documentation and/or other materials provided with the distribution.
188207b28aSchristos *
198207b28aSchristos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
208207b28aSchristos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
218207b28aSchristos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
228207b28aSchristos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
238207b28aSchristos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
248207b28aSchristos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
258207b28aSchristos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
268207b28aSchristos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
278207b28aSchristos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
288207b28aSchristos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
298207b28aSchristos * POSSIBILITY OF SUCH DAMAGE.
308207b28aSchristos */
318207b28aSchristos
328207b28aSchristos
338207b28aSchristos /*
348207b28aSchristos * This module contains the core MIME header decoding routines.
358207b28aSchristos * Please refer to RFC 2047 and RFC 2822.
368207b28aSchristos */
378207b28aSchristos
388207b28aSchristos #ifdef MIME_SUPPORT
398207b28aSchristos
408207b28aSchristos #include <sys/cdefs.h>
418207b28aSchristos #ifndef __lint__
42*ba2b5111Schristos __RCSID("$NetBSD: mime_header.c,v 1.9 2013/02/14 18:23:45 christos Exp $");
438207b28aSchristos #endif /* not __lint__ */
448207b28aSchristos
45ca13337dSchristos #include <assert.h>
468207b28aSchristos #include <stdio.h>
478207b28aSchristos #include <stdlib.h>
488207b28aSchristos #include <string.h>
498207b28aSchristos
508207b28aSchristos #include "def.h"
518207b28aSchristos #include "extern.h"
528207b28aSchristos #include "mime.h"
538207b28aSchristos #include "mime_header.h"
548207b28aSchristos #include "mime_codecs.h"
558207b28aSchristos
/*
 * Copy the charset name of an encoded-word into from_cs.
 *
 * from_cs	- destination buffer for the NUL-terminated charset name.
 * from_cs_len	- size of from_cs in bytes, including room for the NUL.
 * p		- points at the first character of the charset, i.e., just
 *		  past the leading "=?" of the encoded-word.
 *
 * Returns a pointer to the character following the '?' that ends the
 * charset, or NULL if the input ends before a '?' is seen or the name
 * does not fit in from_cs.  On failure the contents of from_cs are
 * unspecified (possibly a partial, unterminated copy).
 */
static const char *
grab_charset(char *from_cs, size_t from_cs_len, const char *p)
{
	size_t n;

	/*
	 * A zero-sized buffer cannot even hold the terminator; bail out
	 * before 'from_cs_len - 1' wraps around.
	 */
	if (from_cs_len == 0)
		return NULL;

	for (n = 0; p[n] != '?'; n++) {
		if (p[n] == '\0' || n >= from_cs_len - 1)
			return NULL;
		from_cs[n] = p[n];
	}
	from_cs[n] = '\0';
	return p + n + 1;	/* if here, then we got the '?'; step over it */
}
698207b28aSchristos
708207b28aSchristos /*
718207b28aSchristos * An encoded word is a string of at most 75 non-white space
728207b28aSchristos * characters of the following form:
738207b28aSchristos *
748207b28aSchristos * =?charset?X?encoding?=
758207b28aSchristos *
768207b28aSchristos * where:
778207b28aSchristos * 'charset' is the original character set of the unencoded string.
788207b28aSchristos *
798207b28aSchristos * 'X' is the encoding type 'B' or 'Q' for "base64" or
808207b28aSchristos * "quoted-printable", respectively,
818207b28aSchristos * 'encoding' is the encoded string.
828207b28aSchristos *
838207b28aSchristos * Both 'charset' and 'X' are case independent and 'encoding' cannot
848207b28aSchristos * contain any whitespace or '?' characters. The 'encoding' must also
858207b28aSchristos * be fully contained within the encoded words, i.e., it cannot be
868207b28aSchristos * split between encoded words.
878207b28aSchristos *
888207b28aSchristos * Note: the 'B' encoding is a slightly modified "quoted-printable"
898207b28aSchristos * encoding. In particular, spaces (' ') may be encoded as '_' to
908207b28aSchristos * improve undecoded readability.
918207b28aSchristos */
928207b28aSchristos static int
decode_word(const char ** ibuf,char ** obuf,char * oend,const char * to_cs)938207b28aSchristos decode_word(const char **ibuf, char **obuf, char *oend, const char *to_cs)
948207b28aSchristos {
958207b28aSchristos ssize_t declen;
968207b28aSchristos size_t enclen, dstlen;
978207b28aSchristos char decword[LINESIZE];
988207b28aSchristos char from_cs[LINESIZE];
998207b28aSchristos const char *encword, *iend, *p;
1008207b28aSchristos char *dstend;
1018207b28aSchristos char enctype;
1028207b28aSchristos
1038207b28aSchristos p = *ibuf;
1048207b28aSchristos if (p[0] != '=' && p[1] != '?')
1058207b28aSchristos return -1;
1068207b28aSchristos if (strlen(p) < 2 + 1 + 3 + 1 + 2)
1078207b28aSchristos return -1;
1088207b28aSchristos p = grab_charset(from_cs, sizeof(from_cs), p + 2);
1098207b28aSchristos if (p == NULL)
1108207b28aSchristos return -1;
1118207b28aSchristos enctype = *p++;
1128207b28aSchristos if (*p++ != '?')
1138207b28aSchristos return -1;
1148207b28aSchristos encword = p;
1158207b28aSchristos p = strchr(p, '?');
1168207b28aSchristos if (p == NULL || p[1] != '=')
1178207b28aSchristos return -1;
1188207b28aSchristos enclen = p - encword; /* length of encoded substring */
1198207b28aSchristos iend = p + 2;
1208207b28aSchristos /* encoded words are at most 75 characters (RFC 2047, sec 2) */
1218207b28aSchristos if (iend > *ibuf + 75)
1228207b28aSchristos return -1;
1238207b28aSchristos
124ca13337dSchristos if (oend < *obuf + 1) {
125ca13337dSchristos assert(/*CONSTCOND*/ 0); /* We have a coding error! */
126ca13337dSchristos return -1;
127ca13337dSchristos }
1288207b28aSchristos dstend = to_cs ? decword : *obuf;
129c172e3b9Slukem dstlen = (to_cs ? sizeof(decword) : (size_t)(oend - *obuf)) - 1;
1308207b28aSchristos
131*ba2b5111Schristos declen = mime_rfc2047_decode(enctype, dstend, dstlen, encword, enclen);
1328207b28aSchristos if (declen == -1)
1338207b28aSchristos return -1;
1348207b28aSchristos
1358207b28aSchristos dstend += declen;
1368207b28aSchristos #ifdef CHARSET_SUPPORT
1378207b28aSchristos if (to_cs != NULL) {
1388207b28aSchristos iconv_t cd;
1398207b28aSchristos const char *src;
1408207b28aSchristos size_t srclen;
1418207b28aSchristos size_t cnt;
1428207b28aSchristos
1438207b28aSchristos cd = iconv_open(to_cs, from_cs);
1448207b28aSchristos if (cd == (iconv_t)-1)
1458207b28aSchristos return -1;
1468207b28aSchristos
1478207b28aSchristos src = decword;
1488207b28aSchristos srclen = declen;
1498207b28aSchristos dstend = *obuf;
1508207b28aSchristos dstlen = oend - *obuf - 1;
1518207b28aSchristos cnt = mime_iconv(cd, &src, &srclen, &dstend, &dstlen);
1528207b28aSchristos
1538207b28aSchristos (void)iconv_close(cd);
1548207b28aSchristos if (cnt == (size_t)-1)
1558207b28aSchristos return -1;
1568207b28aSchristos }
1578207b28aSchristos #endif /* CHARSET_SUPPORT */
1588207b28aSchristos *dstend = '\0';
1598207b28aSchristos *ibuf = iend;
1608207b28aSchristos *obuf = dstend;
1618207b28aSchristos return 0;
1628207b28aSchristos }
1638207b28aSchristos
1648207b28aSchristos
1658207b28aSchristos /*
1668207b28aSchristos * Folding White Space. See RFC 2822.
167d727506fSchristos *
168d727506fSchristos * Note: RFC 2822 specifies that '\n' and '\r' only occur as CRLF
169d727506fSchristos * pairs (i.e., "\r\n") and never separately. However, by the time
170d727506fSchristos * mail(1) sees the messages, all CRLF pairs have been converted to
171d727506fSchristos * '\n' characters.
172d727506fSchristos *
173d727506fSchristos * XXX - pull is_FWS() and skip_FWS() up to def.h?
1748207b28aSchristos */
static inline int
is_FWS(int c)
{
	/* Space, horizontal tab, and newline count as folding white space. */
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
1808207b28aSchristos
static inline const char *
skip_FWS(const char *p)
{
	/*
	 * Step over a run of folding white space.  The terminating NUL
	 * is not white space, so the scan stops at the end of the string.
	 */
	for (; is_FWS(*p); p++)
		continue;
	return p;
}
1888207b28aSchristos
/*
 * Flush white space that was provisionally skipped.
 *
 * If *src is non-NULL, the bytes in [*src, srcend) are appended at
 * *dst, never writing at or beyond dstend.  *dst is advanced past the
 * copied bytes and *src is reset to NULL so that the same run is not
 * copied twice.  If *src is NULL nothing is pending and nothing is
 * changed.
 */
static inline void
copy_skipped_FWS(char **dst, char *dstend, const char **src, const char *srcend)
{
	const char *from;
	char *to;

	from = *src;
	if (from == NULL)
		return;		/* nothing pending */

	/* copy any skipped linear-white-space */
	for (to = *dst; from < srcend && to < dstend; from++, to++)
		*to = *from;

	*dst = to;
	*src = NULL;
}
2078207b28aSchristos
2088207b28aSchristos /*
2098207b28aSchristos * Decode an unstructured field.
2108207b28aSchristos *
2118207b28aSchristos * See RFC 2822 Sec 2.2.1 and 3.6.5.
2128207b28aSchristos * Encoded words may occur anywhere in unstructured fields provided
2138207b28aSchristos * they are separated from any other text or encoded words by at least
2148207b28aSchristos * one linear-white-space character. (See RFC 2047 sec 5.1.) If two
2158207b28aSchristos * encoded words occur sequentially (separated by only FWS) then the
2168207b28aSchristos * separating FWS is removed.
2178207b28aSchristos *
2188207b28aSchristos * NOTE: unstructured fields cannot contain 'quoted-pairs' (see
2198207b28aSchristos * RFC2822 sec 3.2.6 and RFC 2047), but that is no problem as a '\\'
2208207b28aSchristos * (or any non-whitespace character) immediately before an
2218207b28aSchristos * encoded-word will prevent it from being decoded.
2228207b28aSchristos *
2238207b28aSchristos * hstring should be a NULL terminated string.
2248207b28aSchristos * outbuf should be sufficiently large to hold the result.
2258207b28aSchristos */
2268207b28aSchristos static void
mime_decode_usfield(char * outbuf,size_t outsize,const char * hstring)2278207b28aSchristos mime_decode_usfield(char *outbuf, size_t outsize, const char *hstring)
2288207b28aSchristos {
2298207b28aSchristos const char *p, *p0;
2308207b28aSchristos char *q, *qend;
2318207b28aSchristos int lastc;
2328207b28aSchristos const char *charset;
2338207b28aSchristos
2348207b28aSchristos charset = value(ENAME_MIME_CHARSET);
2358207b28aSchristos qend = outbuf + outsize - 1; /* Make sure there is room for the trailing NULL! */
2368207b28aSchristos q = outbuf;
2378207b28aSchristos p = hstring;
2388207b28aSchristos p0 = NULL;
2398207b28aSchristos lastc = (unsigned char)' ';
2408207b28aSchristos while (*p && q < qend) {
2418207b28aSchristos const char *p1;
2428207b28aSchristos char *q1;
2438207b28aSchristos if (is_FWS(lastc) && p[0] == '=' && p[1] == '?' &&
2448207b28aSchristos decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
245d727506fSchristos (*p1 == '\0' || is_FWS(*p1))) {
2468207b28aSchristos p0 = p1; /* pointer to first character after encoded word */
2478207b28aSchristos q = q1;
2488207b28aSchristos p = skip_FWS(p1);
2498207b28aSchristos lastc = (unsigned char)*p0;
2508207b28aSchristos }
2518207b28aSchristos else {
2528207b28aSchristos copy_skipped_FWS(&q, qend, &p0, p);
2538207b28aSchristos lastc = (unsigned char)*p;
2548207b28aSchristos if (q < qend)
2558207b28aSchristos *q++ = *p++;
2568207b28aSchristos }
2578207b28aSchristos }
2588207b28aSchristos copy_skipped_FWS(&q, qend, &p0, p);
2598207b28aSchristos *q = '\0';
2608207b28aSchristos }
2618207b28aSchristos
/*
 * Decode a field comment.
 *
 * Comments only occur in structured fields, can be nested (rfc 2822,
 * sec 3.2.3), and can contain 'encoded-words' and 'quoted-pairs'.
 * Otherwise, they can be regarded as unstructured fields that are
 * bounded by '(' and ')' characters.
 *
 * On entry *ibuf points just past the opening '(' (which the caller
 * has already copied) and *obuf at the next free output byte.  Input
 * is consumed up to and including the matching ')' or until iend or
 * oend is reached.  *ibuf and *obuf are updated accordingly and 0 is
 * returned.  If a nested comment reports -1, -1 is returned without
 * updating *ibuf and *obuf.
 */
static int
decode_comment(char **obuf, char *oend, const char **ibuf, const char *iend, const char *charset)
{
	const char *in, *skipped;
	char *out;
	int prev;

	in = *ibuf;
	out = *obuf;
	prev = ' ';		/* the comment start acts like white space */
	skipped = NULL;		/* FWS held back after an encoded word */

	while (in < iend && out < oend) {
		const char *wend = in;
		char *wout = out;

		/* An encoded word must be delimited by FWS or the closing ')'. */
		if (is_FWS(prev) && in[0] == '=' && in[1] == '?' &&
		    decode_word(&wend, &wout, oend, charset) == 0 &&
		    (*wend == ')' || is_FWS(*wend))) {
			prev = (unsigned char)*wend;
			skipped = wend;
			out = wout;
			in = skip_FWS(wend);
			/*
			 * XXX - this check should be unnecessary as *iend
			 * should be '\0' which will stop skip_FWS()
			 */
			if (in > iend)
				in = iend;
			continue;
		}

		/* ordinary text: first flush any held-back FWS */
		copy_skipped_FWS(&out, oend, &skipped, in);
		if (out >= oend)	/* XXX - out > oend cannot happen */
			break;

		if (*in == ')') {
			*out++ = *in++;	/* copy the closing ')' */
			break;		/* and get out of here! */
		}

		switch (*in) {
		case '(':
			*out++ = *in++;	/* copy the opening '(' */
			if (decode_comment(&out, oend, &in, iend, charset) == -1)
				return -1; /* is this right or should we update? */
			prev = ')';
			break;

		case '\\':
			if (in + 1 < iend) {	/* quoted-pair */
				/* keep the '\\' only where it is still needed */
				if (in[1] == '(' || in[1] == ')' || in[1] == '\\')
					*out++ = *in;
				in++;
				prev = (unsigned char)*in;
				if (out < oend)
					*out++ = *in++;
				break;
			}
			/* a trailing lone '\\' is copied like any other character */
			/* FALLTHROUGH */
		default:
			prev = (unsigned char)*in;
			*out++ = *in++;
			break;
		}
	}
	*ibuf = in;
	*obuf = out;
	return 0;
}
3358207b28aSchristos
/*
 * Decode a quoted-string or no-fold-quote.
 *
 * These cannot contain encoded words.  They can contain quoted-pairs,
 * making '\\' special.  They have no other structure.  See RFC 2822
 * sec 3.2.5 and 3.6.4.
 *
 * On entry *ibuf points just past the opening '"'.  Text is copied to
 * *obuf up to and including the closing '"', or until iend or oend is
 * reached.  The '\\' of a quoted-pair is kept only in front of '"'
 * and '\\'; elsewhere it is dropped.  *ibuf and *obuf are advanced
 * past what was consumed and produced.
 */
static void
decode_quoted_string(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *in = *ibuf;
	char *out = *obuf;

	while (in < iend && out < oend) {
		const char c = *in;

		if (c == '"') {
			*out++ = c;	/* copy the closing '"' */
			in++;
			break;
		}
		if (c == '\\' && iend - in > 1) {	/* quoted-pair */
			const char quoted = in[1];

			if (quoted == '"' || quoted == '\\') {
				*out++ = c;
				/* no room left for the quoted character */
				if (out >= oend)
					break;
			}
			in++;	/* skip the '\\' */
		}
		*out++ = *in++;
	}
	*ibuf = in;
	*obuf = out;
}
3718207b28aSchristos
/*
 * Decode a domain-literal or no-fold-literal.
 *
 * These cannot contain encoded words.  They can have quoted pairs and
 * are delimited by '[' and ']' making '\\', '[', and ']' special.
 * They have no other structure.  See RFC 2822 sec 3.4.1 and 3.6.4.
 *
 * On entry *ibuf points just past the opening '['.  Text is copied to
 * *obuf up to and including the closing ']', or until iend or oend is
 * reached.  The '\\' of a quoted-pair is kept only in front of '[',
 * ']' and '\\'; elsewhere it is dropped.  *ibuf and *obuf are
 * advanced past what was consumed and produced.
 */
static void
decode_domain_literal(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *in = *ibuf;
	char *out = *obuf;

	while (in < iend && out < oend) {
		const char c = *in;

		if (c == ']') {
			*out++ = c;	/* copy the closing ']' */
			in++;
			break;
		}
		if (c == '\\' && iend - in > 1) {	/* quoted-pair */
			const char quoted = in[1];

			if (quoted == '[' || quoted == ']' || quoted == '\\') {
				*out++ = c;
				/* no room left for the quoted character */
				if (out >= oend)
					break;
			}
			in++;	/* skip the '\\' */
		}
		*out++ = *in++;
	}
	*ibuf = in;
	*obuf = out;
}
4078207b28aSchristos
/*
 * Specials: see RFC 2822 sec 3.2.1.
 *
 * Returns 1 if c is one of
 *	( ) < > [ ] : ; @ \ , . "
 * and 0 for every other value, including values outside 0..127.
 * (The character constants below assume an ASCII execution set.)
 */
static inline int
is_specials(int c)
{
	switch (c) {
	case '(': case ')':
	case '<': case '>':
	case '[': case ']':
	case ':': case ';':
	case '@': case '\\':
	case ',': case '.':
	case '"':
		return 1;
	default:
		return 0;
	}
}
4278207b28aSchristos
4288207b28aSchristos /*
4298207b28aSchristos * Decode a structured field.
4308207b28aSchristos *
4318207b28aSchristos * At the top level, structured fields can only contain encoded-words
4328207b28aSchristos * via 'phrases' and 'comments'. See RFC 2047 sec 5.
4338207b28aSchristos */
4348207b28aSchristos static void
mime_decode_sfield(char * linebuf,size_t bufsize,const char * hstring)4358207b28aSchristos mime_decode_sfield(char *linebuf, size_t bufsize, const char *hstring)
4368207b28aSchristos {
4378207b28aSchristos const char *p, *pend, *p0;
4388207b28aSchristos char *q, *qend;
4398207b28aSchristos const char *charset;
4408207b28aSchristos int lastc;
4418207b28aSchristos
4428207b28aSchristos charset = value(ENAME_MIME_CHARSET);
4438207b28aSchristos
4448207b28aSchristos p = hstring;
4458207b28aSchristos q = linebuf;
4468207b28aSchristos pend = hstring + strlen(hstring);
4478207b28aSchristos qend = linebuf + bufsize - 1; /* save room for the NULL terminator */
4488207b28aSchristos lastc = (unsigned char)' ';
4498207b28aSchristos p0 = NULL;
4508207b28aSchristos while (p < pend && q < qend) {
4518207b28aSchristos const char *p1;
4528207b28aSchristos char *q1;
4538207b28aSchristos
4548207b28aSchristos if (*p != '=') {
4558207b28aSchristos copy_skipped_FWS(&q, qend, &p0, p);
4568207b28aSchristos if (q >= qend)
4578207b28aSchristos break;
4588207b28aSchristos }
4598207b28aSchristos
4608207b28aSchristos switch (*p) {
4618207b28aSchristos case '(': /* start of comment */
4628207b28aSchristos *q++ = *p++; /* copy the opening '(' */
4638207b28aSchristos (void)decode_comment(&q, qend, &p, pend, charset);
4648207b28aSchristos lastc = (unsigned char)p[-1];
4658207b28aSchristos break;
4668207b28aSchristos
4678207b28aSchristos case '"': /* start of quoted-string or no-fold-quote */
4688207b28aSchristos *q++ = *p++; /* copy the opening '"' */
4698207b28aSchristos decode_quoted_string(&q, qend, &p, pend);
4708207b28aSchristos lastc = (unsigned char)p[-1];
4718207b28aSchristos break;
4728207b28aSchristos
4738207b28aSchristos case '[': /* start of domain-literal or no-fold-literal */
4748207b28aSchristos *q++ = *p++; /* copy the opening '[' */
4758207b28aSchristos decode_domain_literal(&q, qend, &p, pend);
4768207b28aSchristos lastc = (unsigned char)p[-1];
4778207b28aSchristos break;
4788207b28aSchristos
4798207b28aSchristos case '\\': /* start of quoted-pair */
4808207b28aSchristos if (p + 1 < pend) { /* quoted pair */
4818207b28aSchristos if (is_specials(p[1])) {
4828207b28aSchristos *q++ = *p;
4838207b28aSchristos if (q >= qend)
4848207b28aSchristos break;
4858207b28aSchristos }
4868207b28aSchristos p++; /* skip the '\\' */
4878207b28aSchristos }
4888207b28aSchristos goto copy_char;
4898207b28aSchristos
4908207b28aSchristos case '=':
4918207b28aSchristos /*
4928207b28aSchristos * At this level encoded words can appear via
4938207b28aSchristos * 'phrases' (possibly delimited by ',' as in
4948207b28aSchristos * 'keywords'). Thus we handle them as such.
4958207b28aSchristos * Hopefully this is sufficient.
4968207b28aSchristos */
4978207b28aSchristos if ((lastc == ',' || is_FWS(lastc)) && p[1] == '?' &&
4988207b28aSchristos decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
499d727506fSchristos (*p1 == '\0' || *p1 == ',' || is_FWS(*p1))) {
5008207b28aSchristos lastc = (unsigned char)*p1;
5018207b28aSchristos p0 = p1;
5028207b28aSchristos q = q1;
5038207b28aSchristos p = skip_FWS(p1);
5048207b28aSchristos /*
5058207b28aSchristos * XXX - this check should be
5068207b28aSchristos * unnecessary as *pend should be '\0'
5078207b28aSchristos * which will stop skip_FWS()
5088207b28aSchristos */
5098207b28aSchristos if (p > pend)
5108207b28aSchristos p = pend;
5118207b28aSchristos break;
5128207b28aSchristos }
5138207b28aSchristos else {
5148207b28aSchristos copy_skipped_FWS(&q, qend, &p0, p);
5158207b28aSchristos if (q >= qend)
5168207b28aSchristos break;
5178207b28aSchristos goto copy_char;
5188207b28aSchristos }
5198207b28aSchristos
5208207b28aSchristos case '<': /* start of angle-addr, msg-id, or path. */
5218207b28aSchristos /*
5228207b28aSchristos * A msg-id cannot contain encoded-pairs or
5238207b28aSchristos * encoded-words, but angle-addr and path can.
5248207b28aSchristos * Distinguishing between them seems to be
5258207b28aSchristos * unnecessary, so let's be loose and just
5268207b28aSchristos * decode them as if they were all the same.
5278207b28aSchristos */
5288207b28aSchristos default:
5298207b28aSchristos copy_char:
5308207b28aSchristos lastc = (unsigned char)*p;
5318207b28aSchristos *q++ = *p++;
5328207b28aSchristos break;
5338207b28aSchristos }
5348207b28aSchristos }
5358207b28aSchristos copy_skipped_FWS(&q, qend, &p0, p);
5368207b28aSchristos *q = '\0'; /* null terminate the result! */
5378207b28aSchristos }
5388207b28aSchristos
5398207b28aSchristos /*
5408207b28aSchristos * Returns the correct hfield decoder, or NULL if none.
5418207b28aSchristos * Info extracted from RFC 2822.
542a2fe0ba0Schristos *
543a2fe0ba0Schristos * name - pointer to field name of header line (with colon).
5448207b28aSchristos */
5458207b28aSchristos PUBLIC hfield_decoder_t
mime_hfield_decoder(const char * name)546a2fe0ba0Schristos mime_hfield_decoder(const char *name)
5478207b28aSchristos {
5488207b28aSchristos static const struct field_decoder_tbl_s {
5498207b28aSchristos const char *field_name;
550a2fe0ba0Schristos size_t field_len;
5518207b28aSchristos hfield_decoder_t decoder;
5528207b28aSchristos } field_decoder_tbl[] = {
553a2fe0ba0Schristos #define X(s) s, sizeof(s) - 1
554a2fe0ba0Schristos { X("Received:"), NULL },
555a2fe0ba0Schristos
556a2fe0ba0Schristos { X("Content-Type:"), NULL },
557a2fe0ba0Schristos { X("Content-Disposition:"), NULL },
558a2fe0ba0Schristos { X("Content-Transfer-Encoding:"), NULL },
559a2fe0ba0Schristos { X("Content-Description:"), mime_decode_sfield },
560a2fe0ba0Schristos { X("Content-ID:"), mime_decode_sfield },
561a2fe0ba0Schristos { X("MIME-Version:"), mime_decode_sfield },
562a2fe0ba0Schristos
563a2fe0ba0Schristos { X("Bcc:"), mime_decode_sfield },
564a2fe0ba0Schristos { X("Cc:"), mime_decode_sfield },
565a2fe0ba0Schristos { X("Date:"), mime_decode_sfield },
566a2fe0ba0Schristos { X("From:"), mime_decode_sfield },
567a2fe0ba0Schristos { X("In-Reply-To:"), mime_decode_sfield },
568a2fe0ba0Schristos { X("Keywords:"), mime_decode_sfield },
569a2fe0ba0Schristos { X("Message-ID:"), mime_decode_sfield },
570a2fe0ba0Schristos { X("References:"), mime_decode_sfield },
571a2fe0ba0Schristos { X("Reply-To:"), mime_decode_sfield },
572a2fe0ba0Schristos { X("Return-Path:"), mime_decode_sfield },
573a2fe0ba0Schristos { X("Sender:"), mime_decode_sfield },
574a2fe0ba0Schristos { X("To:"), mime_decode_sfield },
575a2fe0ba0Schristos { X("Subject:"), mime_decode_usfield },
576a2fe0ba0Schristos { X("Comments:"), mime_decode_usfield },
577a2fe0ba0Schristos { X("X-"), mime_decode_usfield },
578a2fe0ba0Schristos { NULL, 0, mime_decode_usfield }, /* optional-fields */
579a2fe0ba0Schristos #undef X
5808207b28aSchristos };
5818207b28aSchristos const struct field_decoder_tbl_s *fp;
5828207b28aSchristos
5838207b28aSchristos /* XXX - this begs for a hash table! */
5848207b28aSchristos for (fp = field_decoder_tbl; fp->field_name; fp++)
585a2fe0ba0Schristos if (strncasecmp(name, fp->field_name, fp->field_len) == 0)
586a2fe0ba0Schristos break;
5878207b28aSchristos return fp->decoder;
5888207b28aSchristos }
5898207b28aSchristos
5908207b28aSchristos #endif /* MIME_SUPPORT */
591