xref: /netbsd-src/external/ibm-public/postfix/dist/src/global/header_token.c (revision 41fbaed053f8fbfdf9d2a4ee0a7386a3c83f8505)
1 /*	$NetBSD: header_token.c,v 1.1.1.1 2009/06/23 10:08:46 tron Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	header_token 3
6 /* SUMMARY
7 /*	mail header parser
8 /* SYNOPSIS
9 /*	#include <header_token.h>
10 /*
11 /*	typedef struct {
12 /* .in +4
13 /*	    int     type;
14 /*	    const char *u.value;
15 /*	    /* ... */
16 /* .in
17 /*	} HEADER_TOKEN;
18 /*
19 /*	ssize_t	header_token(token, token_len, token_buffer, ptr,
20 /*				specials, terminator)
21 /*	HEADER_TOKEN *token;
22 /*	ssize_t	token_len;
23 /*	VSTRING *token_buffer;
24 /*	const char **ptr;
25 /*	const char *specials;
26 /*	int	terminator;
27 /* DESCRIPTION
28 /*	This module parses a mail header value (text after field-name:)
29 /*	into tokens. The parser understands RFC 822 linear white space,
30 /*	quoted-string, comment, control characters, and a set of
31 /*	user-specified special characters.
32 /*
33 /*	A result token type is one of the following:
34 /* .IP HEADER_TOK_QSTRING
35 /*	Quoted string as per RFC 822.
36 /* .IP HEADER_TOK_TOKEN
37 /*	Token as per RFC 822, and the special characters supplied by the
38 /*	caller.
39 /* .IP other
40 /*	The value of a control character or special character.
41 /* .PP
42 /*	header_token() tokenizes the input and stops after a user-specified
43 /*	terminator (ignoring all tokens that exceed the capacity of
44 /*	the result storage), or when it runs out of space for the result.
45 /*	The terminator is not stored. The result value is the number of
46 /*	tokens stored, or -1 when the input was exhausted before any tokens
47 /*	were found.
48 /*
49 /*	Arguments:
50 /* .IP token
51 /*	Result array of HEADER_TOKEN structures. Token string values
52 /*	are pointers to null-terminated substrings in the token_buffer.
53 /* .IP token_len
54 /*	Length of the array of HEADER_TOKEN structures.
55 /* .IP token_buffer
56 /*	Storage for result token string values.
57 /* .IP ptr
58 /*	Input/output read position. The input is a null-terminated string.
59 /* .IP specials
60 /*	Special characters according to the relevant RFC, or a
61 /*	null pointer (default to the RFC 822 special characters).
62 /*	This must include the optional terminator if one is specified.
63 /* .IP terminator
64 /*	The special character to stop after, or zero.
65 /* BUGS
66 /*	Eight-bit characters are not given special treatment.
67 /* SEE ALSO
68 /*	RFC 822 (ARPA Internet Text Messages)
69 /* DIAGNOSTICS
70 /*	Fatal errors: memory allocation problem.
71 /* LICENSE
72 /* .ad
73 /* .fi
74 /*	The Secure Mailer license must be distributed with this software.
75 /* AUTHOR(S)
76 /*	Wietse Venema
77 /*	IBM T.J. Watson Research
78 /*	P.O. Box 704
79 /*	Yorktown Heights, NY 10598, USA
80 /*--*/
81 
82 /* System library. */
83 
84 #include <sys_defs.h>
85 #include <string.h>
86 #include <ctype.h>
87 
88 /* Utility library. */
89 
90 #include <msg.h>
91 #include <vstring.h>
92 
93 /* Global library. */
94 
95 #include <lex_822.h>
96 #include <header_token.h>
97 
98 /* Application-specific. */
99 
100  /*
101   * Silly little macros.
102   */
103 #define STR(x)	vstring_str(x)
104 #define LEN(x)	VSTRING_LEN(x)
105 #define CU_CHAR_PTR(x)	((const unsigned char *) (x))
106 
107 /* header_token - parse out the next item in a message header */
108 
header_token(HEADER_TOKEN * token,ssize_t token_len,VSTRING * token_buffer,const char ** ptr,const char * user_specials,int user_terminator)109 ssize_t header_token(HEADER_TOKEN *token, ssize_t token_len,
110 		             VSTRING *token_buffer, const char **ptr,
111 		             const char *user_specials, int user_terminator)
112 {
113     ssize_t comment_level;
114     const unsigned char *cp;
115     ssize_t len;
116     int     ch;
117     ssize_t tok_count;
118     ssize_t n;
119 
120     /*
121      * Initialize.
122      */
123     VSTRING_RESET(token_buffer);
124     cp = CU_CHAR_PTR(*ptr);
125     tok_count = 0;
126     if (user_specials == 0)
127 	user_specials = LEX_822_SPECIALS;
128 
129     /*
130      * Main parsing loop.
131      *
132      * XXX What was the reason to continue parsing when user_terminator is
133      * specified? Perhaps this was needed at some intermediate stage of
134      * development?
135      */
136     while ((ch = *cp) != 0 && (user_terminator != 0 || tok_count < token_len)) {
137 	cp++;
138 
139 	/*
140 	 * Skip RFC 822 linear white space.
141 	 */
142 	if (IS_SPACE_TAB_CR_LF(ch))
143 	    continue;
144 
145 	/*
146 	 * Terminator.
147 	 */
148 	if (ch == user_terminator)
149 	    break;
150 
151 	/*
152 	 * Skip RFC 822 comment.
153 	 */
154 	if (ch == '(') {
155 	    comment_level = 1;
156 	    while ((ch = *cp) != 0) {
157 		cp++;
158 		if (ch == '(') {		/* comments can nest! */
159 		    comment_level++;
160 		} else if (ch == ')') {
161 		    if (--comment_level == 0)
162 			break;
163 		} else if (ch == '\\') {
164 		    if ((ch = *cp) == 0)
165 			break;
166 		    cp++;
167 		}
168 	    }
169 	    continue;
170 	}
171 
172 	/*
173 	 * Copy quoted text according to RFC 822.
174 	 */
175 	if (ch == '"') {
176 	    if (tok_count < token_len) {
177 		token[tok_count].u.offset = LEN(token_buffer);
178 		token[tok_count].type = HEADER_TOK_QSTRING;
179 	    }
180 	    while ((ch = *cp) != 0) {
181 		cp++;
182 		if (ch == '"')
183 		    break;
184 		if (ch == '\n') {		/* unfold */
185 		    if (tok_count < token_len) {
186 			len = LEN(token_buffer);
187 			while (len > 0
188 			  && IS_SPACE_TAB_CR_LF(STR(token_buffer)[len - 1]))
189 			    len--;
190 			if (len < LEN(token_buffer))
191 			    vstring_truncate(token_buffer, len);
192 		    }
193 		    continue;
194 		}
195 		if (ch == '\\') {
196 		    if ((ch = *cp) == 0)
197 			break;
198 		    cp++;
199 		}
200 		if (tok_count < token_len)
201 		    VSTRING_ADDCH(token_buffer, ch);
202 	    }
203 	    if (tok_count < token_len) {
204 		VSTRING_ADDCH(token_buffer, 0);
205 		tok_count++;
206 	    }
207 	    continue;
208 	}
209 
210 	/*
211 	 * Control, or special.
212 	 */
213 	if (strchr(user_specials, ch) || ISCNTRL(ch)) {
214 	    if (tok_count < token_len) {
215 		token[tok_count].u.offset = LEN(token_buffer);
216 		token[tok_count].type = ch;
217 		VSTRING_ADDCH(token_buffer, ch);
218 		VSTRING_ADDCH(token_buffer, 0);
219 		tok_count++;
220 	    }
221 	    continue;
222 	}
223 
224 	/*
225 	 * Token.
226 	 */
227 	else {
228 	    if (tok_count < token_len) {
229 		token[tok_count].u.offset = LEN(token_buffer);
230 		token[tok_count].type = HEADER_TOK_TOKEN;
231 		VSTRING_ADDCH(token_buffer, ch);
232 	    }
233 	    while ((ch = *cp) != 0 && !IS_SPACE_TAB_CR_LF(ch)
234 		   && !ISCNTRL(ch) && !strchr(user_specials, ch)) {
235 		cp++;
236 		if (tok_count < token_len)
237 		    VSTRING_ADDCH(token_buffer, ch);
238 	    }
239 	    if (tok_count < token_len) {
240 		VSTRING_ADDCH(token_buffer, 0);
241 		tok_count++;
242 	    }
243 	    continue;
244 	}
245     }
246 
247     /*
248      * Ignore a zero-length item after the last terminator.
249      */
250     if (tok_count == 0 && ch == 0)
251 	return (-1);
252 
253     /*
254      * Finalize. Fill in the string pointer array, now that the token buffer
255      * is no longer dynamically reallocated as it grows.
256      */
257     *ptr = (const char *) cp;
258     for (n = 0; n < tok_count; n++)
259 	token[n].u.value = STR(token_buffer) + token[n].u.offset;
260 
261     if (msg_verbose)
262 	msg_info("header_token: %s %s %s",
263 		 tok_count > 0 ? token[0].u.value : "",
264 		 tok_count > 1 ? token[1].u.value : "",
265 		 tok_count > 2 ? token[2].u.value : "");
266 
267     return (tok_count);
268 }
269