/* $NetBSD: header_token.c,v 1.1.1.1 2009/06/23 10:08:46 tron Exp $ */

/*++
/* NAME
/*	header_token 3
/* SUMMARY
/*	mail header parser
/* SYNOPSIS
/*	#include <header_token.h>
/*
/*	typedef struct {
/* .in +4
/*	    int     type;
/*	    const char *u.value;
/*	    /* ... */
/* .in
/*	} HEADER_TOKEN;
/*
/*	ssize_t	header_token(token, token_len, token_buffer, ptr,
/*				specials, terminator)
/*	HEADER_TOKEN *token;
/*	ssize_t	token_len;
/*	VSTRING	*token_buffer;
/*	const char **ptr;
/*	const char *specials;
/*	int	terminator;
/* DESCRIPTION
/*	This module parses a mail header value (text after field-name:)
/*	into tokens. The parser understands RFC 822 linear white space,
/*	quoted-string, comment, control characters, and a set of
/*	user-specified special characters.
/*
/*	A result token type is one of the following:
/* .IP HEADER_TOK_QSTRING
/*	Quoted string as per RFC 822.
/* .IP HEADER_TOK_TOKEN
/*	Token as per RFC 822, and the special characters supplied by the
/*	caller.
/* .IP other
/*	The value of a control character or special character.
/* .PP
/*	header_token() tokenizes the input and stops after a user-specified
/*	terminator (ignoring all tokens that exceed the capacity of
/*	the result storage), or when it runs out of space for the result.
/*	The terminator is not stored. The result value is the number of
/*	tokens stored, or -1 when the input was exhausted before any tokens
/*	were found.
/*
/*	Arguments:
/* .IP token
/*	Result array of HEADER_TOKEN structures. Token string values
/*	are pointers to null-terminated substrings in the token_buffer.
/* .IP token_len
/*	Length of the array of HEADER_TOKEN structures.
/* .IP token_buffer
/*	Storage for result token string values.
/* .IP ptr
/*	Input/output read position. The input is a null-terminated string.
59 /* .IP specials 60 /* Special characters according to the relevant RFC, or a 61 /* null pointer (default to the RFC 822 special characters). 62 /* This must include the optional terminator if one is specified. 63 /* .IP terminator 64 /* The special character to stop after, or zero. 65 /* BUGS 66 /* Eight-bit characters are not given special treatment. 67 /* SEE ALSO 68 /* RFC 822 (ARPA Internet Text Messages) 69 /* DIAGNOSTICS 70 /* Fatal errors: memory allocation problem. 71 /* LICENSE 72 /* .ad 73 /* .fi 74 /* The Secure Mailer license must be distributed with this software. 75 /* AUTHOR(S) 76 /* Wietse Venema 77 /* IBM T.J. Watson Research 78 /* P.O. Box 704 79 /* Yorktown Heights, NY 10598, USA 80 /*--*/ 81 82 /* System library. */ 83 84 #include <sys_defs.h> 85 #include <string.h> 86 #include <ctype.h> 87 88 /* Utility library. */ 89 90 #include <msg.h> 91 #include <vstring.h> 92 93 /* Global library. */ 94 95 #include <lex_822.h> 96 #include <header_token.h> 97 98 /* Application-specific. */ 99 100 /* 101 * Silly little macros. 102 */ 103 #define STR(x) vstring_str(x) 104 #define LEN(x) VSTRING_LEN(x) 105 #define CU_CHAR_PTR(x) ((const unsigned char *) (x)) 106 107 /* header_token - parse out the next item in a message header */ 108 109 ssize_t header_token(HEADER_TOKEN *token, ssize_t token_len, 110 VSTRING *token_buffer, const char **ptr, 111 const char *user_specials, int user_terminator) 112 { 113 ssize_t comment_level; 114 const unsigned char *cp; 115 ssize_t len; 116 int ch; 117 ssize_t tok_count; 118 ssize_t n; 119 120 /* 121 * Initialize. 122 */ 123 VSTRING_RESET(token_buffer); 124 cp = CU_CHAR_PTR(*ptr); 125 tok_count = 0; 126 if (user_specials == 0) 127 user_specials = LEX_822_SPECIALS; 128 129 /* 130 * Main parsing loop. 131 * 132 * XXX What was the reason to continue parsing when user_terminator is 133 * specified? Perhaps this was needed at some intermediate stage of 134 * development? 
135 */ 136 while ((ch = *cp) != 0 && (user_terminator != 0 || tok_count < token_len)) { 137 cp++; 138 139 /* 140 * Skip RFC 822 linear white space. 141 */ 142 if (IS_SPACE_TAB_CR_LF(ch)) 143 continue; 144 145 /* 146 * Terminator. 147 */ 148 if (ch == user_terminator) 149 break; 150 151 /* 152 * Skip RFC 822 comment. 153 */ 154 if (ch == '(') { 155 comment_level = 1; 156 while ((ch = *cp) != 0) { 157 cp++; 158 if (ch == '(') { /* comments can nest! */ 159 comment_level++; 160 } else if (ch == ')') { 161 if (--comment_level == 0) 162 break; 163 } else if (ch == '\\') { 164 if ((ch = *cp) == 0) 165 break; 166 cp++; 167 } 168 } 169 continue; 170 } 171 172 /* 173 * Copy quoted text according to RFC 822. 174 */ 175 if (ch == '"') { 176 if (tok_count < token_len) { 177 token[tok_count].u.offset = LEN(token_buffer); 178 token[tok_count].type = HEADER_TOK_QSTRING; 179 } 180 while ((ch = *cp) != 0) { 181 cp++; 182 if (ch == '"') 183 break; 184 if (ch == '\n') { /* unfold */ 185 if (tok_count < token_len) { 186 len = LEN(token_buffer); 187 while (len > 0 188 && IS_SPACE_TAB_CR_LF(STR(token_buffer)[len - 1])) 189 len--; 190 if (len < LEN(token_buffer)) 191 vstring_truncate(token_buffer, len); 192 } 193 continue; 194 } 195 if (ch == '\\') { 196 if ((ch = *cp) == 0) 197 break; 198 cp++; 199 } 200 if (tok_count < token_len) 201 VSTRING_ADDCH(token_buffer, ch); 202 } 203 if (tok_count < token_len) { 204 VSTRING_ADDCH(token_buffer, 0); 205 tok_count++; 206 } 207 continue; 208 } 209 210 /* 211 * Control, or special. 212 */ 213 if (strchr(user_specials, ch) || ISCNTRL(ch)) { 214 if (tok_count < token_len) { 215 token[tok_count].u.offset = LEN(token_buffer); 216 token[tok_count].type = ch; 217 VSTRING_ADDCH(token_buffer, ch); 218 VSTRING_ADDCH(token_buffer, 0); 219 tok_count++; 220 } 221 continue; 222 } 223 224 /* 225 * Token. 
226 */ 227 else { 228 if (tok_count < token_len) { 229 token[tok_count].u.offset = LEN(token_buffer); 230 token[tok_count].type = HEADER_TOK_TOKEN; 231 VSTRING_ADDCH(token_buffer, ch); 232 } 233 while ((ch = *cp) != 0 && !IS_SPACE_TAB_CR_LF(ch) 234 && !ISCNTRL(ch) && !strchr(user_specials, ch)) { 235 cp++; 236 if (tok_count < token_len) 237 VSTRING_ADDCH(token_buffer, ch); 238 } 239 if (tok_count < token_len) { 240 VSTRING_ADDCH(token_buffer, 0); 241 tok_count++; 242 } 243 continue; 244 } 245 } 246 247 /* 248 * Ignore a zero-length item after the last terminator. 249 */ 250 if (tok_count == 0 && ch == 0) 251 return (-1); 252 253 /* 254 * Finalize. Fill in the string pointer array, now that the token buffer 255 * is no longer dynamically reallocated as it grows. 256 */ 257 *ptr = (const char *) cp; 258 for (n = 0; n < tok_count; n++) 259 token[n].u.value = STR(token_buffer) + token[n].u.offset; 260 261 if (msg_verbose) 262 msg_info("header_token: %s %s %s", 263 tok_count > 0 ? token[0].u.value : "", 264 tok_count > 1 ? token[1].u.value : "", 265 tok_count > 2 ? token[2].u.value : ""); 266 267 return (tok_count); 268 } 269