1 /* $NetBSD: tok822_parse.c,v 1.1.1.1 2009/06/23 10:08:48 tron Exp $ */ 2 3 /*++ 4 /* NAME 5 /* tok822_parse 3 6 /* SUMMARY 7 /* RFC 822 address parser 8 /* SYNOPSIS 9 /* #include <tok822.h> 10 /* 11 /* TOK822 *tok822_scan_limit(str, tailp, limit) 12 /* const char *str; 13 /* TOK822 **tailp; 14 /* int limit; 15 /* 16 /* TOK822 *tok822_scan(str, tailp) 17 /* const char *str; 18 /* TOK822 **tailp; 19 /* 20 /* TOK822 *tok822_parse_limit(str, limit) 21 /* const char *str; 22 /* int limit; 23 /* 24 /* TOK822 *tok822_parse(str) 25 /* const char *str; 26 /* 27 /* TOK822 *tok822_scan_addr(str) 28 /* const char *str; 29 /* 30 /* VSTRING *tok822_externalize(buffer, tree, flags) 31 /* VSTRING *buffer; 32 /* TOK822 *tree; 33 /* int flags; 34 /* 35 /* VSTRING *tok822_internalize(buffer, tree, flags) 36 /* VSTRING *buffer; 37 /* TOK822 *tree; 38 /* int flags; 39 /* DESCRIPTION 40 /* This module converts address lists between string form and parse 41 /* tree formats. The string form can appear in two different ways: 42 /* external (or quoted) form, as used in message headers, and internal 43 /* (unquoted) form, as used internally by the mail software. 44 /* Although RFC 822 expects 7-bit data, these routines pay no 45 /* special attention to 8-bit characters. 46 /* 47 /* tok822_scan() converts the external-form string in \fIstr\fR 48 /* to a linear token list. The \fItailp\fR argument is a null pointer 49 /* or receives the pointer value of the last result list element. 50 /* 51 /* tok822_scan_limit() implements tok822_scan(), which is a macro. 52 /* The \fIlimit\fR argument is either zero or an upper bound on the 53 /* number of tokens produced. 54 /* 55 /* tok822_parse() converts the external-form address list in 56 /* \fIstr\fR to the corresponding token tree. The parser is permissive 57 /* and will not throw away information that it does not understand. 58 /* The parser adds missing commas between addresses. 59 /* 60 /* tok822_parse_limit() implements tok822_parse(), which is a macro. 61 /* The \fIlimit\fR argument is either zero or an upper bound on the 62 /* number of tokens produced. 63 /* 64 /* tok822_scan_addr() converts the external-form string in 65 /* \fIstr\fR to an address token tree. This is just string to 66 /* token list conversion; no parsing is done. This routine is 67 /* suitable for data that should contain just one address and no 68 /* other information. 69 /* 70 /* tok822_externalize() converts a token list to external form. 71 /* Where appropriate, characters and strings are quoted and white 72 /* space is inserted. The \fIflags\fR argument is the binary OR of 73 /* zero or more of the following: 74 /* .IP TOK822_STR_WIPE 75 /* Initially, truncate the result to zero length. 76 /* .IP TOK822_STR_TERM 77 /* Append a null terminator to the result when done. 78 /* .IP TOK822_STR_LINE 79 /* Append a line break after each comma token, instead of appending 80 /* whitespace. It is up to the caller to concatenate short lines to 81 /* produce longer ones. 82 /* .IP TOK822_STR_TRNC 83 /* Truncate non-address information to 250 characters per address, to 84 /* protect Sendmail systems that are vulnerable to the problem in CERT 85 /* advisory CA-2003-07. 86 /* This flag has effect with tok822_externalize() only. 87 /* .PP 88 /* The macro TOK_822_NONE expresses that none of the above features 89 /* should be activated. 90 /* 91 /* The macro TOK822_STR_DEFL combines the TOK822_STR_WIPE and 92 /* TOK822_STR_TERM flags. This is useful for most token to string 93 /* conversions. 94 /* 95 /* The macro TOK822_STR_HEAD combines the TOK822_STR_TERM, 96 /* TOK822_STR_LINE and TOK822_STR_TRNC flags. This is useful for 97 /* the special case of token to mail header conversion. 98 /* 99 /* tok822_internalize() converts a token list to string form, 100 /* without quoting. White space is inserted where appropriate. 101 /* The \fIflags\fR argument is as with tok822_externalize(). 102 /* STANDARDS 103 /* .ad 104 /* .fi 105 /* RFC 822 (ARPA Internet Text Messages). In addition to this standard 106 /* this module implements additional operators such as % and !. These 107 /* are needed because the real world is not all RFC 822. Also, the ':' 108 /* operator is allowed to appear inside addresses, to accommodate DECnet. 109 /* In addition, 8-bit data is not given special treatment. 110 /* LICENSE 111 /* .ad 112 /* .fi 113 /* The Secure Mailer license must be distributed with this software. 114 /* AUTHOR(S) 115 /* Wietse Venema 116 /* IBM T.J. Watson Research 117 /* P.O. Box 704 118 /* Yorktown Heights, NY 10598, USA 119 /*--*/ 120 121 /* System library. */ 122 123 #include <sys_defs.h> 124 #include <ctype.h> 125 #include <string.h> 126 127 /* Utility library. */ 128 129 #include <vstring.h> 130 #include <msg.h> 131 #include <stringops.h> 132 133 /* Global library. */ 134 135 #include "lex_822.h" 136 #include "quote_822_local.h" 137 #include "tok822.h" 138 139 /* 140 * I suppose this is my favorite macro. Used heavily for tokenizing. 141 */ 142 #define COLLECT(t,s,c,cond) { \ 143 while ((c = *(unsigned char *) s) != 0) { \ 144 if (c == '\\') { \ 145 if ((c = *(unsigned char *)++s) == 0) \ 146 break; \ 147 } else if (!(cond)) { \ 148 break; \ 149 } \ 150 VSTRING_ADDCH(t->vstr, IS_SPACE_TAB_CR_LF(c) ? ' ' : c); \ 151 s++; \ 152 } \ 153 VSTRING_TERMINATE(t->vstr); \ 154 } 155 156 #define COLLECT_SKIP_LAST(t,s,c,cond) { COLLECT(t,s,c,cond); if (*s) s++; } 157 158 /* 159 * Not quite as complex. The parser depends heavily on it. 160 */ 161 #define SKIP(tp, cond) { \ 162 while (tp->type && (cond)) \ 163 tp = tp->prev; \ 164 } 165 166 #define MOVE_COMMENT_AND_CONTINUE(tp, right) { \ 167 TOK822 *prev = tok822_unlink(tp); \ 168 right = tok822_prepend(right, tp); \ 169 tp = prev; \ 170 continue; \ 171 } 172 173 #define SKIP_MOVE_COMMENT(tp, cond, right) { \ 174 while (tp->type && (cond)) { \ 175 if (tp->type == TOK822_COMMENT) \ 176 MOVE_COMMENT_AND_CONTINUE(tp, right); \ 177 tp = tp->prev; \ 178 } \ 179 } 180 181 /* 182 * Single-character operators. We include the % and ! operators because not 183 * all the world is RFC822. XXX Make this operator list configurable when we 184 * have a real rewriting language. Include | for aliases file parsing. 185 */ 186 static char tok822_opchar[] = "|%!" LEX_822_SPECIALS; 187 static void tok822_quote_atom(TOK822 *); 188 static const char *tok822_comment(TOK822 *, const char *); 189 static TOK822 *tok822_group(int, TOK822 *, TOK822 *, int); 190 static void tok822_copy_quoted(VSTRING *, char *, char *); 191 static int tok822_append_space(TOK822 *); 192 193 #define DO_WORD (1<<0) /* finding a word is ok here */ 194 #define DO_GROUP (1<<1) /* doing an address group */ 195 196 #define ADD_COMMA ',' /* resynchronize */ 197 #define NO_MISSING_COMMA 0 198 199 /* tok822_internalize - token tree to string, internal form */ 200 201 VSTRING *tok822_internalize(VSTRING *vp, TOK822 *tree, int flags) 202 { 203 TOK822 *tp; 204 205 if (flags & TOK822_STR_WIPE) 206 VSTRING_RESET(vp); 207 208 for (tp = tree; tp; tp = tp->next) { 209 switch (tp->type) { 210 case ',': 211 VSTRING_ADDCH(vp, tp->type); 212 if (flags & TOK822_STR_LINE) { 213 VSTRING_ADDCH(vp, '\n'); 214 continue; 215 } 216 break; 217 case TOK822_ADDR: 218 tok822_internalize(vp, tp->head, TOK822_STR_NONE); 219 break; 220 case TOK822_COMMENT: 221 case TOK822_ATOM: 222 case TOK822_QSTRING: 223 vstring_strcat(vp, vstring_str(tp->vstr)); 224 break; 225 case TOK822_DOMLIT: 226 VSTRING_ADDCH(vp, '['); 227 vstring_strcat(vp, vstring_str(tp->vstr)); 228 VSTRING_ADDCH(vp, ']'); 229 break; 230 case TOK822_STARTGRP: 231 VSTRING_ADDCH(vp, ':'); 232 break; 233 default: 234 if (tp->type >= TOK822_MINTOK) 235 msg_panic("tok822_internalize: unknown operator %d", tp->type); 236 VSTRING_ADDCH(vp, tp->type); 237 } 238 if (tok822_append_space(tp)) 239 VSTRING_ADDCH(vp, ' '); 240 } 241 if (flags & TOK822_STR_TERM) 242 VSTRING_TERMINATE(vp); 243 return (vp); 244 } 245 246 /* strip_address - strip non-address text from address expression */ 247 248 static void strip_address(VSTRING *vp, ssize_t start, TOK822 *addr) 249 { 250 VSTRING *tmp; 251 252 /* 253 * Emit plain <address>. Discard any comments or phrases. 254 */ 255 VSTRING_TERMINATE(vp); 256 msg_warn("stripping too many comments from address: %.100s...", 257 printable(vstring_str(vp) + start, '?')); 258 vstring_truncate(vp, start); 259 VSTRING_ADDCH(vp, '<'); 260 if (addr) { 261 tmp = vstring_alloc(100); 262 tok822_internalize(tmp, addr, TOK822_STR_TERM); 263 quote_822_local_flags(vp, vstring_str(tmp), 264 QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND); 265 vstring_free(tmp); 266 } 267 VSTRING_ADDCH(vp, '>'); 268 } 269 270 /* tok822_externalize - token tree to string, external form */ 271 272 VSTRING *tok822_externalize(VSTRING *vp, TOK822 *tree, int flags) 273 { 274 VSTRING *tmp; 275 TOK822 *tp; 276 ssize_t start; 277 TOK822 *addr; 278 ssize_t addr_len; 279 280 /* 281 * Guard against a Sendmail buffer overflow (CERT advisory CA-2003-07). 282 * The problem was that Sendmail could store too much non-address text 283 * (comments, phrases, etc.) into a static 256-byte buffer. 284 * 285 * When the buffer fills up, fixed Sendmail versions remove comments etc. 286 * and reduce the information to just <$g>, which expands to <address>. 287 * No change is made when an address expression (text separated by 288 * commas) contains no address. This fix reportedly also protects 289 * Sendmail systems that are still vulnerable to this problem. 290 * 291 * Postfix takes the same approach, grudgingly. To avoid unnecessary damage, 292 * Postfix removes comments etc. only when the amount of non-address text 293 * in an address expression (text separated by commas) exceeds 250 bytes. 294 * 295 * With Sendmail, the address part of an address expression is the 296 * right-most <> instance in that expression. If an address expression 297 * contains no <>, then Postfix guarantees that it contains at most one 298 * non-comment string; that string is the address part of the address 299 * expression, so there is no ambiguity. 300 * 301 * Finally, we note that stress testing shows that other code in Sendmail 302 * 8.12.8 bluntly truncates ``text <address>'' to 256 bytes even when 303 * this means chopping the <address> somewhere in the middle. This is a 304 * loss of control that we're not entirely comfortable with. However, 305 * unbalanced quotes and dangling backslash do not seem to influence the 306 * way that Sendmail parses headers, so this is not an urgent problem. 307 */ 308 #define MAX_NONADDR_LENGTH 250 309 310 #define RESET_NONADDR_LENGTH { \ 311 start = VSTRING_LEN(vp); \ 312 addr = 0; \ 313 addr_len = 0; \ 314 } 315 316 #define ENFORCE_NONADDR_LENGTH do { \ 317 if (addr && VSTRING_LEN(vp) - addr_len > start + MAX_NONADDR_LENGTH) \ 318 strip_address(vp, start, addr->head); \ 319 } while(0) 320 321 if (flags & TOK822_STR_WIPE) 322 VSTRING_RESET(vp); 323 324 if (flags & TOK822_STR_TRNC) 325 RESET_NONADDR_LENGTH; 326 327 for (tp = tree; tp; tp = tp->next) { 328 switch (tp->type) { 329 case ',': 330 if (flags & TOK822_STR_TRNC) 331 ENFORCE_NONADDR_LENGTH; 332 VSTRING_ADDCH(vp, tp->type); 333 VSTRING_ADDCH(vp, (flags & TOK822_STR_LINE) ? '\n' : ' '); 334 if (flags & TOK822_STR_TRNC) 335 RESET_NONADDR_LENGTH; 336 continue; 337 338 /* 339 * XXX In order to correctly externalize an address, it is not 340 * sufficient to quote individual atoms. There are higher-level 341 * rules that say when an address localpart needs to be quoted. 342 * We wing it with the quote_822_local() routine, which ignores 343 * the issue of atoms in the domain part that would need quoting. 344 */ 345 case TOK822_ADDR: 346 addr = tp; 347 tmp = vstring_alloc(100); 348 tok822_internalize(tmp, tp->head, TOK822_STR_TERM); 349 addr_len = VSTRING_LEN(vp); 350 quote_822_local_flags(vp, vstring_str(tmp), 351 QUOTE_FLAG_8BITCLEAN | QUOTE_FLAG_APPEND); 352 addr_len = VSTRING_LEN(vp) - addr_len; 353 vstring_free(tmp); 354 break; 355 case TOK822_ATOM: 356 case TOK822_COMMENT: 357 vstring_strcat(vp, vstring_str(tp->vstr)); 358 break; 359 case TOK822_QSTRING: 360 VSTRING_ADDCH(vp, '"'); 361 tok822_copy_quoted(vp, vstring_str(tp->vstr), "\"\\\r\n"); 362 VSTRING_ADDCH(vp, '"'); 363 break; 364 case TOK822_DOMLIT: 365 VSTRING_ADDCH(vp, '['); 366 tok822_copy_quoted(vp, vstring_str(tp->vstr), "\\\r\n"); 367 VSTRING_ADDCH(vp, ']'); 368 break; 369 case TOK822_STARTGRP: 370 VSTRING_ADDCH(vp, ':'); 371 break; 372 case '<': 373 if (tp->next && tp->next->type == '>') { 374 addr = tp; 375 addr_len = 0; 376 } 377 VSTRING_ADDCH(vp, '<'); 378 break; 379 default: 380 if (tp->type >= TOK822_MINTOK) 381 msg_panic("tok822_externalize: unknown operator %d", tp->type); 382 VSTRING_ADDCH(vp, tp->type); 383 } 384 if (tok822_append_space(tp)) 385 VSTRING_ADDCH(vp, ' '); 386 } 387 if (flags & TOK822_STR_TRNC) 388 ENFORCE_NONADDR_LENGTH; 389 390 if (flags & TOK822_STR_TERM) 391 VSTRING_TERMINATE(vp); 392 return (vp); 393 } 394 395 /* tok822_copy_quoted - copy a string while quoting */ 396 397 static void tok822_copy_quoted(VSTRING *vp, char *str, char *quote_set) 398 { 399 int ch; 400 401 while ((ch = *(unsigned char *) str++) != 0) { 402 if (strchr(quote_set, ch)) 403 VSTRING_ADDCH(vp, '\\'); 404 VSTRING_ADDCH(vp, ch); 405 } 406 } 407 408 /* tok822_append_space - see if space is needed after this token */ 409 410 static int tok822_append_space(TOK822 *tp) 411 { 412 TOK822 *next; 413 414 if (tp == 0 || (next = tp->next) == 0 || tp->owner != 0) 415 return (0); 416 if (tp->type == ',' || tp->type == TOK822_STARTGRP || next->type == '<') 417 return (1); 418 419 #define NON_OPERATOR(x) \ 420 (x->type == TOK822_ATOM || x->type == TOK822_QSTRING \ 421 || x->type == TOK822_COMMENT || x->type == TOK822_DOMLIT \ 422 || x->type == TOK822_ADDR) 423 424 return (NON_OPERATOR(tp) && NON_OPERATOR(next)); 425 } 426 427 /* tok822_scan_limit - tokenize string */ 428 429 TOK822 *tok822_scan_limit(const char *str, TOK822 **tailp, int tok_count_limit) 430 { 431 TOK822 *head = 0; 432 TOK822 *tail = 0; 433 TOK822 *tp; 434 int ch; 435 int tok_count = 0; 436 437 /* 438 * XXX 2822 new feature: Section 4.1 allows "." to appear in a phrase (to 439 * allow for forms such as: Johnny B. Goode <johhny@domain.org>. I cannot 440 * handle that at the tokenizer level - it is not context sensitive. And 441 * to fix this at the parser level requires radical changes to preserve 442 * white space as part of the token stream. Thanks a lot, people. 443 */ 444 while ((ch = *(unsigned char *) str++) != 0) { 445 if (IS_SPACE_TAB_CR_LF(ch)) 446 continue; 447 if (ch == '(') { 448 tp = tok822_alloc(TOK822_COMMENT, (char *) 0); 449 str = tok822_comment(tp, str); 450 } else if (ch == '[') { 451 tp = tok822_alloc(TOK822_DOMLIT, (char *) 0); 452 COLLECT_SKIP_LAST(tp, str, ch, ch != ']'); 453 } else if (ch == '"') { 454 tp = tok822_alloc(TOK822_QSTRING, (char *) 0); 455 COLLECT_SKIP_LAST(tp, str, ch, ch != '"'); 456 } else if (ch != '\\' && strchr(tok822_opchar, ch)) { 457 tp = tok822_alloc(ch, (char *) 0); 458 } else { 459 tp = tok822_alloc(TOK822_ATOM, (char *) 0); 460 str -= 1; /* \ may be first */ 461 COLLECT(tp, str, ch, !IS_SPACE_TAB_CR_LF(ch) && !strchr(tok822_opchar, ch)); 462 tok822_quote_atom(tp); 463 } 464 if (head == 0) { 465 head = tail = tp; 466 while (tail->next) 467 tail = tail->next; 468 } else { 469 tail = tok822_append(tail, tp); 470 } 471 if (tok_count_limit > 0 && ++tok_count >= tok_count_limit) 472 break; 473 } 474 if (tailp) 475 *tailp = tail; 476 return (head); 477 } 478 479 /* tok822_parse_limit - translate external string to token tree */ 480 481 TOK822 *tok822_parse_limit(const char *str, int tok_count_limit) 482 { 483 TOK822 *head; 484 TOK822 *tail; 485 TOK822 *right; 486 TOK822 *first_token; 487 TOK822 *last_token; 488 TOK822 *tp; 489 int state; 490 491 /* 492 * First, tokenize the string, from left to right. We are not allowed to 493 * throw away any information that we do not understand. With a flat 494 * token list that contains all tokens, we can always convert back to 495 * string form. 496 */ 497 if ((first_token = tok822_scan_limit(str, &last_token, tok_count_limit)) == 0) 498 return (0); 499 500 /* 501 * For convenience, sandwich the token list between two sentinel tokens. 502 */ 503 #define GLUE(left,rite) { left->next = rite; rite->prev = left; } 504 505 head = tok822_alloc(0, (char *) 0); 506 GLUE(head, first_token); 507 tail = tok822_alloc(0, (char *) 0); 508 GLUE(last_token, tail); 509 510 /* 511 * Next step is to transform the token list into a parse tree. This is 512 * done most conveniently from right to left. If there is something that 513 * we do not understand, just leave it alone, don't throw it away. The 514 * address information that we're looking for sits in-between the current 515 * node (tp) and the one called right. Add missing commas on the fly. 516 */ 517 state = DO_WORD; 518 right = tail; 519 tp = tail->prev; 520 while (tp->type) { 521 if (tp->type == TOK822_COMMENT) { /* move comment to the side */ 522 MOVE_COMMENT_AND_CONTINUE(tp, right); 523 } else if (tp->type == ';') { /* rh side of named group */ 524 right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA); 525 state = DO_GROUP | DO_WORD; 526 } else if (tp->type == ':' && (state & DO_GROUP) != 0) { 527 tp->type = TOK822_STARTGRP; 528 (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 529 SKIP(tp, tp->type != ','); 530 right = tp; 531 continue; 532 } else if (tp->type == '>') { /* rh side of <route> */ 533 right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA); 534 SKIP_MOVE_COMMENT(tp, tp->type != '<', right); 535 (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 536 SKIP(tp, tp->type > 0xff || strchr(">;,:", tp->type) == 0); 537 right = tp; 538 state |= DO_WORD; 539 continue; 540 } else if (tp->type == TOK822_ATOM || tp->type == TOK822_QSTRING 541 || tp->type == TOK822_DOMLIT) { 542 if ((state & DO_WORD) == 0) 543 right = tok822_group(TOK822_ADDR, tp, right, ADD_COMMA)->next; 544 state &= ~DO_WORD; 545 } else if (tp->type == ',') { 546 right = tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 547 state |= DO_WORD; 548 } else { 549 state |= DO_WORD; 550 } 551 tp = tp->prev; 552 } 553 (void) tok822_group(TOK822_ADDR, tp, right, NO_MISSING_COMMA); 554 555 /* 556 * Discard the sentinel tokens on the left and right extremes. Properly 557 * terminate the resulting list. 558 */ 559 tp = (head->next != tail ? head->next : 0); 560 tok822_cut_before(head->next); 561 tok822_free(head); 562 tok822_cut_before(tail); 563 tok822_free(tail); 564 return (tp); 565 } 566 567 /* tok822_quote_atom - see if an atom needs quoting when externalized */ 568 569 static void tok822_quote_atom(TOK822 *tp) 570 { 571 char *cp; 572 int ch; 573 574 /* 575 * RFC 822 expects 7-bit data. Rather than quoting every 8-bit character 576 * (and still passing it on as 8-bit data) we leave 8-bit data alone. 577 */ 578 for (cp = vstring_str(tp->vstr); (ch = *(unsigned char *) cp) != 0; cp++) { 579 if ( /* !ISASCII(ch) || */ ch == ' ' 580 || ISCNTRL(ch) || strchr(tok822_opchar, ch)) { 581 tp->type = TOK822_QSTRING; 582 break; 583 } 584 } 585 } 586 587 /* tok822_comment - tokenize comment */ 588 589 static const char *tok822_comment(TOK822 *tp, const char *str) 590 { 591 int level = 1; 592 int ch; 593 594 /* 595 * XXX We cheat by storing comments in their external form. Otherwise it 596 * would be a royal pain to preserve \ before (. That would require a 597 * recursive parser; the easy to implement stack-based recursion would be 598 * too expensive. 599 */ 600 VSTRING_ADDCH(tp->vstr, '('); 601 602 while ((ch = *(unsigned char *) str) != 0) { 603 VSTRING_ADDCH(tp->vstr, ch); 604 str++; 605 if (ch == '(') { /* comments can nest! */ 606 level++; 607 } else if (ch == ')') { 608 if (--level == 0) 609 break; 610 } else if (ch == '\\') { 611 if ((ch = *(unsigned char *) str) == 0) 612 break; 613 VSTRING_ADDCH(tp->vstr, ch); 614 str++; 615 } 616 } 617 VSTRING_TERMINATE(tp->vstr); 618 return (str); 619 } 620 621 /* tok822_group - cluster a group of tokens */ 622 623 static TOK822 *tok822_group(int group_type, TOK822 *left, TOK822 *right, int sync_type) 624 { 625 TOK822 *group; 626 TOK822 *sync; 627 TOK822 *first; 628 629 /* 630 * Cluster the tokens between left and right under their own parse tree 631 * node. Optionally insert a resync token. 632 */ 633 if (left != right && (first = left->next) != right) { 634 tok822_cut_before(right); 635 tok822_cut_before(first); 636 group = tok822_alloc(group_type, (char *) 0); 637 tok822_sub_append(group, first); 638 tok822_append(left, group); 639 tok822_append(group, right); 640 if (sync_type) { 641 sync = tok822_alloc(sync_type, (char *) 0); 642 tok822_append(left, sync); 643 } 644 } 645 return (left); 646 } 647 648 /* tok822_scan_addr - convert external address string to address token */ 649 650 TOK822 *tok822_scan_addr(const char *addr) 651 { 652 TOK822 *tree = tok822_alloc(TOK822_ADDR, (char *) 0); 653 654 tree->head = tok822_scan(addr, &tree->tail); 655 return (tree); 656 } 657 658 #ifdef TEST 659 660 #include <unistd.h> 661 #include <vstream.h> 662 #include <readlline.h> 663 664 /* tok822_print - display token */ 665 666 static void tok822_print(TOK822 *list, int indent) 667 { 668 TOK822 *tp; 669 670 for (tp = list; tp; tp = tp->next) { 671 if (tp->type < TOK822_MINTOK) { 672 vstream_printf("%*s %s \"%c\"\n", indent, "", "OP", tp->type); 673 } else if (tp->type == TOK822_ADDR) { 674 vstream_printf("%*s %s\n", indent, "", "address"); 675 tok822_print(tp->head, indent + 2); 676 } else if (tp->type == TOK822_STARTGRP) { 677 vstream_printf("%*s %s\n", indent, "", "group \":\""); 678 } else { 679 vstream_printf("%*s %s \"%s\"\n", indent, "", 680 tp->type == TOK822_COMMENT ? "comment" : 681 tp->type == TOK822_ATOM ? "atom" : 682 tp->type == TOK822_QSTRING ? "quoted string" : 683 tp->type == TOK822_DOMLIT ? "domain literal" : 684 tp->type == TOK822_ADDR ? "address" : 685 "unknown\n", vstring_str(tp->vstr)); 686 } 687 } 688 } 689 690 int main(int unused_argc, char **unused_argv) 691 { 692 VSTRING *vp = vstring_alloc(100); 693 TOK822 *list; 694 VSTRING *buf = vstring_alloc(100); 695 696 #define TEST_TOKEN_LIMIT 20 697 698 while (readlline(buf, VSTREAM_IN, (int *) 0)) { 699 while (VSTRING_LEN(buf) > 0 && vstring_end(buf)[-1] == '\n') { 700 vstring_end(buf)[-1] = 0; 701 vstring_truncate(buf, VSTRING_LEN(buf) - 1); 702 } 703 if (!isatty(vstream_fileno(VSTREAM_IN))) 704 vstream_printf(">>>%s<<<\n\n", vstring_str(buf)); 705 list = tok822_parse_limit(vstring_str(buf), TEST_TOKEN_LIMIT); 706 vstream_printf("Parse tree:\n"); 707 tok822_print(list, 0); 708 vstream_printf("\n"); 709 710 vstream_printf("Internalized:\n%s\n\n", 711 vstring_str(tok822_internalize(vp, list, TOK822_STR_DEFL))); 712 vstream_fflush(VSTREAM_OUT); 713 vstream_printf("Externalized, no newlines inserted:\n%s\n\n", 714 vstring_str(tok822_externalize(vp, list, 715 TOK822_STR_DEFL | TOK822_STR_TRNC))); 716 vstream_fflush(VSTREAM_OUT); 717 vstream_printf("Externalized, newlines inserted:\n%s\n\n", 718 vstring_str(tok822_externalize(vp, list, 719 TOK822_STR_DEFL | TOK822_STR_LINE | TOK822_STR_TRNC))); 720 vstream_fflush(VSTREAM_OUT); 721 tok822_free_tree(list); 722 } 723 vstring_free(vp); 724 vstring_free(buf); 725 return (0); 726 } 727 728 #endif 729