1 # include "sendmail.h" 2 3 static char SccsId[] = "@(#)parseaddr.c 3.34 11/21/81"; 4 5 /* 6 ** PARSE -- Parse an address 7 ** 8 ** Parses an address and breaks it up into three parts: a 9 ** net to transmit the message on, the host to transmit it 10 ** to, and a user on that host. These are loaded into an 11 ** ADDRESS header with the values squirreled away if necessary. 12 ** The "user" part may not be a real user; the process may 13 ** just reoccur on that machine. For example, on a machine 14 ** with an arpanet connection, the address 15 ** csvax.bill@berkeley 16 ** will break up to a "user" of 'csvax.bill' and a host 17 ** of 'berkeley' -- to be transmitted over the arpanet. 18 ** 19 ** Parameters: 20 ** addr -- the address to parse. 21 ** a -- a pointer to the address descriptor buffer. 22 ** If NULL, a header will be created. 23 ** copyf -- determines what shall be copied: 24 ** -1 -- don't copy anything. The printname 25 ** (q_paddr) is just addr, and the 26 ** user & host are allocated internally 27 ** to parse. 28 ** 0 -- copy out the parsed user & host, but 29 ** don't copy the printname. 30 ** +1 -- copy everything. 31 ** 32 ** Returns: 33 ** A pointer to the address descriptor header (`a' if 34 ** `a' is non-NULL). 35 ** NULL on error. 36 ** 37 ** Side Effects: 38 ** none 39 */ 40 41 # define DELIMCHARS "$()<>,;\\\"\r\n" /* word delimiters */ 42 43 ADDRESS * 44 parse(addr, a, copyf) 45 char *addr; 46 register ADDRESS *a; 47 int copyf; 48 { 49 register char **pvp; 50 register struct mailer *m; 51 extern char **prescan(); 52 extern ADDRESS *buildaddr(); 53 54 /* 55 ** Initialize and prescan address. 56 */ 57 58 To = addr; 59 # ifdef DEBUG 60 if (Debug) 61 printf("\n--parse(%s)\n", addr); 62 # endif DEBUG 63 64 pvp = prescan(addr, '\0'); 65 if (pvp == NULL) 66 return (NULL); 67 68 /* 69 ** Apply rewriting rules. 70 */ 71 72 rewrite(pvp, 0); 73 74 /* 75 ** See if we resolved to a real mailer. 76 */ 77 78 if (pvp[0][0] != CANONNET) 79 { 80 setstat(EX_USAGE); 81 usrerr("cannot resolve name"); 82 return (NULL); 83 } 84 85 /* 86 ** Build canonical address from pvp. 87 */ 88 89 a = buildaddr(pvp, a); 90 if (a == NULL) 91 return (NULL); 92 m = a->q_mailer; 93 94 /* 95 ** Make local copies of the host & user and then 96 ** transport them out. 97 */ 98 99 if (copyf > 0) 100 a->q_paddr = newstr(addr); 101 else 102 a->q_paddr = addr; 103 104 if (copyf >= 0) 105 { 106 if (a->q_host != NULL) 107 a->q_host = newstr(a->q_host); 108 else 109 a->q_host = ""; 110 if (a->q_user != a->q_paddr) 111 a->q_user = newstr(a->q_user); 112 } 113 114 /* 115 ** Do UPPER->lower case mapping unless inhibited. 116 */ 117 118 if (!bitset(M_HST_UPPER, m->m_flags)) 119 makelower(a->q_host); 120 if (!bitset(M_USR_UPPER, m->m_flags)) 121 makelower(a->q_user); 122 123 /* 124 ** Compute return value. 125 */ 126 127 # ifdef DEBUG 128 if (Debug) 129 { 130 printf("parse-->"); 131 printaddr(a, FALSE); 132 } 133 # endif DEBUG 134 135 return (a); 136 } 137 /* 138 ** PRESCAN -- Prescan name and make it canonical 139 ** 140 ** Scans a name and turns it into canonical form. This involves 141 ** deleting blanks, comments (in parentheses), and turning the 142 ** word "at" into an at-sign ("@"). The name is copied as this 143 ** is done; it is legal to copy a name onto itself, since this 144 ** process can only make things smaller. 145 ** 146 ** This routine knows about quoted strings and angle brackets. 147 ** 148 ** There are certain subtleties to this routine. The one that 149 ** comes to mind now is that backslashes on the ends of names 150 ** are silently stripped off; this is intentional. The problem 151 ** is that some versions of sndmsg (like at LBL) set the kill 152 ** character to something other than @ when reading addresses; 153 ** so people type "csvax.eric\@berkeley" -- which screws up the 154 ** berknet mailer. 155 ** 156 ** Parameters: 157 ** addr -- the name to chomp. 158 ** delim -- the delimiter for the address, normally 159 ** '\0' or ','; \0 is accepted in any case. 160 ** are moving in place; set buflim to high core. 161 ** 162 ** Returns: 163 ** A pointer to a vector of tokens. 164 ** NULL on error. 165 ** 166 ** Side Effects: 167 ** none. 168 */ 169 170 # define OPER 1 171 # define ATOM 2 172 # define EOTOK 3 173 # define QSTRING 4 174 # define SPACE 5 175 # define DOLLAR 6 176 # define GETONE 7 177 # define MACRO 8 178 179 char ** 180 prescan(addr, delim) 181 char *addr; 182 char delim; 183 { 184 register char *p; 185 static char buf[MAXNAME+MAXATOM]; 186 static char *av[MAXATOM+1]; 187 char **avp; 188 bool bslashmode; 189 int cmntcnt; 190 int brccnt; 191 register char c; 192 char *tok; 193 register char *q; 194 register int state; 195 int nstate; 196 extern char lower(); 197 198 q = buf; 199 bslashmode = FALSE; 200 cmntcnt = brccnt = 0; 201 avp = av; 202 state = OPER; 203 for (p = addr; *p != '\0' && *p != delim; ) 204 { 205 /* read a token */ 206 tok = q; 207 while ((c = *p++) != '\0' && c != delim) 208 { 209 /* chew up special characters */ 210 c &= ~0200; 211 *q = '\0'; 212 if (bslashmode) 213 { 214 c |= 0200; 215 bslashmode = FALSE; 216 } 217 else if (c == '\\') 218 { 219 bslashmode = TRUE; 220 continue; 221 } 222 else if (c == '"') 223 { 224 if (state == QSTRING) 225 state = OPER; 226 else 227 state = QSTRING; 228 break; 229 } 230 231 if (c == '$' && delim == '\t') 232 nstate = DOLLAR; 233 else 234 nstate = toktype(c); 235 switch (state) 236 { 237 case QSTRING: /* in quoted string */ 238 break; 239 240 case ATOM: /* regular atom */ 241 if (nstate != ATOM) 242 { 243 state = EOTOK; 244 p--; 245 } 246 break; 247 248 case GETONE: /* grab one character */ 249 state = OPER; 250 break; 251 252 case EOTOK: /* after atom or q-string */ 253 state = nstate; 254 if (state == SPACE) 255 continue; 256 break; 257 258 case SPACE: /* linear white space */ 259 state = nstate; 260 break; 261 262 case OPER: /* operator */ 263 if (nstate == SPACE) 264 continue; 265 state = nstate; 266 break; 267 268 case DOLLAR: /* $- etc. */ 269 state = OPER; 270 if (isascii(c) && isdigit(c)) 271 { 272 /* replacement */ 273 c = MATCHREPL; 274 state = GETONE; 275 p--; 276 break; 277 } 278 switch (c) 279 { 280 case '$': /* literal $ */ 281 break; 282 283 case '+': /* match anything */ 284 c = MATCHANY; 285 break; 286 287 case '-': /* match one token */ 288 c = MATCHONE; 289 break; 290 291 case '=': /* match one token of class */ 292 c = MATCHCLASS; 293 state = GETONE; 294 break; 295 296 case '#': /* canonical net name */ 297 c = CANONNET; 298 break; 299 300 case '@': /* canonical host name */ 301 c = CANONHOST; 302 break; 303 304 case ':': /* canonical user name */ 305 c = CANONUSER; 306 break; 307 308 default: 309 state = MACRO; 310 break; 311 } 312 break; 313 314 default: 315 syserr("prescan: unknown state %d", state); 316 } 317 318 if (state == EOTOK || state == SPACE) 319 break; 320 if (state == DOLLAR) 321 continue; 322 323 /* squirrel it away */ 324 if (q >= &buf[sizeof buf - 5]) 325 { 326 usrerr("Address too long"); 327 return (NULL); 328 } 329 if (state == MACRO) 330 { 331 char mbuf[3]; 332 333 mbuf[0] = '$'; 334 mbuf[1] = c; 335 mbuf[2] = '\0'; 336 (void) expand(mbuf, q, &buf[sizeof buf - 5]); 337 q += strlen(q); 338 state = EOTOK; 339 break; 340 } 341 *q++ = c; 342 343 /* decide whether this represents end of token */ 344 if (state == OPER) 345 break; 346 } 347 if (c == '\0' || c == delim) 348 p--; 349 350 /* new token */ 351 if (tok == q) 352 continue; 353 *q++ = '\0'; 354 355 c = tok[0]; 356 if (c == '(') 357 { 358 cmntcnt++; 359 continue; 360 } 361 else if (c == ')') 362 { 363 if (cmntcnt <= 0) 364 { 365 usrerr("Unbalanced ')'"); 366 return (NULL); 367 } 368 else 369 { 370 cmntcnt--; 371 continue; 372 } 373 } 374 else if (cmntcnt > 0) 375 continue; 376 377 /* we prefer <> specs */ 378 if (c == '<') 379 { 380 if (brccnt < 0) 381 { 382 usrerr("multiple < spec"); 383 return (NULL); 384 } 385 brccnt++; 386 if (brccnt == 1) 387 { 388 /* we prefer using machine readable name */ 389 q = buf; 390 *q = '\0'; 391 avp = av; 392 continue; 393 } 394 } 395 else if (c == '>') 396 { 397 if (brccnt <= 0) 398 { 399 usrerr("Unbalanced `>'"); 400 return (NULL); 401 } 402 else 403 brccnt--; 404 if (brccnt <= 0) 405 { 406 brccnt = -1; 407 continue; 408 } 409 } 410 411 if (avp >= &av[MAXATOM]) 412 { 413 syserr("prescan: too many tokens"); 414 return (NULL); 415 } 416 *avp++ = tok; 417 } 418 *avp = NULL; 419 if (cmntcnt > 0) 420 usrerr("Unbalanced '('"); 421 else if (brccnt > 0) 422 usrerr("Unbalanced '<'"); 423 else if (state == QSTRING) 424 usrerr("Unbalanced '\"'"); 425 else if (av[0] != NULL) 426 return (av); 427 return (NULL); 428 } 429 /* 430 ** TOKTYPE -- return token type 431 ** 432 ** Parameters: 433 ** c -- the character in question. 434 ** 435 ** Returns: 436 ** Its type. 437 ** 438 ** Side Effects: 439 ** none. 440 */ 441 442 toktype(c) 443 register char c; 444 { 445 static char buf[50]; 446 static bool firstime = TRUE; 447 448 if (firstime) 449 { 450 firstime = FALSE; 451 (void) expand("$o", buf, &buf[sizeof buf - 1]); 452 strcat(buf, DELIMCHARS); 453 } 454 if (!isascii(c)) 455 return (ATOM); 456 if (isspace(c)) 457 return (SPACE); 458 if (iscntrl(c) || index(buf, c) != NULL) 459 return (OPER); 460 return (ATOM); 461 } 462 /* 463 ** REWRITE -- apply rewrite rules to token vector. 464 ** 465 ** This routine is an ordered production system. Each rewrite 466 ** rule has a LHS (called the pattern) and a RHS (called the 467 ** rewrite); 'rwr' points the the current rewrite rule. 468 ** 469 ** For each rewrite rule, 'avp' points the address vector we 470 ** are trying to match against, and 'pvp' points to the pattern. 471 ** If pvp points to a special match value (MATCHANY, MATCHONE, 472 ** MATCHCLASS) then the address in avp matched is saved away 473 ** in the match vector (pointed to by 'mvp'). 474 ** 475 ** When a match between avp & pvp does not match, we try to 476 ** back out. If we back up over a MATCHONE or a MATCHCLASS 477 ** we must also back out the match in mvp. If we reach a 478 ** MATCHANY we just extend the match and start over again. 479 ** 480 ** When we finally match, we rewrite the address vector 481 ** and try over again. 482 ** 483 ** Parameters: 484 ** pvp -- pointer to token vector. 485 ** 486 ** Returns: 487 ** none. 488 ** 489 ** Side Effects: 490 ** pvp is modified. 491 */ 492 493 struct match 494 { 495 char **first; /* first token matched */ 496 char **last; /* last token matched */ 497 }; 498 499 # define MAXMATCH 9 /* max params per rewrite */ 500 501 502 rewrite(pvp, ruleset) 503 char **pvp; 504 int ruleset; 505 { 506 register char *ap; /* address pointer */ 507 register char *rp; /* rewrite pointer */ 508 register char **avp; /* address vector pointer */ 509 register char **rvp; /* rewrite vector pointer */ 510 struct rewrite *rwr; /* pointer to current rewrite rule */ 511 struct match mlist[MAXMATCH]; /* stores match on LHS */ 512 struct match *mlp; /* cur ptr into mlist */ 513 char *npvp[MAXATOM+1]; /* temporary space for rebuild */ 514 extern bool sameword(); 515 516 # ifdef DEBUG 517 if (Debug > 9) 518 { 519 printf("rewrite: original pvp:\n"); 520 printav(pvp); 521 } 522 # endif DEBUG 523 524 /* 525 ** Run through the list of rewrite rules, applying 526 ** any that match. 527 */ 528 529 for (rwr = RewriteRules[ruleset]; rwr != NULL; ) 530 { 531 # ifdef DEBUG 532 if (Debug > 10) 533 { 534 printf("-----trying rule:\n"); 535 printav(rwr->r_lhs); 536 } 537 # endif DEBUG 538 539 /* try to match on this rule */ 540 mlp = mlist; 541 for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; ) 542 { 543 ap = *avp; 544 rp = *rvp; 545 546 if (rp == NULL) 547 { 548 /* end-of-pattern before end-of-address */ 549 goto fail; 550 } 551 552 switch (*rp) 553 { 554 register STAB *s; 555 register int class; 556 557 case MATCHCLASS: 558 /* match any token in a class */ 559 class = rp[1]; 560 if (!isalpha(class)) 561 goto fail; 562 if (isupper(class)) 563 class -= 'A'; 564 else 565 class -= 'a'; 566 s = stab(ap, ST_CLASS, ST_FIND); 567 if (s == NULL || (s->s_class & (1 << class)) == 0) 568 goto fail; 569 570 /* explicit fall-through */ 571 572 case MATCHONE: 573 case MATCHANY: 574 /* match exactly one token */ 575 mlp->first = mlp->last = avp++; 576 mlp++; 577 break; 578 579 default: 580 /* must have exact match */ 581 if (!sameword(rp, ap)) 582 goto fail; 583 avp++; 584 break; 585 } 586 587 /* successful match on this token */ 588 rvp++; 589 continue; 590 591 fail: 592 /* match failed -- back up */ 593 while (--rvp >= rwr->r_lhs) 594 { 595 rp = *rvp; 596 if (*rp == MATCHANY) 597 { 598 /* extend binding and continue */ 599 mlp[-1].last = avp++; 600 rvp++; 601 break; 602 } 603 avp--; 604 if (*rp == MATCHONE || *rp == MATCHCLASS) 605 { 606 /* back out binding */ 607 mlp--; 608 } 609 } 610 611 if (rvp < rwr->r_lhs) 612 { 613 /* total failure to match */ 614 break; 615 } 616 } 617 618 /* 619 ** See if we successfully matched 620 */ 621 622 if (rvp >= rwr->r_lhs && *rvp == NULL) 623 { 624 # ifdef DEBUG 625 if (Debug > 10) 626 { 627 printf("-----rule matches:\n"); 628 printav(rwr->r_rhs); 629 } 630 # endif DEBUG 631 632 /* substitute */ 633 for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++) 634 { 635 rp = *rvp; 636 if (*rp == MATCHREPL) 637 { 638 register struct match *m; 639 register char **pp; 640 641 m = &mlist[rp[1] - '1']; 642 # ifdef DEBUG 643 if (Debug > 13) 644 { 645 printf("$%c:", rp[1]); 646 pp = m->first; 647 do 648 { 649 printf(" %x=\"", *pp); 650 (void) fflush(stdout); 651 printf("%s\"", *pp); 652 } while (pp++ != m->last); 653 printf("\n"); 654 } 655 # endif DEBUG 656 pp = m->first; 657 do 658 { 659 if (avp >= &npvp[MAXATOM]) 660 { 661 syserr("rewrite: expansion too long"); 662 return; 663 } 664 *avp++ = *pp; 665 } while (pp++ != m->last); 666 } 667 else 668 { 669 if (avp >= &npvp[MAXATOM]) 670 { 671 syserr("rewrite: expansion too long"); 672 return; 673 } 674 *avp++ = rp; 675 } 676 } 677 *avp++ = NULL; 678 bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp); 679 # ifdef DEBUG 680 if (Debug > 3) 681 { 682 char **vp; 683 684 printf("rewritten as `"); 685 for (vp = pvp; *vp != NULL; vp++) 686 { 687 if (vp != pvp) 688 printf("_"); 689 xputs(*vp); 690 } 691 printf("'\n"); 692 } 693 # endif DEBUG 694 if (pvp[0][0] == CANONNET) 695 break; 696 } 697 else 698 { 699 # ifdef DEBUG 700 if (Debug > 10) 701 printf("----- rule fails\n"); 702 # endif DEBUG 703 rwr = rwr->r_next; 704 } 705 } 706 } 707 /* 708 ** BUILDADDR -- build address from token vector. 709 ** 710 ** Parameters: 711 ** tv -- token vector. 712 ** a -- pointer to address descriptor to fill. 713 ** If NULL, one will be allocated. 714 ** 715 ** Returns: 716 ** NULL if there was an error. 717 ** 'a' otherwise. 718 ** 719 ** Side Effects: 720 ** fills in 'a' 721 */ 722 723 ADDRESS * 724 buildaddr(tv, a) 725 register char **tv; 726 register ADDRESS *a; 727 { 728 static char buf[MAXNAME]; 729 struct mailer **mp; 730 register struct mailer *m; 731 extern bool sameword(); 732 733 if (a == NULL) 734 a = (ADDRESS *) xalloc(sizeof *a); 735 clear((char *) a, sizeof *a); 736 737 /* figure out what net/mailer to use */ 738 if (**tv != CANONNET) 739 { 740 syserr("buildaddr: no net"); 741 return (NULL); 742 } 743 tv++; 744 if (sameword(*tv, "error")) 745 { 746 if (**++tv != CANONUSER) 747 syserr("buildaddr: error: no user"); 748 buf[0] = '\0'; 749 while (*++tv != NULL) 750 { 751 if (buf[0] != '\0') 752 strcat(buf, " "); 753 strcat(buf, *tv); 754 } 755 usrerr(buf); 756 return (NULL); 757 } 758 for (mp = Mailer; (m = *mp++) != NULL; ) 759 { 760 if (sameword(m->m_name, *tv)) 761 break; 762 } 763 if (m == NULL) 764 { 765 syserr("buildaddr: unknown net %s", *tv); 766 return (NULL); 767 } 768 a->q_mailer = m; 769 770 /* figure out what host (if any) */ 771 tv++; 772 if (!bitset(M_LOCAL, m->m_flags)) 773 { 774 if (**tv != CANONHOST) 775 { 776 syserr("buildaddr: no host"); 777 return (NULL); 778 } 779 tv++; 780 a->q_host = *tv; 781 tv++; 782 } 783 else 784 a->q_host = NULL; 785 786 /* figure out the user */ 787 if (**tv != CANONUSER) 788 { 789 syserr("buildaddr: no user"); 790 return (NULL); 791 } 792 cataddr(++tv, buf, sizeof buf); 793 a->q_user = buf; 794 795 return (a); 796 } 797 /* 798 ** CATADDR -- concatenate pieces of addresses (putting in <LWSP> subs) 799 ** 800 ** Parameters: 801 ** pvp -- parameter vector to rebuild. 802 ** buf -- buffer to build the string into. 803 ** sz -- size of buf. 804 ** 805 ** Returns: 806 ** none. 807 ** 808 ** Side Effects: 809 ** Destroys buf. 810 */ 811 812 cataddr(pvp, buf, sz) 813 char **pvp; 814 char *buf; 815 register int sz; 816 { 817 bool oatomtok = FALSE; 818 bool natomtok = FALSE; 819 register int i; 820 register char *p; 821 822 p = buf; 823 sz--; 824 while (*pvp != NULL && (i = strlen(*pvp)) < sz) 825 { 826 natomtok = (toktype(**pvp) == ATOM); 827 if (oatomtok && natomtok) 828 *p++ = SPACESUB; 829 (void) strcpy(p, *pvp); 830 oatomtok = natomtok; 831 p += i; 832 sz -= i; 833 pvp++; 834 } 835 *p = '\0'; 836 } 837 /* 838 ** SAMEADDR -- Determine if two addresses are the same 839 ** 840 ** This is not just a straight comparison -- if the mailer doesn't 841 ** care about the host we just ignore it, etc. 842 ** 843 ** Parameters: 844 ** a, b -- pointers to the internal forms to compare. 845 ** wildflg -- if TRUE, 'a' may have no user specified, 846 ** in which case it is to match anything. 847 ** 848 ** Returns: 849 ** TRUE -- they represent the same mailbox. 850 ** FALSE -- they don't. 851 ** 852 ** Side Effects: 853 ** none. 854 */ 855 856 bool 857 sameaddr(a, b, wildflg) 858 register ADDRESS *a; 859 register ADDRESS *b; 860 bool wildflg; 861 { 862 /* if they don't have the same mailer, forget it */ 863 if (a->q_mailer != b->q_mailer) 864 return (FALSE); 865 866 /* if the user isn't the same, we can drop out */ 867 if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0) 868 return (FALSE); 869 870 /* if the mailer ignores hosts, we have succeeded! */ 871 if (bitset(M_LOCAL, a->q_mailer->m_flags)) 872 return (TRUE); 873 874 /* otherwise compare hosts (but be careful for NULL ptrs) */ 875 if (a->q_host == NULL || b->q_host == NULL) 876 return (FALSE); 877 if (strcmp(a->q_host, b->q_host) != 0) 878 return (FALSE); 879 880 return (TRUE); 881 } 882 /* 883 ** PRINTADDR -- print address (for debugging) 884 ** 885 ** Parameters: 886 ** a -- the address to print 887 ** follow -- follow the q_next chain. 888 ** 889 ** Returns: 890 ** none. 891 ** 892 ** Side Effects: 893 ** none. 894 */ 895 896 # ifdef DEBUG 897 898 printaddr(a, follow) 899 register ADDRESS *a; 900 bool follow; 901 { 902 while (a != NULL) 903 { 904 printf("%x=", a); 905 (void) fflush(stdout); 906 printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr, 907 a->q_mailer->m_mno, a->q_mailer->m_name, a->q_host, a->q_user); 908 printf("\tnext=%x, flags=%o, rmailer %d\n", a->q_next, 909 a->q_flags, a->q_rmailer); 910 911 if (!follow) 912 return; 913 a = a->q_next; 914 } 915 if (!follow) 916 printf("[NULL]\n"); 917 } 918 919 # endif DEBUG 920