1297Seric # include <stdio.h> 2297Seric # include <ctype.h> 32973Seric # include "postbox.h" 4297Seric 5*2984Seric static char SccsId[] = "@(#)parseaddr.c 3.2 03/07/81"; 6407Seric 7297Seric /* 8297Seric ** PARSE -- Parse an address 9297Seric ** 10297Seric ** Parses an address and breaks it up into three parts: a 11297Seric ** net to transmit the message on, the host to transmit it 12297Seric ** to, and a user on that host. These are loaded into an 132973Seric ** ADDRESS header with the values squirreled away if necessary. 14297Seric ** The "user" part may not be a real user; the process may 15297Seric ** just reoccur on that machine. For example, on a machine 16297Seric ** with an arpanet connection, the address 17297Seric ** csvax.bill@berkeley 18297Seric ** will break up to a "user" of 'csvax.bill' and a host 19297Seric ** of 'berkeley' -- to be transmitted over the arpanet. 20297Seric ** 21297Seric ** Parameters: 22297Seric ** addr -- the address to parse. 23297Seric ** a -- a pointer to the address descriptor buffer. 24297Seric ** If NULL, a header will be created. 25297Seric ** copyf -- determines what shall be copied: 26297Seric ** -1 -- don't copy anything. The printname 27297Seric ** (q_paddr) is just addr, and the 28297Seric ** user & host are allocated internally 29297Seric ** to parse. 30297Seric ** 0 -- copy out the parsed user & host, but 31297Seric ** don't copy the printname. 32297Seric ** +1 -- copy everything. 33297Seric ** 34297Seric ** Returns: 35297Seric ** A pointer to the address descriptor header (`a' if 36297Seric ** `a' is non-NULL). 37297Seric ** NULL on error. 38297Seric ** 39297Seric ** Side Effects: 40297Seric ** none 41297Seric ** 42297Seric ** Called By: 43297Seric ** main 44297Seric ** sendto 45297Seric ** alias 46297Seric ** savemail 47297Seric */ 48297Seric 492096Seric # define DELIMCHARS "()<>@!.,;:\\\" \t\r\n" /* word delimiters */ 502091Seric # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 512091Seric 522973Seric ADDRESS * 53297Seric parse(addr, a, copyf) 54297Seric char *addr; 552973Seric register ADDRESS *a; 56297Seric int copyf; 57297Seric { 58297Seric register char *p; 59297Seric register struct parsetab *t; 60297Seric extern struct parsetab ParseTab[]; 61297Seric static char buf[MAXNAME]; 62297Seric register char c; 63297Seric register char *q; 64297Seric bool got_one; 65297Seric extern char *prescan(); 66297Seric extern char *xalloc(); 672973Seric extern char *newstr(); 681516Seric char **pvp; 69297Seric 70297Seric /* 71297Seric ** Initialize and prescan address. 72297Seric */ 73297Seric 74297Seric To = addr; 75297Seric if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 76297Seric return (NULL); 77297Seric 78297Seric /* 79297Seric ** Scan parse table. 80297Seric ** Look for the first entry designating a character 81297Seric ** that is contained in the address. 82297Seric ** Arrange for q to point to that character. 83297Seric ** Check to see that there is only one of the char 84297Seric ** if it must be unique. 85297Seric ** Find the last one if the host is on the RHS. 86297Seric ** Insist that the host name is atomic. 87297Seric ** If just doing a map, do the map and then start all 88297Seric ** over. 89297Seric */ 90297Seric 91297Seric rescan: 92297Seric got_one = FALSE; 93297Seric for (t = ParseTab; t->p_char != '\0'; t++) 94297Seric { 95297Seric q = NULL; 96297Seric for (p = buf; (c = *p) != '\0'; p++) 97297Seric { 98297Seric /* find the end of this token */ 99297Seric while (isalnum(c) || c == '-' || c == '_') 100297Seric c = *++p; 101297Seric if (c == '\0') 102297Seric break; 103297Seric 104297Seric if (c == t->p_char) 105297Seric { 106297Seric got_one = TRUE; 107297Seric 108297Seric /* do mapping as appropriate */ 1092973Seric if (bitset(P_MAP, t->p_flags)) 110297Seric { 111297Seric *p = t->p_arg[0]; 1122973Seric if (bitset(P_ONE, t->p_flags)) 113297Seric goto rescan; 114297Seric else 115297Seric continue; 116297Seric } 117297Seric 118297Seric /* arrange for q to point to it */ 1192973Seric if (q != NULL && bitset(P_ONE, t->p_flags)) 120297Seric { 121297Seric usrerr("multichar error"); 122297Seric ExitStat = EX_USAGE; 123297Seric return (NULL); 124297Seric } 1252973Seric if (q == NULL || bitset(P_HLAST, t->p_flags)) 126297Seric q = p; 127297Seric } 128297Seric else 129297Seric { 130297Seric /* insist that host name is atomic */ 1312973Seric if (bitset(P_HLAST, t->p_flags)) 132297Seric q = NULL; 133297Seric else 134297Seric break; 135297Seric } 136297Seric } 137297Seric 138297Seric if (q != NULL) 139297Seric break; 140297Seric } 141297Seric 142297Seric /* 143297Seric ** If we matched nothing cleanly, but we did match something 144297Seric ** somewhere in the process of scanning, then we have a 145297Seric ** syntax error. This can happen on things like a@b:c where 146297Seric ** @ has a right host and : has a left host. 147297Seric ** 148297Seric ** We also set `q' to the null string, in case someone forgets 149297Seric ** to put the P_MOVE bit in the local mailer entry of the 150297Seric ** configuration table. 151297Seric */ 152297Seric 153297Seric if (q == NULL) 154297Seric { 155297Seric q = ""; 156297Seric if (got_one) 157297Seric { 158297Seric usrerr("syntax error"); 159297Seric ExitStat = EX_USAGE; 160297Seric return (NULL); 161297Seric } 162297Seric } 163297Seric 164297Seric /* 165297Seric ** Interpret entry. 166297Seric ** t points to the entry for the mailer we will use. 167297Seric ** q points to the significant character. 168297Seric */ 169297Seric 170297Seric if (a == NULL) 1712973Seric a = (ADDRESS *) xalloc(sizeof *a); 172297Seric if (copyf > 0) 1732973Seric a->q_paddr = newstr(addr); 174297Seric else 175297Seric a->q_paddr = addr; 176*2984Seric a->q_rmailer = t->p_mailer; 177297Seric a->q_mailer = &Mailer[t->p_mailer]; 178297Seric 1792973Seric if (bitset(P_MOVE, t->p_flags)) 180297Seric { 181297Seric /* send the message to another host & retry */ 182297Seric a->q_host = t->p_arg; 183297Seric if (copyf >= 0) 1842973Seric a->q_user = newstr(buf); 185297Seric else 186297Seric a->q_user = buf; 187297Seric } 188297Seric else 189297Seric { 190297Seric /* 191297Seric ** Make local copies of the host & user and then 192297Seric ** transport them out. 193297Seric */ 194297Seric 195297Seric *q++ = '\0'; 1962973Seric if (bitset(P_HLAST, t->p_flags)) 197297Seric { 198297Seric a->q_host = q; 199297Seric a->q_user = buf; 200297Seric } 201297Seric else 202297Seric { 203297Seric a->q_host = buf; 204297Seric a->q_user = q; 205297Seric } 2061516Seric 2071516Seric /* 2081516Seric ** Don't go to the net if already on the target host. 2091516Seric ** This is important on the berkeley network, since 2101516Seric ** it get confused if we ask to send to ourselves. 2111516Seric ** For nets like the ARPANET, we probably will have 2121516Seric ** the local list set to NULL to simplify testing. 2131516Seric ** The canonical representation of the name is also set 2141516Seric ** to be just the local name so the duplicate letter 2151516Seric ** suppression algorithm will work. 2161516Seric */ 2171516Seric 2181516Seric if ((pvp = a->q_mailer->m_local) != NULL) 2191516Seric { 2201516Seric while (*pvp != NULL) 2211516Seric { 2221516Seric auto char buf2[MAXNAME]; 2231516Seric 2241516Seric strcpy(buf2, a->q_host); 2252973Seric if (!bitset(P_HST_UPPER, t->p_flags)) 2261516Seric makelower(buf2); 2271516Seric if (strcmp(*pvp++, buf2) == 0) 2281516Seric { 2291516Seric strcpy(buf2, a->q_user); 2301516Seric p = a->q_paddr; 2311516Seric if (parse(buf2, a, -1) == NULL) 2321516Seric { 2331516Seric To = addr; 2341516Seric return (NULL); 2351516Seric } 2361516Seric To = a->q_paddr = p; 2371516Seric break; 2381516Seric } 2391516Seric } 2401516Seric } 2411516Seric 2421516Seric /* make copies if specified */ 243297Seric if (copyf >= 0) 244297Seric { 2452973Seric a->q_host = newstr(a->q_host); 2462973Seric a->q_user = newstr(a->q_user); 247297Seric } 248297Seric } 249297Seric 250297Seric /* 251297Seric ** Do UPPER->lower case mapping unless inhibited. 252297Seric */ 253297Seric 2542973Seric if (!bitset(P_HST_UPPER, t->p_flags)) 255297Seric makelower(a->q_host); 2562973Seric if (!bitset(P_USR_UPPER, t->p_flags)) 257297Seric makelower(a->q_user); 258297Seric 259297Seric /* 260297Seric ** Compute return value. 261297Seric */ 262297Seric 263297Seric # ifdef DEBUG 2641583Seric if (Debug) 265297Seric printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 266297Seric addr, a->q_host, a->q_user, t->p_mailer); 267297Seric # endif DEBUG 268297Seric 269297Seric return (a); 270297Seric } 271297Seric /* 272297Seric ** MAKELOWER -- Translate a line into lower case 273297Seric ** 274297Seric ** Parameters: 275297Seric ** p -- the string to translate. If NULL, return is 276297Seric ** immediate. 277297Seric ** 278297Seric ** Returns: 279297Seric ** none. 280297Seric ** 281297Seric ** Side Effects: 282297Seric ** String pointed to by p is translated to lower case. 283297Seric ** 284297Seric ** Called By: 285297Seric ** parse 286297Seric */ 287297Seric 288297Seric makelower(p) 289297Seric register char *p; 290297Seric { 291297Seric register char c; 292297Seric 293297Seric if (p == NULL) 294297Seric return; 295297Seric for (; (c = *p) != '\0'; p++) 296297Seric if ((c & 0200) == 0 && isupper(c)) 297297Seric *p = c - 'A' + 'a'; 298297Seric } 299297Seric /* 300297Seric ** PRESCAN -- Prescan name and make it canonical 301297Seric ** 302297Seric ** Scans a name and turns it into canonical form. This involves 303297Seric ** deleting blanks, comments (in parentheses), and turning the 304297Seric ** word "at" into an at-sign ("@"). The name is copied as this 305297Seric ** is done; it is legal to copy a name onto itself, since this 306297Seric ** process can only make things smaller. 307297Seric ** 308297Seric ** This routine knows about quoted strings and angle brackets. 309297Seric ** 310297Seric ** There are certain subtleties to this routine. The one that 311297Seric ** comes to mind now is that backslashes on the ends of names 312297Seric ** are silently stripped off; this is intentional. The problem 313297Seric ** is that some versions of sndmsg (like at LBL) set the kill 314297Seric ** character to something other than @ when reading addresses; 315297Seric ** so people type "csvax.eric\@berkeley" -- which screws up the 316297Seric ** berknet mailer. 317297Seric ** 318297Seric ** Parameters: 319297Seric ** addr -- the name to chomp. 320297Seric ** buf -- the buffer to copy it into. 321297Seric ** buflim -- the last usable address in the buffer 322297Seric ** (which will old a null byte). Normally 323297Seric ** &buf[sizeof buf - 1]. 324297Seric ** delim -- the delimiter for the address, normally 325297Seric ** '\0' or ','; \0 is accepted in any case. 326297Seric ** are moving in place; set buflim to high core. 327297Seric ** 328297Seric ** Returns: 329297Seric ** A pointer to the terminator of buf. 330297Seric ** NULL on error. 331297Seric ** 332297Seric ** Side Effects: 333297Seric ** buf gets clobbered. 334297Seric ** 335297Seric ** Called By: 336297Seric ** parse 337297Seric ** maketemp 338297Seric */ 339297Seric 340297Seric char * 341297Seric prescan(addr, buf, buflim, delim) 342297Seric char *addr; 343297Seric char *buf; 344297Seric char *buflim; 345297Seric char delim; 346297Seric { 347297Seric register char *p; 348297Seric bool space; 349297Seric bool quotemode; 350297Seric bool bslashmode; 3512091Seric bool delimmode; 352297Seric int cmntcnt; 353297Seric int brccnt; 354297Seric register char c; 355297Seric register char *q; 3562973Seric extern char *index(); 357297Seric 3582091Seric space = FALSE; 3592091Seric delimmode = TRUE; 360297Seric q = buf; 361297Seric bslashmode = quotemode = FALSE; 362297Seric cmntcnt = brccnt = 0; 3631585Seric for (p = addr; (c = *p++) != '\0'; ) 364297Seric { 365297Seric /* chew up special characters */ 366297Seric *q = '\0'; 367297Seric if (bslashmode) 368297Seric { 369297Seric c |= 0200; 3701585Seric bslashmode = FALSE; 371297Seric } 372297Seric else if (c == '"') 373297Seric quotemode = !quotemode; 374297Seric else if (c == '\\') 375297Seric { 376297Seric bslashmode++; 377297Seric continue; 378297Seric } 379297Seric else if (quotemode) 380297Seric c |= 0200; 381297Seric else if (c == delim) 382297Seric break; 383297Seric else if (c == '(') 3841378Seric { 385297Seric cmntcnt++; 3861378Seric continue; 3871378Seric } 388297Seric else if (c == ')') 389297Seric { 390297Seric if (cmntcnt <= 0) 391297Seric { 392297Seric usrerr("Unbalanced ')'"); 393297Seric return (NULL); 394297Seric } 395297Seric else 396297Seric { 397297Seric cmntcnt--; 398297Seric continue; 399297Seric } 400297Seric } 4012091Seric if (cmntcnt > 0) 4022091Seric continue; 4032091Seric else if (isascii(c) && isspace(c) && (space || delimmode)) 4042091Seric continue; 405297Seric else if (c == '<') 406297Seric { 4072092Seric if (brccnt < 0) 4082092Seric { 4092092Seric usrerr("multiple < spec"); 4102092Seric return (NULL); 4112092Seric } 412297Seric brccnt++; 4132091Seric delimmode = TRUE; 4142091Seric space = FALSE; 415297Seric if (brccnt == 1) 416297Seric { 417297Seric /* we prefer using machine readable name */ 418297Seric q = buf; 419297Seric *q = '\0'; 420297Seric continue; 421297Seric } 422297Seric } 423297Seric else if (c == '>') 424297Seric { 425297Seric if (brccnt <= 0) 426297Seric { 427297Seric usrerr("Unbalanced `>'"); 428297Seric return (NULL); 429297Seric } 430297Seric else 431297Seric brccnt--; 432297Seric if (brccnt <= 0) 4332092Seric { 4342092Seric brccnt = -1; 435297Seric continue; 4362092Seric } 437297Seric } 438297Seric 439297Seric /* 440297Seric ** Turn "at" into "@", 4411378Seric ** but only if "at" is a word. 442297Seric ** By the way, I violate the ARPANET RFC-733 443297Seric ** standard here, by assuming that 'space' delimits 444297Seric ** atoms. I assume that is just a mistake, since 445297Seric ** it violates the spirit of the semantics 446297Seric ** of the document..... 447297Seric */ 448297Seric 4492091Seric if (delimmode && (c == 'a' || c == 'A') && 450297Seric (p[0] == 't' || p[0] == 'T') && 4512973Seric (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040)) 452297Seric { 453297Seric c = '@'; 454297Seric p++; 455297Seric } 456297Seric 4572973Seric if (delimmode = (index(DELIMCHARS, c) != NULL)) 4582091Seric space = FALSE; 4592091Seric 4602092Seric /* if not a space, squirrel it away */ 4612092Seric if ((!isascii(c) || !isspace(c)) && brccnt >= 0) 462297Seric { 4632091Seric if (q >= buflim-1) 464297Seric { 465297Seric usrerr("Address too long"); 466297Seric return (NULL); 467297Seric } 4682091Seric if (space) 4692091Seric *q++ = SPACESUB; 470297Seric *q++ = c; 471297Seric } 4722094Seric space = isascii(c) && isspace(c); 473297Seric } 474297Seric *q = '\0'; 475297Seric if (c == '\0') 476297Seric p--; 477297Seric if (cmntcnt > 0) 478297Seric usrerr("Unbalanced '('"); 479297Seric else if (quotemode) 480297Seric usrerr("Unbalanced '\"'"); 481297Seric else if (brccnt > 0) 482297Seric usrerr("Unbalanced '<'"); 483297Seric else if (buf[0] != '\0') 484297Seric return (p); 485297Seric return (NULL); 486297Seric } 487