1297Seric # include <stdio.h> 2297Seric # include <ctype.h> 32973Seric # include "postbox.h" 4297Seric 5*2990Seric static char SccsId[] = "@(#)parseaddr.c 3.3 03/07/81"; 6407Seric 7297Seric /* 8297Seric ** PARSE -- Parse an address 9297Seric ** 10297Seric ** Parses an address and breaks it up into three parts: a 11297Seric ** net to transmit the message on, the host to transmit it 12297Seric ** to, and a user on that host. These are loaded into an 132973Seric ** ADDRESS header with the values squirreled away if necessary. 14297Seric ** The "user" part may not be a real user; the process may 15297Seric ** just reoccur on that machine. For example, on a machine 16297Seric ** with an arpanet connection, the address 17297Seric ** csvax.bill@berkeley 18297Seric ** will break up to a "user" of 'csvax.bill' and a host 19297Seric ** of 'berkeley' -- to be transmitted over the arpanet. 20297Seric ** 21297Seric ** Parameters: 22297Seric ** addr -- the address to parse. 23297Seric ** a -- a pointer to the address descriptor buffer. 24297Seric ** If NULL, a header will be created. 25297Seric ** copyf -- determines what shall be copied: 26297Seric ** -1 -- don't copy anything. The printname 27297Seric ** (q_paddr) is just addr, and the 28297Seric ** user & host are allocated internally 29297Seric ** to parse. 30297Seric ** 0 -- copy out the parsed user & host, but 31297Seric ** don't copy the printname. 32297Seric ** +1 -- copy everything. 33297Seric ** 34297Seric ** Returns: 35297Seric ** A pointer to the address descriptor header (`a' if 36297Seric ** `a' is non-NULL). 37297Seric ** NULL on error. 38297Seric ** 39297Seric ** Side Effects: 40297Seric ** none 41297Seric ** 42297Seric ** Called By: 43297Seric ** main 44297Seric ** sendto 45297Seric ** alias 46297Seric ** savemail 47297Seric */ 48297Seric 492096Seric # define DELIMCHARS "()<>@!.,;:\\\" \t\r\n" /* word delimiters */ 502091Seric # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 512091Seric 522973Seric ADDRESS * 53297Seric parse(addr, a, copyf) 54297Seric char *addr; 552973Seric register ADDRESS *a; 56297Seric int copyf; 57297Seric { 58297Seric register char *p; 59297Seric register struct parsetab *t; 60297Seric extern struct parsetab ParseTab[]; 61297Seric static char buf[MAXNAME]; 62297Seric register char c; 63297Seric register char *q; 64297Seric bool got_one; 65297Seric extern char *prescan(); 66297Seric extern char *xalloc(); 672973Seric extern char *newstr(); 681516Seric char **pvp; 69*2990Seric extern char *strcpy(); 70297Seric 71297Seric /* 72297Seric ** Initialize and prescan address. 73297Seric */ 74297Seric 75297Seric To = addr; 76297Seric if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 77297Seric return (NULL); 78297Seric 79297Seric /* 80297Seric ** Scan parse table. 81297Seric ** Look for the first entry designating a character 82297Seric ** that is contained in the address. 83297Seric ** Arrange for q to point to that character. 84297Seric ** Check to see that there is only one of the char 85297Seric ** if it must be unique. 86297Seric ** Find the last one if the host is on the RHS. 87297Seric ** Insist that the host name is atomic. 88297Seric ** If just doing a map, do the map and then start all 89297Seric ** over. 90297Seric */ 91297Seric 92297Seric rescan: 93297Seric got_one = FALSE; 94297Seric for (t = ParseTab; t->p_char != '\0'; t++) 95297Seric { 96297Seric q = NULL; 97297Seric for (p = buf; (c = *p) != '\0'; p++) 98297Seric { 99297Seric /* find the end of this token */ 100297Seric while (isalnum(c) || c == '-' || c == '_') 101297Seric c = *++p; 102297Seric if (c == '\0') 103297Seric break; 104297Seric 105297Seric if (c == t->p_char) 106297Seric { 107297Seric got_one = TRUE; 108297Seric 109297Seric /* do mapping as appropriate */ 1102973Seric if (bitset(P_MAP, t->p_flags)) 111297Seric { 112297Seric *p = t->p_arg[0]; 1132973Seric if (bitset(P_ONE, t->p_flags)) 114297Seric goto rescan; 115297Seric else 116297Seric continue; 117297Seric } 118297Seric 119297Seric /* arrange for q to point to it */ 1202973Seric if (q != NULL && bitset(P_ONE, t->p_flags)) 121297Seric { 122297Seric usrerr("multichar error"); 123297Seric ExitStat = EX_USAGE; 124297Seric return (NULL); 125297Seric } 1262973Seric if (q == NULL || bitset(P_HLAST, t->p_flags)) 127297Seric q = p; 128297Seric } 129297Seric else 130297Seric { 131297Seric /* insist that host name is atomic */ 1322973Seric if (bitset(P_HLAST, t->p_flags)) 133297Seric q = NULL; 134297Seric else 135297Seric break; 136297Seric } 137297Seric } 138297Seric 139297Seric if (q != NULL) 140297Seric break; 141297Seric } 142297Seric 143297Seric /* 144297Seric ** If we matched nothing cleanly, but we did match something 145297Seric ** somewhere in the process of scanning, then we have a 146297Seric ** syntax error. This can happen on things like a@b:c where 147297Seric ** @ has a right host and : has a left host. 148297Seric ** 149297Seric ** We also set `q' to the null string, in case someone forgets 150297Seric ** to put the P_MOVE bit in the local mailer entry of the 151297Seric ** configuration table. 152297Seric */ 153297Seric 154297Seric if (q == NULL) 155297Seric { 156297Seric q = ""; 157297Seric if (got_one) 158297Seric { 159297Seric usrerr("syntax error"); 160297Seric ExitStat = EX_USAGE; 161297Seric return (NULL); 162297Seric } 163297Seric } 164297Seric 165297Seric /* 166297Seric ** Interpret entry. 167297Seric ** t points to the entry for the mailer we will use. 168297Seric ** q points to the significant character. 169297Seric */ 170297Seric 171297Seric if (a == NULL) 1722973Seric a = (ADDRESS *) xalloc(sizeof *a); 173297Seric if (copyf > 0) 1742973Seric a->q_paddr = newstr(addr); 175297Seric else 176297Seric a->q_paddr = addr; 1772984Seric a->q_rmailer = t->p_mailer; 178297Seric a->q_mailer = &Mailer[t->p_mailer]; 179297Seric 1802973Seric if (bitset(P_MOVE, t->p_flags)) 181297Seric { 182297Seric /* send the message to another host & retry */ 183297Seric a->q_host = t->p_arg; 184297Seric if (copyf >= 0) 1852973Seric a->q_user = newstr(buf); 186297Seric else 187297Seric a->q_user = buf; 188297Seric } 189297Seric else 190297Seric { 191297Seric /* 192297Seric ** Make local copies of the host & user and then 193297Seric ** transport them out. 194297Seric */ 195297Seric 196297Seric *q++ = '\0'; 1972973Seric if (bitset(P_HLAST, t->p_flags)) 198297Seric { 199297Seric a->q_host = q; 200297Seric a->q_user = buf; 201297Seric } 202297Seric else 203297Seric { 204297Seric a->q_host = buf; 205297Seric a->q_user = q; 206297Seric } 2071516Seric 2081516Seric /* 2091516Seric ** Don't go to the net if already on the target host. 2101516Seric ** This is important on the berkeley network, since 2111516Seric ** it get confused if we ask to send to ourselves. 2121516Seric ** For nets like the ARPANET, we probably will have 2131516Seric ** the local list set to NULL to simplify testing. 2141516Seric ** The canonical representation of the name is also set 2151516Seric ** to be just the local name so the duplicate letter 2161516Seric ** suppression algorithm will work. 2171516Seric */ 2181516Seric 2191516Seric if ((pvp = a->q_mailer->m_local) != NULL) 2201516Seric { 2211516Seric while (*pvp != NULL) 2221516Seric { 2231516Seric auto char buf2[MAXNAME]; 2241516Seric 2251516Seric strcpy(buf2, a->q_host); 2262973Seric if (!bitset(P_HST_UPPER, t->p_flags)) 2271516Seric makelower(buf2); 2281516Seric if (strcmp(*pvp++, buf2) == 0) 2291516Seric { 2301516Seric strcpy(buf2, a->q_user); 2311516Seric p = a->q_paddr; 2321516Seric if (parse(buf2, a, -1) == NULL) 2331516Seric { 2341516Seric To = addr; 2351516Seric return (NULL); 2361516Seric } 2371516Seric To = a->q_paddr = p; 2381516Seric break; 2391516Seric } 2401516Seric } 2411516Seric } 2421516Seric 2431516Seric /* make copies if specified */ 244297Seric if (copyf >= 0) 245297Seric { 2462973Seric a->q_host = newstr(a->q_host); 2472973Seric a->q_user = newstr(a->q_user); 248297Seric } 249297Seric } 250297Seric 251297Seric /* 252297Seric ** Do UPPER->lower case mapping unless inhibited. 253297Seric */ 254297Seric 2552973Seric if (!bitset(P_HST_UPPER, t->p_flags)) 256297Seric makelower(a->q_host); 2572973Seric if (!bitset(P_USR_UPPER, t->p_flags)) 258297Seric makelower(a->q_user); 259297Seric 260297Seric /* 261297Seric ** Compute return value. 262297Seric */ 263297Seric 264297Seric # ifdef DEBUG 2651583Seric if (Debug) 266297Seric printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 267297Seric addr, a->q_host, a->q_user, t->p_mailer); 268297Seric # endif DEBUG 269297Seric 270297Seric return (a); 271297Seric } 272297Seric /* 273297Seric ** MAKELOWER -- Translate a line into lower case 274297Seric ** 275297Seric ** Parameters: 276297Seric ** p -- the string to translate. If NULL, return is 277297Seric ** immediate. 278297Seric ** 279297Seric ** Returns: 280297Seric ** none. 281297Seric ** 282297Seric ** Side Effects: 283297Seric ** String pointed to by p is translated to lower case. 284297Seric ** 285297Seric ** Called By: 286297Seric ** parse 287297Seric */ 288297Seric 289297Seric makelower(p) 290297Seric register char *p; 291297Seric { 292297Seric register char c; 293297Seric 294297Seric if (p == NULL) 295297Seric return; 296297Seric for (; (c = *p) != '\0'; p++) 297297Seric if ((c & 0200) == 0 && isupper(c)) 298297Seric *p = c - 'A' + 'a'; 299297Seric } 300297Seric /* 301297Seric ** PRESCAN -- Prescan name and make it canonical 302297Seric ** 303297Seric ** Scans a name and turns it into canonical form. This involves 304297Seric ** deleting blanks, comments (in parentheses), and turning the 305297Seric ** word "at" into an at-sign ("@"). The name is copied as this 306297Seric ** is done; it is legal to copy a name onto itself, since this 307297Seric ** process can only make things smaller. 308297Seric ** 309297Seric ** This routine knows about quoted strings and angle brackets. 310297Seric ** 311297Seric ** There are certain subtleties to this routine. The one that 312297Seric ** comes to mind now is that backslashes on the ends of names 313297Seric ** are silently stripped off; this is intentional. The problem 314297Seric ** is that some versions of sndmsg (like at LBL) set the kill 315297Seric ** character to something other than @ when reading addresses; 316297Seric ** so people type "csvax.eric\@berkeley" -- which screws up the 317297Seric ** berknet mailer. 318297Seric ** 319297Seric ** Parameters: 320297Seric ** addr -- the name to chomp. 321297Seric ** buf -- the buffer to copy it into. 322297Seric ** buflim -- the last usable address in the buffer 323297Seric ** (which will old a null byte). Normally 324297Seric ** &buf[sizeof buf - 1]. 325297Seric ** delim -- the delimiter for the address, normally 326297Seric ** '\0' or ','; \0 is accepted in any case. 327297Seric ** are moving in place; set buflim to high core. 328297Seric ** 329297Seric ** Returns: 330297Seric ** A pointer to the terminator of buf. 331297Seric ** NULL on error. 332297Seric ** 333297Seric ** Side Effects: 334297Seric ** buf gets clobbered. 335297Seric ** 336297Seric ** Called By: 337297Seric ** parse 338297Seric ** maketemp 339297Seric */ 340297Seric 341297Seric char * 342297Seric prescan(addr, buf, buflim, delim) 343297Seric char *addr; 344297Seric char *buf; 345297Seric char *buflim; 346297Seric char delim; 347297Seric { 348297Seric register char *p; 349297Seric bool space; 350297Seric bool quotemode; 351297Seric bool bslashmode; 3522091Seric bool delimmode; 353297Seric int cmntcnt; 354297Seric int brccnt; 355297Seric register char c; 356297Seric register char *q; 3572973Seric extern char *index(); 358297Seric 3592091Seric space = FALSE; 3602091Seric delimmode = TRUE; 361297Seric q = buf; 362297Seric bslashmode = quotemode = FALSE; 363297Seric cmntcnt = brccnt = 0; 3641585Seric for (p = addr; (c = *p++) != '\0'; ) 365297Seric { 366297Seric /* chew up special characters */ 367297Seric *q = '\0'; 368297Seric if (bslashmode) 369297Seric { 370297Seric c |= 0200; 3711585Seric bslashmode = FALSE; 372297Seric } 373297Seric else if (c == '"') 374297Seric quotemode = !quotemode; 375297Seric else if (c == '\\') 376297Seric { 377297Seric bslashmode++; 378297Seric continue; 379297Seric } 380297Seric else if (quotemode) 381297Seric c |= 0200; 382297Seric else if (c == delim) 383297Seric break; 384297Seric else if (c == '(') 3851378Seric { 386297Seric cmntcnt++; 3871378Seric continue; 3881378Seric } 389297Seric else if (c == ')') 390297Seric { 391297Seric if (cmntcnt <= 0) 392297Seric { 393297Seric usrerr("Unbalanced ')'"); 394297Seric return (NULL); 395297Seric } 396297Seric else 397297Seric { 398297Seric cmntcnt--; 399297Seric continue; 400297Seric } 401297Seric } 4022091Seric if (cmntcnt > 0) 4032091Seric continue; 4042091Seric else if (isascii(c) && isspace(c) && (space || delimmode)) 4052091Seric continue; 406297Seric else if (c == '<') 407297Seric { 4082092Seric if (brccnt < 0) 4092092Seric { 4102092Seric usrerr("multiple < spec"); 4112092Seric return (NULL); 4122092Seric } 413297Seric brccnt++; 4142091Seric delimmode = TRUE; 4152091Seric space = FALSE; 416297Seric if (brccnt == 1) 417297Seric { 418297Seric /* we prefer using machine readable name */ 419297Seric q = buf; 420297Seric *q = '\0'; 421297Seric continue; 422297Seric } 423297Seric } 424297Seric else if (c == '>') 425297Seric { 426297Seric if (brccnt <= 0) 427297Seric { 428297Seric usrerr("Unbalanced `>'"); 429297Seric return (NULL); 430297Seric } 431297Seric else 432297Seric brccnt--; 433297Seric if (brccnt <= 0) 4342092Seric { 4352092Seric brccnt = -1; 436297Seric continue; 4372092Seric } 438297Seric } 439297Seric 440297Seric /* 441297Seric ** Turn "at" into "@", 4421378Seric ** but only if "at" is a word. 443297Seric ** By the way, I violate the ARPANET RFC-733 444297Seric ** standard here, by assuming that 'space' delimits 445297Seric ** atoms. I assume that is just a mistake, since 446297Seric ** it violates the spirit of the semantics 447297Seric ** of the document..... 448297Seric */ 449297Seric 4502091Seric if (delimmode && (c == 'a' || c == 'A') && 451297Seric (p[0] == 't' || p[0] == 'T') && 4522973Seric (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040)) 453297Seric { 454297Seric c = '@'; 455297Seric p++; 456297Seric } 457297Seric 4582973Seric if (delimmode = (index(DELIMCHARS, c) != NULL)) 4592091Seric space = FALSE; 4602091Seric 4612092Seric /* if not a space, squirrel it away */ 4622092Seric if ((!isascii(c) || !isspace(c)) && brccnt >= 0) 463297Seric { 4642091Seric if (q >= buflim-1) 465297Seric { 466297Seric usrerr("Address too long"); 467297Seric return (NULL); 468297Seric } 4692091Seric if (space) 4702091Seric *q++ = SPACESUB; 471297Seric *q++ = c; 472297Seric } 4732094Seric space = isascii(c) && isspace(c); 474297Seric } 475297Seric *q = '\0'; 476297Seric if (c == '\0') 477297Seric p--; 478297Seric if (cmntcnt > 0) 479297Seric usrerr("Unbalanced '('"); 480297Seric else if (quotemode) 481297Seric usrerr("Unbalanced '\"'"); 482297Seric else if (brccnt > 0) 483297Seric usrerr("Unbalanced '<'"); 484297Seric else if (buf[0] != '\0') 485297Seric return (p); 486297Seric return (NULL); 487297Seric } 488