1297Seric # include <stdio.h> 2297Seric # include <ctype.h> 3*2973Seric # include "postbox.h" 4297Seric 5*2973Seric static char SccsId[] = "@(#)parseaddr.c 3.1 03/07/81"; 6407Seric 7297Seric /* 8297Seric ** PARSE -- Parse an address 9297Seric ** 10297Seric ** Parses an address and breaks it up into three parts: a 11297Seric ** net to transmit the message on, the host to transmit it 12297Seric ** to, and a user on that host. These are loaded into an 13*2973Seric ** ADDRESS header with the values squirreled away if necessary. 14297Seric ** The "user" part may not be a real user; the process may 15297Seric ** just reoccur on that machine. For example, on a machine 16297Seric ** with an arpanet connection, the address 17297Seric ** csvax.bill@berkeley 18297Seric ** will break up to a "user" of 'csvax.bill' and a host 19297Seric ** of 'berkeley' -- to be transmitted over the arpanet. 20297Seric ** 21297Seric ** Parameters: 22297Seric ** addr -- the address to parse. 23297Seric ** a -- a pointer to the address descriptor buffer. 24297Seric ** If NULL, a header will be created. 25297Seric ** copyf -- determines what shall be copied: 26297Seric ** -1 -- don't copy anything. The printname 27297Seric ** (q_paddr) is just addr, and the 28297Seric ** user & host are allocated internally 29297Seric ** to parse. 30297Seric ** 0 -- copy out the parsed user & host, but 31297Seric ** don't copy the printname. 32297Seric ** +1 -- copy everything. 33297Seric ** 34297Seric ** Returns: 35297Seric ** A pointer to the address descriptor header (`a' if 36297Seric ** `a' is non-NULL). 37297Seric ** NULL on error. 
38297Seric ** 39297Seric ** Side Effects: 40297Seric ** none 41297Seric ** 42297Seric ** Called By: 43297Seric ** main 44297Seric ** sendto 45297Seric ** alias 46297Seric ** savemail 47297Seric */ 48297Seric 492096Seric # define DELIMCHARS "()<>@!.,;:\\\" \t\r\n" /* word delimiters */ 502091Seric # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 512091Seric 52*2973Seric ADDRESS * 53297Seric parse(addr, a, copyf) 54297Seric char *addr; 55*2973Seric register ADDRESS *a; 56297Seric int copyf; 57297Seric { 58297Seric register char *p; 59297Seric register struct parsetab *t; 60297Seric extern struct parsetab ParseTab[]; 61297Seric static char buf[MAXNAME]; 62297Seric register char c; 63297Seric register char *q; 64297Seric bool got_one; 65297Seric extern char *prescan(); 66297Seric extern char *xalloc(); 67*2973Seric extern char *newstr(); 681516Seric char **pvp; 69297Seric 70297Seric /* 71297Seric ** Initialize and prescan address. 72297Seric */ 73297Seric 74297Seric To = addr; 75297Seric if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 76297Seric return (NULL); 77297Seric 78297Seric /* 79297Seric ** Scan parse table. 80297Seric ** Look for the first entry designating a character 81297Seric ** that is contained in the address. 82297Seric ** Arrange for q to point to that character. 83297Seric ** Check to see that there is only one of the char 84297Seric ** if it must be unique. 85297Seric ** Find the last one if the host is on the RHS. 86297Seric ** Insist that the host name is atomic. 87297Seric ** If just doing a map, do the map and then start all 88297Seric ** over. 
89297Seric */ 90297Seric 91297Seric rescan: 92297Seric got_one = FALSE; 93297Seric for (t = ParseTab; t->p_char != '\0'; t++) 94297Seric { 95297Seric q = NULL; 96297Seric for (p = buf; (c = *p) != '\0'; p++) 97297Seric { 98297Seric /* find the end of this token */ 99297Seric while (isalnum(c) || c == '-' || c == '_') 100297Seric c = *++p; 101297Seric if (c == '\0') 102297Seric break; 103297Seric 104297Seric if (c == t->p_char) 105297Seric { 106297Seric got_one = TRUE; 107297Seric 108297Seric /* do mapping as appropriate */ 109*2973Seric if (bitset(P_MAP, t->p_flags)) 110297Seric { 111297Seric *p = t->p_arg[0]; 112*2973Seric if (bitset(P_ONE, t->p_flags)) 113297Seric goto rescan; 114297Seric else 115297Seric continue; 116297Seric } 117297Seric 118297Seric /* arrange for q to point to it */ 119*2973Seric if (q != NULL && bitset(P_ONE, t->p_flags)) 120297Seric { 121297Seric usrerr("multichar error"); 122297Seric ExitStat = EX_USAGE; 123297Seric return (NULL); 124297Seric } 125*2973Seric if (q == NULL || bitset(P_HLAST, t->p_flags)) 126297Seric q = p; 127297Seric } 128297Seric else 129297Seric { 130297Seric /* insist that host name is atomic */ 131*2973Seric if (bitset(P_HLAST, t->p_flags)) 132297Seric q = NULL; 133297Seric else 134297Seric break; 135297Seric } 136297Seric } 137297Seric 138297Seric if (q != NULL) 139297Seric break; 140297Seric } 141297Seric 142297Seric /* 143297Seric ** If we matched nothing cleanly, but we did match something 144297Seric ** somewhere in the process of scanning, then we have a 145297Seric ** syntax error. This can happen on things like a@b:c where 146297Seric ** @ has a right host and : has a left host. 147297Seric ** 148297Seric ** We also set `q' to the null string, in case someone forgets 149297Seric ** to put the P_MOVE bit in the local mailer entry of the 150297Seric ** configuration table. 
151297Seric */ 152297Seric 153297Seric if (q == NULL) 154297Seric { 155297Seric q = ""; 156297Seric if (got_one) 157297Seric { 158297Seric usrerr("syntax error"); 159297Seric ExitStat = EX_USAGE; 160297Seric return (NULL); 161297Seric } 162297Seric } 163297Seric 164297Seric /* 165297Seric ** Interpret entry. 166297Seric ** t points to the entry for the mailer we will use. 167297Seric ** q points to the significant character. 168297Seric */ 169297Seric 170297Seric if (a == NULL) 171*2973Seric a = (ADDRESS *) xalloc(sizeof *a); 172297Seric if (copyf > 0) 173*2973Seric a->q_paddr = newstr(addr); 174297Seric else 175297Seric a->q_paddr = addr; 176297Seric a->q_mailer = &Mailer[t->p_mailer]; 177297Seric 178*2973Seric if (bitset(P_MOVE, t->p_flags)) 179297Seric { 180297Seric /* send the message to another host & retry */ 181297Seric a->q_host = t->p_arg; 182297Seric if (copyf >= 0) 183*2973Seric a->q_user = newstr(buf); 184297Seric else 185297Seric a->q_user = buf; 186297Seric } 187297Seric else 188297Seric { 189297Seric /* 190297Seric ** Make local copies of the host & user and then 191297Seric ** transport them out. 192297Seric */ 193297Seric 194297Seric *q++ = '\0'; 195*2973Seric if (bitset(P_HLAST, t->p_flags)) 196297Seric { 197297Seric a->q_host = q; 198297Seric a->q_user = buf; 199297Seric } 200297Seric else 201297Seric { 202297Seric a->q_host = buf; 203297Seric a->q_user = q; 204297Seric } 2051516Seric 2061516Seric /* 2071516Seric ** Don't go to the net if already on the target host. 2081516Seric ** This is important on the berkeley network, since 2091516Seric ** it get confused if we ask to send to ourselves. 2101516Seric ** For nets like the ARPANET, we probably will have 2111516Seric ** the local list set to NULL to simplify testing. 2121516Seric ** The canonical representation of the name is also set 2131516Seric ** to be just the local name so the duplicate letter 2141516Seric ** suppression algorithm will work. 
2151516Seric */ 2161516Seric 2171516Seric if ((pvp = a->q_mailer->m_local) != NULL) 2181516Seric { 2191516Seric while (*pvp != NULL) 2201516Seric { 2211516Seric auto char buf2[MAXNAME]; 2221516Seric 2231516Seric strcpy(buf2, a->q_host); 224*2973Seric if (!bitset(P_HST_UPPER, t->p_flags)) 2251516Seric makelower(buf2); 2261516Seric if (strcmp(*pvp++, buf2) == 0) 2271516Seric { 2281516Seric strcpy(buf2, a->q_user); 2291516Seric p = a->q_paddr; 2301516Seric if (parse(buf2, a, -1) == NULL) 2311516Seric { 2321516Seric To = addr; 2331516Seric return (NULL); 2341516Seric } 2351516Seric To = a->q_paddr = p; 2361516Seric break; 2371516Seric } 2381516Seric } 2391516Seric } 2401516Seric 2411516Seric /* make copies if specified */ 242297Seric if (copyf >= 0) 243297Seric { 244*2973Seric a->q_host = newstr(a->q_host); 245*2973Seric a->q_user = newstr(a->q_user); 246297Seric } 247297Seric } 248297Seric 249297Seric /* 250297Seric ** Do UPPER->lower case mapping unless inhibited. 251297Seric */ 252297Seric 253*2973Seric if (!bitset(P_HST_UPPER, t->p_flags)) 254297Seric makelower(a->q_host); 255*2973Seric if (!bitset(P_USR_UPPER, t->p_flags)) 256297Seric makelower(a->q_user); 257297Seric 258297Seric /* 259297Seric ** Compute return value. 260297Seric */ 261297Seric 262297Seric # ifdef DEBUG 2631583Seric if (Debug) 264297Seric printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 265297Seric addr, a->q_host, a->q_user, t->p_mailer); 266297Seric # endif DEBUG 267297Seric 268297Seric return (a); 269297Seric } 270297Seric /* 271297Seric ** MAKELOWER -- Translate a line into lower case 272297Seric ** 273297Seric ** Parameters: 274297Seric ** p -- the string to translate. If NULL, return is 275297Seric ** immediate. 276297Seric ** 277297Seric ** Returns: 278297Seric ** none. 279297Seric ** 280297Seric ** Side Effects: 281297Seric ** String pointed to by p is translated to lower case. 
/*
**  MAKELOWER (header, continued)
**
**	Called By:
**		parse
**
**	Notes:
**		Only plain 7-bit upper case letters are changed; bytes
**		with the 0200 bit on are left exactly as they are.
**		The return type is the default int, now spelled out;
**		the value (always 0) carries no information.
*/

int
makelower(p)
	register char *p;
{
	register char c;

	if (p == NULL)
		return (0);
	for (; (c = *p) != '\0'; p++)
	{
		/* the 0200 test also keeps negative chars away from isupper */
		if ((c & 0200) == 0 && isupper(c))
			*p = c - 'A' + 'a';
	}
	return (0);
}
/*
**  PRESCAN -- Prescan name and make it canonical
**
**	Scans a name and turns it into canonical form.  This involves
**	deleting blanks, comments (in parentheses), and turning the
**	word "at" into an at-sign ("@").  The name is copied as this
**	is done; it is legal to copy a name onto itself, since this
**	process can only make things smaller.
**
**	This routine knows about quoted strings and angle brackets.
**
**	There are certain subtleties to this routine.  The one that
**	comes to mind now is that backslashes on the ends of names
**	are silently stripped off; this is intentional.  The problem
**	is that some versions of sndmsg (like at LBL) set the kill
**	character to something other than @ when reading addresses;
**	so people type "csvax.eric\@berkeley" -- which screws up the
**	berknet mailer.
**
**	Parameters:
**		addr -- the name to chomp.
**		buf -- the buffer to copy it into.
**		buflim -- the last usable address in the buffer
**			(which will hold a null byte).  Normally
**			&buf[sizeof buf - 1].  When the name is being
**			moved in place, set buflim to high core.
**			(That last sentence is reconstructed from a
**			truncated note in the original header --
**			TODO confirm.)
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
**
**	Returns:
**		A pointer into addr marking where the scan stopped:
**			at the terminating null byte, or just past
**			the delimiter that ended the name.
**		NULL on error, or if the canonical name is empty.
*/
330297Seric ** 331297Seric ** Side Effects: 332297Seric ** buf gets clobbered. 333297Seric ** 334297Seric ** Called By: 335297Seric ** parse 336297Seric ** maketemp 337297Seric */ 338297Seric 339297Seric char * 340297Seric prescan(addr, buf, buflim, delim) 341297Seric char *addr; 342297Seric char *buf; 343297Seric char *buflim; 344297Seric char delim; 345297Seric { 346297Seric register char *p; 347297Seric bool space; 348297Seric bool quotemode; 349297Seric bool bslashmode; 3502091Seric bool delimmode; 351297Seric int cmntcnt; 352297Seric int brccnt; 353297Seric register char c; 354297Seric register char *q; 355*2973Seric extern char *index(); 356297Seric 3572091Seric space = FALSE; 3582091Seric delimmode = TRUE; 359297Seric q = buf; 360297Seric bslashmode = quotemode = FALSE; 361297Seric cmntcnt = brccnt = 0; 3621585Seric for (p = addr; (c = *p++) != '\0'; ) 363297Seric { 364297Seric /* chew up special characters */ 365297Seric *q = '\0'; 366297Seric if (bslashmode) 367297Seric { 368297Seric c |= 0200; 3691585Seric bslashmode = FALSE; 370297Seric } 371297Seric else if (c == '"') 372297Seric quotemode = !quotemode; 373297Seric else if (c == '\\') 374297Seric { 375297Seric bslashmode++; 376297Seric continue; 377297Seric } 378297Seric else if (quotemode) 379297Seric c |= 0200; 380297Seric else if (c == delim) 381297Seric break; 382297Seric else if (c == '(') 3831378Seric { 384297Seric cmntcnt++; 3851378Seric continue; 3861378Seric } 387297Seric else if (c == ')') 388297Seric { 389297Seric if (cmntcnt <= 0) 390297Seric { 391297Seric usrerr("Unbalanced ')'"); 392297Seric return (NULL); 393297Seric } 394297Seric else 395297Seric { 396297Seric cmntcnt--; 397297Seric continue; 398297Seric } 399297Seric } 4002091Seric if (cmntcnt > 0) 4012091Seric continue; 4022091Seric else if (isascii(c) && isspace(c) && (space || delimmode)) 4032091Seric continue; 404297Seric else if (c == '<') 405297Seric { 4062092Seric if (brccnt < 0) 4072092Seric { 4082092Seric usrerr("multiple < 
spec"); 4092092Seric return (NULL); 4102092Seric } 411297Seric brccnt++; 4122091Seric delimmode = TRUE; 4132091Seric space = FALSE; 414297Seric if (brccnt == 1) 415297Seric { 416297Seric /* we prefer using machine readable name */ 417297Seric q = buf; 418297Seric *q = '\0'; 419297Seric continue; 420297Seric } 421297Seric } 422297Seric else if (c == '>') 423297Seric { 424297Seric if (brccnt <= 0) 425297Seric { 426297Seric usrerr("Unbalanced `>'"); 427297Seric return (NULL); 428297Seric } 429297Seric else 430297Seric brccnt--; 431297Seric if (brccnt <= 0) 4322092Seric { 4332092Seric brccnt = -1; 434297Seric continue; 4352092Seric } 436297Seric } 437297Seric 438297Seric /* 439297Seric ** Turn "at" into "@", 4401378Seric ** but only if "at" is a word. 441297Seric ** By the way, I violate the ARPANET RFC-733 442297Seric ** standard here, by assuming that 'space' delimits 443297Seric ** atoms. I assume that is just a mistake, since 444297Seric ** it violates the spirit of the semantics 445297Seric ** of the document..... 
446297Seric */ 447297Seric 4482091Seric if (delimmode && (c == 'a' || c == 'A') && 449297Seric (p[0] == 't' || p[0] == 'T') && 450*2973Seric (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040)) 451297Seric { 452297Seric c = '@'; 453297Seric p++; 454297Seric } 455297Seric 456*2973Seric if (delimmode = (index(DELIMCHARS, c) != NULL)) 4572091Seric space = FALSE; 4582091Seric 4592092Seric /* if not a space, squirrel it away */ 4602092Seric if ((!isascii(c) || !isspace(c)) && brccnt >= 0) 461297Seric { 4622091Seric if (q >= buflim-1) 463297Seric { 464297Seric usrerr("Address too long"); 465297Seric return (NULL); 466297Seric } 4672091Seric if (space) 4682091Seric *q++ = SPACESUB; 469297Seric *q++ = c; 470297Seric } 4712094Seric space = isascii(c) && isspace(c); 472297Seric } 473297Seric *q = '\0'; 474297Seric if (c == '\0') 475297Seric p--; 476297Seric if (cmntcnt > 0) 477297Seric usrerr("Unbalanced '('"); 478297Seric else if (quotemode) 479297Seric usrerr("Unbalanced '\"'"); 480297Seric else if (brccnt > 0) 481297Seric usrerr("Unbalanced '<'"); 482297Seric else if (buf[0] != '\0') 483297Seric return (p); 484297Seric return (NULL); 485297Seric } 486