1297Seric # include <stdio.h> 2297Seric # include <ctype.h> 3297Seric # include "dlvrmail.h" 4297Seric 5*2091Seric static char SccsId[] = "@(#)parseaddr.c 2.2 01/08/81"; 6407Seric 7297Seric /* 8297Seric ** PARSE -- Parse an address 9297Seric ** 10297Seric ** Parses an address and breaks it up into three parts: a 11297Seric ** net to transmit the message on, the host to transmit it 12297Seric ** to, and a user on that host. These are loaded into an 13297Seric ** addrq header with the values squirreled away if necessary. 14297Seric ** The "user" part may not be a real user; the process may 15297Seric ** just reoccur on that machine. For example, on a machine 16297Seric ** with an arpanet connection, the address 17297Seric ** csvax.bill@berkeley 18297Seric ** will break up to a "user" of 'csvax.bill' and a host 19297Seric ** of 'berkeley' -- to be transmitted over the arpanet. 20297Seric ** 21297Seric ** Parameters: 22297Seric ** addr -- the address to parse. 23297Seric ** a -- a pointer to the address descriptor buffer. 24297Seric ** If NULL, a header will be created. 25297Seric ** copyf -- determines what shall be copied: 26297Seric ** -1 -- don't copy anything. The printname 27297Seric ** (q_paddr) is just addr, and the 28297Seric ** user & host are allocated internally 29297Seric ** to parse. 30297Seric ** 0 -- copy out the parsed user & host, but 31297Seric ** don't copy the printname. 32297Seric ** +1 -- copy everything. 33297Seric ** 34297Seric ** Returns: 35297Seric ** A pointer to the address descriptor header (`a' if 36297Seric ** `a' is non-NULL). 37297Seric ** NULL on error. 38297Seric ** 39297Seric ** Side Effects: 40297Seric ** none 41297Seric ** 42297Seric ** Called By: 43297Seric ** main 44297Seric ** sendto 45297Seric ** alias 46297Seric ** savemail 47297Seric */ 48297Seric 49*2091Seric # define DELIMCHARS "()<>@!.,;:\\\"" /* word delimiters */ 50*2091Seric # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 51*2091Seric 52297Seric addrq * 53297Seric parse(addr, a, copyf) 54297Seric char *addr; 55297Seric register addrq *a; 56297Seric int copyf; 57297Seric { 58297Seric register char *p; 59297Seric register struct parsetab *t; 60297Seric extern struct parsetab ParseTab[]; 61297Seric static char buf[MAXNAME]; 62297Seric register char c; 63297Seric register char *q; 64297Seric bool got_one; 65297Seric extern char *prescan(); 66297Seric extern char *xalloc(); 671516Seric char **pvp; 68297Seric 69297Seric /* 70297Seric ** Initialize and prescan address. 71297Seric */ 72297Seric 73297Seric To = addr; 74297Seric if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 75297Seric return (NULL); 76297Seric 77297Seric /* 78297Seric ** Scan parse table. 79297Seric ** Look for the first entry designating a character 80297Seric ** that is contained in the address. 81297Seric ** Arrange for q to point to that character. 82297Seric ** Check to see that there is only one of the char 83297Seric ** if it must be unique. 84297Seric ** Find the last one if the host is on the RHS. 85297Seric ** Insist that the host name is atomic. 86297Seric ** If just doing a map, do the map and then start all 87297Seric ** over. 88297Seric */ 89297Seric 90297Seric rescan: 91297Seric got_one = FALSE; 92297Seric for (t = ParseTab; t->p_char != '\0'; t++) 93297Seric { 94297Seric q = NULL; 95297Seric for (p = buf; (c = *p) != '\0'; p++) 96297Seric { 97297Seric /* find the end of this token */ 98297Seric while (isalnum(c) || c == '-' || c == '_') 99297Seric c = *++p; 100297Seric if (c == '\0') 101297Seric break; 102297Seric 103297Seric if (c == t->p_char) 104297Seric { 105297Seric got_one = TRUE; 106297Seric 107297Seric /* do mapping as appropriate */ 108297Seric if (flagset(P_MAP, t->p_flags)) 109297Seric { 110297Seric *p = t->p_arg[0]; 111297Seric if (flagset(P_ONE, t->p_flags)) 112297Seric goto rescan; 113297Seric else 114297Seric continue; 115297Seric } 116297Seric 117297Seric /* arrange for q to point to it */ 118297Seric if (q != NULL && flagset(P_ONE, t->p_flags)) 119297Seric { 120297Seric usrerr("multichar error"); 121297Seric ExitStat = EX_USAGE; 122297Seric return (NULL); 123297Seric } 124297Seric if (q == NULL || flagset(P_HLAST, t->p_flags)) 125297Seric q = p; 126297Seric } 127297Seric else 128297Seric { 129297Seric /* insist that host name is atomic */ 130297Seric if (flagset(P_HLAST, t->p_flags)) 131297Seric q = NULL; 132297Seric else 133297Seric break; 134297Seric } 135297Seric } 136297Seric 137297Seric if (q != NULL) 138297Seric break; 139297Seric } 140297Seric 141297Seric /* 142297Seric ** If we matched nothing cleanly, but we did match something 143297Seric ** somewhere in the process of scanning, then we have a 144297Seric ** syntax error. This can happen on things like a@b:c where 145297Seric ** @ has a right host and : has a left host. 146297Seric ** 147297Seric ** We also set `q' to the null string, in case someone forgets 148297Seric ** to put the P_MOVE bit in the local mailer entry of the 149297Seric ** configuration table. 150297Seric */ 151297Seric 152297Seric if (q == NULL) 153297Seric { 154297Seric q = ""; 155297Seric if (got_one) 156297Seric { 157297Seric usrerr("syntax error"); 158297Seric ExitStat = EX_USAGE; 159297Seric return (NULL); 160297Seric } 161297Seric } 162297Seric 163297Seric /* 164297Seric ** Interpret entry. 165297Seric ** t points to the entry for the mailer we will use. 166297Seric ** q points to the significant character. 167297Seric */ 168297Seric 169297Seric if (a == NULL) 170297Seric a = (addrq *) xalloc(sizeof *a); 171297Seric if (copyf > 0) 172297Seric { 173297Seric p = xalloc((unsigned) strlen(addr) + 1); 174297Seric strcpy(p, addr); 175297Seric a->q_paddr = p; 176297Seric } 177297Seric else 178297Seric a->q_paddr = addr; 179297Seric a->q_mailer = &Mailer[t->p_mailer]; 180297Seric 181297Seric if (flagset(P_MOVE, t->p_flags)) 182297Seric { 183297Seric /* send the message to another host & retry */ 184297Seric a->q_host = t->p_arg; 185297Seric if (copyf >= 0) 186297Seric { 187297Seric p = xalloc((unsigned) strlen(buf) + 1); 188297Seric strcpy(p, buf); 189297Seric a->q_user = p; 190297Seric } 191297Seric else 192297Seric a->q_user = buf; 193297Seric } 194297Seric else 195297Seric { 196297Seric /* 197297Seric ** Make local copies of the host & user and then 198297Seric ** transport them out. 199297Seric */ 200297Seric 201297Seric *q++ = '\0'; 202297Seric if (flagset(P_HLAST, t->p_flags)) 203297Seric { 204297Seric a->q_host = q; 205297Seric a->q_user = buf; 206297Seric } 207297Seric else 208297Seric { 209297Seric a->q_host = buf; 210297Seric a->q_user = q; 211297Seric } 2121516Seric 2131516Seric /* 2141516Seric ** Don't go to the net if already on the target host. 2151516Seric ** This is important on the berkeley network, since 2161516Seric ** it get confused if we ask to send to ourselves. 2171516Seric ** For nets like the ARPANET, we probably will have 2181516Seric ** the local list set to NULL to simplify testing. 2191516Seric ** The canonical representation of the name is also set 2201516Seric ** to be just the local name so the duplicate letter 2211516Seric ** suppression algorithm will work. 2221516Seric */ 2231516Seric 2241516Seric if ((pvp = a->q_mailer->m_local) != NULL) 2251516Seric { 2261516Seric while (*pvp != NULL) 2271516Seric { 2281516Seric auto char buf2[MAXNAME]; 2291516Seric 2301516Seric strcpy(buf2, a->q_host); 2311516Seric if (!flagset(P_HST_UPPER, t->p_flags)) 2321516Seric makelower(buf2); 2331516Seric if (strcmp(*pvp++, buf2) == 0) 2341516Seric { 2351516Seric strcpy(buf2, a->q_user); 2361516Seric p = a->q_paddr; 2371516Seric if (parse(buf2, a, -1) == NULL) 2381516Seric { 2391516Seric To = addr; 2401516Seric return (NULL); 2411516Seric } 2421516Seric To = a->q_paddr = p; 2431516Seric break; 2441516Seric } 2451516Seric } 2461516Seric } 2471516Seric 2481516Seric /* make copies if specified */ 249297Seric if (copyf >= 0) 250297Seric { 251297Seric p = xalloc((unsigned) strlen(a->q_host) + 1); 252297Seric strcpy(p, a->q_host); 253297Seric a->q_host = p; 254297Seric p = xalloc((unsigned) strlen(a->q_user) + 1); 255297Seric strcpy(p, a->q_user); 256297Seric a->q_user = p; 257297Seric } 258297Seric } 259297Seric 260297Seric /* 261297Seric ** Do UPPER->lower case mapping unless inhibited. 262297Seric */ 263297Seric 264297Seric if (!flagset(P_HST_UPPER, t->p_flags)) 265297Seric makelower(a->q_host); 266297Seric if (!flagset(P_USR_UPPER, t->p_flags)) 267297Seric makelower(a->q_user); 268297Seric 269297Seric /* 270297Seric ** Compute return value. 271297Seric */ 272297Seric 273297Seric # ifdef DEBUG 2741583Seric if (Debug) 275297Seric printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 276297Seric addr, a->q_host, a->q_user, t->p_mailer); 277297Seric # endif DEBUG 278297Seric 279297Seric return (a); 280297Seric } 281297Seric /* 282297Seric ** MAKELOWER -- Translate a line into lower case 283297Seric ** 284297Seric ** Parameters: 285297Seric ** p -- the string to translate. If NULL, return is 286297Seric ** immediate. 287297Seric ** 288297Seric ** Returns: 289297Seric ** none. 290297Seric ** 291297Seric ** Side Effects: 292297Seric ** String pointed to by p is translated to lower case. 293297Seric ** 294297Seric ** Called By: 295297Seric ** parse 296297Seric */ 297297Seric 298297Seric makelower(p) 299297Seric register char *p; 300297Seric { 301297Seric register char c; 302297Seric 303297Seric if (p == NULL) 304297Seric return; 305297Seric for (; (c = *p) != '\0'; p++) 306297Seric if ((c & 0200) == 0 && isupper(c)) 307297Seric *p = c - 'A' + 'a'; 308297Seric } 309297Seric /* 310297Seric ** PRESCAN -- Prescan name and make it canonical 311297Seric ** 312297Seric ** Scans a name and turns it into canonical form. This involves 313297Seric ** deleting blanks, comments (in parentheses), and turning the 314297Seric ** word "at" into an at-sign ("@"). The name is copied as this 315297Seric ** is done; it is legal to copy a name onto itself, since this 316297Seric ** process can only make things smaller. 317297Seric ** 318297Seric ** This routine knows about quoted strings and angle brackets. 319297Seric ** 320297Seric ** There are certain subtleties to this routine. The one that 321297Seric ** comes to mind now is that backslashes on the ends of names 322297Seric ** are silently stripped off; this is intentional. The problem 323297Seric ** is that some versions of sndmsg (like at LBL) set the kill 324297Seric ** character to something other than @ when reading addresses; 325297Seric ** so people type "csvax.eric\@berkeley" -- which screws up the 326297Seric ** berknet mailer. 327297Seric ** 328297Seric ** Parameters: 329297Seric ** addr -- the name to chomp. 330297Seric ** buf -- the buffer to copy it into. 331297Seric ** buflim -- the last usable address in the buffer 332297Seric ** (which will old a null byte). Normally 333297Seric ** &buf[sizeof buf - 1]. 334297Seric ** delim -- the delimiter for the address, normally 335297Seric ** '\0' or ','; \0 is accepted in any case. 336297Seric ** are moving in place; set buflim to high core. 337297Seric ** 338297Seric ** Returns: 339297Seric ** A pointer to the terminator of buf. 340297Seric ** NULL on error. 341297Seric ** 342297Seric ** Side Effects: 343297Seric ** buf gets clobbered. 344297Seric ** 345297Seric ** Called By: 346297Seric ** parse 347297Seric ** maketemp 348297Seric */ 349297Seric 350297Seric char * 351297Seric prescan(addr, buf, buflim, delim) 352297Seric char *addr; 353297Seric char *buf; 354297Seric char *buflim; 355297Seric char delim; 356297Seric { 357297Seric register char *p; 358297Seric bool space; 359297Seric bool quotemode; 360297Seric bool bslashmode; 361*2091Seric bool delimmode; 362297Seric int cmntcnt; 363297Seric int brccnt; 364297Seric register char c; 365297Seric register char *q; 366297Seric extern bool any(); 367297Seric 368*2091Seric space = FALSE; 369*2091Seric delimmode = TRUE; 370297Seric q = buf; 371297Seric bslashmode = quotemode = FALSE; 372297Seric cmntcnt = brccnt = 0; 3731585Seric for (p = addr; (c = *p++) != '\0'; ) 374297Seric { 375297Seric /* chew up special characters */ 376297Seric *q = '\0'; 377297Seric if (bslashmode) 378297Seric { 379297Seric c |= 0200; 3801585Seric bslashmode = FALSE; 381297Seric } 382297Seric else if (c == '"') 383297Seric quotemode = !quotemode; 384297Seric else if (c == '\\') 385297Seric { 386297Seric bslashmode++; 387297Seric continue; 388297Seric } 389297Seric else if (quotemode) 390297Seric c |= 0200; 391297Seric else if (c == delim) 392297Seric break; 393297Seric else if (c == '(') 3941378Seric { 395297Seric cmntcnt++; 3961378Seric continue; 3971378Seric } 398297Seric else if (c == ')') 399297Seric { 400297Seric if (cmntcnt <= 0) 401297Seric { 402297Seric usrerr("Unbalanced ')'"); 403297Seric return (NULL); 404297Seric } 405297Seric else 406297Seric { 407297Seric cmntcnt--; 408297Seric continue; 409297Seric } 410297Seric } 411*2091Seric if (cmntcnt > 0) 412*2091Seric continue; 413*2091Seric else if (isascii(c) && isspace(c) && (space || delimmode)) 414*2091Seric continue; 415297Seric else if (c == '<') 416297Seric { 417297Seric brccnt++; 418*2091Seric delimmode = TRUE; 419*2091Seric space = FALSE; 420297Seric if (brccnt == 1) 421297Seric { 422297Seric /* we prefer using machine readable name */ 423297Seric q = buf; 424297Seric *q = '\0'; 425297Seric continue; 426297Seric } 427297Seric } 428297Seric else if (c == '>') 429297Seric { 430297Seric if (brccnt <= 0) 431297Seric { 432297Seric usrerr("Unbalanced `>'"); 433297Seric return (NULL); 434297Seric } 435297Seric else 436297Seric brccnt--; 437297Seric if (brccnt <= 0) 438297Seric continue; 439297Seric } 440297Seric 441297Seric /* 442297Seric ** Turn "at" into "@", 4431378Seric ** but only if "at" is a word. 444297Seric ** By the way, I violate the ARPANET RFC-733 445297Seric ** standard here, by assuming that 'space' delimits 446297Seric ** atoms. I assume that is just a mistake, since 447297Seric ** it violates the spirit of the semantics 448297Seric ** of the document..... 449297Seric */ 450297Seric 451*2091Seric if (delimmode && (c == 'a' || c == 'A') && 452297Seric (p[0] == 't' || p[0] == 'T') && 453*2091Seric (any(p[1], DELIMCHARS) || p[1] <= 040)) 454297Seric { 455297Seric c = '@'; 456297Seric p++; 457297Seric } 458297Seric 459*2091Seric if (delimmode = any(c, DELIMCHARS)) 460*2091Seric space = FALSE; 461*2091Seric 462297Seric /* skip blanks */ 463*2091Seric if (!isascii(c) || !isspace(c)) 464297Seric { 465*2091Seric if (q >= buflim-1) 466297Seric { 467297Seric usrerr("Address too long"); 468297Seric return (NULL); 469297Seric } 470*2091Seric if (space) 471*2091Seric *q++ = SPACESUB; 472297Seric *q++ = c; 473297Seric } 474297Seric space = isspace(c); 475297Seric } 476297Seric *q = '\0'; 477297Seric if (c == '\0') 478297Seric p--; 479297Seric if (cmntcnt > 0) 480297Seric usrerr("Unbalanced '('"); 481297Seric else if (quotemode) 482297Seric usrerr("Unbalanced '\"'"); 483297Seric else if (brccnt > 0) 484297Seric usrerr("Unbalanced '<'"); 485297Seric else if (buf[0] != '\0') 486297Seric return (p); 487297Seric return (NULL); 488297Seric } 489