1297Seric # include <stdio.h> 2297Seric # include <ctype.h> 3297Seric # include "dlvrmail.h" 4297Seric 5*1516Seric static char SccsId[] = "@(#)parseaddr.c 1.5 10/18/80"; 6407Seric 7297Seric /* 8297Seric ** PARSE -- Parse an address 9297Seric ** 10297Seric ** Parses an address and breaks it up into three parts: a 11297Seric ** net to transmit the message on, the host to transmit it 12297Seric ** to, and a user on that host. These are loaded into an 13297Seric ** addrq header with the values squirreled away if necessary. 14297Seric ** The "user" part may not be a real user; the process may 15297Seric ** just reoccur on that machine. For example, on a machine 16297Seric ** with an arpanet connection, the address 17297Seric ** csvax.bill@berkeley 18297Seric ** will break up to a "user" of 'csvax.bill' and a host 19297Seric ** of 'berkeley' -- to be transmitted over the arpanet. 20297Seric ** 21297Seric ** Parameters: 22297Seric ** addr -- the address to parse. 23297Seric ** a -- a pointer to the address descriptor buffer. 24297Seric ** If NULL, a header will be created. 25297Seric ** copyf -- determines what shall be copied: 26297Seric ** -1 -- don't copy anything. The printname 27297Seric ** (q_paddr) is just addr, and the 28297Seric ** user & host are allocated internally 29297Seric ** to parse. 30297Seric ** 0 -- copy out the parsed user & host, but 31297Seric ** don't copy the printname. 32297Seric ** +1 -- copy everything. 33297Seric ** 34297Seric ** Returns: 35297Seric ** A pointer to the address descriptor header (`a' if 36297Seric ** `a' is non-NULL). 37297Seric ** NULL on error. 38297Seric ** 39297Seric ** Side Effects: 40297Seric ** none 41297Seric ** 42297Seric ** Called By: 43297Seric ** main 44297Seric ** sendto 45297Seric ** alias 46297Seric ** savemail 47297Seric */ 48297Seric 49297Seric addrq * 50297Seric parse(addr, a, copyf) 51297Seric char *addr; 52297Seric register addrq *a; 53297Seric int copyf; 54297Seric { 55297Seric register char *p; 56297Seric register struct parsetab *t; 57297Seric extern struct parsetab ParseTab[]; 58297Seric static char buf[MAXNAME]; 59297Seric register char c; 60297Seric register char *q; 61297Seric bool got_one; 62297Seric extern char *prescan(); 63297Seric extern char *xalloc(); 64*1516Seric char **pvp; 65297Seric 66297Seric /* 67297Seric ** Initialize and prescan address. 68297Seric */ 69297Seric 70297Seric To = addr; 71297Seric if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 72297Seric return (NULL); 73297Seric 74297Seric /* 75297Seric ** Scan parse table. 76297Seric ** Look for the first entry designating a character 77297Seric ** that is contained in the address. 78297Seric ** Arrange for q to point to that character. 79297Seric ** Check to see that there is only one of the char 80297Seric ** if it must be unique. 81297Seric ** Find the last one if the host is on the RHS. 82297Seric ** Insist that the host name is atomic. 83297Seric ** If just doing a map, do the map and then start all 84297Seric ** over. 85297Seric */ 86297Seric 87297Seric rescan: 88297Seric got_one = FALSE; 89297Seric for (t = ParseTab; t->p_char != '\0'; t++) 90297Seric { 91297Seric q = NULL; 92297Seric for (p = buf; (c = *p) != '\0'; p++) 93297Seric { 94297Seric /* find the end of this token */ 95297Seric while (isalnum(c) || c == '-' || c == '_') 96297Seric c = *++p; 97297Seric if (c == '\0') 98297Seric break; 99297Seric 100297Seric if (c == t->p_char) 101297Seric { 102297Seric got_one = TRUE; 103297Seric 104297Seric /* do mapping as appropriate */ 105297Seric if (flagset(P_MAP, t->p_flags)) 106297Seric { 107297Seric *p = t->p_arg[0]; 108297Seric if (flagset(P_ONE, t->p_flags)) 109297Seric goto rescan; 110297Seric else 111297Seric continue; 112297Seric } 113297Seric 114297Seric /* arrange for q to point to it */ 115297Seric if (q != NULL && flagset(P_ONE, t->p_flags)) 116297Seric { 117297Seric usrerr("multichar error"); 118297Seric ExitStat = EX_USAGE; 119297Seric return (NULL); 120297Seric } 121297Seric if (q == NULL || flagset(P_HLAST, t->p_flags)) 122297Seric q = p; 123297Seric } 124297Seric else 125297Seric { 126297Seric /* insist that host name is atomic */ 127297Seric if (flagset(P_HLAST, t->p_flags)) 128297Seric q = NULL; 129297Seric else 130297Seric break; 131297Seric } 132297Seric } 133297Seric 134297Seric if (q != NULL) 135297Seric break; 136297Seric } 137297Seric 138297Seric /* 139297Seric ** If we matched nothing cleanly, but we did match something 140297Seric ** somewhere in the process of scanning, then we have a 141297Seric ** syntax error. This can happen on things like a@b:c where 142297Seric ** @ has a right host and : has a left host. 143297Seric ** 144297Seric ** We also set `q' to the null string, in case someone forgets 145297Seric ** to put the P_MOVE bit in the local mailer entry of the 146297Seric ** configuration table. 147297Seric */ 148297Seric 149297Seric if (q == NULL) 150297Seric { 151297Seric q = ""; 152297Seric if (got_one) 153297Seric { 154297Seric usrerr("syntax error"); 155297Seric ExitStat = EX_USAGE; 156297Seric return (NULL); 157297Seric } 158297Seric } 159297Seric 160297Seric /* 161297Seric ** Interpret entry. 162297Seric ** t points to the entry for the mailer we will use. 163297Seric ** q points to the significant character. 164297Seric */ 165297Seric 166297Seric if (a == NULL) 167297Seric a = (addrq *) xalloc(sizeof *a); 168297Seric if (copyf > 0) 169297Seric { 170297Seric p = xalloc((unsigned) strlen(addr) + 1); 171297Seric strcpy(p, addr); 172297Seric a->q_paddr = p; 173297Seric } 174297Seric else 175297Seric a->q_paddr = addr; 176297Seric a->q_mailer = &Mailer[t->p_mailer]; 177297Seric 178297Seric if (flagset(P_MOVE, t->p_flags)) 179297Seric { 180297Seric /* send the message to another host & retry */ 181297Seric a->q_host = t->p_arg; 182297Seric if (copyf >= 0) 183297Seric { 184297Seric p = xalloc((unsigned) strlen(buf) + 1); 185297Seric strcpy(p, buf); 186297Seric a->q_user = p; 187297Seric } 188297Seric else 189297Seric a->q_user = buf; 190297Seric } 191297Seric else 192297Seric { 193297Seric /* 194297Seric ** Make local copies of the host & user and then 195297Seric ** transport them out. 196297Seric */ 197297Seric 198297Seric *q++ = '\0'; 199297Seric if (flagset(P_HLAST, t->p_flags)) 200297Seric { 201297Seric a->q_host = q; 202297Seric a->q_user = buf; 203297Seric } 204297Seric else 205297Seric { 206297Seric a->q_host = buf; 207297Seric a->q_user = q; 208297Seric } 209*1516Seric 210*1516Seric /* 211*1516Seric ** Don't go to the net if already on the target host. 212*1516Seric ** This is important on the berkeley network, since 213*1516Seric ** it get confused if we ask to send to ourselves. 214*1516Seric ** For nets like the ARPANET, we probably will have 215*1516Seric ** the local list set to NULL to simplify testing. 216*1516Seric ** The canonical representation of the name is also set 217*1516Seric ** to be just the local name so the duplicate letter 218*1516Seric ** suppression algorithm will work. 219*1516Seric */ 220*1516Seric 221*1516Seric if ((pvp = a->q_mailer->m_local) != NULL) 222*1516Seric { 223*1516Seric while (*pvp != NULL) 224*1516Seric { 225*1516Seric auto char buf2[MAXNAME]; 226*1516Seric 227*1516Seric strcpy(buf2, a->q_host); 228*1516Seric if (!flagset(P_HST_UPPER, t->p_flags)) 229*1516Seric makelower(buf2); 230*1516Seric if (strcmp(*pvp++, buf2) == 0) 231*1516Seric { 232*1516Seric strcpy(buf2, a->q_user); 233*1516Seric p = a->q_paddr; 234*1516Seric if (parse(buf2, a, -1) == NULL) 235*1516Seric { 236*1516Seric To = addr; 237*1516Seric return (NULL); 238*1516Seric } 239*1516Seric To = a->q_paddr = p; 240*1516Seric break; 241*1516Seric } 242*1516Seric } 243*1516Seric } 244*1516Seric 245*1516Seric /* make copies if specified */ 246297Seric if (copyf >= 0) 247297Seric { 248297Seric p = xalloc((unsigned) strlen(a->q_host) + 1); 249297Seric strcpy(p, a->q_host); 250297Seric a->q_host = p; 251297Seric p = xalloc((unsigned) strlen(a->q_user) + 1); 252297Seric strcpy(p, a->q_user); 253297Seric a->q_user = p; 254297Seric } 255297Seric } 256297Seric 257297Seric /* 258297Seric ** Do UPPER->lower case mapping unless inhibited. 259297Seric */ 260297Seric 261297Seric if (!flagset(P_HST_UPPER, t->p_flags)) 262297Seric makelower(a->q_host); 263297Seric if (!flagset(P_USR_UPPER, t->p_flags)) 264297Seric makelower(a->q_user); 265297Seric 266297Seric /* 267297Seric ** Compute return value. 268297Seric */ 269297Seric 270297Seric # ifdef DEBUG 271297Seric if (Debug && copyf >= 0) 272297Seric printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 273297Seric addr, a->q_host, a->q_user, t->p_mailer); 274297Seric # endif DEBUG 275297Seric 276297Seric return (a); 277297Seric } 278297Seric /* 279297Seric ** MAKELOWER -- Translate a line into lower case 280297Seric ** 281297Seric ** Parameters: 282297Seric ** p -- the string to translate. If NULL, return is 283297Seric ** immediate. 284297Seric ** 285297Seric ** Returns: 286297Seric ** none. 287297Seric ** 288297Seric ** Side Effects: 289297Seric ** String pointed to by p is translated to lower case. 290297Seric ** 291297Seric ** Called By: 292297Seric ** parse 293297Seric */ 294297Seric 295297Seric makelower(p) 296297Seric register char *p; 297297Seric { 298297Seric register char c; 299297Seric 300297Seric if (p == NULL) 301297Seric return; 302297Seric for (; (c = *p) != '\0'; p++) 303297Seric if ((c & 0200) == 0 && isupper(c)) 304297Seric *p = c - 'A' + 'a'; 305297Seric } 306297Seric /* 307297Seric ** PRESCAN -- Prescan name and make it canonical 308297Seric ** 309297Seric ** Scans a name and turns it into canonical form. This involves 310297Seric ** deleting blanks, comments (in parentheses), and turning the 311297Seric ** word "at" into an at-sign ("@"). The name is copied as this 312297Seric ** is done; it is legal to copy a name onto itself, since this 313297Seric ** process can only make things smaller. 314297Seric ** 315297Seric ** This routine knows about quoted strings and angle brackets. 316297Seric ** 317297Seric ** There are certain subtleties to this routine. The one that 318297Seric ** comes to mind now is that backslashes on the ends of names 319297Seric ** are silently stripped off; this is intentional. The problem 320297Seric ** is that some versions of sndmsg (like at LBL) set the kill 321297Seric ** character to something other than @ when reading addresses; 322297Seric ** so people type "csvax.eric\@berkeley" -- which screws up the 323297Seric ** berknet mailer. 324297Seric ** 325297Seric ** Parameters: 326297Seric ** addr -- the name to chomp. 327297Seric ** buf -- the buffer to copy it into. 328297Seric ** buflim -- the last usable address in the buffer 329297Seric ** (which will old a null byte). Normally 330297Seric ** &buf[sizeof buf - 1]. 331297Seric ** delim -- the delimiter for the address, normally 332297Seric ** '\0' or ','; \0 is accepted in any case. 333297Seric ** are moving in place; set buflim to high core. 334297Seric ** 335297Seric ** Returns: 336297Seric ** A pointer to the terminator of buf. 337297Seric ** NULL on error. 338297Seric ** 339297Seric ** Side Effects: 340297Seric ** buf gets clobbered. 341297Seric ** 342297Seric ** Called By: 343297Seric ** parse 344297Seric ** maketemp 345297Seric */ 346297Seric 347297Seric char * 348297Seric prescan(addr, buf, buflim, delim) 349297Seric char *addr; 350297Seric char *buf; 351297Seric char *buflim; 352297Seric char delim; 353297Seric { 354297Seric register char *p; 355297Seric bool space; 356297Seric bool quotemode; 357297Seric bool bslashmode; 358297Seric int cmntcnt; 359297Seric int brccnt; 360297Seric register char c; 361297Seric register char *q; 362297Seric extern bool any(); 363297Seric 364297Seric space = TRUE; 365297Seric q = buf; 366297Seric bslashmode = quotemode = FALSE; 367297Seric cmntcnt = brccnt = 0; 368297Seric for (p = addr; (c = *p++ & 0177) != '\0'; ) 369297Seric { 370297Seric /* chew up special characters */ 371297Seric *q = '\0'; 372297Seric if (bslashmode) 373297Seric { 374297Seric c |= 0200; 375297Seric bslashmode == FALSE; 376297Seric } 377297Seric else if (c == '"') 378297Seric quotemode = !quotemode; 379297Seric else if (c == '\\') 380297Seric { 381297Seric bslashmode++; 382297Seric continue; 383297Seric } 384297Seric else if (quotemode) 385297Seric c |= 0200; 386297Seric else if (c == delim) 387297Seric break; 388297Seric else if (c == '(') 3891378Seric { 390297Seric cmntcnt++; 3911378Seric continue; 3921378Seric } 393297Seric else if (c == ')') 394297Seric { 395297Seric if (cmntcnt <= 0) 396297Seric { 397297Seric usrerr("Unbalanced ')'"); 398297Seric return (NULL); 399297Seric } 400297Seric else 401297Seric { 402297Seric cmntcnt--; 403297Seric continue; 404297Seric } 405297Seric } 406297Seric else if (c == '<') 407297Seric { 408297Seric brccnt++; 409297Seric if (brccnt == 1) 410297Seric { 411297Seric /* we prefer using machine readable name */ 412297Seric q = buf; 413297Seric *q = '\0'; 414297Seric continue; 415297Seric } 416297Seric } 417297Seric else if (c == '>') 418297Seric { 419297Seric if (brccnt <= 0) 420297Seric { 421297Seric usrerr("Unbalanced `>'"); 422297Seric return (NULL); 423297Seric } 424297Seric else 425297Seric brccnt--; 426297Seric if (brccnt <= 0) 427297Seric continue; 428297Seric } 429297Seric 430297Seric /* 431297Seric ** Turn "at" into "@", 4321378Seric ** but only if "at" is a word. 433297Seric ** By the way, I violate the ARPANET RFC-733 434297Seric ** standard here, by assuming that 'space' delimits 435297Seric ** atoms. I assume that is just a mistake, since 436297Seric ** it violates the spirit of the semantics 437297Seric ** of the document..... 438297Seric */ 439297Seric 440297Seric if (space && (c == 'a' || c == 'A') && 441297Seric (p[0] == 't' || p[0] == 'T') && 442297Seric (any(p[1], "()<>@,;:\\\"") || p[1] <= 040)) 443297Seric { 444297Seric c = '@'; 445297Seric p++; 446297Seric } 447297Seric 448297Seric /* skip blanks */ 449297Seric if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0) 450297Seric { 451297Seric if (q >= buflim) 452297Seric { 453297Seric usrerr("Address too long"); 454297Seric return (NULL); 455297Seric } 456297Seric *q++ = c; 457297Seric } 458297Seric space = isspace(c); 459297Seric } 460297Seric *q = '\0'; 461297Seric if (c == '\0') 462297Seric p--; 463297Seric if (cmntcnt > 0) 464297Seric usrerr("Unbalanced '('"); 465297Seric else if (quotemode) 466297Seric usrerr("Unbalanced '\"'"); 467297Seric else if (brccnt > 0) 468297Seric usrerr("Unbalanced '<'"); 469297Seric else if (buf[0] != '\0') 470297Seric return (p); 471297Seric return (NULL); 472297Seric } 473