1297Seric # include <stdio.h> 2297Seric # include <ctype.h> 3297Seric # include "dlvrmail.h" 4297Seric 5*407Seric static char SccsId[] = "@(#)parseaddr.c 1.2 07/25/80"; 6*407Seric 7297Seric /* 8297Seric ** PARSE -- Parse an address 9297Seric ** 10297Seric ** Parses an address and breaks it up into three parts: a 11297Seric ** net to transmit the message on, the host to transmit it 12297Seric ** to, and a user on that host. These are loaded into an 13297Seric ** addrq header with the values squirreled away if necessary. 14297Seric ** The "user" part may not be a real user; the process may 15297Seric ** just reoccur on that machine. For example, on a machine 16297Seric ** with an arpanet connection, the address 17297Seric ** csvax.bill@berkeley 18297Seric ** will break up to a "user" of 'csvax.bill' and a host 19297Seric ** of 'berkeley' -- to be transmitted over the arpanet. 20297Seric ** 21297Seric ** Parameters: 22297Seric ** addr -- the address to parse. 23297Seric ** a -- a pointer to the address descriptor buffer. 24297Seric ** If NULL, a header will be created. 25297Seric ** copyf -- determines what shall be copied: 26297Seric ** -1 -- don't copy anything. The printname 27297Seric ** (q_paddr) is just addr, and the 28297Seric ** user & host are allocated internally 29297Seric ** to parse. 30297Seric ** 0 -- copy out the parsed user & host, but 31297Seric ** don't copy the printname. 32297Seric ** +1 -- copy everything. 33297Seric ** 34297Seric ** Returns: 35297Seric ** A pointer to the address descriptor header (`a' if 36297Seric ** `a' is non-NULL). 37297Seric ** NULL on error. 38297Seric ** 39297Seric ** Side Effects: 40297Seric ** none 41297Seric ** 42297Seric ** Defined Constants: 43297Seric ** none 44297Seric ** 45297Seric ** Requires: 46297Seric ** usrerr 47297Seric ** strcpy (sys) 48297Seric ** isalpha (sys) 49297Seric ** xalloc 50297Seric ** prescan 51297Seric ** flagset 52297Seric ** makelower 53297Seric ** printf (sys) 54297Seric ** ParseTab -- the parse table. 55297Seric ** 56297Seric ** Called By: 57297Seric ** main 58297Seric ** sendto 59297Seric ** alias 60297Seric ** savemail 61297Seric ** 62297Seric ** History: 63297Seric ** 12/26/79 -- written. 64297Seric */ 65297Seric 66297Seric addrq * 67297Seric parse(addr, a, copyf) 68297Seric char *addr; 69297Seric register addrq *a; 70297Seric int copyf; 71297Seric { 72297Seric register char *p; 73297Seric register struct parsetab *t; 74297Seric extern struct parsetab ParseTab[]; 75297Seric static char buf[MAXNAME]; 76297Seric register char c; 77297Seric register char *q; 78297Seric bool got_one; 79297Seric extern char *prescan(); 80297Seric extern char *xalloc(); 81297Seric 82297Seric /* 83297Seric ** Initialize and prescan address. 84297Seric */ 85297Seric 86297Seric To = addr; 87297Seric if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 88297Seric return (NULL); 89297Seric 90297Seric /* 91297Seric ** Scan parse table. 92297Seric ** Look for the first entry designating a character 93297Seric ** that is contained in the address. 94297Seric ** Arrange for q to point to that character. 95297Seric ** Check to see that there is only one of the char 96297Seric ** if it must be unique. 97297Seric ** Find the last one if the host is on the RHS. 98297Seric ** Insist that the host name is atomic. 99297Seric ** If just doing a map, do the map and then start all 100297Seric ** over. 101297Seric */ 102297Seric 103297Seric rescan: 104297Seric got_one = FALSE; 105297Seric for (t = ParseTab; t->p_char != '\0'; t++) 106297Seric { 107297Seric q = NULL; 108297Seric for (p = buf; (c = *p) != '\0'; p++) 109297Seric { 110297Seric /* find the end of this token */ 111297Seric while (isalnum(c) || c == '-' || c == '_') 112297Seric c = *++p; 113297Seric if (c == '\0') 114297Seric break; 115297Seric 116297Seric if (c == t->p_char) 117297Seric { 118297Seric got_one = TRUE; 119297Seric 120297Seric /* do mapping as appropriate */ 121297Seric if (flagset(P_MAP, t->p_flags)) 122297Seric { 123297Seric *p = t->p_arg[0]; 124297Seric if (flagset(P_ONE, t->p_flags)) 125297Seric goto rescan; 126297Seric else 127297Seric continue; 128297Seric } 129297Seric 130297Seric /* arrange for q to point to it */ 131297Seric if (q != NULL && flagset(P_ONE, t->p_flags)) 132297Seric { 133297Seric usrerr("multichar error"); 134297Seric ExitStat = EX_USAGE; 135297Seric return (NULL); 136297Seric } 137297Seric if (q == NULL || flagset(P_HLAST, t->p_flags)) 138297Seric q = p; 139297Seric } 140297Seric else 141297Seric { 142297Seric /* insist that host name is atomic */ 143297Seric if (flagset(P_HLAST, t->p_flags)) 144297Seric q = NULL; 145297Seric else 146297Seric break; 147297Seric } 148297Seric } 149297Seric 150297Seric if (q != NULL) 151297Seric break; 152297Seric } 153297Seric 154297Seric /* 155297Seric ** If we matched nothing cleanly, but we did match something 156297Seric ** somewhere in the process of scanning, then we have a 157297Seric ** syntax error. This can happen on things like a@b:c where 158297Seric ** @ has a right host and : has a left host. 159297Seric ** 160297Seric ** We also set `q' to the null string, in case someone forgets 161297Seric ** to put the P_MOVE bit in the local mailer entry of the 162297Seric ** configuration table. 163297Seric */ 164297Seric 165297Seric if (q == NULL) 166297Seric { 167297Seric q = ""; 168297Seric if (got_one) 169297Seric { 170297Seric usrerr("syntax error"); 171297Seric ExitStat = EX_USAGE; 172297Seric return (NULL); 173297Seric } 174297Seric } 175297Seric 176297Seric /* 177297Seric ** Interpret entry. 178297Seric ** t points to the entry for the mailer we will use. 179297Seric ** q points to the significant character. 180297Seric */ 181297Seric 182297Seric if (a == NULL) 183297Seric a = (addrq *) xalloc(sizeof *a); 184297Seric if (copyf > 0) 185297Seric { 186297Seric p = xalloc((unsigned) strlen(addr) + 1); 187297Seric strcpy(p, addr); 188297Seric a->q_paddr = p; 189297Seric } 190297Seric else 191297Seric a->q_paddr = addr; 192297Seric a->q_mailer = &Mailer[t->p_mailer]; 193297Seric 194297Seric if (flagset(P_MOVE, t->p_flags)) 195297Seric { 196297Seric /* send the message to another host & retry */ 197297Seric a->q_host = t->p_arg; 198297Seric if (copyf >= 0) 199297Seric { 200297Seric p = xalloc((unsigned) strlen(buf) + 1); 201297Seric strcpy(p, buf); 202297Seric a->q_user = p; 203297Seric } 204297Seric else 205297Seric a->q_user = buf; 206297Seric } 207297Seric else 208297Seric { 209297Seric /* 210297Seric ** Make local copies of the host & user and then 211297Seric ** transport them out. 212297Seric */ 213297Seric 214297Seric *q++ = '\0'; 215297Seric if (flagset(P_HLAST, t->p_flags)) 216297Seric { 217297Seric a->q_host = q; 218297Seric a->q_user = buf; 219297Seric } 220297Seric else 221297Seric { 222297Seric a->q_host = buf; 223297Seric a->q_user = q; 224297Seric } 225297Seric if (copyf >= 0) 226297Seric { 227297Seric p = xalloc((unsigned) strlen(a->q_host) + 1); 228297Seric strcpy(p, a->q_host); 229297Seric a->q_host = p; 230297Seric p = xalloc((unsigned) strlen(a->q_user) + 1); 231297Seric strcpy(p, a->q_user); 232297Seric a->q_user = p; 233297Seric } 234297Seric } 235297Seric 236297Seric /* 237297Seric ** Do UPPER->lower case mapping unless inhibited. 238297Seric */ 239297Seric 240297Seric if (!flagset(P_HST_UPPER, t->p_flags)) 241297Seric makelower(a->q_host); 242297Seric if (!flagset(P_USR_UPPER, t->p_flags)) 243297Seric makelower(a->q_user); 244297Seric 245297Seric /* 246297Seric ** Compute return value. 247297Seric */ 248297Seric 249297Seric # ifdef DEBUG 250297Seric if (Debug && copyf >= 0) 251297Seric printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 252297Seric addr, a->q_host, a->q_user, t->p_mailer); 253297Seric # endif DEBUG 254297Seric 255297Seric return (a); 256297Seric } 257297Seric /* 258297Seric ** MAKELOWER -- Translate a line into lower case 259297Seric ** 260297Seric ** Parameters: 261297Seric ** p -- the string to translate. If NULL, return is 262297Seric ** immediate. 263297Seric ** 264297Seric ** Returns: 265297Seric ** none. 266297Seric ** 267297Seric ** Side Effects: 268297Seric ** String pointed to by p is translated to lower case. 269297Seric ** 270297Seric ** Requires: 271297Seric ** isupper (sys) 272297Seric ** 273297Seric ** Called By: 274297Seric ** parse 275297Seric ** 276297Seric ** History: 277297Seric ** 12/26/79 -- written. 278297Seric */ 279297Seric 280297Seric makelower(p) 281297Seric register char *p; 282297Seric { 283297Seric register char c; 284297Seric 285297Seric if (p == NULL) 286297Seric return; 287297Seric for (; (c = *p) != '\0'; p++) 288297Seric if ((c & 0200) == 0 && isupper(c)) 289297Seric *p = c - 'A' + 'a'; 290297Seric } 291297Seric /* 292297Seric ** PRESCAN -- Prescan name and make it canonical 293297Seric ** 294297Seric ** Scans a name and turns it into canonical form. This involves 295297Seric ** deleting blanks, comments (in parentheses), and turning the 296297Seric ** word "at" into an at-sign ("@"). The name is copied as this 297297Seric ** is done; it is legal to copy a name onto itself, since this 298297Seric ** process can only make things smaller. 299297Seric ** 300297Seric ** This routine knows about quoted strings and angle brackets. 301297Seric ** 302297Seric ** There are certain subtleties to this routine. The one that 303297Seric ** comes to mind now is that backslashes on the ends of names 304297Seric ** are silently stripped off; this is intentional. The problem 305297Seric ** is that some versions of sndmsg (like at LBL) set the kill 306297Seric ** character to something other than @ when reading addresses; 307297Seric ** so people type "csvax.eric\@berkeley" -- which screws up the 308297Seric ** berknet mailer. 309297Seric ** 310297Seric ** Parameters: 311297Seric ** addr -- the name to chomp. 312297Seric ** buf -- the buffer to copy it into. 313297Seric ** buflim -- the last usable address in the buffer 314297Seric ** (which will old a null byte). Normally 315297Seric ** &buf[sizeof buf - 1]. 316297Seric ** delim -- the delimiter for the address, normally 317297Seric ** '\0' or ','; \0 is accepted in any case. 318297Seric ** are moving in place; set buflim to high core. 319297Seric ** 320297Seric ** Returns: 321297Seric ** A pointer to the terminator of buf. 322297Seric ** NULL on error. 323297Seric ** 324297Seric ** Side Effects: 325297Seric ** buf gets clobbered. 326297Seric ** 327297Seric ** Requires: 328297Seric ** isspace (sys) 329297Seric ** any 330297Seric ** usrerr 331297Seric ** 332297Seric ** Called By: 333297Seric ** parse 334297Seric ** maketemp 335297Seric ** 336297Seric ** History: 337297Seric ** 12/30/79 -- broken from parse; comment processing 338297Seric ** added. 339297Seric */ 340297Seric 341297Seric char * 342297Seric prescan(addr, buf, buflim, delim) 343297Seric char *addr; 344297Seric char *buf; 345297Seric char *buflim; 346297Seric char delim; 347297Seric { 348297Seric register char *p; 349297Seric bool space; 350297Seric bool quotemode; 351297Seric bool bslashmode; 352297Seric int cmntcnt; 353297Seric int brccnt; 354297Seric register char c; 355297Seric register char *q; 356297Seric extern bool any(); 357297Seric 358297Seric space = TRUE; 359297Seric q = buf; 360297Seric bslashmode = quotemode = FALSE; 361297Seric cmntcnt = brccnt = 0; 362297Seric for (p = addr; (c = *p++ & 0177) != '\0'; ) 363297Seric { 364297Seric /* chew up special characters */ 365297Seric *q = '\0'; 366297Seric if (bslashmode) 367297Seric { 368297Seric c |= 0200; 369297Seric bslashmode == FALSE; 370297Seric } 371297Seric else if (c == '"') 372297Seric quotemode = !quotemode; 373297Seric else if (c == '\\') 374297Seric { 375297Seric bslashmode++; 376297Seric continue; 377297Seric } 378297Seric else if (quotemode) 379297Seric c |= 0200; 380297Seric else if (c == delim) 381297Seric break; 382297Seric else if (c == '(') 383297Seric cmntcnt++; 384297Seric else if (c == ')') 385297Seric { 386297Seric if (cmntcnt <= 0) 387297Seric { 388297Seric usrerr("Unbalanced ')'"); 389297Seric return (NULL); 390297Seric } 391297Seric else 392297Seric { 393297Seric cmntcnt--; 394297Seric continue; 395297Seric } 396297Seric } 397297Seric if (cmntcnt > 0) 398297Seric continue; 399297Seric else if (c == '<') 400297Seric { 401297Seric brccnt++; 402297Seric if (brccnt == 1) 403297Seric { 404297Seric /* we prefer using machine readable name */ 405297Seric q = buf; 406297Seric *q = '\0'; 407297Seric continue; 408297Seric } 409297Seric } 410297Seric else if (c == '>') 411297Seric { 412297Seric if (brccnt <= 0) 413297Seric { 414297Seric usrerr("Unbalanced `>'"); 415297Seric return (NULL); 416297Seric } 417297Seric else 418297Seric brccnt--; 419297Seric if (brccnt <= 0) 420297Seric continue; 421297Seric } 422297Seric 423297Seric /* 424297Seric ** Turn "at" into "@", 425297Seric ** but only if "at" is a word in and to itself. 426297Seric ** By the way, I violate the ARPANET RFC-733 427297Seric ** standard here, by assuming that 'space' delimits 428297Seric ** atoms. I assume that is just a mistake, since 429297Seric ** it violates the spirit of the semantics 430297Seric ** of the document..... 431297Seric */ 432297Seric 433297Seric if (space && (c == 'a' || c == 'A') && 434297Seric (p[0] == 't' || p[0] == 'T') && 435297Seric (any(p[1], "()<>@,;:\\\"") || p[1] <= 040)) 436297Seric { 437297Seric c = '@'; 438297Seric p++; 439297Seric } 440297Seric 441297Seric /* skip blanks */ 442297Seric if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0) 443297Seric { 444297Seric if (q >= buflim) 445297Seric { 446297Seric usrerr("Address too long"); 447297Seric return (NULL); 448297Seric } 449297Seric *q++ = c; 450297Seric } 451297Seric space = isspace(c); 452297Seric } 453297Seric *q = '\0'; 454297Seric if (c == '\0') 455297Seric p--; 456297Seric if (cmntcnt > 0) 457297Seric usrerr("Unbalanced '('"); 458297Seric else if (quotemode) 459297Seric usrerr("Unbalanced '\"'"); 460297Seric else if (brccnt > 0) 461297Seric usrerr("Unbalanced '<'"); 462297Seric else if (buf[0] != '\0') 463297Seric return (p); 464297Seric return (NULL); 465297Seric } 466