1297Seric # include <stdio.h> 2297Seric # include <ctype.h> 3297Seric # include "dlvrmail.h" 4297Seric 5*1378Seric static char SccsId[] = "@(#)parseaddr.c 1.4 10/11/80"; 6407Seric 7297Seric /* 8297Seric ** PARSE -- Parse an address 9297Seric ** 10297Seric ** Parses an address and breaks it up into three parts: a 11297Seric ** net to transmit the message on, the host to transmit it 12297Seric ** to, and a user on that host. These are loaded into an 13297Seric ** addrq header with the values squirreled away if necessary. 14297Seric ** The "user" part may not be a real user; the process may 15297Seric ** just reoccur on that machine. For example, on a machine 16297Seric ** with an arpanet connection, the address 17297Seric ** csvax.bill@berkeley 18297Seric ** will break up to a "user" of 'csvax.bill' and a host 19297Seric ** of 'berkeley' -- to be transmitted over the arpanet. 20297Seric ** 21297Seric ** Parameters: 22297Seric ** addr -- the address to parse. 23297Seric ** a -- a pointer to the address descriptor buffer. 24297Seric ** If NULL, a header will be created. 25297Seric ** copyf -- determines what shall be copied: 26297Seric ** -1 -- don't copy anything. The printname 27297Seric ** (q_paddr) is just addr, and the 28297Seric ** user & host are allocated internally 29297Seric ** to parse. 30297Seric ** 0 -- copy out the parsed user & host, but 31297Seric ** don't copy the printname. 32297Seric ** +1 -- copy everything. 33297Seric ** 34297Seric ** Returns: 35297Seric ** A pointer to the address descriptor header (`a' if 36297Seric ** `a' is non-NULL). 37297Seric ** NULL on error. 38297Seric ** 39297Seric ** Side Effects: 40297Seric ** none 41297Seric ** 42297Seric ** Called By: 43297Seric ** main 44297Seric ** sendto 45297Seric ** alias 46297Seric ** savemail 47297Seric */ 48297Seric 49297Seric addrq * 50297Seric parse(addr, a, copyf) 51297Seric char *addr; 52297Seric register addrq *a; 53297Seric int copyf; 54297Seric { 55297Seric register char *p; 56297Seric register struct parsetab *t; 57297Seric extern struct parsetab ParseTab[]; 58297Seric static char buf[MAXNAME]; 59297Seric register char c; 60297Seric register char *q; 61297Seric bool got_one; 62297Seric extern char *prescan(); 63297Seric extern char *xalloc(); 64297Seric 65297Seric /* 66297Seric ** Initialize and prescan address. 67297Seric */ 68297Seric 69297Seric To = addr; 70297Seric if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 71297Seric return (NULL); 72297Seric 73297Seric /* 74297Seric ** Scan parse table. 75297Seric ** Look for the first entry designating a character 76297Seric ** that is contained in the address. 77297Seric ** Arrange for q to point to that character. 78297Seric ** Check to see that there is only one of the char 79297Seric ** if it must be unique. 80297Seric ** Find the last one if the host is on the RHS. 81297Seric ** Insist that the host name is atomic. 82297Seric ** If just doing a map, do the map and then start all 83297Seric ** over. 84297Seric */ 85297Seric 86297Seric rescan: 87297Seric got_one = FALSE; 88297Seric for (t = ParseTab; t->p_char != '\0'; t++) 89297Seric { 90297Seric q = NULL; 91297Seric for (p = buf; (c = *p) != '\0'; p++) 92297Seric { 93297Seric /* find the end of this token */ 94297Seric while (isalnum(c) || c == '-' || c == '_') 95297Seric c = *++p; 96297Seric if (c == '\0') 97297Seric break; 98297Seric 99297Seric if (c == t->p_char) 100297Seric { 101297Seric got_one = TRUE; 102297Seric 103297Seric /* do mapping as appropriate */ 104297Seric if (flagset(P_MAP, t->p_flags)) 105297Seric { 106297Seric *p = t->p_arg[0]; 107297Seric if (flagset(P_ONE, t->p_flags)) 108297Seric goto rescan; 109297Seric else 110297Seric continue; 111297Seric } 112297Seric 113297Seric /* arrange for q to point to it */ 114297Seric if (q != NULL && flagset(P_ONE, t->p_flags)) 115297Seric { 116297Seric usrerr("multichar error"); 117297Seric ExitStat = EX_USAGE; 118297Seric return (NULL); 119297Seric } 120297Seric if (q == NULL || flagset(P_HLAST, t->p_flags)) 121297Seric q = p; 122297Seric } 123297Seric else 124297Seric { 125297Seric /* insist that host name is atomic */ 126297Seric if (flagset(P_HLAST, t->p_flags)) 127297Seric q = NULL; 128297Seric else 129297Seric break; 130297Seric } 131297Seric } 132297Seric 133297Seric if (q != NULL) 134297Seric break; 135297Seric } 136297Seric 137297Seric /* 138297Seric ** If we matched nothing cleanly, but we did match something 139297Seric ** somewhere in the process of scanning, then we have a 140297Seric ** syntax error. This can happen on things like a@b:c where 141297Seric ** @ has a right host and : has a left host. 142297Seric ** 143297Seric ** We also set `q' to the null string, in case someone forgets 144297Seric ** to put the P_MOVE bit in the local mailer entry of the 145297Seric ** configuration table. 146297Seric */ 147297Seric 148297Seric if (q == NULL) 149297Seric { 150297Seric q = ""; 151297Seric if (got_one) 152297Seric { 153297Seric usrerr("syntax error"); 154297Seric ExitStat = EX_USAGE; 155297Seric return (NULL); 156297Seric } 157297Seric } 158297Seric 159297Seric /* 160297Seric ** Interpret entry. 161297Seric ** t points to the entry for the mailer we will use. 162297Seric ** q points to the significant character. 163297Seric */ 164297Seric 165297Seric if (a == NULL) 166297Seric a = (addrq *) xalloc(sizeof *a); 167297Seric if (copyf > 0) 168297Seric { 169297Seric p = xalloc((unsigned) strlen(addr) + 1); 170297Seric strcpy(p, addr); 171297Seric a->q_paddr = p; 172297Seric } 173297Seric else 174297Seric a->q_paddr = addr; 175297Seric a->q_mailer = &Mailer[t->p_mailer]; 176297Seric 177297Seric if (flagset(P_MOVE, t->p_flags)) 178297Seric { 179297Seric /* send the message to another host & retry */ 180297Seric a->q_host = t->p_arg; 181297Seric if (copyf >= 0) 182297Seric { 183297Seric p = xalloc((unsigned) strlen(buf) + 1); 184297Seric strcpy(p, buf); 185297Seric a->q_user = p; 186297Seric } 187297Seric else 188297Seric a->q_user = buf; 189297Seric } 190297Seric else 191297Seric { 192297Seric /* 193297Seric ** Make local copies of the host & user and then 194297Seric ** transport them out. 195297Seric */ 196297Seric 197297Seric *q++ = '\0'; 198297Seric if (flagset(P_HLAST, t->p_flags)) 199297Seric { 200297Seric a->q_host = q; 201297Seric a->q_user = buf; 202297Seric } 203297Seric else 204297Seric { 205297Seric a->q_host = buf; 206297Seric a->q_user = q; 207297Seric } 208297Seric if (copyf >= 0) 209297Seric { 210297Seric p = xalloc((unsigned) strlen(a->q_host) + 1); 211297Seric strcpy(p, a->q_host); 212297Seric a->q_host = p; 213297Seric p = xalloc((unsigned) strlen(a->q_user) + 1); 214297Seric strcpy(p, a->q_user); 215297Seric a->q_user = p; 216297Seric } 217297Seric } 218297Seric 219297Seric /* 220297Seric ** Do UPPER->lower case mapping unless inhibited. 221297Seric */ 222297Seric 223297Seric if (!flagset(P_HST_UPPER, t->p_flags)) 224297Seric makelower(a->q_host); 225297Seric if (!flagset(P_USR_UPPER, t->p_flags)) 226297Seric makelower(a->q_user); 227297Seric 228297Seric /* 229297Seric ** Compute return value. 230297Seric */ 231297Seric 232297Seric # ifdef DEBUG 233297Seric if (Debug && copyf >= 0) 234297Seric printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 235297Seric addr, a->q_host, a->q_user, t->p_mailer); 236297Seric # endif DEBUG 237297Seric 238297Seric return (a); 239297Seric } 240297Seric /* 241297Seric ** MAKELOWER -- Translate a line into lower case 242297Seric ** 243297Seric ** Parameters: 244297Seric ** p -- the string to translate. If NULL, return is 245297Seric ** immediate. 246297Seric ** 247297Seric ** Returns: 248297Seric ** none. 249297Seric ** 250297Seric ** Side Effects: 251297Seric ** String pointed to by p is translated to lower case. 252297Seric ** 253297Seric ** Called By: 254297Seric ** parse 255297Seric */ 256297Seric 257297Seric makelower(p) 258297Seric register char *p; 259297Seric { 260297Seric register char c; 261297Seric 262297Seric if (p == NULL) 263297Seric return; 264297Seric for (; (c = *p) != '\0'; p++) 265297Seric if ((c & 0200) == 0 && isupper(c)) 266297Seric *p = c - 'A' + 'a'; 267297Seric } 268297Seric /* 269297Seric ** PRESCAN -- Prescan name and make it canonical 270297Seric ** 271297Seric ** Scans a name and turns it into canonical form. This involves 272297Seric ** deleting blanks, comments (in parentheses), and turning the 273297Seric ** word "at" into an at-sign ("@"). The name is copied as this 274297Seric ** is done; it is legal to copy a name onto itself, since this 275297Seric ** process can only make things smaller. 276297Seric ** 277297Seric ** This routine knows about quoted strings and angle brackets. 278297Seric ** 279297Seric ** There are certain subtleties to this routine. The one that 280297Seric ** comes to mind now is that backslashes on the ends of names 281297Seric ** are silently stripped off; this is intentional. The problem 282297Seric ** is that some versions of sndmsg (like at LBL) set the kill 283297Seric ** character to something other than @ when reading addresses; 284297Seric ** so people type "csvax.eric\@berkeley" -- which screws up the 285297Seric ** berknet mailer. 286297Seric ** 287297Seric ** Parameters: 288297Seric ** addr -- the name to chomp. 289297Seric ** buf -- the buffer to copy it into. 290297Seric ** buflim -- the last usable address in the buffer 291297Seric ** (which will old a null byte). Normally 292297Seric ** &buf[sizeof buf - 1]. 293297Seric ** delim -- the delimiter for the address, normally 294297Seric ** '\0' or ','; \0 is accepted in any case. 295297Seric ** are moving in place; set buflim to high core. 296297Seric ** 297297Seric ** Returns: 298297Seric ** A pointer to the terminator of buf. 299297Seric ** NULL on error. 300297Seric ** 301297Seric ** Side Effects: 302297Seric ** buf gets clobbered. 303297Seric ** 304297Seric ** Called By: 305297Seric ** parse 306297Seric ** maketemp 307297Seric */ 308297Seric 309297Seric char * 310297Seric prescan(addr, buf, buflim, delim) 311297Seric char *addr; 312297Seric char *buf; 313297Seric char *buflim; 314297Seric char delim; 315297Seric { 316297Seric register char *p; 317297Seric bool space; 318297Seric bool quotemode; 319297Seric bool bslashmode; 320297Seric int cmntcnt; 321297Seric int brccnt; 322297Seric register char c; 323297Seric register char *q; 324297Seric extern bool any(); 325297Seric 326297Seric space = TRUE; 327297Seric q = buf; 328297Seric bslashmode = quotemode = FALSE; 329297Seric cmntcnt = brccnt = 0; 330297Seric for (p = addr; (c = *p++ & 0177) != '\0'; ) 331297Seric { 332297Seric /* chew up special characters */ 333297Seric *q = '\0'; 334297Seric if (bslashmode) 335297Seric { 336297Seric c |= 0200; 337297Seric bslashmode == FALSE; 338297Seric } 339297Seric else if (c == '"') 340297Seric quotemode = !quotemode; 341297Seric else if (c == '\\') 342297Seric { 343297Seric bslashmode++; 344297Seric continue; 345297Seric } 346297Seric else if (quotemode) 347297Seric c |= 0200; 348297Seric else if (c == delim) 349297Seric break; 350297Seric else if (c == '(') 351*1378Seric { 352297Seric cmntcnt++; 353*1378Seric continue; 354*1378Seric } 355297Seric else if (c == ')') 356297Seric { 357297Seric if (cmntcnt <= 0) 358297Seric { 359297Seric usrerr("Unbalanced ')'"); 360297Seric return (NULL); 361297Seric } 362297Seric else 363297Seric { 364297Seric cmntcnt--; 365297Seric continue; 366297Seric } 367297Seric } 368297Seric else if (c == '<') 369297Seric { 370297Seric brccnt++; 371297Seric if (brccnt == 1) 372297Seric { 373297Seric /* we prefer using machine readable name */ 374297Seric q = buf; 375297Seric *q = '\0'; 376297Seric continue; 377297Seric } 378297Seric } 379297Seric else if (c == '>') 380297Seric { 381297Seric if (brccnt <= 0) 382297Seric { 383297Seric usrerr("Unbalanced `>'"); 384297Seric return (NULL); 385297Seric } 386297Seric else 387297Seric brccnt--; 388297Seric if (brccnt <= 0) 389297Seric continue; 390297Seric } 391297Seric 392297Seric /* 393297Seric ** Turn "at" into "@", 394*1378Seric ** but only if "at" is a word. 395297Seric ** By the way, I violate the ARPANET RFC-733 396297Seric ** standard here, by assuming that 'space' delimits 397297Seric ** atoms. I assume that is just a mistake, since 398297Seric ** it violates the spirit of the semantics 399297Seric ** of the document..... 400297Seric */ 401297Seric 402297Seric if (space && (c == 'a' || c == 'A') && 403297Seric (p[0] == 't' || p[0] == 'T') && 404297Seric (any(p[1], "()<>@,;:\\\"") || p[1] <= 040)) 405297Seric { 406297Seric c = '@'; 407297Seric p++; 408297Seric } 409297Seric 410297Seric /* skip blanks */ 411297Seric if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0) 412297Seric { 413297Seric if (q >= buflim) 414297Seric { 415297Seric usrerr("Address too long"); 416297Seric return (NULL); 417297Seric } 418297Seric *q++ = c; 419297Seric } 420297Seric space = isspace(c); 421297Seric } 422297Seric *q = '\0'; 423297Seric if (c == '\0') 424297Seric p--; 425297Seric if (cmntcnt > 0) 426297Seric usrerr("Unbalanced '('"); 427297Seric else if (quotemode) 428297Seric usrerr("Unbalanced '\"'"); 429297Seric else if (brccnt > 0) 430297Seric usrerr("Unbalanced '<'"); 431297Seric else if (buf[0] != '\0') 432297Seric return (p); 433297Seric return (NULL); 434297Seric } 435