1 # include <stdio.h> 2 # include <ctype.h> 3 # include "postbox.h" 4 5 static char SccsId[] = "@(#)parseaddr.c 3.3 03/07/81"; 6 7 /* 8 ** PARSE -- Parse an address 9 ** 10 ** Parses an address and breaks it up into three parts: a 11 ** net to transmit the message on, the host to transmit it 12 ** to, and a user on that host. These are loaded into an 13 ** ADDRESS header with the values squirreled away if necessary. 14 ** The "user" part may not be a real user; the process may 15 ** just reoccur on that machine. For example, on a machine 16 ** with an arpanet connection, the address 17 ** csvax.bill@berkeley 18 ** will break up to a "user" of 'csvax.bill' and a host 19 ** of 'berkeley' -- to be transmitted over the arpanet. 20 ** 21 ** Parameters: 22 ** addr -- the address to parse. 23 ** a -- a pointer to the address descriptor buffer. 24 ** If NULL, a header will be created. 25 ** copyf -- determines what shall be copied: 26 ** -1 -- don't copy anything. The printname 27 ** (q_paddr) is just addr, and the 28 ** user & host are allocated internally 29 ** to parse. 30 ** 0 -- copy out the parsed user & host, but 31 ** don't copy the printname. 32 ** +1 -- copy everything. 33 ** 34 ** Returns: 35 ** A pointer to the address descriptor header (`a' if 36 ** `a' is non-NULL). 37 ** NULL on error. 38 ** 39 ** Side Effects: 40 ** none 41 ** 42 ** Called By: 43 ** main 44 ** sendto 45 ** alias 46 ** savemail 47 */ 48 49 # define DELIMCHARS "()<>@!.,;:\\\" \t\r\n" /* word delimiters */ 50 # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 51 52 ADDRESS * 53 parse(addr, a, copyf) 54 char *addr; 55 register ADDRESS *a; 56 int copyf; 57 { 58 register char *p; 59 register struct parsetab *t; 60 extern struct parsetab ParseTab[]; 61 static char buf[MAXNAME]; 62 register char c; 63 register char *q; 64 bool got_one; 65 extern char *prescan(); 66 extern char *xalloc(); 67 extern char *newstr(); 68 char **pvp; 69 extern char *strcpy(); 70 71 /* 72 ** Initialize and prescan address. 73 */ 74 75 To = addr; 76 if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 77 return (NULL); 78 79 /* 80 ** Scan parse table. 81 ** Look for the first entry designating a character 82 ** that is contained in the address. 83 ** Arrange for q to point to that character. 84 ** Check to see that there is only one of the char 85 ** if it must be unique. 86 ** Find the last one if the host is on the RHS. 87 ** Insist that the host name is atomic. 88 ** If just doing a map, do the map and then start all 89 ** over. 90 */ 91 92 rescan: 93 got_one = FALSE; 94 for (t = ParseTab; t->p_char != '\0'; t++) 95 { 96 q = NULL; 97 for (p = buf; (c = *p) != '\0'; p++) 98 { 99 /* find the end of this token */ 100 while (isalnum(c) || c == '-' || c == '_') 101 c = *++p; 102 if (c == '\0') 103 break; 104 105 if (c == t->p_char) 106 { 107 got_one = TRUE; 108 109 /* do mapping as appropriate */ 110 if (bitset(P_MAP, t->p_flags)) 111 { 112 *p = t->p_arg[0]; 113 if (bitset(P_ONE, t->p_flags)) 114 goto rescan; 115 else 116 continue; 117 } 118 119 /* arrange for q to point to it */ 120 if (q != NULL && bitset(P_ONE, t->p_flags)) 121 { 122 usrerr("multichar error"); 123 ExitStat = EX_USAGE; 124 return (NULL); 125 } 126 if (q == NULL || bitset(P_HLAST, t->p_flags)) 127 q = p; 128 } 129 else 130 { 131 /* insist that host name is atomic */ 132 if (bitset(P_HLAST, t->p_flags)) 133 q = NULL; 134 else 135 break; 136 } 137 } 138 139 if (q != NULL) 140 break; 141 } 142 143 /* 144 ** If we matched nothing cleanly, but we did match something 145 ** somewhere in the process of scanning, then we have a 146 ** syntax error. This can happen on things like a@b:c where 147 ** @ has a right host and : has a left host. 148 ** 149 ** We also set `q' to the null string, in case someone forgets 150 ** to put the P_MOVE bit in the local mailer entry of the 151 ** configuration table. 152 */ 153 154 if (q == NULL) 155 { 156 q = ""; 157 if (got_one) 158 { 159 usrerr("syntax error"); 160 ExitStat = EX_USAGE; 161 return (NULL); 162 } 163 } 164 165 /* 166 ** Interpret entry. 167 ** t points to the entry for the mailer we will use. 168 ** q points to the significant character. 169 */ 170 171 if (a == NULL) 172 a = (ADDRESS *) xalloc(sizeof *a); 173 if (copyf > 0) 174 a->q_paddr = newstr(addr); 175 else 176 a->q_paddr = addr; 177 a->q_rmailer = t->p_mailer; 178 a->q_mailer = &Mailer[t->p_mailer]; 179 180 if (bitset(P_MOVE, t->p_flags)) 181 { 182 /* send the message to another host & retry */ 183 a->q_host = t->p_arg; 184 if (copyf >= 0) 185 a->q_user = newstr(buf); 186 else 187 a->q_user = buf; 188 } 189 else 190 { 191 /* 192 ** Make local copies of the host & user and then 193 ** transport them out. 194 */ 195 196 *q++ = '\0'; 197 if (bitset(P_HLAST, t->p_flags)) 198 { 199 a->q_host = q; 200 a->q_user = buf; 201 } 202 else 203 { 204 a->q_host = buf; 205 a->q_user = q; 206 } 207 208 /* 209 ** Don't go to the net if already on the target host. 210 ** This is important on the berkeley network, since 211 ** it get confused if we ask to send to ourselves. 212 ** For nets like the ARPANET, we probably will have 213 ** the local list set to NULL to simplify testing. 214 ** The canonical representation of the name is also set 215 ** to be just the local name so the duplicate letter 216 ** suppression algorithm will work. 217 */ 218 219 if ((pvp = a->q_mailer->m_local) != NULL) 220 { 221 while (*pvp != NULL) 222 { 223 auto char buf2[MAXNAME]; 224 225 strcpy(buf2, a->q_host); 226 if (!bitset(P_HST_UPPER, t->p_flags)) 227 makelower(buf2); 228 if (strcmp(*pvp++, buf2) == 0) 229 { 230 strcpy(buf2, a->q_user); 231 p = a->q_paddr; 232 if (parse(buf2, a, -1) == NULL) 233 { 234 To = addr; 235 return (NULL); 236 } 237 To = a->q_paddr = p; 238 break; 239 } 240 } 241 } 242 243 /* make copies if specified */ 244 if (copyf >= 0) 245 { 246 a->q_host = newstr(a->q_host); 247 a->q_user = newstr(a->q_user); 248 } 249 } 250 251 /* 252 ** Do UPPER->lower case mapping unless inhibited. 253 */ 254 255 if (!bitset(P_HST_UPPER, t->p_flags)) 256 makelower(a->q_host); 257 if (!bitset(P_USR_UPPER, t->p_flags)) 258 makelower(a->q_user); 259 260 /* 261 ** Compute return value. 262 */ 263 264 # ifdef DEBUG 265 if (Debug) 266 printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 267 addr, a->q_host, a->q_user, t->p_mailer); 268 # endif DEBUG 269 270 return (a); 271 } 272 /* 273 ** MAKELOWER -- Translate a line into lower case 274 ** 275 ** Parameters: 276 ** p -- the string to translate. If NULL, return is 277 ** immediate. 278 ** 279 ** Returns: 280 ** none. 281 ** 282 ** Side Effects: 283 ** String pointed to by p is translated to lower case. 284 ** 285 ** Called By: 286 ** parse 287 */ 288 289 makelower(p) 290 register char *p; 291 { 292 register char c; 293 294 if (p == NULL) 295 return; 296 for (; (c = *p) != '\0'; p++) 297 if ((c & 0200) == 0 && isupper(c)) 298 *p = c - 'A' + 'a'; 299 } 300 /* 301 ** PRESCAN -- Prescan name and make it canonical 302 ** 303 ** Scans a name and turns it into canonical form. This involves 304 ** deleting blanks, comments (in parentheses), and turning the 305 ** word "at" into an at-sign ("@"). The name is copied as this 306 ** is done; it is legal to copy a name onto itself, since this 307 ** process can only make things smaller. 308 ** 309 ** This routine knows about quoted strings and angle brackets. 310 ** 311 ** There are certain subtleties to this routine. The one that 312 ** comes to mind now is that backslashes on the ends of names 313 ** are silently stripped off; this is intentional. The problem 314 ** is that some versions of sndmsg (like at LBL) set the kill 315 ** character to something other than @ when reading addresses; 316 ** so people type "csvax.eric\@berkeley" -- which screws up the 317 ** berknet mailer. 318 ** 319 ** Parameters: 320 ** addr -- the name to chomp. 321 ** buf -- the buffer to copy it into. 322 ** buflim -- the last usable address in the buffer 323 ** (which will old a null byte). Normally 324 ** &buf[sizeof buf - 1]. 325 ** delim -- the delimiter for the address, normally 326 ** '\0' or ','; \0 is accepted in any case. 327 ** are moving in place; set buflim to high core. 328 ** 329 ** Returns: 330 ** A pointer to the terminator of buf. 331 ** NULL on error. 332 ** 333 ** Side Effects: 334 ** buf gets clobbered. 335 ** 336 ** Called By: 337 ** parse 338 ** maketemp 339 */ 340 341 char * 342 prescan(addr, buf, buflim, delim) 343 char *addr; 344 char *buf; 345 char *buflim; 346 char delim; 347 { 348 register char *p; 349 bool space; 350 bool quotemode; 351 bool bslashmode; 352 bool delimmode; 353 int cmntcnt; 354 int brccnt; 355 register char c; 356 register char *q; 357 extern char *index(); 358 359 space = FALSE; 360 delimmode = TRUE; 361 q = buf; 362 bslashmode = quotemode = FALSE; 363 cmntcnt = brccnt = 0; 364 for (p = addr; (c = *p++) != '\0'; ) 365 { 366 /* chew up special characters */ 367 *q = '\0'; 368 if (bslashmode) 369 { 370 c |= 0200; 371 bslashmode = FALSE; 372 } 373 else if (c == '"') 374 quotemode = !quotemode; 375 else if (c == '\\') 376 { 377 bslashmode++; 378 continue; 379 } 380 else if (quotemode) 381 c |= 0200; 382 else if (c == delim) 383 break; 384 else if (c == '(') 385 { 386 cmntcnt++; 387 continue; 388 } 389 else if (c == ')') 390 { 391 if (cmntcnt <= 0) 392 { 393 usrerr("Unbalanced ')'"); 394 return (NULL); 395 } 396 else 397 { 398 cmntcnt--; 399 continue; 400 } 401 } 402 if (cmntcnt > 0) 403 continue; 404 else if (isascii(c) && isspace(c) && (space || delimmode)) 405 continue; 406 else if (c == '<') 407 { 408 if (brccnt < 0) 409 { 410 usrerr("multiple < spec"); 411 return (NULL); 412 } 413 brccnt++; 414 delimmode = TRUE; 415 space = FALSE; 416 if (brccnt == 1) 417 { 418 /* we prefer using machine readable name */ 419 q = buf; 420 *q = '\0'; 421 continue; 422 } 423 } 424 else if (c == '>') 425 { 426 if (brccnt <= 0) 427 { 428 usrerr("Unbalanced `>'"); 429 return (NULL); 430 } 431 else 432 brccnt--; 433 if (brccnt <= 0) 434 { 435 brccnt = -1; 436 continue; 437 } 438 } 439 440 /* 441 ** Turn "at" into "@", 442 ** but only if "at" is a word. 443 ** By the way, I violate the ARPANET RFC-733 444 ** standard here, by assuming that 'space' delimits 445 ** atoms. I assume that is just a mistake, since 446 ** it violates the spirit of the semantics 447 ** of the document..... 448 */ 449 450 if (delimmode && (c == 'a' || c == 'A') && 451 (p[0] == 't' || p[0] == 'T') && 452 (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040)) 453 { 454 c = '@'; 455 p++; 456 } 457 458 if (delimmode = (index(DELIMCHARS, c) != NULL)) 459 space = FALSE; 460 461 /* if not a space, squirrel it away */ 462 if ((!isascii(c) || !isspace(c)) && brccnt >= 0) 463 { 464 if (q >= buflim-1) 465 { 466 usrerr("Address too long"); 467 return (NULL); 468 } 469 if (space) 470 *q++ = SPACESUB; 471 *q++ = c; 472 } 473 space = isascii(c) && isspace(c); 474 } 475 *q = '\0'; 476 if (c == '\0') 477 p--; 478 if (cmntcnt > 0) 479 usrerr("Unbalanced '('"); 480 else if (quotemode) 481 usrerr("Unbalanced '\"'"); 482 else if (brccnt > 0) 483 usrerr("Unbalanced '<'"); 484 else if (buf[0] != '\0') 485 return (p); 486 return (NULL); 487 } 488