1 # include <stdio.h> 2 # include <ctype.h> 3 # include "postbox.h" 4 5 static char SccsId[] = "@(#)parseaddr.c 3.5 03/08/81"; 6 7 /* 8 ** PARSE -- Parse an address 9 ** 10 ** Parses an address and breaks it up into three parts: a 11 ** net to transmit the message on, the host to transmit it 12 ** to, and a user on that host. These are loaded into an 13 ** ADDRESS header with the values squirreled away if necessary. 14 ** The "user" part may not be a real user; the process may 15 ** just reoccur on that machine. For example, on a machine 16 ** with an arpanet connection, the address 17 ** csvax.bill@berkeley 18 ** will break up to a "user" of 'csvax.bill' and a host 19 ** of 'berkeley' -- to be transmitted over the arpanet. 20 ** 21 ** Parameters: 22 ** addr -- the address to parse. 23 ** a -- a pointer to the address descriptor buffer. 24 ** If NULL, a header will be created. 25 ** copyf -- determines what shall be copied: 26 ** -1 -- don't copy anything. The printname 27 ** (q_paddr) is just addr, and the 28 ** user & host are allocated internally 29 ** to parse. 30 ** 0 -- copy out the parsed user & host, but 31 ** don't copy the printname. 32 ** +1 -- copy everything. 33 ** 34 ** Returns: 35 ** A pointer to the address descriptor header (`a' if 36 ** `a' is non-NULL). 37 ** NULL on error. 38 ** 39 ** Side Effects: 40 ** none 41 ** 42 ** Called By: 43 ** main 44 ** sendto 45 ** alias 46 ** savemail 47 */ 48 49 # define DELIMCHARS "()<>@!.,;:\\\" \t\r\n" /* word delimiters */ 50 # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 51 52 ADDRESS * 53 parse(addr, a, copyf) 54 char *addr; 55 register ADDRESS *a; 56 int copyf; 57 { 58 register char *p; 59 register struct parsetab *t; 60 extern struct parsetab ParseTab[]; 61 static char buf[MAXNAME]; 62 register char c; 63 register char *q; 64 bool got_one; 65 extern char *prescan(); 66 extern char *xalloc(); 67 extern char *newstr(); 68 char **pvp; 69 char ***hvp; 70 extern char *strcpy(); 71 72 /* 73 ** Initialize and prescan address. 74 */ 75 76 To = addr; 77 if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 78 return (NULL); 79 80 /* 81 ** Scan parse table. 82 ** Look for the first entry designating a character 83 ** that is contained in the address. 84 ** Arrange for q to point to that character. 85 ** Check to see that there is only one of the char 86 ** if it must be unique. 87 ** Find the last one if the host is on the RHS. 88 ** Insist that the host name is atomic. 89 ** If just doing a map, do the map and then start all 90 ** over. 91 */ 92 93 rescan: 94 got_one = FALSE; 95 for (t = ParseTab; t->p_char != '\0'; t++) 96 { 97 q = NULL; 98 for (p = buf; (c = *p) != '\0'; p++) 99 { 100 /* find the end of this token */ 101 while (isalnum(c) || c == '-' || c == '_') 102 c = *++p; 103 if (c == '\0') 104 break; 105 106 if (c == t->p_char) 107 { 108 got_one = TRUE; 109 110 /* do mapping as appropriate */ 111 if (bitset(P_MAP, t->p_flags)) 112 { 113 *p = t->p_arg[0]; 114 if (bitset(P_ONE, t->p_flags)) 115 goto rescan; 116 else 117 continue; 118 } 119 120 /* arrange for q to point to it */ 121 if (q != NULL && bitset(P_ONE, t->p_flags)) 122 { 123 usrerr("multichar error"); 124 ExitStat = EX_USAGE; 125 return (NULL); 126 } 127 if (q == NULL || bitset(P_HLAST, t->p_flags)) 128 q = p; 129 } 130 else 131 { 132 /* insist that host name is atomic */ 133 if (bitset(P_HLAST, t->p_flags)) 134 q = NULL; 135 else 136 break; 137 } 138 } 139 140 if (q != NULL) 141 break; 142 } 143 144 /* 145 ** If we matched nothing cleanly, but we did match something 146 ** somewhere in the process of scanning, then we have a 147 ** syntax error. This can happen on things like a@b:c where 148 ** @ has a right host and : has a left host. 149 ** 150 ** We also set `q' to the null string, in case someone forgets 151 ** to put the P_MOVE bit in the local mailer entry of the 152 ** configuration table. 153 */ 154 155 if (q == NULL) 156 { 157 q = ""; 158 if (got_one) 159 { 160 usrerr("syntax error"); 161 ExitStat = EX_USAGE; 162 return (NULL); 163 } 164 } 165 166 /* 167 ** Interpret entry. 168 ** t points to the entry for the mailer we will use. 169 ** q points to the significant character. 170 */ 171 172 if (a == NULL) 173 a = (ADDRESS *) xalloc(sizeof *a); 174 if (copyf > 0) 175 a->q_paddr = newstr(addr); 176 else 177 a->q_paddr = addr; 178 a->q_mailer = a->q_rmailer = t->p_mailer; 179 180 if (bitset(P_MOVE, t->p_flags)) 181 { 182 /* send the message to another host & retry */ 183 a->q_host = t->p_arg; 184 if (copyf >= 0) 185 a->q_user = newstr(buf); 186 else 187 a->q_user = buf; 188 } 189 else 190 { 191 /* 192 ** Make local copies of the host & user and then 193 ** transport them out. 194 */ 195 196 *q++ = '\0'; 197 if (bitset(P_HLAST, t->p_flags)) 198 { 199 a->q_host = q; 200 a->q_user = buf; 201 } 202 else 203 { 204 a->q_host = buf; 205 a->q_user = q; 206 } 207 208 /* 209 ** Don't go to the net if already on the target host. 210 ** This is important on the berkeley network, since 211 ** it get confused if we ask to send to ourselves. 212 ** For nets like the ARPANET, we probably will have 213 ** the local list set to NULL to simplify testing. 214 ** The canonical representation of the name is also set 215 ** to be just the local name so the duplicate letter 216 ** suppression algorithm will work. 217 */ 218 219 if ((pvp = Mailer[a->q_mailer]->m_local) != NULL) 220 { 221 while (*pvp != NULL) 222 { 223 auto char buf2[MAXNAME]; 224 225 strcpy(buf2, a->q_host); 226 if (!bitset(P_HST_UPPER, t->p_flags)) 227 makelower(buf2); 228 if (strcmp(*pvp++, buf2) == 0) 229 { 230 strcpy(buf2, a->q_user); 231 p = a->q_paddr; 232 if (parse(buf2, a, -1) == NULL) 233 { 234 To = addr; 235 return (NULL); 236 } 237 To = a->q_paddr = p; 238 break; 239 } 240 } 241 } 242 243 /* 244 ** Do host equivalence. 245 ** This allows us to map together messages that 246 ** would otherwise have several copies going 247 ** through the same net link. 248 */ 249 250 for (hvp = Mailer[a->q_mailer]->m_hmap; *hvp != NULL; hvp++) 251 { 252 register bool doremap; 253 254 doremap = FALSE; 255 for (pvp = *hvp; *pvp != NULL; pvp++) 256 { 257 p = *pvp; 258 if (*p == '\0') 259 { 260 /* null string: match everything */ 261 doremap = TRUE; 262 } 263 else if (strcmp(p, a->q_host) == 0) 264 doremap = TRUE; 265 } 266 267 if (doremap) 268 { 269 a->q_host = pvp[-1]; 270 a->q_user = a->q_paddr; 271 } 272 } 273 274 /* make copies if specified */ 275 if (copyf >= 0) 276 { 277 a->q_host = newstr(a->q_host); 278 if (a->q_user != a->q_paddr) 279 a->q_user = newstr(a->q_user); 280 } 281 } 282 283 /* 284 ** Do UPPER->lower case mapping unless inhibited. 285 */ 286 287 if (!bitset(P_HST_UPPER, t->p_flags)) 288 makelower(a->q_host); 289 if (!bitset(P_USR_UPPER, t->p_flags)) 290 makelower(a->q_user); 291 292 /* 293 ** Compute return value. 294 */ 295 296 # ifdef DEBUG 297 if (Debug) 298 printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 299 addr, a->q_host, a->q_user, t->p_mailer); 300 # endif DEBUG 301 302 return (a); 303 } 304 /* 305 ** MAKELOWER -- Translate a line into lower case 306 ** 307 ** Parameters: 308 ** p -- the string to translate. If NULL, return is 309 ** immediate. 310 ** 311 ** Returns: 312 ** none. 313 ** 314 ** Side Effects: 315 ** String pointed to by p is translated to lower case. 316 ** 317 ** Called By: 318 ** parse 319 */ 320 321 makelower(p) 322 register char *p; 323 { 324 register char c; 325 326 if (p == NULL) 327 return; 328 for (; (c = *p) != '\0'; p++) 329 if ((c & 0200) == 0 && isupper(c)) 330 *p = c - 'A' + 'a'; 331 } 332 /* 333 ** PRESCAN -- Prescan name and make it canonical 334 ** 335 ** Scans a name and turns it into canonical form. This involves 336 ** deleting blanks, comments (in parentheses), and turning the 337 ** word "at" into an at-sign ("@"). The name is copied as this 338 ** is done; it is legal to copy a name onto itself, since this 339 ** process can only make things smaller. 340 ** 341 ** This routine knows about quoted strings and angle brackets. 342 ** 343 ** There are certain subtleties to this routine. The one that 344 ** comes to mind now is that backslashes on the ends of names 345 ** are silently stripped off; this is intentional. The problem 346 ** is that some versions of sndmsg (like at LBL) set the kill 347 ** character to something other than @ when reading addresses; 348 ** so people type "csvax.eric\@berkeley" -- which screws up the 349 ** berknet mailer. 350 ** 351 ** Parameters: 352 ** addr -- the name to chomp. 353 ** buf -- the buffer to copy it into. 354 ** buflim -- the last usable address in the buffer 355 ** (which will old a null byte). Normally 356 ** &buf[sizeof buf - 1]. 357 ** delim -- the delimiter for the address, normally 358 ** '\0' or ','; \0 is accepted in any case. 359 ** are moving in place; set buflim to high core. 360 ** 361 ** Returns: 362 ** A pointer to the terminator of buf. 363 ** NULL on error. 364 ** 365 ** Side Effects: 366 ** buf gets clobbered. 367 ** 368 ** Called By: 369 ** parse 370 ** maketemp 371 */ 372 373 char * 374 prescan(addr, buf, buflim, delim) 375 char *addr; 376 char *buf; 377 char *buflim; 378 char delim; 379 { 380 register char *p; 381 bool space; 382 bool quotemode; 383 bool bslashmode; 384 bool delimmode; 385 int cmntcnt; 386 int brccnt; 387 register char c; 388 register char *q; 389 extern char *index(); 390 391 space = FALSE; 392 delimmode = TRUE; 393 q = buf; 394 bslashmode = quotemode = FALSE; 395 cmntcnt = brccnt = 0; 396 for (p = addr; (c = *p++) != '\0'; ) 397 { 398 /* chew up special characters */ 399 *q = '\0'; 400 if (bslashmode) 401 { 402 c |= 0200; 403 bslashmode = FALSE; 404 } 405 else if (c == '"') 406 quotemode = !quotemode; 407 else if (c == '\\') 408 { 409 bslashmode++; 410 continue; 411 } 412 else if (quotemode) 413 c |= 0200; 414 else if (c == delim) 415 break; 416 else if (c == '(') 417 { 418 cmntcnt++; 419 continue; 420 } 421 else if (c == ')') 422 { 423 if (cmntcnt <= 0) 424 { 425 usrerr("Unbalanced ')'"); 426 return (NULL); 427 } 428 else 429 { 430 cmntcnt--; 431 continue; 432 } 433 } 434 if (cmntcnt > 0) 435 continue; 436 else if (isascii(c) && isspace(c) && (space || delimmode)) 437 continue; 438 else if (c == '<') 439 { 440 if (brccnt < 0) 441 { 442 usrerr("multiple < spec"); 443 return (NULL); 444 } 445 brccnt++; 446 delimmode = TRUE; 447 space = FALSE; 448 if (brccnt == 1) 449 { 450 /* we prefer using machine readable name */ 451 q = buf; 452 *q = '\0'; 453 continue; 454 } 455 } 456 else if (c == '>') 457 { 458 if (brccnt <= 0) 459 { 460 usrerr("Unbalanced `>'"); 461 return (NULL); 462 } 463 else 464 brccnt--; 465 if (brccnt <= 0) 466 { 467 brccnt = -1; 468 continue; 469 } 470 } 471 472 /* 473 ** Turn "at" into "@", 474 ** but only if "at" is a word. 475 ** By the way, I violate the ARPANET RFC-733 476 ** standard here, by assuming that 'space' delimits 477 ** atoms. I assume that is just a mistake, since 478 ** it violates the spirit of the semantics 479 ** of the document..... 480 */ 481 482 if (delimmode && (c == 'a' || c == 'A') && 483 (p[0] == 't' || p[0] == 'T') && 484 (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040)) 485 { 486 c = '@'; 487 p++; 488 } 489 490 if (delimmode = (index(DELIMCHARS, c) != NULL)) 491 space = FALSE; 492 493 /* if not a space, squirrel it away */ 494 if ((!isascii(c) || !isspace(c)) && brccnt >= 0) 495 { 496 if (q >= buflim-1) 497 { 498 usrerr("Address too long"); 499 return (NULL); 500 } 501 if (space) 502 *q++ = SPACESUB; 503 *q++ = c; 504 } 505 space = isascii(c) && isspace(c); 506 } 507 *q = '\0'; 508 if (c == '\0') 509 p--; 510 if (cmntcnt > 0) 511 usrerr("Unbalanced '('"); 512 else if (quotemode) 513 usrerr("Unbalanced '\"'"); 514 else if (brccnt > 0) 515 usrerr("Unbalanced '<'"); 516 else if (buf[0] != '\0') 517 return (p); 518 return (NULL); 519 } 520