1 # include <stdio.h> 2 # include <ctype.h> 3 # include "postbox.h" 4 5 static char SccsId[] = "@(#)parseaddr.c 3.1 03/07/81"; 6 7 /* 8 ** PARSE -- Parse an address 9 ** 10 ** Parses an address and breaks it up into three parts: a 11 ** net to transmit the message on, the host to transmit it 12 ** to, and a user on that host. These are loaded into an 13 ** ADDRESS header with the values squirreled away if necessary. 14 ** The "user" part may not be a real user; the process may 15 ** just reoccur on that machine. For example, on a machine 16 ** with an arpanet connection, the address 17 ** csvax.bill@berkeley 18 ** will break up to a "user" of 'csvax.bill' and a host 19 ** of 'berkeley' -- to be transmitted over the arpanet. 20 ** 21 ** Parameters: 22 ** addr -- the address to parse. 23 ** a -- a pointer to the address descriptor buffer. 24 ** If NULL, a header will be created. 25 ** copyf -- determines what shall be copied: 26 ** -1 -- don't copy anything. The printname 27 ** (q_paddr) is just addr, and the 28 ** user & host are allocated internally 29 ** to parse. 30 ** 0 -- copy out the parsed user & host, but 31 ** don't copy the printname. 32 ** +1 -- copy everything. 33 ** 34 ** Returns: 35 ** A pointer to the address descriptor header (`a' if 36 ** `a' is non-NULL). 37 ** NULL on error. 38 ** 39 ** Side Effects: 40 ** none 41 ** 42 ** Called By: 43 ** main 44 ** sendto 45 ** alias 46 ** savemail 47 */ 48 49 # define DELIMCHARS "()<>@!.,;:\\\" \t\r\n" /* word delimiters */ 50 # define SPACESUB ('.'|0200) /* substitution for <lwsp> */ 51 52 ADDRESS * 53 parse(addr, a, copyf) 54 char *addr; 55 register ADDRESS *a; 56 int copyf; 57 { 58 register char *p; 59 register struct parsetab *t; 60 extern struct parsetab ParseTab[]; 61 static char buf[MAXNAME]; 62 register char c; 63 register char *q; 64 bool got_one; 65 extern char *prescan(); 66 extern char *xalloc(); 67 extern char *newstr(); 68 char **pvp; 69 70 /* 71 ** Initialize and prescan address. 72 */ 73 74 To = addr; 75 if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL) 76 return (NULL); 77 78 /* 79 ** Scan parse table. 80 ** Look for the first entry designating a character 81 ** that is contained in the address. 82 ** Arrange for q to point to that character. 83 ** Check to see that there is only one of the char 84 ** if it must be unique. 85 ** Find the last one if the host is on the RHS. 86 ** Insist that the host name is atomic. 87 ** If just doing a map, do the map and then start all 88 ** over. 89 */ 90 91 rescan: 92 got_one = FALSE; 93 for (t = ParseTab; t->p_char != '\0'; t++) 94 { 95 q = NULL; 96 for (p = buf; (c = *p) != '\0'; p++) 97 { 98 /* find the end of this token */ 99 while (isalnum(c) || c == '-' || c == '_') 100 c = *++p; 101 if (c == '\0') 102 break; 103 104 if (c == t->p_char) 105 { 106 got_one = TRUE; 107 108 /* do mapping as appropriate */ 109 if (bitset(P_MAP, t->p_flags)) 110 { 111 *p = t->p_arg[0]; 112 if (bitset(P_ONE, t->p_flags)) 113 goto rescan; 114 else 115 continue; 116 } 117 118 /* arrange for q to point to it */ 119 if (q != NULL && bitset(P_ONE, t->p_flags)) 120 { 121 usrerr("multichar error"); 122 ExitStat = EX_USAGE; 123 return (NULL); 124 } 125 if (q == NULL || bitset(P_HLAST, t->p_flags)) 126 q = p; 127 } 128 else 129 { 130 /* insist that host name is atomic */ 131 if (bitset(P_HLAST, t->p_flags)) 132 q = NULL; 133 else 134 break; 135 } 136 } 137 138 if (q != NULL) 139 break; 140 } 141 142 /* 143 ** If we matched nothing cleanly, but we did match something 144 ** somewhere in the process of scanning, then we have a 145 ** syntax error. This can happen on things like a@b:c where 146 ** @ has a right host and : has a left host. 147 ** 148 ** We also set `q' to the null string, in case someone forgets 149 ** to put the P_MOVE bit in the local mailer entry of the 150 ** configuration table. 151 */ 152 153 if (q == NULL) 154 { 155 q = ""; 156 if (got_one) 157 { 158 usrerr("syntax error"); 159 ExitStat = EX_USAGE; 160 return (NULL); 161 } 162 } 163 164 /* 165 ** Interpret entry. 166 ** t points to the entry for the mailer we will use. 167 ** q points to the significant character. 168 */ 169 170 if (a == NULL) 171 a = (ADDRESS *) xalloc(sizeof *a); 172 if (copyf > 0) 173 a->q_paddr = newstr(addr); 174 else 175 a->q_paddr = addr; 176 a->q_mailer = &Mailer[t->p_mailer]; 177 178 if (bitset(P_MOVE, t->p_flags)) 179 { 180 /* send the message to another host & retry */ 181 a->q_host = t->p_arg; 182 if (copyf >= 0) 183 a->q_user = newstr(buf); 184 else 185 a->q_user = buf; 186 } 187 else 188 { 189 /* 190 ** Make local copies of the host & user and then 191 ** transport them out. 192 */ 193 194 *q++ = '\0'; 195 if (bitset(P_HLAST, t->p_flags)) 196 { 197 a->q_host = q; 198 a->q_user = buf; 199 } 200 else 201 { 202 a->q_host = buf; 203 a->q_user = q; 204 } 205 206 /* 207 ** Don't go to the net if already on the target host. 208 ** This is important on the berkeley network, since 209 ** it get confused if we ask to send to ourselves. 210 ** For nets like the ARPANET, we probably will have 211 ** the local list set to NULL to simplify testing. 212 ** The canonical representation of the name is also set 213 ** to be just the local name so the duplicate letter 214 ** suppression algorithm will work. 215 */ 216 217 if ((pvp = a->q_mailer->m_local) != NULL) 218 { 219 while (*pvp != NULL) 220 { 221 auto char buf2[MAXNAME]; 222 223 strcpy(buf2, a->q_host); 224 if (!bitset(P_HST_UPPER, t->p_flags)) 225 makelower(buf2); 226 if (strcmp(*pvp++, buf2) == 0) 227 { 228 strcpy(buf2, a->q_user); 229 p = a->q_paddr; 230 if (parse(buf2, a, -1) == NULL) 231 { 232 To = addr; 233 return (NULL); 234 } 235 To = a->q_paddr = p; 236 break; 237 } 238 } 239 } 240 241 /* make copies if specified */ 242 if (copyf >= 0) 243 { 244 a->q_host = newstr(a->q_host); 245 a->q_user = newstr(a->q_user); 246 } 247 } 248 249 /* 250 ** Do UPPER->lower case mapping unless inhibited. 251 */ 252 253 if (!bitset(P_HST_UPPER, t->p_flags)) 254 makelower(a->q_host); 255 if (!bitset(P_USR_UPPER, t->p_flags)) 256 makelower(a->q_user); 257 258 /* 259 ** Compute return value. 260 */ 261 262 # ifdef DEBUG 263 if (Debug) 264 printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n", 265 addr, a->q_host, a->q_user, t->p_mailer); 266 # endif DEBUG 267 268 return (a); 269 } 270 /* 271 ** MAKELOWER -- Translate a line into lower case 272 ** 273 ** Parameters: 274 ** p -- the string to translate. If NULL, return is 275 ** immediate. 276 ** 277 ** Returns: 278 ** none. 279 ** 280 ** Side Effects: 281 ** String pointed to by p is translated to lower case. 282 ** 283 ** Called By: 284 ** parse 285 */ 286 287 makelower(p) 288 register char *p; 289 { 290 register char c; 291 292 if (p == NULL) 293 return; 294 for (; (c = *p) != '\0'; p++) 295 if ((c & 0200) == 0 && isupper(c)) 296 *p = c - 'A' + 'a'; 297 } 298 /* 299 ** PRESCAN -- Prescan name and make it canonical 300 ** 301 ** Scans a name and turns it into canonical form. This involves 302 ** deleting blanks, comments (in parentheses), and turning the 303 ** word "at" into an at-sign ("@"). The name is copied as this 304 ** is done; it is legal to copy a name onto itself, since this 305 ** process can only make things smaller. 306 ** 307 ** This routine knows about quoted strings and angle brackets. 308 ** 309 ** There are certain subtleties to this routine. The one that 310 ** comes to mind now is that backslashes on the ends of names 311 ** are silently stripped off; this is intentional. The problem 312 ** is that some versions of sndmsg (like at LBL) set the kill 313 ** character to something other than @ when reading addresses; 314 ** so people type "csvax.eric\@berkeley" -- which screws up the 315 ** berknet mailer. 316 ** 317 ** Parameters: 318 ** addr -- the name to chomp. 319 ** buf -- the buffer to copy it into. 320 ** buflim -- the last usable address in the buffer 321 ** (which will old a null byte). Normally 322 ** &buf[sizeof buf - 1]. 323 ** delim -- the delimiter for the address, normally 324 ** '\0' or ','; \0 is accepted in any case. 325 ** are moving in place; set buflim to high core. 326 ** 327 ** Returns: 328 ** A pointer to the terminator of buf. 329 ** NULL on error. 330 ** 331 ** Side Effects: 332 ** buf gets clobbered. 333 ** 334 ** Called By: 335 ** parse 336 ** maketemp 337 */ 338 339 char * 340 prescan(addr, buf, buflim, delim) 341 char *addr; 342 char *buf; 343 char *buflim; 344 char delim; 345 { 346 register char *p; 347 bool space; 348 bool quotemode; 349 bool bslashmode; 350 bool delimmode; 351 int cmntcnt; 352 int brccnt; 353 register char c; 354 register char *q; 355 extern char *index(); 356 357 space = FALSE; 358 delimmode = TRUE; 359 q = buf; 360 bslashmode = quotemode = FALSE; 361 cmntcnt = brccnt = 0; 362 for (p = addr; (c = *p++) != '\0'; ) 363 { 364 /* chew up special characters */ 365 *q = '\0'; 366 if (bslashmode) 367 { 368 c |= 0200; 369 bslashmode = FALSE; 370 } 371 else if (c == '"') 372 quotemode = !quotemode; 373 else if (c == '\\') 374 { 375 bslashmode++; 376 continue; 377 } 378 else if (quotemode) 379 c |= 0200; 380 else if (c == delim) 381 break; 382 else if (c == '(') 383 { 384 cmntcnt++; 385 continue; 386 } 387 else if (c == ')') 388 { 389 if (cmntcnt <= 0) 390 { 391 usrerr("Unbalanced ')'"); 392 return (NULL); 393 } 394 else 395 { 396 cmntcnt--; 397 continue; 398 } 399 } 400 if (cmntcnt > 0) 401 continue; 402 else if (isascii(c) && isspace(c) && (space || delimmode)) 403 continue; 404 else if (c == '<') 405 { 406 if (brccnt < 0) 407 { 408 usrerr("multiple < spec"); 409 return (NULL); 410 } 411 brccnt++; 412 delimmode = TRUE; 413 space = FALSE; 414 if (brccnt == 1) 415 { 416 /* we prefer using machine readable name */ 417 q = buf; 418 *q = '\0'; 419 continue; 420 } 421 } 422 else if (c == '>') 423 { 424 if (brccnt <= 0) 425 { 426 usrerr("Unbalanced `>'"); 427 return (NULL); 428 } 429 else 430 brccnt--; 431 if (brccnt <= 0) 432 { 433 brccnt = -1; 434 continue; 435 } 436 } 437 438 /* 439 ** Turn "at" into "@", 440 ** but only if "at" is a word. 441 ** By the way, I violate the ARPANET RFC-733 442 ** standard here, by assuming that 'space' delimits 443 ** atoms. I assume that is just a mistake, since 444 ** it violates the spirit of the semantics 445 ** of the document..... 446 */ 447 448 if (delimmode && (c == 'a' || c == 'A') && 449 (p[0] == 't' || p[0] == 'T') && 450 (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040)) 451 { 452 c = '@'; 453 p++; 454 } 455 456 if (delimmode = (index(DELIMCHARS, c) != NULL)) 457 space = FALSE; 458 459 /* if not a space, squirrel it away */ 460 if ((!isascii(c) || !isspace(c)) && brccnt >= 0) 461 { 462 if (q >= buflim-1) 463 { 464 usrerr("Address too long"); 465 return (NULL); 466 } 467 if (space) 468 *q++ = SPACESUB; 469 *q++ = c; 470 } 471 space = isascii(c) && isspace(c); 472 } 473 *q = '\0'; 474 if (c == '\0') 475 p--; 476 if (cmntcnt > 0) 477 usrerr("Unbalanced '('"); 478 else if (quotemode) 479 usrerr("Unbalanced '\"'"); 480 else if (brccnt > 0) 481 usrerr("Unbalanced '<'"); 482 else if (buf[0] != '\0') 483 return (p); 484 return (NULL); 485 } 486