1 # include <stdio.h>
2 # include <ctype.h>
3 # include "postbox.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	3.6	03/09/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
39 **	Side Effects:
40 **		none
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 # define DELIMCHARS	"$()<>@!.,;:\\\" \t\r\n"	/* word delimiters */
50 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char **pvp;
59 	register struct mailer *m;
60 	extern char **prescan();
61 	extern char *xalloc();
62 	extern char *newstr();
63 	extern char *strcpy();
64 	extern ADDRESS *buildaddr();
65 
66 	/*
67 	**  Initialize and prescan address.
68 	*/
69 
70 	To = addr;
71 	pvp = prescan(addr, '\0');
72 	if (pvp == NULL)
73 		return (NULL);
74 
75 	/*
76 	**  Apply rewriting rules.
77 	*/
78 
79 	rewrite(pvp);
80 
81 	/*
82 	**  See if we resolved to a real mailer.
83 	*/
84 
85 	if (pvp[0][0] != CANONNET)
86 	{
87 		setstat(EX_USAGE);
88 		usrerr("cannot resolve name");
89 		return (NULL);
90 	}
91 
92 	/*
93 	**  Build canonical address from pvp.
94 	*/
95 
96 	a = buildaddr(pvp, a);
97 	m = Mailer[a->q_mailer];
98 
99 	/*
100 	**  Make local copies of the host & user and then
101 	**  transport them out.
102 	*/
103 
104 	if (copyf > 0)
105 		a->q_paddr = newstr(addr);
106 	else
107 		a->q_paddr = addr;
108 
109 	if (copyf >= 0)
110 	{
111 		if (a->q_host != NULL)
112 			a->q_host = newstr(a->q_host);
113 		else
114 			a->q_host = "";
115 		if (a->q_user != a->q_paddr)
116 			a->q_user = newstr(a->q_user);
117 	}
118 
119 	/*
120 	**  Do UPPER->lower case mapping unless inhibited.
121 	*/
122 
123 	if (!bitset(M_HST_UPPER, m->m_flags))
124 		makelower(a->q_host);
125 	if (!bitset(M_USR_UPPER, m->m_flags))
126 		makelower(a->q_user);
127 
128 	/*
129 	**  Compute return value.
130 	*/
131 
132 # ifdef DEBUG
133 	if (Debug)
134 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
135 		    addr, a->q_host, a->q_user, a->q_mailer);
136 # endif DEBUG
137 
138 	return (a);
139 }
140 /*
141 **  PRESCAN -- Prescan name and make it canonical
142 **
143 **	Scans a name and turns it into canonical form.  This involves
144 **	deleting blanks, comments (in parentheses), and turning the
145 **	word "at" into an at-sign ("@").  The name is copied as this
146 **	is done; it is legal to copy a name onto itself, since this
147 **	process can only make things smaller.
148 **
149 **	This routine knows about quoted strings and angle brackets.
150 **
151 **	There are certain subtleties to this routine.  The one that
152 **	comes to mind now is that backslashes on the ends of names
153 **	are silently stripped off; this is intentional.  The problem
154 **	is that some versions of sndmsg (like at LBL) set the kill
155 **	character to something other than @ when reading addresses;
156 **	so people type "csvax.eric\@berkeley" -- which screws up the
157 **	berknet mailer.
158 **
159 **	Parameters:
160 **		addr -- the name to chomp.
161 **		delim -- the delimiter for the address, normally
162 **			'\0' or ','; \0 is accepted in any case.
163 **			are moving in place; set buflim to high core.
164 **
165 **	Returns:
166 **		A pointer to a vector of tokens.
167 **		NULL on error.
168 **
169 **	Side Effects:
170 **		none.
171 */
172 
173 # define OPER		1
174 # define ATOM		2
175 # define EOTOK		3
176 # define QSTRING	4
177 # define SPACE		5
178 # define DOLLAR		6
179 # define GETONE		7
180 
181 char **
182 prescan(addr, delim)
183 	char *addr;
184 	char delim;
185 {
186 	register char *p;
187 	static char buf[MAXNAME+MAXATOM];
188 	static char *av[MAXATOM+1];
189 	char **avp;
190 	bool space;
191 	bool bslashmode;
192 	int cmntcnt;
193 	int brccnt;
194 	register char c;
195 	char *tok;
196 	register char *q;
197 	extern char *index();
198 	register int state;
199 	int nstate;
200 
201 	space = FALSE;
202 	q = buf;
203 	bslashmode = FALSE;
204 	cmntcnt = brccnt = 0;
205 	avp = av;
206 	state = OPER;
207 	for (p = addr; *p != '\0' && *p != delim; )
208 	{
209 		/* read a token */
210 		tok = q;
211 		while ((c = *p++) != '\0' && c != delim)
212 		{
213 			/* chew up special characters */
214 			*q = '\0';
215 			if (bslashmode)
216 			{
217 				c |= 0200;
218 				bslashmode = FALSE;
219 			}
220 			else if (c == '\\')
221 			{
222 				bslashmode = TRUE;
223 				continue;
224 			}
225 
226 			nstate = toktype(c);
227 			switch (state)
228 			{
229 			  case QSTRING:		/* in quoted string */
230 				if (c == '"')
231 					state = OPER;
232 				break;
233 
234 			  case ATOM:		/* regular atom */
235 				state = nstate;
236 				if (state != ATOM)
237 				{
238 					state = EOTOK;
239 					p--;
240 				}
241 				break;
242 
243 			  case GETONE:		/* grab one character */
244 				state = OPER;
245 				break;
246 
247 			  case EOTOK:		/* after atom or q-string */
248 				state = nstate;
249 				if (state == SPACE)
250 					continue;
251 				break;
252 
253 			  case SPACE:		/* linear white space */
254 				state = nstate;
255 				space = TRUE;
256 				continue;
257 
258 			  case OPER:		/* operator */
259 				if (nstate == SPACE)
260 					continue;
261 				state = nstate;
262 				break;
263 
264 			  case DOLLAR:		/* $- etc. */
265 				state = OPER;
266 				switch (c)
267 				{
268 				  case '$':		/* literal $ */
269 					break;
270 
271 				  case '+':		/* match anything */
272 					c = MATCHANY;
273 					state = GETONE;
274 					break;
275 
276 				  case '-':		/* match one token */
277 					c = MATCHONE;
278 					state = GETONE;
279 					break;
280 
281 				  case '#':		/* canonical net name */
282 					c = CANONNET;
283 					break;
284 
285 				  case '@':		/* canonical host name */
286 					c = CANONHOST;
287 					break;
288 
289 				  case ':':		/* canonical user name */
290 					c = CANONUSER;
291 					break;
292 
293 				  default:
294 					c = '$';
295 					state = OPER;
296 					p--;
297 					break;
298 				}
299 				break;
300 
301 			  default:
302 				syserr("prescan: unknown state %d", state);
303 			}
304 
305 			if (state == OPER)
306 				space = FALSE;
307 			else if (state == EOTOK)
308 				break;
309 			if (c == '$' && delim == '\t')
310 			{
311 				state = DOLLAR;
312 				continue;
313 			}
314 
315 			/* squirrel it away */
316 			if (q >= &buf[sizeof buf - 5])
317 			{
318 				usrerr("Address too long");
319 				return (NULL);
320 			}
321 			if (space)
322 				*q++ = SPACESUB;
323 			*q++ = c;
324 
325 			/* decide whether this represents end of token */
326 			if (state == OPER)
327 				break;
328 		}
329 		if (c == '\0' || c == delim)
330 			p--;
331 
332 		/* new token */
333 		if (tok == q)
334 			continue;
335 		*q++ = '\0';
336 
337 		c = tok[0];
338 		if (c == '(')
339 		{
340 			cmntcnt++;
341 			continue;
342 		}
343 		else if (c == ')')
344 		{
345 			if (cmntcnt <= 0)
346 			{
347 				usrerr("Unbalanced ')'");
348 				return (NULL);
349 			}
350 			else
351 			{
352 				cmntcnt--;
353 				continue;
354 			}
355 		}
356 		else if (cmntcnt > 0)
357 			continue;
358 
359 		*avp++ = tok;
360 
361 		/* we prefer <> specs */
362 		if (c == '<')
363 		{
364 			if (brccnt < 0)
365 			{
366 				usrerr("multiple < spec");
367 				return (NULL);
368 			}
369 			brccnt++;
370 			space = FALSE;
371 			if (brccnt == 1)
372 			{
373 				/* we prefer using machine readable name */
374 				q = buf;
375 				*q = '\0';
376 				avp = av;
377 				continue;
378 			}
379 		}
380 		else if (c == '>')
381 		{
382 			if (brccnt <= 0)
383 			{
384 				usrerr("Unbalanced `>'");
385 				return (NULL);
386 			}
387 			else
388 				brccnt--;
389 			if (brccnt <= 0)
390 			{
391 				brccnt = -1;
392 				continue;
393 			}
394 		}
395 
396 		/*
397 		**  Turn "at" into "@",
398 		**	but only if "at" is a word.
399 		*/
400 
401 		if (lower(tok[0]) == 'a' && lower(tok[1]) == 't' && tok[2] == '\0')
402 		{
403 			tok[0] = '@';
404 			tok[1] = '\0';
405 		}
406 	}
407 	*avp = NULL;
408 	if (cmntcnt > 0)
409 		usrerr("Unbalanced '('");
410 	else if (brccnt > 0)
411 		usrerr("Unbalanced '<'");
412 	else if (state == QSTRING)
413 		usrerr("Unbalanced '\"'");
414 	else if (av[0] != NULL)
415 		return (av);
416 	return (NULL);
417 }
418 /*
419 **  TOKTYPE -- return token type
420 **
421 **	Parameters:
422 **		c -- the character in question.
423 **
424 **	Returns:
425 **		Its type.
426 **
427 **	Side Effects:
428 **		none.
429 */
430 
431 toktype(c)
432 	register char c;
433 {
434 	if (isspace(c))
435 		return (SPACE);
436 	if (index(DELIMCHARS, c) != NULL || iscntrl(c))
437 		return (OPER);
438 	return (ATOM);
439 }
440 /*
441 **  REWRITE -- apply rewrite rules to token vector.
442 **
443 **	Parameters:
444 **		pvp -- pointer to token vector.
445 **
446 **	Returns:
447 **		none.
448 **
449 **	Side Effects:
450 **		pvp is modified.
451 */
452 
453 struct match
454 {
455 	char	**firsttok;	/* first token matched */
456 	char	**lasttok;	/* last token matched */
457 	char	name;		/* name of parameter */
458 };
459 
460 # define MAXMATCH	8	/* max params per rewrite */
461 
462 
463 rewrite(pvp)
464 	char **pvp;
465 {
466 	register char *ap;		/* address pointer */
467 	register char *rp;		/* rewrite pointer */
468 	register char **avp;		/* address vector pointer */
469 	register char **rvp;		/* rewrite vector pointer */
470 	struct rewrite *rwr;
471 	struct match mlist[MAXMATCH];
472 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
473 
474 # ifdef DEBUG
475 	if (Debug)
476 	{
477 		printf("rewrite: original pvp:\n");
478 		printav(pvp);
479 	}
480 # endif DEBUG
481 
482 	/*
483 	**  Run through the list of rewrite rules, applying
484 	**	any that match.
485 	*/
486 
487 	for (rwr = RewriteRules; rwr != NULL; )
488 	{
489 # ifdef DEBUG
490 		if (Debug)
491 		{
492 			printf("-----trying rule:\n");
493 			printav(rwr->r_lhs);
494 		}
495 # endif DEBUG
496 
497 		/* try to match on this rule */
498 		clrmatch(mlist);
499 		for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; )
500 		{
501 			ap = *avp;
502 			rp = *rvp;
503 
504 			if (rp == NULL)
505 			{
506 				/* end-of-pattern before end-of-address */
507 				goto fail;
508 			}
509 
510 			switch (*rp)
511 			{
512 			  case MATCHONE:
513 				/* match exactly one token */
514 				setmatch(mlist, rp[1], avp, avp);
515 				break;
516 
517 			  case MATCHANY:
518 				/* match any number of tokens */
519 				setmatch(mlist, rp[1], NULL, avp);
520 				break;
521 
522 			  default:
523 				/* must have exact match */
524 				/* can scribble rp & ap here safely */
525 				while (*rp != '\0' && *ap != '\0')
526 				{
527 					if (*rp++ != lower(*ap++))
528 						goto fail;
529 				}
530 				break;
531 			}
532 
533 			/* successful match on this token */
534 			avp++;
535 			rvp++;
536 			continue;
537 
538 		  fail:
539 			/* match failed -- back up */
540 			while (--rvp >= rwr->r_lhs)
541 			{
542 				rp = *rvp;
543 				if (*rp == MATCHANY)
544 					break;
545 
546 				/* can't extend match: back up everything */
547 				avp--;
548 
549 				if (*rp == MATCHONE)
550 				{
551 					/* undo binding */
552 					setmatch(mlist, rp[1], NULL, NULL);
553 				}
554 			}
555 
556 			if (rvp < rwr->r_lhs)
557 			{
558 				/* total failure to match */
559 				break;
560 			}
561 		}
562 
563 		/*
564 		**  See if we successfully matched
565 		*/
566 
567 		if (rvp >= rwr->r_lhs && *rvp == NULL)
568 		{
569 # ifdef DEBUG
570 			if (Debug)
571 			{
572 				printf("-----rule matches:\n");
573 				printav(rwr->r_rhs);
574 			}
575 # endif DEBUG
576 
577 			/* substitute */
578 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
579 			{
580 				rp = *rvp;
581 				if (*rp == MATCHANY)
582 				{
583 					register struct match *m;
584 					register char **pp;
585 					extern struct match *findmatch();
586 
587 					m = findmatch(mlist, rp[1]);
588 					if (m != NULL)
589 					{
590 						pp = m->firsttok;
591 						do
592 						{
593 							*avp++ = *pp;
594 						} while (pp++ != m->lasttok);
595 					}
596 				}
597 				else
598 					*avp++ = rp;
599 			}
600 			*avp++ = NULL;
601 			bmove(npvp, pvp, (avp - npvp) * sizeof *avp);
602 # ifdef DEBUG
603 			if (Debug)
604 			{
605 				printf("rewritten as:\n");
606 				printav(pvp);
607 			}
608 # endif DEBUG
609 			if (pvp[0][0] == CANONNET)
610 				break;
611 		}
612 		else
613 		{
614 # ifdef DEBUG
615 			if (Debug)
616 				printf("----- rule fails\n");
617 # endif DEBUG
618 			rwr = rwr->r_next;
619 		}
620 	}
621 }
622 /*
623 **  SETMATCH -- set parameter value in match vector
624 **
625 **	Parameters:
626 **		mlist -- list of match values.
627 **		name -- the character name of this parameter.
628 **		first -- the first location of the replacement.
629 **		last -- the last location of the replacement.
630 **
631 **		If last == NULL, delete this entry.
632 **		If first == NULL, extend this entry (or add it if
633 **			it does not exist).
634 **
635 **	Returns:
636 **		nothing.
637 **
638 **	Side Effects:
639 **		munges with mlist.
640 */
641 
642 setmatch(mlist, name, first, last)
643 	struct match *mlist;
644 	char name;
645 	char **first;
646 	char **last;
647 {
648 	register struct match *m;
649 	struct match *nullm = NULL;
650 
651 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
652 	{
653 		if (m->name == name)
654 			break;
655 		if (m->name == '\0')
656 			nullm = m;
657 	}
658 
659 	if (m >= &mlist[MAXMATCH])
660 		m = nullm;
661 
662 	if (last == NULL)
663 	{
664 		m->name = '\0';
665 		return;
666 	}
667 
668 	if (m->name == '\0')
669 	{
670 		if (first == NULL)
671 			m->firsttok = last;
672 		else
673 			m->firsttok = first;
674 	}
675 	m->name = name;
676 	m->lasttok = last;
677 }
678 /*
679 **  FINDMATCH -- find match in mlist
680 **
681 **	Parameters:
682 **		mlist -- list to search.
683 **		name -- name to find.
684 **
685 **	Returns:
686 **		pointer to match structure.
687 **		NULL if no match.
688 **
689 **	Side Effects:
690 **		none.
691 */
692 
693 struct match *
694 findmatch(mlist, name)
695 	struct match *mlist;
696 	char name;
697 {
698 	register struct match *m;
699 
700 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
701 	{
702 		if (m->name == name)
703 			return (m);
704 	}
705 
706 	return (NULL);
707 }
708 /*
709 **  CLRMATCH -- clear match list
710 **
711 **	Parameters:
712 **		mlist -- list to clear.
713 **
714 **	Returns:
715 **		none.
716 **
717 **	Side Effects:
718 **		mlist is cleared.
719 */
720 
721 clrmatch(mlist)
722 	struct match *mlist;
723 {
724 	register struct match *m;
725 
726 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
727 		m->name = '\0';
728 }
729 /*
730 **  BUILDADDR -- build address from token vector.
731 **
732 **	Parameters:
733 **		tv -- token vector.
734 **		a -- pointer to address descriptor to fill.
735 **			If NULL, one will be allocated.
736 **
737 **	Returns:
738 **		'a'
739 **
740 **	Side Effects:
741 **		fills in 'a'
742 */
743 
744 ADDRESS *
745 buildaddr(tv, a)
746 	register char **tv;
747 	register ADDRESS *a;
748 {
749 	register int i;
750 	static char buf[MAXNAME];
751 	struct mailer **mp;
752 	register struct mailer *m;
753 	extern char *xalloc();
754 
755 	if (a == NULL)
756 		a = (ADDRESS *) xalloc(sizeof *a);
757 
758 	/* figure out what net/mailer to use */
759 	if (**tv != CANONNET)
760 		syserr("buildaddr: no net");
761 	tv++;
762 	for (mp = Mailer, i = 0; (m = *mp) != NULL; m++, i++)
763 	{
764 		if (strcmp(m->m_name, *tv) == 0)
765 			break;
766 	}
767 	if (m == NULL)
768 		syserr("buildaddr: unknown net %s", *tv);
769 	a->q_mailer = i;
770 
771 	/* figure out what host (if any) */
772 	tv++;
773 	if (!bitset(M_NOHOST, m->m_flags))
774 	{
775 		if (**tv != CANONHOST)
776 			syserr("buildaddr: no host");
777 		tv++;
778 		a->q_host = *tv;
779 		tv++;
780 	}
781 	else
782 		a->q_host = NULL;
783 
784 	/* figure out the user */
785 	if (**tv != CANONUSER)
786 		syserr("buildaddr: no user");
787 	buf[0] = '\0';
788 	while (**++tv != NULL)
789 		strcat(buf, *tv);
790 	a->q_user = buf;
791 
792 	return (a);
793 }
794