1 # include "sendmail.h"
2 
3 static char	SccsId[] = "@(#)parseaddr.c	3.19	08/21/81";
4 
5 /*
6 **  PARSE -- Parse an address
7 **
8 **	Parses an address and breaks it up into three parts: a
9 **	net to transmit the message on, the host to transmit it
10 **	to, and a user on that host.  These are loaded into an
11 **	ADDRESS header with the values squirreled away if necessary.
12 **	The "user" part may not be a real user; the process may
13 **	just reoccur on that machine.  For example, on a machine
14 **	with an arpanet connection, the address
15 **		csvax.bill@berkeley
16 **	will break up to a "user" of 'csvax.bill' and a host
17 **	of 'berkeley' -- to be transmitted over the arpanet.
18 **
19 **	Parameters:
20 **		addr -- the address to parse.
21 **		a -- a pointer to the address descriptor buffer.
22 **			If NULL, a header will be created.
23 **		copyf -- determines what shall be copied:
24 **			-1 -- don't copy anything.  The printname
25 **				(q_paddr) is just addr, and the
26 **				user & host are allocated internally
27 **				to parse.
28 **			0 -- copy out the parsed user & host, but
29 **				don't copy the printname.
30 **			+1 -- copy everything.
31 **
32 **	Returns:
33 **		A pointer to the address descriptor header (`a' if
34 **			`a' is non-NULL).
35 **		NULL on error.
36 **
37 **	Side Effects:
38 **		none
39 **
40 **	Called By:
41 **		main
42 **		sendto
43 **		alias
44 **		savemail
45 */
46 
47 # define DELIMCHARS	"$()<>,;\\\"\r\n"	/* word delimiters */
48 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
49 
50 ADDRESS *
51 parse(addr, a, copyf)
52 	char *addr;
53 	register ADDRESS *a;
54 	int copyf;
55 {
56 	register char **pvp;
57 	register struct mailer *m;
58 	extern char **prescan();
59 	extern ADDRESS *buildaddr();
60 
61 	/*
62 	**  Initialize and prescan address.
63 	*/
64 
65 	To = addr;
66 # ifdef DEBUG
67 	if (Debug)
68 		printf("\n--parse(%s)\n", addr);
69 # endif DEBUG
70 
71 	pvp = prescan(addr, '\0');
72 	if (pvp == NULL)
73 		return (NULL);
74 
75 	/*
76 	**  Apply rewriting rules.
77 	*/
78 
79 	rewrite(pvp, 0);
80 
81 	/*
82 	**  See if we resolved to a real mailer.
83 	*/
84 
85 	if (pvp[0][0] != CANONNET)
86 	{
87 		setstat(EX_USAGE);
88 		usrerr("cannot resolve name");
89 		return (NULL);
90 	}
91 
92 	/*
93 	**  Build canonical address from pvp.
94 	*/
95 
96 	a = buildaddr(pvp, a);
97 	m = Mailer[a->q_mailer];
98 
99 	/*
100 	**  Make local copies of the host & user and then
101 	**  transport them out.
102 	*/
103 
104 	if (copyf > 0)
105 		a->q_paddr = newstr(addr);
106 	else
107 		a->q_paddr = addr;
108 
109 	if (copyf >= 0)
110 	{
111 		if (a->q_host != NULL)
112 			a->q_host = newstr(a->q_host);
113 		else
114 			a->q_host = "";
115 		if (a->q_user != a->q_paddr)
116 			a->q_user = newstr(a->q_user);
117 	}
118 
119 	/*
120 	**  Do UPPER->lower case mapping unless inhibited.
121 	*/
122 
123 	if (!bitset(M_HST_UPPER, m->m_flags))
124 		makelower(a->q_host);
125 	if (!bitset(M_USR_UPPER, m->m_flags))
126 		makelower(a->q_user);
127 
128 	/*
129 	**  Compute return value.
130 	*/
131 
132 # ifdef DEBUG
133 	if (Debug)
134 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
135 		    addr, a->q_host, a->q_user, a->q_mailer);
136 # endif DEBUG
137 
138 	return (a);
139 }
140 /*
141 **  PRESCAN -- Prescan name and make it canonical
142 **
143 **	Scans a name and turns it into canonical form.  This involves
144 **	deleting blanks, comments (in parentheses), and turning the
145 **	word "at" into an at-sign ("@").  The name is copied as this
146 **	is done; it is legal to copy a name onto itself, since this
147 **	process can only make things smaller.
148 **
149 **	This routine knows about quoted strings and angle brackets.
150 **
151 **	There are certain subtleties to this routine.  The one that
152 **	comes to mind now is that backslashes on the ends of names
153 **	are silently stripped off; this is intentional.  The problem
154 **	is that some versions of sndmsg (like at LBL) set the kill
155 **	character to something other than @ when reading addresses;
156 **	so people type "csvax.eric\@berkeley" -- which screws up the
157 **	berknet mailer.
158 **
159 **	Parameters:
160 **		addr -- the name to chomp.
161 **		delim -- the delimiter for the address, normally
162 **			'\0' or ','; \0 is accepted in any case.
163 **			are moving in place; set buflim to high core.
164 **
165 **	Returns:
166 **		A pointer to a vector of tokens.
167 **		NULL on error.
168 **
169 **	Side Effects:
170 **		none.
171 */
172 
173 # define OPER		1
174 # define ATOM		2
175 # define EOTOK		3
176 # define QSTRING	4
177 # define SPACE		5
178 # define DOLLAR		6
179 # define GETONE		7
180 
181 char **
182 prescan(addr, delim)
183 	char *addr;
184 	char delim;
185 {
186 	register char *p;
187 	static char buf[MAXNAME+MAXATOM];
188 	static char *av[MAXATOM+1];
189 	char **avp;
190 	bool space;
191 	bool bslashmode;
192 	int cmntcnt;
193 	int brccnt;
194 	register char c;
195 	char *tok;
196 	register char *q;
197 	register int state;
198 	int nstate;
199 	extern char lower();
200 
201 	space = FALSE;
202 	q = buf;
203 	bslashmode = FALSE;
204 	cmntcnt = brccnt = 0;
205 	avp = av;
206 	state = OPER;
207 	for (p = addr; *p != '\0' && *p != delim; )
208 	{
209 		/* read a token */
210 		tok = q;
211 		while ((c = *p++) != '\0' && c != delim)
212 		{
213 			/* chew up special characters */
214 			c &= ~0200;
215 			*q = '\0';
216 			if (bslashmode)
217 			{
218 				c |= 0200;
219 				bslashmode = FALSE;
220 			}
221 			else if (c == '\\')
222 			{
223 				bslashmode = TRUE;
224 				continue;
225 			}
226 			else if (c == '"')
227 			{
228 				if (state == QSTRING)
229 					state = OPER;
230 				else
231 					state = QSTRING;
232 				break;
233 			}
234 
235 			nstate = toktype(c);
236 			switch (state)
237 			{
238 			  case QSTRING:		/* in quoted string */
239 				break;
240 
241 			  case ATOM:		/* regular atom */
242 				state = nstate;
243 				if (state != ATOM)
244 				{
245 					state = EOTOK;
246 					p--;
247 				}
248 				break;
249 
250 			  case GETONE:		/* grab one character */
251 				state = OPER;
252 				break;
253 
254 			  case EOTOK:		/* after atom or q-string */
255 				state = nstate;
256 				if (state == SPACE)
257 					continue;
258 				break;
259 
260 			  case SPACE:		/* linear white space */
261 				state = nstate;
262 				space = TRUE;
263 				continue;
264 
265 			  case OPER:		/* operator */
266 				if (nstate == SPACE)
267 					continue;
268 				state = nstate;
269 				break;
270 
271 			  case DOLLAR:		/* $- etc. */
272 				state = OPER;
273 				switch (c)
274 				{
275 				  case '$':		/* literal $ */
276 					break;
277 
278 				  case '+':		/* match anything */
279 					c = MATCHANY;
280 					state = GETONE;
281 					break;
282 
283 				  case '-':		/* match one token */
284 					c = MATCHONE;
285 					state = GETONE;
286 					break;
287 
288 				  case '=':		/* match one token of class */
289 					c = MATCHCLASS;
290 					state = GETONE;
291 					break;
292 
293 				  case '#':		/* canonical net name */
294 					c = CANONNET;
295 					break;
296 
297 				  case '@':		/* canonical host name */
298 					c = CANONHOST;
299 					break;
300 
301 				  case ':':		/* canonical user name */
302 					c = CANONUSER;
303 					break;
304 
305 				  default:
306 					c = '$';
307 					state = OPER;
308 					p--;
309 					break;
310 				}
311 				break;
312 
313 			  default:
314 				syserr("prescan: unknown state %d", state);
315 			}
316 
317 			if (state == OPER)
318 				space = FALSE;
319 			else if (state == EOTOK)
320 				break;
321 			if (c == '$' && delim == '\t')
322 			{
323 				state = DOLLAR;
324 				continue;
325 			}
326 
327 			/* squirrel it away */
328 			if (q >= &buf[sizeof buf - 5])
329 			{
330 				usrerr("Address too long");
331 				return (NULL);
332 			}
333 			if (space)
334 				*q++ = SPACESUB;
335 			*q++ = c;
336 
337 			/* decide whether this represents end of token */
338 			if (state == OPER)
339 				break;
340 		}
341 		if (c == '\0' || c == delim)
342 			p--;
343 
344 		/* new token */
345 		if (tok == q)
346 			continue;
347 		*q++ = '\0';
348 
349 		c = tok[0];
350 		if (c == '(')
351 		{
352 			cmntcnt++;
353 			continue;
354 		}
355 		else if (c == ')')
356 		{
357 			if (cmntcnt <= 0)
358 			{
359 				usrerr("Unbalanced ')'");
360 				return (NULL);
361 			}
362 			else
363 			{
364 				cmntcnt--;
365 				continue;
366 			}
367 		}
368 		else if (cmntcnt > 0)
369 			continue;
370 
371 		*avp++ = tok;
372 
373 		/* we prefer <> specs */
374 		if (c == '<')
375 		{
376 			if (brccnt < 0)
377 			{
378 				usrerr("multiple < spec");
379 				return (NULL);
380 			}
381 			brccnt++;
382 			space = FALSE;
383 			if (brccnt == 1)
384 			{
385 				/* we prefer using machine readable name */
386 				q = buf;
387 				*q = '\0';
388 				avp = av;
389 				continue;
390 			}
391 		}
392 		else if (c == '>')
393 		{
394 			if (brccnt <= 0)
395 			{
396 				usrerr("Unbalanced `>'");
397 				return (NULL);
398 			}
399 			else
400 				brccnt--;
401 			if (brccnt <= 0)
402 			{
403 				brccnt = -1;
404 				continue;
405 			}
406 		}
407 
408 		/*
409 		**  Turn "at" into "@",
410 		**	but only if "at" is a word.
411 		*/
412 
413 		if (lower(tok[0]) == 'a' && lower(tok[1]) == 't' && tok[2] == '\0')
414 		{
415 			tok[0] = '@';
416 			tok[1] = '\0';
417 		}
418 	}
419 	*avp = NULL;
420 	if (cmntcnt > 0)
421 		usrerr("Unbalanced '('");
422 	else if (brccnt > 0)
423 		usrerr("Unbalanced '<'");
424 	else if (state == QSTRING)
425 		usrerr("Unbalanced '\"'");
426 	else if (av[0] != NULL)
427 		return (av);
428 	return (NULL);
429 }
430 /*
431 **  TOKTYPE -- return token type
432 **
433 **	Parameters:
434 **		c -- the character in question.
435 **
436 **	Returns:
437 **		Its type.
438 **
439 **	Side Effects:
440 **		none.
441 */
442 
443 toktype(c)
444 	register char c;
445 {
446 	static char buf[50];
447 	static bool firstime = TRUE;
448 
449 	if (firstime)
450 	{
451 		firstime = FALSE;
452 		(void) expand("$o", buf, &buf[sizeof buf - 1]);
453 		strcat(buf, DELIMCHARS);
454 	}
455 	if (!isascii(c))
456 		return (ATOM);
457 	if (isspace(c))
458 		return (SPACE);
459 	if (iscntrl(c) || index(buf, c) != NULL)
460 		return (OPER);
461 	return (ATOM);
462 }
463 /*
464 **  REWRITE -- apply rewrite rules to token vector.
465 **
466 **	Parameters:
467 **		pvp -- pointer to token vector.
468 **
469 **	Returns:
470 **		none.
471 **
472 **	Side Effects:
473 **		pvp is modified.
474 */
475 
476 struct match
477 {
478 	char	**firsttok;	/* first token matched */
479 	char	**lasttok;	/* last token matched */
480 	char	name;		/* name of parameter */
481 };
482 
483 # define MAXMATCH	8	/* max params per rewrite */
484 
485 
486 rewrite(pvp, ruleset)
487 	char **pvp;
488 	int ruleset;
489 {
490 	register char *ap;		/* address pointer */
491 	register char *rp;		/* rewrite pointer */
492 	register char **avp;		/* address vector pointer */
493 	register char **rvp;		/* rewrite vector pointer */
494 	struct rewrite *rwr;
495 	struct match mlist[MAXMATCH];
496 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
497 	extern bool sameword();
498 
499 # ifdef DEBUG
500 	if (Debug > 10)
501 	{
502 		printf("rewrite: original pvp:\n");
503 		printav(pvp);
504 	}
505 # endif DEBUG
506 
507 	/*
508 	**  Run through the list of rewrite rules, applying
509 	**	any that match.
510 	*/
511 
512 	for (rwr = RewriteRules[ruleset]; rwr != NULL; )
513 	{
514 # ifdef DEBUG
515 		if (Debug > 10)
516 		{
517 			printf("-----trying rule:\n");
518 			printav(rwr->r_lhs);
519 		}
520 # endif DEBUG
521 
522 		/* try to match on this rule */
523 		clrmatch(mlist);
524 		for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; )
525 		{
526 			ap = *avp;
527 			rp = *rvp;
528 
529 			if (rp == NULL)
530 			{
531 				/* end-of-pattern before end-of-address */
532 				goto fail;
533 			}
534 
535 			switch (*rp)
536 			{
537 				register STAB *s;
538 				register int class;
539 
540 			  case MATCHONE:
541 				/* match exactly one token */
542 				setmatch(mlist, rp[1], avp, avp);
543 				break;
544 
545 			  case MATCHANY:
546 				/* match any number of tokens */
547 				setmatch(mlist, rp[1], (char **) NULL, avp);
548 				break;
549 
550 			  case MATCHCLASS:
551 				/* match any token in a class */
552 				class = rp[1];
553 				if (!isalpha(class))
554 					goto fail;
555 				if (isupper(class))
556 					class -= 'A';
557 				else
558 					class -= 'a';
559 				s = stab(ap, ST_CLASS, ST_FIND);
560 				if (s == NULL || (s->s_class & (1 << class)) == 0)
561 					goto fail;
562 				break;
563 
564 			  default:
565 				/* must have exact match */
566 				if (!sameword(rp, ap))
567 					goto fail;
568 				break;
569 			}
570 
571 			/* successful match on this token */
572 			avp++;
573 			rvp++;
574 			continue;
575 
576 		  fail:
577 			/* match failed -- back up */
578 			while (--rvp >= rwr->r_lhs)
579 			{
580 				rp = *rvp;
581 				if (*rp == MATCHANY)
582 					break;
583 
584 				/* can't extend match: back up everything */
585 				avp--;
586 
587 				if (*rp == MATCHONE)
588 				{
589 					/* undo binding */
590 					setmatch(mlist, rp[1], (char **) NULL, (char **) NULL);
591 				}
592 			}
593 
594 			if (rvp < rwr->r_lhs)
595 			{
596 				/* total failure to match */
597 				break;
598 			}
599 		}
600 
601 		/*
602 		**  See if we successfully matched
603 		*/
604 
605 		if (rvp >= rwr->r_lhs && *rvp == NULL)
606 		{
607 # ifdef DEBUG
608 			if (Debug > 10)
609 			{
610 				printf("-----rule matches:\n");
611 				printav(rwr->r_rhs);
612 			}
613 # endif DEBUG
614 
615 			/* substitute */
616 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
617 			{
618 				rp = *rvp;
619 				if (*rp == MATCHANY)
620 				{
621 					register struct match *m;
622 					register char **pp;
623 					extern struct match *findmatch();
624 
625 					m = findmatch(mlist, rp[1]);
626 					if (m != NULL)
627 					{
628 						pp = m->firsttok;
629 						do
630 						{
631 							*avp++ = *pp;
632 						} while (pp++ != m->lasttok);
633 					}
634 				}
635 				else
636 					*avp++ = rp;
637 			}
638 			*avp++ = NULL;
639 			bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp);
640 # ifdef DEBUG
641 			if (Debug)
642 			{
643 				char **vp;
644 
645 				printf("rewritten as `");
646 				for (vp = pvp; *vp != NULL; vp++)
647 					xputs(*vp);
648 				printf("'\n");
649 			}
650 # endif DEBUG
651 			if (pvp[0][0] == CANONNET)
652 				break;
653 		}
654 		else
655 		{
656 # ifdef DEBUG
657 			if (Debug > 10)
658 				printf("----- rule fails\n");
659 # endif DEBUG
660 			rwr = rwr->r_next;
661 		}
662 	}
663 }
664 /*
665 **  SETMATCH -- set parameter value in match vector
666 **
667 **	Parameters:
668 **		mlist -- list of match values.
669 **		name -- the character name of this parameter.
670 **		first -- the first location of the replacement.
671 **		last -- the last location of the replacement.
672 **
673 **		If last == NULL, delete this entry.
674 **		If first == NULL, extend this entry (or add it if
675 **			it does not exist).
676 **
677 **	Returns:
678 **		nothing.
679 **
680 **	Side Effects:
681 **		munges with mlist.
682 */
683 
684 setmatch(mlist, name, first, last)
685 	struct match *mlist;
686 	char name;
687 	char **first;
688 	char **last;
689 {
690 	register struct match *m;
691 	struct match *nullm = NULL;
692 
693 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
694 	{
695 		if (m->name == name)
696 			break;
697 		if (m->name == '\0')
698 			nullm = m;
699 	}
700 
701 	if (m >= &mlist[MAXMATCH])
702 		m = nullm;
703 
704 	if (last == NULL)
705 	{
706 		m->name = '\0';
707 		return;
708 	}
709 
710 	if (m->name == '\0')
711 	{
712 		if (first == NULL)
713 			m->firsttok = last;
714 		else
715 			m->firsttok = first;
716 	}
717 	m->name = name;
718 	m->lasttok = last;
719 }
720 /*
721 **  FINDMATCH -- find match in mlist
722 **
723 **	Parameters:
724 **		mlist -- list to search.
725 **		name -- name to find.
726 **
727 **	Returns:
728 **		pointer to match structure.
729 **		NULL if no match.
730 **
731 **	Side Effects:
732 **		none.
733 */
734 
735 struct match *
736 findmatch(mlist, name)
737 	struct match *mlist;
738 	char name;
739 {
740 	register struct match *m;
741 
742 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
743 	{
744 		if (m->name == name)
745 			return (m);
746 	}
747 
748 	return (NULL);
749 }
750 /*
751 **  CLRMATCH -- clear match list
752 **
753 **	Parameters:
754 **		mlist -- list to clear.
755 **
756 **	Returns:
757 **		none.
758 **
759 **	Side Effects:
760 **		mlist is cleared.
761 */
762 
763 clrmatch(mlist)
764 	struct match *mlist;
765 {
766 	register struct match *m;
767 
768 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
769 		m->name = '\0';
770 }
771 /*
772 **  BUILDADDR -- build address from token vector.
773 **
774 **	Parameters:
775 **		tv -- token vector.
776 **		a -- pointer to address descriptor to fill.
777 **			If NULL, one will be allocated.
778 **
779 **	Returns:
780 **		'a'
781 **
782 **	Side Effects:
783 **		fills in 'a'
784 */
785 
786 ADDRESS *
787 buildaddr(tv, a)
788 	register char **tv;
789 	register ADDRESS *a;
790 {
791 	register int i;
792 	static char buf[MAXNAME];
793 	struct mailer **mp;
794 	register struct mailer *m;
795 
796 	if (a == NULL)
797 		a = (ADDRESS *) xalloc(sizeof *a);
798 	a->q_flags = 0;
799 	a->q_home = NULL;
800 
801 	/* figure out what net/mailer to use */
802 	if (**tv != CANONNET)
803 		syserr("buildaddr: no net");
804 	tv++;
805 	for (mp = Mailer, i = 0; (m = *mp++) != NULL; i++)
806 	{
807 		if (strcmp(m->m_name, *tv) == 0)
808 			break;
809 	}
810 	if (m == NULL)
811 		syserr("buildaddr: unknown net %s", *tv);
812 	a->q_mailer = i;
813 
814 	/* figure out what host (if any) */
815 	tv++;
816 	if (!bitset(M_LOCAL, m->m_flags))
817 	{
818 		if (**tv != CANONHOST)
819 			syserr("buildaddr: no host");
820 		tv++;
821 		a->q_host = *tv;
822 		tv++;
823 	}
824 	else
825 		a->q_host = NULL;
826 
827 	/* figure out the user */
828 	if (**tv != CANONUSER)
829 		syserr("buildaddr: no user");
830 	buf[0] = '\0';
831 	while (**++tv != NULL)
832 		(void) strcat(buf, *tv);
833 	a->q_user = buf;
834 
835 	return (a);
836 }
837 /*
838 **  SAMEADDR -- Determine if two addresses are the same
839 **
840 **	This is not just a straight comparison -- if the mailer doesn't
841 **	care about the host we just ignore it, etc.
842 **
843 **	Parameters:
844 **		a, b -- pointers to the internal forms to compare.
845 **		wildflg -- if TRUE, 'a' may have no user specified,
846 **			in which case it is to match anything.
847 **
848 **	Returns:
849 **		TRUE -- they represent the same mailbox.
850 **		FALSE -- they don't.
851 **
852 **	Side Effects:
853 **		none.
854 */
855 
856 bool
857 sameaddr(a, b, wildflg)
858 	register ADDRESS *a;
859 	register ADDRESS *b;
860 	bool wildflg;
861 {
862 	/* if they don't have the same mailer, forget it */
863 	if (a->q_mailer != b->q_mailer)
864 		return (FALSE);
865 
866 	/* if the user isn't the same, we can drop out */
867 	if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0)
868 		return (FALSE);
869 
870 	/* if the mailer ignores hosts, we have succeeded! */
871 	if (bitset(M_LOCAL, Mailer[a->q_mailer]->m_flags))
872 		return (TRUE);
873 
874 	/* otherwise compare hosts (but be careful for NULL ptrs) */
875 	if (a->q_host == NULL || b->q_host == NULL)
876 		return (FALSE);
877 	if (strcmp(a->q_host, b->q_host) != 0)
878 		return (FALSE);
879 
880 	return (TRUE);
881 }
882 /*
883 **  PRINTADDR -- print address (for debugging)
884 **
885 **	Parameters:
886 **		a -- the address to print
887 **		follow -- follow the q_next chain.
888 **
889 **	Returns:
890 **		none.
891 **
892 **	Side Effects:
893 **		none.
894 */
895 
896 printaddr(a, follow)
897 	register ADDRESS *a;
898 	bool follow;
899 {
900 	while (a != NULL)
901 	{
902 		printf("addr@%x: ", a);
903 		(void) fflush(stdout);
904 		printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr,
905 		       a->q_mailer, Mailer[a->q_mailer]->m_name, a->q_host, a->q_user);
906 		printf("\tnext=%x flags=%o, rmailer %d\n", a->q_next,
907 		       a->q_flags, a->q_rmailer);
908 
909 		if (!follow)
910 			return;
911 		a = a->q_next;
912 	}
913 }
914