1 # include <stdio.h>
2 # include <ctype.h>
3 # include "sendmail.h"
4 
/* Version identification string for this file (name, revision and date). */
static char	SccsId[] = "@(#)parseaddr.c	3.17	08/10/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
39 **	Side Effects:
40 **		none
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 # define DELIMCHARS	"$()<>,;\\\"\r\n"	/* word delimiters */
50 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char **pvp;
59 	register struct mailer *m;
60 	extern char **prescan();
61 	extern ADDRESS *buildaddr();
62 
63 	/*
64 	**  Initialize and prescan address.
65 	*/
66 
67 	To = addr;
68 # ifdef DEBUG
69 	if (Debug)
70 		printf("\n--parse(%s)\n", addr);
71 # endif DEBUG
72 
73 	pvp = prescan(addr, '\0');
74 	if (pvp == NULL)
75 		return (NULL);
76 
77 	/*
78 	**  Apply rewriting rules.
79 	*/
80 
81 	rewrite(pvp, 0);
82 
83 	/*
84 	**  See if we resolved to a real mailer.
85 	*/
86 
87 	if (pvp[0][0] != CANONNET)
88 	{
89 		setstat(EX_USAGE);
90 		usrerr("cannot resolve name");
91 		return (NULL);
92 	}
93 
94 	/*
95 	**  Build canonical address from pvp.
96 	*/
97 
98 	a = buildaddr(pvp, a);
99 	m = Mailer[a->q_mailer];
100 
101 	/*
102 	**  Make local copies of the host & user and then
103 	**  transport them out.
104 	*/
105 
106 	if (copyf > 0)
107 		a->q_paddr = newstr(addr);
108 	else
109 		a->q_paddr = addr;
110 
111 	if (copyf >= 0)
112 	{
113 		if (a->q_host != NULL)
114 			a->q_host = newstr(a->q_host);
115 		else
116 			a->q_host = "";
117 		if (a->q_user != a->q_paddr)
118 			a->q_user = newstr(a->q_user);
119 	}
120 
121 	/*
122 	**  Do UPPER->lower case mapping unless inhibited.
123 	*/
124 
125 	if (!bitset(M_HST_UPPER, m->m_flags))
126 		makelower(a->q_host);
127 	if (!bitset(M_USR_UPPER, m->m_flags))
128 		makelower(a->q_user);
129 
130 	/*
131 	**  Compute return value.
132 	*/
133 
134 # ifdef DEBUG
135 	if (Debug)
136 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
137 		    addr, a->q_host, a->q_user, a->q_mailer);
138 # endif DEBUG
139 
140 	return (a);
141 }
142 /*
143 **  PRESCAN -- Prescan name and make it canonical
144 **
145 **	Scans a name and turns it into canonical form.  This involves
146 **	deleting blanks, comments (in parentheses), and turning the
147 **	word "at" into an at-sign ("@").  The name is copied as this
148 **	is done; it is legal to copy a name onto itself, since this
149 **	process can only make things smaller.
150 **
151 **	This routine knows about quoted strings and angle brackets.
152 **
153 **	There are certain subtleties to this routine.  The one that
154 **	comes to mind now is that backslashes on the ends of names
155 **	are silently stripped off; this is intentional.  The problem
156 **	is that some versions of sndmsg (like at LBL) set the kill
157 **	character to something other than @ when reading addresses;
158 **	so people type "csvax.eric\@berkeley" -- which screws up the
159 **	berknet mailer.
160 **
161 **	Parameters:
162 **		addr -- the name to chomp.
163 **		delim -- the delimiter for the address, normally
164 **			'\0' or ','; \0 is accepted in any case.
165 **			are moving in place; set buflim to high core.
166 **
167 **	Returns:
168 **		A pointer to a vector of tokens.
169 **		NULL on error.
170 **
171 **	Side Effects:
172 **		none.
173 */
174 
175 # define OPER		1
176 # define ATOM		2
177 # define EOTOK		3
178 # define QSTRING	4
179 # define SPACE		5
180 # define DOLLAR		6
181 # define GETONE		7
182 
183 char **
184 prescan(addr, delim)
185 	char *addr;
186 	char delim;
187 {
188 	register char *p;
189 	static char buf[MAXNAME+MAXATOM];
190 	static char *av[MAXATOM+1];
191 	char **avp;
192 	bool space;
193 	bool bslashmode;
194 	int cmntcnt;
195 	int brccnt;
196 	register char c;
197 	char *tok;
198 	register char *q;
199 	register int state;
200 	int nstate;
201 	extern char lower();
202 
203 	space = FALSE;
204 	q = buf;
205 	bslashmode = FALSE;
206 	cmntcnt = brccnt = 0;
207 	avp = av;
208 	state = OPER;
209 	for (p = addr; *p != '\0' && *p != delim; )
210 	{
211 		/* read a token */
212 		tok = q;
213 		while ((c = *p++) != '\0' && c != delim)
214 		{
215 			/* chew up special characters */
216 			c &= ~0200;
217 			*q = '\0';
218 			if (bslashmode)
219 			{
220 				c |= 0200;
221 				bslashmode = FALSE;
222 			}
223 			else if (c == '\\')
224 			{
225 				bslashmode = TRUE;
226 				continue;
227 			}
228 			else if (c == '"')
229 			{
230 				if (state == QSTRING)
231 					state = OPER;
232 				else
233 					state = QSTRING;
234 				break;
235 			}
236 
237 			nstate = toktype(c);
238 			switch (state)
239 			{
240 			  case QSTRING:		/* in quoted string */
241 				break;
242 
243 			  case ATOM:		/* regular atom */
244 				state = nstate;
245 				if (state != ATOM)
246 				{
247 					state = EOTOK;
248 					p--;
249 				}
250 				break;
251 
252 			  case GETONE:		/* grab one character */
253 				state = OPER;
254 				break;
255 
256 			  case EOTOK:		/* after atom or q-string */
257 				state = nstate;
258 				if (state == SPACE)
259 					continue;
260 				break;
261 
262 			  case SPACE:		/* linear white space */
263 				state = nstate;
264 				space = TRUE;
265 				continue;
266 
267 			  case OPER:		/* operator */
268 				if (nstate == SPACE)
269 					continue;
270 				state = nstate;
271 				break;
272 
273 			  case DOLLAR:		/* $- etc. */
274 				state = OPER;
275 				switch (c)
276 				{
277 				  case '$':		/* literal $ */
278 					break;
279 
280 				  case '+':		/* match anything */
281 					c = MATCHANY;
282 					state = GETONE;
283 					break;
284 
285 				  case '-':		/* match one token */
286 					c = MATCHONE;
287 					state = GETONE;
288 					break;
289 
290 				  case '=':		/* match one token of class */
291 					c = MATCHCLASS;
292 					state = GETONE;
293 					break;
294 
295 				  case '#':		/* canonical net name */
296 					c = CANONNET;
297 					break;
298 
299 				  case '@':		/* canonical host name */
300 					c = CANONHOST;
301 					break;
302 
303 				  case ':':		/* canonical user name */
304 					c = CANONUSER;
305 					break;
306 
307 				  default:
308 					c = '$';
309 					state = OPER;
310 					p--;
311 					break;
312 				}
313 				break;
314 
315 			  default:
316 				syserr("prescan: unknown state %d", state);
317 			}
318 
319 			if (state == OPER)
320 				space = FALSE;
321 			else if (state == EOTOK)
322 				break;
323 			if (c == '$' && delim == '\t')
324 			{
325 				state = DOLLAR;
326 				continue;
327 			}
328 
329 			/* squirrel it away */
330 			if (q >= &buf[sizeof buf - 5])
331 			{
332 				usrerr("Address too long");
333 				return (NULL);
334 			}
335 			if (space)
336 				*q++ = SPACESUB;
337 			*q++ = c;
338 
339 			/* decide whether this represents end of token */
340 			if (state == OPER)
341 				break;
342 		}
343 		if (c == '\0' || c == delim)
344 			p--;
345 
346 		/* new token */
347 		if (tok == q)
348 			continue;
349 		*q++ = '\0';
350 
351 		c = tok[0];
352 		if (c == '(')
353 		{
354 			cmntcnt++;
355 			continue;
356 		}
357 		else if (c == ')')
358 		{
359 			if (cmntcnt <= 0)
360 			{
361 				usrerr("Unbalanced ')'");
362 				return (NULL);
363 			}
364 			else
365 			{
366 				cmntcnt--;
367 				continue;
368 			}
369 		}
370 		else if (cmntcnt > 0)
371 			continue;
372 
373 		*avp++ = tok;
374 
375 		/* we prefer <> specs */
376 		if (c == '<')
377 		{
378 			if (brccnt < 0)
379 			{
380 				usrerr("multiple < spec");
381 				return (NULL);
382 			}
383 			brccnt++;
384 			space = FALSE;
385 			if (brccnt == 1)
386 			{
387 				/* we prefer using machine readable name */
388 				q = buf;
389 				*q = '\0';
390 				avp = av;
391 				continue;
392 			}
393 		}
394 		else if (c == '>')
395 		{
396 			if (brccnt <= 0)
397 			{
398 				usrerr("Unbalanced `>'");
399 				return (NULL);
400 			}
401 			else
402 				brccnt--;
403 			if (brccnt <= 0)
404 			{
405 				brccnt = -1;
406 				continue;
407 			}
408 		}
409 
410 		/*
411 		**  Turn "at" into "@",
412 		**	but only if "at" is a word.
413 		*/
414 
415 		if (lower(tok[0]) == 'a' && lower(tok[1]) == 't' && tok[2] == '\0')
416 		{
417 			tok[0] = '@';
418 			tok[1] = '\0';
419 		}
420 	}
421 	*avp = NULL;
422 	if (cmntcnt > 0)
423 		usrerr("Unbalanced '('");
424 	else if (brccnt > 0)
425 		usrerr("Unbalanced '<'");
426 	else if (state == QSTRING)
427 		usrerr("Unbalanced '\"'");
428 	else if (av[0] != NULL)
429 		return (av);
430 	return (NULL);
431 }
432 /*
433 **  TOKTYPE -- return token type
434 **
435 **	Parameters:
436 **		c -- the character in question.
437 **
438 **	Returns:
439 **		Its type.
440 **
441 **	Side Effects:
442 **		none.
443 */
444 
445 toktype(c)
446 	register char c;
447 {
448 	static char buf[50];
449 	static bool firstime = TRUE;
450 
451 	if (firstime)
452 	{
453 		firstime = FALSE;
454 		(void) expand("$o", buf, &buf[sizeof buf - 1]);
455 		strcat(buf, DELIMCHARS);
456 	}
457 	if (!isascii(c))
458 		return (ATOM);
459 	if (isspace(c))
460 		return (SPACE);
461 	if (iscntrl(c) || index(buf, c) != NULL)
462 		return (OPER);
463 	return (ATOM);
464 }
465 /*
466 **  REWRITE -- apply rewrite rules to token vector.
467 **
468 **	Parameters:
469 **		pvp -- pointer to token vector.
470 **
471 **	Returns:
472 **		none.
473 **
474 **	Side Effects:
475 **		pvp is modified.
476 */
477 
478 struct match
479 {
480 	char	**firsttok;	/* first token matched */
481 	char	**lasttok;	/* last token matched */
482 	char	name;		/* name of parameter */
483 };
484 
485 # define MAXMATCH	8	/* max params per rewrite */
486 
487 
488 rewrite(pvp, ruleset)
489 	char **pvp;
490 	int ruleset;
491 {
492 	register char *ap;		/* address pointer */
493 	register char *rp;		/* rewrite pointer */
494 	register char **avp;		/* address vector pointer */
495 	register char **rvp;		/* rewrite vector pointer */
496 	struct rewrite *rwr;
497 	struct match mlist[MAXMATCH];
498 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
499 	extern bool sameword();
500 
501 # ifdef DEBUG
502 	if (Debug > 10)
503 	{
504 		printf("rewrite: original pvp:\n");
505 		printav(pvp);
506 	}
507 # endif DEBUG
508 
509 	/*
510 	**  Run through the list of rewrite rules, applying
511 	**	any that match.
512 	*/
513 
514 	for (rwr = RewriteRules[ruleset]; rwr != NULL; )
515 	{
516 # ifdef DEBUG
517 		if (Debug > 10)
518 		{
519 			printf("-----trying rule:\n");
520 			printav(rwr->r_lhs);
521 		}
522 # endif DEBUG
523 
524 		/* try to match on this rule */
525 		clrmatch(mlist);
526 		for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; )
527 		{
528 			ap = *avp;
529 			rp = *rvp;
530 
531 			if (rp == NULL)
532 			{
533 				/* end-of-pattern before end-of-address */
534 				goto fail;
535 			}
536 
537 			switch (*rp)
538 			{
539 				register STAB *s;
540 				register int class;
541 
542 			  case MATCHONE:
543 				/* match exactly one token */
544 				setmatch(mlist, rp[1], avp, avp);
545 				break;
546 
547 			  case MATCHANY:
548 				/* match any number of tokens */
549 				setmatch(mlist, rp[1], (char **) NULL, avp);
550 				break;
551 
552 			  case MATCHCLASS:
553 				/* match any token in a class */
554 				class = rp[1];
555 				if (!isalpha(class))
556 					goto fail;
557 				if (isupper(class))
558 					class -= 'A';
559 				else
560 					class -= 'a';
561 				s = stab(ap, ST_CLASS, ST_FIND);
562 				if (s == NULL || (s->s_class & (1 << class)) == 0)
563 					goto fail;
564 				break;
565 
566 			  default:
567 				/* must have exact match */
568 				if (!sameword(rp, ap))
569 					goto fail;
570 				break;
571 			}
572 
573 			/* successful match on this token */
574 			avp++;
575 			rvp++;
576 			continue;
577 
578 		  fail:
579 			/* match failed -- back up */
580 			while (--rvp >= rwr->r_lhs)
581 			{
582 				rp = *rvp;
583 				if (*rp == MATCHANY)
584 					break;
585 
586 				/* can't extend match: back up everything */
587 				avp--;
588 
589 				if (*rp == MATCHONE)
590 				{
591 					/* undo binding */
592 					setmatch(mlist, rp[1], (char **) NULL, (char **) NULL);
593 				}
594 			}
595 
596 			if (rvp < rwr->r_lhs)
597 			{
598 				/* total failure to match */
599 				break;
600 			}
601 		}
602 
603 		/*
604 		**  See if we successfully matched
605 		*/
606 
607 		if (rvp >= rwr->r_lhs && *rvp == NULL)
608 		{
609 # ifdef DEBUG
610 			if (Debug > 10)
611 			{
612 				printf("-----rule matches:\n");
613 				printav(rwr->r_rhs);
614 			}
615 # endif DEBUG
616 
617 			/* substitute */
618 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
619 			{
620 				rp = *rvp;
621 				if (*rp == MATCHANY)
622 				{
623 					register struct match *m;
624 					register char **pp;
625 					extern struct match *findmatch();
626 
627 					m = findmatch(mlist, rp[1]);
628 					if (m != NULL)
629 					{
630 						pp = m->firsttok;
631 						do
632 						{
633 							*avp++ = *pp;
634 						} while (pp++ != m->lasttok);
635 					}
636 				}
637 				else
638 					*avp++ = rp;
639 			}
640 			*avp++ = NULL;
641 			bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp);
642 # ifdef DEBUG
643 			if (Debug)
644 			{
645 				char **vp;
646 
647 				printf("rewritten as `");
648 				for (vp = pvp; *vp != NULL; vp++)
649 					xputs(*vp);
650 				printf("'\n");
651 			}
652 # endif DEBUG
653 			if (pvp[0][0] == CANONNET)
654 				break;
655 		}
656 		else
657 		{
658 # ifdef DEBUG
659 			if (Debug > 10)
660 				printf("----- rule fails\n");
661 # endif DEBUG
662 			rwr = rwr->r_next;
663 		}
664 	}
665 }
666 /*
667 **  SETMATCH -- set parameter value in match vector
668 **
669 **	Parameters:
670 **		mlist -- list of match values.
671 **		name -- the character name of this parameter.
672 **		first -- the first location of the replacement.
673 **		last -- the last location of the replacement.
674 **
675 **		If last == NULL, delete this entry.
676 **		If first == NULL, extend this entry (or add it if
677 **			it does not exist).
678 **
679 **	Returns:
680 **		nothing.
681 **
682 **	Side Effects:
683 **		munges with mlist.
684 */
685 
686 setmatch(mlist, name, first, last)
687 	struct match *mlist;
688 	char name;
689 	char **first;
690 	char **last;
691 {
692 	register struct match *m;
693 	struct match *nullm = NULL;
694 
695 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
696 	{
697 		if (m->name == name)
698 			break;
699 		if (m->name == '\0')
700 			nullm = m;
701 	}
702 
703 	if (m >= &mlist[MAXMATCH])
704 		m = nullm;
705 
706 	if (last == NULL)
707 	{
708 		m->name = '\0';
709 		return;
710 	}
711 
712 	if (m->name == '\0')
713 	{
714 		if (first == NULL)
715 			m->firsttok = last;
716 		else
717 			m->firsttok = first;
718 	}
719 	m->name = name;
720 	m->lasttok = last;
721 }
722 /*
723 **  FINDMATCH -- find match in mlist
724 **
725 **	Parameters:
726 **		mlist -- list to search.
727 **		name -- name to find.
728 **
729 **	Returns:
730 **		pointer to match structure.
731 **		NULL if no match.
732 **
733 **	Side Effects:
734 **		none.
735 */
736 
737 struct match *
738 findmatch(mlist, name)
739 	struct match *mlist;
740 	char name;
741 {
742 	register struct match *m;
743 
744 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
745 	{
746 		if (m->name == name)
747 			return (m);
748 	}
749 
750 	return (NULL);
751 }
752 /*
753 **  CLRMATCH -- clear match list
754 **
755 **	Parameters:
756 **		mlist -- list to clear.
757 **
758 **	Returns:
759 **		none.
760 **
761 **	Side Effects:
762 **		mlist is cleared.
763 */
764 
765 clrmatch(mlist)
766 	struct match *mlist;
767 {
768 	register struct match *m;
769 
770 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
771 		m->name = '\0';
772 }
773 /*
774 **  BUILDADDR -- build address from token vector.
775 **
776 **	Parameters:
777 **		tv -- token vector.
778 **		a -- pointer to address descriptor to fill.
779 **			If NULL, one will be allocated.
780 **
781 **	Returns:
782 **		'a'
783 **
784 **	Side Effects:
785 **		fills in 'a'
786 */
787 
788 ADDRESS *
789 buildaddr(tv, a)
790 	register char **tv;
791 	register ADDRESS *a;
792 {
793 	register int i;
794 	static char buf[MAXNAME];
795 	struct mailer **mp;
796 	register struct mailer *m;
797 
798 	if (a == NULL)
799 		a = (ADDRESS *) xalloc(sizeof *a);
800 	a->q_flags = 0;
801 	a->q_home = NULL;
802 
803 	/* figure out what net/mailer to use */
804 	if (**tv != CANONNET)
805 		syserr("buildaddr: no net");
806 	tv++;
807 	for (mp = Mailer, i = 0; (m = *mp++) != NULL; i++)
808 	{
809 		if (strcmp(m->m_name, *tv) == 0)
810 			break;
811 	}
812 	if (m == NULL)
813 		syserr("buildaddr: unknown net %s", *tv);
814 	a->q_mailer = i;
815 
816 	/* figure out what host (if any) */
817 	tv++;
818 	if (!bitset(M_NOHOST, m->m_flags))
819 	{
820 		if (**tv != CANONHOST)
821 			syserr("buildaddr: no host");
822 		tv++;
823 		a->q_host = *tv;
824 		tv++;
825 	}
826 	else
827 		a->q_host = NULL;
828 
829 	/* figure out the user */
830 	if (**tv != CANONUSER)
831 		syserr("buildaddr: no user");
832 	buf[0] = '\0';
833 	while (**++tv != NULL)
834 		(void) strcat(buf, *tv);
835 	a->q_user = buf;
836 
837 	return (a);
838 }
839 /*
840 **  SAMEADDR -- Determine if two addresses are the same
841 **
842 **	This is not just a straight comparison -- if the mailer doesn't
843 **	care about the host we just ignore it, etc.
844 **
845 **	Parameters:
846 **		a, b -- pointers to the internal forms to compare.
847 **		wildflg -- if TRUE, 'a' may have no user specified,
848 **			in which case it is to match anything.
849 **
850 **	Returns:
851 **		TRUE -- they represent the same mailbox.
852 **		FALSE -- they don't.
853 **
854 **	Side Effects:
855 **		none.
856 */
857 
858 bool
859 sameaddr(a, b, wildflg)
860 	register ADDRESS *a;
861 	register ADDRESS *b;
862 	bool wildflg;
863 {
864 	/* if they don't have the same mailer, forget it */
865 	if (a->q_mailer != b->q_mailer)
866 		return (FALSE);
867 
868 	/* if the user isn't the same, we can drop out */
869 	if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0)
870 		return (FALSE);
871 
872 	/* if the mailer ignores hosts, we have succeeded! */
873 	if (bitset(M_NOHOST, Mailer[a->q_mailer]->m_flags))
874 		return (TRUE);
875 
876 	/* otherwise compare hosts (but be careful for NULL ptrs) */
877 	if (a->q_host == NULL || b->q_host == NULL)
878 		return (FALSE);
879 	if (strcmp(a->q_host, b->q_host) != 0)
880 		return (FALSE);
881 
882 	return (TRUE);
883 }
884 /*
885 **  PRINTADDR -- print address (for debugging)
886 **
887 **	Parameters:
888 **		a -- the address to print
889 **		follow -- follow the q_next chain.
890 **
891 **	Returns:
892 **		none.
893 **
894 **	Side Effects:
895 **		none.
896 */
897 
898 printaddr(a, follow)
899 	register ADDRESS *a;
900 	bool follow;
901 {
902 	while (a != NULL)
903 	{
904 		printf("addr@%x: ", a);
905 		(void) fflush(stdout);
906 		printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr,
907 		       a->q_mailer, Mailer[a->q_mailer]->m_name, a->q_host, a->q_user);
908 		printf("\tnext=%x flags=%o, rmailer %d\n", a->q_next,
909 		       a->q_flags, a->q_rmailer);
910 
911 		if (!follow)
912 			return;
913 		a = a->q_next;
914 	}
915 }
916