1 # include "sendmail.h"
2 
/* Version identification string for this file; the "@(#)" prefix is presumably the SCCS what(1) marker -- confirm. */
static char	SccsId[] = "@(#)parseaddr.c	3.19.1.1	08/25/81";
4 
5 /*
6 **  PARSE -- Parse an address
7 **
8 **	Parses an address and breaks it up into three parts: a
9 **	net to transmit the message on, the host to transmit it
10 **	to, and a user on that host.  These are loaded into an
11 **	ADDRESS header with the values squirreled away if necessary.
12 **	The "user" part may not be a real user; the process may
13 **	just reoccur on that machine.  For example, on a machine
14 **	with an arpanet connection, the address
15 **		csvax.bill@berkeley
16 **	will break up to a "user" of 'csvax.bill' and a host
17 **	of 'berkeley' -- to be transmitted over the arpanet.
18 **
19 **	Parameters:
20 **		addr -- the address to parse.
21 **		a -- a pointer to the address descriptor buffer.
22 **			If NULL, a header will be created.
23 **		copyf -- determines what shall be copied:
24 **			-1 -- don't copy anything.  The printname
25 **				(q_paddr) is just addr, and the
26 **				user & host are allocated internally
27 **				to parse.
28 **			0 -- copy out the parsed user & host, but
29 **				don't copy the printname.
30 **			+1 -- copy everything.
31 **
32 **	Returns:
33 **		A pointer to the address descriptor header (`a' if
34 **			`a' is non-NULL).
35 **		NULL on error.
36 **
37 **	Side Effects:
38 **		none
39 **
40 **	Called By:
41 **		main
42 **		sendto
43 **		alias
44 **		savemail
45 */
46 
47 # define DELIMCHARS	"$()<>,;\\\"\r\n"	/* word delimiters */
48 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
49 
50 ADDRESS *
51 parse(addr, a, copyf)
52 	char *addr;
53 	register ADDRESS *a;
54 	int copyf;
55 {
56 	register char **pvp;
57 	register struct mailer *m;
58 	extern char **prescan();
59 	extern ADDRESS *buildaddr();
60 
61 	/*
62 	**  Initialize and prescan address.
63 	*/
64 
65 	To = addr;
66 # ifdef DEBUG
67 	if (Debug)
68 		printf("\n--parse(%s)\n", addr);
69 # endif DEBUG
70 
71 	pvp = prescan(addr, '\0');
72 	if (pvp == NULL)
73 		return (NULL);
74 
75 	/*
76 	**  Apply rewriting rules.
77 	*/
78 
79 	rewrite(pvp, 0);
80 
81 	/*
82 	**  See if we resolved to a real mailer.
83 	*/
84 
85 	if (pvp[0][0] != CANONNET)
86 	{
87 		setstat(EX_USAGE);
88 		usrerr("cannot resolve name");
89 		return (NULL);
90 	}
91 
92 	/*
93 	**  Build canonical address from pvp.
94 	*/
95 
96 	a = buildaddr(pvp, a);
97 	m = Mailer[a->q_mailer];
98 
99 	/*
100 	**  Make local copies of the host & user and then
101 	**  transport them out.
102 	*/
103 
104 	if (copyf > 0)
105 		a->q_paddr = newstr(addr);
106 	else
107 		a->q_paddr = addr;
108 
109 	if (copyf >= 0)
110 	{
111 		if (a->q_host != NULL)
112 			a->q_host = newstr(a->q_host);
113 		else
114 			a->q_host = "";
115 		if (a->q_user != a->q_paddr)
116 			a->q_user = newstr(a->q_user);
117 	}
118 
119 	/*
120 	**  Do UPPER->lower case mapping unless inhibited.
121 	*/
122 
123 	if (!bitset(M_HST_UPPER, m->m_flags))
124 		makelower(a->q_host);
125 	if (!bitset(M_USR_UPPER, m->m_flags))
126 		makelower(a->q_user);
127 
128 	/*
129 	**  Compute return value.
130 	*/
131 
132 # ifdef DEBUG
133 	if (Debug)
134 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
135 		    addr, a->q_host, a->q_user, a->q_mailer);
136 # endif DEBUG
137 
138 	return (a);
139 }
140 /*
141 **  PRESCAN -- Prescan name and make it canonical
142 **
143 **	Scans a name and turns it into canonical form.  This involves
144 **	deleting blanks, comments (in parentheses), and turning the
145 **	word "at" into an at-sign ("@").  The name is copied as this
146 **	is done; it is legal to copy a name onto itself, since this
147 **	process can only make things smaller.
148 **
149 **	This routine knows about quoted strings and angle brackets.
150 **
151 **	There are certain subtleties to this routine.  The one that
152 **	comes to mind now is that backslashes on the ends of names
153 **	are silently stripped off; this is intentional.  The problem
154 **	is that some versions of sndmsg (like at LBL) set the kill
155 **	character to something other than @ when reading addresses;
156 **	so people type "csvax.eric\@berkeley" -- which screws up the
157 **	berknet mailer.
158 **
159 **	Parameters:
160 **		addr -- the name to chomp.
161 **		delim -- the delimiter for the address, normally
162 **			'\0' or ','; \0 is accepted in any case.
163 **			are moving in place; set buflim to high core.
164 **
165 **	Returns:
166 **		A pointer to a vector of tokens.
167 **		NULL on error.
168 **
169 **	Side Effects:
170 **		none.
171 */
172 
173 # define OPER		1
174 # define ATOM		2
175 # define EOTOK		3
176 # define QSTRING	4
177 # define SPACE		5
178 # define DOLLAR		6
179 # define GETONE		7
180 
181 char **
182 prescan(addr, delim)
183 	char *addr;
184 	char delim;
185 {
186 	register char *p;
187 	static char buf[MAXNAME+MAXATOM];
188 	static char *av[MAXATOM+1];
189 	char **avp;
190 	bool space;
191 	bool bslashmode;
192 	int cmntcnt;
193 	int brccnt;
194 	register char c;
195 	char *tok;
196 	register char *q;
197 	register int state;
198 	int nstate;
199 	extern char lower();
200 
201 	space = FALSE;
202 	q = buf;
203 	bslashmode = FALSE;
204 	cmntcnt = brccnt = 0;
205 	avp = av;
206 	state = OPER;
207 	for (p = addr; *p != '\0' && *p != delim; )
208 	{
209 		/* read a token */
210 		tok = q;
211 		while ((c = *p++) != '\0' && c != delim)
212 		{
213 			/* chew up special characters */
214 			c &= ~0200;
215 			*q = '\0';
216 			if (bslashmode)
217 			{
218 				c |= 0200;
219 				bslashmode = FALSE;
220 			}
221 			else if (c == '\\')
222 			{
223 				bslashmode = TRUE;
224 				continue;
225 			}
226 			else if (c == '"')
227 			{
228 				if (state == QSTRING)
229 					state = OPER;
230 				else
231 					state = QSTRING;
232 				break;
233 			}
234 
235 			nstate = toktype(c);
236 			switch (state)
237 			{
238 			  case QSTRING:		/* in quoted string */
239 				break;
240 
241 			  case ATOM:		/* regular atom */
242 				state = nstate;
243 				if (state != ATOM)
244 				{
245 					state = EOTOK;
246 					p--;
247 				}
248 				break;
249 
250 			  case GETONE:		/* grab one character */
251 				state = OPER;
252 				break;
253 
254 			  case EOTOK:		/* after atom or q-string */
255 				state = nstate;
256 				if (state == SPACE)
257 					continue;
258 				break;
259 
260 			  case SPACE:		/* linear white space */
261 				state = nstate;
262 				space = TRUE;
263 				continue;
264 
265 			  case OPER:		/* operator */
266 				if (nstate == SPACE)
267 					continue;
268 				state = nstate;
269 				break;
270 
271 			  case DOLLAR:		/* $- etc. */
272 				state = OPER;
273 				switch (c)
274 				{
275 				  case '$':		/* literal $ */
276 					break;
277 
278 				  case '+':		/* match anything */
279 					c = MATCHANY;
280 					state = GETONE;
281 					break;
282 
283 				  case '-':		/* match one token */
284 					c = MATCHONE;
285 					state = GETONE;
286 					break;
287 
288 				  case '=':		/* match one token of class */
289 					c = MATCHCLASS;
290 					state = GETONE;
291 					break;
292 
293 				  case '#':		/* canonical net name */
294 					c = CANONNET;
295 					break;
296 
297 				  case '@':		/* canonical host name */
298 					c = CANONHOST;
299 					break;
300 
301 				  case ':':		/* canonical user name */
302 					c = CANONUSER;
303 					break;
304 
305 				  default:
306 					c = '$';
307 					state = OPER;
308 					p--;
309 					break;
310 				}
311 				break;
312 
313 			  default:
314 				syserr("prescan: unknown state %d", state);
315 			}
316 
317 			if (state == OPER)
318 				space = FALSE;
319 			else if (state == EOTOK)
320 				break;
321 			if (c == '$' && delim == '\t')
322 			{
323 				state = DOLLAR;
324 				continue;
325 			}
326 
327 			/* squirrel it away */
328 			if (q >= &buf[sizeof buf - 5])
329 			{
330 				usrerr("Address too long");
331 				return (NULL);
332 			}
333 			if (space)
334 				*q++ = SPACESUB;
335 			*q++ = c;
336 
337 			/* decide whether this represents end of token */
338 			if (state == OPER)
339 				break;
340 		}
341 		if (c == '\0' || c == delim)
342 			p--;
343 
344 		/* new token */
345 		if (tok == q)
346 			continue;
347 		*q++ = '\0';
348 
349 		c = tok[0];
350 		if (c == '(')
351 		{
352 			cmntcnt++;
353 			continue;
354 		}
355 		else if (c == ')')
356 		{
357 			if (cmntcnt <= 0)
358 			{
359 				usrerr("Unbalanced ')'");
360 				return (NULL);
361 			}
362 			else
363 			{
364 				cmntcnt--;
365 				continue;
366 			}
367 		}
368 		else if (cmntcnt > 0)
369 			continue;
370 
371 		*avp++ = tok;
372 
373 		/* we prefer <> specs */
374 		if (c == '<')
375 		{
376 			if (brccnt < 0)
377 			{
378 				usrerr("multiple < spec");
379 				return (NULL);
380 			}
381 			brccnt++;
382 			space = FALSE;
383 			if (brccnt == 1)
384 			{
385 				/* we prefer using machine readable name */
386 				q = buf;
387 				*q = '\0';
388 				avp = av;
389 				continue;
390 			}
391 		}
392 		else if (c == '>')
393 		{
394 			if (brccnt <= 0)
395 			{
396 				usrerr("Unbalanced `>'");
397 				return (NULL);
398 			}
399 			else
400 				brccnt--;
401 			if (brccnt <= 0)
402 			{
403 				brccnt = -1;
404 				continue;
405 			}
406 		}
407 	}
408 	*avp = NULL;
409 	if (cmntcnt > 0)
410 		usrerr("Unbalanced '('");
411 	else if (brccnt > 0)
412 		usrerr("Unbalanced '<'");
413 	else if (state == QSTRING)
414 		usrerr("Unbalanced '\"'");
415 	else if (av[0] != NULL)
416 		return (av);
417 	return (NULL);
418 }
419 /*
420 **  TOKTYPE -- return token type
421 **
422 **	Parameters:
423 **		c -- the character in question.
424 **
425 **	Returns:
426 **		Its type.
427 **
428 **	Side Effects:
429 **		none.
430 */
431 
432 toktype(c)
433 	register char c;
434 {
435 	static char buf[50];
436 	static bool firstime = TRUE;
437 
438 	if (firstime)
439 	{
440 		firstime = FALSE;
441 		(void) expand("$o", buf, &buf[sizeof buf - 1]);
442 		strcat(buf, DELIMCHARS);
443 	}
444 	if (!isascii(c))
445 		return (ATOM);
446 	if (isspace(c))
447 		return (SPACE);
448 	if (iscntrl(c) || index(buf, c) != NULL)
449 		return (OPER);
450 	return (ATOM);
451 }
452 /*
453 **  REWRITE -- apply rewrite rules to token vector.
454 **
455 **	Parameters:
456 **		pvp -- pointer to token vector.
457 **
458 **	Returns:
459 **		none.
460 **
461 **	Side Effects:
462 **		pvp is modified.
463 */
464 
465 struct match
466 {
467 	char	**firsttok;	/* first token matched */
468 	char	**lasttok;	/* last token matched */
469 	char	name;		/* name of parameter */
470 };
471 
472 # define MAXMATCH	8	/* max params per rewrite */
473 
474 
475 rewrite(pvp, ruleset)
476 	char **pvp;
477 	int ruleset;
478 {
479 	register char *ap;		/* address pointer */
480 	register char *rp;		/* rewrite pointer */
481 	register char **avp;		/* address vector pointer */
482 	register char **rvp;		/* rewrite vector pointer */
483 	struct rewrite *rwr;
484 	struct match mlist[MAXMATCH];
485 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
486 	extern bool sameword();
487 
488 # ifdef DEBUG
489 	if (Debug > 10)
490 	{
491 		printf("rewrite: original pvp:\n");
492 		printav(pvp);
493 	}
494 # endif DEBUG
495 
496 	/*
497 	**  Run through the list of rewrite rules, applying
498 	**	any that match.
499 	*/
500 
501 	for (rwr = RewriteRules[ruleset]; rwr != NULL; )
502 	{
503 # ifdef DEBUG
504 		if (Debug > 10)
505 		{
506 			printf("-----trying rule:\n");
507 			printav(rwr->r_lhs);
508 		}
509 # endif DEBUG
510 
511 		/* try to match on this rule */
512 		clrmatch(mlist);
513 		for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; )
514 		{
515 			ap = *avp;
516 			rp = *rvp;
517 
518 			if (rp == NULL)
519 			{
520 				/* end-of-pattern before end-of-address */
521 				goto fail;
522 			}
523 
524 			switch (*rp)
525 			{
526 				register STAB *s;
527 				register int class;
528 
529 			  case MATCHONE:
530 				/* match exactly one token */
531 				setmatch(mlist, rp[1], avp, avp);
532 				break;
533 
534 			  case MATCHANY:
535 				/* match any number of tokens */
536 				setmatch(mlist, rp[1], (char **) NULL, avp);
537 				break;
538 
539 			  case MATCHCLASS:
540 				/* match any token in a class */
541 				class = rp[1];
542 				if (!isalpha(class))
543 					goto fail;
544 				if (isupper(class))
545 					class -= 'A';
546 				else
547 					class -= 'a';
548 				s = stab(ap, ST_CLASS, ST_FIND);
549 				if (s == NULL || (s->s_class & (1 << class)) == 0)
550 					goto fail;
551 				break;
552 
553 			  default:
554 				/* must have exact match */
555 				if (!sameword(rp, ap))
556 					goto fail;
557 				break;
558 			}
559 
560 			/* successful match on this token */
561 			avp++;
562 			rvp++;
563 			continue;
564 
565 		  fail:
566 			/* match failed -- back up */
567 			while (--rvp >= rwr->r_lhs)
568 			{
569 				rp = *rvp;
570 				if (*rp == MATCHANY)
571 					break;
572 
573 				/* can't extend match: back up everything */
574 				avp--;
575 
576 				if (*rp == MATCHONE)
577 				{
578 					/* undo binding */
579 					setmatch(mlist, rp[1], (char **) NULL, (char **) NULL);
580 				}
581 			}
582 
583 			if (rvp < rwr->r_lhs)
584 			{
585 				/* total failure to match */
586 				break;
587 			}
588 		}
589 
590 		/*
591 		**  See if we successfully matched
592 		*/
593 
594 		if (rvp >= rwr->r_lhs && *rvp == NULL)
595 		{
596 # ifdef DEBUG
597 			if (Debug > 10)
598 			{
599 				printf("-----rule matches:\n");
600 				printav(rwr->r_rhs);
601 			}
602 # endif DEBUG
603 
604 			/* substitute */
605 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
606 			{
607 				rp = *rvp;
608 				if (*rp == MATCHANY)
609 				{
610 					register struct match *m;
611 					register char **pp;
612 					extern struct match *findmatch();
613 
614 					m = findmatch(mlist, rp[1]);
615 					if (m != NULL)
616 					{
617 						pp = m->firsttok;
618 						do
619 						{
620 							*avp++ = *pp;
621 						} while (pp++ != m->lasttok);
622 					}
623 				}
624 				else
625 					*avp++ = rp;
626 			}
627 			*avp++ = NULL;
628 			bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp);
629 # ifdef DEBUG
630 			if (Debug)
631 			{
632 				char **vp;
633 
634 				printf("rewritten as `");
635 				for (vp = pvp; *vp != NULL; vp++)
636 					xputs(*vp);
637 				printf("'\n");
638 			}
639 # endif DEBUG
640 			if (pvp[0][0] == CANONNET)
641 				break;
642 		}
643 		else
644 		{
645 # ifdef DEBUG
646 			if (Debug > 10)
647 				printf("----- rule fails\n");
648 # endif DEBUG
649 			rwr = rwr->r_next;
650 		}
651 	}
652 }
653 /*
654 **  SETMATCH -- set parameter value in match vector
655 **
656 **	Parameters:
657 **		mlist -- list of match values.
658 **		name -- the character name of this parameter.
659 **		first -- the first location of the replacement.
660 **		last -- the last location of the replacement.
661 **
662 **		If last == NULL, delete this entry.
663 **		If first == NULL, extend this entry (or add it if
664 **			it does not exist).
665 **
666 **	Returns:
667 **		nothing.
668 **
669 **	Side Effects:
670 **		munges with mlist.
671 */
672 
673 setmatch(mlist, name, first, last)
674 	struct match *mlist;
675 	char name;
676 	char **first;
677 	char **last;
678 {
679 	register struct match *m;
680 	struct match *nullm = NULL;
681 
682 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
683 	{
684 		if (m->name == name)
685 			break;
686 		if (m->name == '\0')
687 			nullm = m;
688 	}
689 
690 	if (m >= &mlist[MAXMATCH])
691 		m = nullm;
692 
693 	if (last == NULL)
694 	{
695 		m->name = '\0';
696 		return;
697 	}
698 
699 	if (m->name == '\0')
700 	{
701 		if (first == NULL)
702 			m->firsttok = last;
703 		else
704 			m->firsttok = first;
705 	}
706 	m->name = name;
707 	m->lasttok = last;
708 }
709 /*
710 **  FINDMATCH -- find match in mlist
711 **
712 **	Parameters:
713 **		mlist -- list to search.
714 **		name -- name to find.
715 **
716 **	Returns:
717 **		pointer to match structure.
718 **		NULL if no match.
719 **
720 **	Side Effects:
721 **		none.
722 */
723 
724 struct match *
725 findmatch(mlist, name)
726 	struct match *mlist;
727 	char name;
728 {
729 	register struct match *m;
730 
731 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
732 	{
733 		if (m->name == name)
734 			return (m);
735 	}
736 
737 	return (NULL);
738 }
739 /*
740 **  CLRMATCH -- clear match list
741 **
742 **	Parameters:
743 **		mlist -- list to clear.
744 **
745 **	Returns:
746 **		none.
747 **
748 **	Side Effects:
749 **		mlist is cleared.
750 */
751 
752 clrmatch(mlist)
753 	struct match *mlist;
754 {
755 	register struct match *m;
756 
757 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
758 		m->name = '\0';
759 }
760 /*
761 **  BUILDADDR -- build address from token vector.
762 **
763 **	Parameters:
764 **		tv -- token vector.
765 **		a -- pointer to address descriptor to fill.
766 **			If NULL, one will be allocated.
767 **
768 **	Returns:
769 **		'a'
770 **
771 **	Side Effects:
772 **		fills in 'a'
773 */
774 
775 ADDRESS *
776 buildaddr(tv, a)
777 	register char **tv;
778 	register ADDRESS *a;
779 {
780 	register int i;
781 	static char buf[MAXNAME];
782 	struct mailer **mp;
783 	register struct mailer *m;
784 
785 	if (a == NULL)
786 		a = (ADDRESS *) xalloc(sizeof *a);
787 	a->q_flags = 0;
788 	a->q_home = NULL;
789 
790 	/* figure out what net/mailer to use */
791 	if (**tv != CANONNET)
792 		syserr("buildaddr: no net");
793 	tv++;
794 	for (mp = Mailer, i = 0; (m = *mp++) != NULL; i++)
795 	{
796 		if (strcmp(m->m_name, *tv) == 0)
797 			break;
798 	}
799 	if (m == NULL)
800 		syserr("buildaddr: unknown net %s", *tv);
801 	a->q_mailer = i;
802 
803 	/* figure out what host (if any) */
804 	tv++;
805 	if (!bitset(M_LOCAL, m->m_flags))
806 	{
807 		if (**tv != CANONHOST)
808 			syserr("buildaddr: no host");
809 		tv++;
810 		a->q_host = *tv;
811 		tv++;
812 	}
813 	else
814 		a->q_host = NULL;
815 
816 	/* figure out the user */
817 	if (**tv != CANONUSER)
818 		syserr("buildaddr: no user");
819 	buf[0] = '\0';
820 	while (**++tv != NULL)
821 		(void) strcat(buf, *tv);
822 	a->q_user = buf;
823 
824 	return (a);
825 }
826 /*
827 **  SAMEADDR -- Determine if two addresses are the same
828 **
829 **	This is not just a straight comparison -- if the mailer doesn't
830 **	care about the host we just ignore it, etc.
831 **
832 **	Parameters:
833 **		a, b -- pointers to the internal forms to compare.
834 **		wildflg -- if TRUE, 'a' may have no user specified,
835 **			in which case it is to match anything.
836 **
837 **	Returns:
838 **		TRUE -- they represent the same mailbox.
839 **		FALSE -- they don't.
840 **
841 **	Side Effects:
842 **		none.
843 */
844 
845 bool
846 sameaddr(a, b, wildflg)
847 	register ADDRESS *a;
848 	register ADDRESS *b;
849 	bool wildflg;
850 {
851 	/* if they don't have the same mailer, forget it */
852 	if (a->q_mailer != b->q_mailer)
853 		return (FALSE);
854 
855 	/* if the user isn't the same, we can drop out */
856 	if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0)
857 		return (FALSE);
858 
859 	/* if the mailer ignores hosts, we have succeeded! */
860 	if (bitset(M_LOCAL, Mailer[a->q_mailer]->m_flags))
861 		return (TRUE);
862 
863 	/* otherwise compare hosts (but be careful for NULL ptrs) */
864 	if (a->q_host == NULL || b->q_host == NULL)
865 		return (FALSE);
866 	if (strcmp(a->q_host, b->q_host) != 0)
867 		return (FALSE);
868 
869 	return (TRUE);
870 }
871 /*
872 **  PRINTADDR -- print address (for debugging)
873 **
874 **	Parameters:
875 **		a -- the address to print
876 **		follow -- follow the q_next chain.
877 **
878 **	Returns:
879 **		none.
880 **
881 **	Side Effects:
882 **		none.
883 */
884 
885 printaddr(a, follow)
886 	register ADDRESS *a;
887 	bool follow;
888 {
889 	while (a != NULL)
890 	{
891 		printf("addr@%x: ", a);
892 		(void) fflush(stdout);
893 		printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr,
894 		       a->q_mailer, Mailer[a->q_mailer]->m_name, a->q_host, a->q_user);
895 		printf("\tnext=%x flags=%o, rmailer %d\n", a->q_next,
896 		       a->q_flags, a->q_rmailer);
897 
898 		if (!follow)
899 			return;
900 		a = a->q_next;
901 	}
902 }
903