1 # include <stdio.h>
2 # include <ctype.h>
3 # include "sendmail.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	3.16	08/09/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
**	Side Effects:
**		Sets the global To to addr.  If the address cannot be
**		resolved to a mailer, setstat and usrerr are called.
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 # define DELIMCHARS	"$()<>,;\\\"\r\n"	/* word delimiters */
50 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char **pvp;
59 	register struct mailer *m;
60 	extern char **prescan();
61 	extern ADDRESS *buildaddr();
62 
63 	/*
64 	**  Initialize and prescan address.
65 	*/
66 
67 	To = addr;
68 # ifdef DEBUG
69 	if (Debug)
70 		printf("\n--parse(%s)\n", addr);
71 # endif DEBUG
72 
73 	pvp = prescan(addr, '\0');
74 	if (pvp == NULL)
75 		return (NULL);
76 
77 	/*
78 	**  Apply rewriting rules.
79 	*/
80 
81 	rewrite(pvp, 0);
82 
83 	/*
84 	**  See if we resolved to a real mailer.
85 	*/
86 
87 	if (pvp[0][0] != CANONNET)
88 	{
89 		setstat(EX_USAGE);
90 		usrerr("cannot resolve name");
91 		return (NULL);
92 	}
93 
94 	/*
95 	**  Build canonical address from pvp.
96 	*/
97 
98 	a = buildaddr(pvp, a);
99 	m = Mailer[a->q_mailer];
100 
101 	/*
102 	**  Make local copies of the host & user and then
103 	**  transport them out.
104 	*/
105 
106 	if (copyf > 0)
107 		a->q_paddr = newstr(addr);
108 	else
109 		a->q_paddr = addr;
110 
111 	if (copyf >= 0)
112 	{
113 		if (a->q_host != NULL)
114 			a->q_host = newstr(a->q_host);
115 		else
116 			a->q_host = "";
117 		if (a->q_user != a->q_paddr)
118 			a->q_user = newstr(a->q_user);
119 	}
120 
121 	/*
122 	**  Do UPPER->lower case mapping unless inhibited.
123 	*/
124 
125 	if (!bitset(M_HST_UPPER, m->m_flags))
126 		makelower(a->q_host);
127 	if (!bitset(M_USR_UPPER, m->m_flags))
128 		makelower(a->q_user);
129 
130 	/*
131 	**  Compute return value.
132 	*/
133 
134 # ifdef DEBUG
135 	if (Debug)
136 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
137 		    addr, a->q_host, a->q_user, a->q_mailer);
138 # endif DEBUG
139 
140 	return (a);
141 }
142 /*
143 **  PRESCAN -- Prescan name and make it canonical
144 **
**	Scans a name and turns it into canonical form.  This involves
**	deleting blanks, comments (in parentheses), and turning the
**	word "at" into an at-sign ("@").  The name is broken into
**	tokens as this is done; the tokens are copied into an internal
**	buffer, so the original name is not modified.
150 **
151 **	This routine knows about quoted strings and angle brackets.
152 **
153 **	There are certain subtleties to this routine.  The one that
154 **	comes to mind now is that backslashes on the ends of names
155 **	are silently stripped off; this is intentional.  The problem
156 **	is that some versions of sndmsg (like at LBL) set the kill
157 **	character to something other than @ when reading addresses;
158 **	so people type "csvax.eric\@berkeley" -- which screws up the
159 **	berknet mailer.
160 **
161 **	Parameters:
162 **		addr -- the name to chomp.
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
**			If delim is '\t', "$" sequences ($+, $-, $=,
**			$#, $@, $: and $$) are also interpreted.
166 **
167 **	Returns:
168 **		A pointer to a vector of tokens.
169 **		NULL on error.
170 **
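**	Side Effects:
**		The token vector and the tokens themselves are kept
**		in static storage that is overwritten by every call.
**		Errors are reported using usrerr.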
173 */
174 
175 # define OPER		1
176 # define ATOM		2
177 # define EOTOK		3
178 # define QSTRING	4
179 # define SPACE		5
180 # define DOLLAR		6
181 # define GETONE		7
182 
183 char **
184 prescan(addr, delim)
185 	char *addr;
186 	char delim;
187 {
188 	register char *p;
189 	static char buf[MAXNAME+MAXATOM];
190 	static char *av[MAXATOM+1];
191 	char **avp;
192 	bool space;
193 	bool bslashmode;
194 	int cmntcnt;
195 	int brccnt;
196 	register char c;
197 	char *tok;
198 	register char *q;
199 	register int state;
200 	int nstate;
201 	extern char lower();
202 
203 	space = FALSE;
204 	q = buf;
205 	bslashmode = FALSE;
206 	cmntcnt = brccnt = 0;
207 	avp = av;
208 	state = OPER;
209 	for (p = addr; *p != '\0' && *p != delim; )
210 	{
211 		/* read a token */
212 		tok = q;
213 		while ((c = *p++) != '\0' && c != delim)
214 		{
215 			/* chew up special characters */
216 			*q = '\0';
217 			if (bslashmode)
218 			{
219 				c |= 0200;
220 				bslashmode = FALSE;
221 			}
222 			else if (c == '\\')
223 			{
224 				bslashmode = TRUE;
225 				continue;
226 			}
227 
228 			nstate = toktype(c);
229 			switch (state)
230 			{
231 			  case QSTRING:		/* in quoted string */
232 				if (c == '"')
233 					state = OPER;
234 				break;
235 
236 			  case ATOM:		/* regular atom */
237 				state = nstate;
238 				if (state != ATOM)
239 				{
240 					state = EOTOK;
241 					p--;
242 				}
243 				break;
244 
245 			  case GETONE:		/* grab one character */
246 				state = OPER;
247 				break;
248 
249 			  case EOTOK:		/* after atom or q-string */
250 				state = nstate;
251 				if (state == SPACE)
252 					continue;
253 				break;
254 
255 			  case SPACE:		/* linear white space */
256 				state = nstate;
257 				space = TRUE;
258 				continue;
259 
260 			  case OPER:		/* operator */
261 				if (nstate == SPACE)
262 					continue;
263 				state = nstate;
264 				break;
265 
266 			  case DOLLAR:		/* $- etc. */
267 				state = OPER;
268 				switch (c)
269 				{
270 				  case '$':		/* literal $ */
271 					break;
272 
273 				  case '+':		/* match anything */
274 					c = MATCHANY;
275 					state = GETONE;
276 					break;
277 
278 				  case '-':		/* match one token */
279 					c = MATCHONE;
280 					state = GETONE;
281 					break;
282 
283 				  case '=':		/* match one token of class */
284 					c = MATCHCLASS;
285 					state = GETONE;
286 					break;
287 
288 				  case '#':		/* canonical net name */
289 					c = CANONNET;
290 					break;
291 
292 				  case '@':		/* canonical host name */
293 					c = CANONHOST;
294 					break;
295 
296 				  case ':':		/* canonical user name */
297 					c = CANONUSER;
298 					break;
299 
300 				  default:
301 					c = '$';
302 					state = OPER;
303 					p--;
304 					break;
305 				}
306 				break;
307 
308 			  default:
309 				syserr("prescan: unknown state %d", state);
310 			}
311 
312 			if (state == OPER)
313 				space = FALSE;
314 			else if (state == EOTOK)
315 				break;
316 			if (c == '$' && delim == '\t')
317 			{
318 				state = DOLLAR;
319 				continue;
320 			}
321 
322 			/* squirrel it away */
323 			if (q >= &buf[sizeof buf - 5])
324 			{
325 				usrerr("Address too long");
326 				return (NULL);
327 			}
328 			if (space)
329 				*q++ = SPACESUB;
330 			*q++ = c;
331 
332 			/* decide whether this represents end of token */
333 			if (state == OPER)
334 				break;
335 		}
336 		if (c == '\0' || c == delim)
337 			p--;
338 
339 		/* new token */
340 		if (tok == q)
341 			continue;
342 		*q++ = '\0';
343 
344 		c = tok[0];
345 		if (c == '(')
346 		{
347 			cmntcnt++;
348 			continue;
349 		}
350 		else if (c == ')')
351 		{
352 			if (cmntcnt <= 0)
353 			{
354 				usrerr("Unbalanced ')'");
355 				return (NULL);
356 			}
357 			else
358 			{
359 				cmntcnt--;
360 				continue;
361 			}
362 		}
363 		else if (cmntcnt > 0)
364 			continue;
365 
366 		*avp++ = tok;
367 
368 		/* we prefer <> specs */
369 		if (c == '<')
370 		{
371 			if (brccnt < 0)
372 			{
373 				usrerr("multiple < spec");
374 				return (NULL);
375 			}
376 			brccnt++;
377 			space = FALSE;
378 			if (brccnt == 1)
379 			{
380 				/* we prefer using machine readable name */
381 				q = buf;
382 				*q = '\0';
383 				avp = av;
384 				continue;
385 			}
386 		}
387 		else if (c == '>')
388 		{
389 			if (brccnt <= 0)
390 			{
391 				usrerr("Unbalanced `>'");
392 				return (NULL);
393 			}
394 			else
395 				brccnt--;
396 			if (brccnt <= 0)
397 			{
398 				brccnt = -1;
399 				continue;
400 			}
401 		}
402 
403 		/*
404 		**  Turn "at" into "@",
405 		**	but only if "at" is a word.
406 		*/
407 
408 		if (lower(tok[0]) == 'a' && lower(tok[1]) == 't' && tok[2] == '\0')
409 		{
410 			tok[0] = '@';
411 			tok[1] = '\0';
412 		}
413 	}
414 	*avp = NULL;
415 	if (cmntcnt > 0)
416 		usrerr("Unbalanced '('");
417 	else if (brccnt > 0)
418 		usrerr("Unbalanced '<'");
419 	else if (state == QSTRING)
420 		usrerr("Unbalanced '\"'");
421 	else if (av[0] != NULL)
422 		return (av);
423 	return (NULL);
424 }
425 /*
426 **  TOKTYPE -- return token type
427 **
428 **	Parameters:
429 **		c -- the character in question.
430 **
431 **	Returns:
432 **		Its type.
433 **
434 **	Side Effects:
435 **		none.
436 */
437 
438 toktype(c)
439 	register char c;
440 {
441 	static char buf[50];
442 	static bool firstime = TRUE;
443 
444 	if (firstime)
445 	{
446 		firstime = FALSE;
447 		(void) expand("$o", buf, &buf[sizeof buf - 1]);
448 		strcat(buf, DELIMCHARS);
449 	}
450 	if (isspace(c))
451 		return (SPACE);
452 	if (iscntrl(c) || index(buf, c) != NULL)
453 		return (OPER);
454 	return (ATOM);
455 }
456 /*
457 **  REWRITE -- apply rewrite rules to token vector.
458 **
**	Parameters:
**		pvp -- pointer to token vector.
**		ruleset -- index into RewriteRules of the list of
**			rules to apply.
461 **
462 **	Returns:
463 **		none.
464 **
465 **	Side Effects:
466 **		pvp is modified.
467 */
468 
/*
**  One parameter binding made while matching the left hand side
**  of a rewrite rule.  The tokens firsttok through lasttok
**  (inclusive) of the address vector are bound to the parameter
**  called name.  A name of '\0' marks an unused entry.
*/

struct match
{
	char	**firsttok;	/* first token matched */
	char	**lasttok;	/* last token matched */
	char	name;		/* name of parameter */
};
475 
476 # define MAXMATCH	8	/* max params per rewrite */
477 
478 
479 rewrite(pvp, ruleset)
480 	char **pvp;
481 	int ruleset;
482 {
483 	register char *ap;		/* address pointer */
484 	register char *rp;		/* rewrite pointer */
485 	register char **avp;		/* address vector pointer */
486 	register char **rvp;		/* rewrite vector pointer */
487 	struct rewrite *rwr;
488 	struct match mlist[MAXMATCH];
489 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
490 	extern bool sameword();
491 
492 # ifdef DEBUGX
493 	if (Debug)
494 	{
495 		printf("rewrite: original pvp:\n");
496 		printav(pvp);
497 	}
498 # endif DEBUGX
499 
500 	/*
501 	**  Run through the list of rewrite rules, applying
502 	**	any that match.
503 	*/
504 
505 	for (rwr = RewriteRules[ruleset]; rwr != NULL; )
506 	{
507 # ifdef DEBUGX
508 		if (Debug)
509 		{
510 			printf("-----trying rule:\n");
511 			printav(rwr->r_lhs);
512 		}
513 # endif DEBUGX
514 
515 		/* try to match on this rule */
516 		clrmatch(mlist);
517 		for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; )
518 		{
519 			ap = *avp;
520 			rp = *rvp;
521 
522 			if (rp == NULL)
523 			{
524 				/* end-of-pattern before end-of-address */
525 				goto fail;
526 			}
527 
528 			switch (*rp)
529 			{
530 				register STAB *s;
531 				register int class;
532 
533 			  case MATCHONE:
534 				/* match exactly one token */
535 				setmatch(mlist, rp[1], avp, avp);
536 				break;
537 
538 			  case MATCHANY:
539 				/* match any number of tokens */
540 				setmatch(mlist, rp[1], (char **) NULL, avp);
541 				break;
542 
543 			  case MATCHCLASS:
544 				/* match any token in a class */
545 				class = rp[1];
546 				if (!isalpha(class))
547 					goto fail;
548 				if (isupper(class))
549 					class -= 'A';
550 				else
551 					class -= 'a';
552 				s = stab(ap, ST_FIND);
553 				if (s == NULL || (s->s_class & (1 << class)) == 0)
554 					goto fail;
555 				break;
556 
557 			  default:
558 				/* must have exact match */
559 				if (!sameword(rp, ap))
560 					goto fail;
561 				break;
562 			}
563 
564 			/* successful match on this token */
565 			avp++;
566 			rvp++;
567 			continue;
568 
569 		  fail:
570 			/* match failed -- back up */
571 			while (--rvp >= rwr->r_lhs)
572 			{
573 				rp = *rvp;
574 				if (*rp == MATCHANY)
575 					break;
576 
577 				/* can't extend match: back up everything */
578 				avp--;
579 
580 				if (*rp == MATCHONE)
581 				{
582 					/* undo binding */
583 					setmatch(mlist, rp[1], (char **) NULL, (char **) NULL);
584 				}
585 			}
586 
587 			if (rvp < rwr->r_lhs)
588 			{
589 				/* total failure to match */
590 				break;
591 			}
592 		}
593 
594 		/*
595 		**  See if we successfully matched
596 		*/
597 
598 		if (rvp >= rwr->r_lhs && *rvp == NULL)
599 		{
600 # ifdef DEBUGX
601 			if (Debug)
602 			{
603 				printf("-----rule matches:\n");
604 				printav(rwr->r_rhs);
605 			}
606 # endif DEBUGX
607 
608 			/* substitute */
609 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
610 			{
611 				rp = *rvp;
612 				if (*rp == MATCHANY)
613 				{
614 					register struct match *m;
615 					register char **pp;
616 					extern struct match *findmatch();
617 
618 					m = findmatch(mlist, rp[1]);
619 					if (m != NULL)
620 					{
621 						pp = m->firsttok;
622 						do
623 						{
624 							*avp++ = *pp;
625 						} while (pp++ != m->lasttok);
626 					}
627 				}
628 				else
629 					*avp++ = rp;
630 			}
631 			*avp++ = NULL;
632 			bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp);
633 # ifdef DEBUG
634 			if (Debug)
635 			{
636 				char **vp;
637 
638 				printf("rewritten as `");
639 				for (vp = pvp; *vp != NULL; vp++)
640 					xputs(*vp);
641 				printf("'\n");
642 			}
643 # endif DEBUG
644 			if (pvp[0][0] == CANONNET)
645 				break;
646 		}
647 		else
648 		{
649 # ifdef DEBUGX
650 			if (Debug)
651 				printf("----- rule fails\n");
652 # endif DEBUGX
653 			rwr = rwr->r_next;
654 		}
655 	}
656 }
657 /*
658 **  SETMATCH -- set parameter value in match vector
659 **
660 **	Parameters:
661 **		mlist -- list of match values.
662 **		name -- the character name of this parameter.
663 **		first -- the first location of the replacement.
664 **		last -- the last location of the replacement.
665 **
666 **		If last == NULL, delete this entry.
667 **		If first == NULL, extend this entry (or add it if
668 **			it does not exist).
669 **
670 **	Returns:
671 **		nothing.
672 **
673 **	Side Effects:
674 **		munges with mlist.
675 */
676 
677 setmatch(mlist, name, first, last)
678 	struct match *mlist;
679 	char name;
680 	char **first;
681 	char **last;
682 {
683 	register struct match *m;
684 	struct match *nullm = NULL;
685 
686 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
687 	{
688 		if (m->name == name)
689 			break;
690 		if (m->name == '\0')
691 			nullm = m;
692 	}
693 
694 	if (m >= &mlist[MAXMATCH])
695 		m = nullm;
696 
697 	if (last == NULL)
698 	{
699 		m->name = '\0';
700 		return;
701 	}
702 
703 	if (m->name == '\0')
704 	{
705 		if (first == NULL)
706 			m->firsttok = last;
707 		else
708 			m->firsttok = first;
709 	}
710 	m->name = name;
711 	m->lasttok = last;
712 }
713 /*
714 **  FINDMATCH -- find match in mlist
715 **
716 **	Parameters:
717 **		mlist -- list to search.
718 **		name -- name to find.
719 **
720 **	Returns:
721 **		pointer to match structure.
722 **		NULL if no match.
723 **
724 **	Side Effects:
725 **		none.
726 */
727 
728 struct match *
729 findmatch(mlist, name)
730 	struct match *mlist;
731 	char name;
732 {
733 	register struct match *m;
734 
735 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
736 	{
737 		if (m->name == name)
738 			return (m);
739 	}
740 
741 	return (NULL);
742 }
743 /*
744 **  CLRMATCH -- clear match list
745 **
746 **	Parameters:
747 **		mlist -- list to clear.
748 **
749 **	Returns:
750 **		none.
751 **
752 **	Side Effects:
753 **		mlist is cleared.
754 */
755 
756 clrmatch(mlist)
757 	struct match *mlist;
758 {
759 	register struct match *m;
760 
761 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
762 		m->name = '\0';
763 }
764 /*
765 **  BUILDADDR -- build address from token vector.
766 **
767 **	Parameters:
768 **		tv -- token vector.
769 **		a -- pointer to address descriptor to fill.
770 **			If NULL, one will be allocated.
771 **
772 **	Returns:
773 **		'a'
774 **
775 **	Side Effects:
776 **		fills in 'a'
777 */
778 
779 ADDRESS *
780 buildaddr(tv, a)
781 	register char **tv;
782 	register ADDRESS *a;
783 {
784 	register int i;
785 	static char buf[MAXNAME];
786 	struct mailer **mp;
787 	register struct mailer *m;
788 
789 	if (a == NULL)
790 		a = (ADDRESS *) xalloc(sizeof *a);
791 	a->q_flags = 0;
792 	a->q_home = NULL;
793 
794 	/* figure out what net/mailer to use */
795 	if (**tv != CANONNET)
796 		syserr("buildaddr: no net");
797 	tv++;
798 	for (mp = Mailer, i = 0; (m = *mp++) != NULL; i++)
799 	{
800 		if (strcmp(m->m_name, *tv) == 0)
801 			break;
802 	}
803 	if (m == NULL)
804 		syserr("buildaddr: unknown net %s", *tv);
805 	a->q_mailer = i;
806 
807 	/* figure out what host (if any) */
808 	tv++;
809 	if (!bitset(M_NOHOST, m->m_flags))
810 	{
811 		if (**tv != CANONHOST)
812 			syserr("buildaddr: no host");
813 		tv++;
814 		a->q_host = *tv;
815 		tv++;
816 	}
817 	else
818 		a->q_host = NULL;
819 
820 	/* figure out the user */
821 	if (**tv != CANONUSER)
822 		syserr("buildaddr: no user");
823 	buf[0] = '\0';
824 	while (**++tv != NULL)
825 		(void) strcat(buf, *tv);
826 	a->q_user = buf;
827 
828 	return (a);
829 }
830 /*
831 **  SAMEADDR -- Determine if two addresses are the same
832 **
833 **	This is not just a straight comparison -- if the mailer doesn't
834 **	care about the host we just ignore it, etc.
835 **
836 **	Parameters:
837 **		a, b -- pointers to the internal forms to compare.
838 **		wildflg -- if TRUE, 'a' may have no user specified,
839 **			in which case it is to match anything.
840 **
841 **	Returns:
842 **		TRUE -- they represent the same mailbox.
843 **		FALSE -- they don't.
844 **
845 **	Side Effects:
846 **		none.
847 */
848 
849 bool
850 sameaddr(a, b, wildflg)
851 	register ADDRESS *a;
852 	register ADDRESS *b;
853 	bool wildflg;
854 {
855 	/* if they don't have the same mailer, forget it */
856 	if (a->q_mailer != b->q_mailer)
857 		return (FALSE);
858 
859 	/* if the user isn't the same, we can drop out */
860 	if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0)
861 		return (FALSE);
862 
863 	/* if the mailer ignores hosts, we have succeeded! */
864 	if (bitset(M_NOHOST, Mailer[a->q_mailer]->m_flags))
865 		return (TRUE);
866 
867 	/* otherwise compare hosts (but be careful for NULL ptrs) */
868 	if (a->q_host == NULL || b->q_host == NULL)
869 		return (FALSE);
870 	if (strcmp(a->q_host, b->q_host) != 0)
871 		return (FALSE);
872 
873 	return (TRUE);
874 }
875 /*
876 **  PRINTADDR -- print address (for debugging)
877 **
878 **	Parameters:
879 **		a -- the address to print
880 **		follow -- follow the q_next chain.
881 **
882 **	Returns:
883 **		none.
884 **
885 **	Side Effects:
886 **		none.
887 */
888 
889 printaddr(a, follow)
890 	register ADDRESS *a;
891 	bool follow;
892 {
893 	while (a != NULL)
894 	{
895 		printf("addr@%x: ", a);
896 		(void) fflush(stdout);
897 		printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr,
898 		       a->q_mailer, Mailer[a->q_mailer]->m_name, a->q_host, a->q_user);
899 		printf("\tnext=%x flags=%o, rmailer %d\n", a->q_next,
900 		       a->q_flags, a->q_rmailer);
901 
902 		if (!follow)
903 			return;
904 		a = a->q_next;
905 	}
906 }
907