1 # include <stdio.h>
2 # include <ctype.h>
3 # include "sendmail.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	3.14	08/08/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
39 **	Side Effects:
40 **		none
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 # define DELIMCHARS	"$()<>,;\\\"\r\n"	/* word delimiters */
50 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char **pvp;
59 	register struct mailer *m;
60 	extern char **prescan();
61 	extern char *newstr();
62 	extern char *strcpy();
63 	extern ADDRESS *buildaddr();
64 
65 	/*
66 	**  Initialize and prescan address.
67 	*/
68 
69 	To = addr;
70 # ifdef DEBUG
71 	if (Debug)
72 		printf("\n--parse(%s)\n", addr);
73 # endif DEBUG
74 
75 	pvp = prescan(addr, '\0');
76 	if (pvp == NULL)
77 		return (NULL);
78 
79 	/*
80 	**  Apply rewriting rules.
81 	*/
82 
83 	rewrite(pvp, 0);
84 
85 	/*
86 	**  See if we resolved to a real mailer.
87 	*/
88 
89 	if (pvp[0][0] != CANONNET)
90 	{
91 		setstat(EX_USAGE);
92 		usrerr("cannot resolve name");
93 		return (NULL);
94 	}
95 
96 	/*
97 	**  Build canonical address from pvp.
98 	*/
99 
100 	a = buildaddr(pvp, a);
101 	m = Mailer[a->q_mailer];
102 
103 	/*
104 	**  Make local copies of the host & user and then
105 	**  transport them out.
106 	*/
107 
108 	if (copyf > 0)
109 		a->q_paddr = newstr(addr);
110 	else
111 		a->q_paddr = addr;
112 
113 	if (copyf >= 0)
114 	{
115 		if (a->q_host != NULL)
116 			a->q_host = newstr(a->q_host);
117 		else
118 			a->q_host = "";
119 		if (a->q_user != a->q_paddr)
120 			a->q_user = newstr(a->q_user);
121 	}
122 
123 	/*
124 	**  Do UPPER->lower case mapping unless inhibited.
125 	*/
126 
127 	if (!bitset(M_HST_UPPER, m->m_flags))
128 		makelower(a->q_host);
129 	if (!bitset(M_USR_UPPER, m->m_flags))
130 		makelower(a->q_user);
131 
132 	/*
133 	**  Compute return value.
134 	*/
135 
136 # ifdef DEBUG
137 	if (Debug)
138 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
139 		    addr, a->q_host, a->q_user, a->q_mailer);
140 # endif DEBUG
141 
142 	return (a);
143 }
144 /*
145 **  PRESCAN -- Prescan name and make it canonical
146 **
147 **	Scans a name and turns it into canonical form.  This involves
148 **	deleting blanks, comments (in parentheses), and turning the
149 **	word "at" into an at-sign ("@").  The name is copied as this
150 **	is done; it is legal to copy a name onto itself, since this
151 **	process can only make things smaller.
152 **
153 **	This routine knows about quoted strings and angle brackets.
154 **
155 **	There are certain subtleties to this routine.  The one that
156 **	comes to mind now is that backslashes on the ends of names
157 **	are silently stripped off; this is intentional.  The problem
158 **	is that some versions of sndmsg (like at LBL) set the kill
159 **	character to something other than @ when reading addresses;
160 **	so people type "csvax.eric\@berkeley" -- which screws up the
161 **	berknet mailer.
162 **
163 **	Parameters:
164 **		addr -- the name to chomp.
165 **		delim -- the delimiter for the address, normally
166 **			'\0' or ','; \0 is accepted in any case.
167 **			are moving in place; set buflim to high core.
168 **
169 **	Returns:
170 **		A pointer to a vector of tokens.
171 **		NULL on error.
172 **
173 **	Side Effects:
174 **		none.
175 */
176 
177 # define OPER		1
178 # define ATOM		2
179 # define EOTOK		3
180 # define QSTRING	4
181 # define SPACE		5
182 # define DOLLAR		6
183 # define GETONE		7
184 
185 char **
186 prescan(addr, delim)
187 	char *addr;
188 	char delim;
189 {
190 	register char *p;
191 	static char buf[MAXNAME+MAXATOM];
192 	static char *av[MAXATOM+1];
193 	char **avp;
194 	bool space;
195 	bool bslashmode;
196 	int cmntcnt;
197 	int brccnt;
198 	register char c;
199 	char *tok;
200 	register char *q;
201 	extern char *index();
202 	register int state;
203 	int nstate;
204 
205 	space = FALSE;
206 	q = buf;
207 	bslashmode = FALSE;
208 	cmntcnt = brccnt = 0;
209 	avp = av;
210 	state = OPER;
211 	for (p = addr; *p != '\0' && *p != delim; )
212 	{
213 		/* read a token */
214 		tok = q;
215 		while ((c = *p++) != '\0' && c != delim)
216 		{
217 			/* chew up special characters */
218 			*q = '\0';
219 			if (bslashmode)
220 			{
221 				c |= 0200;
222 				bslashmode = FALSE;
223 			}
224 			else if (c == '\\')
225 			{
226 				bslashmode = TRUE;
227 				continue;
228 			}
229 
230 			nstate = toktype(c);
231 			switch (state)
232 			{
233 			  case QSTRING:		/* in quoted string */
234 				if (c == '"')
235 					state = OPER;
236 				break;
237 
238 			  case ATOM:		/* regular atom */
239 				state = nstate;
240 				if (state != ATOM)
241 				{
242 					state = EOTOK;
243 					p--;
244 				}
245 				break;
246 
247 			  case GETONE:		/* grab one character */
248 				state = OPER;
249 				break;
250 
251 			  case EOTOK:		/* after atom or q-string */
252 				state = nstate;
253 				if (state == SPACE)
254 					continue;
255 				break;
256 
257 			  case SPACE:		/* linear white space */
258 				state = nstate;
259 				space = TRUE;
260 				continue;
261 
262 			  case OPER:		/* operator */
263 				if (nstate == SPACE)
264 					continue;
265 				state = nstate;
266 				break;
267 
268 			  case DOLLAR:		/* $- etc. */
269 				state = OPER;
270 				switch (c)
271 				{
272 				  case '$':		/* literal $ */
273 					break;
274 
275 				  case '+':		/* match anything */
276 					c = MATCHANY;
277 					state = GETONE;
278 					break;
279 
280 				  case '-':		/* match one token */
281 					c = MATCHONE;
282 					state = GETONE;
283 					break;
284 
285 				  case '=':		/* match one token of class */
286 					c = MATCHCLASS;
287 					state = GETONE;
288 					break;
289 
290 				  case '#':		/* canonical net name */
291 					c = CANONNET;
292 					break;
293 
294 				  case '@':		/* canonical host name */
295 					c = CANONHOST;
296 					break;
297 
298 				  case ':':		/* canonical user name */
299 					c = CANONUSER;
300 					break;
301 
302 				  default:
303 					c = '$';
304 					state = OPER;
305 					p--;
306 					break;
307 				}
308 				break;
309 
310 			  default:
311 				syserr("prescan: unknown state %d", state);
312 			}
313 
314 			if (state == OPER)
315 				space = FALSE;
316 			else if (state == EOTOK)
317 				break;
318 			if (c == '$' && delim == '\t')
319 			{
320 				state = DOLLAR;
321 				continue;
322 			}
323 
324 			/* squirrel it away */
325 			if (q >= &buf[sizeof buf - 5])
326 			{
327 				usrerr("Address too long");
328 				return (NULL);
329 			}
330 			if (space)
331 				*q++ = SPACESUB;
332 			*q++ = c;
333 
334 			/* decide whether this represents end of token */
335 			if (state == OPER)
336 				break;
337 		}
338 		if (c == '\0' || c == delim)
339 			p--;
340 
341 		/* new token */
342 		if (tok == q)
343 			continue;
344 		*q++ = '\0';
345 
346 		c = tok[0];
347 		if (c == '(')
348 		{
349 			cmntcnt++;
350 			continue;
351 		}
352 		else if (c == ')')
353 		{
354 			if (cmntcnt <= 0)
355 			{
356 				usrerr("Unbalanced ')'");
357 				return (NULL);
358 			}
359 			else
360 			{
361 				cmntcnt--;
362 				continue;
363 			}
364 		}
365 		else if (cmntcnt > 0)
366 			continue;
367 
368 		*avp++ = tok;
369 
370 		/* we prefer <> specs */
371 		if (c == '<')
372 		{
373 			if (brccnt < 0)
374 			{
375 				usrerr("multiple < spec");
376 				return (NULL);
377 			}
378 			brccnt++;
379 			space = FALSE;
380 			if (brccnt == 1)
381 			{
382 				/* we prefer using machine readable name */
383 				q = buf;
384 				*q = '\0';
385 				avp = av;
386 				continue;
387 			}
388 		}
389 		else if (c == '>')
390 		{
391 			if (brccnt <= 0)
392 			{
393 				usrerr("Unbalanced `>'");
394 				return (NULL);
395 			}
396 			else
397 				brccnt--;
398 			if (brccnt <= 0)
399 			{
400 				brccnt = -1;
401 				continue;
402 			}
403 		}
404 
405 		/*
406 		**  Turn "at" into "@",
407 		**	but only if "at" is a word.
408 		*/
409 
410 		if (lower(tok[0]) == 'a' && lower(tok[1]) == 't' && tok[2] == '\0')
411 		{
412 			tok[0] = '@';
413 			tok[1] = '\0';
414 		}
415 	}
416 	*avp = NULL;
417 	if (cmntcnt > 0)
418 		usrerr("Unbalanced '('");
419 	else if (brccnt > 0)
420 		usrerr("Unbalanced '<'");
421 	else if (state == QSTRING)
422 		usrerr("Unbalanced '\"'");
423 	else if (av[0] != NULL)
424 		return (av);
425 	return (NULL);
426 }
427 /*
428 **  TOKTYPE -- return token type
429 **
430 **	Parameters:
431 **		c -- the character in question.
432 **
433 **	Returns:
434 **		Its type.
435 **
436 **	Side Effects:
437 **		none.
438 */
439 
440 toktype(c)
441 	register char c;
442 {
443 	static char buf[50];
444 	static bool firstime = TRUE;
445 
446 	if (firstime)
447 	{
448 		firstime = FALSE;
449 		expand("$o", buf, &buf[sizeof buf - 1]);
450 		strcat(buf, DELIMCHARS);
451 	}
452 	if (isspace(c))
453 		return (SPACE);
454 	if (iscntrl(c) || index(buf, c) != NULL)
455 		return (OPER);
456 	return (ATOM);
457 }
458 /*
459 **  REWRITE -- apply rewrite rules to token vector.
460 **
461 **	Parameters:
462 **		pvp -- pointer to token vector.
463 **
464 **	Returns:
465 **		none.
466 **
467 **	Side Effects:
468 **		pvp is modified.
469 */
470 
471 struct match
472 {
473 	char	**firsttok;	/* first token matched */
474 	char	**lasttok;	/* last token matched */
475 	char	name;		/* name of parameter */
476 };
477 
478 # define MAXMATCH	8	/* max params per rewrite */
479 
480 
481 rewrite(pvp, ruleset)
482 	char **pvp;
483 	int ruleset;
484 {
485 	register char *ap;		/* address pointer */
486 	register char *rp;		/* rewrite pointer */
487 	register char **avp;		/* address vector pointer */
488 	register char **rvp;		/* rewrite vector pointer */
489 	struct rewrite *rwr;
490 	struct match mlist[MAXMATCH];
491 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
492 	extern bool sameword();
493 
494 # ifdef DEBUGX
495 	if (Debug)
496 	{
497 		printf("rewrite: original pvp:\n");
498 		printav(pvp);
499 	}
500 # endif DEBUGX
501 
502 	/*
503 	**  Run through the list of rewrite rules, applying
504 	**	any that match.
505 	*/
506 
507 	for (rwr = RewriteRules[ruleset]; rwr != NULL; )
508 	{
509 # ifdef DEBUGX
510 		if (Debug)
511 		{
512 			printf("-----trying rule:\n");
513 			printav(rwr->r_lhs);
514 		}
515 # endif DEBUGX
516 
517 		/* try to match on this rule */
518 		clrmatch(mlist);
519 		for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; )
520 		{
521 			ap = *avp;
522 			rp = *rvp;
523 
524 			if (rp == NULL)
525 			{
526 				/* end-of-pattern before end-of-address */
527 				goto fail;
528 			}
529 
530 			switch (*rp)
531 			{
532 				register STAB *s;
533 				register int class;
534 
535 			  case MATCHONE:
536 				/* match exactly one token */
537 				setmatch(mlist, rp[1], avp, avp);
538 				break;
539 
540 			  case MATCHANY:
541 				/* match any number of tokens */
542 				setmatch(mlist, rp[1], NULL, avp);
543 				break;
544 
545 			  case MATCHCLASS:
546 				/* match any token in a class */
547 				class = rp[1];
548 				if (!isalpha(class))
549 					goto fail;
550 				if (isupper(class))
551 					class -= 'A';
552 				else
553 					class -= 'a';
554 				s = stab(ap, ST_FIND);
555 				if (s == NULL || (s->s_class & (1 << class)) == 0)
556 					goto fail;
557 				break;
558 
559 			  default:
560 				/* must have exact match */
561 				if (!sameword(rp, ap))
562 					goto fail;
563 				break;
564 			}
565 
566 			/* successful match on this token */
567 			avp++;
568 			rvp++;
569 			continue;
570 
571 		  fail:
572 			/* match failed -- back up */
573 			while (--rvp >= rwr->r_lhs)
574 			{
575 				rp = *rvp;
576 				if (*rp == MATCHANY)
577 					break;
578 
579 				/* can't extend match: back up everything */
580 				avp--;
581 
582 				if (*rp == MATCHONE)
583 				{
584 					/* undo binding */
585 					setmatch(mlist, rp[1], NULL, NULL);
586 				}
587 			}
588 
589 			if (rvp < rwr->r_lhs)
590 			{
591 				/* total failure to match */
592 				break;
593 			}
594 		}
595 
596 		/*
597 		**  See if we successfully matched
598 		*/
599 
600 		if (rvp >= rwr->r_lhs && *rvp == NULL)
601 		{
602 # ifdef DEBUGX
603 			if (Debug)
604 			{
605 				printf("-----rule matches:\n");
606 				printav(rwr->r_rhs);
607 			}
608 # endif DEBUGX
609 
610 			/* substitute */
611 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
612 			{
613 				rp = *rvp;
614 				if (*rp == MATCHANY)
615 				{
616 					register struct match *m;
617 					register char **pp;
618 					extern struct match *findmatch();
619 
620 					m = findmatch(mlist, rp[1]);
621 					if (m != NULL)
622 					{
623 						pp = m->firsttok;
624 						do
625 						{
626 							*avp++ = *pp;
627 						} while (pp++ != m->lasttok);
628 					}
629 				}
630 				else
631 					*avp++ = rp;
632 			}
633 			*avp++ = NULL;
634 			bmove(npvp, pvp, (avp - npvp) * sizeof *avp);
635 # ifdef DEBUG
636 			if (Debug)
637 			{
638 				char **vp;
639 
640 				printf("rewritten as `");
641 				for (vp = pvp; *vp != NULL; vp++)
642 					xputs(*vp);
643 				printf("'\n");
644 			}
645 # endif DEBUG
646 			if (pvp[0][0] == CANONNET)
647 				break;
648 		}
649 		else
650 		{
651 # ifdef DEBUGX
652 			if (Debug)
653 				printf("----- rule fails\n");
654 # endif DEBUGX
655 			rwr = rwr->r_next;
656 		}
657 	}
658 }
659 /*
660 **  SETMATCH -- set parameter value in match vector
661 **
662 **	Parameters:
663 **		mlist -- list of match values.
664 **		name -- the character name of this parameter.
665 **		first -- the first location of the replacement.
666 **		last -- the last location of the replacement.
667 **
668 **		If last == NULL, delete this entry.
669 **		If first == NULL, extend this entry (or add it if
670 **			it does not exist).
671 **
672 **	Returns:
673 **		nothing.
674 **
675 **	Side Effects:
676 **		munges with mlist.
677 */
678 
679 setmatch(mlist, name, first, last)
680 	struct match *mlist;
681 	char name;
682 	char **first;
683 	char **last;
684 {
685 	register struct match *m;
686 	struct match *nullm = NULL;
687 
688 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
689 	{
690 		if (m->name == name)
691 			break;
692 		if (m->name == '\0')
693 			nullm = m;
694 	}
695 
696 	if (m >= &mlist[MAXMATCH])
697 		m = nullm;
698 
699 	if (last == NULL)
700 	{
701 		m->name = '\0';
702 		return;
703 	}
704 
705 	if (m->name == '\0')
706 	{
707 		if (first == NULL)
708 			m->firsttok = last;
709 		else
710 			m->firsttok = first;
711 	}
712 	m->name = name;
713 	m->lasttok = last;
714 }
715 /*
716 **  FINDMATCH -- find match in mlist
717 **
718 **	Parameters:
719 **		mlist -- list to search.
720 **		name -- name to find.
721 **
722 **	Returns:
723 **		pointer to match structure.
724 **		NULL if no match.
725 **
726 **	Side Effects:
727 **		none.
728 */
729 
730 struct match *
731 findmatch(mlist, name)
732 	struct match *mlist;
733 	char name;
734 {
735 	register struct match *m;
736 
737 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
738 	{
739 		if (m->name == name)
740 			return (m);
741 	}
742 
743 	return (NULL);
744 }
745 /*
746 **  CLRMATCH -- clear match list
747 **
748 **	Parameters:
749 **		mlist -- list to clear.
750 **
751 **	Returns:
752 **		none.
753 **
754 **	Side Effects:
755 **		mlist is cleared.
756 */
757 
758 clrmatch(mlist)
759 	struct match *mlist;
760 {
761 	register struct match *m;
762 
763 	for (m = mlist; m < &mlist[MAXMATCH]; m++)
764 		m->name = '\0';
765 }
766 /*
767 **  BUILDADDR -- build address from token vector.
768 **
769 **	Parameters:
770 **		tv -- token vector.
771 **		a -- pointer to address descriptor to fill.
772 **			If NULL, one will be allocated.
773 **
774 **	Returns:
775 **		'a'
776 **
777 **	Side Effects:
778 **		fills in 'a'
779 */
780 
781 ADDRESS *
782 buildaddr(tv, a)
783 	register char **tv;
784 	register ADDRESS *a;
785 {
786 	register int i;
787 	static char buf[MAXNAME];
788 	struct mailer **mp;
789 	register struct mailer *m;
790 	extern char *xalloc();
791 
792 	if (a == NULL)
793 		a = (ADDRESS *) xalloc(sizeof *a);
794 	a->q_flags = 0;
795 
796 	/* figure out what net/mailer to use */
797 	if (**tv != CANONNET)
798 		syserr("buildaddr: no net");
799 	tv++;
800 	for (mp = Mailer, i = 0; (m = *mp++) != NULL; i++)
801 	{
802 		if (strcmp(m->m_name, *tv) == 0)
803 			break;
804 	}
805 	if (m == NULL)
806 		syserr("buildaddr: unknown net %s", *tv);
807 	a->q_mailer = i;
808 
809 	/* figure out what host (if any) */
810 	tv++;
811 	if (!bitset(M_NOHOST, m->m_flags))
812 	{
813 		if (**tv != CANONHOST)
814 			syserr("buildaddr: no host");
815 		tv++;
816 		a->q_host = *tv;
817 		tv++;
818 	}
819 	else
820 		a->q_host = NULL;
821 
822 	/* figure out the user */
823 	if (**tv != CANONUSER)
824 		syserr("buildaddr: no user");
825 	buf[0] = '\0';
826 	while (**++tv != NULL)
827 		strcat(buf, *tv);
828 	a->q_user = buf;
829 
830 	return (a);
831 }
832 /*
833 **  SAMEADDR -- Determine if two addresses are the same
834 **
835 **	This is not just a straight comparison -- if the mailer doesn't
836 **	care about the host we just ignore it, etc.
837 **
838 **	Parameters:
839 **		a, b -- pointers to the internal forms to compare.
840 **		wildflg -- if TRUE, 'a' may have no user specified,
841 **			in which case it is to match anything.
842 **
843 **	Returns:
844 **		TRUE -- they represent the same mailbox.
845 **		FALSE -- they don't.
846 **
847 **	Side Effects:
848 **		none.
849 */
850 
851 bool
852 sameaddr(a, b, wildflg)
853 	register ADDRESS *a;
854 	register ADDRESS *b;
855 	bool wildflg;
856 {
857 	/* if they don't have the same mailer, forget it */
858 	if (a->q_mailer != b->q_mailer)
859 		return (FALSE);
860 
861 	/* if the user isn't the same, we can drop out */
862 	if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0)
863 		return (FALSE);
864 
865 	/* if the mailer ignores hosts, we have succeeded! */
866 	if (bitset(M_NOHOST, Mailer[a->q_mailer]->m_flags))
867 		return (TRUE);
868 
869 	/* otherwise compare hosts (but be careful for NULL ptrs) */
870 	if (a->q_host == NULL || b->q_host == NULL)
871 		return (FALSE);
872 	if (strcmp(a->q_host, b->q_host) != 0)
873 		return (FALSE);
874 
875 	return (TRUE);
876 }
877 /*
878 **  PRINTADDR -- print address (for debugging)
879 **
880 **	Parameters:
881 **		a -- the address to print
882 **		follow -- follow the q_next chain.
883 **
884 **	Returns:
885 **		none.
886 **
887 **	Side Effects:
888 **		none.
889 */
890 
891 printaddr(a, follow)
892 	register ADDRESS *a;
893 	bool follow;
894 {
895 	while (a != NULL)
896 	{
897 		printf("addr@%x: ", a);
898 		fflush(stdout);
899 		printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr,
900 		       a->q_mailer, Mailer[a->q_mailer]->m_name, a->q_host, a->q_user);
901 		printf("\tnext=%x flags=%o, rmailer %d\n", a->q_next,
902 		       a->q_flags, a->q_rmailer);
903 
904 		if (!follow)
905 			return;
906 		a = a->q_next;
907 	}
908 }
909