1 # include "sendmail.h"
2 
/* SCCS identification string for this file (name, revision, date). */
static char	SccsId[] = "@(#)parseaddr.c	3.29	10/06/81";
4 
5 /*
6 **  PARSE -- Parse an address
7 **
8 **	Parses an address and breaks it up into three parts: a
9 **	net to transmit the message on, the host to transmit it
10 **	to, and a user on that host.  These are loaded into an
11 **	ADDRESS header with the values squirreled away if necessary.
12 **	The "user" part may not be a real user; the process may
13 **	just reoccur on that machine.  For example, on a machine
14 **	with an arpanet connection, the address
15 **		csvax.bill@berkeley
16 **	will break up to a "user" of 'csvax.bill' and a host
17 **	of 'berkeley' -- to be transmitted over the arpanet.
18 **
19 **	Parameters:
20 **		addr -- the address to parse.
21 **		a -- a pointer to the address descriptor buffer.
22 **			If NULL, a header will be created.
23 **		copyf -- determines what shall be copied:
24 **			-1 -- don't copy anything.  The printname
25 **				(q_paddr) is just addr, and the
26 **				user & host are allocated internally
27 **				to parse.
28 **			0 -- copy out the parsed user & host, but
29 **				don't copy the printname.
30 **			+1 -- copy everything.
31 **
32 **	Returns:
33 **		A pointer to the address descriptor header (`a' if
34 **			`a' is non-NULL).
35 **		NULL on error.
36 **
37 **	Side Effects:
38 **		none
39 */
40 
41 # define DELIMCHARS	"$()<>,;\\\"\r\n"	/* word delimiters */
42 
43 ADDRESS *
44 parse(addr, a, copyf)
45 	char *addr;
46 	register ADDRESS *a;
47 	int copyf;
48 {
49 	register char **pvp;
50 	register struct mailer *m;
51 	extern char **prescan();
52 	extern ADDRESS *buildaddr();
53 
54 	/*
55 	**  Initialize and prescan address.
56 	*/
57 
58 	To = addr;
59 # ifdef DEBUG
60 	if (Debug)
61 		printf("\n--parse(%s)\n", addr);
62 # endif DEBUG
63 
64 	pvp = prescan(addr, '\0');
65 	if (pvp == NULL)
66 		return (NULL);
67 
68 	/*
69 	**  Apply rewriting rules.
70 	*/
71 
72 	rewrite(pvp, 0);
73 
74 	/*
75 	**  See if we resolved to a real mailer.
76 	*/
77 
78 	if (pvp[0][0] != CANONNET)
79 	{
80 		setstat(EX_USAGE);
81 		usrerr("cannot resolve name");
82 		return (NULL);
83 	}
84 
85 	/*
86 	**  Build canonical address from pvp.
87 	*/
88 
89 	a = buildaddr(pvp, a);
90 	if (a == NULL)
91 		return (NULL);
92 	m = Mailer[a->q_mailer];
93 
94 	/*
95 	**  Make local copies of the host & user and then
96 	**  transport them out.
97 	*/
98 
99 	if (copyf > 0)
100 		a->q_paddr = newstr(addr);
101 	else
102 		a->q_paddr = addr;
103 
104 	if (copyf >= 0)
105 	{
106 		if (a->q_host != NULL)
107 			a->q_host = newstr(a->q_host);
108 		else
109 			a->q_host = "";
110 		if (a->q_user != a->q_paddr)
111 			a->q_user = newstr(a->q_user);
112 	}
113 
114 	/*
115 	**  Do UPPER->lower case mapping unless inhibited.
116 	*/
117 
118 	if (!bitset(M_HST_UPPER, m->m_flags))
119 		makelower(a->q_host);
120 	if (!bitset(M_USR_UPPER, m->m_flags))
121 		makelower(a->q_user);
122 
123 	/*
124 	**  Compute return value.
125 	*/
126 
127 # ifdef DEBUG
128 	if (Debug)
129 	{
130 		printf("parse-->");
131 		printaddr(a, FALSE);
132 	}
133 # endif DEBUG
134 
135 	return (a);
136 }
137 /*
138 **  PRESCAN -- Prescan name and make it canonical
139 **
140 **	Scans a name and turns it into canonical form.  This involves
141 **	deleting blanks, comments (in parentheses), and turning the
142 **	word "at" into an at-sign ("@").  The name is copied as this
143 **	is done; it is legal to copy a name onto itself, since this
144 **	process can only make things smaller.
145 **
146 **	This routine knows about quoted strings and angle brackets.
147 **
148 **	There are certain subtleties to this routine.  The one that
149 **	comes to mind now is that backslashes on the ends of names
150 **	are silently stripped off; this is intentional.  The problem
151 **	is that some versions of sndmsg (like at LBL) set the kill
152 **	character to something other than @ when reading addresses;
153 **	so people type "csvax.eric\@berkeley" -- which screws up the
154 **	berknet mailer.
155 **
156 **	Parameters:
157 **		addr -- the name to chomp.
158 **		delim -- the delimiter for the address, normally
159 **			'\0' or ','; \0 is accepted in any case.
160 **			are moving in place; set buflim to high core.
161 **
162 **	Returns:
163 **		A pointer to a vector of tokens.
164 **		NULL on error.
165 **
166 **	Side Effects:
167 **		none.
168 */
169 
170 # define OPER		1
171 # define ATOM		2
172 # define EOTOK		3
173 # define QSTRING	4
174 # define SPACE		5
175 # define DOLLAR		6
176 # define GETONE		7
177 # define MACRO		8
178 
179 char **
180 prescan(addr, delim)
181 	char *addr;
182 	char delim;
183 {
184 	register char *p;
185 	static char buf[MAXNAME+MAXATOM];
186 	static char *av[MAXATOM+1];
187 	char **avp;
188 	bool bslashmode;
189 	int cmntcnt;
190 	int brccnt;
191 	register char c;
192 	char *tok;
193 	register char *q;
194 	register int state;
195 	int nstate;
196 	extern char lower();
197 
198 	q = buf;
199 	bslashmode = FALSE;
200 	cmntcnt = brccnt = 0;
201 	avp = av;
202 	state = OPER;
203 	for (p = addr; *p != '\0' && *p != delim; )
204 	{
205 		/* read a token */
206 		tok = q;
207 		while ((c = *p++) != '\0' && c != delim)
208 		{
209 			/* chew up special characters */
210 			c &= ~0200;
211 			*q = '\0';
212 			if (bslashmode)
213 			{
214 				c |= 0200;
215 				bslashmode = FALSE;
216 			}
217 			else if (c == '\\')
218 			{
219 				bslashmode = TRUE;
220 				continue;
221 			}
222 			else if (c == '"')
223 			{
224 				if (state == QSTRING)
225 					state = OPER;
226 				else
227 					state = QSTRING;
228 				break;
229 			}
230 
231 			if (c == '$' && delim == '\t')
232 				nstate = DOLLAR;
233 			else
234 				nstate = toktype(c);
235 			switch (state)
236 			{
237 			  case QSTRING:		/* in quoted string */
238 				break;
239 
240 			  case ATOM:		/* regular atom */
241 				if (nstate != ATOM)
242 				{
243 					state = EOTOK;
244 					p--;
245 				}
246 				break;
247 
248 			  case GETONE:		/* grab one character */
249 				state = OPER;
250 				break;
251 
252 			  case EOTOK:		/* after atom or q-string */
253 				state = nstate;
254 				if (state == SPACE)
255 					continue;
256 				break;
257 
258 			  case SPACE:		/* linear white space */
259 				state = nstate;
260 				break;
261 
262 			  case OPER:		/* operator */
263 				if (nstate == SPACE)
264 					continue;
265 				state = nstate;
266 				break;
267 
268 			  case DOLLAR:		/* $- etc. */
269 				state = OPER;
270 				if (isascii(c) && isdigit(c))
271 				{
272 					/* replacement */
273 					c = MATCHREPL;
274 					state = GETONE;
275 					p--;
276 					break;
277 				}
278 				switch (c)
279 				{
280 				  case '$':		/* literal $ */
281 					break;
282 
283 				  case '+':		/* match anything */
284 					c = MATCHANY;
285 					break;
286 
287 				  case '-':		/* match one token */
288 					c = MATCHONE;
289 					break;
290 
291 				  case '=':		/* match one token of class */
292 					c = MATCHCLASS;
293 					state = GETONE;
294 					break;
295 
296 				  case '#':		/* canonical net name */
297 					c = CANONNET;
298 					break;
299 
300 				  case '@':		/* canonical host name */
301 					c = CANONHOST;
302 					break;
303 
304 				  case ':':		/* canonical user name */
305 					c = CANONUSER;
306 					break;
307 
308 				  default:
309 					state = MACRO;
310 					break;
311 				}
312 				break;
313 
314 			  default:
315 				syserr("prescan: unknown state %d", state);
316 			}
317 
318 			if (state == EOTOK || state == SPACE)
319 				break;
320 			if (state == DOLLAR)
321 				continue;
322 
323 			/* squirrel it away */
324 			if (q >= &buf[sizeof buf - 5])
325 			{
326 				usrerr("Address too long");
327 				return (NULL);
328 			}
329 			if (state == MACRO)
330 			{
331 				char mbuf[3];
332 
333 				mbuf[0] = '$';
334 				mbuf[1] = c;
335 				mbuf[2] = '\0';
336 				(void) expand(mbuf, q, &buf[sizeof buf - 5]);
337 				q += strlen(q);
338 				state = EOTOK;
339 				break;
340 			}
341 			*q++ = c;
342 
343 			/* decide whether this represents end of token */
344 			if (state == OPER)
345 				break;
346 		}
347 		if (c == '\0' || c == delim)
348 			p--;
349 
350 		/* new token */
351 		if (tok == q)
352 			continue;
353 		*q++ = '\0';
354 
355 		c = tok[0];
356 		if (c == '(')
357 		{
358 			cmntcnt++;
359 			continue;
360 		}
361 		else if (c == ')')
362 		{
363 			if (cmntcnt <= 0)
364 			{
365 				usrerr("Unbalanced ')'");
366 				return (NULL);
367 			}
368 			else
369 			{
370 				cmntcnt--;
371 				continue;
372 			}
373 		}
374 		else if (cmntcnt > 0)
375 			continue;
376 
377 		/* we prefer <> specs */
378 		if (c == '<')
379 		{
380 			if (brccnt < 0)
381 			{
382 				usrerr("multiple < spec");
383 				return (NULL);
384 			}
385 			brccnt++;
386 			if (brccnt == 1)
387 			{
388 				/* we prefer using machine readable name */
389 				q = buf;
390 				*q = '\0';
391 				avp = av;
392 				continue;
393 			}
394 		}
395 		else if (c == '>')
396 		{
397 			if (brccnt <= 0)
398 			{
399 				usrerr("Unbalanced `>'");
400 				return (NULL);
401 			}
402 			else
403 				brccnt--;
404 			if (brccnt <= 0)
405 			{
406 				brccnt = -1;
407 				continue;
408 			}
409 		}
410 
411 		if (avp >= &av[MAXATOM])
412 		{
413 			syserr("prescan: too many tokens");
414 			return (NULL);
415 		}
416 		*avp++ = tok;
417 	}
418 	*avp = NULL;
419 	if (cmntcnt > 0)
420 		usrerr("Unbalanced '('");
421 	else if (brccnt > 0)
422 		usrerr("Unbalanced '<'");
423 	else if (state == QSTRING)
424 		usrerr("Unbalanced '\"'");
425 	else if (av[0] != NULL)
426 		return (av);
427 	return (NULL);
428 }
429 /*
430 **  TOKTYPE -- return token type
431 **
432 **	Parameters:
433 **		c -- the character in question.
434 **
435 **	Returns:
436 **		Its type.
437 **
438 **	Side Effects:
439 **		none.
440 */
441 
442 toktype(c)
443 	register char c;
444 {
445 	static char buf[50];
446 	static bool firstime = TRUE;
447 
448 	if (firstime)
449 	{
450 		firstime = FALSE;
451 		(void) expand("$o", buf, &buf[sizeof buf - 1]);
452 		strcat(buf, DELIMCHARS);
453 	}
454 	if (!isascii(c))
455 		return (ATOM);
456 	if (isspace(c))
457 		return (SPACE);
458 	if (iscntrl(c) || index(buf, c) != NULL)
459 		return (OPER);
460 	return (ATOM);
461 }
462 /*
463 **  REWRITE -- apply rewrite rules to token vector.
464 **
465 **	Parameters:
466 **		pvp -- pointer to token vector.
467 **
468 **	Returns:
469 **		none.
470 **
471 **	Side Effects:
472 **		pvp is modified.
473 */
474 
475 struct match
476 {
477 	char	**first;	/* first token matched */
478 	char	**last;		/* last token matched */
479 };
480 
481 # define MAXMATCH	9	/* max params per rewrite */
482 
483 
484 rewrite(pvp, ruleset)
485 	char **pvp;
486 	int ruleset;
487 {
488 	register char *ap;		/* address pointer */
489 	register char *rp;		/* rewrite pointer */
490 	register char **avp;		/* address vector pointer */
491 	char **avfp;			/* first word in current match */
492 	register char **rvp;		/* rewrite vector pointer */
493 	struct rewrite *rwr;		/* pointer to current rewrite rule */
494 	struct match mlist[MAXMATCH];	/* stores match on LHS */
495 	struct match *mlp;		/* cur ptr into mlist */
496 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
497 	extern bool sameword();
498 
499 # ifdef DEBUG
500 	if (Debug > 9)
501 	{
502 		printf("rewrite: original pvp:\n");
503 		printav(pvp);
504 	}
505 # endif DEBUG
506 
507 	/*
508 	**  Run through the list of rewrite rules, applying
509 	**	any that match.
510 	*/
511 
512 	for (rwr = RewriteRules[ruleset]; rwr != NULL; )
513 	{
514 # ifdef DEBUG
515 		if (Debug > 10)
516 		{
517 			printf("-----trying rule:\n");
518 			printav(rwr->r_lhs);
519 		}
520 # endif DEBUG
521 
522 		/* try to match on this rule */
523 		mlp = mlist;
524 		for (rvp = rwr->r_lhs, avfp = avp = pvp; *avp != NULL; )
525 		{
526 			ap = *avp;
527 			rp = *rvp;
528 
529 			if (rp == NULL)
530 			{
531 				/* end-of-pattern before end-of-address */
532 				goto fail;
533 			}
534 
535 			switch (*rp)
536 			{
537 				register STAB *s;
538 				register int class;
539 
540 			  case MATCHONE:
541 				/* match exactly one token */
542 				mlp->first = mlp->last = avp++;
543 				mlp++;
544 				avfp = avp;
545 				break;
546 
547 			  case MATCHANY:
548 				/* match any number of tokens */
549 				mlp->first = avfp;
550 				mlp->last = avp++;
551 				mlp++;
552 				break;
553 
554 			  case MATCHCLASS:
555 				/* match any token in a class */
556 				class = rp[1];
557 				if (!isalpha(class))
558 					goto fail;
559 				if (isupper(class))
560 					class -= 'A';
561 				else
562 					class -= 'a';
563 				s = stab(ap, ST_CLASS, ST_FIND);
564 				if (s == NULL || (s->s_class & (1 << class)) == 0)
565 					goto fail;
566 
567 				/* mark match */
568 				mlp->first = mlp->last = avp++;
569 				mlp++;
570 				avfp = avp;
571 				break;
572 
573 			  default:
574 				/* must have exact match */
575 				if (!sameword(rp, ap))
576 					goto fail;
577 				avp++;
578 				avfp = avp;
579 				break;
580 			}
581 
582 			/* successful match on this token */
583 			rvp++;
584 			continue;
585 
586 		  fail:
587 			/* match failed -- back up */
588 			while (--rvp >= rwr->r_lhs)
589 			{
590 				rp = *rvp;
591 				if (*rp == MATCHANY)
592 				{
593 					avfp = mlp->first;
594 					break;
595 				}
596 				else if (*rp == MATCHONE || *rp == MATCHCLASS)
597 				{
598 					/* back out binding */
599 					avp--;
600 					avfp = avp;
601 					mlp--;
602 				}
603 			}
604 
605 			if (rvp < rwr->r_lhs)
606 			{
607 				/* total failure to match */
608 				break;
609 			}
610 		}
611 
612 		/*
613 		**  See if we successfully matched
614 		*/
615 
616 		if (rvp >= rwr->r_lhs && *rvp == NULL)
617 		{
618 # ifdef DEBUG
619 			if (Debug > 10)
620 			{
621 				printf("-----rule matches:\n");
622 				printav(rwr->r_rhs);
623 			}
624 # endif DEBUG
625 
626 			/* substitute */
627 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
628 			{
629 				rp = *rvp;
630 				if (*rp == MATCHREPL)
631 				{
632 					register struct match *m;
633 					register char **pp;
634 
635 					m = &mlist[rp[1] - '1'];
636 					pp = m->first;
637 					do
638 					{
639 						if (avp >= &npvp[MAXATOM])
640 						{
641 							syserr("rewrite: expansion too long");
642 							return;
643 						}
644 						*avp++ = *pp;
645 					} while (pp++ != m->last);
646 				}
647 				else
648 				{
649 					if (avp >= &npvp[MAXATOM])
650 					{
651 						syserr("rewrite: expansion too long");
652 						return;
653 					}
654 					*avp++ = rp;
655 				}
656 			}
657 			*avp++ = NULL;
658 			bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp);
659 # ifdef DEBUG
660 			if (Debug > 3)
661 			{
662 				char **vp;
663 
664 				printf("rewritten as `");
665 				for (vp = pvp; *vp != NULL; vp++)
666 				{
667 					if (vp != pvp)
668 						printf("_");
669 					xputs(*vp);
670 				}
671 				printf("'\n");
672 			}
673 # endif DEBUG
674 			if (pvp[0][0] == CANONNET)
675 				break;
676 		}
677 		else
678 		{
679 # ifdef DEBUG
680 			if (Debug > 10)
681 				printf("----- rule fails\n");
682 # endif DEBUG
683 			rwr = rwr->r_next;
684 		}
685 	}
686 }
687 /*
688 **  BUILDADDR -- build address from token vector.
689 **
690 **	Parameters:
691 **		tv -- token vector.
692 **		a -- pointer to address descriptor to fill.
693 **			If NULL, one will be allocated.
694 **
695 **	Returns:
696 **		NULL if there was an error.
697 **		'a' otherwise.
698 **
699 **	Side Effects:
700 **		fills in 'a'
701 */
702 
703 ADDRESS *
704 buildaddr(tv, a)
705 	register char **tv;
706 	register ADDRESS *a;
707 {
708 	register int i;
709 	static char buf[MAXNAME];
710 	struct mailer **mp;
711 	register struct mailer *m;
712 
713 	if (a == NULL)
714 		a = (ADDRESS *) xalloc(sizeof *a);
715 	a->q_flags = 0;
716 	a->q_home = NULL;
717 
718 	/* figure out what net/mailer to use */
719 	if (**tv != CANONNET)
720 	{
721 		syserr("buildaddr: no net");
722 		return (NULL);
723 	}
724 	tv++;
725 	if (strcmp(*tv, "error") == 0)
726 	{
727 		if (**++tv != CANONUSER)
728 			syserr("buildaddr: error: no user");
729 		buf[0] = '\0';
730 		while (*++tv != NULL)
731 		{
732 			if (buf[0] != '\0')
733 				strcat(buf, " ");
734 			strcat(buf, *tv);
735 		}
736 		usrerr(buf);
737 		return (NULL);
738 	}
739 	for (mp = Mailer, i = 0; (m = *mp++) != NULL; i++)
740 	{
741 		if (strcmp(m->m_name, *tv) == 0)
742 			break;
743 	}
744 	if (m == NULL)
745 	{
746 		syserr("buildaddr: unknown net %s", *tv);
747 		return (NULL);
748 	}
749 	a->q_mailer = i;
750 
751 	/* figure out what host (if any) */
752 	tv++;
753 	if (!bitset(M_LOCAL, m->m_flags))
754 	{
755 		if (**tv != CANONHOST)
756 		{
757 			syserr("buildaddr: no host");
758 			return (NULL);
759 		}
760 		tv++;
761 		a->q_host = *tv;
762 		tv++;
763 	}
764 	else
765 		a->q_host = NULL;
766 
767 	/* figure out the user */
768 	if (**tv != CANONUSER)
769 	{
770 		syserr("buildaddr: no user");
771 		return (NULL);
772 	}
773 	cataddr(++tv, buf, sizeof buf);
774 	a->q_user = buf;
775 
776 	return (a);
777 }
778 /*
779 **  CATADDR -- concatenate pieces of addresses (putting in <LWSP> subs)
780 **
781 **	Parameters:
782 **		pvp -- parameter vector to rebuild.
783 **		buf -- buffer to build the string into.
784 **		sz -- size of buf.
785 **
786 **	Returns:
787 **		none.
788 **
789 **	Side Effects:
790 **		Destroys buf.
791 */
792 
793 cataddr(pvp, buf, sz)
794 	char **pvp;
795 	char *buf;
796 	register int sz;
797 {
798 	bool oatomtok = FALSE;
799 	bool natomtok = FALSE;
800 	register int i;
801 	register char *p;
802 
803 	p = buf;
804 	sz--;
805 	while (*pvp != NULL && (i = strlen(*pvp)) < sz)
806 	{
807 		natomtok = (toktype(**pvp) == ATOM);
808 		if (oatomtok && natomtok)
809 			*p++ = SPACESUB;
810 		(void) strcpy(p, *pvp);
811 		oatomtok = natomtok;
812 		p += i;
813 		sz -= i;
814 		pvp++;
815 	}
816 	*p = '\0';
817 }
818 /*
819 **  SAMEADDR -- Determine if two addresses are the same
820 **
821 **	This is not just a straight comparison -- if the mailer doesn't
822 **	care about the host we just ignore it, etc.
823 **
824 **	Parameters:
825 **		a, b -- pointers to the internal forms to compare.
826 **		wildflg -- if TRUE, 'a' may have no user specified,
827 **			in which case it is to match anything.
828 **
829 **	Returns:
830 **		TRUE -- they represent the same mailbox.
831 **		FALSE -- they don't.
832 **
833 **	Side Effects:
834 **		none.
835 */
836 
837 bool
838 sameaddr(a, b, wildflg)
839 	register ADDRESS *a;
840 	register ADDRESS *b;
841 	bool wildflg;
842 {
843 	/* if they don't have the same mailer, forget it */
844 	if (a->q_mailer != b->q_mailer)
845 		return (FALSE);
846 
847 	/* if the user isn't the same, we can drop out */
848 	if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0)
849 		return (FALSE);
850 
851 	/* if the mailer ignores hosts, we have succeeded! */
852 	if (bitset(M_LOCAL, Mailer[a->q_mailer]->m_flags))
853 		return (TRUE);
854 
855 	/* otherwise compare hosts (but be careful for NULL ptrs) */
856 	if (a->q_host == NULL || b->q_host == NULL)
857 		return (FALSE);
858 	if (strcmp(a->q_host, b->q_host) != 0)
859 		return (FALSE);
860 
861 	return (TRUE);
862 }
/*
**  PRINTADDR -- print address (for debugging)
**
**	Only compiled when DEBUG is defined.
**
**	Parameters:
**		a -- the address to print
**		follow -- follow the q_next chain.
**
**	Returns:
**		none.
**
**	Side Effects:
**		Writes to the standard output.
*/

# ifdef DEBUG

printaddr(a, follow)
	register ADDRESS *a;
	bool follow;
{
	/* an empty address is only reported for a single-entry print */
	if (a == NULL)
	{
		if (!follow)
			printf("[NULL]\n");
		return;
	}

	for (; a != NULL; a = a->q_next)
	{
		/* flush the descriptor's own address before touching its fields */
		printf("%x=", a);
		(void) fflush(stdout);
		printf("%s: mailer %d (%s), host `%s', user `%s'\n",
		       a->q_paddr, a->q_mailer,
		       Mailer[a->q_mailer]->m_name, a->q_host, a->q_user);
		printf("\tnext=%x, flags=%o, rmailer %d\n",
		       a->q_next, a->q_flags, a->q_rmailer);

		if (!follow)
			return;
	}
}

# endif DEBUG
901