1297Seric # include <stdio.h>
2297Seric # include <ctype.h>
32973Seric # include "postbox.h"
4297Seric 
5*2990Seric static char	SccsId[] = "@(#)parseaddr.c	3.3	03/07/81";
6407Seric 
7297Seric /*
8297Seric **  PARSE -- Parse an address
9297Seric **
10297Seric **	Parses an address and breaks it up into three parts: a
11297Seric **	net to transmit the message on, the host to transmit it
12297Seric **	to, and a user on that host.  These are loaded into an
132973Seric **	ADDRESS header with the values squirreled away if necessary.
14297Seric **	The "user" part may not be a real user; the process may
15297Seric **	just reoccur on that machine.  For example, on a machine
16297Seric **	with an arpanet connection, the address
17297Seric **		csvax.bill@berkeley
18297Seric **	will break up to a "user" of 'csvax.bill' and a host
19297Seric **	of 'berkeley' -- to be transmitted over the arpanet.
20297Seric **
21297Seric **	Parameters:
22297Seric **		addr -- the address to parse.
23297Seric **		a -- a pointer to the address descriptor buffer.
24297Seric **			If NULL, a header will be created.
25297Seric **		copyf -- determines what shall be copied:
26297Seric **			-1 -- don't copy anything.  The printname
27297Seric **				(q_paddr) is just addr, and the
28297Seric **				user & host are allocated internally
29297Seric **				to parse.
30297Seric **			0 -- copy out the parsed user & host, but
31297Seric **				don't copy the printname.
32297Seric **			+1 -- copy everything.
33297Seric **
34297Seric **	Returns:
35297Seric **		A pointer to the address descriptor header (`a' if
36297Seric **			`a' is non-NULL).
37297Seric **		NULL on error.
38297Seric **
39297Seric **	Side Effects:
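**		Sets the global To to addr.  On a "multichar" or
**		"syntax" error, reports it through usrerr and sets
**		ExitStat to EX_USAGE.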
41297Seric **
42297Seric **	Called By:
43297Seric **		main
44297Seric **		sendto
45297Seric **		alias
46297Seric **		savemail
47297Seric */
48297Seric 
/*
**  DELIMCHARS -- characters that prescan treats as word delimiters
**	(used for the "at" -> "@" test and for tracking delimmode).
**  SPACESUB -- '.' with the 0200 bit set; prescan stores it in place
**	of white space (<lwsp>) that separates two non-delimiter words.
*/
# define DELIMCHARS	"()<>@!.,;:\\\" \t\r\n"	/* word delimiters */
# define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
512091Seric 
522973Seric ADDRESS *
53297Seric parse(addr, a, copyf)
54297Seric 	char *addr;
552973Seric 	register ADDRESS *a;
56297Seric 	int copyf;
57297Seric {
58297Seric 	register char *p;
59297Seric 	register struct parsetab *t;
60297Seric 	extern struct parsetab ParseTab[];
61297Seric 	static char buf[MAXNAME];
62297Seric 	register char c;
63297Seric 	register char *q;
64297Seric 	bool got_one;
65297Seric 	extern char *prescan();
66297Seric 	extern char *xalloc();
672973Seric 	extern char *newstr();
681516Seric 	char **pvp;
69*2990Seric 	extern char *strcpy();
70297Seric 
71297Seric 	/*
72297Seric 	**  Initialize and prescan address.
73297Seric 	*/
74297Seric 
75297Seric 	To = addr;
76297Seric 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
77297Seric 		return (NULL);
78297Seric 
79297Seric 	/*
80297Seric 	**  Scan parse table.
81297Seric 	**	Look for the first entry designating a character
82297Seric 	**		that is contained in the address.
83297Seric 	**	Arrange for q to point to that character.
84297Seric 	**	Check to see that there is only one of the char
85297Seric 	**		if it must be unique.
86297Seric 	**	Find the last one if the host is on the RHS.
87297Seric 	**	Insist that the host name is atomic.
88297Seric 	**	If just doing a map, do the map and then start all
89297Seric 	**		over.
90297Seric 	*/
91297Seric 
92297Seric  rescan:
93297Seric 	got_one = FALSE;
94297Seric 	for (t = ParseTab; t->p_char != '\0'; t++)
95297Seric 	{
96297Seric 		q = NULL;
97297Seric 		for (p = buf; (c = *p) != '\0'; p++)
98297Seric 		{
99297Seric 			/* find the end of this token */
100297Seric 			while (isalnum(c) || c == '-' || c == '_')
101297Seric 				c = *++p;
102297Seric 			if (c == '\0')
103297Seric 				break;
104297Seric 
105297Seric 			if (c == t->p_char)
106297Seric 			{
107297Seric 				got_one = TRUE;
108297Seric 
109297Seric 				/* do mapping as appropriate */
1102973Seric 				if (bitset(P_MAP, t->p_flags))
111297Seric 				{
112297Seric 					*p = t->p_arg[0];
1132973Seric 					if (bitset(P_ONE, t->p_flags))
114297Seric 						goto rescan;
115297Seric 					else
116297Seric 						continue;
117297Seric 				}
118297Seric 
119297Seric 				/* arrange for q to point to it */
1202973Seric 				if (q != NULL && bitset(P_ONE, t->p_flags))
121297Seric 				{
122297Seric 					usrerr("multichar error");
123297Seric 					ExitStat = EX_USAGE;
124297Seric 					return (NULL);
125297Seric 				}
1262973Seric 				if (q == NULL || bitset(P_HLAST, t->p_flags))
127297Seric 					q = p;
128297Seric 			}
129297Seric 			else
130297Seric 			{
131297Seric 				/* insist that host name is atomic */
1322973Seric 				if (bitset(P_HLAST, t->p_flags))
133297Seric 					q = NULL;
134297Seric 				else
135297Seric 					break;
136297Seric 			}
137297Seric 		}
138297Seric 
139297Seric 		if (q != NULL)
140297Seric 			break;
141297Seric 	}
142297Seric 
143297Seric 	/*
144297Seric 	**  If we matched nothing cleanly, but we did match something
145297Seric 	**  somewhere in the process of scanning, then we have a
146297Seric 	**  syntax error.  This can happen on things like a@b:c where
147297Seric 	**  @ has a right host and : has a left host.
148297Seric 	**
149297Seric 	**  We also set `q' to the null string, in case someone forgets
150297Seric 	**  to put the P_MOVE bit in the local mailer entry of the
151297Seric 	**  configuration table.
152297Seric 	*/
153297Seric 
154297Seric 	if (q == NULL)
155297Seric 	{
156297Seric 		q = "";
157297Seric 		if (got_one)
158297Seric 		{
159297Seric 			usrerr("syntax error");
160297Seric 			ExitStat = EX_USAGE;
161297Seric 			return (NULL);
162297Seric 		}
163297Seric 	}
164297Seric 
165297Seric 	/*
166297Seric 	**  Interpret entry.
167297Seric 	**	t points to the entry for the mailer we will use.
168297Seric 	**	q points to the significant character.
169297Seric 	*/
170297Seric 
171297Seric 	if (a == NULL)
1722973Seric 		a = (ADDRESS *) xalloc(sizeof *a);
173297Seric 	if (copyf > 0)
1742973Seric 		a->q_paddr = newstr(addr);
175297Seric 	else
176297Seric 		a->q_paddr = addr;
1772984Seric 	a->q_rmailer = t->p_mailer;
178297Seric 	a->q_mailer = &Mailer[t->p_mailer];
179297Seric 
1802973Seric 	if (bitset(P_MOVE, t->p_flags))
181297Seric 	{
182297Seric 		/* send the message to another host & retry */
183297Seric 		a->q_host = t->p_arg;
184297Seric 		if (copyf >= 0)
1852973Seric 			a->q_user = newstr(buf);
186297Seric 		else
187297Seric 			a->q_user = buf;
188297Seric 	}
189297Seric 	else
190297Seric 	{
191297Seric 		/*
192297Seric 		**  Make local copies of the host & user and then
193297Seric 		**  transport them out.
194297Seric 		*/
195297Seric 
196297Seric 		*q++ = '\0';
1972973Seric 		if (bitset(P_HLAST, t->p_flags))
198297Seric 		{
199297Seric 			a->q_host = q;
200297Seric 			a->q_user = buf;
201297Seric 		}
202297Seric 		else
203297Seric 		{
204297Seric 			a->q_host = buf;
205297Seric 			a->q_user = q;
206297Seric 		}
2071516Seric 
2081516Seric 		/*
2091516Seric 		**  Don't go to the net if already on the target host.
2101516Seric 		**	This is important on the berkeley network, since
2111516Seric 		**	it get confused if we ask to send to ourselves.
2121516Seric 		**	For nets like the ARPANET, we probably will have
2131516Seric 		**	the local list set to NULL to simplify testing.
2141516Seric 		**	The canonical representation of the name is also set
2151516Seric 		**	to be just the local name so the duplicate letter
2161516Seric 		**	suppression algorithm will work.
2171516Seric 		*/
2181516Seric 
2191516Seric 		if ((pvp = a->q_mailer->m_local) != NULL)
2201516Seric 		{
2211516Seric 			while (*pvp != NULL)
2221516Seric 			{
2231516Seric 				auto char buf2[MAXNAME];
2241516Seric 
2251516Seric 				strcpy(buf2, a->q_host);
2262973Seric 				if (!bitset(P_HST_UPPER, t->p_flags))
2271516Seric 					makelower(buf2);
2281516Seric 				if (strcmp(*pvp++, buf2) == 0)
2291516Seric 				{
2301516Seric 					strcpy(buf2, a->q_user);
2311516Seric 					p = a->q_paddr;
2321516Seric 					if (parse(buf2, a, -1) == NULL)
2331516Seric 					{
2341516Seric 						To = addr;
2351516Seric 						return (NULL);
2361516Seric 					}
2371516Seric 					To = a->q_paddr = p;
2381516Seric 					break;
2391516Seric 				}
2401516Seric 			}
2411516Seric 		}
2421516Seric 
2431516Seric 		/* make copies if specified */
244297Seric 		if (copyf >= 0)
245297Seric 		{
2462973Seric 			a->q_host = newstr(a->q_host);
2472973Seric 			a->q_user = newstr(a->q_user);
248297Seric 		}
249297Seric 	}
250297Seric 
251297Seric 	/*
252297Seric 	**  Do UPPER->lower case mapping unless inhibited.
253297Seric 	*/
254297Seric 
2552973Seric 	if (!bitset(P_HST_UPPER, t->p_flags))
256297Seric 		makelower(a->q_host);
2572973Seric 	if (!bitset(P_USR_UPPER, t->p_flags))
258297Seric 		makelower(a->q_user);
259297Seric 
260297Seric 	/*
261297Seric 	**  Compute return value.
262297Seric 	*/
263297Seric 
264297Seric # ifdef DEBUG
2651583Seric 	if (Debug)
266297Seric 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
267297Seric 		    addr, a->q_host, a->q_user, t->p_mailer);
268297Seric # endif DEBUG
269297Seric 
270297Seric 	return (a);
271297Seric }
272297Seric /*
273297Seric **  MAKELOWER -- Translate a line into lower case
274297Seric **
275297Seric **	Parameters:
276297Seric **		p -- the string to translate.  If NULL, return is
277297Seric **			immediate.
278297Seric **
279297Seric **	Returns:
280297Seric **		none.
281297Seric **
282297Seric **	Side Effects:
283297Seric **		String pointed to by p is translated to lower case.
284297Seric **
285297Seric **	Called By:
286297Seric **		parse
287297Seric */
288297Seric 
makelower(p)
	register char *p;
{
	register char *cp;

	/* nothing to do for a null pointer */
	if (p == NULL)
		return;

	/*
	**  Walk the string in place.  Characters with the 0200 bit
	**  set (as prescan marks quoted characters) are left alone;
	**  plain upper-case letters are shifted to lower case.
	*/

	cp = p;
	while (*cp != '\0')
	{
		if (!(*cp & 0200) && isupper(*cp))
			*cp += 'a' - 'A';
		cp++;
	}
}
300297Seric /*
301297Seric **  PRESCAN -- Prescan name and make it canonical
302297Seric **
303297Seric **	Scans a name and turns it into canonical form.  This involves
304297Seric **	deleting blanks, comments (in parentheses), and turning the
305297Seric **	word "at" into an at-sign ("@").  The name is copied as this
306297Seric **	is done; it is legal to copy a name onto itself, since this
307297Seric **	process can only make things smaller.
308297Seric **
309297Seric **	This routine knows about quoted strings and angle brackets.
310297Seric **
311297Seric **	There are certain subtleties to this routine.  The one that
312297Seric **	comes to mind now is that backslashes on the ends of names
313297Seric **	are silently stripped off; this is intentional.  The problem
314297Seric **	is that some versions of sndmsg (like at LBL) set the kill
315297Seric **	character to something other than @ when reading addresses;
316297Seric **	so people type "csvax.eric\@berkeley" -- which screws up the
317297Seric **	berknet mailer.
318297Seric **
319297Seric **	Parameters:
320297Seric **		addr -- the name to chomp.
321297Seric **		buf -- the buffer to copy it into.
**		buflim -- the last usable address in the buffer
**			(which will hold a null byte).  Normally
**			&buf[sizeof buf - 1].
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
**			If you are moving the name in place (copying
**			it onto itself), set buflim to high core.
328297Seric **
329297Seric **	Returns:
**		A pointer into addr: at its terminating null, or just
**			past the delimiter that ended the scan.
331297Seric **		NULL on error.
332297Seric **
333297Seric **	Side Effects:
334297Seric **		buf gets clobbered.
335297Seric **
336297Seric **	Called By:
337297Seric **		parse
338297Seric **		maketemp
339297Seric */
340297Seric 
char *
prescan(addr, buf, buflim, delim)
	char *addr;
	char *buf;
	char *buflim;
	char delim;
{
	register char *p;	/* read pointer into addr */
	bool space;		/* previous character processed was white space */
	bool quotemode;		/* inside a double-quoted string */
	bool bslashmode;	/* previous character was an unescaped backslash */
	bool delimmode;		/* at start, after '<', or after a DELIMCHARS char */
	int cmntcnt;		/* nesting depth of parenthesized comments */
	int brccnt;		/* depth of '<'; -1 once the outermost '>' is seen */
	register char c;	/* current character (0200 bit set if quoted) */
	register char *q;	/* write pointer into buf */
	extern char *index();

	space = FALSE;
	delimmode = TRUE;
	q = buf;
	bslashmode = quotemode = FALSE;
	cmntcnt = brccnt = 0;
	for (p = addr; (c = *p++) != '\0'; )
	{
		/*
		**  Chew up special characters.
		**	buf is kept null-terminated at all times.
		**	Order matters here: a backslash-escaped character
		**	is marked with the 0200 bit before anything else
		**	is looked at, and characters inside double quotes
		**	are marked the same way before the delimiter and
		**	parenthesis tests, so marked characters never match
		**	the comparisons below.
		*/

		*q = '\0';
		if (bslashmode)
		{
			c |= 0200;
			bslashmode = FALSE;
		}
		else if (c == '"')
			quotemode = !quotemode;	/* the quote itself is still processed below */
		else if (c == '\\')
		{
			/* drop the backslash; it only affects the next character */
			bslashmode++;
			continue;
		}
		else if (quotemode)
			c |= 0200;
		else if (c == delim)
			break;
		else if (c == '(')
		{
			cmntcnt++;
			continue;
		}
		else if (c == ')')
		{
			if (cmntcnt <= 0)
			{
				usrerr("Unbalanced ')'");
				return (NULL);
			}
			else
			{
				cmntcnt--;
				continue;
			}
		}

		/*
		**  Second stage: discard comment text and redundant
		**  white space, and handle angle brackets.
		**	White space is skipped when it follows other white
		**	space or a delimiter (including the start of the
		**	name).
		**	The first '<' throws away everything collected so
		**	far; the '>' that closes it sets brccnt to -1, after
		**	which nothing more is stored and a further '<' is
		**	an error.
		*/

		if (cmntcnt > 0)
			continue;
		else if (isascii(c) && isspace(c) && (space || delimmode))
			continue;
		else if (c == '<')
		{
			if (brccnt < 0)
			{
				usrerr("multiple < spec");
				return (NULL);
			}
			brccnt++;
			delimmode = TRUE;
			space = FALSE;
			if (brccnt == 1)
			{
				/* we prefer using machine readable name */
				q = buf;
				*q = '\0';
				continue;
			}
		}
		else if (c == '>')
		{
			if (brccnt <= 0)
			{
				usrerr("Unbalanced `>'");
				return (NULL);
			}
			else
				brccnt--;
			if (brccnt <= 0)
			{
				brccnt = -1;
				continue;
			}
		}

		/*
		**  Turn "at" into "@",
		**	but only if "at" is a word.
		**	By the way, I violate the ARPANET RFC-733
		**	standard here, by assuming that 'space' delimits
		**	atoms.  I assume that is just a mistake, since
		**	it violates the spirit of the semantics
		**	of the document.....
		**
		**	"Is a word" means: we are in delimmode, and the
		**	character after the 't' is a delimiter, a null, or
		**	compares <= 040.  p already points at the 't', so
		**	p++ consumes it.
		*/

		if (delimmode && (c == 'a' || c == 'A') &&
		    (p[0] == 't' || p[0] == 'T') &&
		    (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040))
		{
			c = '@';
			p++;
		}

		/*
		**  Recompute delimmode from this character.  A delimiter
		**  cancels any pending white space, so SPACESUB is only
		**  emitted between two non-delimiter characters.
		*/

		if (delimmode = (index(DELIMCHARS, c) != NULL))
			space = FALSE;

		/*
		**  If not a space, squirrel it away.
		**	Up to two bytes may be written (SPACESUB and c);
		**	the limit test leaves room for both plus the null
		**	stored at buflim.
		*/

		if ((!isascii(c) || !isspace(c)) && brccnt >= 0)
		{
			if (q >= buflim-1)
			{
				usrerr("Address too long");
				return (NULL);
			}
			if (space)
				*q++ = SPACESUB;
			*q++ = c;
		}
		space = isascii(c) && isspace(c);
	}
	*q = '\0';

	/* if we ran off the end of addr, back up to point at its null */
	if (c == '\0')
		p--;

	/*
	**  Report anything left unbalanced.  An empty result is also
	**  a failure, but is not reported with a message.  On success
	**  the return value points into addr, not buf.
	*/

	if (cmntcnt > 0)
		usrerr("Unbalanced '('");
	else if (quotemode)
		usrerr("Unbalanced '\"'");
	else if (brccnt > 0)
		usrerr("Unbalanced '<'");
	else if (buf[0] != '\0')
		return (p);
	return (NULL);
}
488