1297Seric # include <stdio.h>
2297Seric # include <ctype.h>
3297Seric # include "dlvrmail.h"
4297Seric 
/* Source-control identification string for this file (presumably name, revision and date). */
5*407Seric static char	SccsId[] = "@(#)parseaddr.c	1.2	07/25/80";
6*407Seric 
7297Seric /*
8297Seric **  PARSE -- Parse an address
9297Seric **
10297Seric **	Parses an address and breaks it up into three parts: a
11297Seric **	net to transmit the message on, the host to transmit it
12297Seric **	to, and a user on that host.  These are loaded into an
13297Seric **	addrq header with the values squirreled away if necessary.
14297Seric **	The "user" part may not be a real user; the process may
15297Seric **	just reoccur on that machine.  For example, on a machine
16297Seric **	with an arpanet connection, the address
17297Seric **		csvax.bill@berkeley
18297Seric **	will break up to a "user" of 'csvax.bill' and a host
19297Seric **	of 'berkeley' -- to be transmitted over the arpanet.
20297Seric **
21297Seric **	Parameters:
22297Seric **		addr -- the address to parse.
23297Seric **		a -- a pointer to the address descriptor buffer.
24297Seric **			If NULL, a header will be created.
25297Seric **		copyf -- determines what shall be copied:
26297Seric **			-1 -- don't copy anything.  The printname
27297Seric **				(q_paddr) is just addr, and the
28297Seric **				user & host are allocated internally
29297Seric **				to parse.
30297Seric **			0 -- copy out the parsed user & host, but
31297Seric **				don't copy the printname.
32297Seric **			+1 -- copy everything.
33297Seric **
34297Seric **	Returns:
35297Seric **		A pointer to the address descriptor header (`a' if
36297Seric **			`a' is non-NULL).
37297Seric **		NULL on error.
38297Seric **
39297Seric **	Side Effects:
40297Seric **		none
41297Seric **
42297Seric **	Defined Constants:
43297Seric **		none
44297Seric **
45297Seric **	Requires:
46297Seric **		usrerr
47297Seric **		strcpy (sys)
48297Seric **		isalpha (sys)
49297Seric **		xalloc
50297Seric **		prescan
51297Seric **		flagset
52297Seric **		makelower
53297Seric **		printf (sys)
54297Seric **		ParseTab -- the parse table.
55297Seric **
56297Seric **	Called By:
57297Seric **		main
58297Seric **		sendto
59297Seric **		alias
60297Seric **		savemail
61297Seric **
62297Seric **	History:
63297Seric **		12/26/79 -- written.
64297Seric */
65297Seric 
66297Seric addrq *
67297Seric parse(addr, a, copyf)
68297Seric 	char *addr;
69297Seric 	register addrq *a;
70297Seric 	int copyf;
71297Seric {
72297Seric 	register char *p;
73297Seric 	register struct parsetab *t;
74297Seric 	extern struct parsetab ParseTab[];
75297Seric 	static char buf[MAXNAME];
76297Seric 	register char c;
77297Seric 	register char *q;
78297Seric 	bool got_one;
79297Seric 	extern char *prescan();
80297Seric 	extern char *xalloc();
81297Seric 
82297Seric 	/*
83297Seric 	**  Initialize and prescan address.
84297Seric 	*/
85297Seric 
86297Seric 	To = addr;
87297Seric 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
88297Seric 		return (NULL);
89297Seric 
90297Seric 	/*
91297Seric 	**  Scan parse table.
92297Seric 	**	Look for the first entry designating a character
93297Seric 	**		that is contained in the address.
94297Seric 	**	Arrange for q to point to that character.
95297Seric 	**	Check to see that there is only one of the char
96297Seric 	**		if it must be unique.
97297Seric 	**	Find the last one if the host is on the RHS.
98297Seric 	**	Insist that the host name is atomic.
99297Seric 	**	If just doing a map, do the map and then start all
100297Seric 	**		over.
101297Seric 	*/
102297Seric 
103297Seric  rescan:
104297Seric 	got_one = FALSE;
105297Seric 	for (t = ParseTab; t->p_char != '\0'; t++)
106297Seric 	{
107297Seric 		q = NULL;
108297Seric 		for (p = buf; (c = *p) != '\0'; p++)
109297Seric 		{
110297Seric 			/* find the end of this token */
111297Seric 			while (isalnum(c) || c == '-' || c == '_')
112297Seric 				c = *++p;
113297Seric 			if (c == '\0')
114297Seric 				break;
115297Seric 
116297Seric 			if (c == t->p_char)
117297Seric 			{
118297Seric 				got_one = TRUE;
119297Seric 
120297Seric 				/* do mapping as appropriate */
121297Seric 				if (flagset(P_MAP, t->p_flags))
122297Seric 				{
123297Seric 					*p = t->p_arg[0];
124297Seric 					if (flagset(P_ONE, t->p_flags))
125297Seric 						goto rescan;
126297Seric 					else
127297Seric 						continue;
128297Seric 				}
129297Seric 
130297Seric 				/* arrange for q to point to it */
131297Seric 				if (q != NULL && flagset(P_ONE, t->p_flags))
132297Seric 				{
133297Seric 					usrerr("multichar error");
134297Seric 					ExitStat = EX_USAGE;
135297Seric 					return (NULL);
136297Seric 				}
137297Seric 				if (q == NULL || flagset(P_HLAST, t->p_flags))
138297Seric 					q = p;
139297Seric 			}
140297Seric 			else
141297Seric 			{
142297Seric 				/* insist that host name is atomic */
143297Seric 				if (flagset(P_HLAST, t->p_flags))
144297Seric 					q = NULL;
145297Seric 				else
146297Seric 					break;
147297Seric 			}
148297Seric 		}
149297Seric 
150297Seric 		if (q != NULL)
151297Seric 			break;
152297Seric 	}
153297Seric 
154297Seric 	/*
155297Seric 	**  If we matched nothing cleanly, but we did match something
156297Seric 	**  somewhere in the process of scanning, then we have a
157297Seric 	**  syntax error.  This can happen on things like a@b:c where
158297Seric 	**  @ has a right host and : has a left host.
159297Seric 	**
160297Seric 	**  We also set `q' to the null string, in case someone forgets
161297Seric 	**  to put the P_MOVE bit in the local mailer entry of the
162297Seric 	**  configuration table.
163297Seric 	*/
164297Seric 
165297Seric 	if (q == NULL)
166297Seric 	{
167297Seric 		q = "";
168297Seric 		if (got_one)
169297Seric 		{
170297Seric 			usrerr("syntax error");
171297Seric 			ExitStat = EX_USAGE;
172297Seric 			return (NULL);
173297Seric 		}
174297Seric 	}
175297Seric 
176297Seric 	/*
177297Seric 	**  Interpret entry.
178297Seric 	**	t points to the entry for the mailer we will use.
179297Seric 	**	q points to the significant character.
180297Seric 	*/
181297Seric 
182297Seric 	if (a == NULL)
183297Seric 		a = (addrq *) xalloc(sizeof *a);
184297Seric 	if (copyf > 0)
185297Seric 	{
186297Seric 		p = xalloc((unsigned) strlen(addr) + 1);
187297Seric 		strcpy(p, addr);
188297Seric 		a->q_paddr = p;
189297Seric 	}
190297Seric 	else
191297Seric 		a->q_paddr = addr;
192297Seric 	a->q_mailer = &Mailer[t->p_mailer];
193297Seric 
194297Seric 	if (flagset(P_MOVE, t->p_flags))
195297Seric 	{
196297Seric 		/* send the message to another host & retry */
197297Seric 		a->q_host = t->p_arg;
198297Seric 		if (copyf >= 0)
199297Seric 		{
200297Seric 			p = xalloc((unsigned) strlen(buf) + 1);
201297Seric 			strcpy(p, buf);
202297Seric 			a->q_user = p;
203297Seric 		}
204297Seric 		else
205297Seric 			a->q_user = buf;
206297Seric 	}
207297Seric 	else
208297Seric 	{
209297Seric 		/*
210297Seric 		**  Make local copies of the host & user and then
211297Seric 		**  transport them out.
212297Seric 		*/
213297Seric 
214297Seric 		*q++ = '\0';
215297Seric 		if (flagset(P_HLAST, t->p_flags))
216297Seric 		{
217297Seric 			a->q_host = q;
218297Seric 			a->q_user = buf;
219297Seric 		}
220297Seric 		else
221297Seric 		{
222297Seric 			a->q_host = buf;
223297Seric 			a->q_user = q;
224297Seric 		}
225297Seric 		if (copyf >= 0)
226297Seric 		{
227297Seric 			p = xalloc((unsigned) strlen(a->q_host) + 1);
228297Seric 			strcpy(p, a->q_host);
229297Seric 			a->q_host = p;
230297Seric 			p = xalloc((unsigned) strlen(a->q_user) + 1);
231297Seric 			strcpy(p, a->q_user);
232297Seric 			a->q_user = p;
233297Seric 		}
234297Seric 	}
235297Seric 
236297Seric 	/*
237297Seric 	**  Do UPPER->lower case mapping unless inhibited.
238297Seric 	*/
239297Seric 
240297Seric 	if (!flagset(P_HST_UPPER, t->p_flags))
241297Seric 		makelower(a->q_host);
242297Seric 	if (!flagset(P_USR_UPPER, t->p_flags))
243297Seric 		makelower(a->q_user);
244297Seric 
245297Seric 	/*
246297Seric 	**  Compute return value.
247297Seric 	*/
248297Seric 
249297Seric # ifdef DEBUG
250297Seric 	if (Debug && copyf >= 0)
251297Seric 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
252297Seric 		    addr, a->q_host, a->q_user, t->p_mailer);
253297Seric # endif DEBUG
254297Seric 
255297Seric 	return (a);
256297Seric }
/*
**  MAKELOWER -- Fold a string to lower case, in place
**
**	Parameters:
**		p -- the string to fold.  A NULL pointer is
**			accepted and nothing is done for it.
**
**	Returns:
**		none.
**
**	Side Effects:
**		Each upper case letter in the string pointed to
**		by p is overwritten with its lower case form.
**		Characters with the 0200 bit set are left alone.
**
**	Requires:
**		isupper (sys)
**
**	Called By:
**		parse
**
**	History:
**		12/26/79 -- written.
*/

makelower(p)
	register char *p;
{
	register char *cp;

	if (p != NULL)
	{
		cp = p;
		while (*cp != '\0')
		{
			/* never hand a high-bit character to isupper */
			if ((*cp & 0200) == 0 && isupper(*cp))
				*cp += 'a' - 'A';
			cp++;
		}
	}
}
291297Seric /*
292297Seric **  PRESCAN -- Prescan name and make it canonical
293297Seric **
294297Seric **	Scans a name and turns it into canonical form.  This involves
295297Seric **	deleting blanks, comments (in parentheses), and turning the
296297Seric **	word "at" into an at-sign ("@").  The name is copied as this
297297Seric **	is done; it is legal to copy a name onto itself, since this
298297Seric **	process can only make things smaller.
299297Seric **
300297Seric **	This routine knows about quoted strings and angle brackets.
301297Seric **
302297Seric **	There are certain subtleties to this routine.  The one that
303297Seric **	comes to mind now is that backslashes on the ends of names
304297Seric **	are silently stripped off; this is intentional.  The problem
305297Seric **	is that some versions of sndmsg (like at LBL) set the kill
306297Seric **	character to something other than @ when reading addresses;
307297Seric **	so people type "csvax.eric\@berkeley" -- which screws up the
308297Seric **	berknet mailer.
309297Seric **
310297Seric **	Parameters:
311297Seric **		addr -- the name to chomp.
312297Seric **		buf -- the buffer to copy it into.
313297Seric **		buflim -- the last usable address in the buffer
314297Seric **			(which will old a null byte).  Normally
315297Seric **			&buf[sizeof buf - 1].
316297Seric **		delim -- the delimiter for the address, normally
317297Seric **			'\0' or ','; \0 is accepted in any case.
318297Seric **			are moving in place; set buflim to high core.
319297Seric **
320297Seric **	Returns:
321297Seric **		A pointer to the terminator of buf.
322297Seric **		NULL on error.
323297Seric **
324297Seric **	Side Effects:
325297Seric **		buf gets clobbered.
326297Seric **
327297Seric **	Requires:
328297Seric **		isspace (sys)
329297Seric **		any
330297Seric **		usrerr
331297Seric **
332297Seric **	Called By:
333297Seric **		parse
334297Seric **		maketemp
335297Seric **
336297Seric **	History:
337297Seric **		12/30/79 -- broken from parse; comment processing
338297Seric **			added.
339297Seric */
340297Seric 
341297Seric char *
342297Seric prescan(addr, buf, buflim, delim)
343297Seric 	char *addr;
344297Seric 	char *buf;
345297Seric 	char *buflim;
346297Seric 	char delim;
347297Seric {
348297Seric 	register char *p;
349297Seric 	bool space;
350297Seric 	bool quotemode;
351297Seric 	bool bslashmode;
352297Seric 	int cmntcnt;
353297Seric 	int brccnt;
354297Seric 	register char c;
355297Seric 	register char *q;
356297Seric 	extern bool any();
357297Seric 
358297Seric 	space = TRUE;
359297Seric 	q = buf;
360297Seric 	bslashmode = quotemode = FALSE;
361297Seric 	cmntcnt = brccnt = 0;
362297Seric 	for (p = addr; (c = *p++ & 0177) != '\0'; )
363297Seric 	{
364297Seric 		/* chew up special characters */
365297Seric 		*q = '\0';
366297Seric 		if (bslashmode)
367297Seric 		{
368297Seric 			c |= 0200;
369297Seric 			bslashmode == FALSE;
370297Seric 		}
371297Seric 		else if (c == '"')
372297Seric 			quotemode = !quotemode;
373297Seric 		else if (c == '\\')
374297Seric 		{
375297Seric 			bslashmode++;
376297Seric 			continue;
377297Seric 		}
378297Seric 		else if (quotemode)
379297Seric 			c |= 0200;
380297Seric 		else if (c == delim)
381297Seric 			break;
382297Seric 		else if (c == '(')
383297Seric 			cmntcnt++;
384297Seric 		else if (c == ')')
385297Seric 		{
386297Seric 			if (cmntcnt <= 0)
387297Seric 			{
388297Seric 				usrerr("Unbalanced ')'");
389297Seric 				return (NULL);
390297Seric 			}
391297Seric 			else
392297Seric 			{
393297Seric 				cmntcnt--;
394297Seric 				continue;
395297Seric 			}
396297Seric 		}
397297Seric 		if (cmntcnt > 0)
398297Seric 			continue;
399297Seric 		else if (c == '<')
400297Seric 		{
401297Seric 			brccnt++;
402297Seric 			if (brccnt == 1)
403297Seric 			{
404297Seric 				/* we prefer using machine readable name */
405297Seric 				q = buf;
406297Seric 				*q = '\0';
407297Seric 				continue;
408297Seric 			}
409297Seric 		}
410297Seric 		else if (c == '>')
411297Seric 		{
412297Seric 			if (brccnt <= 0)
413297Seric 			{
414297Seric 				usrerr("Unbalanced `>'");
415297Seric 				return (NULL);
416297Seric 			}
417297Seric 			else
418297Seric 				brccnt--;
419297Seric 			if (brccnt <= 0)
420297Seric 				continue;
421297Seric 		}
422297Seric 
423297Seric 		/*
424297Seric 		**  Turn "at" into "@",
425297Seric 		**	but only if "at" is a word in and to itself.
426297Seric 		**	By the way, I violate the ARPANET RFC-733
427297Seric 		**	standard here, by assuming that 'space' delimits
428297Seric 		**	atoms.  I assume that is just a mistake, since
429297Seric 		**	it violates the spirit of the semantics
430297Seric 		**	of the document.....
431297Seric 		*/
432297Seric 
433297Seric 		if (space && (c == 'a' || c == 'A') &&
434297Seric 		    (p[0] == 't' || p[0] == 'T') &&
435297Seric 		    (any(p[1], "()<>@,;:\\\"") || p[1] <= 040))
436297Seric 		{
437297Seric 			c = '@';
438297Seric 			p++;
439297Seric 		}
440297Seric 
441297Seric 		/* skip blanks */
442297Seric 		if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0)
443297Seric 		{
444297Seric 			if (q >= buflim)
445297Seric 			{
446297Seric 				usrerr("Address too long");
447297Seric 				return (NULL);
448297Seric 			}
449297Seric 			*q++ = c;
450297Seric 		}
451297Seric 		space = isspace(c);
452297Seric 	}
453297Seric 	*q = '\0';
454297Seric 	if (c == '\0')
455297Seric 		p--;
456297Seric 	if (cmntcnt > 0)
457297Seric 		usrerr("Unbalanced '('");
458297Seric 	else if (quotemode)
459297Seric 		usrerr("Unbalanced '\"'");
460297Seric 	else if (brccnt > 0)
461297Seric 		usrerr("Unbalanced '<'");
462297Seric 	else if (buf[0] != '\0')
463297Seric 		return (p);
464297Seric 	return (NULL);
465297Seric }
466