1*297Seric # include <stdio.h>
2*297Seric # include <ctype.h>
3*297Seric # include "dlvrmail.h"
4*297Seric 
5*297Seric /*
6*297Seric **  PARSE -- Parse an address
7*297Seric **
8*297Seric **	Parses an address and breaks it up into three parts: a
9*297Seric **	net to transmit the message on, the host to transmit it
10*297Seric **	to, and a user on that host.  These are loaded into an
11*297Seric **	addrq header with the values squirreled away if necessary.
12*297Seric **	The "user" part may not be a real user; the process may
13*297Seric **	just reoccur on that machine.  For example, on a machine
14*297Seric **	with an arpanet connection, the address
15*297Seric **		csvax.bill@berkeley
16*297Seric **	will break up to a "user" of 'csvax.bill' and a host
17*297Seric **	of 'berkeley' -- to be transmitted over the arpanet.
18*297Seric **
19*297Seric **	Parameters:
20*297Seric **		addr -- the address to parse.
21*297Seric **		a -- a pointer to the address descriptor buffer.
22*297Seric **			If NULL, a header will be created.
23*297Seric **		copyf -- determines what shall be copied:
24*297Seric **			-1 -- don't copy anything.  The printname
25*297Seric **				(q_paddr) is just addr, and the
26*297Seric **				user & host are allocated internally
27*297Seric **				to parse.
28*297Seric **			0 -- copy out the parsed user & host, but
29*297Seric **				don't copy the printname.
30*297Seric **			+1 -- copy everything.
31*297Seric **
32*297Seric **	Returns:
33*297Seric **		A pointer to the address descriptor header (`a' if
34*297Seric **			`a' is non-NULL).
35*297Seric **		NULL on error.
36*297Seric **
37*297Seric **	Side Effects:
38*297Seric **		none
39*297Seric **
40*297Seric **	Defined Constants:
41*297Seric **		none
42*297Seric **
43*297Seric **	Requires:
44*297Seric **		usrerr
45*297Seric **		strcpy (sys)
46*297Seric **		isalpha (sys)
47*297Seric **		xalloc
48*297Seric **		prescan
49*297Seric **		flagset
50*297Seric **		makelower
51*297Seric **		printf (sys)
52*297Seric **		ParseTab -- the parse table.
53*297Seric **
54*297Seric **	Called By:
55*297Seric **		main
56*297Seric **		sendto
57*297Seric **		alias
58*297Seric **		savemail
59*297Seric **
60*297Seric **	History:
61*297Seric **		12/26/79 -- written.
62*297Seric */
63*297Seric 
64*297Seric addrq *
65*297Seric parse(addr, a, copyf)
66*297Seric 	char *addr;
67*297Seric 	register addrq *a;
68*297Seric 	int copyf;
69*297Seric {
70*297Seric 	register char *p;
71*297Seric 	register struct parsetab *t;
72*297Seric 	extern struct parsetab ParseTab[];
73*297Seric 	static char buf[MAXNAME];
74*297Seric 	register char c;
75*297Seric 	register char *q;
76*297Seric 	bool got_one;
77*297Seric 	extern char *prescan();
78*297Seric 	extern char *xalloc();
79*297Seric 
80*297Seric 	/*
81*297Seric 	**  Initialize and prescan address.
82*297Seric 	*/
83*297Seric 
84*297Seric 	To = addr;
85*297Seric 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
86*297Seric 		return (NULL);
87*297Seric 
88*297Seric 	/*
89*297Seric 	**  Scan parse table.
90*297Seric 	**	Look for the first entry designating a character
91*297Seric 	**		that is contained in the address.
92*297Seric 	**	Arrange for q to point to that character.
93*297Seric 	**	Check to see that there is only one of the char
94*297Seric 	**		if it must be unique.
95*297Seric 	**	Find the last one if the host is on the RHS.
96*297Seric 	**	Insist that the host name is atomic.
97*297Seric 	**	If just doing a map, do the map and then start all
98*297Seric 	**		over.
99*297Seric 	*/
100*297Seric 
101*297Seric  rescan:
102*297Seric 	got_one = FALSE;
103*297Seric 	for (t = ParseTab; t->p_char != '\0'; t++)
104*297Seric 	{
105*297Seric 		q = NULL;
106*297Seric 		for (p = buf; (c = *p) != '\0'; p++)
107*297Seric 		{
108*297Seric 			/* find the end of this token */
109*297Seric 			while (isalnum(c) || c == '-' || c == '_')
110*297Seric 				c = *++p;
111*297Seric 			if (c == '\0')
112*297Seric 				break;
113*297Seric 
114*297Seric 			if (c == t->p_char)
115*297Seric 			{
116*297Seric 				got_one = TRUE;
117*297Seric 
118*297Seric 				/* do mapping as appropriate */
119*297Seric 				if (flagset(P_MAP, t->p_flags))
120*297Seric 				{
121*297Seric 					*p = t->p_arg[0];
122*297Seric 					if (flagset(P_ONE, t->p_flags))
123*297Seric 						goto rescan;
124*297Seric 					else
125*297Seric 						continue;
126*297Seric 				}
127*297Seric 
128*297Seric 				/* arrange for q to point to it */
129*297Seric 				if (q != NULL && flagset(P_ONE, t->p_flags))
130*297Seric 				{
131*297Seric 					usrerr("multichar error");
132*297Seric 					ExitStat = EX_USAGE;
133*297Seric 					return (NULL);
134*297Seric 				}
135*297Seric 				if (q == NULL || flagset(P_HLAST, t->p_flags))
136*297Seric 					q = p;
137*297Seric 			}
138*297Seric 			else
139*297Seric 			{
140*297Seric 				/* insist that host name is atomic */
141*297Seric 				if (flagset(P_HLAST, t->p_flags))
142*297Seric 					q = NULL;
143*297Seric 				else
144*297Seric 					break;
145*297Seric 			}
146*297Seric 		}
147*297Seric 
148*297Seric 		if (q != NULL)
149*297Seric 			break;
150*297Seric 	}
151*297Seric 
152*297Seric 	/*
153*297Seric 	**  If we matched nothing cleanly, but we did match something
154*297Seric 	**  somewhere in the process of scanning, then we have a
155*297Seric 	**  syntax error.  This can happen on things like a@b:c where
156*297Seric 	**  @ has a right host and : has a left host.
157*297Seric 	**
158*297Seric 	**  We also set `q' to the null string, in case someone forgets
159*297Seric 	**  to put the P_MOVE bit in the local mailer entry of the
160*297Seric 	**  configuration table.
161*297Seric 	*/
162*297Seric 
163*297Seric 	if (q == NULL)
164*297Seric 	{
165*297Seric 		q = "";
166*297Seric 		if (got_one)
167*297Seric 		{
168*297Seric 			usrerr("syntax error");
169*297Seric 			ExitStat = EX_USAGE;
170*297Seric 			return (NULL);
171*297Seric 		}
172*297Seric 	}
173*297Seric 
174*297Seric 	/*
175*297Seric 	**  Interpret entry.
176*297Seric 	**	t points to the entry for the mailer we will use.
177*297Seric 	**	q points to the significant character.
178*297Seric 	*/
179*297Seric 
180*297Seric 	if (a == NULL)
181*297Seric 		a = (addrq *) xalloc(sizeof *a);
182*297Seric 	if (copyf > 0)
183*297Seric 	{
184*297Seric 		p = xalloc((unsigned) strlen(addr) + 1);
185*297Seric 		strcpy(p, addr);
186*297Seric 		a->q_paddr = p;
187*297Seric 	}
188*297Seric 	else
189*297Seric 		a->q_paddr = addr;
190*297Seric 	a->q_mailer = &Mailer[t->p_mailer];
191*297Seric 
192*297Seric 	if (flagset(P_MOVE, t->p_flags))
193*297Seric 	{
194*297Seric 		/* send the message to another host & retry */
195*297Seric 		a->q_host = t->p_arg;
196*297Seric 		if (copyf >= 0)
197*297Seric 		{
198*297Seric 			p = xalloc((unsigned) strlen(buf) + 1);
199*297Seric 			strcpy(p, buf);
200*297Seric 			a->q_user = p;
201*297Seric 		}
202*297Seric 		else
203*297Seric 			a->q_user = buf;
204*297Seric 	}
205*297Seric 	else
206*297Seric 	{
207*297Seric 		/*
208*297Seric 		**  Make local copies of the host & user and then
209*297Seric 		**  transport them out.
210*297Seric 		*/
211*297Seric 
212*297Seric 		*q++ = '\0';
213*297Seric 		if (flagset(P_HLAST, t->p_flags))
214*297Seric 		{
215*297Seric 			a->q_host = q;
216*297Seric 			a->q_user = buf;
217*297Seric 		}
218*297Seric 		else
219*297Seric 		{
220*297Seric 			a->q_host = buf;
221*297Seric 			a->q_user = q;
222*297Seric 		}
223*297Seric 		if (copyf >= 0)
224*297Seric 		{
225*297Seric 			p = xalloc((unsigned) strlen(a->q_host) + 1);
226*297Seric 			strcpy(p, a->q_host);
227*297Seric 			a->q_host = p;
228*297Seric 			p = xalloc((unsigned) strlen(a->q_user) + 1);
229*297Seric 			strcpy(p, a->q_user);
230*297Seric 			a->q_user = p;
231*297Seric 		}
232*297Seric 	}
233*297Seric 
234*297Seric 	/*
235*297Seric 	**  Do UPPER->lower case mapping unless inhibited.
236*297Seric 	*/
237*297Seric 
238*297Seric 	if (!flagset(P_HST_UPPER, t->p_flags))
239*297Seric 		makelower(a->q_host);
240*297Seric 	if (!flagset(P_USR_UPPER, t->p_flags))
241*297Seric 		makelower(a->q_user);
242*297Seric 
243*297Seric 	/*
244*297Seric 	**  Compute return value.
245*297Seric 	*/
246*297Seric 
247*297Seric # ifdef DEBUG
248*297Seric 	if (Debug && copyf >= 0)
249*297Seric 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
250*297Seric 		    addr, a->q_host, a->q_user, t->p_mailer);
251*297Seric # endif DEBUG
252*297Seric 
253*297Seric 	return (a);
254*297Seric }
/*
**  MAKELOWER -- Fold a string to lower case, in place
**
**	Every upper case letter in the string is replaced by the
**	corresponding lower case letter.  Characters that have
**	the 0200 bit set are left exactly as they are.
**
**	Parameters:
**		p -- the string to translate.  If NULL, nothing
**			is done.
**
**	Returns:
**		none.
**
**	Side Effects:
**		The string pointed to by p is rewritten in place.
**
**	Requires:
**		isupper (sys)
**
**	Called By:
**		parse
**
**	History:
**		12/26/79 -- written.
*/

makelower(p)
	register char *p;
{
	register char ch;

	if (p != NULL)
	{
		while ((ch = *p) != '\0')
		{
			/* test the high bit first so isupper only sees 7-bit values */
			if ((ch & 0200) == 0 && isupper(ch))
				*p = ch + ('a' - 'A');
			p++;
		}
	}
}
289*297Seric /*
290*297Seric **  PRESCAN -- Prescan name and make it canonical
291*297Seric **
292*297Seric **	Scans a name and turns it into canonical form.  This involves
293*297Seric **	deleting blanks, comments (in parentheses), and turning the
294*297Seric **	word "at" into an at-sign ("@").  The name is copied as this
295*297Seric **	is done; it is legal to copy a name onto itself, since this
296*297Seric **	process can only make things smaller.
297*297Seric **
298*297Seric **	This routine knows about quoted strings and angle brackets.
299*297Seric **
300*297Seric **	There are certain subtleties to this routine.  The one that
301*297Seric **	comes to mind now is that backslashes on the ends of names
302*297Seric **	are silently stripped off; this is intentional.  The problem
303*297Seric **	is that some versions of sndmsg (like at LBL) set the kill
304*297Seric **	character to something other than @ when reading addresses;
305*297Seric **	so people type "csvax.eric\@berkeley" -- which screws up the
306*297Seric **	berknet mailer.
307*297Seric **
308*297Seric **	Parameters:
309*297Seric **		addr -- the name to chomp.
310*297Seric **		buf -- the buffer to copy it into.
311*297Seric **		buflim -- the last usable address in the buffer
312*297Seric **			(which will old a null byte).  Normally
313*297Seric **			&buf[sizeof buf - 1].
314*297Seric **		delim -- the delimiter for the address, normally
315*297Seric **			'\0' or ','; \0 is accepted in any case.
316*297Seric **			are moving in place; set buflim to high core.
317*297Seric **
318*297Seric **	Returns:
319*297Seric **		A pointer to the terminator of buf.
320*297Seric **		NULL on error.
321*297Seric **
322*297Seric **	Side Effects:
323*297Seric **		buf gets clobbered.
324*297Seric **
325*297Seric **	Requires:
326*297Seric **		isspace (sys)
327*297Seric **		any
328*297Seric **		usrerr
329*297Seric **
330*297Seric **	Called By:
331*297Seric **		parse
332*297Seric **		maketemp
333*297Seric **
334*297Seric **	History:
335*297Seric **		12/30/79 -- broken from parse; comment processing
336*297Seric **			added.
337*297Seric */
338*297Seric 
339*297Seric char *
340*297Seric prescan(addr, buf, buflim, delim)
341*297Seric 	char *addr;
342*297Seric 	char *buf;
343*297Seric 	char *buflim;
344*297Seric 	char delim;
345*297Seric {
346*297Seric 	register char *p;
347*297Seric 	bool space;
348*297Seric 	bool quotemode;
349*297Seric 	bool bslashmode;
350*297Seric 	int cmntcnt;
351*297Seric 	int brccnt;
352*297Seric 	register char c;
353*297Seric 	register char *q;
354*297Seric 	extern bool any();
355*297Seric 
356*297Seric 	space = TRUE;
357*297Seric 	q = buf;
358*297Seric 	bslashmode = quotemode = FALSE;
359*297Seric 	cmntcnt = brccnt = 0;
360*297Seric 	for (p = addr; (c = *p++ & 0177) != '\0'; )
361*297Seric 	{
362*297Seric 		/* chew up special characters */
363*297Seric 		*q = '\0';
364*297Seric 		if (bslashmode)
365*297Seric 		{
366*297Seric 			c |= 0200;
367*297Seric 			bslashmode == FALSE;
368*297Seric 		}
369*297Seric 		else if (c == '"')
370*297Seric 			quotemode = !quotemode;
371*297Seric 		else if (c == '\\')
372*297Seric 		{
373*297Seric 			bslashmode++;
374*297Seric 			continue;
375*297Seric 		}
376*297Seric 		else if (quotemode)
377*297Seric 			c |= 0200;
378*297Seric 		else if (c == delim)
379*297Seric 			break;
380*297Seric 		else if (c == '(')
381*297Seric 			cmntcnt++;
382*297Seric 		else if (c == ')')
383*297Seric 		{
384*297Seric 			if (cmntcnt <= 0)
385*297Seric 			{
386*297Seric 				usrerr("Unbalanced ')'");
387*297Seric 				return (NULL);
388*297Seric 			}
389*297Seric 			else
390*297Seric 			{
391*297Seric 				cmntcnt--;
392*297Seric 				continue;
393*297Seric 			}
394*297Seric 		}
395*297Seric 		if (cmntcnt > 0)
396*297Seric 			continue;
397*297Seric 		else if (c == '<')
398*297Seric 		{
399*297Seric 			brccnt++;
400*297Seric 			if (brccnt == 1)
401*297Seric 			{
402*297Seric 				/* we prefer using machine readable name */
403*297Seric 				q = buf;
404*297Seric 				*q = '\0';
405*297Seric 				continue;
406*297Seric 			}
407*297Seric 		}
408*297Seric 		else if (c == '>')
409*297Seric 		{
410*297Seric 			if (brccnt <= 0)
411*297Seric 			{
412*297Seric 				usrerr("Unbalanced `>'");
413*297Seric 				return (NULL);
414*297Seric 			}
415*297Seric 			else
416*297Seric 				brccnt--;
417*297Seric 			if (brccnt <= 0)
418*297Seric 				continue;
419*297Seric 		}
420*297Seric 
421*297Seric 		/*
422*297Seric 		**  Turn "at" into "@",
423*297Seric 		**	but only if "at" is a word in and to itself.
424*297Seric 		**	By the way, I violate the ARPANET RFC-733
425*297Seric 		**	standard here, by assuming that 'space' delimits
426*297Seric 		**	atoms.  I assume that is just a mistake, since
427*297Seric 		**	it violates the spirit of the semantics
428*297Seric 		**	of the document.....
429*297Seric 		*/
430*297Seric 
431*297Seric 		if (space && (c == 'a' || c == 'A') &&
432*297Seric 		    (p[0] == 't' || p[0] == 'T') &&
433*297Seric 		    (any(p[1], "()<>@,;:\\\"") || p[1] <= 040))
434*297Seric 		{
435*297Seric 			c = '@';
436*297Seric 			p++;
437*297Seric 		}
438*297Seric 
439*297Seric 		/* skip blanks */
440*297Seric 		if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0)
441*297Seric 		{
442*297Seric 			if (q >= buflim)
443*297Seric 			{
444*297Seric 				usrerr("Address too long");
445*297Seric 				return (NULL);
446*297Seric 			}
447*297Seric 			*q++ = c;
448*297Seric 		}
449*297Seric 		space = isspace(c);
450*297Seric 	}
451*297Seric 	*q = '\0';
452*297Seric 	if (c == '\0')
453*297Seric 		p--;
454*297Seric 	if (cmntcnt > 0)
455*297Seric 		usrerr("Unbalanced '('");
456*297Seric 	else if (quotemode)
457*297Seric 		usrerr("Unbalanced '\"'");
458*297Seric 	else if (brccnt > 0)
459*297Seric 		usrerr("Unbalanced '<'");
460*297Seric 	else if (buf[0] != '\0')
461*297Seric 		return (p);
462*297Seric 	return (NULL);
463*297Seric }
464