1 # include <stdio.h>
2 # include <ctype.h>
3 # include "dlvrmail.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	1.2	07/25/80";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	addrq header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
39 **	Side Effects:
40 **		none
41 **
42 **	Defined Constants:
43 **		none
44 **
45 **	Requires:
46 **		usrerr
47 **		strcpy (sys)
48 **		isalpha (sys)
49 **		xalloc
50 **		prescan
51 **		flagset
52 **		makelower
53 **		printf (sys)
54 **		ParseTab -- the parse table.
55 **
56 **	Called By:
57 **		main
58 **		sendto
59 **		alias
60 **		savemail
61 **
62 **	History:
63 **		12/26/79 -- written.
64 */
65 
66 addrq *
67 parse(addr, a, copyf)
68 	char *addr;
69 	register addrq *a;
70 	int copyf;
71 {
72 	register char *p;
73 	register struct parsetab *t;
74 	extern struct parsetab ParseTab[];
75 	static char buf[MAXNAME];
76 	register char c;
77 	register char *q;
78 	bool got_one;
79 	extern char *prescan();
80 	extern char *xalloc();
81 
82 	/*
83 	**  Initialize and prescan address.
84 	*/
85 
86 	To = addr;
87 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
88 		return (NULL);
89 
90 	/*
91 	**  Scan parse table.
92 	**	Look for the first entry designating a character
93 	**		that is contained in the address.
94 	**	Arrange for q to point to that character.
95 	**	Check to see that there is only one of the char
96 	**		if it must be unique.
97 	**	Find the last one if the host is on the RHS.
98 	**	Insist that the host name is atomic.
99 	**	If just doing a map, do the map and then start all
100 	**		over.
101 	*/
102 
103  rescan:
104 	got_one = FALSE;
105 	for (t = ParseTab; t->p_char != '\0'; t++)
106 	{
107 		q = NULL;
108 		for (p = buf; (c = *p) != '\0'; p++)
109 		{
110 			/* find the end of this token */
111 			while (isalnum(c) || c == '-' || c == '_')
112 				c = *++p;
113 			if (c == '\0')
114 				break;
115 
116 			if (c == t->p_char)
117 			{
118 				got_one = TRUE;
119 
120 				/* do mapping as appropriate */
121 				if (flagset(P_MAP, t->p_flags))
122 				{
123 					*p = t->p_arg[0];
124 					if (flagset(P_ONE, t->p_flags))
125 						goto rescan;
126 					else
127 						continue;
128 				}
129 
130 				/* arrange for q to point to it */
131 				if (q != NULL && flagset(P_ONE, t->p_flags))
132 				{
133 					usrerr("multichar error");
134 					ExitStat = EX_USAGE;
135 					return (NULL);
136 				}
137 				if (q == NULL || flagset(P_HLAST, t->p_flags))
138 					q = p;
139 			}
140 			else
141 			{
142 				/* insist that host name is atomic */
143 				if (flagset(P_HLAST, t->p_flags))
144 					q = NULL;
145 				else
146 					break;
147 			}
148 		}
149 
150 		if (q != NULL)
151 			break;
152 	}
153 
154 	/*
155 	**  If we matched nothing cleanly, but we did match something
156 	**  somewhere in the process of scanning, then we have a
157 	**  syntax error.  This can happen on things like a@b:c where
158 	**  @ has a right host and : has a left host.
159 	**
160 	**  We also set `q' to the null string, in case someone forgets
161 	**  to put the P_MOVE bit in the local mailer entry of the
162 	**  configuration table.
163 	*/
164 
165 	if (q == NULL)
166 	{
167 		q = "";
168 		if (got_one)
169 		{
170 			usrerr("syntax error");
171 			ExitStat = EX_USAGE;
172 			return (NULL);
173 		}
174 	}
175 
176 	/*
177 	**  Interpret entry.
178 	**	t points to the entry for the mailer we will use.
179 	**	q points to the significant character.
180 	*/
181 
182 	if (a == NULL)
183 		a = (addrq *) xalloc(sizeof *a);
184 	if (copyf > 0)
185 	{
186 		p = xalloc((unsigned) strlen(addr) + 1);
187 		strcpy(p, addr);
188 		a->q_paddr = p;
189 	}
190 	else
191 		a->q_paddr = addr;
192 	a->q_mailer = &Mailer[t->p_mailer];
193 
194 	if (flagset(P_MOVE, t->p_flags))
195 	{
196 		/* send the message to another host & retry */
197 		a->q_host = t->p_arg;
198 		if (copyf >= 0)
199 		{
200 			p = xalloc((unsigned) strlen(buf) + 1);
201 			strcpy(p, buf);
202 			a->q_user = p;
203 		}
204 		else
205 			a->q_user = buf;
206 	}
207 	else
208 	{
209 		/*
210 		**  Make local copies of the host & user and then
211 		**  transport them out.
212 		*/
213 
214 		*q++ = '\0';
215 		if (flagset(P_HLAST, t->p_flags))
216 		{
217 			a->q_host = q;
218 			a->q_user = buf;
219 		}
220 		else
221 		{
222 			a->q_host = buf;
223 			a->q_user = q;
224 		}
225 		if (copyf >= 0)
226 		{
227 			p = xalloc((unsigned) strlen(a->q_host) + 1);
228 			strcpy(p, a->q_host);
229 			a->q_host = p;
230 			p = xalloc((unsigned) strlen(a->q_user) + 1);
231 			strcpy(p, a->q_user);
232 			a->q_user = p;
233 		}
234 	}
235 
236 	/*
237 	**  Do UPPER->lower case mapping unless inhibited.
238 	*/
239 
240 	if (!flagset(P_HST_UPPER, t->p_flags))
241 		makelower(a->q_host);
242 	if (!flagset(P_USR_UPPER, t->p_flags))
243 		makelower(a->q_user);
244 
245 	/*
246 	**  Compute return value.
247 	*/
248 
249 # ifdef DEBUG
250 	if (Debug && copyf >= 0)
251 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
252 		    addr, a->q_host, a->q_user, t->p_mailer);
253 # endif DEBUG
254 
255 	return (a);
256 }
/*
**  MAKELOWER -- Fold a string to lower case in place
**
**	Parameters:
**		p -- the string to fold.  A NULL pointer is
**			accepted and nothing is done.
**
**	Returns:
**		none.
**
**	Side Effects:
**		Each upper case letter in the string pointed to
**			by p is overwritten with its lower case
**			equivalent.
**		Characters with the 0200 bit set are left alone.
**
**	Requires:
**		isupper (sys)
**
**	Called By:
**		parse
**
**	History:
**		12/26/79 -- written.
*/

makelower(p)
	register char *p;
{
	register char ch;

	if (p != NULL)
	{
		while ((ch = *p) != '\0')
		{
			/*
			**  Only plain characters are candidates; the
			**  0200 bit is what prescan sets on quoted or
			**  backslash-escaped characters.
			*/

			if ((ch & 0200) == 0 && isupper(ch))
				*p = ch + ('a' - 'A');
			p++;
		}
	}
}
291 /*
292 **  PRESCAN -- Prescan name and make it canonical
293 **
294 **	Scans a name and turns it into canonical form.  This involves
295 **	deleting blanks, comments (in parentheses), and turning the
296 **	word "at" into an at-sign ("@").  The name is copied as this
297 **	is done; it is legal to copy a name onto itself, since this
298 **	process can only make things smaller.
299 **
300 **	This routine knows about quoted strings and angle brackets.
301 **
302 **	There are certain subtleties to this routine.  The one that
303 **	comes to mind now is that backslashes on the ends of names
304 **	are silently stripped off; this is intentional.  The problem
305 **	is that some versions of sndmsg (like at LBL) set the kill
306 **	character to something other than @ when reading addresses;
307 **	so people type "csvax.eric\@berkeley" -- which screws up the
308 **	berknet mailer.
309 **
310 **	Parameters:
311 **		addr -- the name to chomp.
312 **		buf -- the buffer to copy it into.
313 **		buflim -- the last usable address in the buffer
314 **			(which will old a null byte).  Normally
315 **			&buf[sizeof buf - 1].
316 **		delim -- the delimiter for the address, normally
317 **			'\0' or ','; \0 is accepted in any case.
318 **			are moving in place; set buflim to high core.
319 **
320 **	Returns:
321 **		A pointer to the terminator of buf.
322 **		NULL on error.
323 **
324 **	Side Effects:
325 **		buf gets clobbered.
326 **
327 **	Requires:
328 **		isspace (sys)
329 **		any
330 **		usrerr
331 **
332 **	Called By:
333 **		parse
334 **		maketemp
335 **
336 **	History:
337 **		12/30/79 -- broken from parse; comment processing
338 **			added.
339 */
340 
341 char *
342 prescan(addr, buf, buflim, delim)
343 	char *addr;
344 	char *buf;
345 	char *buflim;
346 	char delim;
347 {
348 	register char *p;
349 	bool space;
350 	bool quotemode;
351 	bool bslashmode;
352 	int cmntcnt;
353 	int brccnt;
354 	register char c;
355 	register char *q;
356 	extern bool any();
357 
358 	space = TRUE;
359 	q = buf;
360 	bslashmode = quotemode = FALSE;
361 	cmntcnt = brccnt = 0;
362 	for (p = addr; (c = *p++ & 0177) != '\0'; )
363 	{
364 		/* chew up special characters */
365 		*q = '\0';
366 		if (bslashmode)
367 		{
368 			c |= 0200;
369 			bslashmode == FALSE;
370 		}
371 		else if (c == '"')
372 			quotemode = !quotemode;
373 		else if (c == '\\')
374 		{
375 			bslashmode++;
376 			continue;
377 		}
378 		else if (quotemode)
379 			c |= 0200;
380 		else if (c == delim)
381 			break;
382 		else if (c == '(')
383 			cmntcnt++;
384 		else if (c == ')')
385 		{
386 			if (cmntcnt <= 0)
387 			{
388 				usrerr("Unbalanced ')'");
389 				return (NULL);
390 			}
391 			else
392 			{
393 				cmntcnt--;
394 				continue;
395 			}
396 		}
397 		if (cmntcnt > 0)
398 			continue;
399 		else if (c == '<')
400 		{
401 			brccnt++;
402 			if (brccnt == 1)
403 			{
404 				/* we prefer using machine readable name */
405 				q = buf;
406 				*q = '\0';
407 				continue;
408 			}
409 		}
410 		else if (c == '>')
411 		{
412 			if (brccnt <= 0)
413 			{
414 				usrerr("Unbalanced `>'");
415 				return (NULL);
416 			}
417 			else
418 				brccnt--;
419 			if (brccnt <= 0)
420 				continue;
421 		}
422 
423 		/*
424 		**  Turn "at" into "@",
425 		**	but only if "at" is a word in and to itself.
426 		**	By the way, I violate the ARPANET RFC-733
427 		**	standard here, by assuming that 'space' delimits
428 		**	atoms.  I assume that is just a mistake, since
429 		**	it violates the spirit of the semantics
430 		**	of the document.....
431 		*/
432 
433 		if (space && (c == 'a' || c == 'A') &&
434 		    (p[0] == 't' || p[0] == 'T') &&
435 		    (any(p[1], "()<>@,;:\\\"") || p[1] <= 040))
436 		{
437 			c = '@';
438 			p++;
439 		}
440 
441 		/* skip blanks */
442 		if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0)
443 		{
444 			if (q >= buflim)
445 			{
446 				usrerr("Address too long");
447 				return (NULL);
448 			}
449 			*q++ = c;
450 		}
451 		space = isspace(c);
452 	}
453 	*q = '\0';
454 	if (c == '\0')
455 		p--;
456 	if (cmntcnt > 0)
457 		usrerr("Unbalanced '('");
458 	else if (quotemode)
459 		usrerr("Unbalanced '\"'");
460 	else if (brccnt > 0)
461 		usrerr("Unbalanced '<'");
462 	else if (buf[0] != '\0')
463 		return (p);
464 	return (NULL);
465 }
466