1 # include <stdio.h>
2 # include <ctype.h>
3 # include "dlvrmail.h"
4 
/* SCCS identification string for this source file. */
static char	SccsId[] = "@(#)parseaddr.c	2.3	01/08/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	addrq header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
39 **	Side Effects:
**		Sets To to addr.
**		Sets ExitStat to EX_USAGE on a "multichar" or
**			"syntax" error.
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
/*
**  DELIMCHARS -- the characters that delimit words in an address;
**	prescan hands this set to any() to decide whether the
**	current character is a delimiter.
**  SPACESUB -- '.' with the 0200 bit turned on; prescan stores one
**	of these in place of white space found between two
**	non-delimiter characters.
*/
# define DELIMCHARS	"()<>@!.,;:\\\""	/* word delimiters */
# define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 addrq *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register addrq *a;
56 	int copyf;
57 {
58 	register char *p;
59 	register struct parsetab *t;
60 	extern struct parsetab ParseTab[];
61 	static char buf[MAXNAME];
62 	register char c;
63 	register char *q;
64 	bool got_one;
65 	extern char *prescan();
66 	extern char *xalloc();
67 	char **pvp;
68 
69 	/*
70 	**  Initialize and prescan address.
71 	*/
72 
73 	To = addr;
74 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
75 		return (NULL);
76 
77 	/*
78 	**  Scan parse table.
79 	**	Look for the first entry designating a character
80 	**		that is contained in the address.
81 	**	Arrange for q to point to that character.
82 	**	Check to see that there is only one of the char
83 	**		if it must be unique.
84 	**	Find the last one if the host is on the RHS.
85 	**	Insist that the host name is atomic.
86 	**	If just doing a map, do the map and then start all
87 	**		over.
88 	*/
89 
90  rescan:
91 	got_one = FALSE;
92 	for (t = ParseTab; t->p_char != '\0'; t++)
93 	{
94 		q = NULL;
95 		for (p = buf; (c = *p) != '\0'; p++)
96 		{
97 			/* find the end of this token */
98 			while (isalnum(c) || c == '-' || c == '_')
99 				c = *++p;
100 			if (c == '\0')
101 				break;
102 
103 			if (c == t->p_char)
104 			{
105 				got_one = TRUE;
106 
107 				/* do mapping as appropriate */
108 				if (flagset(P_MAP, t->p_flags))
109 				{
110 					*p = t->p_arg[0];
111 					if (flagset(P_ONE, t->p_flags))
112 						goto rescan;
113 					else
114 						continue;
115 				}
116 
117 				/* arrange for q to point to it */
118 				if (q != NULL && flagset(P_ONE, t->p_flags))
119 				{
120 					usrerr("multichar error");
121 					ExitStat = EX_USAGE;
122 					return (NULL);
123 				}
124 				if (q == NULL || flagset(P_HLAST, t->p_flags))
125 					q = p;
126 			}
127 			else
128 			{
129 				/* insist that host name is atomic */
130 				if (flagset(P_HLAST, t->p_flags))
131 					q = NULL;
132 				else
133 					break;
134 			}
135 		}
136 
137 		if (q != NULL)
138 			break;
139 	}
140 
141 	/*
142 	**  If we matched nothing cleanly, but we did match something
143 	**  somewhere in the process of scanning, then we have a
144 	**  syntax error.  This can happen on things like a@b:c where
145 	**  @ has a right host and : has a left host.
146 	**
147 	**  We also set `q' to the null string, in case someone forgets
148 	**  to put the P_MOVE bit in the local mailer entry of the
149 	**  configuration table.
150 	*/
151 
152 	if (q == NULL)
153 	{
154 		q = "";
155 		if (got_one)
156 		{
157 			usrerr("syntax error");
158 			ExitStat = EX_USAGE;
159 			return (NULL);
160 		}
161 	}
162 
163 	/*
164 	**  Interpret entry.
165 	**	t points to the entry for the mailer we will use.
166 	**	q points to the significant character.
167 	*/
168 
169 	if (a == NULL)
170 		a = (addrq *) xalloc(sizeof *a);
171 	if (copyf > 0)
172 	{
173 		p = xalloc((unsigned) strlen(addr) + 1);
174 		strcpy(p, addr);
175 		a->q_paddr = p;
176 	}
177 	else
178 		a->q_paddr = addr;
179 	a->q_mailer = &Mailer[t->p_mailer];
180 
181 	if (flagset(P_MOVE, t->p_flags))
182 	{
183 		/* send the message to another host & retry */
184 		a->q_host = t->p_arg;
185 		if (copyf >= 0)
186 		{
187 			p = xalloc((unsigned) strlen(buf) + 1);
188 			strcpy(p, buf);
189 			a->q_user = p;
190 		}
191 		else
192 			a->q_user = buf;
193 	}
194 	else
195 	{
196 		/*
197 		**  Make local copies of the host & user and then
198 		**  transport them out.
199 		*/
200 
201 		*q++ = '\0';
202 		if (flagset(P_HLAST, t->p_flags))
203 		{
204 			a->q_host = q;
205 			a->q_user = buf;
206 		}
207 		else
208 		{
209 			a->q_host = buf;
210 			a->q_user = q;
211 		}
212 
213 		/*
214 		**  Don't go to the net if already on the target host.
215 		**	This is important on the berkeley network, since
216 		**	it get confused if we ask to send to ourselves.
217 		**	For nets like the ARPANET, we probably will have
218 		**	the local list set to NULL to simplify testing.
219 		**	The canonical representation of the name is also set
220 		**	to be just the local name so the duplicate letter
221 		**	suppression algorithm will work.
222 		*/
223 
224 		if ((pvp = a->q_mailer->m_local) != NULL)
225 		{
226 			while (*pvp != NULL)
227 			{
228 				auto char buf2[MAXNAME];
229 
230 				strcpy(buf2, a->q_host);
231 				if (!flagset(P_HST_UPPER, t->p_flags))
232 					makelower(buf2);
233 				if (strcmp(*pvp++, buf2) == 0)
234 				{
235 					strcpy(buf2, a->q_user);
236 					p = a->q_paddr;
237 					if (parse(buf2, a, -1) == NULL)
238 					{
239 						To = addr;
240 						return (NULL);
241 					}
242 					To = a->q_paddr = p;
243 					break;
244 				}
245 			}
246 		}
247 
248 		/* make copies if specified */
249 		if (copyf >= 0)
250 		{
251 			p = xalloc((unsigned) strlen(a->q_host) + 1);
252 			strcpy(p, a->q_host);
253 			a->q_host = p;
254 			p = xalloc((unsigned) strlen(a->q_user) + 1);
255 			strcpy(p, a->q_user);
256 			a->q_user = p;
257 		}
258 	}
259 
260 	/*
261 	**  Do UPPER->lower case mapping unless inhibited.
262 	*/
263 
264 	if (!flagset(P_HST_UPPER, t->p_flags))
265 		makelower(a->q_host);
266 	if (!flagset(P_USR_UPPER, t->p_flags))
267 		makelower(a->q_user);
268 
269 	/*
270 	**  Compute return value.
271 	*/
272 
273 # ifdef DEBUG
274 	if (Debug)
275 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
276 		    addr, a->q_host, a->q_user, t->p_mailer);
277 # endif DEBUG
278 
279 	return (a);
280 }
281 /*
282 **  MAKELOWER -- Translate a line into lower case
283 **
284 **	Parameters:
285 **		p -- the string to translate.  If NULL, return is
286 **			immediate.
287 **
288 **	Returns:
289 **		none.
290 **
291 **	Side Effects:
292 **		String pointed to by p is translated to lower case.
293 **
294 **	Called By:
295 **		parse
296 */
297 
makelower(p)
	register char *p;
{
	/* a null pointer means there is nothing to translate */
	if (p == NULL)
		return;

	while (*p != '\0')
	{
		/*
		**  Characters with the 0200 bit on are left alone;
		**  only plain upper case letters are shifted down.
		*/

		if ((*p & 0200) == 0 && isupper(*p))
			*p += 'a' - 'A';
		p++;
	}
}
309 /*
310 **  PRESCAN -- Prescan name and make it canonical
311 **
312 **	Scans a name and turns it into canonical form.  This involves
313 **	deleting blanks, comments (in parentheses), and turning the
314 **	word "at" into an at-sign ("@").  The name is copied as this
315 **	is done; it is legal to copy a name onto itself, since this
316 **	process can only make things smaller.
317 **
318 **	This routine knows about quoted strings and angle brackets.
319 **
320 **	There are certain subtleties to this routine.  The one that
321 **	comes to mind now is that backslashes on the ends of names
322 **	are silently stripped off; this is intentional.  The problem
323 **	is that some versions of sndmsg (like at LBL) set the kill
324 **	character to something other than @ when reading addresses;
325 **	so people type "csvax.eric\@berkeley" -- which screws up the
326 **	berknet mailer.
327 **
328 **	Parameters:
329 **		addr -- the name to chomp.
330 **		buf -- the buffer to copy it into.
**		buflim -- the last usable address in the buffer
**			(which will hold a null byte).  Normally
**			&buf[sizeof buf - 1].  If you are moving
**			in place, set buflim to high core.
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
337 **
338 **	Returns:
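**		A pointer into addr at the place where scanning
**			stopped: the terminating null byte, or the
**			character just after delim.
**		NULL on error, or if the resulting name is empty.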
341 **
342 **	Side Effects:
343 **		buf gets clobbered.
344 **
345 **	Called By:
346 **		parse
347 **		maketemp
348 */
349 
350 char *
351 prescan(addr, buf, buflim, delim)
352 	char *addr;
353 	char *buf;
354 	char *buflim;
355 	char delim;
356 {
357 	register char *p;
358 	bool space;
359 	bool quotemode;
360 	bool bslashmode;
361 	bool delimmode;
362 	int cmntcnt;
363 	int brccnt;
364 	register char c;
365 	register char *q;
366 	extern bool any();
367 
368 	space = FALSE;
369 	delimmode = TRUE;
370 	q = buf;
371 	bslashmode = quotemode = FALSE;
372 	cmntcnt = brccnt = 0;
373 	for (p = addr; (c = *p++) != '\0'; )
374 	{
375 		/* chew up special characters */
376 		*q = '\0';
377 		if (bslashmode)
378 		{
379 			c |= 0200;
380 			bslashmode = FALSE;
381 		}
382 		else if (c == '"')
383 			quotemode = !quotemode;
384 		else if (c == '\\')
385 		{
386 			bslashmode++;
387 			continue;
388 		}
389 		else if (quotemode)
390 			c |= 0200;
391 		else if (c == delim)
392 			break;
393 		else if (c == '(')
394 		{
395 			cmntcnt++;
396 			continue;
397 		}
398 		else if (c == ')')
399 		{
400 			if (cmntcnt <= 0)
401 			{
402 				usrerr("Unbalanced ')'");
403 				return (NULL);
404 			}
405 			else
406 			{
407 				cmntcnt--;
408 				continue;
409 			}
410 		}
411 		if (cmntcnt > 0)
412 			continue;
413 		else if (isascii(c) && isspace(c) && (space || delimmode))
414 			continue;
415 		else if (c == '<')
416 		{
417 			if (brccnt < 0)
418 			{
419 				usrerr("multiple < spec");
420 				return (NULL);
421 			}
422 			brccnt++;
423 			delimmode = TRUE;
424 			space = FALSE;
425 			if (brccnt == 1)
426 			{
427 				/* we prefer using machine readable name */
428 				q = buf;
429 				*q = '\0';
430 				continue;
431 			}
432 		}
433 		else if (c == '>')
434 		{
435 			if (brccnt <= 0)
436 			{
437 				usrerr("Unbalanced `>'");
438 				return (NULL);
439 			}
440 			else
441 				brccnt--;
442 			if (brccnt <= 0)
443 			{
444 				brccnt = -1;
445 				continue;
446 			}
447 		}
448 
449 		/*
450 		**  Turn "at" into "@",
451 		**	but only if "at" is a word.
452 		**	By the way, I violate the ARPANET RFC-733
453 		**	standard here, by assuming that 'space' delimits
454 		**	atoms.  I assume that is just a mistake, since
455 		**	it violates the spirit of the semantics
456 		**	of the document.....
457 		*/
458 
459 		if (delimmode && (c == 'a' || c == 'A') &&
460 		    (p[0] == 't' || p[0] == 'T') &&
461 		    (any(p[1], DELIMCHARS) || p[1] <= 040))
462 		{
463 			c = '@';
464 			p++;
465 		}
466 
467 		if (delimmode = any(c, DELIMCHARS))
468 			space = FALSE;
469 
470 		/* if not a space, squirrel it away */
471 		if ((!isascii(c) || !isspace(c)) && brccnt >= 0)
472 		{
473 			if (q >= buflim-1)
474 			{
475 				usrerr("Address too long");
476 				return (NULL);
477 			}
478 			if (space)
479 				*q++ = SPACESUB;
480 			*q++ = c;
481 		}
482 		space = isspace(c);
483 	}
484 	*q = '\0';
485 	if (c == '\0')
486 		p--;
487 	if (cmntcnt > 0)
488 		usrerr("Unbalanced '('");
489 	else if (quotemode)
490 		usrerr("Unbalanced '\"'");
491 	else if (brccnt > 0)
492 		usrerr("Unbalanced '<'");
493 	else if (buf[0] != '\0')
494 		return (p);
495 	return (NULL);
496 }
497