1 # include <stdio.h>
2 # include <ctype.h>
3 # include "postbox.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	3.2	03/07/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
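**	Side Effects:
**		Sets To to the address being parsed.
**		On a syntax error, calls usrerr and sets ExitStat
**			to EX_USAGE.
**		When copyf is -1 the user & host returned may point
**			into a static buffer that the next call to
**			parse overwrites.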
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 # define DELIMCHARS	"()<>@!.,;:\\\" \t\r\n"	/* word delimiters */
50 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char *p;
59 	register struct parsetab *t;
60 	extern struct parsetab ParseTab[];
61 	static char buf[MAXNAME];
62 	register char c;
63 	register char *q;
64 	bool got_one;
65 	extern char *prescan();
66 	extern char *xalloc();
67 	extern char *newstr();
68 	char **pvp;
69 
70 	/*
71 	**  Initialize and prescan address.
72 	*/
73 
74 	To = addr;
75 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
76 		return (NULL);
77 
78 	/*
79 	**  Scan parse table.
80 	**	Look for the first entry designating a character
81 	**		that is contained in the address.
82 	**	Arrange for q to point to that character.
83 	**	Check to see that there is only one of the char
84 	**		if it must be unique.
85 	**	Find the last one if the host is on the RHS.
86 	**	Insist that the host name is atomic.
87 	**	If just doing a map, do the map and then start all
88 	**		over.
89 	*/
90 
91  rescan:
92 	got_one = FALSE;
93 	for (t = ParseTab; t->p_char != '\0'; t++)
94 	{
95 		q = NULL;
96 		for (p = buf; (c = *p) != '\0'; p++)
97 		{
98 			/* find the end of this token */
99 			while (isalnum(c) || c == '-' || c == '_')
100 				c = *++p;
101 			if (c == '\0')
102 				break;
103 
104 			if (c == t->p_char)
105 			{
106 				got_one = TRUE;
107 
108 				/* do mapping as appropriate */
109 				if (bitset(P_MAP, t->p_flags))
110 				{
111 					*p = t->p_arg[0];
112 					if (bitset(P_ONE, t->p_flags))
113 						goto rescan;
114 					else
115 						continue;
116 				}
117 
118 				/* arrange for q to point to it */
119 				if (q != NULL && bitset(P_ONE, t->p_flags))
120 				{
121 					usrerr("multichar error");
122 					ExitStat = EX_USAGE;
123 					return (NULL);
124 				}
125 				if (q == NULL || bitset(P_HLAST, t->p_flags))
126 					q = p;
127 			}
128 			else
129 			{
130 				/* insist that host name is atomic */
131 				if (bitset(P_HLAST, t->p_flags))
132 					q = NULL;
133 				else
134 					break;
135 			}
136 		}
137 
138 		if (q != NULL)
139 			break;
140 	}
141 
142 	/*
143 	**  If we matched nothing cleanly, but we did match something
144 	**  somewhere in the process of scanning, then we have a
145 	**  syntax error.  This can happen on things like a@b:c where
146 	**  @ has a right host and : has a left host.
147 	**
148 	**  We also set `q' to the null string, in case someone forgets
149 	**  to put the P_MOVE bit in the local mailer entry of the
150 	**  configuration table.
151 	*/
152 
153 	if (q == NULL)
154 	{
155 		q = "";
156 		if (got_one)
157 		{
158 			usrerr("syntax error");
159 			ExitStat = EX_USAGE;
160 			return (NULL);
161 		}
162 	}
163 
164 	/*
165 	**  Interpret entry.
166 	**	t points to the entry for the mailer we will use.
167 	**	q points to the significant character.
168 	*/
169 
170 	if (a == NULL)
171 		a = (ADDRESS *) xalloc(sizeof *a);
172 	if (copyf > 0)
173 		a->q_paddr = newstr(addr);
174 	else
175 		a->q_paddr = addr;
176 	a->q_rmailer = t->p_mailer;
177 	a->q_mailer = &Mailer[t->p_mailer];
178 
179 	if (bitset(P_MOVE, t->p_flags))
180 	{
181 		/* send the message to another host & retry */
182 		a->q_host = t->p_arg;
183 		if (copyf >= 0)
184 			a->q_user = newstr(buf);
185 		else
186 			a->q_user = buf;
187 	}
188 	else
189 	{
190 		/*
191 		**  Make local copies of the host & user and then
192 		**  transport them out.
193 		*/
194 
195 		*q++ = '\0';
196 		if (bitset(P_HLAST, t->p_flags))
197 		{
198 			a->q_host = q;
199 			a->q_user = buf;
200 		}
201 		else
202 		{
203 			a->q_host = buf;
204 			a->q_user = q;
205 		}
206 
207 		/*
208 		**  Don't go to the net if already on the target host.
209 		**	This is important on the berkeley network, since
210 		**	it get confused if we ask to send to ourselves.
211 		**	For nets like the ARPANET, we probably will have
212 		**	the local list set to NULL to simplify testing.
213 		**	The canonical representation of the name is also set
214 		**	to be just the local name so the duplicate letter
215 		**	suppression algorithm will work.
216 		*/
217 
218 		if ((pvp = a->q_mailer->m_local) != NULL)
219 		{
220 			while (*pvp != NULL)
221 			{
222 				auto char buf2[MAXNAME];
223 
224 				strcpy(buf2, a->q_host);
225 				if (!bitset(P_HST_UPPER, t->p_flags))
226 					makelower(buf2);
227 				if (strcmp(*pvp++, buf2) == 0)
228 				{
229 					strcpy(buf2, a->q_user);
230 					p = a->q_paddr;
231 					if (parse(buf2, a, -1) == NULL)
232 					{
233 						To = addr;
234 						return (NULL);
235 					}
236 					To = a->q_paddr = p;
237 					break;
238 				}
239 			}
240 		}
241 
242 		/* make copies if specified */
243 		if (copyf >= 0)
244 		{
245 			a->q_host = newstr(a->q_host);
246 			a->q_user = newstr(a->q_user);
247 		}
248 	}
249 
250 	/*
251 	**  Do UPPER->lower case mapping unless inhibited.
252 	*/
253 
254 	if (!bitset(P_HST_UPPER, t->p_flags))
255 		makelower(a->q_host);
256 	if (!bitset(P_USR_UPPER, t->p_flags))
257 		makelower(a->q_user);
258 
259 	/*
260 	**  Compute return value.
261 	*/
262 
263 # ifdef DEBUG
264 	if (Debug)
265 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
266 		    addr, a->q_host, a->q_user, t->p_mailer);
267 # endif DEBUG
268 
269 	return (a);
270 }
271 /*
272 **  MAKELOWER -- Translate a line into lower case
273 **
274 **	Parameters:
275 **		p -- the string to translate.  If NULL, return is
276 **			immediate.
277 **
278 **	Returns:
279 **		none.
280 **
281 **	Side Effects:
282 **		String pointed to by p is translated to lower case.
283 **
284 **	Called By:
285 **		parse
286 */
287 
makelower(p)
	register char *p;
{
	/*
	**  Walk the string in place.  A NULL pointer is treated
	**  like an empty string: nothing is touched.
	*/

	for (; p != NULL && *p != '\0'; p++)
	{
		/* characters with the 0200 bit set are left alone */
		if ((*p & 0200) != 0)
			continue;
		if (isupper(*p))
			*p += 'a' - 'A';
	}
}
299 /*
300 **  PRESCAN -- Prescan name and make it canonical
301 **
302 **	Scans a name and turns it into canonical form.  This involves
303 **	deleting blanks, comments (in parentheses), and turning the
304 **	word "at" into an at-sign ("@").  The name is copied as this
305 **	is done; it is legal to copy a name onto itself, since this
306 **	process can only make things smaller.
307 **
308 **	This routine knows about quoted strings and angle brackets.
309 **
310 **	There are certain subtleties to this routine.  The one that
311 **	comes to mind now is that backslashes on the ends of names
312 **	are silently stripped off; this is intentional.  The problem
313 **	is that some versions of sndmsg (like at LBL) set the kill
314 **	character to something other than @ when reading addresses;
315 **	so people type "csvax.eric\@berkeley" -- which screws up the
316 **	berknet mailer.
317 **
318 **	Parameters:
319 **		addr -- the name to chomp.
320 **		buf -- the buffer to copy it into.
**		buflim -- the last usable address in the buffer
**			(which will hold a null byte).  Normally
**			&buf[sizeof buf - 1].  If you are moving
**			in place, set buflim to high core.
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
327 **
328 **	Returns:
**		A pointer into addr: just past the delimiter that
**			ended the scan, or at the terminating null
**			byte if the whole string was consumed.
330 **		NULL on error.
331 **
332 **	Side Effects:
333 **		buf gets clobbered.
334 **
335 **	Called By:
336 **		parse
337 **		maketemp
338 */
339 
340 char *
341 prescan(addr, buf, buflim, delim)
342 	char *addr;
343 	char *buf;
344 	char *buflim;
345 	char delim;
346 {
347 	register char *p;
348 	bool space;
349 	bool quotemode;
350 	bool bslashmode;
351 	bool delimmode;
352 	int cmntcnt;
353 	int brccnt;
354 	register char c;
355 	register char *q;
356 	extern char *index();
357 
358 	space = FALSE;
359 	delimmode = TRUE;
360 	q = buf;
361 	bslashmode = quotemode = FALSE;
362 	cmntcnt = brccnt = 0;
363 	for (p = addr; (c = *p++) != '\0'; )
364 	{
365 		/* chew up special characters */
366 		*q = '\0';
367 		if (bslashmode)
368 		{
369 			c |= 0200;
370 			bslashmode = FALSE;
371 		}
372 		else if (c == '"')
373 			quotemode = !quotemode;
374 		else if (c == '\\')
375 		{
376 			bslashmode++;
377 			continue;
378 		}
379 		else if (quotemode)
380 			c |= 0200;
381 		else if (c == delim)
382 			break;
383 		else if (c == '(')
384 		{
385 			cmntcnt++;
386 			continue;
387 		}
388 		else if (c == ')')
389 		{
390 			if (cmntcnt <= 0)
391 			{
392 				usrerr("Unbalanced ')'");
393 				return (NULL);
394 			}
395 			else
396 			{
397 				cmntcnt--;
398 				continue;
399 			}
400 		}
401 		if (cmntcnt > 0)
402 			continue;
403 		else if (isascii(c) && isspace(c) && (space || delimmode))
404 			continue;
405 		else if (c == '<')
406 		{
407 			if (brccnt < 0)
408 			{
409 				usrerr("multiple < spec");
410 				return (NULL);
411 			}
412 			brccnt++;
413 			delimmode = TRUE;
414 			space = FALSE;
415 			if (brccnt == 1)
416 			{
417 				/* we prefer using machine readable name */
418 				q = buf;
419 				*q = '\0';
420 				continue;
421 			}
422 		}
423 		else if (c == '>')
424 		{
425 			if (brccnt <= 0)
426 			{
427 				usrerr("Unbalanced `>'");
428 				return (NULL);
429 			}
430 			else
431 				brccnt--;
432 			if (brccnt <= 0)
433 			{
434 				brccnt = -1;
435 				continue;
436 			}
437 		}
438 
439 		/*
440 		**  Turn "at" into "@",
441 		**	but only if "at" is a word.
442 		**	By the way, I violate the ARPANET RFC-733
443 		**	standard here, by assuming that 'space' delimits
444 		**	atoms.  I assume that is just a mistake, since
445 		**	it violates the spirit of the semantics
446 		**	of the document.....
447 		*/
448 
449 		if (delimmode && (c == 'a' || c == 'A') &&
450 		    (p[0] == 't' || p[0] == 'T') &&
451 		    (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040))
452 		{
453 			c = '@';
454 			p++;
455 		}
456 
457 		if (delimmode = (index(DELIMCHARS, c) != NULL))
458 			space = FALSE;
459 
460 		/* if not a space, squirrel it away */
461 		if ((!isascii(c) || !isspace(c)) && brccnt >= 0)
462 		{
463 			if (q >= buflim-1)
464 			{
465 				usrerr("Address too long");
466 				return (NULL);
467 			}
468 			if (space)
469 				*q++ = SPACESUB;
470 			*q++ = c;
471 		}
472 		space = isascii(c) && isspace(c);
473 	}
474 	*q = '\0';
475 	if (c == '\0')
476 		p--;
477 	if (cmntcnt > 0)
478 		usrerr("Unbalanced '('");
479 	else if (quotemode)
480 		usrerr("Unbalanced '\"'");
481 	else if (brccnt > 0)
482 		usrerr("Unbalanced '<'");
483 	else if (buf[0] != '\0')
484 		return (p);
485 	return (NULL);
486 }
487