1 # include <stdio.h>
2 # include <ctype.h>
3 # include "postbox.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	3.3	03/07/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
**	Side Effects:
**		Sets the global To to addr.
**		Sets ExitStat to EX_USAGE when the parse table scan
**			rejects the address.
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 # define DELIMCHARS	"()<>@!.,;:\\\" \t\r\n"	/* word delimiters */
50 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char *p;
59 	register struct parsetab *t;
60 	extern struct parsetab ParseTab[];
61 	static char buf[MAXNAME];
62 	register char c;
63 	register char *q;
64 	bool got_one;
65 	extern char *prescan();
66 	extern char *xalloc();
67 	extern char *newstr();
68 	char **pvp;
69 	extern char *strcpy();
70 
71 	/*
72 	**  Initialize and prescan address.
73 	*/
74 
75 	To = addr;
76 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
77 		return (NULL);
78 
79 	/*
80 	**  Scan parse table.
81 	**	Look for the first entry designating a character
82 	**		that is contained in the address.
83 	**	Arrange for q to point to that character.
84 	**	Check to see that there is only one of the char
85 	**		if it must be unique.
86 	**	Find the last one if the host is on the RHS.
87 	**	Insist that the host name is atomic.
88 	**	If just doing a map, do the map and then start all
89 	**		over.
90 	*/
91 
92  rescan:
93 	got_one = FALSE;
94 	for (t = ParseTab; t->p_char != '\0'; t++)
95 	{
96 		q = NULL;
97 		for (p = buf; (c = *p) != '\0'; p++)
98 		{
99 			/* find the end of this token */
100 			while (isalnum(c) || c == '-' || c == '_')
101 				c = *++p;
102 			if (c == '\0')
103 				break;
104 
105 			if (c == t->p_char)
106 			{
107 				got_one = TRUE;
108 
109 				/* do mapping as appropriate */
110 				if (bitset(P_MAP, t->p_flags))
111 				{
112 					*p = t->p_arg[0];
113 					if (bitset(P_ONE, t->p_flags))
114 						goto rescan;
115 					else
116 						continue;
117 				}
118 
119 				/* arrange for q to point to it */
120 				if (q != NULL && bitset(P_ONE, t->p_flags))
121 				{
122 					usrerr("multichar error");
123 					ExitStat = EX_USAGE;
124 					return (NULL);
125 				}
126 				if (q == NULL || bitset(P_HLAST, t->p_flags))
127 					q = p;
128 			}
129 			else
130 			{
131 				/* insist that host name is atomic */
132 				if (bitset(P_HLAST, t->p_flags))
133 					q = NULL;
134 				else
135 					break;
136 			}
137 		}
138 
139 		if (q != NULL)
140 			break;
141 	}
142 
143 	/*
144 	**  If we matched nothing cleanly, but we did match something
145 	**  somewhere in the process of scanning, then we have a
146 	**  syntax error.  This can happen on things like a@b:c where
147 	**  @ has a right host and : has a left host.
148 	**
149 	**  We also set `q' to the null string, in case someone forgets
150 	**  to put the P_MOVE bit in the local mailer entry of the
151 	**  configuration table.
152 	*/
153 
154 	if (q == NULL)
155 	{
156 		q = "";
157 		if (got_one)
158 		{
159 			usrerr("syntax error");
160 			ExitStat = EX_USAGE;
161 			return (NULL);
162 		}
163 	}
164 
165 	/*
166 	**  Interpret entry.
167 	**	t points to the entry for the mailer we will use.
168 	**	q points to the significant character.
169 	*/
170 
171 	if (a == NULL)
172 		a = (ADDRESS *) xalloc(sizeof *a);
173 	if (copyf > 0)
174 		a->q_paddr = newstr(addr);
175 	else
176 		a->q_paddr = addr;
177 	a->q_rmailer = t->p_mailer;
178 	a->q_mailer = &Mailer[t->p_mailer];
179 
180 	if (bitset(P_MOVE, t->p_flags))
181 	{
182 		/* send the message to another host & retry */
183 		a->q_host = t->p_arg;
184 		if (copyf >= 0)
185 			a->q_user = newstr(buf);
186 		else
187 			a->q_user = buf;
188 	}
189 	else
190 	{
191 		/*
192 		**  Make local copies of the host & user and then
193 		**  transport them out.
194 		*/
195 
196 		*q++ = '\0';
197 		if (bitset(P_HLAST, t->p_flags))
198 		{
199 			a->q_host = q;
200 			a->q_user = buf;
201 		}
202 		else
203 		{
204 			a->q_host = buf;
205 			a->q_user = q;
206 		}
207 
208 		/*
209 		**  Don't go to the net if already on the target host.
210 		**	This is important on the berkeley network, since
211 		**	it get confused if we ask to send to ourselves.
212 		**	For nets like the ARPANET, we probably will have
213 		**	the local list set to NULL to simplify testing.
214 		**	The canonical representation of the name is also set
215 		**	to be just the local name so the duplicate letter
216 		**	suppression algorithm will work.
217 		*/
218 
219 		if ((pvp = a->q_mailer->m_local) != NULL)
220 		{
221 			while (*pvp != NULL)
222 			{
223 				auto char buf2[MAXNAME];
224 
225 				strcpy(buf2, a->q_host);
226 				if (!bitset(P_HST_UPPER, t->p_flags))
227 					makelower(buf2);
228 				if (strcmp(*pvp++, buf2) == 0)
229 				{
230 					strcpy(buf2, a->q_user);
231 					p = a->q_paddr;
232 					if (parse(buf2, a, -1) == NULL)
233 					{
234 						To = addr;
235 						return (NULL);
236 					}
237 					To = a->q_paddr = p;
238 					break;
239 				}
240 			}
241 		}
242 
243 		/* make copies if specified */
244 		if (copyf >= 0)
245 		{
246 			a->q_host = newstr(a->q_host);
247 			a->q_user = newstr(a->q_user);
248 		}
249 	}
250 
251 	/*
252 	**  Do UPPER->lower case mapping unless inhibited.
253 	*/
254 
255 	if (!bitset(P_HST_UPPER, t->p_flags))
256 		makelower(a->q_host);
257 	if (!bitset(P_USR_UPPER, t->p_flags))
258 		makelower(a->q_user);
259 
260 	/*
261 	**  Compute return value.
262 	*/
263 
264 # ifdef DEBUG
265 	if (Debug)
266 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
267 		    addr, a->q_host, a->q_user, t->p_mailer);
268 # endif DEBUG
269 
270 	return (a);
271 }
272 /*
273 **  MAKELOWER -- Translate a line into lower case
274 **
275 **	Parameters:
276 **		p -- the string to translate.  If NULL, return is
277 **			immediate.
278 **
279 **	Returns:
280 **		none.
281 **
282 **	Side Effects:
283 **		String pointed to by p is translated to lower case.
284 **
285 **	Called By:
286 **		parse
287 */
288 
makelower(p)
	register char *p;
{
	/* a missing string needs no translation */
	if (p == NULL)
		return;

	while (*p != '\0')
	{
		register char ch;

		ch = *p;

		/*
		**  Characters carrying the 0200 bit (the mark prescan
		**  puts on quoted characters) are left untouched.
		*/

		if (!(ch & 0200) && isupper(ch))
			*p = ch + ('a' - 'A');
		p++;
	}
}
300 /*
301 **  PRESCAN -- Prescan name and make it canonical
302 **
303 **	Scans a name and turns it into canonical form.  This involves
304 **	deleting blanks, comments (in parentheses), and turning the
305 **	word "at" into an at-sign ("@").  The name is copied as this
306 **	is done; it is legal to copy a name onto itself, since this
307 **	process can only make things smaller.
308 **
309 **	This routine knows about quoted strings and angle brackets.
310 **
311 **	There are certain subtleties to this routine.  The one that
312 **	comes to mind now is that backslashes on the ends of names
313 **	are silently stripped off; this is intentional.  The problem
314 **	is that some versions of sndmsg (like at LBL) set the kill
315 **	character to something other than @ when reading addresses;
316 **	so people type "csvax.eric\@berkeley" -- which screws up the
317 **	berknet mailer.
318 **
319 **	Parameters:
320 **		addr -- the name to chomp.
321 **		buf -- the buffer to copy it into.
**		buflim -- the last usable address in the buffer
**			(which will hold a null byte).  Normally
**			&buf[sizeof buf - 1].  If you are moving
**			in place, set buflim to high core.
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
328 **
329 **	Returns:
**		A pointer into addr where scanning stopped: at the
**			terminating null byte, or just past the delimiter.
331 **		NULL on error.
332 **
333 **	Side Effects:
334 **		buf gets clobbered.
335 **
336 **	Called By:
337 **		parse
338 **		maketemp
339 */
340 
341 char *
342 prescan(addr, buf, buflim, delim)
343 	char *addr;
344 	char *buf;
345 	char *buflim;
346 	char delim;
347 {
348 	register char *p;
349 	bool space;
350 	bool quotemode;
351 	bool bslashmode;
352 	bool delimmode;
353 	int cmntcnt;
354 	int brccnt;
355 	register char c;
356 	register char *q;
357 	extern char *index();
358 
359 	space = FALSE;
360 	delimmode = TRUE;
361 	q = buf;
362 	bslashmode = quotemode = FALSE;
363 	cmntcnt = brccnt = 0;
364 	for (p = addr; (c = *p++) != '\0'; )
365 	{
366 		/* chew up special characters */
367 		*q = '\0';
368 		if (bslashmode)
369 		{
370 			c |= 0200;
371 			bslashmode = FALSE;
372 		}
373 		else if (c == '"')
374 			quotemode = !quotemode;
375 		else if (c == '\\')
376 		{
377 			bslashmode++;
378 			continue;
379 		}
380 		else if (quotemode)
381 			c |= 0200;
382 		else if (c == delim)
383 			break;
384 		else if (c == '(')
385 		{
386 			cmntcnt++;
387 			continue;
388 		}
389 		else if (c == ')')
390 		{
391 			if (cmntcnt <= 0)
392 			{
393 				usrerr("Unbalanced ')'");
394 				return (NULL);
395 			}
396 			else
397 			{
398 				cmntcnt--;
399 				continue;
400 			}
401 		}
402 		if (cmntcnt > 0)
403 			continue;
404 		else if (isascii(c) && isspace(c) && (space || delimmode))
405 			continue;
406 		else if (c == '<')
407 		{
408 			if (brccnt < 0)
409 			{
410 				usrerr("multiple < spec");
411 				return (NULL);
412 			}
413 			brccnt++;
414 			delimmode = TRUE;
415 			space = FALSE;
416 			if (brccnt == 1)
417 			{
418 				/* we prefer using machine readable name */
419 				q = buf;
420 				*q = '\0';
421 				continue;
422 			}
423 		}
424 		else if (c == '>')
425 		{
426 			if (brccnt <= 0)
427 			{
428 				usrerr("Unbalanced `>'");
429 				return (NULL);
430 			}
431 			else
432 				brccnt--;
433 			if (brccnt <= 0)
434 			{
435 				brccnt = -1;
436 				continue;
437 			}
438 		}
439 
440 		/*
441 		**  Turn "at" into "@",
442 		**	but only if "at" is a word.
443 		**	By the way, I violate the ARPANET RFC-733
444 		**	standard here, by assuming that 'space' delimits
445 		**	atoms.  I assume that is just a mistake, since
446 		**	it violates the spirit of the semantics
447 		**	of the document.....
448 		*/
449 
450 		if (delimmode && (c == 'a' || c == 'A') &&
451 		    (p[0] == 't' || p[0] == 'T') &&
452 		    (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040))
453 		{
454 			c = '@';
455 			p++;
456 		}
457 
458 		if (delimmode = (index(DELIMCHARS, c) != NULL))
459 			space = FALSE;
460 
461 		/* if not a space, squirrel it away */
462 		if ((!isascii(c) || !isspace(c)) && brccnt >= 0)
463 		{
464 			if (q >= buflim-1)
465 			{
466 				usrerr("Address too long");
467 				return (NULL);
468 			}
469 			if (space)
470 				*q++ = SPACESUB;
471 			*q++ = c;
472 		}
473 		space = isascii(c) && isspace(c);
474 	}
475 	*q = '\0';
476 	if (c == '\0')
477 		p--;
478 	if (cmntcnt > 0)
479 		usrerr("Unbalanced '('");
480 	else if (quotemode)
481 		usrerr("Unbalanced '\"'");
482 	else if (brccnt > 0)
483 		usrerr("Unbalanced '<'");
484 	else if (buf[0] != '\0')
485 		return (p);
486 	return (NULL);
487 }
488