1 # include <stdio.h>
2 # include <ctype.h>
3 # include "postbox.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	3.5	03/08/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
39 **	Side Effects:
40 **		none
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 # define DELIMCHARS	"()<>@!.,;:\\\" \t\r\n"	/* word delimiters */
50 # define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char *p;
59 	register struct parsetab *t;
60 	extern struct parsetab ParseTab[];
61 	static char buf[MAXNAME];
62 	register char c;
63 	register char *q;
64 	bool got_one;
65 	extern char *prescan();
66 	extern char *xalloc();
67 	extern char *newstr();
68 	char **pvp;
69 	char ***hvp;
70 	extern char *strcpy();
71 
72 	/*
73 	**  Initialize and prescan address.
74 	*/
75 
76 	To = addr;
77 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
78 		return (NULL);
79 
80 	/*
81 	**  Scan parse table.
82 	**	Look for the first entry designating a character
83 	**		that is contained in the address.
84 	**	Arrange for q to point to that character.
85 	**	Check to see that there is only one of the char
86 	**		if it must be unique.
87 	**	Find the last one if the host is on the RHS.
88 	**	Insist that the host name is atomic.
89 	**	If just doing a map, do the map and then start all
90 	**		over.
91 	*/
92 
93  rescan:
94 	got_one = FALSE;
95 	for (t = ParseTab; t->p_char != '\0'; t++)
96 	{
97 		q = NULL;
98 		for (p = buf; (c = *p) != '\0'; p++)
99 		{
100 			/* find the end of this token */
101 			while (isalnum(c) || c == '-' || c == '_')
102 				c = *++p;
103 			if (c == '\0')
104 				break;
105 
106 			if (c == t->p_char)
107 			{
108 				got_one = TRUE;
109 
110 				/* do mapping as appropriate */
111 				if (bitset(P_MAP, t->p_flags))
112 				{
113 					*p = t->p_arg[0];
114 					if (bitset(P_ONE, t->p_flags))
115 						goto rescan;
116 					else
117 						continue;
118 				}
119 
120 				/* arrange for q to point to it */
121 				if (q != NULL && bitset(P_ONE, t->p_flags))
122 				{
123 					usrerr("multichar error");
124 					ExitStat = EX_USAGE;
125 					return (NULL);
126 				}
127 				if (q == NULL || bitset(P_HLAST, t->p_flags))
128 					q = p;
129 			}
130 			else
131 			{
132 				/* insist that host name is atomic */
133 				if (bitset(P_HLAST, t->p_flags))
134 					q = NULL;
135 				else
136 					break;
137 			}
138 		}
139 
140 		if (q != NULL)
141 			break;
142 	}
143 
144 	/*
145 	**  If we matched nothing cleanly, but we did match something
146 	**  somewhere in the process of scanning, then we have a
147 	**  syntax error.  This can happen on things like a@b:c where
148 	**  @ has a right host and : has a left host.
149 	**
150 	**  We also set `q' to the null string, in case someone forgets
151 	**  to put the P_MOVE bit in the local mailer entry of the
152 	**  configuration table.
153 	*/
154 
155 	if (q == NULL)
156 	{
157 		q = "";
158 		if (got_one)
159 		{
160 			usrerr("syntax error");
161 			ExitStat = EX_USAGE;
162 			return (NULL);
163 		}
164 	}
165 
166 	/*
167 	**  Interpret entry.
168 	**	t points to the entry for the mailer we will use.
169 	**	q points to the significant character.
170 	*/
171 
172 	if (a == NULL)
173 		a = (ADDRESS *) xalloc(sizeof *a);
174 	if (copyf > 0)
175 		a->q_paddr = newstr(addr);
176 	else
177 		a->q_paddr = addr;
178 	a->q_mailer = a->q_rmailer = t->p_mailer;
179 
180 	if (bitset(P_MOVE, t->p_flags))
181 	{
182 		/* send the message to another host & retry */
183 		a->q_host = t->p_arg;
184 		if (copyf >= 0)
185 			a->q_user = newstr(buf);
186 		else
187 			a->q_user = buf;
188 	}
189 	else
190 	{
191 		/*
192 		**  Make local copies of the host & user and then
193 		**  transport them out.
194 		*/
195 
196 		*q++ = '\0';
197 		if (bitset(P_HLAST, t->p_flags))
198 		{
199 			a->q_host = q;
200 			a->q_user = buf;
201 		}
202 		else
203 		{
204 			a->q_host = buf;
205 			a->q_user = q;
206 		}
207 
208 		/*
209 		**  Don't go to the net if already on the target host.
210 		**	This is important on the berkeley network, since
211 		**	it get confused if we ask to send to ourselves.
212 		**	For nets like the ARPANET, we probably will have
213 		**	the local list set to NULL to simplify testing.
214 		**	The canonical representation of the name is also set
215 		**	to be just the local name so the duplicate letter
216 		**	suppression algorithm will work.
217 		*/
218 
219 		if ((pvp = Mailer[a->q_mailer]->m_local) != NULL)
220 		{
221 			while (*pvp != NULL)
222 			{
223 				auto char buf2[MAXNAME];
224 
225 				strcpy(buf2, a->q_host);
226 				if (!bitset(P_HST_UPPER, t->p_flags))
227 					makelower(buf2);
228 				if (strcmp(*pvp++, buf2) == 0)
229 				{
230 					strcpy(buf2, a->q_user);
231 					p = a->q_paddr;
232 					if (parse(buf2, a, -1) == NULL)
233 					{
234 						To = addr;
235 						return (NULL);
236 					}
237 					To = a->q_paddr = p;
238 					break;
239 				}
240 			}
241 		}
242 
243 		/*
244 		**  Do host equivalence.
245 		**	This allows us to map together messages that
246 		**	would otherwise have several copies going
247 		**	through the same net link.
248 		*/
249 
250 		for (hvp = Mailer[a->q_mailer]->m_hmap; *hvp != NULL; hvp++)
251 		{
252 			register bool doremap;
253 
254 			doremap = FALSE;
255 			for (pvp = *hvp; *pvp != NULL; pvp++)
256 			{
257 				p = *pvp;
258 				if (*p == '\0')
259 				{
260 					/* null string: match everything */
261 					doremap = TRUE;
262 				}
263 				else if (strcmp(p, a->q_host) == 0)
264 					doremap = TRUE;
265 			}
266 
267 			if (doremap)
268 			{
269 				a->q_host = pvp[-1];
270 				a->q_user = a->q_paddr;
271 			}
272 		}
273 
274 		/* make copies if specified */
275 		if (copyf >= 0)
276 		{
277 			a->q_host = newstr(a->q_host);
278 			if (a->q_user != a->q_paddr)
279 				a->q_user = newstr(a->q_user);
280 		}
281 	}
282 
283 	/*
284 	**  Do UPPER->lower case mapping unless inhibited.
285 	*/
286 
287 	if (!bitset(P_HST_UPPER, t->p_flags))
288 		makelower(a->q_host);
289 	if (!bitset(P_USR_UPPER, t->p_flags))
290 		makelower(a->q_user);
291 
292 	/*
293 	**  Compute return value.
294 	*/
295 
296 # ifdef DEBUG
297 	if (Debug)
298 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
299 		    addr, a->q_host, a->q_user, t->p_mailer);
300 # endif DEBUG
301 
302 	return (a);
303 }
/*
**  MAKELOWER -- Fold a string to lower case, in place
**
**	Every upper case letter in the string is replaced by the
**	corresponding lower case letter.  Characters with the
**	0200 bit set are left exactly as they are.
**
**	Parameters:
**		p -- the string to translate.  A NULL pointer is
**			accepted and nothing is done.
**
**	Returns:
**		none.
**
**	Side Effects:
**		The string pointed to by p is rewritten.
**
**	Called By:
**		parse
*/

makelower(p)
	register char *p;
{
	register char ch;

	if (p != NULL)
	{
		while ((ch = *p) != '\0')
		{
			/* only plain (7-bit) characters are candidates */
			if (!(ch & 0200) && isupper(ch))
				*p = ch + ('a' - 'A');
			p++;
		}
	}
}
332 /*
333 **  PRESCAN -- Prescan name and make it canonical
334 **
335 **	Scans a name and turns it into canonical form.  This involves
336 **	deleting blanks, comments (in parentheses), and turning the
337 **	word "at" into an at-sign ("@").  The name is copied as this
338 **	is done; it is legal to copy a name onto itself, since this
339 **	process can only make things smaller.
340 **
341 **	This routine knows about quoted strings and angle brackets.
342 **
343 **	There are certain subtleties to this routine.  The one that
344 **	comes to mind now is that backslashes on the ends of names
345 **	are silently stripped off; this is intentional.  The problem
346 **	is that some versions of sndmsg (like at LBL) set the kill
347 **	character to something other than @ when reading addresses;
348 **	so people type "csvax.eric\@berkeley" -- which screws up the
349 **	berknet mailer.
350 **
351 **	Parameters:
352 **		addr -- the name to chomp.
353 **		buf -- the buffer to copy it into.
354 **		buflim -- the last usable address in the buffer
355 **			(which will old a null byte).  Normally
356 **			&buf[sizeof buf - 1].
357 **		delim -- the delimiter for the address, normally
358 **			'\0' or ','; \0 is accepted in any case.
359 **			are moving in place; set buflim to high core.
360 **
361 **	Returns:
362 **		A pointer to the terminator of buf.
363 **		NULL on error.
364 **
365 **	Side Effects:
366 **		buf gets clobbered.
367 **
368 **	Called By:
369 **		parse
370 **		maketemp
371 */
372 
373 char *
374 prescan(addr, buf, buflim, delim)
375 	char *addr;
376 	char *buf;
377 	char *buflim;
378 	char delim;
379 {
380 	register char *p;
381 	bool space;
382 	bool quotemode;
383 	bool bslashmode;
384 	bool delimmode;
385 	int cmntcnt;
386 	int brccnt;
387 	register char c;
388 	register char *q;
389 	extern char *index();
390 
391 	space = FALSE;
392 	delimmode = TRUE;
393 	q = buf;
394 	bslashmode = quotemode = FALSE;
395 	cmntcnt = brccnt = 0;
396 	for (p = addr; (c = *p++) != '\0'; )
397 	{
398 		/* chew up special characters */
399 		*q = '\0';
400 		if (bslashmode)
401 		{
402 			c |= 0200;
403 			bslashmode = FALSE;
404 		}
405 		else if (c == '"')
406 			quotemode = !quotemode;
407 		else if (c == '\\')
408 		{
409 			bslashmode++;
410 			continue;
411 		}
412 		else if (quotemode)
413 			c |= 0200;
414 		else if (c == delim)
415 			break;
416 		else if (c == '(')
417 		{
418 			cmntcnt++;
419 			continue;
420 		}
421 		else if (c == ')')
422 		{
423 			if (cmntcnt <= 0)
424 			{
425 				usrerr("Unbalanced ')'");
426 				return (NULL);
427 			}
428 			else
429 			{
430 				cmntcnt--;
431 				continue;
432 			}
433 		}
434 		if (cmntcnt > 0)
435 			continue;
436 		else if (isascii(c) && isspace(c) && (space || delimmode))
437 			continue;
438 		else if (c == '<')
439 		{
440 			if (brccnt < 0)
441 			{
442 				usrerr("multiple < spec");
443 				return (NULL);
444 			}
445 			brccnt++;
446 			delimmode = TRUE;
447 			space = FALSE;
448 			if (brccnt == 1)
449 			{
450 				/* we prefer using machine readable name */
451 				q = buf;
452 				*q = '\0';
453 				continue;
454 			}
455 		}
456 		else if (c == '>')
457 		{
458 			if (brccnt <= 0)
459 			{
460 				usrerr("Unbalanced `>'");
461 				return (NULL);
462 			}
463 			else
464 				brccnt--;
465 			if (brccnt <= 0)
466 			{
467 				brccnt = -1;
468 				continue;
469 			}
470 		}
471 
472 		/*
473 		**  Turn "at" into "@",
474 		**	but only if "at" is a word.
475 		**	By the way, I violate the ARPANET RFC-733
476 		**	standard here, by assuming that 'space' delimits
477 		**	atoms.  I assume that is just a mistake, since
478 		**	it violates the spirit of the semantics
479 		**	of the document.....
480 		*/
481 
482 		if (delimmode && (c == 'a' || c == 'A') &&
483 		    (p[0] == 't' || p[0] == 'T') &&
484 		    (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040))
485 		{
486 			c = '@';
487 			p++;
488 		}
489 
490 		if (delimmode = (index(DELIMCHARS, c) != NULL))
491 			space = FALSE;
492 
493 		/* if not a space, squirrel it away */
494 		if ((!isascii(c) || !isspace(c)) && brccnt >= 0)
495 		{
496 			if (q >= buflim-1)
497 			{
498 				usrerr("Address too long");
499 				return (NULL);
500 			}
501 			if (space)
502 				*q++ = SPACESUB;
503 			*q++ = c;
504 		}
505 		space = isascii(c) && isspace(c);
506 	}
507 	*q = '\0';
508 	if (c == '\0')
509 		p--;
510 	if (cmntcnt > 0)
511 		usrerr("Unbalanced '('");
512 	else if (quotemode)
513 		usrerr("Unbalanced '\"'");
514 	else if (brccnt > 0)
515 		usrerr("Unbalanced '<'");
516 	else if (buf[0] != '\0')
517 		return (p);
518 	return (NULL);
519 }
520