1 # include <stdio.h>
2 # include <ctype.h>
3 # include "postbox.h"
4 
5 static char	SccsId[] = "@(#)parseaddr.c	3.1	03/07/81";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	ADDRESS header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
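**	Side Effects:
**		Sets the global To to addr.
**		Sets ExitStat to EX_USAGE on a parse error.
**		Overwrites a static scratch buffer; when copyf is
**			negative the returned user & host may point
**			into it, so they are only good until the
**			next call.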
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
/*
**  DELIMCHARS is the set of characters prescan treats as word
**  delimiters, both to track whether it is at a word boundary and
**  to decide whether "at" stands alone as a word.
**
**  SPACESUB is a '.' with the 0200 bit set.  prescan stores it in
**  place of white space that separates two non-delimiter characters.
*/
# define DELIMCHARS	"()<>@!.,;:\\\" \t\r\n"	/* word delimiters */
# define SPACESUB	('.'|0200)		/* substitution for <lwsp> */
51 
52 ADDRESS *
53 parse(addr, a, copyf)
54 	char *addr;
55 	register ADDRESS *a;
56 	int copyf;
57 {
58 	register char *p;
59 	register struct parsetab *t;
60 	extern struct parsetab ParseTab[];
61 	static char buf[MAXNAME];
62 	register char c;
63 	register char *q;
64 	bool got_one;
65 	extern char *prescan();
66 	extern char *xalloc();
67 	extern char *newstr();
68 	char **pvp;
69 
70 	/*
71 	**  Initialize and prescan address.
72 	*/
73 
74 	To = addr;
75 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
76 		return (NULL);
77 
78 	/*
79 	**  Scan parse table.
80 	**	Look for the first entry designating a character
81 	**		that is contained in the address.
82 	**	Arrange for q to point to that character.
83 	**	Check to see that there is only one of the char
84 	**		if it must be unique.
85 	**	Find the last one if the host is on the RHS.
86 	**	Insist that the host name is atomic.
87 	**	If just doing a map, do the map and then start all
88 	**		over.
89 	*/
90 
91  rescan:
92 	got_one = FALSE;
93 	for (t = ParseTab; t->p_char != '\0'; t++)
94 	{
95 		q = NULL;
96 		for (p = buf; (c = *p) != '\0'; p++)
97 		{
98 			/* find the end of this token */
99 			while (isalnum(c) || c == '-' || c == '_')
100 				c = *++p;
101 			if (c == '\0')
102 				break;
103 
104 			if (c == t->p_char)
105 			{
106 				got_one = TRUE;
107 
108 				/* do mapping as appropriate */
109 				if (bitset(P_MAP, t->p_flags))
110 				{
111 					*p = t->p_arg[0];
112 					if (bitset(P_ONE, t->p_flags))
113 						goto rescan;
114 					else
115 						continue;
116 				}
117 
118 				/* arrange for q to point to it */
119 				if (q != NULL && bitset(P_ONE, t->p_flags))
120 				{
121 					usrerr("multichar error");
122 					ExitStat = EX_USAGE;
123 					return (NULL);
124 				}
125 				if (q == NULL || bitset(P_HLAST, t->p_flags))
126 					q = p;
127 			}
128 			else
129 			{
130 				/* insist that host name is atomic */
131 				if (bitset(P_HLAST, t->p_flags))
132 					q = NULL;
133 				else
134 					break;
135 			}
136 		}
137 
138 		if (q != NULL)
139 			break;
140 	}
141 
142 	/*
143 	**  If we matched nothing cleanly, but we did match something
144 	**  somewhere in the process of scanning, then we have a
145 	**  syntax error.  This can happen on things like a@b:c where
146 	**  @ has a right host and : has a left host.
147 	**
148 	**  We also set `q' to the null string, in case someone forgets
149 	**  to put the P_MOVE bit in the local mailer entry of the
150 	**  configuration table.
151 	*/
152 
153 	if (q == NULL)
154 	{
155 		q = "";
156 		if (got_one)
157 		{
158 			usrerr("syntax error");
159 			ExitStat = EX_USAGE;
160 			return (NULL);
161 		}
162 	}
163 
164 	/*
165 	**  Interpret entry.
166 	**	t points to the entry for the mailer we will use.
167 	**	q points to the significant character.
168 	*/
169 
170 	if (a == NULL)
171 		a = (ADDRESS *) xalloc(sizeof *a);
172 	if (copyf > 0)
173 		a->q_paddr = newstr(addr);
174 	else
175 		a->q_paddr = addr;
176 	a->q_mailer = &Mailer[t->p_mailer];
177 
178 	if (bitset(P_MOVE, t->p_flags))
179 	{
180 		/* send the message to another host & retry */
181 		a->q_host = t->p_arg;
182 		if (copyf >= 0)
183 			a->q_user = newstr(buf);
184 		else
185 			a->q_user = buf;
186 	}
187 	else
188 	{
189 		/*
190 		**  Make local copies of the host & user and then
191 		**  transport them out.
192 		*/
193 
194 		*q++ = '\0';
195 		if (bitset(P_HLAST, t->p_flags))
196 		{
197 			a->q_host = q;
198 			a->q_user = buf;
199 		}
200 		else
201 		{
202 			a->q_host = buf;
203 			a->q_user = q;
204 		}
205 
206 		/*
207 		**  Don't go to the net if already on the target host.
208 		**	This is important on the berkeley network, since
209 		**	it get confused if we ask to send to ourselves.
210 		**	For nets like the ARPANET, we probably will have
211 		**	the local list set to NULL to simplify testing.
212 		**	The canonical representation of the name is also set
213 		**	to be just the local name so the duplicate letter
214 		**	suppression algorithm will work.
215 		*/
216 
217 		if ((pvp = a->q_mailer->m_local) != NULL)
218 		{
219 			while (*pvp != NULL)
220 			{
221 				auto char buf2[MAXNAME];
222 
223 				strcpy(buf2, a->q_host);
224 				if (!bitset(P_HST_UPPER, t->p_flags))
225 					makelower(buf2);
226 				if (strcmp(*pvp++, buf2) == 0)
227 				{
228 					strcpy(buf2, a->q_user);
229 					p = a->q_paddr;
230 					if (parse(buf2, a, -1) == NULL)
231 					{
232 						To = addr;
233 						return (NULL);
234 					}
235 					To = a->q_paddr = p;
236 					break;
237 				}
238 			}
239 		}
240 
241 		/* make copies if specified */
242 		if (copyf >= 0)
243 		{
244 			a->q_host = newstr(a->q_host);
245 			a->q_user = newstr(a->q_user);
246 		}
247 	}
248 
249 	/*
250 	**  Do UPPER->lower case mapping unless inhibited.
251 	*/
252 
253 	if (!bitset(P_HST_UPPER, t->p_flags))
254 		makelower(a->q_host);
255 	if (!bitset(P_USR_UPPER, t->p_flags))
256 		makelower(a->q_user);
257 
258 	/*
259 	**  Compute return value.
260 	*/
261 
262 # ifdef DEBUG
263 	if (Debug)
264 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
265 		    addr, a->q_host, a->q_user, t->p_mailer);
266 # endif DEBUG
267 
268 	return (a);
269 }
270 /*
271 **  MAKELOWER -- Translate a line into lower case
272 **
273 **	Parameters:
274 **		p -- the string to translate.  If NULL, return is
275 **			immediate.
276 **
277 **	Returns:
278 **		none.
279 **
280 **	Side Effects:
281 **		String pointed to by p is translated to lower case.
282 **
283 **	Called By:
284 **		parse
285 */
286 
makelower(p)
	register char *p;
{
	/* a NULL string is quietly ignored */
	if (p != NULL)
	{
		while (*p != '\0')
		{
			register char ch = *p;

			/*
			**  Only plain upper case letters are mapped.  A
			**  byte with the 0200 bit set is left alone, and
			**  is checked for first so that isupper never
			**  sees it.
			*/
			if ((ch & 0200) == 0 && isupper(ch))
				*p = ch - 'A' + 'a';
			p++;
		}
	}
}
298 /*
299 **  PRESCAN -- Prescan name and make it canonical
300 **
301 **	Scans a name and turns it into canonical form.  This involves
302 **	deleting blanks, comments (in parentheses), and turning the
303 **	word "at" into an at-sign ("@").  The name is copied as this
304 **	is done; it is legal to copy a name onto itself, since this
305 **	process can only make things smaller.
306 **
307 **	This routine knows about quoted strings and angle brackets.
308 **
309 **	There are certain subtleties to this routine.  The one that
310 **	comes to mind now is that backslashes on the ends of names
311 **	are silently stripped off; this is intentional.  The problem
312 **	is that some versions of sndmsg (like at LBL) set the kill
313 **	character to something other than @ when reading addresses;
314 **	so people type "csvax.eric\@berkeley" -- which screws up the
315 **	berknet mailer.
316 **
317 **	Parameters:
318 **		addr -- the name to chomp.
319 **		buf -- the buffer to copy it into.
**		buflim -- the last usable address in the buffer
**			(which will hold a null byte).  Normally
**			&buf[sizeof buf - 1].  If you are moving
**			in place, set buflim to high core.
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
326 **
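**	Returns:
**		A pointer into addr showing where the scan stopped:
**			at the terminating null byte, or just
**			past the delimiter if one was found.
**		NULL on error, or if the resulting name is empty.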
330 **
331 **	Side Effects:
332 **		buf gets clobbered.
333 **
334 **	Called By:
335 **		parse
336 **		maketemp
337 */
338 
339 char *
340 prescan(addr, buf, buflim, delim)
341 	char *addr;
342 	char *buf;
343 	char *buflim;
344 	char delim;
345 {
346 	register char *p;
347 	bool space;
348 	bool quotemode;
349 	bool bslashmode;
350 	bool delimmode;
351 	int cmntcnt;
352 	int brccnt;
353 	register char c;
354 	register char *q;
355 	extern char *index();
356 
357 	space = FALSE;
358 	delimmode = TRUE;
359 	q = buf;
360 	bslashmode = quotemode = FALSE;
361 	cmntcnt = brccnt = 0;
362 	for (p = addr; (c = *p++) != '\0'; )
363 	{
364 		/* chew up special characters */
365 		*q = '\0';
366 		if (bslashmode)
367 		{
368 			c |= 0200;
369 			bslashmode = FALSE;
370 		}
371 		else if (c == '"')
372 			quotemode = !quotemode;
373 		else if (c == '\\')
374 		{
375 			bslashmode++;
376 			continue;
377 		}
378 		else if (quotemode)
379 			c |= 0200;
380 		else if (c == delim)
381 			break;
382 		else if (c == '(')
383 		{
384 			cmntcnt++;
385 			continue;
386 		}
387 		else if (c == ')')
388 		{
389 			if (cmntcnt <= 0)
390 			{
391 				usrerr("Unbalanced ')'");
392 				return (NULL);
393 			}
394 			else
395 			{
396 				cmntcnt--;
397 				continue;
398 			}
399 		}
400 		if (cmntcnt > 0)
401 			continue;
402 		else if (isascii(c) && isspace(c) && (space || delimmode))
403 			continue;
404 		else if (c == '<')
405 		{
406 			if (brccnt < 0)
407 			{
408 				usrerr("multiple < spec");
409 				return (NULL);
410 			}
411 			brccnt++;
412 			delimmode = TRUE;
413 			space = FALSE;
414 			if (brccnt == 1)
415 			{
416 				/* we prefer using machine readable name */
417 				q = buf;
418 				*q = '\0';
419 				continue;
420 			}
421 		}
422 		else if (c == '>')
423 		{
424 			if (brccnt <= 0)
425 			{
426 				usrerr("Unbalanced `>'");
427 				return (NULL);
428 			}
429 			else
430 				brccnt--;
431 			if (brccnt <= 0)
432 			{
433 				brccnt = -1;
434 				continue;
435 			}
436 		}
437 
438 		/*
439 		**  Turn "at" into "@",
440 		**	but only if "at" is a word.
441 		**	By the way, I violate the ARPANET RFC-733
442 		**	standard here, by assuming that 'space' delimits
443 		**	atoms.  I assume that is just a mistake, since
444 		**	it violates the spirit of the semantics
445 		**	of the document.....
446 		*/
447 
448 		if (delimmode && (c == 'a' || c == 'A') &&
449 		    (p[0] == 't' || p[0] == 'T') &&
450 		    (index(DELIMCHARS, p[1]) != NULL || p[1] <= 040))
451 		{
452 			c = '@';
453 			p++;
454 		}
455 
456 		if (delimmode = (index(DELIMCHARS, c) != NULL))
457 			space = FALSE;
458 
459 		/* if not a space, squirrel it away */
460 		if ((!isascii(c) || !isspace(c)) && brccnt >= 0)
461 		{
462 			if (q >= buflim-1)
463 			{
464 				usrerr("Address too long");
465 				return (NULL);
466 			}
467 			if (space)
468 				*q++ = SPACESUB;
469 			*q++ = c;
470 		}
471 		space = isascii(c) && isspace(c);
472 	}
473 	*q = '\0';
474 	if (c == '\0')
475 		p--;
476 	if (cmntcnt > 0)
477 		usrerr("Unbalanced '('");
478 	else if (quotemode)
479 		usrerr("Unbalanced '\"'");
480 	else if (brccnt > 0)
481 		usrerr("Unbalanced '<'");
482 	else if (buf[0] != '\0')
483 		return (p);
484 	return (NULL);
485 }
486