1 # include <stdio.h>
2 # include <ctype.h>
3 # include "dlvrmail.h"
4 
/* SCCS identification string for this file (module name, version, date). */
static char	SccsId[] = "@(#)parseaddr.c	2.1	11/05/80";
6 
7 /*
8 **  PARSE -- Parse an address
9 **
10 **	Parses an address and breaks it up into three parts: a
11 **	net to transmit the message on, the host to transmit it
12 **	to, and a user on that host.  These are loaded into an
13 **	addrq header with the values squirreled away if necessary.
14 **	The "user" part may not be a real user; the process may
15 **	just reoccur on that machine.  For example, on a machine
16 **	with an arpanet connection, the address
17 **		csvax.bill@berkeley
18 **	will break up to a "user" of 'csvax.bill' and a host
19 **	of 'berkeley' -- to be transmitted over the arpanet.
20 **
21 **	Parameters:
22 **		addr -- the address to parse.
23 **		a -- a pointer to the address descriptor buffer.
24 **			If NULL, a header will be created.
25 **		copyf -- determines what shall be copied:
26 **			-1 -- don't copy anything.  The printname
27 **				(q_paddr) is just addr, and the
28 **				user & host are allocated internally
29 **				to parse.
30 **			0 -- copy out the parsed user & host, but
31 **				don't copy the printname.
32 **			+1 -- copy everything.
33 **
34 **	Returns:
35 **		A pointer to the address descriptor header (`a' if
36 **			`a' is non-NULL).
37 **		NULL on error.
38 **
39 **	Side Effects:
40 **		none
41 **
42 **	Called By:
43 **		main
44 **		sendto
45 **		alias
46 **		savemail
47 */
48 
49 addrq *
50 parse(addr, a, copyf)
51 	char *addr;
52 	register addrq *a;
53 	int copyf;
54 {
55 	register char *p;
56 	register struct parsetab *t;
57 	extern struct parsetab ParseTab[];
58 	static char buf[MAXNAME];
59 	register char c;
60 	register char *q;
61 	bool got_one;
62 	extern char *prescan();
63 	extern char *xalloc();
64 	char **pvp;
65 
66 	/*
67 	**  Initialize and prescan address.
68 	*/
69 
70 	To = addr;
71 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
72 		return (NULL);
73 
74 	/*
75 	**  Scan parse table.
76 	**	Look for the first entry designating a character
77 	**		that is contained in the address.
78 	**	Arrange for q to point to that character.
79 	**	Check to see that there is only one of the char
80 	**		if it must be unique.
81 	**	Find the last one if the host is on the RHS.
82 	**	Insist that the host name is atomic.
83 	**	If just doing a map, do the map and then start all
84 	**		over.
85 	*/
86 
87  rescan:
88 	got_one = FALSE;
89 	for (t = ParseTab; t->p_char != '\0'; t++)
90 	{
91 		q = NULL;
92 		for (p = buf; (c = *p) != '\0'; p++)
93 		{
94 			/* find the end of this token */
95 			while (isalnum(c) || c == '-' || c == '_')
96 				c = *++p;
97 			if (c == '\0')
98 				break;
99 
100 			if (c == t->p_char)
101 			{
102 				got_one = TRUE;
103 
104 				/* do mapping as appropriate */
105 				if (flagset(P_MAP, t->p_flags))
106 				{
107 					*p = t->p_arg[0];
108 					if (flagset(P_ONE, t->p_flags))
109 						goto rescan;
110 					else
111 						continue;
112 				}
113 
114 				/* arrange for q to point to it */
115 				if (q != NULL && flagset(P_ONE, t->p_flags))
116 				{
117 					usrerr("multichar error");
118 					ExitStat = EX_USAGE;
119 					return (NULL);
120 				}
121 				if (q == NULL || flagset(P_HLAST, t->p_flags))
122 					q = p;
123 			}
124 			else
125 			{
126 				/* insist that host name is atomic */
127 				if (flagset(P_HLAST, t->p_flags))
128 					q = NULL;
129 				else
130 					break;
131 			}
132 		}
133 
134 		if (q != NULL)
135 			break;
136 	}
137 
138 	/*
139 	**  If we matched nothing cleanly, but we did match something
140 	**  somewhere in the process of scanning, then we have a
141 	**  syntax error.  This can happen on things like a@b:c where
142 	**  @ has a right host and : has a left host.
143 	**
144 	**  We also set `q' to the null string, in case someone forgets
145 	**  to put the P_MOVE bit in the local mailer entry of the
146 	**  configuration table.
147 	*/
148 
149 	if (q == NULL)
150 	{
151 		q = "";
152 		if (got_one)
153 		{
154 			usrerr("syntax error");
155 			ExitStat = EX_USAGE;
156 			return (NULL);
157 		}
158 	}
159 
160 	/*
161 	**  Interpret entry.
162 	**	t points to the entry for the mailer we will use.
163 	**	q points to the significant character.
164 	*/
165 
166 	if (a == NULL)
167 		a = (addrq *) xalloc(sizeof *a);
168 	if (copyf > 0)
169 	{
170 		p = xalloc((unsigned) strlen(addr) + 1);
171 		strcpy(p, addr);
172 		a->q_paddr = p;
173 	}
174 	else
175 		a->q_paddr = addr;
176 	a->q_mailer = &Mailer[t->p_mailer];
177 
178 	if (flagset(P_MOVE, t->p_flags))
179 	{
180 		/* send the message to another host & retry */
181 		a->q_host = t->p_arg;
182 		if (copyf >= 0)
183 		{
184 			p = xalloc((unsigned) strlen(buf) + 1);
185 			strcpy(p, buf);
186 			a->q_user = p;
187 		}
188 		else
189 			a->q_user = buf;
190 	}
191 	else
192 	{
193 		/*
194 		**  Make local copies of the host & user and then
195 		**  transport them out.
196 		*/
197 
198 		*q++ = '\0';
199 		if (flagset(P_HLAST, t->p_flags))
200 		{
201 			a->q_host = q;
202 			a->q_user = buf;
203 		}
204 		else
205 		{
206 			a->q_host = buf;
207 			a->q_user = q;
208 		}
209 
210 		/*
211 		**  Don't go to the net if already on the target host.
212 		**	This is important on the berkeley network, since
213 		**	it get confused if we ask to send to ourselves.
214 		**	For nets like the ARPANET, we probably will have
215 		**	the local list set to NULL to simplify testing.
216 		**	The canonical representation of the name is also set
217 		**	to be just the local name so the duplicate letter
218 		**	suppression algorithm will work.
219 		*/
220 
221 		if ((pvp = a->q_mailer->m_local) != NULL)
222 		{
223 			while (*pvp != NULL)
224 			{
225 				auto char buf2[MAXNAME];
226 
227 				strcpy(buf2, a->q_host);
228 				if (!flagset(P_HST_UPPER, t->p_flags))
229 					makelower(buf2);
230 				if (strcmp(*pvp++, buf2) == 0)
231 				{
232 					strcpy(buf2, a->q_user);
233 					p = a->q_paddr;
234 					if (parse(buf2, a, -1) == NULL)
235 					{
236 						To = addr;
237 						return (NULL);
238 					}
239 					To = a->q_paddr = p;
240 					break;
241 				}
242 			}
243 		}
244 
245 		/* make copies if specified */
246 		if (copyf >= 0)
247 		{
248 			p = xalloc((unsigned) strlen(a->q_host) + 1);
249 			strcpy(p, a->q_host);
250 			a->q_host = p;
251 			p = xalloc((unsigned) strlen(a->q_user) + 1);
252 			strcpy(p, a->q_user);
253 			a->q_user = p;
254 		}
255 	}
256 
257 	/*
258 	**  Do UPPER->lower case mapping unless inhibited.
259 	*/
260 
261 	if (!flagset(P_HST_UPPER, t->p_flags))
262 		makelower(a->q_host);
263 	if (!flagset(P_USR_UPPER, t->p_flags))
264 		makelower(a->q_user);
265 
266 	/*
267 	**  Compute return value.
268 	*/
269 
270 # ifdef DEBUG
271 	if (Debug)
272 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
273 		    addr, a->q_host, a->q_user, t->p_mailer);
274 # endif DEBUG
275 
276 	return (a);
277 }
/*
**  MAKELOWER -- Translate a line into lower case
**
**	Characters that have the 0200 bit set are left alone.
**
**	Parameters:
**		p -- the string to translate.  If NULL, return is
**			immediate.
**
**	Returns:
**		Always zero.  The value carries no information; the
**		type is spelled out as int only because that is what
**		callers using an implicit declaration already assume.
**
**	Side Effects:
**		String pointed to by p is translated to lower case.
**
**	Called By:
**		parse
*/

int
makelower(p)
	register char *p;
{
	register char c;

	if (p == NULL)
		return (0);
	for (; (c = *p) != '\0'; p++)
	{
		/* the 0200 test keeps negative chars away from <ctype.h> */
		if ((c & 0200) == 0 && isupper(c))
			*p = tolower(c);
	}
	return (0);
}
306 /*
307 **  PRESCAN -- Prescan name and make it canonical
308 **
309 **	Scans a name and turns it into canonical form.  This involves
310 **	deleting blanks, comments (in parentheses), and turning the
311 **	word "at" into an at-sign ("@").  The name is copied as this
312 **	is done; it is legal to copy a name onto itself, since this
313 **	process can only make things smaller.
314 **
315 **	This routine knows about quoted strings and angle brackets.
316 **
317 **	There are certain subtleties to this routine.  The one that
318 **	comes to mind now is that backslashes on the ends of names
319 **	are silently stripped off; this is intentional.  The problem
320 **	is that some versions of sndmsg (like at LBL) set the kill
321 **	character to something other than @ when reading addresses;
322 **	so people type "csvax.eric\@berkeley" -- which screws up the
323 **	berknet mailer.
324 **
325 **	Parameters:
326 **		addr -- the name to chomp.
327 **		buf -- the buffer to copy it into.
328 **		buflim -- the last usable address in the buffer
329 **			(which will old a null byte).  Normally
330 **			&buf[sizeof buf - 1].
331 **		delim -- the delimiter for the address, normally
332 **			'\0' or ','; \0 is accepted in any case.
333 **			are moving in place; set buflim to high core.
334 **
335 **	Returns:
336 **		A pointer to the terminator of buf.
337 **		NULL on error.
338 **
339 **	Side Effects:
340 **		buf gets clobbered.
341 **
342 **	Called By:
343 **		parse
344 **		maketemp
345 */
346 
347 char *
348 prescan(addr, buf, buflim, delim)
349 	char *addr;
350 	char *buf;
351 	char *buflim;
352 	char delim;
353 {
354 	register char *p;
355 	bool space;
356 	bool quotemode;
357 	bool bslashmode;
358 	int cmntcnt;
359 	int brccnt;
360 	register char c;
361 	register char *q;
362 	extern bool any();
363 
364 	space = TRUE;
365 	q = buf;
366 	bslashmode = quotemode = FALSE;
367 	cmntcnt = brccnt = 0;
368 	for (p = addr; (c = *p++) != '\0'; )
369 	{
370 		/* chew up special characters */
371 		*q = '\0';
372 		if (bslashmode)
373 		{
374 			c |= 0200;
375 			bslashmode = FALSE;
376 		}
377 		else if (c == '"')
378 			quotemode = !quotemode;
379 		else if (c == '\\')
380 		{
381 			bslashmode++;
382 			continue;
383 		}
384 		else if (quotemode)
385 			c |= 0200;
386 		else if (c == delim)
387 			break;
388 		else if (c == '(')
389 		{
390 			cmntcnt++;
391 			continue;
392 		}
393 		else if (c == ')')
394 		{
395 			if (cmntcnt <= 0)
396 			{
397 				usrerr("Unbalanced ')'");
398 				return (NULL);
399 			}
400 			else
401 			{
402 				cmntcnt--;
403 				continue;
404 			}
405 		}
406 		else if (c == '<')
407 		{
408 			brccnt++;
409 			if (brccnt == 1)
410 			{
411 				/* we prefer using machine readable name */
412 				q = buf;
413 				*q = '\0';
414 				continue;
415 			}
416 		}
417 		else if (c == '>')
418 		{
419 			if (brccnt <= 0)
420 			{
421 				usrerr("Unbalanced `>'");
422 				return (NULL);
423 			}
424 			else
425 				brccnt--;
426 			if (brccnt <= 0)
427 				continue;
428 		}
429 
430 		/*
431 		**  Turn "at" into "@",
432 		**	but only if "at" is a word.
433 		**	By the way, I violate the ARPANET RFC-733
434 		**	standard here, by assuming that 'space' delimits
435 		**	atoms.  I assume that is just a mistake, since
436 		**	it violates the spirit of the semantics
437 		**	of the document.....
438 		*/
439 
440 		if (space && (c == 'a' || c == 'A') &&
441 		    (p[0] == 't' || p[0] == 'T') &&
442 		    (any(p[1], "()<>@,;:\\\"") || p[1] <= 040))
443 		{
444 			c = '@';
445 			p++;
446 		}
447 
448 		/* skip blanks */
449 		if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0)
450 		{
451 			if (q >= buflim)
452 			{
453 				usrerr("Address too long");
454 				return (NULL);
455 			}
456 			*q++ = c;
457 		}
458 		space = isspace(c);
459 	}
460 	*q = '\0';
461 	if (c == '\0')
462 		p--;
463 	if (cmntcnt > 0)
464 		usrerr("Unbalanced '('");
465 	else if (quotemode)
466 		usrerr("Unbalanced '\"'");
467 	else if (brccnt > 0)
468 		usrerr("Unbalanced '<'");
469 	else if (buf[0] != '\0')
470 		return (p);
471 	return (NULL);
472 }
473