1 # include <stdio.h>
2 # include <ctype.h>
3 # include "dlvrmail.h"
4 
5 /*
6 **  PARSE -- Parse an address
7 **
8 **	Parses an address and breaks it up into three parts: a
9 **	net to transmit the message on, the host to transmit it
10 **	to, and a user on that host.  These are loaded into an
11 **	addrq header with the values squirreled away if necessary.
12 **	The "user" part may not be a real user; the process may
13 **	just reoccur on that machine.  For example, on a machine
14 **	with an arpanet connection, the address
15 **		csvax.bill@berkeley
16 **	will break up to a "user" of 'csvax.bill' and a host
17 **	of 'berkeley' -- to be transmitted over the arpanet.
18 **
19 **	Parameters:
20 **		addr -- the address to parse.
21 **		a -- a pointer to the address descriptor buffer.
22 **			If NULL, a header will be created.
23 **		copyf -- determines what shall be copied:
24 **			-1 -- don't copy anything.  The printname
25 **				(q_paddr) is just addr, and the
26 **				user & host are allocated internally
27 **				to parse.
28 **			0 -- copy out the parsed user & host, but
29 **				don't copy the printname.
30 **			+1 -- copy everything.
31 **
32 **	Returns:
33 **		A pointer to the address descriptor header (`a' if
34 **			`a' is non-NULL).
35 **		NULL on error.
36 **
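**	Side Effects:
**		Sets the global `To' to addr.
**		Sets ExitStat to EX_USAGE when the address is rejected
**			with a "multichar error" or "syntax error".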
39 **
40 **	Defined Constants:
41 **		none
42 **
43 **	Requires:
44 **		usrerr
45 **		strcpy (sys)
46 **		isalpha (sys)
47 **		xalloc
48 **		prescan
49 **		flagset
50 **		makelower
51 **		printf (sys)
52 **		ParseTab -- the parse table.
53 **
54 **	Called By:
55 **		main
56 **		sendto
57 **		alias
58 **		savemail
59 **
60 **	History:
61 **		12/26/79 -- written.
62 */
63 
64 addrq *
65 parse(addr, a, copyf)
66 	char *addr;
67 	register addrq *a;
68 	int copyf;
69 {
70 	register char *p;
71 	register struct parsetab *t;
72 	extern struct parsetab ParseTab[];
73 	static char buf[MAXNAME];
74 	register char c;
75 	register char *q;
76 	bool got_one;
77 	extern char *prescan();
78 	extern char *xalloc();
79 
80 	/*
81 	**  Initialize and prescan address.
82 	*/
83 
84 	To = addr;
85 	if (prescan(addr, buf, &buf[sizeof buf], '\0') == NULL)
86 		return (NULL);
87 
88 	/*
89 	**  Scan parse table.
90 	**	Look for the first entry designating a character
91 	**		that is contained in the address.
92 	**	Arrange for q to point to that character.
93 	**	Check to see that there is only one of the char
94 	**		if it must be unique.
95 	**	Find the last one if the host is on the RHS.
96 	**	Insist that the host name is atomic.
97 	**	If just doing a map, do the map and then start all
98 	**		over.
99 	*/
100 
101  rescan:
102 	got_one = FALSE;
103 	for (t = ParseTab; t->p_char != '\0'; t++)
104 	{
105 		q = NULL;
106 		for (p = buf; (c = *p) != '\0'; p++)
107 		{
108 			/* find the end of this token */
109 			while (isalnum(c) || c == '-' || c == '_')
110 				c = *++p;
111 			if (c == '\0')
112 				break;
113 
114 			if (c == t->p_char)
115 			{
116 				got_one = TRUE;
117 
118 				/* do mapping as appropriate */
119 				if (flagset(P_MAP, t->p_flags))
120 				{
121 					*p = t->p_arg[0];
122 					if (flagset(P_ONE, t->p_flags))
123 						goto rescan;
124 					else
125 						continue;
126 				}
127 
128 				/* arrange for q to point to it */
129 				if (q != NULL && flagset(P_ONE, t->p_flags))
130 				{
131 					usrerr("multichar error");
132 					ExitStat = EX_USAGE;
133 					return (NULL);
134 				}
135 				if (q == NULL || flagset(P_HLAST, t->p_flags))
136 					q = p;
137 			}
138 			else
139 			{
140 				/* insist that host name is atomic */
141 				if (flagset(P_HLAST, t->p_flags))
142 					q = NULL;
143 				else
144 					break;
145 			}
146 		}
147 
148 		if (q != NULL)
149 			break;
150 	}
151 
152 	/*
153 	**  If we matched nothing cleanly, but we did match something
154 	**  somewhere in the process of scanning, then we have a
155 	**  syntax error.  This can happen on things like a@b:c where
156 	**  @ has a right host and : has a left host.
157 	**
158 	**  We also set `q' to the null string, in case someone forgets
159 	**  to put the P_MOVE bit in the local mailer entry of the
160 	**  configuration table.
161 	*/
162 
163 	if (q == NULL)
164 	{
165 		q = "";
166 		if (got_one)
167 		{
168 			usrerr("syntax error");
169 			ExitStat = EX_USAGE;
170 			return (NULL);
171 		}
172 	}
173 
174 	/*
175 	**  Interpret entry.
176 	**	t points to the entry for the mailer we will use.
177 	**	q points to the significant character.
178 	*/
179 
180 	if (a == NULL)
181 		a = (addrq *) xalloc(sizeof *a);
182 	if (copyf > 0)
183 	{
184 		p = xalloc((unsigned) strlen(addr) + 1);
185 		strcpy(p, addr);
186 		a->q_paddr = p;
187 	}
188 	else
189 		a->q_paddr = addr;
190 	a->q_mailer = &Mailer[t->p_mailer];
191 
192 	if (flagset(P_MOVE, t->p_flags))
193 	{
194 		/* send the message to another host & retry */
195 		a->q_host = t->p_arg;
196 		if (copyf >= 0)
197 		{
198 			p = xalloc((unsigned) strlen(buf) + 1);
199 			strcpy(p, buf);
200 			a->q_user = p;
201 		}
202 		else
203 			a->q_user = buf;
204 	}
205 	else
206 	{
207 		/*
208 		**  Make local copies of the host & user and then
209 		**  transport them out.
210 		*/
211 
212 		*q++ = '\0';
213 		if (flagset(P_HLAST, t->p_flags))
214 		{
215 			a->q_host = q;
216 			a->q_user = buf;
217 		}
218 		else
219 		{
220 			a->q_host = buf;
221 			a->q_user = q;
222 		}
223 		if (copyf >= 0)
224 		{
225 			p = xalloc((unsigned) strlen(a->q_host) + 1);
226 			strcpy(p, a->q_host);
227 			a->q_host = p;
228 			p = xalloc((unsigned) strlen(a->q_user) + 1);
229 			strcpy(p, a->q_user);
230 			a->q_user = p;
231 		}
232 	}
233 
234 	/*
235 	**  Do UPPER->lower case mapping unless inhibited.
236 	*/
237 
238 	if (!flagset(P_HST_UPPER, t->p_flags))
239 		makelower(a->q_host);
240 	if (!flagset(P_USR_UPPER, t->p_flags))
241 		makelower(a->q_user);
242 
243 	/*
244 	**  Compute return value.
245 	*/
246 
247 # ifdef DEBUG
248 	if (Debug && copyf >= 0)
249 		printf("parse(\"%s\"): host \"%s\" user \"%s\" mailer %d\n",
250 		    addr, a->q_host, a->q_user, t->p_mailer);
251 # endif DEBUG
252 
253 	return (a);
254 }
255 /*
256 **  MAKELOWER -- Translate a line into lower case
257 **
258 **	Parameters:
259 **		p -- the string to translate.  If NULL, return is
260 **			immediate.
261 **
262 **	Returns:
263 **		none.
264 **
265 **	Side Effects:
266 **		String pointed to by p is translated to lower case.
267 **
268 **	Requires:
269 **		isupper (sys)
270 **
271 **	Called By:
272 **		parse
273 **
274 **	History:
275 **		12/26/79 -- written.
276 */
277 
makelower(p)
	register char *p;
{
	register char *cp;

	/* a NULL string is quietly ignored */
	if (p != NULL)
	{
		cp = p;
		while (*cp != '\0')
		{
			/*
			**  Only plain upper-case letters are folded;
			**  anything with the 0200 bit set is left alone
			**  and never reaches isupper.
			*/

			if ((*cp & 0200) == 0 && isupper(*cp))
				*cp += 'a' - 'A';
			cp++;
		}
	}
}
289 /*
290 **  PRESCAN -- Prescan name and make it canonical
291 **
292 **	Scans a name and turns it into canonical form.  This involves
293 **	deleting blanks, comments (in parentheses), and turning the
294 **	word "at" into an at-sign ("@").  The name is copied as this
295 **	is done; it is legal to copy a name onto itself, since this
296 **	process can only make things smaller.
297 **
298 **	This routine knows about quoted strings and angle brackets.
299 **
300 **	There are certain subtleties to this routine.  The one that
301 **	comes to mind now is that backslashes on the ends of names
302 **	are silently stripped off; this is intentional.  The problem
303 **	is that some versions of sndmsg (like at LBL) set the kill
304 **	character to something other than @ when reading addresses;
305 **	so people type "csvax.eric\@berkeley" -- which screws up the
306 **	berknet mailer.
307 **
308 **	Parameters:
309 **		addr -- the name to chomp.
310 **		buf -- the buffer to copy it into.
**		buflim -- the last usable address in the buffer
**			(which will hold a null byte).  Normally
**			&buf[sizeof buf - 1].  If you are moving
**			in place, set buflim to high core.
**		delim -- the delimiter for the address, normally
**			'\0' or ','; \0 is accepted in any case.
317 **
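**	Returns:
**		A pointer into addr: to its terminating null byte, or
**			to the character just past the delimiter
**			that ended the scan.
**		NULL on error, or if the canonical name is empty.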
321 **
322 **	Side Effects:
323 **		buf gets clobbered.
324 **
325 **	Requires:
326 **		isspace (sys)
327 **		any
328 **		usrerr
329 **
330 **	Called By:
331 **		parse
332 **		maketemp
333 **
334 **	History:
335 **		12/30/79 -- broken from parse; comment processing
336 **			added.
337 */
338 
339 char *
340 prescan(addr, buf, buflim, delim)
341 	char *addr;
342 	char *buf;
343 	char *buflim;
344 	char delim;
345 {
346 	register char *p;
347 	bool space;
348 	bool quotemode;
349 	bool bslashmode;
350 	int cmntcnt;
351 	int brccnt;
352 	register char c;
353 	register char *q;
354 	extern bool any();
355 
356 	space = TRUE;
357 	q = buf;
358 	bslashmode = quotemode = FALSE;
359 	cmntcnt = brccnt = 0;
360 	for (p = addr; (c = *p++ & 0177) != '\0'; )
361 	{
362 		/* chew up special characters */
363 		*q = '\0';
364 		if (bslashmode)
365 		{
366 			c |= 0200;
367 			bslashmode == FALSE;
368 		}
369 		else if (c == '"')
370 			quotemode = !quotemode;
371 		else if (c == '\\')
372 		{
373 			bslashmode++;
374 			continue;
375 		}
376 		else if (quotemode)
377 			c |= 0200;
378 		else if (c == delim)
379 			break;
380 		else if (c == '(')
381 			cmntcnt++;
382 		else if (c == ')')
383 		{
384 			if (cmntcnt <= 0)
385 			{
386 				usrerr("Unbalanced ')'");
387 				return (NULL);
388 			}
389 			else
390 			{
391 				cmntcnt--;
392 				continue;
393 			}
394 		}
395 		if (cmntcnt > 0)
396 			continue;
397 		else if (c == '<')
398 		{
399 			brccnt++;
400 			if (brccnt == 1)
401 			{
402 				/* we prefer using machine readable name */
403 				q = buf;
404 				*q = '\0';
405 				continue;
406 			}
407 		}
408 		else if (c == '>')
409 		{
410 			if (brccnt <= 0)
411 			{
412 				usrerr("Unbalanced `>'");
413 				return (NULL);
414 			}
415 			else
416 				brccnt--;
417 			if (brccnt <= 0)
418 				continue;
419 		}
420 
421 		/*
422 		**  Turn "at" into "@",
423 		**	but only if "at" is a word in and to itself.
424 		**	By the way, I violate the ARPANET RFC-733
425 		**	standard here, by assuming that 'space' delimits
426 		**	atoms.  I assume that is just a mistake, since
427 		**	it violates the spirit of the semantics
428 		**	of the document.....
429 		*/
430 
431 		if (space && (c == 'a' || c == 'A') &&
432 		    (p[0] == 't' || p[0] == 'T') &&
433 		    (any(p[1], "()<>@,;:\\\"") || p[1] <= 040))
434 		{
435 			c = '@';
436 			p++;
437 		}
438 
439 		/* skip blanks */
440 		if (((c & 0200) != 0 || !isspace(c)) && cmntcnt <= 0)
441 		{
442 			if (q >= buflim)
443 			{
444 				usrerr("Address too long");
445 				return (NULL);
446 			}
447 			*q++ = c;
448 		}
449 		space = isspace(c);
450 	}
451 	*q = '\0';
452 	if (c == '\0')
453 		p--;
454 	if (cmntcnt > 0)
455 		usrerr("Unbalanced '('");
456 	else if (quotemode)
457 		usrerr("Unbalanced '\"'");
458 	else if (brccnt > 0)
459 		usrerr("Unbalanced '<'");
460 	else if (buf[0] != '\0')
461 		return (p);
462 	return (NULL);
463 }
464