1 /*
2  * Copyright (c) 1983 Eric P. Allman
3  * Copyright (c) 1988, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 #ifndef lint
10 static char sccsid[] = "@(#)collect.c	8.21 (Berkeley) 08/15/94";
11 #endif /* not lint */
12 
13 # include <errno.h>
14 # include "sendmail.h"
15 
16 /*
17 **  COLLECT -- read & parse message header & make temp file.
18 **
19 **	Creates a temporary file name and copies the standard
20 **	input to that file.  Leading UNIX-style "From" lines are
21 **	stripped off (after important information is extracted).
22 **
23 **	Parameters:
24 **		fp -- file to read.
25 **		smtpmode -- if set, we are running SMTP: give an RFC821
26 **			style message to say we are ready to collect
27 **			input, and never ignore a single dot to mean
28 **			end of message.
29 **		requeueflag -- this message will be requeued later, so
30 **			don't do final processing on it.
31 **		hdrp -- the location to stash the header.
32 **		e -- the current envelope.
33 **
34 **	Returns:
35 **		none.
36 **
37 **	Side Effects:
38 **		Temp file is created and filled.
39 **		The from person may be set.
40 */
41 
42 char	*CollectErrorMessage;
43 bool	CollectErrno;
44 
45 static jmp_buf	CtxCollectTimeout;
46 static int	collecttimeout();
47 static bool	CollectProgress;
48 static EVENT	*CollectTimeout;
49 
50 /* values for input state machine */
51 #define IS_NORM		0	/* middle of line */
52 #define IS_BOL		1	/* beginning of line */
53 #define IS_DOT		2	/* read a dot at beginning of line */
54 #define IS_DOTCR	3	/* read ".\r" at beginning of line */
55 #define IS_CR		4	/* read a carriage return */
56 
57 /* values for message state machine */
58 #define MS_UFROM	0	/* reading Unix from line */
59 #define MS_HEADER	1	/* reading message header */
60 #define MS_BODY		2	/* reading message body */
61 
62 
63 collect(fp, smtpmode, requeueflag, hdrp, e)
64 	FILE *fp;
65 	bool smtpmode;
66 	bool requeueflag;
67 	HDR **hdrp;
68 	register ENVELOPE *e;
69 {
70 	register FILE *tf;
71 	bool ignrdot = smtpmode ? FALSE : IgnrDot;
72 	time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
73 	register char *bp;
74 	register int c;
75 	bool inputerr = FALSE;
76 	bool headeronly = FALSE;
77 	char *buf;
78 	int buflen;
79 	int istate;
80 	int mstate;
81 	char *pbp;
82 	char peekbuf[8];
83 	char bufbuf[MAXLINE];
84 	extern bool isheader();
85 
86 	CollectErrorMessage = NULL;
87 	CollectErrno = 0;
88 	if (hdrp == NULL)
89 		hdrp = &e->e_header;
90 	else
91 		headeronly = TRUE;
92 
93 	/*
94 	**  Create the temp file name and create the file.
95 	*/
96 
97 	if (!headeronly)
98 	{
99 		struct stat stbuf;
100 
101 		e->e_df = queuename(e, 'd');
102 		e->e_df = newstr(e->e_df);
103 		if ((tf = dfopen(e->e_df, O_WRONLY|O_CREAT|O_TRUNC, FileMode)) == NULL)
104 		{
105 			syserr("Cannot create %s", e->e_df);
106 			e->e_flags |= EF_NORETURN;
107 			finis();
108 		}
109 		if (fstat(fileno(tf), &stbuf) < 0)
110 			e->e_dfino = -1;
111 		else
112 			e->e_dfino = stbuf.st_ino;
113 		HasEightBits = FALSE;
114 	}
115 
116 	/*
117 	**  Tell ARPANET to go ahead.
118 	*/
119 
120 	if (smtpmode)
121 		message("354 Enter mail, end with \".\" on a line by itself");
122 
123 	/*
124 	**  Read the message.
125 	**
126 	**	This is done using two interleaved state machines.
127 	**	The input state machine is looking for things like
128 	**	hidden dots; the message state machine is handling
129 	**	the larger picture (e.g., header versus body).
130 	*/
131 
132 	buf = bp = bufbuf;
133 	buflen = sizeof bufbuf;
134 	pbp = peekbuf;
135 	istate = IS_BOL;
136 	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
137 	CollectProgress = FALSE;
138 
139 	/* if transmitting binary, don't map NL to EOL */
140 	if (e->e_bodytype != NULL && strcasecmp(e->e_bodytype, "8BITMIME") == 0)
141 		e->e_flags |= EF_NL_NOT_EOL;
142 
143 	if (dbto != 0)
144 	{
145 		/* handle possible input timeout */
146 		if (setjmp(CtxCollectTimeout) != 0)
147 		{
148 #ifdef LOG
149 			syslog(LOG_NOTICE,
150 			    "timeout waiting for input from %s during message collect",
151 			    CurHostName ? CurHostName : "<local machine>");
152 #endif
153 			errno = 0;
154 			usrerr("451 timeout waiting for input during message collect");
155 			goto readerr;
156 		}
157 		CollectTimeout = setevent(dbto, collecttimeout, dbto);
158 	}
159 
160 	for (;;)
161 	{
162 		if (tTd(30, 35))
163 			printf("top, istate=%d, mstate=%d\n", istate, mstate);
164 		for (;;)
165 		{
166 			if (pbp > peekbuf)
167 				c = *--pbp;
168 			else
169 			{
170 				while (!feof(InChannel) && !ferror(InChannel))
171 				{
172 					errno = 0;
173 					c = fgetc(InChannel);
174 					if (errno != EINTR)
175 						break;
176 					clearerr(InChannel);
177 				}
178 				CollectProgress = TRUE;
179 				if (TrafficLogFile != NULL)
180 				{
181 					if (istate == IS_BOL)
182 						fprintf(TrafficLogFile, "%05d <<< ",
183 							getpid());
184 					if (c == EOF)
185 						fprintf(TrafficLogFile, "[EOF]\n");
186 					else
187 						fputc(c, TrafficLogFile);
188 				}
189 				if (c == EOF)
190 					goto readerr;
191 				if (SevenBitInput)
192 					c &= 0x7f;
193 				else
194 					HasEightBits |= bitset(0x80, c);
195 				e->e_msgsize++;
196 			}
197 			if (tTd(30, 94))
198 				printf("istate=%d, c=%c (0x%x)\n",
199 					istate, c, c);
200 			switch (istate)
201 			{
202 			  case IS_BOL:
203 				if (c == '.')
204 				{
205 					istate = IS_DOT;
206 					continue;
207 				}
208 				break;
209 
210 			  case IS_DOT:
211 				if (c == '\n' && !ignrdot &&
212 				    !bitset(EF_NL_NOT_EOL, e->e_flags))
213 					goto readerr;
214 				else if (c == '\r' &&
215 					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
216 				{
217 					istate = IS_DOTCR;
218 					continue;
219 				}
220 				else if (c != '.' ||
221 					 (OpMode != MD_SMTP &&
222 					  OpMode != MD_DAEMON &&
223 					  OpMode != MD_ARPAFTP))
224 				{
225 					*pbp++ = c;
226 					c = '.';
227 				}
228 				break;
229 
230 			  case IS_DOTCR:
231 				if (c == '\n')
232 					goto readerr;
233 				else
234 				{
235 					/* push back the ".\rx" */
236 					*pbp++ = c;
237 					*pbp++ = '\r';
238 					c = '.';
239 				}
240 				break;
241 
242 			  case IS_CR:
243 				if (c != '\n')
244 				{
245 					ungetc(c, InChannel);
246 					c = '\r';
247 				}
248 				else if (!bitset(EF_CRLF_NOT_EOL, e->e_flags))
249 					istate = IS_BOL;
250 				break;
251 			}
252 
253 			if (c == '\r')
254 			{
255 				istate = IS_CR;
256 				continue;
257 			}
258 			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
259 				istate = IS_BOL;
260 			else
261 				istate = IS_NORM;
262 
263 			if (mstate == MS_BODY)
264 			{
265 				/* just put the character out */
266 				fputc(c, tf);
267 				continue;
268 			}
269 
270 			/* header -- buffer up */
271 			if (bp >= &buf[buflen - 2])
272 			{
273 				char *obuf;
274 
275 				if (mstate != MS_HEADER)
276 					break;
277 
278 				/* out of space for header */
279 				obuf = buf;
280 				if (buflen < MEMCHUNKSIZE)
281 					buflen *= 2;
282 				else
283 					buflen += MEMCHUNKSIZE;
284 				buf = xalloc(buflen);
285 				bcopy(obuf, buf, bp - obuf);
286 				bp = &buf[bp - obuf];
287 				if (obuf != bufbuf)
288 					free(obuf);
289 			}
290 			*bp++ = c;
291 			if (istate == IS_BOL)
292 				break;
293 		}
294 		*bp = '\0';
295 
296 nextstate:
297 		if (tTd(30, 35))
298 			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
299 				istate, mstate, buf);
300 		switch (mstate)
301 		{
302 		  case MS_UFROM:
303 			mstate = MS_HEADER;
304 			if (strncmp(buf, "From ", 5) == 0)
305 			{
306 				eatfrom(buf, e);
307 				continue;
308 			}
309 			/* fall through */
310 
311 		  case MS_HEADER:
312 			if (!isheader(buf))
313 			{
314 				mstate = MS_BODY;
315 				goto nextstate;
316 			}
317 
318 			/* check for possible continuation line */
319 			do
320 			{
321 				clearerr(InChannel);
322 				errno = 0;
323 				c = fgetc(InChannel);
324 			} while (errno == EINTR);
325 			if (c != EOF)
326 				ungetc(c, InChannel);
327 			if (c == ' ' || c == '\t')
328 			{
329 				/* yep -- defer this */
330 				continue;
331 			}
332 
333 			/* trim off trailing CRLF or NL */
334 			if (*--bp != '\n' || *--bp != '\r')
335 				bp++;
336 			*bp = '\0';
337 			if (bitset(H_EOH, chompheader(buf, FALSE, e)))
338 				mstate = MS_BODY;
339 			break;
340 
341 		  case MS_BODY:
342 			if (tTd(30, 1))
343 				printf("EOH\n");
344 			if (headeronly)
345 				goto readerr;
346 			bp = buf;
347 
348 			/* toss blank line */
349 			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
350 				bp[0] == '\r' && bp[1] == '\n') ||
351 			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
352 				bp[0] == '\n'))
353 			{
354 				break;
355 			}
356 
357 			/* if not a blank separator, write it out */
358 			while (*bp != '\0')
359 				fputc(*bp++, tf);
360 			break;
361 		}
362 		bp = buf;
363 	}
364 
365 readerr:
366 	if ((feof(fp) && smtpmode) || ferror(fp))
367 	{
368 		if (tTd(30, 1))
369 			printf("collect: read error\n");
370 		inputerr = TRUE;
371 	}
372 
373 	/* reset global timer */
374 	clrevent(CollectTimeout);
375 
376 	if (headeronly)
377 		return;
378 
379 	if (tf != NULL)
380 	{
381 		if (fflush(tf) != 0)
382 			tferror(tf, e);
383 		if (fsync(fileno(tf)) < 0 || fclose(tf) < 0)
384 		{
385 			tferror(tf, e);
386 			finis();
387 		}
388 	}
389 
390 	if (CollectErrorMessage != NULL && Errors <= 0)
391 	{
392 		if (CollectErrno != 0)
393 		{
394 			errno = CollectErrno;
395 			syserr(CollectErrorMessage, e->e_df);
396 			finis();
397 		}
398 		usrerr(CollectErrorMessage);
399 	}
400 	else if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
401 	{
402 		/* An EOF when running SMTP is an error */
403 		char *host;
404 		char *problem;
405 
406 		host = RealHostName;
407 		if (host == NULL)
408 			host = "localhost";
409 
410 		if (feof(fp))
411 			problem = "unexpected close";
412 		else if (ferror(fp))
413 			problem = "I/O error";
414 		else
415 			problem = "read timeout";
416 # ifdef LOG
417 		if (LogLevel > 0 && feof(fp))
418 			syslog(LOG_NOTICE,
419 			    "collect: %s on connection from %s, sender=%s: %s\n",
420 			    problem, host, e->e_from.q_paddr, errstring(errno));
421 # endif
422 		if (feof(fp))
423 			usrerr("451 collect: %s on connection from %s, from=%s",
424 				problem, host, e->e_from.q_paddr);
425 		else
426 			syserr("451 collect: %s on connection from %s, from=%s",
427 				problem, host, e->e_from.q_paddr);
428 
429 		/* don't return an error indication */
430 		e->e_to = NULL;
431 		e->e_flags &= ~EF_FATALERRS;
432 		e->e_flags |= EF_CLRQUEUE;
433 
434 		/* and don't try to deliver the partial message either */
435 		if (InChild)
436 			ExitStat = EX_QUIT;
437 		finis();
438 	}
439 
440 	/*
441 	**  Find out some information from the headers.
442 	**	Examples are who is the from person & the date.
443 	*/
444 
445 	eatheader(e, !requeueflag);
446 
447 	/* collect statistics */
448 	if (OpMode != MD_VERIFY)
449 		markstats(e, (ADDRESS *) NULL);
450 
451 	/*
452 	**  Add an Apparently-To: line if we have no recipient lines.
453 	*/
454 
455 	if (hvalue("to", e->e_header) == NULL &&
456 	    hvalue("cc", e->e_header) == NULL &&
457 	    hvalue("bcc", e->e_header) == NULL &&
458 	    hvalue("apparently-to", e->e_header) == NULL)
459 	{
460 		register ADDRESS *q;
461 
462 		/* create an Apparently-To: field */
463 		/*    that or reject the message.... */
464 		for (q = e->e_sendqueue; q != NULL; q = q->q_next)
465 		{
466 			if (q->q_alias != NULL)
467 				continue;
468 			if (tTd(30, 3))
469 				printf("Adding Apparently-To: %s\n", q->q_paddr);
470 			addheader("Apparently-To", q->q_paddr, &e->e_header);
471 		}
472 	}
473 
474 	/* check for message too large */
475 	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
476 	{
477 		usrerr("552 Message exceeds maximum fixed size (%ld)",
478 			MaxMessageSize);
479 	}
480 
481 	/* check for illegal 8-bit data */
482 	if (HasEightBits)
483 	{
484 		e->e_flags |= EF_HAS8BIT;
485 		if (bitset(MM_MIME8BIT, MimeMode))
486 		{
487 			/* convert it to MIME */
488 			if (hvalue("MIME-Version", e->e_header) == NULL)
489 			{
490 				char mimebuf[20];
491 
492 				strcpy(mimebuf, "MIME-Version: 1.0");
493 				chompheader(mimebuf, FALSE, e);
494 			}
495 			if (e->e_bodytype == NULL)
496 				e->e_bodytype = "8BITMIME";
497 		}
498 		else if (!bitset(MM_PASS8BIT, MimeMode))
499 			usrerr("554 Eight bit data not allowed");
500 	}
501 
502 	if ((e->e_dfp = fopen(e->e_df, "r")) == NULL)
503 	{
504 		/* we haven't acked receipt yet, so just chuck this */
505 		syserr("Cannot reopen %s", e->e_df);
506 		finis();
507 	}
508 }
509 
510 
511 static
512 collecttimeout(timeout)
513 	time_t timeout;
514 {
515 	/* if no progress was made, die now */
516 	if (!CollectProgress)
517 		longjmp(CtxCollectTimeout, 1);
518 
519 	/* otherwise reset the timeout */
520 	CollectTimeout = setevent(timeout, collecttimeout, timeout);
521 	CollectProgress = FALSE;
522 }
523 /*
524 **  TFERROR -- signal error on writing the temporary file.
525 **
526 **	Parameters:
527 **		tf -- the file pointer for the temporary file.
528 **
529 **	Returns:
530 **		none.
531 **
532 **	Side Effects:
533 **		Gives an error message.
534 **		Arranges for following output to go elsewhere.
535 */
536 
537 tferror(tf, e)
538 	FILE *tf;
539 	register ENVELOPE *e;
540 {
541 	CollectErrno = errno;
542 	if (errno == ENOSPC)
543 	{
544 		struct stat st;
545 		long avail;
546 		long bsize;
547 
548 		e->e_flags |= EF_NORETURN;
549 		if (fstat(fileno(tf), &st) < 0)
550 			st.st_size = 0;
551 		(void) freopen(e->e_df, "w", tf);
552 		if (st.st_size <= 0)
553 			fprintf(tf, "\n*** Mail could not be accepted");
554 		else if (sizeof st.st_size > sizeof (long))
555 			fprintf(tf, "\n*** Mail of at least %qd bytes could not be accepted\n",
556 				st.st_size);
557 		else
558 			fprintf(tf, "\n*** Mail of at least %ld bytes could not be accepted\n",
559 				st.st_size);
560 		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
561 			MyHostName);
562 		avail = freespace(QueueDir, &bsize);
563 		if (avail > 0)
564 		{
565 			if (bsize > 1024)
566 				avail *= bsize / 1024;
567 			else if (bsize < 1024)
568 				avail /= 1024 / bsize;
569 			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
570 				avail);
571 		}
572 		CollectErrorMessage = "452 Out of disk space for temp file";
573 	}
574 	else
575 	{
576 		CollectErrorMessage = "cannot write message body to disk (%s)";
577 	}
578 	(void) freopen("/dev/null", "w", tf);
579 }
580 /*
581 **  EATFROM -- chew up a UNIX style from line and process
582 **
583 **	This does indeed make some assumptions about the format
584 **	of UNIX messages.
585 **
586 **	Parameters:
587 **		fm -- the from line.
588 **
589 **	Returns:
590 **		none.
591 **
592 **	Side Effects:
593 **		extracts what information it can from the header,
594 **		such as the date.
595 */
596 
597 # ifndef NOTUNIX
598 
599 char	*DowList[] =
600 {
601 	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
602 };
603 
604 char	*MonthList[] =
605 {
606 	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
607 	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
608 	NULL
609 };
610 
611 eatfrom(fm, e)
612 	char *fm;
613 	register ENVELOPE *e;
614 {
615 	register char *p;
616 	register char **dt;
617 
618 	if (tTd(30, 2))
619 		printf("eatfrom(%s)\n", fm);
620 
621 	/* find the date part */
622 	p = fm;
623 	while (*p != '\0')
624 	{
625 		/* skip a word */
626 		while (*p != '\0' && *p != ' ')
627 			p++;
628 		while (*p == ' ')
629 			p++;
630 		if (!(isascii(*p) && isupper(*p)) ||
631 		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
632 			continue;
633 
634 		/* we have a possible date */
635 		for (dt = DowList; *dt != NULL; dt++)
636 			if (strncmp(*dt, p, 3) == 0)
637 				break;
638 		if (*dt == NULL)
639 			continue;
640 
641 		for (dt = MonthList; *dt != NULL; dt++)
642 			if (strncmp(*dt, &p[4], 3) == 0)
643 				break;
644 		if (*dt != NULL)
645 			break;
646 	}
647 
648 	if (*p != '\0')
649 	{
650 		char *q;
651 		extern char *arpadate();
652 
653 		/* we have found a date */
654 		q = xalloc(25);
655 		(void) strncpy(q, p, 25);
656 		q[24] = '\0';
657 		q = arpadate(q);
658 		define('a', newstr(q), e);
659 	}
660 }
661 
662 # endif /* NOTUNIX */
663