1 /*
2  * Copyright (c) 1983 Eric P. Allman
3  * Copyright (c) 1988, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 #ifndef lint
10 static char sccsid[] = "@(#)collect.c	8.25 (Berkeley) 10/17/94";
11 #endif /* not lint */
12 
13 # include <errno.h>
14 # include "sendmail.h"
15 
16 /*
17 **  COLLECT -- read & parse message header & make temp file.
18 **
19 **	Creates a temporary file name and copies the standard
20 **	input to that file.  Leading UNIX-style "From" lines are
21 **	stripped off (after important information is extracted).
22 **
23 **	Parameters:
24 **		fp -- file to read.
25 **		smtpmode -- if set, we are running SMTP: give an RFC821
26 **			style message to say we are ready to collect
27 **			input, and never ignore a single dot to mean
28 **			end of message.
29 **		requeueflag -- this message will be requeued later, so
30 **			don't do final processing on it.
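**		hdrp -- the location to stash the header.  If non-NULL,
**			only the header is read: no temp file is created
**			and collect returns before any final processing.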
32 **		e -- the current envelope.
33 **
34 **	Returns:
35 **		none.
36 **
37 **	Side Effects:
38 **		Temp file is created and filled.
39 **		The from person may be set.
40 */
41 
42 char	*CollectErrorMessage;
43 bool	CollectErrno;
44 
45 static jmp_buf	CtxCollectTimeout;
46 static int	collecttimeout();
47 static bool	CollectProgress;
48 static EVENT	*CollectTimeout;
49 
50 /* values for input state machine */
51 #define IS_NORM		0	/* middle of line */
52 #define IS_BOL		1	/* beginning of line */
53 #define IS_DOT		2	/* read a dot at beginning of line */
54 #define IS_DOTCR	3	/* read ".\r" at beginning of line */
55 #define IS_CR		4	/* read a carriage return */
56 
57 /* values for message state machine */
58 #define MS_UFROM	0	/* reading Unix from line */
59 #define MS_HEADER	1	/* reading message header */
60 #define MS_BODY		2	/* reading message body */
61 
62 
63 collect(fp, smtpmode, requeueflag, hdrp, e)
64 	FILE *fp;
65 	bool smtpmode;
66 	bool requeueflag;
67 	HDR **hdrp;
68 	register ENVELOPE *e;
69 {
70 	register FILE *tf;
71 	bool ignrdot = smtpmode ? FALSE : IgnrDot;
72 	time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
73 	register char *bp;
74 	register int c;
75 	bool inputerr = FALSE;
76 	bool headeronly = FALSE;
77 	char *buf;
78 	int buflen;
79 	int istate;
80 	int mstate;
81 	char *pbp;
82 	char peekbuf[8];
83 	char bufbuf[MAXLINE];
84 	extern bool isheader();
85 
86 	CollectErrorMessage = NULL;
87 	CollectErrno = 0;
88 	if (hdrp == NULL)
89 		hdrp = &e->e_header;
90 	else
91 		headeronly = TRUE;
92 
93 	/*
94 	**  Create the temp file name and create the file.
95 	*/
96 
97 	if (!headeronly)
98 	{
99 		struct stat stbuf;
100 
101 		e->e_df = queuename(e, 'd');
102 		e->e_df = newstr(e->e_df);
103 		if ((tf = dfopen(e->e_df, O_WRONLY|O_CREAT|O_TRUNC, FileMode)) == NULL)
104 		{
105 			syserr("Cannot create %s", e->e_df);
106 			e->e_flags |= EF_NORETURN;
107 			finis();
108 		}
109 		if (fstat(fileno(tf), &stbuf) < 0)
110 			e->e_dfino = -1;
111 		else
112 		{
113 			e->e_dfdev = stbuf.st_dev;
114 			e->e_dfino = stbuf.st_ino;
115 		}
116 		HasEightBits = FALSE;
117 	}
118 
119 	/*
120 	**  Tell ARPANET to go ahead.
121 	*/
122 
123 	if (smtpmode)
124 		message("354 Enter mail, end with \".\" on a line by itself");
125 
126 	/*
127 	**  Read the message.
128 	**
129 	**	This is done using two interleaved state machines.
130 	**	The input state machine is looking for things like
131 	**	hidden dots; the message state machine is handling
132 	**	the larger picture (e.g., header versus body).
133 	*/
134 
135 	buf = bp = bufbuf;
136 	buflen = sizeof bufbuf;
137 	pbp = peekbuf;
138 	istate = IS_BOL;
139 	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
140 	CollectProgress = FALSE;
141 
142 	/* if transmitting binary, don't map NL to EOL */
143 	if (e->e_bodytype != NULL && strcasecmp(e->e_bodytype, "8BITMIME") == 0)
144 		e->e_flags |= EF_NL_NOT_EOL;
145 
146 	if (dbto != 0)
147 	{
148 		/* handle possible input timeout */
149 		if (setjmp(CtxCollectTimeout) != 0)
150 		{
151 #ifdef LOG
152 			syslog(LOG_NOTICE,
153 			    "timeout waiting for input from %s during message collect",
154 			    CurHostName ? CurHostName : "<local machine>");
155 #endif
156 			errno = 0;
157 			usrerr("451 timeout waiting for input during message collect");
158 			goto readerr;
159 		}
160 		CollectTimeout = setevent(dbto, collecttimeout, dbto);
161 	}
162 
163 	for (;;)
164 	{
165 		if (tTd(30, 35))
166 			printf("top, istate=%d, mstate=%d\n", istate, mstate);
167 		for (;;)
168 		{
169 			if (pbp > peekbuf)
170 				c = *--pbp;
171 			else
172 			{
173 				while (!feof(InChannel) && !ferror(InChannel))
174 				{
175 					errno = 0;
176 					c = fgetc(InChannel);
177 					if (errno != EINTR)
178 						break;
179 					clearerr(InChannel);
180 				}
181 				CollectProgress = TRUE;
182 				if (TrafficLogFile != NULL)
183 				{
184 					if (istate == IS_BOL)
185 						fprintf(TrafficLogFile, "%05d <<< ",
186 							getpid());
187 					if (c == EOF)
188 						fprintf(TrafficLogFile, "[EOF]\n");
189 					else
190 						fputc(c, TrafficLogFile);
191 				}
192 				if (c == EOF)
193 					goto readerr;
194 				if (SevenBitInput)
195 					c &= 0x7f;
196 				else
197 					HasEightBits |= bitset(0x80, c);
198 				e->e_msgsize++;
199 			}
200 			if (tTd(30, 94))
201 				printf("istate=%d, c=%c (0x%x)\n",
202 					istate, c, c);
203 			switch (istate)
204 			{
205 			  case IS_BOL:
206 				if (c == '.')
207 				{
208 					istate = IS_DOT;
209 					continue;
210 				}
211 				break;
212 
213 			  case IS_DOT:
214 				if (c == '\n' && !ignrdot &&
215 				    !bitset(EF_NL_NOT_EOL, e->e_flags))
216 					goto readerr;
217 				else if (c == '\r' &&
218 					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
219 				{
220 					istate = IS_DOTCR;
221 					continue;
222 				}
223 				else if (c != '.' ||
224 					 (OpMode != MD_SMTP &&
225 					  OpMode != MD_DAEMON &&
226 					  OpMode != MD_ARPAFTP))
227 				{
228 					*pbp++ = c;
229 					c = '.';
230 				}
231 				break;
232 
233 			  case IS_DOTCR:
234 				if (c == '\n')
235 					goto readerr;
236 				else
237 				{
238 					/* push back the ".\rx" */
239 					*pbp++ = c;
240 					*pbp++ = '\r';
241 					c = '.';
242 				}
243 				break;
244 
245 			  case IS_CR:
246 				if (c == '\n')
247 					istate = IS_BOL;
248 				else
249 				{
250 					ungetc(c, InChannel);
251 					c = '\r';
252 					istate = IS_NORM;
253 				}
254 				goto bufferchar;
255 			}
256 
257 			if (c == '\r' && !bitset(EF_CRLF_NOT_EOL, e->e_flags))
258 			{
259 				istate = IS_CR;
260 				continue;
261 			}
262 			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
263 				istate = IS_BOL;
264 			else
265 				istate = IS_NORM;
266 
267 bufferchar:
268 			if (mstate == MS_BODY)
269 			{
270 				/* just put the character out */
271 				if (MaxMessageSize <= 0 ||
272 				    e->e_msgsize <= MaxMessageSize)
273 					fputc(c, tf);
274 				continue;
275 			}
276 
277 			/* header -- buffer up */
278 			if (bp >= &buf[buflen - 2])
279 			{
280 				char *obuf;
281 
282 				if (mstate != MS_HEADER)
283 					break;
284 
285 				/* out of space for header */
286 				obuf = buf;
287 				if (buflen < MEMCHUNKSIZE)
288 					buflen *= 2;
289 				else
290 					buflen += MEMCHUNKSIZE;
291 				buf = xalloc(buflen);
292 				bcopy(obuf, buf, bp - obuf);
293 				bp = &buf[bp - obuf];
294 				if (obuf != bufbuf)
295 					free(obuf);
296 			}
297 			*bp++ = c;
298 			if (istate == IS_BOL)
299 				break;
300 		}
301 		*bp = '\0';
302 
303 nextstate:
304 		if (tTd(30, 35))
305 			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
306 				istate, mstate, buf);
307 		switch (mstate)
308 		{
309 		  case MS_UFROM:
310 			mstate = MS_HEADER;
311 			if (strncmp(buf, "From ", 5) == 0)
312 			{
313 				bp = buf;
314 				eatfrom(buf, e);
315 				continue;
316 			}
317 			/* fall through */
318 
319 		  case MS_HEADER:
320 			if (!isheader(buf))
321 			{
322 				mstate = MS_BODY;
323 				goto nextstate;
324 			}
325 
326 			/* check for possible continuation line */
327 			do
328 			{
329 				clearerr(InChannel);
330 				errno = 0;
331 				c = fgetc(InChannel);
332 			} while (errno == EINTR);
333 			if (c != EOF)
334 				ungetc(c, InChannel);
335 			if (c == ' ' || c == '\t')
336 			{
337 				/* yep -- defer this */
338 				continue;
339 			}
340 
341 			/* trim off trailing CRLF or NL */
342 			if (*--bp != '\n' || *--bp != '\r')
343 				bp++;
344 			*bp = '\0';
345 			if (bitset(H_EOH, chompheader(buf, FALSE, e)))
346 				mstate = MS_BODY;
347 			break;
348 
349 		  case MS_BODY:
350 			if (tTd(30, 1))
351 				printf("EOH\n");
352 			if (headeronly)
353 				goto readerr;
354 			bp = buf;
355 
356 			/* toss blank line */
357 			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
358 				bp[0] == '\r' && bp[1] == '\n') ||
359 			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
360 				bp[0] == '\n'))
361 			{
362 				break;
363 			}
364 
365 			/* if not a blank separator, write it out */
366 			if (MaxMessageSize <= 0 ||
367 			    e->e_msgsize <= MaxMessageSize)
368 			{
369 				while (*bp != '\0')
370 					fputc(*bp++, tf);
371 			}
372 			break;
373 		}
374 		bp = buf;
375 	}
376 
377 readerr:
378 	if ((feof(fp) && smtpmode) || ferror(fp))
379 	{
380 		if (tTd(30, 1))
381 			printf("collect: read error\n");
382 		inputerr = TRUE;
383 	}
384 
385 	/* reset global timer */
386 	clrevent(CollectTimeout);
387 
388 	if (headeronly)
389 		return;
390 
391 	if (tf != NULL)
392 	{
393 		if (fflush(tf) != 0)
394 			tferror(tf, e);
395 		if (fsync(fileno(tf)) < 0 || fclose(tf) < 0)
396 		{
397 			tferror(tf, e);
398 			finis();
399 		}
400 	}
401 
402 	if (CollectErrorMessage != NULL && Errors <= 0)
403 	{
404 		if (CollectErrno != 0)
405 		{
406 			errno = CollectErrno;
407 			syserr(CollectErrorMessage, e->e_df);
408 			finis();
409 		}
410 		usrerr(CollectErrorMessage);
411 	}
412 	else if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
413 	{
414 		/* An EOF when running SMTP is an error */
415 		char *host;
416 		char *problem;
417 
418 		host = RealHostName;
419 		if (host == NULL)
420 			host = "localhost";
421 
422 		if (feof(fp))
423 			problem = "unexpected close";
424 		else if (ferror(fp))
425 			problem = "I/O error";
426 		else
427 			problem = "read timeout";
428 # ifdef LOG
429 		if (LogLevel > 0 && feof(fp))
430 			syslog(LOG_NOTICE,
431 			    "collect: %s on connection from %s, sender=%s: %s\n",
432 			    problem, host, e->e_from.q_paddr, errstring(errno));
433 # endif
434 		if (feof(fp))
435 			usrerr("451 collect: %s on connection from %s, from=%s",
436 				problem, host, e->e_from.q_paddr);
437 		else
438 			syserr("451 collect: %s on connection from %s, from=%s",
439 				problem, host, e->e_from.q_paddr);
440 
441 		/* don't return an error indication */
442 		e->e_to = NULL;
443 		e->e_flags &= ~EF_FATALERRS;
444 		e->e_flags |= EF_CLRQUEUE;
445 
446 		/* and don't try to deliver the partial message either */
447 		if (InChild)
448 			ExitStat = EX_QUIT;
449 		finis();
450 	}
451 
452 	/*
453 	**  Find out some information from the headers.
454 	**	Examples are who is the from person & the date.
455 	*/
456 
457 	eatheader(e, !requeueflag);
458 
459 	/* collect statistics */
460 	if (OpMode != MD_VERIFY)
461 		markstats(e, (ADDRESS *) NULL);
462 
463 	/*
464 	**  Add an Apparently-To: line if we have no recipient lines.
465 	*/
466 
467 	if (hvalue("to", e->e_header) == NULL &&
468 	    hvalue("cc", e->e_header) == NULL &&
469 	    hvalue("bcc", e->e_header) == NULL &&
470 	    hvalue("apparently-to", e->e_header) == NULL)
471 	{
472 		register ADDRESS *q;
473 
474 		/* create an Apparently-To: field */
475 		/*    that or reject the message.... */
476 		for (q = e->e_sendqueue; q != NULL; q = q->q_next)
477 		{
478 			if (q->q_alias != NULL)
479 				continue;
480 			if (tTd(30, 3))
481 				printf("Adding Apparently-To: %s\n", q->q_paddr);
482 			addheader("Apparently-To", q->q_paddr, &e->e_header);
483 		}
484 	}
485 
486 	/* check for message too large */
487 	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
488 	{
489 		usrerr("552 Message exceeds maximum fixed size (%ld)",
490 			MaxMessageSize);
491 	}
492 
493 	/* check for illegal 8-bit data */
494 	if (HasEightBits)
495 	{
496 		e->e_flags |= EF_HAS8BIT;
497 		if (bitset(MM_MIME8BIT, MimeMode))
498 		{
499 			/* convert it to MIME */
500 			if (hvalue("MIME-Version", e->e_header) == NULL)
501 			{
502 				char mimebuf[20];
503 
504 				strcpy(mimebuf, "MIME-Version: 1.0");
505 				chompheader(mimebuf, FALSE, e);
506 			}
507 			if (e->e_bodytype == NULL)
508 				e->e_bodytype = "8BITMIME";
509 		}
510 		else if (!bitset(MM_PASS8BIT, MimeMode))
511 			usrerr("554 Eight bit data not allowed");
512 	}
513 
514 	if ((e->e_dfp = fopen(e->e_df, "r")) == NULL)
515 	{
516 		/* we haven't acked receipt yet, so just chuck this */
517 		syserr("Cannot reopen %s", e->e_df);
518 		finis();
519 	}
520 }
521 
522 
523 static
524 collecttimeout(timeout)
525 	time_t timeout;
526 {
527 	/* if no progress was made, die now */
528 	if (!CollectProgress)
529 		longjmp(CtxCollectTimeout, 1);
530 
531 	/* otherwise reset the timeout */
532 	CollectTimeout = setevent(timeout, collecttimeout, timeout);
533 	CollectProgress = FALSE;
534 }
535 /*
536 **  TFERROR -- signal error on writing the temporary file.
537 **
538 **	Parameters:
539 **		tf -- the file pointer for the temporary file.
540 **
541 **	Returns:
542 **		none.
543 **
544 **	Side Effects:
545 **		Gives an error message.
546 **		Arranges for following output to go elsewhere.
547 */
548 
549 tferror(tf, e)
550 	FILE *tf;
551 	register ENVELOPE *e;
552 {
553 	CollectErrno = errno;
554 	if (errno == ENOSPC)
555 	{
556 		struct stat st;
557 		long avail;
558 		long bsize;
559 
560 		e->e_flags |= EF_NORETURN;
561 		if (fstat(fileno(tf), &st) < 0)
562 			st.st_size = 0;
563 		(void) freopen(e->e_df, "w", tf);
564 		if (st.st_size <= 0)
565 			fprintf(tf, "\n*** Mail could not be accepted");
566 		else if (sizeof st.st_size > sizeof (long))
567 			fprintf(tf, "\n*** Mail of at least %qd bytes could not be accepted\n",
568 				st.st_size);
569 		else
570 			fprintf(tf, "\n*** Mail of at least %ld bytes could not be accepted\n",
571 				st.st_size);
572 		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
573 			MyHostName);
574 		avail = freespace(QueueDir, &bsize);
575 		if (avail > 0)
576 		{
577 			if (bsize > 1024)
578 				avail *= bsize / 1024;
579 			else if (bsize < 1024)
580 				avail /= 1024 / bsize;
581 			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
582 				avail);
583 		}
584 		CollectErrorMessage = "452 Out of disk space for temp file";
585 	}
586 	else
587 	{
588 		CollectErrorMessage = "cannot write message body to disk (%s)";
589 	}
590 	(void) freopen("/dev/null", "w", tf);
591 }
592 /*
593 **  EATFROM -- chew up a UNIX style from line and process
594 **
595 **	This does indeed make some assumptions about the format
596 **	of UNIX messages.
597 **
**	Parameters:
**		fm -- the from line.
**		e -- the envelope in which the date macro is defined.
600 **
601 **	Returns:
602 **		none.
603 **
604 **	Side Effects:
605 **		extracts what information it can from the header,
606 **		such as the date.
607 */
608 
609 # ifndef NOTUNIX
610 
/* three-letter day-of-week names, Sunday first; NULL marks the end */
char	*DowList[] =
{
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
	NULL
};

/* three-letter month names, January first; NULL marks the end */
char	*MonthList[] =
{
	"Jan", "Feb", "Mar",
	"Apr", "May", "Jun",
	"Jul", "Aug", "Sep",
	"Oct", "Nov", "Dec",
	NULL
};
622 
623 eatfrom(fm, e)
624 	char *fm;
625 	register ENVELOPE *e;
626 {
627 	register char *p;
628 	register char **dt;
629 
630 	if (tTd(30, 2))
631 		printf("eatfrom(%s)\n", fm);
632 
633 	/* find the date part */
634 	p = fm;
635 	while (*p != '\0')
636 	{
637 		/* skip a word */
638 		while (*p != '\0' && *p != ' ')
639 			p++;
640 		while (*p == ' ')
641 			p++;
642 		if (!(isascii(*p) && isupper(*p)) ||
643 		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
644 			continue;
645 
646 		/* we have a possible date */
647 		for (dt = DowList; *dt != NULL; dt++)
648 			if (strncmp(*dt, p, 3) == 0)
649 				break;
650 		if (*dt == NULL)
651 			continue;
652 
653 		for (dt = MonthList; *dt != NULL; dt++)
654 			if (strncmp(*dt, &p[4], 3) == 0)
655 				break;
656 		if (*dt != NULL)
657 			break;
658 	}
659 
660 	if (*p != '\0')
661 	{
662 		char *q;
663 		extern char *arpadate();
664 
665 		/* we have found a date */
666 		q = xalloc(25);
667 		(void) strncpy(q, p, 25);
668 		q[24] = '\0';
669 		q = arpadate(q);
670 		define('a', newstr(q), e);
671 	}
672 }
673 
674 # endif /* NOTUNIX */
675