1 /*
2  * Copyright (c) 1983 Eric P. Allman
3  * Copyright (c) 1988, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 #ifndef lint
10 static char sccsid[] = "@(#)collect.c	8.30 (Berkeley) 03/21/95";
11 #endif /* not lint */
12 
13 # include <errno.h>
14 # include "sendmail.h"
15 
16 /*
17 **  COLLECT -- read & parse message header & make temp file.
18 **
19 **	Creates a temporary file name and copies the standard
20 **	input to that file.  Leading UNIX-style "From" lines are
21 **	stripped off (after important information is extracted).
22 **
23 **	Parameters:
24 **		fp -- file to read.
25 **		smtpmode -- if set, we are running SMTP: give an RFC821
26 **			style message to say we are ready to collect
27 **			input, and never ignore a single dot to mean
28 **			end of message.
29 **		requeueflag -- this message will be requeued later, so
30 **			don't do final processing on it.
31 **		hdrp -- the location to stash the header.
32 **		e -- the current envelope.
33 **
34 **	Returns:
35 **		none.
36 **
37 **	Side Effects:
38 **		Temp file is created and filled.
39 **		The from person may be set.
40 */
41 
/*
**  Error state shared between collect() and tferror().
**
**	CollectErrorMessage is reset to NULL at the start of collect() and
**	set by tferror(); collect() reports it once input has been read.
**	CollectErrno receives the errno value saved by tferror().
**	NOTE(review): CollectErrno is declared bool but stores an errno
**	value -- confirm that bool is wide enough on every target.
*/
char	*CollectErrorMessage;
bool	CollectErrno;

/*
**  Input-timeout machinery, used only by collect() and collecttimeout().
**
**	CtxCollectTimeout -- setjmp context established in collect();
**		collecttimeout() longjmps to it.
**	CollectProgress -- set TRUE by collect() after each read from the
**		input; set FALSE again by collecttimeout() when it re-arms.
**	CollectTimeout -- the event returned by setevent(); handed to
**		clrevent() when collect() finishes reading.
*/
static jmp_buf	CtxCollectTimeout;
static void	collecttimeout();
static bool	CollectProgress;
static EVENT	*CollectTimeout;

/* values for input state machine (variable istate in collect) */
#define IS_NORM		0	/* middle of line */
#define IS_BOL		1	/* beginning of line */
#define IS_DOT		2	/* read a dot at beginning of line */
#define IS_DOTCR	3	/* read ".\r" at beginning of line */
#define IS_CR		4	/* read a carriage return */

/* values for message state machine (variable mstate in collect) */
#define MS_UFROM	0	/* reading Unix from line */
#define MS_HEADER	1	/* reading message header */
#define MS_BODY		2	/* reading message body */
61 
62 
63 void
64 collect(fp, smtpmode, requeueflag, hdrp, e)
65 	FILE *fp;
66 	bool smtpmode;
67 	bool requeueflag;
68 	HDR **hdrp;
69 	register ENVELOPE *e;
70 {
71 	register FILE *tf;
72 	bool ignrdot = smtpmode ? FALSE : IgnrDot;
73 	time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
74 	register char *bp;
75 	int c = '\0';
76 	bool inputerr = FALSE;
77 	bool headeronly = FALSE;
78 	char *buf;
79 	int buflen;
80 	int istate;
81 	int mstate;
82 	char *pbp;
83 	char peekbuf[8];
84 	char bufbuf[MAXLINE];
85 	extern bool isheader();
86 	extern void eatheader();
87 	extern void tferror();
88 
89 	CollectErrorMessage = NULL;
90 	CollectErrno = 0;
91 	if (hdrp == NULL)
92 		hdrp = &e->e_header;
93 	else
94 		headeronly = TRUE;
95 
96 	/*
97 	**  Create the temp file name and create the file.
98 	*/
99 
100 	if (!headeronly)
101 	{
102 		struct stat stbuf;
103 
104 		e->e_df = queuename(e, 'd');
105 		e->e_df = newstr(e->e_df);
106 		if ((tf = dfopen(e->e_df, O_WRONLY|O_CREAT|O_TRUNC, FileMode)) == NULL)
107 		{
108 			syserr("Cannot create %s", e->e_df);
109 			e->e_flags |= EF_NO_BODY_RETN;
110 			finis();
111 		}
112 		if (fstat(fileno(tf), &stbuf) < 0)
113 			e->e_dfino = -1;
114 		else
115 		{
116 			e->e_dfdev = stbuf.st_dev;
117 			e->e_dfino = stbuf.st_ino;
118 		}
119 		HasEightBits = FALSE;
120 		e->e_msgsize = 0;
121 	}
122 
123 	/*
124 	**  Tell ARPANET to go ahead.
125 	*/
126 
127 	if (smtpmode)
128 		message("354 Enter mail, end with \".\" on a line by itself");
129 
130 	/*
131 	**  Read the message.
132 	**
133 	**	This is done using two interleaved state machines.
134 	**	The input state machine is looking for things like
135 	**	hidden dots; the message state machine is handling
136 	**	the larger picture (e.g., header versus body).
137 	*/
138 
139 	buf = bp = bufbuf;
140 	buflen = sizeof bufbuf;
141 	pbp = peekbuf;
142 	istate = IS_BOL;
143 	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
144 	CollectProgress = FALSE;
145 
146 	/* if transmitting binary, don't map NL to EOL */
147 	if (e->e_bodytype != NULL && strcasecmp(e->e_bodytype, "8BITMIME") == 0)
148 		e->e_flags |= EF_NL_NOT_EOL;
149 
150 	if (dbto != 0)
151 	{
152 		/* handle possible input timeout */
153 		if (setjmp(CtxCollectTimeout) != 0)
154 		{
155 #ifdef LOG
156 			syslog(LOG_NOTICE,
157 			    "timeout waiting for input from %s during message collect",
158 			    CurHostName ? CurHostName : "<local machine>");
159 #endif
160 			errno = 0;
161 			usrerr("451 timeout waiting for input during message collect");
162 			goto readerr;
163 		}
164 		CollectTimeout = setevent(dbto, collecttimeout, dbto);
165 	}
166 
167 	for (;;)
168 	{
169 		if (tTd(30, 35))
170 			printf("top, istate=%d, mstate=%d\n", istate, mstate);
171 		for (;;)
172 		{
173 			if (pbp > peekbuf)
174 				c = *--pbp;
175 			else
176 			{
177 				while (!feof(InChannel) && !ferror(InChannel))
178 				{
179 					errno = 0;
180 					c = fgetc(InChannel);
181 					if (errno != EINTR)
182 						break;
183 					clearerr(InChannel);
184 				}
185 				CollectProgress = TRUE;
186 				if (TrafficLogFile != NULL)
187 				{
188 					if (istate == IS_BOL)
189 						fprintf(TrafficLogFile, "%05d <<< ",
190 							getpid());
191 					if (c == EOF)
192 						fprintf(TrafficLogFile, "[EOF]\n");
193 					else
194 						fputc(c, TrafficLogFile);
195 				}
196 				if (c == EOF)
197 					goto readerr;
198 				if (SevenBitInput)
199 					c &= 0x7f;
200 				else
201 					HasEightBits |= bitset(0x80, c);
202 				if (!headeronly)
203 					e->e_msgsize++;
204 			}
205 			if (tTd(30, 94))
206 				printf("istate=%d, c=%c (0x%x)\n",
207 					istate, c, c);
208 			switch (istate)
209 			{
210 			  case IS_BOL:
211 				if (c == '.')
212 				{
213 					istate = IS_DOT;
214 					continue;
215 				}
216 				break;
217 
218 			  case IS_DOT:
219 				if (c == '\n' && !ignrdot &&
220 				    !bitset(EF_NL_NOT_EOL, e->e_flags))
221 					goto readerr;
222 				else if (c == '\r' &&
223 					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
224 				{
225 					istate = IS_DOTCR;
226 					continue;
227 				}
228 				else if (c != '.' ||
229 					 (OpMode != MD_SMTP &&
230 					  OpMode != MD_DAEMON &&
231 					  OpMode != MD_ARPAFTP))
232 				{
233 					*pbp++ = c;
234 					c = '.';
235 				}
236 				break;
237 
238 			  case IS_DOTCR:
239 				if (c == '\n')
240 					goto readerr;
241 				else
242 				{
243 					/* push back the ".\rx" */
244 					*pbp++ = c;
245 					*pbp++ = '\r';
246 					c = '.';
247 				}
248 				break;
249 
250 			  case IS_CR:
251 				if (c == '\n')
252 					istate = IS_BOL;
253 				else
254 				{
255 					ungetc(c, InChannel);
256 					c = '\r';
257 					istate = IS_NORM;
258 				}
259 				goto bufferchar;
260 			}
261 
262 			if (c == '\r' && !bitset(EF_CRLF_NOT_EOL, e->e_flags))
263 			{
264 				istate = IS_CR;
265 				continue;
266 			}
267 			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
268 				istate = IS_BOL;
269 			else
270 				istate = IS_NORM;
271 
272 bufferchar:
273 			if (mstate == MS_BODY)
274 			{
275 				/* just put the character out */
276 				if (MaxMessageSize <= 0 ||
277 				    e->e_msgsize <= MaxMessageSize)
278 					fputc(c, tf);
279 				continue;
280 			}
281 
282 			/* header -- buffer up */
283 			if (bp >= &buf[buflen - 2])
284 			{
285 				char *obuf;
286 
287 				if (mstate != MS_HEADER)
288 					break;
289 
290 				/* out of space for header */
291 				obuf = buf;
292 				if (buflen < MEMCHUNKSIZE)
293 					buflen *= 2;
294 				else
295 					buflen += MEMCHUNKSIZE;
296 				buf = xalloc(buflen);
297 				bcopy(obuf, buf, bp - obuf);
298 				bp = &buf[bp - obuf];
299 				if (obuf != bufbuf)
300 					free(obuf);
301 			}
302 			*bp++ = c;
303 			if (istate == IS_BOL)
304 				break;
305 		}
306 		*bp = '\0';
307 
308 nextstate:
309 		if (tTd(30, 35))
310 			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
311 				istate, mstate, buf);
312 		switch (mstate)
313 		{
314 			extern int chompheader();
315 
316 		  case MS_UFROM:
317 			mstate = MS_HEADER;
318 			if (strncmp(buf, "From ", 5) == 0)
319 			{
320 				extern void eatfrom();
321 
322 				bp = buf;
323 				eatfrom(buf, e);
324 				continue;
325 			}
326 			/* fall through */
327 
328 		  case MS_HEADER:
329 			if (!isheader(buf))
330 			{
331 				mstate = MS_BODY;
332 				goto nextstate;
333 			}
334 
335 			/* check for possible continuation line */
336 			do
337 			{
338 				clearerr(InChannel);
339 				errno = 0;
340 				c = fgetc(InChannel);
341 			} while (errno == EINTR);
342 			if (c != EOF)
343 				ungetc(c, InChannel);
344 			if (c == ' ' || c == '\t')
345 			{
346 				/* yep -- defer this */
347 				continue;
348 			}
349 
350 			/* trim off trailing CRLF or NL */
351 			if (*--bp != '\n' || *--bp != '\r')
352 				bp++;
353 			*bp = '\0';
354 			if (bitset(H_EOH, chompheader(buf, FALSE, e)))
355 				mstate = MS_BODY;
356 			break;
357 
358 		  case MS_BODY:
359 			if (tTd(30, 1))
360 				printf("EOH\n");
361 			if (headeronly)
362 				goto readerr;
363 			bp = buf;
364 
365 			/* toss blank line */
366 			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
367 				bp[0] == '\r' && bp[1] == '\n') ||
368 			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
369 				bp[0] == '\n'))
370 			{
371 				break;
372 			}
373 
374 			/* if not a blank separator, write it out */
375 			if (MaxMessageSize <= 0 ||
376 			    e->e_msgsize <= MaxMessageSize)
377 			{
378 				while (*bp != '\0')
379 					fputc(*bp++, tf);
380 			}
381 			break;
382 		}
383 		bp = buf;
384 	}
385 
386 readerr:
387 	if ((feof(fp) && smtpmode) || ferror(fp))
388 	{
389 		if (tTd(30, 1))
390 			printf("collect: read error\n");
391 		inputerr = TRUE;
392 	}
393 
394 	/* reset global timer */
395 	clrevent(CollectTimeout);
396 
397 	if (headeronly)
398 		return;
399 
400 	if (tf != NULL)
401 	{
402 		if (fflush(tf) != 0)
403 			tferror(tf, e);
404 		if (fsync(fileno(tf)) < 0 || fclose(tf) < 0)
405 		{
406 			tferror(tf, e);
407 			finis();
408 		}
409 	}
410 
411 	if (CollectErrorMessage != NULL && Errors <= 0)
412 	{
413 		if (CollectErrno != 0)
414 		{
415 			errno = CollectErrno;
416 			syserr(CollectErrorMessage, e->e_df);
417 			finis();
418 		}
419 		usrerr(CollectErrorMessage);
420 	}
421 	else if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
422 	{
423 		/* An EOF when running SMTP is an error */
424 		char *host;
425 		char *problem;
426 
427 		host = RealHostName;
428 		if (host == NULL)
429 			host = "localhost";
430 
431 		if (feof(fp))
432 			problem = "unexpected close";
433 		else if (ferror(fp))
434 			problem = "I/O error";
435 		else
436 			problem = "read timeout";
437 # ifdef LOG
438 		if (LogLevel > 0 && feof(fp))
439 			syslog(LOG_NOTICE,
440 			    "collect: %s on connection from %s, sender=%s: %s\n",
441 			    problem, host, e->e_from.q_paddr, errstring(errno));
442 # endif
443 		if (feof(fp))
444 			usrerr("451 collect: %s on connection from %s, from=%s",
445 				problem, host, e->e_from.q_paddr);
446 		else
447 			syserr("451 collect: %s on connection from %s, from=%s",
448 				problem, host, e->e_from.q_paddr);
449 
450 		/* don't return an error indication */
451 		e->e_to = NULL;
452 		e->e_flags &= ~EF_FATALERRS;
453 		e->e_flags |= EF_CLRQUEUE;
454 
455 		/* and don't try to deliver the partial message either */
456 		if (InChild)
457 			ExitStat = EX_QUIT;
458 		finis();
459 	}
460 
461 	/*
462 	**  Find out some information from the headers.
463 	**	Examples are who is the from person & the date.
464 	*/
465 
466 	eatheader(e, !requeueflag);
467 
468 	/* collect statistics */
469 	if (OpMode != MD_VERIFY)
470 	{
471 		extern void markstats();
472 
473 		markstats(e, (ADDRESS *) NULL);
474 	}
475 
476 	/*
477 	**  Add an Apparently-To: line if we have no recipient lines.
478 	*/
479 
480 	if (hvalue("to", e->e_header) == NULL &&
481 	    hvalue("cc", e->e_header) == NULL &&
482 	    hvalue("bcc", e->e_header) == NULL &&
483 	    hvalue("apparently-to", e->e_header) == NULL)
484 	{
485 		register ADDRESS *q;
486 		char *hdr = NULL;
487 		extern void addheader();
488 
489 		/* create an Apparently-To: field */
490 		/*    that or reject the message.... */
491 		switch (NoRecipientAction)
492 		{
493 		  case NRA_ADD_APPARENTLY_TO:
494 			hdr = "Apparently-To";
495 			break;
496 
497 		  case NRA_ADD_TO:
498 			hdr = "To";
499 			break;
500 
501 		  case NRA_ADD_BCC:
502 			addheader("Bcc", "", &e->e_header);
503 			break;
504 
505 		  case NRA_ADD_TO_UNDISCLOSED:
506 			addheader("To", "undisclosed-recipients:;", &e->e_header);
507 			break;
508 		}
509 
510 		if (hdr != NULL)
511 		{
512 			for (q = e->e_sendqueue; q != NULL; q = q->q_next)
513 			{
514 				if (q->q_alias != NULL)
515 					continue;
516 				if (tTd(30, 3))
517 					printf("Adding %s: %s\n",
518 						hdr, q->q_paddr);
519 				addheader(hdr, q->q_paddr, &e->e_header);
520 			}
521 		}
522 	}
523 
524 	/* check for message too large */
525 	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
526 	{
527 		usrerr("552 Message exceeds maximum fixed size (%ld)",
528 			MaxMessageSize);
529 	}
530 
531 	/* check for illegal 8-bit data */
532 	if (HasEightBits)
533 	{
534 		e->e_flags |= EF_HAS8BIT;
535 		if (!bitset(MM_PASS8BIT|MM_MIME8BIT, MimeMode))
536 			usrerr("554 Eight bit data not allowed");
537 	}
538 
539 	if ((e->e_dfp = fopen(e->e_df, "r")) == NULL)
540 	{
541 		/* we haven't acked receipt yet, so just chuck this */
542 		syserr("Cannot reopen %s", e->e_df);
543 		finis();
544 	}
545 }
546 
547 
548 static void
549 collecttimeout(timeout)
550 	time_t timeout;
551 {
552 	/* if no progress was made, die now */
553 	if (!CollectProgress)
554 		longjmp(CtxCollectTimeout, 1);
555 
556 	/* otherwise reset the timeout */
557 	CollectTimeout = setevent(timeout, collecttimeout, timeout);
558 	CollectProgress = FALSE;
559 }
560 /*
561 **  TFERROR -- signal error on writing the temporary file.
562 **
563 **	Parameters:
564 **		tf -- the file pointer for the temporary file.
565 **
566 **	Returns:
567 **		none.
568 **
569 **	Side Effects:
570 **		Gives an error message.
571 **		Arranges for following output to go elsewhere.
572 */
573 
574 void
575 tferror(tf, e)
576 	FILE *tf;
577 	register ENVELOPE *e;
578 {
579 	CollectErrno = errno;
580 	if (errno == ENOSPC)
581 	{
582 		struct stat st;
583 		long avail;
584 		long bsize;
585 
586 		e->e_flags |= EF_NO_BODY_RETN;
587 		if (fstat(fileno(tf), &st) < 0)
588 			st.st_size = 0;
589 		(void) freopen(e->e_df, "w", tf);
590 		if (st.st_size <= 0)
591 			fprintf(tf, "\n*** Mail could not be accepted");
592 		else if (sizeof st.st_size > sizeof (long))
593 			fprintf(tf, "\n*** Mail of at least %qd bytes could not be accepted\n",
594 				st.st_size);
595 		else
596 			fprintf(tf, "\n*** Mail of at least %ld bytes could not be accepted\n",
597 				st.st_size);
598 		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
599 			MyHostName);
600 		avail = freespace(QueueDir, &bsize);
601 		if (avail > 0)
602 		{
603 			if (bsize > 1024)
604 				avail *= bsize / 1024;
605 			else if (bsize < 1024)
606 				avail /= 1024 / bsize;
607 			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
608 				avail);
609 		}
610 		CollectErrorMessage = "452 Out of disk space for temp file";
611 	}
612 	else
613 	{
614 		CollectErrorMessage = "cannot write message body to disk (%s)";
615 	}
616 	(void) freopen("/dev/null", "w", tf);
617 }
/*
**  EATFROM -- chew up a UNIX style from line and process
**
**	This does indeed make some assumptions about the format
**	of UNIX messages.
**
**	Parameters:
**		fm -- the from line.
**		e -- the current envelope.
**
**	Returns:
**		none.
**
**	Side Effects:
**		extracts what information it can from the header,
**		such as the date; a date that is found is converted
**		with arpadate() and passed to define() as 'a'.
*/
634 
635 # ifndef NOTUNIX
636 
/*
**  Three-letter day-of-week names, NULL terminated.
**	eatfrom() compares these against the start of a candidate date.
*/
char	*DowList[] =
{
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
};

/*
**  Three-letter month names, NULL terminated.
**	eatfrom() compares these against the word after the day of week.
*/
char	*MonthList[] =
{
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
	NULL
};
648 
649 void
650 eatfrom(fm, e)
651 	char *fm;
652 	register ENVELOPE *e;
653 {
654 	register char *p;
655 	register char **dt;
656 
657 	if (tTd(30, 2))
658 		printf("eatfrom(%s)\n", fm);
659 
660 	/* find the date part */
661 	p = fm;
662 	while (*p != '\0')
663 	{
664 		/* skip a word */
665 		while (*p != '\0' && *p != ' ')
666 			p++;
667 		while (*p == ' ')
668 			p++;
669 		if (!(isascii(*p) && isupper(*p)) ||
670 		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
671 			continue;
672 
673 		/* we have a possible date */
674 		for (dt = DowList; *dt != NULL; dt++)
675 			if (strncmp(*dt, p, 3) == 0)
676 				break;
677 		if (*dt == NULL)
678 			continue;
679 
680 		for (dt = MonthList; *dt != NULL; dt++)
681 			if (strncmp(*dt, &p[4], 3) == 0)
682 				break;
683 		if (*dt != NULL)
684 			break;
685 	}
686 
687 	if (*p != '\0')
688 	{
689 		char *q;
690 		extern char *arpadate();
691 
692 		/* we have found a date */
693 		q = xalloc(25);
694 		(void) strncpy(q, p, 25);
695 		q[24] = '\0';
696 		q = arpadate(q);
697 		define('a', newstr(q), e);
698 	}
699 }
700 
701 # endif /* NOTUNIX */
702