1 /*
2  * Copyright (c) 1983 Eric P. Allman
3  * Copyright (c) 1988, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 #ifndef lint
10 static char sccsid[] = "@(#)collect.c	8.29 (Berkeley) 03/21/95";
11 #endif /* not lint */
12 
13 # include <errno.h>
14 # include "sendmail.h"
15 
16 /*
17 **  COLLECT -- read & parse message header & make temp file.
18 **
19 **	Creates a temporary file name and copies the standard
20 **	input to that file.  Leading UNIX-style "From" lines are
21 **	stripped off (after important information is extracted).
22 **
23 **	Parameters:
24 **		fp -- file to read.
25 **		smtpmode -- if set, we are running SMTP: give an RFC821
26 **			style message to say we are ready to collect
27 **			input, and never ignore a single dot to mean
28 **			end of message.
29 **		requeueflag -- this message will be requeued later, so
30 **			don't do final processing on it.
31 **		hdrp -- the location to stash the header.
32 **		e -- the current envelope.
33 **
34 **	Returns:
35 **		none.
36 **
37 **	Side Effects:
38 **		Temp file is created and filled.
39 **		The from person may be set.
40 */
41 
42 char	*CollectErrorMessage;
43 bool	CollectErrno;
44 
45 static jmp_buf	CtxCollectTimeout;
46 static void	collecttimeout();
47 static bool	CollectProgress;
48 static EVENT	*CollectTimeout;
49 
/* states of the input (character level) state machine */
enum
{
	IS_NORM		= 0,	/* somewhere in the middle of a line */
	IS_BOL		= 1,	/* positioned at the beginning of a line */
	IS_DOT		= 2,	/* a dot was seen at the beginning of a line */
	IS_DOTCR	= 3,	/* ".\r" was seen at the beginning of a line */
	IS_CR		= 4	/* a carriage return was seen */
};

/* states of the message (line level) state machine */
enum
{
	MS_UFROM	= 0,	/* looking at a possible Unix "From " line */
	MS_HEADER	= 1,	/* inside the message header */
	MS_BODY		= 2	/* inside the message body */
};
61 
62 
63 void
64 collect(fp, smtpmode, requeueflag, hdrp, e)
65 	FILE *fp;
66 	bool smtpmode;
67 	bool requeueflag;
68 	HDR **hdrp;
69 	register ENVELOPE *e;
70 {
71 	register FILE *tf;
72 	bool ignrdot = smtpmode ? FALSE : IgnrDot;
73 	time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
74 	register char *bp;
75 	int c = '\0';
76 	bool inputerr = FALSE;
77 	bool headeronly = FALSE;
78 	char *buf;
79 	int buflen;
80 	int istate;
81 	int mstate;
82 	char *pbp;
83 	char peekbuf[8];
84 	char bufbuf[MAXLINE];
85 	extern bool isheader();
86 	extern void eatheader();
87 	extern void tferror();
88 
89 	CollectErrorMessage = NULL;
90 	CollectErrno = 0;
91 	if (hdrp == NULL)
92 		hdrp = &e->e_header;
93 	else
94 		headeronly = TRUE;
95 
96 	/*
97 	**  Create the temp file name and create the file.
98 	*/
99 
100 	if (!headeronly)
101 	{
102 		struct stat stbuf;
103 
104 		e->e_df = queuename(e, 'd');
105 		e->e_df = newstr(e->e_df);
106 		if ((tf = dfopen(e->e_df, O_WRONLY|O_CREAT|O_TRUNC, FileMode)) == NULL)
107 		{
108 			syserr("Cannot create %s", e->e_df);
109 			e->e_flags |= EF_NO_BODY_RETN;
110 			finis();
111 		}
112 		if (fstat(fileno(tf), &stbuf) < 0)
113 			e->e_dfino = -1;
114 		else
115 		{
116 			e->e_dfdev = stbuf.st_dev;
117 			e->e_dfino = stbuf.st_ino;
118 		}
119 		HasEightBits = FALSE;
120 	}
121 
122 	/*
123 	**  Tell ARPANET to go ahead.
124 	*/
125 
126 	if (smtpmode)
127 		message("354 Enter mail, end with \".\" on a line by itself");
128 
129 	/*
130 	**  Read the message.
131 	**
132 	**	This is done using two interleaved state machines.
133 	**	The input state machine is looking for things like
134 	**	hidden dots; the message state machine is handling
135 	**	the larger picture (e.g., header versus body).
136 	*/
137 
138 	buf = bp = bufbuf;
139 	buflen = sizeof bufbuf;
140 	pbp = peekbuf;
141 	istate = IS_BOL;
142 	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
143 	CollectProgress = FALSE;
144 
145 	/* if transmitting binary, don't map NL to EOL */
146 	if (e->e_bodytype != NULL && strcasecmp(e->e_bodytype, "8BITMIME") == 0)
147 		e->e_flags |= EF_NL_NOT_EOL;
148 
149 	if (dbto != 0)
150 	{
151 		/* handle possible input timeout */
152 		if (setjmp(CtxCollectTimeout) != 0)
153 		{
154 #ifdef LOG
155 			syslog(LOG_NOTICE,
156 			    "timeout waiting for input from %s during message collect",
157 			    CurHostName ? CurHostName : "<local machine>");
158 #endif
159 			errno = 0;
160 			usrerr("451 timeout waiting for input during message collect");
161 			goto readerr;
162 		}
163 		CollectTimeout = setevent(dbto, collecttimeout, dbto);
164 	}
165 
166 	for (;;)
167 	{
168 		if (tTd(30, 35))
169 			printf("top, istate=%d, mstate=%d\n", istate, mstate);
170 		for (;;)
171 		{
172 			if (pbp > peekbuf)
173 				c = *--pbp;
174 			else
175 			{
176 				while (!feof(InChannel) && !ferror(InChannel))
177 				{
178 					errno = 0;
179 					c = fgetc(InChannel);
180 					if (errno != EINTR)
181 						break;
182 					clearerr(InChannel);
183 				}
184 				CollectProgress = TRUE;
185 				if (TrafficLogFile != NULL)
186 				{
187 					if (istate == IS_BOL)
188 						fprintf(TrafficLogFile, "%05d <<< ",
189 							getpid());
190 					if (c == EOF)
191 						fprintf(TrafficLogFile, "[EOF]\n");
192 					else
193 						fputc(c, TrafficLogFile);
194 				}
195 				if (c == EOF)
196 					goto readerr;
197 				if (SevenBitInput)
198 					c &= 0x7f;
199 				else
200 					HasEightBits |= bitset(0x80, c);
201 				e->e_msgsize++;
202 			}
203 			if (tTd(30, 94))
204 				printf("istate=%d, c=%c (0x%x)\n",
205 					istate, c, c);
206 			switch (istate)
207 			{
208 			  case IS_BOL:
209 				if (c == '.')
210 				{
211 					istate = IS_DOT;
212 					continue;
213 				}
214 				break;
215 
216 			  case IS_DOT:
217 				if (c == '\n' && !ignrdot &&
218 				    !bitset(EF_NL_NOT_EOL, e->e_flags))
219 					goto readerr;
220 				else if (c == '\r' &&
221 					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
222 				{
223 					istate = IS_DOTCR;
224 					continue;
225 				}
226 				else if (c != '.' ||
227 					 (OpMode != MD_SMTP &&
228 					  OpMode != MD_DAEMON &&
229 					  OpMode != MD_ARPAFTP))
230 				{
231 					*pbp++ = c;
232 					c = '.';
233 				}
234 				break;
235 
236 			  case IS_DOTCR:
237 				if (c == '\n')
238 					goto readerr;
239 				else
240 				{
241 					/* push back the ".\rx" */
242 					*pbp++ = c;
243 					*pbp++ = '\r';
244 					c = '.';
245 				}
246 				break;
247 
248 			  case IS_CR:
249 				if (c == '\n')
250 					istate = IS_BOL;
251 				else
252 				{
253 					ungetc(c, InChannel);
254 					c = '\r';
255 					istate = IS_NORM;
256 				}
257 				goto bufferchar;
258 			}
259 
260 			if (c == '\r' && !bitset(EF_CRLF_NOT_EOL, e->e_flags))
261 			{
262 				istate = IS_CR;
263 				continue;
264 			}
265 			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
266 				istate = IS_BOL;
267 			else
268 				istate = IS_NORM;
269 
270 bufferchar:
271 			if (mstate == MS_BODY)
272 			{
273 				/* just put the character out */
274 				if (MaxMessageSize <= 0 ||
275 				    e->e_msgsize <= MaxMessageSize)
276 					fputc(c, tf);
277 				continue;
278 			}
279 
280 			/* header -- buffer up */
281 			if (bp >= &buf[buflen - 2])
282 			{
283 				char *obuf;
284 
285 				if (mstate != MS_HEADER)
286 					break;
287 
288 				/* out of space for header */
289 				obuf = buf;
290 				if (buflen < MEMCHUNKSIZE)
291 					buflen *= 2;
292 				else
293 					buflen += MEMCHUNKSIZE;
294 				buf = xalloc(buflen);
295 				bcopy(obuf, buf, bp - obuf);
296 				bp = &buf[bp - obuf];
297 				if (obuf != bufbuf)
298 					free(obuf);
299 			}
300 			*bp++ = c;
301 			if (istate == IS_BOL)
302 				break;
303 		}
304 		*bp = '\0';
305 
306 nextstate:
307 		if (tTd(30, 35))
308 			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
309 				istate, mstate, buf);
310 		switch (mstate)
311 		{
312 			extern int chompheader();
313 
314 		  case MS_UFROM:
315 			mstate = MS_HEADER;
316 			if (strncmp(buf, "From ", 5) == 0)
317 			{
318 				extern void eatfrom();
319 
320 				bp = buf;
321 				eatfrom(buf, e);
322 				continue;
323 			}
324 			/* fall through */
325 
326 		  case MS_HEADER:
327 			if (!isheader(buf))
328 			{
329 				mstate = MS_BODY;
330 				goto nextstate;
331 			}
332 
333 			/* check for possible continuation line */
334 			do
335 			{
336 				clearerr(InChannel);
337 				errno = 0;
338 				c = fgetc(InChannel);
339 			} while (errno == EINTR);
340 			if (c != EOF)
341 				ungetc(c, InChannel);
342 			if (c == ' ' || c == '\t')
343 			{
344 				/* yep -- defer this */
345 				continue;
346 			}
347 
348 			/* trim off trailing CRLF or NL */
349 			if (*--bp != '\n' || *--bp != '\r')
350 				bp++;
351 			*bp = '\0';
352 			if (bitset(H_EOH, chompheader(buf, FALSE, e)))
353 				mstate = MS_BODY;
354 			break;
355 
356 		  case MS_BODY:
357 			if (tTd(30, 1))
358 				printf("EOH\n");
359 			if (headeronly)
360 				goto readerr;
361 			bp = buf;
362 
363 			/* toss blank line */
364 			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
365 				bp[0] == '\r' && bp[1] == '\n') ||
366 			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
367 				bp[0] == '\n'))
368 			{
369 				break;
370 			}
371 
372 			/* if not a blank separator, write it out */
373 			if (MaxMessageSize <= 0 ||
374 			    e->e_msgsize <= MaxMessageSize)
375 			{
376 				while (*bp != '\0')
377 					fputc(*bp++, tf);
378 			}
379 			break;
380 		}
381 		bp = buf;
382 	}
383 
384 readerr:
385 	if ((feof(fp) && smtpmode) || ferror(fp))
386 	{
387 		if (tTd(30, 1))
388 			printf("collect: read error\n");
389 		inputerr = TRUE;
390 	}
391 
392 	/* reset global timer */
393 	clrevent(CollectTimeout);
394 
395 	if (headeronly)
396 		return;
397 
398 	if (tf != NULL)
399 	{
400 		if (fflush(tf) != 0)
401 			tferror(tf, e);
402 		if (fsync(fileno(tf)) < 0 || fclose(tf) < 0)
403 		{
404 			tferror(tf, e);
405 			finis();
406 		}
407 	}
408 
409 	if (CollectErrorMessage != NULL && Errors <= 0)
410 	{
411 		if (CollectErrno != 0)
412 		{
413 			errno = CollectErrno;
414 			syserr(CollectErrorMessage, e->e_df);
415 			finis();
416 		}
417 		usrerr(CollectErrorMessage);
418 	}
419 	else if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
420 	{
421 		/* An EOF when running SMTP is an error */
422 		char *host;
423 		char *problem;
424 
425 		host = RealHostName;
426 		if (host == NULL)
427 			host = "localhost";
428 
429 		if (feof(fp))
430 			problem = "unexpected close";
431 		else if (ferror(fp))
432 			problem = "I/O error";
433 		else
434 			problem = "read timeout";
435 # ifdef LOG
436 		if (LogLevel > 0 && feof(fp))
437 			syslog(LOG_NOTICE,
438 			    "collect: %s on connection from %s, sender=%s: %s\n",
439 			    problem, host, e->e_from.q_paddr, errstring(errno));
440 # endif
441 		if (feof(fp))
442 			usrerr("451 collect: %s on connection from %s, from=%s",
443 				problem, host, e->e_from.q_paddr);
444 		else
445 			syserr("451 collect: %s on connection from %s, from=%s",
446 				problem, host, e->e_from.q_paddr);
447 
448 		/* don't return an error indication */
449 		e->e_to = NULL;
450 		e->e_flags &= ~EF_FATALERRS;
451 		e->e_flags |= EF_CLRQUEUE;
452 
453 		/* and don't try to deliver the partial message either */
454 		if (InChild)
455 			ExitStat = EX_QUIT;
456 		finis();
457 	}
458 
459 	/*
460 	**  Find out some information from the headers.
461 	**	Examples are who is the from person & the date.
462 	*/
463 
464 	eatheader(e, !requeueflag);
465 
466 	/* collect statistics */
467 	if (OpMode != MD_VERIFY)
468 	{
469 		extern void markstats();
470 
471 		markstats(e, (ADDRESS *) NULL);
472 	}
473 
474 	/*
475 	**  Add an Apparently-To: line if we have no recipient lines.
476 	*/
477 
478 	if (hvalue("to", e->e_header) == NULL &&
479 	    hvalue("cc", e->e_header) == NULL &&
480 	    hvalue("bcc", e->e_header) == NULL &&
481 	    hvalue("apparently-to", e->e_header) == NULL)
482 	{
483 		register ADDRESS *q;
484 		char *hdr = NULL;
485 		extern void addheader();
486 
487 		/* create an Apparently-To: field */
488 		/*    that or reject the message.... */
489 		switch (NoRecipientAction)
490 		{
491 		  case NRA_ADD_APPARENTLY_TO:
492 			hdr = "Apparently-To";
493 			break;
494 
495 		  case NRA_ADD_TO:
496 			hdr = "To";
497 			break;
498 
499 		  case NRA_ADD_BCC:
500 			addheader("Bcc", "", &e->e_header);
501 			break;
502 
503 		  case NRA_ADD_TO_UNDISCLOSED:
504 			addheader("To", "undisclosed-recipients:;", &e->e_header);
505 			break;
506 		}
507 
508 		if (hdr != NULL)
509 		{
510 			for (q = e->e_sendqueue; q != NULL; q = q->q_next)
511 			{
512 				if (q->q_alias != NULL)
513 					continue;
514 				if (tTd(30, 3))
515 					printf("Adding %s: %s\n",
516 						hdr, q->q_paddr);
517 				addheader(hdr, q->q_paddr, &e->e_header);
518 			}
519 		}
520 	}
521 
522 	/* check for message too large */
523 	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
524 	{
525 		usrerr("552 Message exceeds maximum fixed size (%ld)",
526 			MaxMessageSize);
527 	}
528 
529 	/* check for illegal 8-bit data */
530 	if (HasEightBits)
531 	{
532 		e->e_flags |= EF_HAS8BIT;
533 		if (!bitset(MM_PASS8BIT|MM_MIME8BIT, MimeMode))
534 			usrerr("554 Eight bit data not allowed");
535 	}
536 
537 	if ((e->e_dfp = fopen(e->e_df, "r")) == NULL)
538 	{
539 		/* we haven't acked receipt yet, so just chuck this */
540 		syserr("Cannot reopen %s", e->e_df);
541 		finis();
542 	}
543 }
544 
545 
546 static void
547 collecttimeout(timeout)
548 	time_t timeout;
549 {
550 	/* if no progress was made, die now */
551 	if (!CollectProgress)
552 		longjmp(CtxCollectTimeout, 1);
553 
554 	/* otherwise reset the timeout */
555 	CollectTimeout = setevent(timeout, collecttimeout, timeout);
556 	CollectProgress = FALSE;
557 }
558 /*
559 **  TFERROR -- signal error on writing the temporary file.
560 **
561 **	Parameters:
562 **		tf -- the file pointer for the temporary file.
563 **
564 **	Returns:
565 **		none.
566 **
567 **	Side Effects:
568 **		Gives an error message.
569 **		Arranges for following output to go elsewhere.
570 */
571 
572 void
573 tferror(tf, e)
574 	FILE *tf;
575 	register ENVELOPE *e;
576 {
577 	CollectErrno = errno;
578 	if (errno == ENOSPC)
579 	{
580 		struct stat st;
581 		long avail;
582 		long bsize;
583 
584 		e->e_flags |= EF_NO_BODY_RETN;
585 		if (fstat(fileno(tf), &st) < 0)
586 			st.st_size = 0;
587 		(void) freopen(e->e_df, "w", tf);
588 		if (st.st_size <= 0)
589 			fprintf(tf, "\n*** Mail could not be accepted");
590 		else if (sizeof st.st_size > sizeof (long))
591 			fprintf(tf, "\n*** Mail of at least %qd bytes could not be accepted\n",
592 				st.st_size);
593 		else
594 			fprintf(tf, "\n*** Mail of at least %ld bytes could not be accepted\n",
595 				st.st_size);
596 		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
597 			MyHostName);
598 		avail = freespace(QueueDir, &bsize);
599 		if (avail > 0)
600 		{
601 			if (bsize > 1024)
602 				avail *= bsize / 1024;
603 			else if (bsize < 1024)
604 				avail /= 1024 / bsize;
605 			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
606 				avail);
607 		}
608 		CollectErrorMessage = "452 Out of disk space for temp file";
609 	}
610 	else
611 	{
612 		CollectErrorMessage = "cannot write message body to disk (%s)";
613 	}
614 	(void) freopen("/dev/null", "w", tf);
615 }
/*
**  EATFROM -- chew up a UNIX style from line and process
**
**	This does indeed make some assumptions about the format
**	of UNIX messages.
**
**	Parameters:
**		fm -- the from line.
**		e -- the envelope in which the date macro is defined.
**
**	Returns:
**		none.
**
**	Side Effects:
**		extracts what information it can from the header,
**		such as the date.
*/
632 
633 # ifndef NOTUNIX
634 
/* three-letter day-of-week names, terminated by a NULL entry */
char	*DowList[] =
{
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
	NULL
};
639 
/* three-letter month names in calendar order, terminated by a NULL entry */
char	*MonthList[] =
{
	"Jan", "Feb", "Mar",
	"Apr", "May", "Jun",
	"Jul", "Aug", "Sep",
	"Oct", "Nov", "Dec",
	NULL
};
646 
647 void
648 eatfrom(fm, e)
649 	char *fm;
650 	register ENVELOPE *e;
651 {
652 	register char *p;
653 	register char **dt;
654 
655 	if (tTd(30, 2))
656 		printf("eatfrom(%s)\n", fm);
657 
658 	/* find the date part */
659 	p = fm;
660 	while (*p != '\0')
661 	{
662 		/* skip a word */
663 		while (*p != '\0' && *p != ' ')
664 			p++;
665 		while (*p == ' ')
666 			p++;
667 		if (!(isascii(*p) && isupper(*p)) ||
668 		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
669 			continue;
670 
671 		/* we have a possible date */
672 		for (dt = DowList; *dt != NULL; dt++)
673 			if (strncmp(*dt, p, 3) == 0)
674 				break;
675 		if (*dt == NULL)
676 			continue;
677 
678 		for (dt = MonthList; *dt != NULL; dt++)
679 			if (strncmp(*dt, &p[4], 3) == 0)
680 				break;
681 		if (*dt != NULL)
682 			break;
683 	}
684 
685 	if (*p != '\0')
686 	{
687 		char *q;
688 		extern char *arpadate();
689 
690 		/* we have found a date */
691 		q = xalloc(25);
692 		(void) strncpy(q, p, 25);
693 		q[24] = '\0';
694 		q = arpadate(q);
695 		define('a', newstr(q), e);
696 	}
697 }
698 
699 # endif /* NOTUNIX */
700