1 /*
2  * Copyright (c) 1983 Eric P. Allman
3  * Copyright (c) 1988, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 #ifndef lint
10 static char sccsid[] = "@(#)collect.c	8.33 (Berkeley) 04/03/95";
11 #endif /* not lint */
12 
13 # include <errno.h>
14 # include "sendmail.h"
15 
16 /*
17 **  COLLECT -- read & parse message header & make temp file.
18 **
**	Creates a temporary file name and copies the input read
**	from fp to that file.  Leading UNIX-style "From" lines are
**	stripped off (after important information is extracted).
22 **
23 **	Parameters:
24 **		fp -- file to read.
25 **		smtpmode -- if set, we are running SMTP: give an RFC821
26 **			style message to say we are ready to collect
27 **			input, and never ignore a single dot to mean
28 **			end of message.
29 **		requeueflag -- this message will be requeued later, so
30 **			don't do final processing on it.
31 **		hdrp -- the location to stash the header.
32 **		e -- the current envelope.
33 **
34 **	Returns:
35 **		none.
36 **
37 **	Side Effects:
38 **		Temp file is created and filled.
39 **		The from person may be set.
40 */
41 
/* jump target armed by collect(); collecttimeout() longjmp()s here on a stall */
static jmp_buf	CtxCollectTimeout;
static void	collecttimeout();
/* set by collect() after each read from its input; reset by collecttimeout() */
static bool	CollectProgress;
/* the timeout event returned by setevent(); cleared with clrevent() in collect() */
static EVENT	*CollectTimeout;

/* values for input state machine */
#define IS_NORM		0	/* middle of line */
#define IS_BOL		1	/* beginning of line */
#define IS_DOT		2	/* read a dot at beginning of line */
#define IS_DOTCR	3	/* read ".\r" at beginning of line */
#define IS_CR		4	/* read a carriage return */

/* values for message state machine */
#define MS_UFROM	0	/* reading Unix from line */
#define MS_HEADER	1	/* reading message header */
#define MS_BODY		2	/* reading message body */
58 
59 void
60 collect(fp, smtpmode, requeueflag, hdrp, e)
61 	FILE *fp;
62 	bool smtpmode;
63 	bool requeueflag;
64 	HDR **hdrp;
65 	register ENVELOPE *e;
66 {
67 	register FILE *tf;
68 	bool ignrdot = smtpmode ? FALSE : IgnrDot;
69 	time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
70 	register char *bp;
71 	int c = '\0';
72 	bool inputerr = FALSE;
73 	bool headeronly;
74 	char *buf;
75 	int buflen;
76 	int istate;
77 	int mstate;
78 	char *pbp;
79 	char peekbuf[8];
80 	char dfname[20];
81 	char bufbuf[MAXLINE];
82 	extern bool isheader();
83 	extern void eatheader();
84 	extern void tferror();
85 
86 	headeronly = hdrp != NULL;
87 
88 	/*
89 	**  Create the temp file name and create the file.
90 	*/
91 
92 	if (!headeronly)
93 	{
94 		struct stat stbuf;
95 
96 		strcpy(dfname, queuename(e, 'd'));
97 		if ((tf = dfopen(dfname, O_WRONLY|O_CREAT|O_TRUNC, FileMode)) == NULL)
98 		{
99 			syserr("Cannot create %s", dfname);
100 			e->e_flags |= EF_NO_BODY_RETN;
101 			finis();
102 		}
103 		if (fstat(fileno(tf), &stbuf) < 0)
104 			e->e_dfino = -1;
105 		else
106 		{
107 			e->e_dfdev = stbuf.st_dev;
108 			e->e_dfino = stbuf.st_ino;
109 		}
110 		HasEightBits = FALSE;
111 		e->e_msgsize = 0;
112 		e->e_flags |= EF_HAS_DF;
113 	}
114 
115 	/*
116 	**  Tell ARPANET to go ahead.
117 	*/
118 
119 	if (smtpmode)
120 		message("354 Enter mail, end with \".\" on a line by itself");
121 
122 	if (tTd(30, 2))
123 		printf("collect\n");
124 
125 	/*
126 	**  Read the message.
127 	**
128 	**	This is done using two interleaved state machines.
129 	**	The input state machine is looking for things like
130 	**	hidden dots; the message state machine is handling
131 	**	the larger picture (e.g., header versus body).
132 	*/
133 
134 	buf = bp = bufbuf;
135 	buflen = sizeof bufbuf;
136 	pbp = peekbuf;
137 	istate = IS_BOL;
138 	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
139 	CollectProgress = FALSE;
140 
141 	/* if transmitting binary, don't map NL to EOL */
142 	if (e->e_bodytype != NULL && strcasecmp(e->e_bodytype, "8BITMIME") == 0)
143 		e->e_flags |= EF_NL_NOT_EOL;
144 
145 	if (dbto != 0)
146 	{
147 		/* handle possible input timeout */
148 		if (setjmp(CtxCollectTimeout) != 0)
149 		{
150 #ifdef LOG
151 			syslog(LOG_NOTICE,
152 			    "timeout waiting for input from %s during message collect",
153 			    CurHostName ? CurHostName : "<local machine>");
154 #endif
155 			errno = 0;
156 			usrerr("451 timeout waiting for input during message collect");
157 			goto readerr;
158 		}
159 		CollectTimeout = setevent(dbto, collecttimeout, dbto);
160 	}
161 
162 	for (;;)
163 	{
164 		if (tTd(30, 35))
165 			printf("top, istate=%d, mstate=%d\n", istate, mstate);
166 		for (;;)
167 		{
168 			if (pbp > peekbuf)
169 				c = *--pbp;
170 			else
171 			{
172 				while (!feof(fp) && !ferror(fp))
173 				{
174 					errno = 0;
175 					c = fgetc(fp);
176 					if (errno != EINTR)
177 						break;
178 					clearerr(fp);
179 				}
180 				CollectProgress = TRUE;
181 				if (TrafficLogFile != NULL && !headeronly)
182 				{
183 					if (istate == IS_BOL)
184 						fprintf(TrafficLogFile, "%05d <<< ",
185 							getpid());
186 					if (c == EOF)
187 						fprintf(TrafficLogFile, "[EOF]\n");
188 					else
189 						fputc(c, TrafficLogFile);
190 				}
191 				if (c == EOF)
192 					goto readerr;
193 				if (SevenBitInput)
194 					c &= 0x7f;
195 				else
196 					HasEightBits |= bitset(0x80, c);
197 				if (!headeronly)
198 					e->e_msgsize++;
199 			}
200 			if (tTd(30, 94))
201 				printf("istate=%d, c=%c (0x%x)\n",
202 					istate, c, c);
203 			switch (istate)
204 			{
205 			  case IS_BOL:
206 				if (c == '.')
207 				{
208 					istate = IS_DOT;
209 					continue;
210 				}
211 				break;
212 
213 			  case IS_DOT:
214 				if (c == '\n' && !ignrdot &&
215 				    !bitset(EF_NL_NOT_EOL, e->e_flags))
216 					goto readerr;
217 				else if (c == '\r' &&
218 					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
219 				{
220 					istate = IS_DOTCR;
221 					continue;
222 				}
223 				else if (c != '.' ||
224 					 (OpMode != MD_SMTP &&
225 					  OpMode != MD_DAEMON &&
226 					  OpMode != MD_ARPAFTP))
227 				{
228 					*pbp++ = c;
229 					c = '.';
230 				}
231 				break;
232 
233 			  case IS_DOTCR:
234 				if (c == '\n')
235 					goto readerr;
236 				else
237 				{
238 					/* push back the ".\rx" */
239 					*pbp++ = c;
240 					*pbp++ = '\r';
241 					c = '.';
242 				}
243 				break;
244 
245 			  case IS_CR:
246 				if (c == '\n')
247 					istate = IS_BOL;
248 				else
249 				{
250 					ungetc(c, fp);
251 					c = '\r';
252 					istate = IS_NORM;
253 				}
254 				goto bufferchar;
255 			}
256 
257 			if (c == '\r' && !bitset(EF_CRLF_NOT_EOL, e->e_flags))
258 			{
259 				istate = IS_CR;
260 				continue;
261 			}
262 			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
263 				istate = IS_BOL;
264 			else
265 				istate = IS_NORM;
266 
267 bufferchar:
268 			if (mstate == MS_BODY)
269 			{
270 				/* just put the character out */
271 				if (MaxMessageSize <= 0 ||
272 				    e->e_msgsize <= MaxMessageSize)
273 					fputc(c, tf);
274 				continue;
275 			}
276 
277 			/* header -- buffer up */
278 			if (bp >= &buf[buflen - 2])
279 			{
280 				char *obuf;
281 
282 				if (mstate != MS_HEADER)
283 					break;
284 
285 				/* out of space for header */
286 				obuf = buf;
287 				if (buflen < MEMCHUNKSIZE)
288 					buflen *= 2;
289 				else
290 					buflen += MEMCHUNKSIZE;
291 				buf = xalloc(buflen);
292 				bcopy(obuf, buf, bp - obuf);
293 				bp = &buf[bp - obuf];
294 				if (obuf != bufbuf)
295 					free(obuf);
296 			}
297 			*bp++ = c;
298 			if (istate == IS_BOL)
299 				break;
300 		}
301 		*bp = '\0';
302 
303 nextstate:
304 		if (tTd(30, 35))
305 			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
306 				istate, mstate, buf);
307 		switch (mstate)
308 		{
309 			extern int chompheader();
310 
311 		  case MS_UFROM:
312 			mstate = MS_HEADER;
313 			if (strncmp(buf, "From ", 5) == 0)
314 			{
315 				extern void eatfrom();
316 
317 				bp = buf;
318 				eatfrom(buf, e);
319 				continue;
320 			}
321 			/* fall through */
322 
323 		  case MS_HEADER:
324 			if (!isheader(buf))
325 			{
326 				mstate = MS_BODY;
327 				goto nextstate;
328 			}
329 
330 			/* check for possible continuation line */
331 			do
332 			{
333 				clearerr(fp);
334 				errno = 0;
335 				c = fgetc(fp);
336 			} while (errno == EINTR);
337 			if (c != EOF)
338 				ungetc(c, fp);
339 			if (c == ' ' || c == '\t')
340 			{
341 				/* yep -- defer this */
342 				continue;
343 			}
344 
345 			/* trim off trailing CRLF or NL */
346 			if (*--bp != '\n' || *--bp != '\r')
347 				bp++;
348 			*bp = '\0';
349 			if (bitset(H_EOH, chompheader(buf, FALSE, hdrp, e)))
350 				mstate = MS_BODY;
351 			break;
352 
353 		  case MS_BODY:
354 			if (tTd(30, 1))
355 				printf("EOH\n");
356 			if (headeronly)
357 				goto readerr;
358 			bp = buf;
359 
360 			/* toss blank line */
361 			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
362 				bp[0] == '\r' && bp[1] == '\n') ||
363 			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
364 				bp[0] == '\n'))
365 			{
366 				break;
367 			}
368 
369 			/* if not a blank separator, write it out */
370 			if (MaxMessageSize <= 0 ||
371 			    e->e_msgsize <= MaxMessageSize)
372 			{
373 				while (*bp != '\0')
374 					fputc(*bp++, tf);
375 			}
376 			break;
377 		}
378 		bp = buf;
379 	}
380 
381 readerr:
382 	if ((feof(fp) && smtpmode) || ferror(fp))
383 	{
384 		if (tTd(30, 1))
385 			printf("collect: read error\n");
386 		inputerr = TRUE;
387 	}
388 
389 	/* reset global timer */
390 	clrevent(CollectTimeout);
391 
392 	if (headeronly)
393 		return;
394 
395 	if (tf != NULL)
396 	{
397 		if (fflush(tf) != 0)
398 			tferror(tf, e);
399 		if (fsync(fileno(tf)) < 0 || fclose(tf) < 0)
400 		{
401 			tferror(tf, e);
402 			finis();
403 		}
404 	}
405 
406 	/* An EOF when running SMTP is an error */
407 	if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
408 	{
409 		char *host;
410 		char *problem;
411 
412 		host = RealHostName;
413 		if (host == NULL)
414 			host = "localhost";
415 
416 		if (feof(fp))
417 			problem = "unexpected close";
418 		else if (ferror(fp))
419 			problem = "I/O error";
420 		else
421 			problem = "read timeout";
422 # ifdef LOG
423 		if (LogLevel > 0 && feof(fp))
424 			syslog(LOG_NOTICE,
425 			    "collect: %s on connection from %s, sender=%s: %s\n",
426 			    problem, host, e->e_from.q_paddr, errstring(errno));
427 # endif
428 		if (feof(fp))
429 			usrerr("451 collect: %s on connection from %s, from=%s",
430 				problem, host, e->e_from.q_paddr);
431 		else
432 			syserr("451 collect: %s on connection from %s, from=%s",
433 				problem, host, e->e_from.q_paddr);
434 
435 		/* don't return an error indication */
436 		e->e_to = NULL;
437 		e->e_flags &= ~EF_FATALERRS;
438 		e->e_flags |= EF_CLRQUEUE;
439 
440 		/* and don't try to deliver the partial message either */
441 		if (InChild)
442 			ExitStat = EX_QUIT;
443 		finis();
444 	}
445 
446 	/*
447 	**  Find out some information from the headers.
448 	**	Examples are who is the from person & the date.
449 	*/
450 
451 	eatheader(e, !requeueflag);
452 
453 	/* collect statistics */
454 	if (OpMode != MD_VERIFY)
455 	{
456 		extern void markstats();
457 
458 		markstats(e, (ADDRESS *) NULL);
459 	}
460 
461 	/*
462 	**  Add an Apparently-To: line if we have no recipient lines.
463 	*/
464 
465 	if (hvalue("to", e->e_header) == NULL &&
466 	    hvalue("cc", e->e_header) == NULL &&
467 	    hvalue("bcc", e->e_header) == NULL &&
468 	    hvalue("apparently-to", e->e_header) == NULL)
469 	{
470 		register ADDRESS *q;
471 		char *hdr = NULL;
472 		extern void addheader();
473 
474 		/* create an Apparently-To: field */
475 		/*    that or reject the message.... */
476 		switch (NoRecipientAction)
477 		{
478 		  case NRA_ADD_APPARENTLY_TO:
479 			hdr = "Apparently-To";
480 			break;
481 
482 		  case NRA_ADD_TO:
483 			hdr = "To";
484 			break;
485 
486 		  case NRA_ADD_BCC:
487 			addheader("Bcc", "", &e->e_header);
488 			break;
489 
490 		  case NRA_ADD_TO_UNDISCLOSED:
491 			addheader("To", "undisclosed-recipients:;", &e->e_header);
492 			break;
493 		}
494 
495 		if (hdr != NULL)
496 		{
497 			for (q = e->e_sendqueue; q != NULL; q = q->q_next)
498 			{
499 				if (q->q_alias != NULL)
500 					continue;
501 				if (tTd(30, 3))
502 					printf("Adding %s: %s\n",
503 						hdr, q->q_paddr);
504 				addheader(hdr, q->q_paddr, &e->e_header);
505 			}
506 		}
507 	}
508 
509 	/* check for message too large */
510 	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
511 	{
512 		usrerr("552 Message exceeds maximum fixed size (%ld)",
513 			MaxMessageSize);
514 	}
515 
516 	/* check for illegal 8-bit data */
517 	if (HasEightBits)
518 	{
519 		e->e_flags |= EF_HAS8BIT;
520 		if (!bitset(MM_PASS8BIT|MM_MIME8BIT, MimeMode))
521 			usrerr("554 Eight bit data not allowed");
522 	}
523 
524 	if ((e->e_dfp = fopen(dfname, "r")) == NULL)
525 	{
526 		/* we haven't acked receipt yet, so just chuck this */
527 		syserr("Cannot reopen %s", dfname);
528 		finis();
529 	}
530 }
531 
532 
533 static void
534 collecttimeout(timeout)
535 	time_t timeout;
536 {
537 	/* if no progress was made, die now */
538 	if (!CollectProgress)
539 		longjmp(CtxCollectTimeout, 1);
540 
541 	/* otherwise reset the timeout */
542 	CollectTimeout = setevent(timeout, collecttimeout, timeout);
543 	CollectProgress = FALSE;
544 }
545 /*
546 **  TFERROR -- signal error on writing the temporary file.
547 **
548 **	Parameters:
549 **		tf -- the file pointer for the temporary file.
550 **		e -- the current envelope.
551 **
552 **	Returns:
553 **		none.
554 **
555 **	Side Effects:
556 **		Gives an error message.
557 **		Arranges for following output to go elsewhere.
558 */
559 
560 void
561 tferror(tf, e)
562 	FILE *tf;
563 	register ENVELOPE *e;
564 {
565 	if (errno == ENOSPC)
566 	{
567 		struct stat st;
568 		long avail;
569 		long bsize;
570 
571 		e->e_flags |= EF_NO_BODY_RETN;
572 		if (fstat(fileno(tf), &st) < 0)
573 			st.st_size = 0;
574 		(void) freopen(queuename(e, 'd'), "w", tf);
575 		if (st.st_size <= 0)
576 			fprintf(tf, "\n*** Mail could not be accepted");
577 		else if (sizeof st.st_size > sizeof (long))
578 			fprintf(tf, "\n*** Mail of at least %qd bytes could not be accepted\n",
579 				st.st_size);
580 		else
581 			fprintf(tf, "\n*** Mail of at least %ld bytes could not be accepted\n",
582 				st.st_size);
583 		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
584 			MyHostName);
585 		avail = freespace(QueueDir, &bsize);
586 		if (avail > 0)
587 		{
588 			if (bsize > 1024)
589 				avail *= bsize / 1024;
590 			else if (bsize < 1024)
591 				avail /= 1024 / bsize;
592 			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
593 				avail);
594 		}
595 		usrerr("452 Out of disk space for temp file");
596 	}
597 	else
598 		syserr("collect: Cannot write tf%s", e->e_id);
599 	(void) freopen("/dev/null", "w", tf);
600 }
601 /*
602 **  EATFROM -- chew up a UNIX style from line and process
603 **
604 **	This does indeed make some assumptions about the format
605 **	of UNIX messages.
606 **
**	Parameters:
**		fm -- the from line.
**		e -- the current envelope (macro 'a' is defined in it
**			when a date is found).
609 **
610 **	Returns:
611 **		none.
612 **
613 **	Side Effects:
614 **		extracts what information it can from the header,
615 **		such as the date.
616 */
617 
618 # ifndef NOTUNIX
619 
/* three-letter day-of-week names, NULL terminated; eatfrom() matches against these */
char	*DowList[] =
{
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
};

/* three-letter month names, NULL terminated; eatfrom() matches against these */
char	*MonthList[] =
{
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
	NULL
};
631 
632 void
633 eatfrom(fm, e)
634 	char *fm;
635 	register ENVELOPE *e;
636 {
637 	register char *p;
638 	register char **dt;
639 
640 	if (tTd(30, 2))
641 		printf("eatfrom(%s)\n", fm);
642 
643 	/* find the date part */
644 	p = fm;
645 	while (*p != '\0')
646 	{
647 		/* skip a word */
648 		while (*p != '\0' && *p != ' ')
649 			p++;
650 		while (*p == ' ')
651 			p++;
652 		if (!(isascii(*p) && isupper(*p)) ||
653 		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
654 			continue;
655 
656 		/* we have a possible date */
657 		for (dt = DowList; *dt != NULL; dt++)
658 			if (strncmp(*dt, p, 3) == 0)
659 				break;
660 		if (*dt == NULL)
661 			continue;
662 
663 		for (dt = MonthList; *dt != NULL; dt++)
664 			if (strncmp(*dt, &p[4], 3) == 0)
665 				break;
666 		if (*dt != NULL)
667 			break;
668 	}
669 
670 	if (*p != '\0')
671 	{
672 		char *q;
673 		extern char *arpadate();
674 
675 		/* we have found a date */
676 		q = xalloc(25);
677 		(void) strncpy(q, p, 25);
678 		q[24] = '\0';
679 		q = arpadate(q);
680 		define('a', newstr(q), e);
681 	}
682 }
683 
684 # endif /* NOTUNIX */
685