11392Seric # include <stdio.h>
21392Seric # include <ctype.h>
31439Seric # include <errno.h>
41392Seric # include "dlvrmail.h"
51392Seric 
6*2900Seric static char	SccsId[] = "@(#)collect.c	3.1	03/04/81";
71392Seric 
81392Seric /*
91392Seric **  MAKETEMP -- read & parse message header & make temp file.
101392Seric **
111392Seric **	Creates a temporary file name and copies the standard
121392Seric **	input to that file.  While it is doing it, it looks for
131392Seric **	"From:" and "Sender:" fields to use as the from-person
141392Seric **	(but only if the -a flag is specified).  It prefers to
151392Seric **	to use the "Sender:" field.
161392Seric **
171392Seric **	MIT seems to like to produce "Sent-By:" fields instead
181392Seric **	of "Sender:" fields.  We used to catch this, but it turns
191392Seric **	out that the "Sent-By:" field doesn't always correspond
201392Seric **	to someone real ("___057", for instance), as required by
211392Seric **	the protocol.  So we limp by.....
221392Seric **
231392Seric **	Parameters:
241875Seric **		none
251392Seric **
261392Seric **	Returns:
271875Seric **		Name of temp file.
281392Seric **
291392Seric **	Side Effects:
301392Seric **		Temp file is created and filled.
311392Seric **
321392Seric **	Called By:
331392Seric **		main
341392Seric **
351392Seric **	Notes:
361392Seric **		This is broken off from main largely so that the
371392Seric **		temp buffer can be deallocated.
381392Seric */
391392Seric 
40*2900Seric char	*MsgId;			/* message-id, determined or created */
411624Seric long	MsgSize;		/* size of message in bytes */
42*2900Seric char	*Date;			/* UNIX-style origination date */
431397Seric 
441392Seric char *
451875Seric maketemp()
461392Seric {
471392Seric 	register FILE *tf;
481392Seric 	char buf[MAXFIELD+1];
491392Seric 	register char *p;
501392Seric 	char c;
511439Seric 	extern int errno;
52*2900Seric 	register HDR *h;
53*2900Seric 	HDR **hp;
54*2900Seric 	extern bool isheader();
55*2900Seric 	extern char *newstr();
56*2900Seric 	extern char *xalloc();
57*2900Seric 	char *fname;
58*2900Seric 	char *fvalue;
59*2900Seric 	extern char *index(), *rindex();
60*2900Seric 	char *xfrom;
61*2900Seric 	extern char *hvalue();
62*2900Seric 	extern char *makemsgid();
63*2900Seric 	struct hdrinfo *hi;
641392Seric 
651392Seric 	/*
661392Seric 	**  Create the temp file name and create the file.
671392Seric 	*/
681392Seric 
691392Seric 	mktemp(InFileName);
701392Seric 	close(creat(InFileName, 0600));
711392Seric 	if ((tf = fopen(InFileName, "w")) == NULL)
721392Seric 	{
731392Seric 		syserr("Cannot create %s", InFileName);
741392Seric 		return (NULL);
751392Seric 	}
761392Seric 
77*2900Seric 	/* try to read a UNIX-style From line */
78*2900Seric 	if (fgets(buf, sizeof buf, stdin) == NULL)
79*2900Seric 		return (NULL);
80*2900Seric 	if (strncmp(buf, "From ", 5) == 0)
81*2900Seric 	{
82*2900Seric 		eatfrom(buf);
83*2900Seric 		fgets(buf, sizeof buf, stdin);
84*2900Seric 	}
85*2900Seric 
861392Seric 	/*
871392Seric 	**  Copy stdin to temp file & do message editting.
881392Seric 	**	To keep certain mailers from getting confused,
891392Seric 	**	and to keep the output clean, lines that look
901392Seric 	**	like UNIX "From" lines are deleted in the header,
911392Seric 	**	and prepended with ">" in the body.
921392Seric 	*/
931392Seric 
94*2900Seric 	for (; !feof(stdin); !feof(stdin) && fgets(buf, sizeof buf, stdin))
951392Seric 	{
96*2900Seric 		/* see if the header is over */
97*2900Seric 		if (!isheader(buf))
98*2900Seric 			break;
99*2900Seric 
100*2900Seric 		/* get the rest of this field */
101*2900Seric 		while ((c = getc(stdin)) == ' ' || c == '\t')
1021392Seric 		{
103*2900Seric 			p = &buf[strlen(buf)];
104*2900Seric 			*p++ = c;
105*2900Seric 			if (fgets(p, sizeof buf - (p - buf), stdin) == NULL)
106*2900Seric 				break;
1071392Seric 		}
108*2900Seric 		if (c != EOF)
109*2900Seric 			ungetc(c, stdin);
1101392Seric 
111*2900Seric 		MsgSize += strlen(buf);
1121392Seric 
113*2900Seric 		/*
114*2900Seric 		**  Snarf header away.
115*2900Seric 		*/
116*2900Seric 
117*2900Seric 		/* strip off trailing newline */
118*2900Seric 		p = rindex(buf, '\n');
119*2900Seric 		if (p != NULL)
120*2900Seric 			*p = '\0';
121*2900Seric 
122*2900Seric 		/* find canonical name */
123*2900Seric 		fname = buf;
124*2900Seric 		p = index(buf, ':');
125*2900Seric 		fvalue = &p[1];
126*2900Seric 		while (isspace(*--p))
127*2900Seric 			continue;
128*2900Seric 		*++p = '\0';
129*2900Seric 		makelower(fname);
130*2900Seric 
131*2900Seric 		/* strip field value on front */
132*2900Seric 		if (*fvalue == ' ')
133*2900Seric 			fvalue++;
134*2900Seric 
135*2900Seric 		/* search header list for this header */
136*2900Seric 		for (hp = &Header, h = Header; h != NULL; hp = &h->h_link, h = h->h_link)
1371392Seric 		{
138*2900Seric 			if (strcmp(fname, h->h_field) == 0 && flagset(H_CONCAT|H_DEFAULT, h->h_flags))
139*2900Seric 				break;
140*2900Seric 		}
141*2900Seric 		if (h == NULL)
142*2900Seric 		{
143*2900Seric 			/* create a new node */
144*2900Seric # ifdef DEBUG
145*2900Seric 			if (Debug)
146*2900Seric 				printf("new field '%s', value '%s'\n", fname, fvalue);
147*2900Seric # endif DEBUG
148*2900Seric 			*hp = h = (HDR *) xalloc(sizeof *h);
149*2900Seric 			h->h_field = newstr(fname);
150*2900Seric 			h->h_value = NULL;
151*2900Seric 			h->h_link = NULL;
152*2900Seric 			h->h_flags = 0;
153*2900Seric 
154*2900Seric 			/* see if it is a known type */
155*2900Seric 			for (hi = HdrInfo; hi->hi_field != NULL; hi++)
1561392Seric 			{
157*2900Seric 				if (strcmp(hi->hi_field, h->h_field) == 0)
158*2900Seric 				{
159*2900Seric 					h->h_flags = hi->hi_flags;
160*2900Seric 					break;
161*2900Seric 				}
1621392Seric 			}
163*2900Seric 		}
164*2900Seric 		else if (flagset(H_DEFAULT, h->h_flags))
165*2900Seric 		{
166*2900Seric 			/* overriding default, throw out old value */
1671392Seric # ifdef DEBUG
1681392Seric 			if (Debug)
169*2900Seric 				printf("overriding '%s', old='%s', new='%s'\n",
170*2900Seric 				       fname, h->h_value, fvalue);
1711392Seric # endif DEBUG
172*2900Seric 			free(h->h_value);
173*2900Seric 			h->h_value = NULL;
1741392Seric 		}
1751392Seric 
176*2900Seric 		/* do something with the value */
177*2900Seric 		if (h->h_value == NULL)
1781392Seric 		{
179*2900Seric # ifdef DEBUG
180*2900Seric 			if (Debug)
181*2900Seric 				printf("installing '%s: %s'\n", fname, fvalue);
182*2900Seric # endif DEBUG
183*2900Seric 			h->h_value = newstr(fvalue);
1841392Seric 		}
185*2900Seric 		else
186*2900Seric 		{
187*2900Seric 			register int len;
1881392Seric 
189*2900Seric 			/* concatenate the two values */
1901392Seric # ifdef DEBUG
191*2900Seric 			if (Debug)
192*2900Seric 				printf("concat '%s: %s' with '%s'\n", fname,
193*2900Seric 				       h->h_value, fvalue);
1941392Seric # endif DEBUG
195*2900Seric 			len = strlen(h->h_value) + strlen(fvalue) + 2;
196*2900Seric 			p = xalloc(len);
197*2900Seric 			strcpy(p, h->h_value);
198*2900Seric 			strcat(p, ",");
199*2900Seric 			strcat(p, fvalue);
200*2900Seric 			free(h->h_value);
201*2900Seric 			h->h_value = p;
2021392Seric 		}
203*2900Seric 	}
2041392Seric 
205*2900Seric # ifdef DEBUG
206*2900Seric 	if (Debug)
207*2900Seric 		printf("EOH\n");
208*2900Seric # endif DEBUG
209*2900Seric 
210*2900Seric 	/* throw away a blank line */
211*2900Seric 	if (buf[0] == '\n')
212*2900Seric 		fgets(buf, sizeof buf, stdin);
213*2900Seric 
214*2900Seric 	/*
215*2900Seric 	**  Collect the body of the message.
216*2900Seric 	*/
217*2900Seric 
218*2900Seric 	for (; !feof(stdin); !feof(stdin) && fgets(buf, sizeof buf, stdin) != NULL)
219*2900Seric 	{
220*2900Seric 		/* check for end-of-message */
221*2900Seric 		if (!IgnrDot && buf[0] == '.' && (buf[1] == '\n' || buf[1] == '\0'))
222*2900Seric 			break;
223*2900Seric 
224*2900Seric 		/* Hide UNIX-like From lines */
225*2900Seric 		if (strncmp(buf, "From ", 5) == 0)
2261392Seric 		{
227*2900Seric 			fputs(">", tf);
228*2900Seric 			MsgSize++;
2291392Seric 		}
2301624Seric 		MsgSize += strlen(buf);
2311392Seric 		fputs(buf, tf);
2321392Seric 		if (ferror(tf))
2331392Seric 		{
2341439Seric 			if (errno == ENOSPC)
2351439Seric 			{
2361439Seric 				freopen(InFileName, "w", tf);
2371439Seric 				fputs("\nMAIL DELETED BECAUSE OF LACK OF DISK SPACE\n\n", tf);
2381439Seric 				syserr("Out of disk space for temp file");
2391439Seric 			}
2401439Seric 			else
2411439Seric 				syserr("Cannot write %s", InFileName);
2421439Seric 			freopen("/dev/null", "w", tf);
2431392Seric 		}
2441392Seric 	}
2451392Seric 	fclose(tf);
246*2900Seric 
247*2900Seric 	/*
248*2900Seric 	**  Find out some information from the headers.
249*2900Seric 	**	Examples are who is the from person, the date, the
250*2900Seric 	**	message-id, etc.
251*2900Seric 	*/
252*2900Seric 
253*2900Seric 	/* from person */
254*2900Seric 	xfrom = hvalue("sender");
255*2900Seric 	if (xfrom == NULL)
256*2900Seric 		xfrom = hvalue("from");
257*2900Seric 
258*2900Seric 	/* date message originated */
259*2900Seric 	/* we don't seem to have a good way to do canonical conversion ....
260*2900Seric 	p = hvalue("date");
261*2900Seric 	if (p != NULL)
262*2900Seric 		Date = newstr(arpatounix(p));
263*2900Seric 	.... so we will ignore the problem for the time being */
264*2900Seric 	if (Date == NULL)
265*2900Seric 	{
266*2900Seric 		auto long t;
267*2900Seric 		extern char *ctime();
268*2900Seric 
269*2900Seric 		time(&t);
270*2900Seric 		Date = newstr(ctime(&t));
271*2900Seric 	}
272*2900Seric 
273*2900Seric 	/* message id */
274*2900Seric 	MsgId = hvalue("message-id");
275*2900Seric 	if (MsgId == NULL)
276*2900Seric 		MsgId = makemsgid();
277*2900Seric 
2781392Seric 	if (freopen(InFileName, "r", stdin) == NULL)
2791392Seric 		syserr("Cannot reopen %s", InFileName);
280*2900Seric 
281*2900Seric # ifdef DEBUG
282*2900Seric 	if (Debug)
283*2900Seric 	{
284*2900Seric 		printf("----- collected header -----\n");
285*2900Seric 		for (h = Header; h != NULL; h = h->h_link)
286*2900Seric 			printf("%s: %s\n", capitalize(h->h_field), h->h_value);
287*2900Seric 		printf("----------------------------\n");
288*2900Seric 	}
289*2900Seric # endif DEBUG
290*2900Seric 	return (ArpaFmt ? xfrom : NULL);
2911392Seric }
2921392Seric /*
293*2900Seric **  EATFROM -- chew up a UNIX style from line and process
294*2900Seric **
295*2900Seric **	This does indeed make some assumptions about the format
296*2900Seric **	of UNIX messages.
297*2900Seric **
298*2900Seric **	Parameters:
299*2900Seric **		fm -- the from line.
300*2900Seric **
301*2900Seric **	Returns:
302*2900Seric **		none.
303*2900Seric **
304*2900Seric **	Side Effects:
305*2900Seric **		extracts what information it can from the header,
306*2900Seric **		such as the Date.
307*2900Seric */
308*2900Seric 
309*2900Seric char	*MonthList[] =
310*2900Seric {
311*2900Seric 	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
312*2900Seric 	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
313*2900Seric 	NULL
314*2900Seric };
315*2900Seric 
316*2900Seric eatfrom(fm)
317*2900Seric 	char *fm;
318*2900Seric {
319*2900Seric 	register char *p;
320*2900Seric 	register char **dt;
321*2900Seric 
322*2900Seric 	/* find the date part */
323*2900Seric 	p = fm;
324*2900Seric 	while (*p != '\0')
325*2900Seric 	{
326*2900Seric 		/* skip a word */
327*2900Seric 		while (*p != '\0' && *p != ' ')
328*2900Seric 			*p++;
329*2900Seric 		while (*p == ' ')
330*2900Seric 			*p++;
331*2900Seric 		if (!isupper(*p) || p[3] != ' ' || p[13] != ':' || p[16] != ':')
332*2900Seric 			continue;
333*2900Seric 
334*2900Seric 		/* we have a possible date */
335*2900Seric 		for (dt = MonthList; *dt != NULL; dt++)
336*2900Seric 			if (strncmp(*dt, p, 3) == 0)
337*2900Seric 				break;
338*2900Seric 
339*2900Seric 		if (*dt != NULL)
340*2900Seric 			break;
341*2900Seric 	}
342*2900Seric 
343*2900Seric 	if (*p != NULL)
344*2900Seric 	{
345*2900Seric 		/* we have found a date */
346*2900Seric 		Date = xalloc(25);
347*2900Seric 		strncpy(Date, p, 25);
348*2900Seric 		Date[24] = '\0';
349*2900Seric 	}
350*2900Seric }
351*2900Seric /*
352*2900Seric **  HVALUE -- return value of a header.
353*2900Seric **
354*2900Seric **	Parameters:
355*2900Seric **		field -- the field name.
356*2900Seric **
357*2900Seric **	Returns:
358*2900Seric **		pointer to the value part.
359*2900Seric **		NULL if not found.
360*2900Seric **
361*2900Seric **	Side Effects:
362*2900Seric **		sets the H_USED bit in the header if found.
363*2900Seric */
364*2900Seric 
365*2900Seric char *
366*2900Seric hvalue(field)
367*2900Seric 	char *field;
368*2900Seric {
369*2900Seric 	register HDR *h;
370*2900Seric 
371*2900Seric 	for (h = Header; h != NULL; h = h->h_link)
372*2900Seric 	{
373*2900Seric 		if (strcmp(h->h_field, field) == 0)
374*2900Seric 		{
375*2900Seric 			h->h_flags |= H_USED;
376*2900Seric 			return (h->h_value);
377*2900Seric 		}
378*2900Seric 	}
379*2900Seric 	return (NULL);
380*2900Seric }
/*
**  MAKEMSGID -- Compute a message id for this process.
**
**	This routine creates a message id for a message if
**	it did not have one already.  If the MESSAGEID compile
**	flag is set, the messageid will be added to any message
**	that does not already have one.  Currently it is more
**	of an artifact, but I suggest that if you are hacking,
**	you leave it in -- I may want to use it someday if
**	duplicate messages turn out to be a problem.
**
**	The id has the form "<time.pid.MyLocName@ArpaHost>".
**
**	Parameters:
**		none.
**
**	Returns:
**		a message id.  This points to a static buffer
**		that is overwritten by the next call.
**
**	Side Effects:
**		none.
*/

char *
makemsgid()
{
	auto long t;
	extern char *MyLocName;
	extern char *ArpaHost;
	static char buf[128];
	register int n;

	time(&t);

	/* bounded: the names can be arbitrarily long */
	n = snprintf(buf, sizeof buf, "<%ld.%d.%s@%s>", t, getpid(),
		     MyLocName, ArpaHost);
	if (n >= (int) sizeof buf)
	{
		/* truncated: keep the id bracketed (time and pid are intact) */
		buf[sizeof buf - 2] = '>';
	}
	return (buf);
}
/*
**  ISHEADER -- predicate telling if argument is a header.
**
**	A header line starts with an alphanumeric character and
**	consists of a field name (a run of characters containing
**	neither white space nor a colon), optional white space,
**	and then a colon.
**
**	Parameters:
**		s -- string to check for possible headerness.
**
**	Returns:
**		TRUE if s is a header.
**		FALSE otherwise.
**
**	Side Effects:
**		none.
*/

bool
isheader(s)
	register char *s;
{
	register bool ishdr;

	/* a field name must begin with a letter or digit */
	ishdr = isalnum((unsigned char) *s) != 0;
	if (ishdr)
	{
		/* skip the field name; stop at the end of the string too */
		while (*s != '\0' && *s != ':' && !isspace((unsigned char) *s))
			s++;

		/* white space is allowed between the name and the colon */
		while (isspace((unsigned char) *s))
			s++;
		ishdr = (*s == ':');
	}
	return (ishdr);
}
440