xref: /netbsd-src/external/ibm-public/postfix/dist/src/cleanup/cleanup_message.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: cleanup_message.c,v 1.2 2017/02/14 01:16:44 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	cleanup_message 3
6 /* SUMMARY
7 /*	process message segment
8 /* SYNOPSIS
9 /*	#include "cleanup.h"
10 /*
11 /*	void	cleanup_message(state, type, buf, len)
12 /*	CLEANUP_STATE *state;
13 /*	int	type;
14 /*	const char *buf;
15 /*	ssize_t	len;
16 /* DESCRIPTION
17 /*	This module processes message content records and copies the
18 /*	result to the queue file.  It validates the input, rewrites
19 /*	sender/recipient addresses to canonical form, inserts missing
20 /*	message headers, and extracts information from message headers
21 /*	to be used later when generating the extracted output segment.
22 /*	This routine absorbs but does not emit the content to extracted
23 /*	boundary record.
24 /*
25 /*	Arguments:
26 /* .IP state
27 /*	Queue file and message processing state. This state is updated
28 /*	as records are processed and as errors happen.
29 /* .IP type
30 /*	Record type.
31 /* .IP buf
32 /*	Record content.
33 /* .IP len
34 /*	Record content length.
35 /* LICENSE
36 /* .ad
37 /* .fi
38 /*	The Secure Mailer license must be distributed with this software.
39 /* AUTHOR(S)
40 /*	Wietse Venema
41 /*	IBM T.J. Watson Research
42 /*	P.O. Box 704
43 /*	Yorktown Heights, NY 10598, USA
44 /*--*/
45 
46 /* System library. */
47 
48 #include <sys_defs.h>
49 #include <ctype.h>
50 #include <string.h>
51 #include <time.h>
52 #include <unistd.h>
53 
54 #ifdef STRCASECMP_IN_STRINGS_H
55 #include <strings.h>
56 #endif
57 
58 /* Utility library. */
59 
60 #include <msg.h>
61 #include <vstring.h>
62 #include <vstream.h>
63 #include <argv.h>
64 #include <split_at.h>
65 #include <mymalloc.h>
66 #include <stringops.h>
67 #include <nvtable.h>
68 
69 /* Global library. */
70 
71 #include <record.h>
72 #include <rec_type.h>
73 #include <cleanup_user.h>
74 #include <tok822.h>
75 #include <header_opts.h>
76 #include <quote_822_local.h>
77 #include <mail_params.h>
78 #include <mail_date.h>
79 #include <mail_addr.h>
80 #include <is_header.h>
81 #include <ext_prop.h>
82 #include <mail_proto.h>
83 #include <mime_state.h>
84 #include <lex_822.h>
85 #include <dsn_util.h>
86 #include <conv_time.h>
87 
88 /* Application-specific. */
89 
90 #include "cleanup.h"
91 
92 /* cleanup_fold_header - wrap address list header */
93 
94 static void cleanup_fold_header(CLEANUP_STATE *state, VSTRING *header_buf)
95 {
96     char   *start_line = vstring_str(header_buf);
97     char   *end_line;
98     char   *next_line;
99     char   *line;
100 
101     /*
102      * A rewritten address list contains one address per line. The code below
103      * replaces newlines by spaces, to fit as many addresses on a line as
104      * possible (without rearranging the order of addresses). Prepending
105      * white space to the beginning of lines is delegated to the output
106      * routine.
107      */
108     for (line = start_line; line != 0; line = next_line) {
109 	end_line = line + strcspn(line, "\n");
110 	if (line > start_line) {
111 	    if (end_line - start_line < 70) {	/* TAB counts as one */
112 		line[-1] = ' ';
113 	    } else {
114 		start_line = line;
115 	    }
116 	}
117 	next_line = *end_line ? end_line + 1 : 0;
118     }
119     cleanup_out_header(state, header_buf);
120 }
121 
122 /* cleanup_extract_internal - save unquoted copy of extracted address */
123 
124 static char *cleanup_extract_internal(VSTRING *buffer, TOK822 *addr)
125 {
126 
127     /*
128      * A little routine to stash away a copy of an address that we extracted
129      * from a message header line.
130      */
131     tok822_internalize(buffer, addr->head, TOK822_STR_DEFL);
132     return (mystrdup(vstring_str(buffer)));
133 }
134 
135 /* cleanup_rewrite_sender - sender address rewriting */
136 
137 static void cleanup_rewrite_sender(CLEANUP_STATE *state,
138 				           const HEADER_OPTS *hdr_opts,
139 				           VSTRING *header_buf)
140 {
141     TOK822 *tree;
142     TOK822 **addr_list;
143     TOK822 **tpp;
144     int     did_rewrite = 0;
145 
146     if (msg_verbose)
147 	msg_info("rewrite_sender: %s", hdr_opts->name);
148 
149     /*
150      * Parse the header line, rewrite each address found, and regenerate the
151      * header line. Finally, pipe the result through the header line folding
152      * routine.
153      */
154     tree = tok822_parse_limit(vstring_str(header_buf)
155 			      + strlen(hdr_opts->name) + 1,
156 			      var_token_limit);
157     addr_list = tok822_grep(tree, TOK822_ADDR);
158     for (tpp = addr_list; *tpp; tpp++) {
159 	did_rewrite |= cleanup_rewrite_tree(state->hdr_rewrite_context, *tpp);
160 	if (state->flags & CLEANUP_FLAG_MAP_OK) {
161 	    if (cleanup_send_canon_maps
162 		&& (cleanup_send_canon_flags & CLEANUP_CANON_FLAG_HDR_FROM))
163 		did_rewrite |=
164 		    cleanup_map11_tree(state, *tpp, cleanup_send_canon_maps,
165 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
166 	    if (cleanup_comm_canon_maps
167 		&& (cleanup_comm_canon_flags & CLEANUP_CANON_FLAG_HDR_FROM))
168 		did_rewrite |=
169 		    cleanup_map11_tree(state, *tpp, cleanup_comm_canon_maps,
170 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
171 	    if (cleanup_masq_domains
172 		&& (cleanup_masq_flags & CLEANUP_MASQ_FLAG_HDR_FROM))
173 		did_rewrite |=
174 		    cleanup_masquerade_tree(state, *tpp, cleanup_masq_domains);
175 	}
176     }
177     if (did_rewrite) {
178 	vstring_truncate(header_buf, strlen(hdr_opts->name));
179 	vstring_strcat(header_buf, ": ");
180 	tok822_externalize(header_buf, tree, TOK822_STR_HEAD);
181     }
182     myfree((void *) addr_list);
183     tok822_free_tree(tree);
184     if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
185 	if (did_rewrite)
186 	    cleanup_fold_header(state, header_buf);
187 	else
188 	    cleanup_out_header(state, header_buf);
189     }
190 }
191 
192 /* cleanup_rewrite_recip - recipient address rewriting */
193 
194 static void cleanup_rewrite_recip(CLEANUP_STATE *state,
195 				          const HEADER_OPTS *hdr_opts,
196 				          VSTRING *header_buf)
197 {
198     TOK822 *tree;
199     TOK822 **addr_list;
200     TOK822 **tpp;
201     int     did_rewrite = 0;
202 
203     if (msg_verbose)
204 	msg_info("rewrite_recip: %s", hdr_opts->name);
205 
206     /*
207      * Parse the header line, rewrite each address found, and regenerate the
208      * header line. Finally, pipe the result through the header line folding
209      * routine.
210      */
211     tree = tok822_parse_limit(vstring_str(header_buf)
212 			      + strlen(hdr_opts->name) + 1,
213 			      var_token_limit);
214     addr_list = tok822_grep(tree, TOK822_ADDR);
215     for (tpp = addr_list; *tpp; tpp++) {
216 	did_rewrite |= cleanup_rewrite_tree(state->hdr_rewrite_context, *tpp);
217 	if (state->flags & CLEANUP_FLAG_MAP_OK) {
218 	    if (cleanup_rcpt_canon_maps
219 		&& (cleanup_rcpt_canon_flags & CLEANUP_CANON_FLAG_HDR_RCPT))
220 		did_rewrite |=
221 		    cleanup_map11_tree(state, *tpp, cleanup_rcpt_canon_maps,
222 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
223 	    if (cleanup_comm_canon_maps
224 		&& (cleanup_comm_canon_flags & CLEANUP_CANON_FLAG_HDR_RCPT))
225 		did_rewrite |=
226 		    cleanup_map11_tree(state, *tpp, cleanup_comm_canon_maps,
227 				cleanup_ext_prop_mask & EXT_PROP_CANONICAL);
228 	    if (cleanup_masq_domains
229 		&& (cleanup_masq_flags & CLEANUP_MASQ_FLAG_HDR_RCPT))
230 		did_rewrite |=
231 		    cleanup_masquerade_tree(state, *tpp, cleanup_masq_domains);
232 	}
233     }
234     if (did_rewrite) {
235 	vstring_truncate(header_buf, strlen(hdr_opts->name));
236 	vstring_strcat(header_buf, ": ");
237 	tok822_externalize(header_buf, tree, TOK822_STR_HEAD);
238     }
239     myfree((void *) addr_list);
240     tok822_free_tree(tree);
241     if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
242 	if (did_rewrite)
243 	    cleanup_fold_header(state, header_buf);
244 	else
245 	    cleanup_out_header(state, header_buf);
246     }
247 }
248 
249 /* cleanup_act_log - log action with context */
250 
251 static void cleanup_act_log(CLEANUP_STATE *state,
252 			            const char *action, const char *class,
253 			            const char *content, const char *text)
254 {
255     const char *attr;
256 
257     if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_ORIGIN)) == 0)
258 	attr = "unknown";
259     vstring_sprintf(state->temp1, "%s: %s: %s %.200s from %s;",
260 		    state->queue_id, action, class, content, attr);
261     if (state->sender)
262 	vstring_sprintf_append(state->temp1, " from=<%s>", state->sender);
263     if (state->recip)
264 	vstring_sprintf_append(state->temp1, " to=<%s>", state->recip);
265     if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_PROTO_NAME)) != 0)
266 	vstring_sprintf_append(state->temp1, " proto=%s", attr);
267     if ((attr = nvtable_find(state->attr, MAIL_ATTR_LOG_HELO_NAME)) != 0)
268 	vstring_sprintf_append(state->temp1, " helo=<%s>", attr);
269     if (text && *text)
270 	vstring_sprintf_append(state->temp1, ": %s", text);
271     msg_info("%s", vstring_str(state->temp1));
272 }
273 
274 #define CLEANUP_ACT_CTXT_HEADER	"header"
275 #define CLEANUP_ACT_CTXT_BODY	"body"
276 #define CLEANUP_ACT_CTXT_ANY	"content"
277 
278 /* cleanup_act - act upon a header/body match */
279 
280 static const char *cleanup_act(CLEANUP_STATE *state, char *context,
281 			               const char *buf, const char *value,
282 			               const char *map_class)
283 {
284     const char *optional_text = value + strcspn(value, " \t");
285     int     command_len = optional_text - value;
286 
287 #ifdef DELAY_ACTION
288     int     defer_delay;
289 
290 #endif
291 
292     while (*optional_text && ISSPACE(*optional_text))
293 	optional_text++;
294 
295 #define STREQUAL(x,y,l) (strncasecmp((x), (y), (l)) == 0 && (y)[l] == 0)
296 #define CLEANUP_ACT_DROP 0
297 
298     /*
299      * CLEANUP_STAT_CONT and CLEANUP_STAT_DEFER both update the reason
300      * attribute, but CLEANUP_STAT_DEFER takes precedence. It terminates
301      * queue record processing, and prevents bounces from being sent.
302      */
303     if (STREQUAL(value, "REJECT", command_len)) {
304 	const CLEANUP_STAT_DETAIL *detail;
305 
306 	if (state->reason)
307 	    myfree(state->reason);
308 	if (*optional_text) {
309 	    state->reason = dsn_prepend("5.7.1", optional_text);
310 	    if (*state->reason != '4' && *state->reason != '5') {
311 		msg_warn("bad DSN action in %s -- need 4.x.x or 5.x.x",
312 			 optional_text);
313 		*state->reason = '4';
314 	    }
315 	} else {
316 	    detail = cleanup_stat_detail(CLEANUP_STAT_CONT);
317 	    state->reason = dsn_prepend(detail->dsn, detail->text);
318 	}
319 	if (*state->reason == '4')
320 	    state->errs |= CLEANUP_STAT_DEFER;
321 	else
322 	    state->errs |= CLEANUP_STAT_CONT;
323 	state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
324 	cleanup_act_log(state, "reject", context, buf, state->reason);
325 	return (buf);
326     }
327     if (STREQUAL(value, "WARN", command_len)) {
328 	cleanup_act_log(state, "warning", context, buf, optional_text);
329 	return (buf);
330     }
331     if (STREQUAL(value, "INFO", command_len)) {
332 	cleanup_act_log(state, "info", context, buf, optional_text);
333 	return (buf);
334     }
335     if (STREQUAL(value, "FILTER", command_len)) {
336 	if (*optional_text == 0) {
337 	    msg_warn("missing FILTER command argument in %s map", map_class);
338 	} else if (strchr(optional_text, ':') == 0) {
339 	    msg_warn("bad FILTER command %s in %s -- "
340 		     "need transport:destination",
341 		     optional_text, map_class);
342 	} else {
343 	    if (state->filter)
344 		myfree(state->filter);
345 	    state->filter = mystrdup(optional_text);
346 	    cleanup_act_log(state, "filter", context, buf, optional_text);
347 	}
348 	return (buf);
349     }
350     if (STREQUAL(value, "DISCARD", command_len)) {
351 	cleanup_act_log(state, "discard", context, buf, optional_text);
352 	state->flags |= CLEANUP_FLAG_DISCARD;
353 	state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
354 	return (buf);
355     }
356     if (STREQUAL(value, "HOLD", command_len)) {
357 	if ((state->flags & (CLEANUP_FLAG_HOLD | CLEANUP_FLAG_DISCARD)) == 0) {
358 	    cleanup_act_log(state, "hold", context, buf, optional_text);
359 	    state->flags |= CLEANUP_FLAG_HOLD;
360 	}
361 	return (buf);
362     }
363 
364     /*
365      * The DELAY feature is disabled because it has too many problems. 1) It
366      * does not work on some remote file systems; 2) mail will be delivered
367      * anyway with "sendmail -q" etc.; 3) while the mail is queued it bogs
368      * down the deferred queue scan with huge amounts of useless disk I/O
369      * operations.
370      */
371 #ifdef DELAY_ACTION
372     if (STREQUAL(value, "DELAY", command_len)) {
373 	if ((state->flags & (CLEANUP_FLAG_HOLD | CLEANUP_FLAG_DISCARD)) == 0) {
374 	    if (*optional_text == 0) {
375 		msg_warn("missing DELAY argument in %s map", map_class);
376 	    } else if (conv_time(optional_text, &defer_delay, 's') == 0) {
377 		msg_warn("ignoring bad DELAY argument %s in %s map",
378 			 optional_text, map_class);
379 	    } else {
380 		cleanup_act_log(state, "delay", context, buf, optional_text);
381 		state->defer_delay = defer_delay;
382 	    }
383 	}
384 	return (buf);
385     }
386 #endif
387     if (STREQUAL(value, "PREPEND", command_len)) {
388 	if (*optional_text == 0) {
389 	    msg_warn("PREPEND action without text in %s map", map_class);
390 	} else if (strcmp(context, CLEANUP_ACT_CTXT_HEADER) == 0) {
391 	    if (!is_header(optional_text)) {
392 		msg_warn("bad PREPEND header text \"%s\" in %s map -- "
393 			 "need \"headername: headervalue\"",
394 			 optional_text, map_class);
395 	    }
396 
397 	    /*
398 	     * By design, cleanup_out_header() may modify content. Play safe
399 	     * and prepare for future developments.
400 	     */
401 	    else {
402 		VSTRING *temp;
403 
404 		cleanup_act_log(state, "prepend", context, buf, optional_text);
405 		temp = vstring_strcpy(vstring_alloc(strlen(optional_text)),
406 						    optional_text);
407 		cleanup_out_header(state, temp);
408 		vstring_free(temp);
409 	    }
410 	} else {
411 	    cleanup_act_log(state, "prepend", context, buf, optional_text);
412 	    cleanup_out_string(state, REC_TYPE_NORM, optional_text);
413 	}
414 	return (buf);
415     }
416     if (STREQUAL(value, "REPLACE", command_len)) {
417 	if (*optional_text == 0) {
418 	    msg_warn("REPLACE action without text in %s map", map_class);
419 	    return (buf);
420 	} else if (strcmp(context, CLEANUP_ACT_CTXT_HEADER) == 0
421 		   && !is_header(optional_text)) {
422 	    msg_warn("bad REPLACE header text \"%s\" in %s map -- "
423 		     "need \"headername: headervalue\"",
424 		     optional_text, map_class);
425 	    return (buf);
426 	} else {
427 	    cleanup_act_log(state, "replace", context, buf, optional_text);
428 	    return (mystrdup(optional_text));
429 	}
430     }
431     if (STREQUAL(value, "REDIRECT", command_len)) {
432 	if (strchr(optional_text, '@') == 0) {
433 	    msg_warn("bad REDIRECT target \"%s\" in %s map -- "
434 		     "need user@domain",
435 		     optional_text, map_class);
436 	} else {
437 	    if (state->redirect)
438 		myfree(state->redirect);
439 	    state->redirect = mystrdup(optional_text);
440 	    cleanup_act_log(state, "redirect", context, buf, optional_text);
441 	    state->flags &= ~CLEANUP_FLAG_FILTER_ALL;
442 	}
443 	return (buf);
444     }
445     if (STREQUAL(value, "BCC", command_len)) {
446 	if (strchr(optional_text, '@') == 0) {
447 	    msg_warn("bad BCC address \"%s\" in %s map -- "
448 		     "need user@domain",
449 		     optional_text, map_class);
450 	} else {
451 	    if (state->hbc_rcpt == 0)
452 		state->hbc_rcpt = argv_alloc(1);
453 	    argv_add(state->hbc_rcpt, optional_text, (char *) 0);
454 	    cleanup_act_log(state, "bcc", context, buf, optional_text);
455 	}
456 	return (buf);
457     }
458     /* Allow and ignore optional text after the action. */
459 
460     if (STREQUAL(value, "IGNORE", command_len))
461 	return (CLEANUP_ACT_DROP);
462 
463     if (STREQUAL(value, "DUNNO", command_len))	/* preferred */
464 	return (buf);
465 
466     if (STREQUAL(value, "OK", command_len))	/* compat */
467 	return (buf);
468 
469     msg_warn("unknown command in %s map: %s", map_class, value);
470     return (buf);
471 }
472 
473 /* cleanup_header_callback - process one complete header line */
474 
475 static void cleanup_header_callback(void *context, int header_class,
476 				            const HEADER_OPTS *hdr_opts,
477 				            VSTRING *header_buf,
478 				            off_t unused_offset)
479 {
480     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
481     const char *myname = "cleanup_header_callback";
482     char   *hdrval;
483     struct code_map {
484 	const char *name;
485 	const char *encoding;
486     };
487     static struct code_map code_map[] = {	/* RFC 2045 */
488 	"7bit", MAIL_ATTR_ENC_7BIT,
489 	"8bit", MAIL_ATTR_ENC_8BIT,
490 	"binary", MAIL_ATTR_ENC_8BIT,	/* XXX Violation */
491 	"quoted-printable", MAIL_ATTR_ENC_7BIT,
492 	"base64", MAIL_ATTR_ENC_7BIT,
493 	0,
494     };
495     struct code_map *cmp;
496     MAPS   *checks;
497     const char *map_class;
498 
499     if (msg_verbose)
500 	msg_info("%s: '%.200s'", myname, vstring_str(header_buf));
501 
502     /*
503      * Crude header filtering. This stops malware that isn't sophisticated
504      * enough to use fancy header encodings.
505      */
506 #define CHECK(class, maps, var_name) \
507 	(header_class == class && (map_class = var_name, checks = maps) != 0)
508 
509     if (hdr_opts && (hdr_opts->flags & HDR_OPT_MIME))
510 	header_class = MIME_HDR_MULTIPART;
511 
512     /* Update the Received: header count before maybe dropping headers below. */
513     if (hdr_opts && hdr_opts->type == HDR_RECEIVED)
514 	state->hop_count += 1;
515 
516     if ((state->flags & CLEANUP_FLAG_FILTER)
517 	&& (CHECK(MIME_HDR_PRIMARY, cleanup_header_checks, VAR_HEADER_CHECKS)
518     || CHECK(MIME_HDR_MULTIPART, cleanup_mimehdr_checks, VAR_MIMEHDR_CHECKS)
519     || CHECK(MIME_HDR_NESTED, cleanup_nesthdr_checks, VAR_NESTHDR_CHECKS))) {
520 	char   *header = vstring_str(header_buf);
521 	const char *value;
522 
523 	if ((value = maps_find(checks, header, 0)) != 0) {
524 	    const char *result;
525 
526 	    if ((result = cleanup_act(state, CLEANUP_ACT_CTXT_HEADER,
527 				      header, value, map_class))
528 		== CLEANUP_ACT_DROP) {
529 		return;
530 	    } else if (result != header) {
531 		vstring_strcpy(header_buf, result);
532 		hdr_opts = header_opts_find(result);
533 		myfree((void *) result);
534 	    }
535 	} else if (checks->error) {
536 	    msg_warn("%s: %s map lookup problem -- "
537 		     "message not accepted, try again later",
538 		     state->queue_id, checks->title);
539 	    state->errs |= CLEANUP_STAT_WRITE;
540 	}
541     }
542 
543     /*
544      * If this is an "unknown" header, just copy it to the output without
545      * even bothering to fold long lines. cleanup_out() will split long
546      * headers that do not fit a REC_TYPE_NORM record.
547      */
548     if (hdr_opts == 0) {
549 	cleanup_out_header(state, header_buf);
550 	return;
551     }
552 
553     /*
554      * Allow 8-bit type info to override 7-bit type info. XXX Should reuse
555      * the effort that went into MIME header parsing.
556      */
557     hdrval = vstring_str(header_buf) + strlen(hdr_opts->name) + 1;
558     while (ISSPACE(*hdrval))
559 	hdrval++;
560     /* trimblanks(hdrval, 0)[0] = 0; */
561     if (var_auto_8bit_enc_hdr
562 	&& hdr_opts->type == HDR_CONTENT_TRANSFER_ENCODING) {
563 	for (cmp = code_map; cmp->name != 0; cmp++) {
564 	    if (strcasecmp(hdrval, cmp->name) == 0) {
565 		if (strcasecmp(cmp->encoding, MAIL_ATTR_ENC_8BIT) == 0)
566 		    nvtable_update(state->attr, MAIL_ATTR_ENCODING,
567 				   cmp->encoding);
568 		break;
569 	    }
570 	}
571     }
572 
573     /*
574      * Copy attachment etc. header blocks without further inspection.
575      */
576     if (header_class != MIME_HDR_PRIMARY) {
577 	cleanup_out_header(state, header_buf);
578 	return;
579     }
580 
581     /*
582      * Known header. Remember that we have seen at least one. Find out what
583      * we should do with this header: delete, count, rewrite. Note that we
584      * should examine headers even when they will be deleted from the output,
585      * because the addresses in those headers might be needed elsewhere.
586      *
587      * XXX 2821: Return-path breakage.
588      *
589      * RFC 821 specifies: When the receiver-SMTP makes the "final delivery" of a
590      * message it inserts at the beginning of the mail data a return path
591      * line.  The return path line preserves the information in the
592      * <reverse-path> from the MAIL command.  Here, final delivery means the
593      * message leaves the SMTP world.  Normally, this would mean it has been
594      * delivered to the destination user, but in some cases it may be further
595      * processed and transmitted by another mail system.
596      *
597      * And that is what Postfix implements. Delivery agents prepend
598      * Return-Path:. In order to avoid cluttering up the message with
599      * possibly inconsistent Return-Path: information (the sender can change
600      * as the result of mail forwarding or mailing list delivery), Postfix
601      * removes any existing Return-Path: headers.
602      *
603      * RFC 2821 Section 4.4 specifies:    A message-originating SMTP system
604      * SHOULD NOT send a message that already contains a Return-path header.
605      * SMTP servers performing a relay function MUST NOT inspect the message
606      * data, and especially not to the extent needed to determine if
607      * Return-path headers are present. SMTP servers making final delivery
608      * MAY remove Return-path headers before adding their own.
609      */
610     else {
611 	state->headers_seen |= (1 << hdr_opts->type);
612 	if (hdr_opts->type == HDR_MESSAGE_ID)
613 	    msg_info("%s: message-id=%s", state->queue_id, hdrval);
614 	if (hdr_opts->type == HDR_RESENT_MESSAGE_ID)
615 	    msg_info("%s: resent-message-id=%s", state->queue_id, hdrval);
616 	if (hdr_opts->type == HDR_RECEIVED) {
617 	    if (state->hop_count >= var_hopcount_limit) {
618 		msg_warn("%s: message rejected: hopcount exceeded",
619 			 state->queue_id);
620 		state->errs |= CLEANUP_STAT_HOPS;
621 	    }
622 	    /* Save our Received: header after maybe updating headers above. */
623 	    if (state->hop_count == 1)
624 		argv_add(state->auto_hdrs, vstring_str(header_buf), ARGV_END);
625 	}
626 	if (CLEANUP_OUT_OK(state)) {
627 	    if (hdr_opts->flags & HDR_OPT_RR)
628 		state->resent = "Resent-";
629 	    if ((hdr_opts->flags & HDR_OPT_SENDER)
630 		&& state->hdr_rewrite_context) {
631 		cleanup_rewrite_sender(state, hdr_opts, header_buf);
632 	    } else if ((hdr_opts->flags & HDR_OPT_RECIP)
633 		       && state->hdr_rewrite_context) {
634 		cleanup_rewrite_recip(state, hdr_opts, header_buf);
635 	    } else if ((hdr_opts->flags & HDR_OPT_DROP) == 0) {
636 		cleanup_out_header(state, header_buf);
637 	    }
638 	}
639     }
640 }
641 
642 /* cleanup_header_done_callback - insert missing message headers */
643 
644 static void cleanup_header_done_callback(void *context)
645 {
646     const char *myname = "cleanup_header_done_callback";
647     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
648     char    time_stamp[1024];		/* XXX locale dependent? */
649     struct tm *tp;
650     TOK822 *token;
651     time_t  tv;
652 
653     /*
654      * XXX Workaround: when we reach the end of headers, mime_state_update()
655      * may execute up to three call-backs before returning to the caller:
656      * head_out(), head_end(), and body_out() or body_end(). As long as
657      * call-backs don't return a result, each call-back has to check for
658      * itself if the previous call-back experienced a problem.
659      */
660     if (CLEANUP_OUT_OK(state) == 0)
661 	return;
662 
663     /*
664      * Future proofing: the Milter client's header suppression algorithm
665      * assumes that the MTA prepends its own Received: header. This
666      * assupmtion may be violated after some source-code update. The
667      * following check ensures consistency, at least for local submission.
668      */
669     if (state->hop_count < 1) {
670 	msg_warn("%s: message rejected: no Received: header",
671 		 state->queue_id);
672 	state->errs |= CLEANUP_STAT_BAD;
673 	return;
674     }
675 
676     /*
677      * Add a missing (Resent-)Message-Id: header. The message ID gives the
678      * time in GMT units, plus the local queue ID.
679      *
680      * XXX Message-Id is not a required message header (RFC 822 and RFC 2822).
681      *
682      * XXX It is the queue ID non-inode bits that prevent messages from getting
683      * the same Message-Id within the same second.
684      *
685      * XXX An arbitrary amount of time may pass between the start of the mail
686      * transaction and the creation of a queue file. Since we guarantee queue
687      * ID uniqueness only within a second, we must ensure that the time in
688      * the message ID matches the queue ID creation time, as long as we use
689      * the queue ID in the message ID.
690      *
691      * XXX We log a dummy name=value record so that we (hopefully) don't break
692      * compatibility with existing logfile analyzers, and so that we don't
693      * complicate future code that wants to log more name=value attributes.
694      */
695     if ((state->hdr_rewrite_context || var_always_add_hdrs)
696 	&& (state->headers_seen & (1 << (state->resent[0] ?
697 			   HDR_RESENT_MESSAGE_ID : HDR_MESSAGE_ID))) == 0) {
698 	if (var_long_queue_ids) {
699 	    vstring_sprintf(state->temp1, "%s@%s",
700 			    state->queue_id, var_myhostname);
701 	} else {
702 	    tv = state->handle->ctime.tv_sec;
703 	    tp = gmtime(&tv);
704 	    strftime(time_stamp, sizeof(time_stamp), "%Y%m%d%H%M%S", tp);
705 	    vstring_sprintf(state->temp1, "%s.%s@%s",
706 			    time_stamp, state->queue_id, var_myhostname);
707 	}
708 	cleanup_out_format(state, REC_TYPE_NORM, "%sMessage-Id: <%s>",
709 			   state->resent, vstring_str(state->temp1));
710 	msg_info("%s: %smessage-id=<%s>",
711 		 state->queue_id, *state->resent ? "resent-" : "",
712 		 vstring_str(state->temp1));
713 	state->headers_seen |= (1 << (state->resent[0] ?
714 				   HDR_RESENT_MESSAGE_ID : HDR_MESSAGE_ID));
715     }
716     if ((state->headers_seen & (1 << HDR_MESSAGE_ID)) == 0)
717 	msg_info("%s: message-id=<>", state->queue_id);
718 
719     /*
720      * Add a missing (Resent-)Date: header. The date is in local time units,
721      * with the GMT offset at the end.
722      */
723     if ((state->hdr_rewrite_context || var_always_add_hdrs)
724 	&& (state->headers_seen & (1 << (state->resent[0] ?
725 				       HDR_RESENT_DATE : HDR_DATE))) == 0) {
726 	cleanup_out_format(state, REC_TYPE_NORM, "%sDate: %s",
727 		      state->resent, mail_date(state->arrival_time.tv_sec));
728     }
729 
730     /*
731      * Add a missing (Resent-)From: header.
732      */
733     if ((state->hdr_rewrite_context || var_always_add_hdrs)
734 	&& (state->headers_seen & (1 << (state->resent[0] ?
735 				       HDR_RESENT_FROM : HDR_FROM))) == 0) {
736 	quote_822_local(state->temp1, *state->sender ?
737 			state->sender : MAIL_ADDR_MAIL_DAEMON);
738 	vstring_sprintf(state->temp2, "%sFrom: %s",
739 			state->resent, vstring_str(state->temp1));
740 	if (*state->sender && state->fullname && *state->fullname) {
741 	    vstring_sprintf(state->temp1, "(%s)", state->fullname);
742 	    token = tok822_parse(vstring_str(state->temp1));
743 	    vstring_strcat(state->temp2, " ");
744 	    tok822_externalize(state->temp2, token, TOK822_STR_NONE);
745 	    tok822_free_tree(token);
746 	}
747 	CLEANUP_OUT_BUF(state, REC_TYPE_NORM, state->temp2);
748     }
749 
750     /*
751      * XXX 2821: Appendix B: The return address in the MAIL command SHOULD,
752      * if possible, be derived from the system's identity for the submitting
753      * (local) user, and the "From:" header field otherwise. If there is a
754      * system identity available, it SHOULD also be copied to the Sender
755      * header field if it is different from the address in the From header
756      * field.  (Any Sender field that was already there SHOULD be removed.)
757      * Similar wording appears in RFC 2822 section 3.6.2.
758      *
759      * Postfix presently does not insert a Sender: header if envelope and From:
760      * address differ. Older Postfix versions assumed that the envelope
761      * sender address specifies the system identity and inserted Sender:
762      * whenever envelope and From: differed. This was wrong with relayed
763      * mail, and was often not even desirable with original submissions.
764      *
765      * XXX 2822 Section 3.6.2, as well as RFC 822 Section 4.1: FROM headers can
766      * contain multiple addresses. If this is the case, then a Sender: header
767      * must be provided with a single address.
768      *
769      * Postfix does not count the number of addresses in a From: header
770      * (although doing so is trivial, once the address is parsed).
771      */
772 
773     /*
774      * Add a missing destination header.
775      */
776 #define VISIBLE_RCPT	((1 << HDR_TO) | (1 << HDR_RESENT_TO) \
777 			| (1 << HDR_CC) | (1 << HDR_RESENT_CC))
778 
779     if ((state->hdr_rewrite_context || var_always_add_hdrs)
780 	&& (state->headers_seen & VISIBLE_RCPT) == 0 && *var_rcpt_witheld) {
781 	if (!is_header(var_rcpt_witheld)) {
782 	    msg_warn("bad %s header text \"%s\" -- "
783 		     "need \"headername: headervalue\"",
784 		     VAR_RCPT_WITHELD, var_rcpt_witheld);
785 	} else {
786 	    cleanup_out_format(state, REC_TYPE_NORM, "%s", var_rcpt_witheld);
787 	}
788     }
789 
790     /*
791      * Place a dummy PTR record right after the last header so that we can
792      * append headers without having to worry about clobbering the
793      * end-of-content marker.
794      */
795     if (state->milters || cleanup_milters) {
796 	if ((state->append_hdr_pt_offset = vstream_ftell(state->dst)) < 0)
797 	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
798 	cleanup_out_format(state, REC_TYPE_PTR, REC_TYPE_PTR_FORMAT, 0L);
799 	if ((state->append_hdr_pt_target = vstream_ftell(state->dst)) < 0)
800 	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
801 	state->body_offset = state->append_hdr_pt_target;
802     }
803 }
804 
805 /* cleanup_body_callback - output one body record */
806 
807 static void cleanup_body_callback(void *context, int type,
808 				          const char *buf, ssize_t len,
809 				          off_t offset)
810 {
811     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
812 
813     /*
814      * XXX Workaround: when we reach the end of headers, mime_state_update()
815      * may execute up to three call-backs before returning to the caller:
816      * head_out(), head_end(), and body_out() or body_end(). As long as
817      * call-backs don't return a result, each call-back has to check for
818      * itself if the previous call-back experienced a problem.
819      */
820     if (CLEANUP_OUT_OK(state) == 0)
821 	return;
822 
823     /*
824      * Crude message body content filter for emergencies. This code has
825      * several problems: it sees one line at a time; it looks at long lines
826      * only in chunks of line_length_limit (2048) characters; it is easily
827      * bypassed with encodings and other tricks.
828      */
829     if ((state->flags & CLEANUP_FLAG_FILTER)
830 	&& cleanup_body_checks
831 	&& (var_body_check_len == 0 || offset < var_body_check_len)) {
832 	const char *value;
833 
834 	if ((value = maps_find(cleanup_body_checks, buf, 0)) != 0) {
835 	    const char *result;
836 
837 	    if ((result = cleanup_act(state, CLEANUP_ACT_CTXT_BODY,
838 				      buf, value, VAR_BODY_CHECKS))
839 		== CLEANUP_ACT_DROP) {
840 		return;
841 	    } else if (result != buf) {
842 		cleanup_out(state, type, result, strlen(result));
843 		myfree((void *) result);
844 		return;
845 	    }
846 	} else if (cleanup_body_checks->error) {
847 	    msg_warn("%s: %s map lookup problem -- "
848 		     "message not accepted, try again later",
849 		     state->queue_id, cleanup_body_checks->title);
850 	    state->errs |= CLEANUP_STAT_WRITE;
851 	}
852     }
853     cleanup_out(state, type, buf, len);
854 }
855 
856 /* cleanup_message_headerbody - process message content, header and body */
857 
858 static void cleanup_message_headerbody(CLEANUP_STATE *state, int type,
859 				               const char *buf, ssize_t len)
860 {
861     const char *myname = "cleanup_message_headerbody";
862     const MIME_STATE_DETAIL *detail;
863     const char *cp;
864     char   *dst;
865 
866     /*
867      * Reject unwanted characters.
868      *
869      * XXX Possible optimization: simplify the loop when the "reject" set
870      * contains only one character.
871      */
872     if ((state->flags & CLEANUP_FLAG_FILTER) && cleanup_reject_chars) {
873 	for (cp = buf; cp < buf + len; cp++) {
874 	    if (memchr(vstring_str(cleanup_reject_chars),
875 		       *(const unsigned char *) cp,
876 		       VSTRING_LEN(cleanup_reject_chars))) {
877 		cleanup_act(state, CLEANUP_ACT_CTXT_ANY,
878 			    buf, "REJECT disallowed character",
879 			    "character reject");
880 		return;
881 	    }
882 	}
883     }
884 
885     /*
886      * Strip unwanted characters. Don't overwrite the input.
887      *
888      * XXX Possible space+time optimization: use a bitset.
889      *
890      * XXX Possible optimization: simplify the loop when the "strip" set
891      * contains only one character.
892      *
893      * XXX Possible optimization: copy the input only if we really have to.
894      */
895     if ((state->flags & CLEANUP_FLAG_FILTER) && cleanup_strip_chars) {
896 	VSTRING_RESET(state->stripped_buf);
897 	VSTRING_SPACE(state->stripped_buf, len + 1);
898 	dst = vstring_str(state->stripped_buf);
899 	for (cp = buf; cp < buf + len; cp++)
900 	    if (!memchr(vstring_str(cleanup_strip_chars),
901 			*(const unsigned char *) cp,
902 			VSTRING_LEN(cleanup_strip_chars)))
903 		*dst++ = *cp;
904 	*dst = 0;
905 	buf = vstring_str(state->stripped_buf);
906 	len = dst - buf;
907     }
908 
909     /*
910      * Copy text record to the output.
911      */
912     if (type == REC_TYPE_NORM || type == REC_TYPE_CONT) {
913 	state->mime_errs = mime_state_update(state->mime_state, type, buf, len);
914     }
915 
916     /*
917      * If we have reached the end of the message content segment, record the
918      * current file position so we can compute the message size lateron.
919      */
920     else if (type == REC_TYPE_XTRA) {
921 	state->mime_errs = mime_state_update(state->mime_state, type, buf, len);
922 	if (state->milters || cleanup_milters)
923 	    /* Make room for body modification. */
924 	    cleanup_out_format(state, REC_TYPE_PTR, REC_TYPE_PTR_FORMAT, 0L);
925 	/* Ignore header truncation after primary message headers. */
926 	state->mime_errs &= ~MIME_ERR_TRUNC_HEADER;
927 	if (state->mime_errs && state->reason == 0) {
928 	    state->errs |= CLEANUP_STAT_CONT;
929 	    detail = mime_state_detail(state->mime_errs);
930 	    state->reason = dsn_prepend(detail->dsn, detail->text);
931 	}
932 	state->mime_state = mime_state_free(state->mime_state);
933 	if ((state->xtra_offset = vstream_ftell(state->dst)) < 0)
934 	    msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
935 	state->cont_length = state->xtra_offset - state->data_offset;
936 	state->action = cleanup_extracted;
937     }
938 
939     /*
940      * This should never happen.
941      */
942     else {
943 	msg_warn("%s: message rejected: "
944 	      "unexpected record type %d in message content", myname, type);
945 	state->errs |= CLEANUP_STAT_BAD;
946     }
947 }
948 
949 /* cleanup_mime_error_callback - error report call-back routine */
950 
951 static void cleanup_mime_error_callback(void *context, int err_code,
952 				              const char *text, ssize_t len)
953 {
954     CLEANUP_STATE *state = (CLEANUP_STATE *) context;
955     const char *origin;
956 
957     /*
958      * Message header too large errors are handled after the end of the
959      * primary message headers.
960      */
961     if ((err_code & ~MIME_ERR_TRUNC_HEADER) != 0) {
962 	if ((origin = nvtable_find(state->attr, MAIL_ATTR_LOG_ORIGIN)) == 0)
963 	    origin = MAIL_ATTR_ORG_NONE;
964 #define TEXT_LEN (len < 100 ? (int) len : 100)
965 	msg_info("%s: reject: mime-error %s: %.*s from %s; from=<%s> to=<%s>",
966 		 state->queue_id, mime_state_error(err_code), TEXT_LEN, text,
967 	    origin, state->sender, state->recip ? state->recip : "unknown");
968     }
969 }
970 
971 /* cleanup_message - initialize message content segment */
972 
973 void    cleanup_message(CLEANUP_STATE *state, int type, const char *buf, ssize_t len)
974 {
975     const char *myname = "cleanup_message";
976     int     mime_options;
977 
978     /*
979      * Write the start-of-content segment marker.
980      */
981     cleanup_out_string(state, REC_TYPE_MESG, "");
982     if ((state->data_offset = vstream_ftell(state->dst)) < 0)
983 	msg_fatal("%s: vstream_ftell %s: %m", myname, cleanup_path);
984 
985     /*
986      * Set up MIME processing options, if any. MIME_OPT_DISABLE_MIME disables
987      * special processing of Content-Type: headers, and thus, causes all text
988      * after the primary headers to be treated as the message body.
989      */
990     mime_options = 0;
991     if (var_disable_mime_input) {
992 	mime_options |= MIME_OPT_DISABLE_MIME;
993     } else {
994 	/* Turn off content checks if bouncing or forwarding mail. */
995 	if (state->flags & CLEANUP_FLAG_FILTER) {
996 	    if (var_strict_8bitmime || var_strict_7bit_hdrs)
997 		mime_options |= MIME_OPT_REPORT_8BIT_IN_HEADER;
998 	    if (var_strict_8bitmime || var_strict_8bit_body)
999 		mime_options |= MIME_OPT_REPORT_8BIT_IN_7BIT_BODY;
1000 	    if (var_strict_encoding)
1001 		mime_options |= MIME_OPT_REPORT_ENCODING_DOMAIN;
1002 	    if (var_strict_8bitmime || var_strict_7bit_hdrs
1003 		|| var_strict_8bit_body || var_strict_encoding
1004 		|| *var_header_checks || *var_mimehdr_checks
1005 		|| *var_nesthdr_checks)
1006 		mime_options |= MIME_OPT_REPORT_NESTING;
1007 	}
1008     }
1009     state->mime_state = mime_state_alloc(mime_options,
1010 					 cleanup_header_callback,
1011 					 cleanup_header_done_callback,
1012 					 cleanup_body_callback,
1013 					 (MIME_STATE_ANY_END) 0,
1014 					 cleanup_mime_error_callback,
1015 					 (void *) state);
1016 
1017     /*
1018      * XXX Workaround: truncate a long message header so that we don't exceed
1019      * the default Sendmail libmilter request size limit of 65535.
1020      */
1021 #define KLUDGE_HEADER_LIMIT	60000
1022     if ((cleanup_milters || state->milters)
1023 	&& var_header_limit > KLUDGE_HEADER_LIMIT)
1024 	var_header_limit = KLUDGE_HEADER_LIMIT;
1025 
1026     /*
1027      * Pass control to the header processing routine.
1028      */
1029     state->action = cleanup_message_headerbody;
1030     cleanup_message_headerbody(state, type, buf, len);
1031 }
1032