xref: /openbsd-src/sys/netinet/ip_output.c (revision 3a3fbb3f2e2521ab7c4a56b7ff7462ebd9095ec5)
1 /*	$OpenBSD: ip_output.c,v 1.140 2001/11/26 16:50:26 jasoni Exp $	*/
2 /*	$NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
37  */
38 
39 #include "pf.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/proc.h>
48 #include <sys/kernel.h>
49 
50 #include <net/if.h>
51 #include <net/if_enc.h>
52 #include <net/route.h>
53 
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60 #include <netinet/ip_icmp.h>
61 #include <netinet/tcp.h>
62 #include <netinet/udp.h>
63 #include <netinet/tcp_timer.h>
64 #include <netinet/tcp_var.h>
65 #include <netinet/udp_var.h>
66 
67 #if NPF > 0
68 #include <net/pfvar.h>
69 #endif
70 
71 #ifdef vax
72 #include <machine/mtpr.h>
73 #endif
74 
75 #ifdef IPSEC
76 #ifdef ENCDEBUG
77 #define DPRINTF(x)    do { if (encdebug) printf x ; } while (0)
78 #else
79 #define DPRINTF(x)
80 #endif
81 
82 extern u_int8_t get_sa_require  __P((struct inpcb *));
83 
84 extern int ipsec_auth_default_level;
85 extern int ipsec_esp_trans_default_level;
86 extern int ipsec_esp_network_default_level;
87 extern int ipsec_ipcomp_default_level;
88 #endif /* IPSEC */
89 
90 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
91 static void ip_mloopback
92 	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
93 
94 /*
95  * IP output.  The packet in mbuf chain m contains a skeletal IP
96  * header (with len, off, ttl, proto, tos, src, dst).
97  * The mbuf chain containing the packet will be freed.
98  * The mbuf opt, if present, will not be freed.
99  */
100 int
101 #if __STDC__
102 ip_output(struct mbuf *m0, ...)
103 #else
104 ip_output(m0, va_alist)
105 	struct mbuf *m0;
106 	va_dcl
107 #endif
108 {
109 	register struct ip *ip, *mhip;
110 	register struct ifnet *ifp;
111 	struct mbuf *m = m0;
112 	register int hlen = sizeof (struct ip);
113 	int len, off, error = 0;
114 	struct route iproute;
115 	struct sockaddr_in *dst;
116 	struct in_ifaddr *ia;
117 	struct mbuf *opt;
118 	struct route *ro;
119 	int flags;
120 	struct ip_moptions *imo;
121 	va_list ap;
122 	u_int8_t sproto = 0, donerouting = 0;
123 #ifdef IPSEC
124 	u_int32_t icmp_mtu = 0;
125 	union sockaddr_union sdst;
126 	u_int32_t sspi;
127 	struct m_tag *mtag;
128 	struct tdb_ident *tdbi;
129 
130 	struct inpcb *inp;
131 	struct tdb *tdb;
132 	int s;
133 #endif /* IPSEC */
134 
135 	va_start(ap, m0);
136 	opt = va_arg(ap, struct mbuf *);
137 	ro = va_arg(ap, struct route *);
138 	flags = va_arg(ap, int);
139 	imo = va_arg(ap, struct ip_moptions *);
140 #ifdef IPSEC
141 	inp = va_arg(ap, struct inpcb *);
142 	if (inp && (inp->inp_flags & INP_IPV6) != 0)
143 		panic("ip_output: IPv6 pcb is passed");
144 #endif /* IPSEC */
145 	va_end(ap);
146 
147 #ifdef	DIAGNOSTIC
148 	if ((m->m_flags & M_PKTHDR) == 0)
149 		panic("ip_output no HDR");
150 #endif
151 	if (opt) {
152 		m = ip_insertoptions(m, opt, &len);
153 		hlen = len;
154 	}
155 
156 	ip = mtod(m, struct ip *);
157 
158 	/*
159 	 * Fill in IP header.
160 	 */
161 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
162 		ip->ip_v = IPVERSION;
163 		ip->ip_off &= IP_DF;
164 		ip->ip_id = htons(ip_randomid());
165 		ip->ip_hl = hlen >> 2;
166 		ipstat.ips_localout++;
167 	} else {
168 		hlen = ip->ip_hl << 2;
169 	}
170 
171 	/*
172 	 * If we're missing the IP source address, do a route lookup. We'll
173 	 * remember this result, in case we don't need to do any IPsec
174 	 * processing on the packet. We need the source address so we can
175 	 * do an SPD lookup in IPsec; for most packets, the source address
176 	 * is set at a higher level protocol. ICMPs and other packets
177 	 * though (e.g., traceroute) have a source address of zeroes.
178 	 */
179 	if (ip->ip_src.s_addr == INADDR_ANY) {
180 	        donerouting = 1;
181 
182 	        if (ro == 0) {
183 		        ro = &iproute;
184 			bzero((caddr_t)ro, sizeof (*ro));
185 		}
186 
187 		dst = satosin(&ro->ro_dst);
188 
189 		/*
190 		 * If there is a cached route, check that it is to the same
191 		 * destination and is still up.  If not, free it and try again.
192 		 */
193 		if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
194 				  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
195 		        RTFREE(ro->ro_rt);
196 			ro->ro_rt = (struct rtentry *)0;
197 		}
198 
199 		if (ro->ro_rt == 0) {
200 		        dst->sin_family = AF_INET;
201 			dst->sin_len = sizeof(*dst);
202 			dst->sin_addr = ip->ip_dst;
203 		}
204 
205 		/*
206 		 * If routing to interface only, short-circuit routing lookup.
207 		 */
208 		if (flags & IP_ROUTETOIF) {
209 		        if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
210 			    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
211 			    ipstat.ips_noroute++;
212 			    error = ENETUNREACH;
213 			    goto bad;
214 			}
215 
216 			ifp = ia->ia_ifp;
217 			ip->ip_ttl = 1;
218 		} else {
219 		        if (ro->ro_rt == 0)
220 			        rtalloc(ro);
221 
222 			if (ro->ro_rt == 0) {
223 			        ipstat.ips_noroute++;
224 				error = EHOSTUNREACH;
225 				goto bad;
226 			}
227 
228 			ia = ifatoia(ro->ro_rt->rt_ifa);
229 			ifp = ro->ro_rt->rt_ifp;
230 			ro->ro_rt->rt_use++;
231 
232 			if (ro->ro_rt->rt_flags & RTF_GATEWAY)
233 			        dst = satosin(ro->ro_rt->rt_gateway);
234 		}
235 
236 		/* Set the source IP address */
237                 if (!IN_MULTICAST(ip->ip_dst.s_addr))
238 		        ip->ip_src = ia->ia_addr.sin_addr;
239 	}
240 
241 #ifdef IPSEC
242 	/*
243 	 * splnet is chosen over spltdb because we are not allowed to
244 	 * lower the level, and udp_output calls us in splnet().
245 	 */
246 	s = splnet();
247 
248 	/* Do we have any pending SAs to apply ? */
249 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
250 	if (mtag != NULL) {
251 #ifdef DIAGNOSTIC
252 		if (mtag->m_tag_len != sizeof (struct tdb_ident))
253 			panic("ip_output: tag of length %d (should be %d",
254 			    mtag->m_tag_len, sizeof (struct tdb_ident));
255 #endif
256 		tdbi = (struct tdb_ident *)(mtag + 1);
257 		tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
258 		if (tdb == NULL)
259 			error = -EINVAL;
260 		m_tag_delete(m, mtag);
261 	}
262 	else
263 		tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error,
264 		    IPSP_DIRECTION_OUT, NULL, inp);
265 
266 	if (tdb == NULL) {
267 	        splx(s);
268 
269 		if (error == 0) {
270 		        /*
271 			 * No IPsec processing required, we'll just send the
272 			 * packet out.
273 			 */
274 		        sproto = 0;
275 
276 			/* Fall through to routing/multicast handling */
277 		} else {
278 		        /*
279 			 * -EINVAL is used to indicate that the packet should
280 			 * be silently dropped, typically because we've asked
281 			 * key management for an SA.
282 			 */
283 		        if (error == -EINVAL) /* Should silently drop packet */
284 			  error = 0;
285 
286 			m_freem(m);
287 			goto done;
288 		}
289 	} else {
290 		/* Loop detection */
291 		for (mtag = m_tag_first(m); mtag != NULL;
292 		    mtag = m_tag_next(m, mtag)) {
293 			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
294 			    mtag->m_tag_id !=
295 			    PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
296 				continue;
297 			tdbi = (struct tdb_ident *)(mtag + 1);
298 			if (tdbi->spi == tdb->tdb_spi &&
299 			    tdbi->proto == tdb->tdb_sproto &&
300 			    !bcmp(&tdbi->dst, &tdb->tdb_dst,
301 			    sizeof(union sockaddr_union))) {
302 				splx(s);
303 				sproto = 0; /* mark as no-IPsec-needed */
304 				goto done_spd;
305 			}
306 		}
307 
308 	        /* We need to do IPsec */
309 	        bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
310 		sspi = tdb->tdb_spi;
311 		sproto = tdb->tdb_sproto;
312 		splx(s);
313 
314 		/*
315 		 * If it needs TCP/UDP hardware-checksumming, do the
316 		 * computation now.
317 		 */
318 		if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
319 			in_delayed_cksum(m);
320 			m->m_pkthdr.csum &=
321 			    ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
322 		}
323 
324 		/* If it's not a multicast packet, try to fast-path */
325 		if (!IN_MULTICAST(ip->ip_dst.s_addr)) {
326 			goto sendit;
327 		}
328 	}
329 
330 	/* Fall through to the routing/multicast handling code */
331  done_spd:
332 #endif /* IPSEC */
333 
334 	if (donerouting == 0) {
335 	        if (ro == 0) {
336 		        ro = &iproute;
337 			bzero((caddr_t)ro, sizeof (*ro));
338 		}
339 
340 		dst = satosin(&ro->ro_dst);
341 
342 		/*
343 		 * If there is a cached route, check that it is to the same
344 		 * destination and is still up.  If not, free it and try again.
345 		 */
346 		if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
347 				  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
348 		        RTFREE(ro->ro_rt);
349 			ro->ro_rt = (struct rtentry *)0;
350 		}
351 
352 		if (ro->ro_rt == 0) {
353 		        dst->sin_family = AF_INET;
354 			dst->sin_len = sizeof(*dst);
355 			dst->sin_addr = ip->ip_dst;
356 		}
357 
358 		/*
359 		 * If routing to interface only, short-circuit routing lookup.
360 		 */
361 		if (flags & IP_ROUTETOIF) {
362 		        if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
363 			    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
364 			    ipstat.ips_noroute++;
365 			    error = ENETUNREACH;
366 			    goto bad;
367 			}
368 
369 			ifp = ia->ia_ifp;
370 			ip->ip_ttl = 1;
371 		} else {
372 		        if (ro->ro_rt == 0)
373 			        rtalloc(ro);
374 
375 			if (ro->ro_rt == 0) {
376 			        ipstat.ips_noroute++;
377 				error = EHOSTUNREACH;
378 				goto bad;
379 			}
380 
381 			ia = ifatoia(ro->ro_rt->rt_ifa);
382 			ifp = ro->ro_rt->rt_ifp;
383 			ro->ro_rt->rt_use++;
384 
385 			if (ro->ro_rt->rt_flags & RTF_GATEWAY)
386 			        dst = satosin(ro->ro_rt->rt_gateway);
387 		}
388 
389 		/* Set the source IP address */
390 		if (ip->ip_src.s_addr == INADDR_ANY)
391 			ip->ip_src = ia->ia_addr.sin_addr;
392 	}
393 
394 	if (IN_MULTICAST(ip->ip_dst.s_addr) ||
395 	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
396 		struct in_multi *inm;
397 
398 		m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
399 			M_BCAST : M_MCAST;
400 
401 		/*
402 		 * IP destination address is multicast.  Make sure "dst"
403 		 * still points to the address in "ro".  (It may have been
404 		 * changed to point to a gateway address, above.)
405 		 */
406 		dst = satosin(&ro->ro_dst);
407 
408 		/*
409 		 * See if the caller provided any multicast options
410 		 */
411 		if (imo != NULL) {
412 			ip->ip_ttl = imo->imo_multicast_ttl;
413 			if (imo->imo_multicast_ifp != NULL)
414 				ifp = imo->imo_multicast_ifp;
415 		} else
416 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
417 
418 		/*
419 		 * Confirm that the outgoing interface supports multicast,
420 		 * but only if the packet actually is going out on that
421 		 * interface (i.e., no IPsec is applied).
422 		 */
423 		if ((((m->m_flags & M_MCAST) &&
424 		      (ifp->if_flags & IFF_MULTICAST) == 0) ||
425 		     ((m->m_flags & M_BCAST) &&
426 		      (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0))  {
427 			ipstat.ips_noroute++;
428 			error = ENETUNREACH;
429 			goto bad;
430 		}
431 
432 		/*
433 		 * If source address not specified yet, use address
434 		 * of outgoing interface.
435 		 */
436 		if (ip->ip_src.s_addr == INADDR_ANY) {
437 			register struct in_ifaddr *ia;
438 
439 			for (ia = in_ifaddr.tqh_first;
440 			     ia;
441 			     ia = ia->ia_list.tqe_next)
442 				if (ia->ia_ifp == ifp) {
443 					ip->ip_src = ia->ia_addr.sin_addr;
444 					break;
445 				}
446 		}
447 
448 		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
449 		if (inm != NULL &&
450 		   (imo == NULL || imo->imo_multicast_loop)) {
451 			/*
452 			 * If we belong to the destination multicast group
453 			 * on the outgoing interface, and the caller did not
454 			 * forbid loopback, loop back a copy.
455 			 * Can't defer TCP/UDP checksumming, do the
456 			 * computation now.
457 			 */
458 			if (m->m_pkthdr.csum &
459 			    (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
460 				in_delayed_cksum(m);
461 				m->m_pkthdr.csum &=
462 				    ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
463 			}
464 			ip_mloopback(ifp, m, dst);
465 		}
466 #ifdef MROUTING
467 		else {
468 			/*
469 			 * If we are acting as a multicast router, perform
470 			 * multicast forwarding as if the packet had just
471 			 * arrived on the interface to which we are about
472 			 * to send.  The multicast forwarding function
473 			 * recursively calls this function, using the
474 			 * IP_FORWARDING flag to prevent infinite recursion.
475 			 *
476 			 * Multicasts that are looped back by ip_mloopback(),
477 			 * above, will be forwarded by the ip_input() routine,
478 			 * if necessary.
479 			 */
480 			extern struct socket *ip_mrouter;
481 
482 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
483 				if (ip_mforward(m, ifp) != 0) {
484 					m_freem(m);
485 					goto done;
486 				}
487 			}
488 		}
489 #endif
490 		/*
491 		 * Multicasts with a time-to-live of zero may be looped-
492 		 * back, above, but must not be transmitted on a network.
493 		 * Also, multicasts addressed to the loopback interface
494 		 * are not sent -- the above call to ip_mloopback() will
495 		 * loop back a copy if this host actually belongs to the
496 		 * destination group on the loopback interface.
497 		 */
498 		if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
499 			m_freem(m);
500 			goto done;
501 		}
502 
503 		goto sendit;
504 	}
505 
506 	/*
507 	 * Look for broadcast address and and verify user is allowed to send
508 	 * such a packet; if the packet is going in an IPsec tunnel, skip
509 	 * this check.
510 	 */
511 	if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp))) {
512 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
513 			error = EADDRNOTAVAIL;
514 			goto bad;
515 		}
516 		if ((flags & IP_ALLOWBROADCAST) == 0) {
517 			error = EACCES;
518 			goto bad;
519 		}
520 
521 		/* Don't allow broadcast messages to be fragmented */
522 		if ((u_int16_t)ip->ip_len > ifp->if_mtu) {
523 			error = EMSGSIZE;
524 			goto bad;
525 		}
526 		m->m_flags |= M_BCAST;
527 	} else
528 		m->m_flags &= ~M_BCAST;
529 
530 sendit:
531         /*
532          * If we're doing Path MTU discovery, we need to set DF unless
533          * the route's MTU is locked.
534 	 */
535 	if ((flags & IP_MTUDISC) && ro && ro->ro_rt &&
536 	    (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
537 		ip->ip_off |= IP_DF;
538 
539 #ifdef IPSEC
540 	/*
541 	 * Check if the packet needs encapsulation.
542 	 */
543 	if (sproto != 0) {
544 	        s = splnet();
545 
546 		/*
547 		 * Packet filter
548 		 */
549 #if NPF > 0
550 
551 		if (pf_test(PF_OUT, &encif[0].sc_if, &m) != PF_PASS) {
552 			error = EHOSTUNREACH;
553 			splx(s);
554 			m_freem(m);
555 			goto done;
556 		}
557 		if (m == NULL) {
558 			splx(s);
559 			goto done;
560 		}
561 		ip = mtod(m, struct ip *);
562 		hlen = ip->ip_hl << 2;
563 #endif
564 
565 		tdb = gettdb(sspi, &sdst, sproto);
566 		if (tdb == NULL) {
567 			error = EHOSTUNREACH;
568 			splx(s);
569 			m_freem(m);
570 			goto done;
571 		}
572 
573 		/* Latch to PCB */
574 		if (inp)
575 		        tdb_add_inp(tdb, inp, 0);
576 
577 		/* Check if we are allowed to fragment */
578 		if ((ip->ip_off & IP_DF) && tdb->tdb_mtu &&
579 		    (u_int16_t)ip->ip_len > tdb->tdb_mtu &&
580 		    tdb->tdb_mtutimeout > time.tv_sec) {
581 			struct rtentry *rt = NULL;
582 
583 			icmp_mtu = tdb->tdb_mtu;
584 			splx(s);
585 
586 			/* Find a host route to store the mtu in */
587 			if (ro != NULL)
588 				rt = ro->ro_rt;
589 			if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
590 				struct sockaddr_in dst = {
591 					sizeof(struct sockaddr_in), AF_INET};
592 				dst.sin_addr = ip->ip_dst;
593 				rt = icmp_mtudisc_clone((struct sockaddr *)&dst);
594 			}
595 			if (rt != NULL) {
596 				rt->rt_rmx.rmx_mtu = icmp_mtu;
597 				if (ro && ro->ro_rt != NULL) {
598 					RTFREE(ro->ro_rt);
599 					ro->ro_rt = (struct rtentry *) 0;
600 					rtalloc(ro);
601 				}
602 			}
603 			error = EMSGSIZE;
604 			goto bad;
605 		}
606 
607 		/* Massage the IP header for use by the IPsec code */
608 		ip->ip_len = htons((u_short) ip->ip_len);
609 		ip->ip_off = htons((u_short) ip->ip_off);
610 
611 		/*
612 		 * Clear these -- they'll be set in the recursive invocation
613 		 * as needed.
614 		 */
615 		m->m_flags &= ~(M_MCAST | M_BCAST);
616 
617 		/* Callee frees mbuf */
618 		error = ipsp_process_packet(m, tdb, AF_INET, 0);
619 		splx(s);
620 		return error;  /* Nothing more to be done */
621 	}
622 
623 	/*
624 	 * If deferred crypto processing is needed, check that the
625 	 * interface supports it.
626 	 */
627 	if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
628 	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
629 		/* Notify IPsec to do its own crypto. */
630 		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
631 		m_freem(m);
632 		error = EHOSTUNREACH;
633 		goto done;
634 	}
635 #endif /* IPSEC */
636 
637 	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
638 	if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
639 		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
640 		    ifp->if_bridge != NULL) {
641 			in_delayed_cksum(m);
642 			m->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
643 		}
644 	} else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
645 		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
646 		    ifp->if_bridge != NULL) {
647 			in_delayed_cksum(m);
648 			m->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
649 		}
650 	}
651 
652 	/*
653 	 * Packet filter
654 	 */
655 #if NPF > 0
656 	if (pf_test(PF_OUT, ifp, &m) != PF_PASS) {
657 		error = EHOSTUNREACH;
658 		m_freem(m);
659 		goto done;
660 	}
661 	if (m == NULL)
662 		goto done;
663 
664 	ip = mtod(m, struct ip *);
665 	hlen = ip->ip_hl << 2;
666 #endif
667 
668 	/*
669 	 * If small enough for interface, can just send directly.
670 	 */
671 	if ((u_int16_t)ip->ip_len <= ifp->if_mtu) {
672 		ip->ip_len = htons((u_int16_t)ip->ip_len);
673 		ip->ip_off = htons((u_int16_t)ip->ip_off);
674 		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
675 		    ifp->if_bridge == NULL) {
676 			m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
677 			ipstat.ips_outhwcsum++;
678 		} else {
679 			ip->ip_sum = 0;
680 			ip->ip_sum = in_cksum(m, hlen);
681 		}
682 		/* Update relevant hardware checksum stats for TCP/UDP */
683 		if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
684 			tcpstat.tcps_outhwcsum++;
685 		else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
686 			udpstat.udps_outhwcsum++;
687 		error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
688 		goto done;
689 	}
690 
691 	/*
692 	 * Too large for interface; fragment if possible.
693 	 * Must be able to put at least 8 bytes per fragment.
694 	 */
695 	if (ip->ip_off & IP_DF) {
696 #ifdef IPSEC
697 		icmp_mtu = ifp->if_mtu;
698 #endif
699 		error = EMSGSIZE;
700 		/*
701 		 * This case can happen if the user changed the MTU
702 		 * of an interface after enabling IP on it.  Because
703 		 * most netifs don't keep track of routes pointing to
704 		 * them, there is no way for one to update all its
705 		 * routes when the MTU is changed.
706 		 */
707 		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST)) &&
708 		    !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) &&
709 		    (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
710 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
711 		}
712 		ipstat.ips_cantfrag++;
713 		goto bad;
714 	}
715 	len = (ifp->if_mtu - hlen) &~ 7;
716 	if (len < 8) {
717 		error = EMSGSIZE;
718 		goto bad;
719 	}
720 
721 	/*
722 	 * If we are doing fragmentation, we can't defer TCP/UDP
723 	 * checksumming; compute the checksum and clear the flag.
724 	 */
725 	if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
726 		in_delayed_cksum(m);
727 		m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
728 	}
729 
730     {
731 	int mhlen, firstlen = len;
732 	struct mbuf **mnext = &m->m_nextpkt;
733 
734 	/*
735 	 * Loop through length of segment after first fragment,
736 	 * make new header and copy data of each part and link onto chain.
737 	 */
738 	m0 = m;
739 	mhlen = sizeof (struct ip);
740 	for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) {
741 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
742 		if (m == 0) {
743 			error = ENOBUFS;
744 			ipstat.ips_odropped++;
745 			goto sendorfree;
746 		}
747 		*mnext = m;
748 		mnext = &m->m_nextpkt;
749 		m->m_data += max_linkhdr;
750 		mhip = mtod(m, struct ip *);
751 		*mhip = *ip;
752 		/* we must inherit MCAST and BCAST flags */
753 		m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
754 		if (hlen > sizeof (struct ip)) {
755 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
756 			mhip->ip_hl = mhlen >> 2;
757 		}
758 		m->m_len = mhlen;
759 		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
760 		if (ip->ip_off & IP_MF)
761 			mhip->ip_off |= IP_MF;
762 		if (off + len >= (u_int16_t)ip->ip_len)
763 			len = (u_int16_t)ip->ip_len - off;
764 		else
765 			mhip->ip_off |= IP_MF;
766 		mhip->ip_len = htons((u_int16_t)(len + mhlen));
767 		m->m_next = m_copy(m0, off, len);
768 		if (m->m_next == 0) {
769 			error = ENOBUFS;	/* ??? */
770 			ipstat.ips_odropped++;
771 			goto sendorfree;
772 		}
773 		m->m_pkthdr.len = mhlen + len;
774 		m->m_pkthdr.rcvif = (struct ifnet *)0;
775 		mhip->ip_off = htons((u_int16_t)mhip->ip_off);
776 		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
777 		    ifp->if_bridge == NULL) {
778 			m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
779 			ipstat.ips_outhwcsum++;
780 		} else {
781 			mhip->ip_sum = 0;
782 			mhip->ip_sum = in_cksum(m, mhlen);
783 		}
784 		ipstat.ips_ofragments++;
785 	}
786 	/*
787 	 * Update first fragment by trimming what's been copied out
788 	 * and updating header, then send each fragment (in order).
789 	 */
790 	m = m0;
791 	m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len);
792 	m->m_pkthdr.len = hlen + firstlen;
793 	ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
794 	ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF));
795 	if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
796 	    ifp->if_bridge == NULL) {
797 		m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
798 		ipstat.ips_outhwcsum++;
799 	} else {
800 		ip->ip_sum = 0;
801 		ip->ip_sum = in_cksum(m, hlen);
802 	}
803 sendorfree:
804 	for (m = m0; m; m = m0) {
805 		m0 = m->m_nextpkt;
806 		m->m_nextpkt = 0;
807 		if (error == 0)
808 			error = (*ifp->if_output)(ifp, m, sintosa(dst),
809 			    ro->ro_rt);
810 		else
811 			m_freem(m);
812 	}
813 
814 	if (error == 0)
815 		ipstat.ips_fragmented++;
816     }
817 done:
818 	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
819 		RTFREE(ro->ro_rt);
820 	return (error);
821 bad:
822 #ifdef IPSEC
823 	if (error == EMSGSIZE && icmp_mtu != 0)
824 		ipsec_adjust_mtu(m, icmp_mtu);
825 #endif
826 	m_freem(m0);
827 	goto done;
828 }
829 
830 /*
831  * Insert IP options into preformed packet.
832  * Adjust IP destination as required for IP source routing,
833  * as indicated by a non-zero in_addr at the start of the options.
834  */
835 static struct mbuf *
836 ip_insertoptions(m, opt, phlen)
837 	register struct mbuf *m;
838 	struct mbuf *opt;
839 	int *phlen;
840 {
841 	register struct ipoption *p = mtod(opt, struct ipoption *);
842 	struct mbuf *n;
843 	register struct ip *ip = mtod(m, struct ip *);
844 	unsigned optlen;
845 
846 	optlen = opt->m_len - sizeof(p->ipopt_dst);
847 	if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET)
848 		return (m);		/* XXX should fail */
849 	if (p->ipopt_dst.s_addr)
850 		ip->ip_dst = p->ipopt_dst;
851 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
852 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
853 		if (n == 0)
854 			return (m);
855 		M_MOVE_HDR(n, m);
856 		n->m_pkthdr.len += optlen;
857 		m->m_len -= sizeof(struct ip);
858 		m->m_data += sizeof(struct ip);
859 		n->m_next = m;
860 		m = n;
861 		m->m_len = optlen + sizeof(struct ip);
862 		m->m_data += max_linkhdr;
863 		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
864 	} else {
865 		m->m_data -= optlen;
866 		m->m_len += optlen;
867 		m->m_pkthdr.len += optlen;
868 		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
869 	}
870 	ip = mtod(m, struct ip *);
871 	bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
872 	*phlen = sizeof(struct ip) + optlen;
873 	ip->ip_len += optlen;
874 	return (m);
875 }
876 
877 /*
878  * Copy options from ip to jp,
879  * omitting those not copied during fragmentation.
880  */
881 int
882 ip_optcopy(ip, jp)
883 	struct ip *ip, *jp;
884 {
885 	register u_char *cp, *dp;
886 	int opt, optlen, cnt;
887 
888 	cp = (u_char *)(ip + 1);
889 	dp = (u_char *)(jp + 1);
890 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
891 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
892 		opt = cp[0];
893 		if (opt == IPOPT_EOL)
894 			break;
895 		if (opt == IPOPT_NOP) {
896 			/* Preserve for IP mcast tunnel's LSRR alignment. */
897 			*dp++ = IPOPT_NOP;
898 			optlen = 1;
899 			continue;
900 		}
901 #ifdef DIAGNOSTIC
902 		if (cnt < IPOPT_OLEN + sizeof(*cp))
903 			panic("malformed IPv4 option passed to ip_optcopy");
904 #endif
905 		optlen = cp[IPOPT_OLEN];
906 #ifdef DIAGNOSTIC
907 		if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
908 			panic("malformed IPv4 option passed to ip_optcopy");
909 #endif
910 		/* bogus lengths should have been caught by ip_dooptions */
911 		if (optlen > cnt)
912 			optlen = cnt;
913 		if (IPOPT_COPIED(opt)) {
914 			bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
915 			dp += optlen;
916 		}
917 	}
918 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
919 		*dp++ = IPOPT_EOL;
920 	return (optlen);
921 }
922 
923 /*
924  * IP socket option processing.
925  */
926 int
927 ip_ctloutput(op, so, level, optname, mp)
928 	int op;
929 	struct socket *so;
930 	int level, optname;
931 	struct mbuf **mp;
932 {
933 	register struct inpcb *inp = sotoinpcb(so);
934 	register struct mbuf *m = *mp;
935 	register int optval = 0;
936 #ifdef IPSEC
937 	struct proc *p = curproc; /* XXX */
938 	struct ipsec_ref *ipr;
939 	u_int16_t opt16val;
940 #endif
941 	int error = 0;
942 
943 	if (level != IPPROTO_IP) {
944 		error = EINVAL;
945 		if (op == PRCO_SETOPT && *mp)
946 			(void) m_free(*mp);
947 	} else switch (op) {
948 	case PRCO_SETOPT:
949 		switch (optname) {
950 		case IP_OPTIONS:
951 #ifdef notyet
952 		case IP_RETOPTS:
953 			return (ip_pcbopts(optname, &inp->inp_options, m));
954 #else
955 			return (ip_pcbopts(&inp->inp_options, m));
956 #endif
957 
958 		case IP_TOS:
959 		case IP_TTL:
960 		case IP_RECVOPTS:
961 		case IP_RECVRETOPTS:
962 		case IP_RECVDSTADDR:
963 			if (m == NULL || m->m_len != sizeof(int))
964 				error = EINVAL;
965 			else {
966 				optval = *mtod(m, int *);
967 				switch (optname) {
968 
969 				case IP_TOS:
970 					inp->inp_ip.ip_tos = optval;
971 					break;
972 
973 				case IP_TTL:
974 					inp->inp_ip.ip_ttl = optval;
975 					break;
976 #define	OPTSET(bit) \
977 	if (optval) \
978 		inp->inp_flags |= bit; \
979 	else \
980 		inp->inp_flags &= ~bit;
981 
982 				case IP_RECVOPTS:
983 					OPTSET(INP_RECVOPTS);
984 					break;
985 
986 				case IP_RECVRETOPTS:
987 					OPTSET(INP_RECVRETOPTS);
988 					break;
989 
990 				case IP_RECVDSTADDR:
991 					OPTSET(INP_RECVDSTADDR);
992 					break;
993 				}
994 			}
995 			break;
996 #undef OPTSET
997 
998 		case IP_MULTICAST_IF:
999 		case IP_MULTICAST_TTL:
1000 		case IP_MULTICAST_LOOP:
1001 		case IP_ADD_MEMBERSHIP:
1002 		case IP_DROP_MEMBERSHIP:
1003 			error = ip_setmoptions(optname, &inp->inp_moptions, m);
1004 			break;
1005 
1006 		case IP_PORTRANGE:
1007 			if (m == 0 || m->m_len != sizeof(int))
1008 				error = EINVAL;
1009 			else {
1010 				optval = *mtod(m, int *);
1011 
1012 				switch (optval) {
1013 
1014 				case IP_PORTRANGE_DEFAULT:
1015 					inp->inp_flags &= ~(INP_LOWPORT);
1016 					inp->inp_flags &= ~(INP_HIGHPORT);
1017 					break;
1018 
1019 				case IP_PORTRANGE_HIGH:
1020 					inp->inp_flags &= ~(INP_LOWPORT);
1021 					inp->inp_flags |= INP_HIGHPORT;
1022 					break;
1023 
1024 				case IP_PORTRANGE_LOW:
1025 					inp->inp_flags &= ~(INP_HIGHPORT);
1026 					inp->inp_flags |= INP_LOWPORT;
1027 					break;
1028 
1029 				default:
1030 
1031 					error = EINVAL;
1032 					break;
1033 				}
1034 			}
1035 			break;
1036 		case IP_AUTH_LEVEL:
1037 		case IP_ESP_TRANS_LEVEL:
1038 		case IP_ESP_NETWORK_LEVEL:
1039 		case IP_IPCOMP_LEVEL:
1040 #ifndef IPSEC
1041 			error = EOPNOTSUPP;
1042 #else
1043 			if (m == 0 || m->m_len != sizeof(int)) {
1044 				error = EINVAL;
1045 				break;
1046 			}
1047 			optval = *mtod(m, int *);
1048 
1049 			if (optval < IPSEC_LEVEL_BYPASS ||
1050 			    optval > IPSEC_LEVEL_UNIQUE) {
1051 				error = EINVAL;
1052 				break;
1053 			}
1054 
1055 			/* Unlink cached output TDB to force a re-search */
1056 			if (inp->inp_tdb_out) {
1057 				int s = spltdb();
1058 				TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1059 				    inp, inp_tdb_out_next);
1060 				splx(s);
1061 			}
1062 
1063 			if (inp->inp_tdb_in) {
1064 				int s = spltdb();
1065 				TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1066 				    inp, inp_tdb_in_next);
1067 				splx(s);
1068 			}
1069 
1070 			switch (optname) {
1071 			case IP_AUTH_LEVEL:
1072 			        if (optval < ipsec_auth_default_level &&
1073 				    suser(p->p_ucred, &p->p_acflag)) {
1074 					error = EACCES;
1075 					break;
1076 				}
1077 				inp->inp_seclevel[SL_AUTH] = optval;
1078 				break;
1079 
1080 			case IP_ESP_TRANS_LEVEL:
1081 			        if (optval < ipsec_esp_trans_default_level &&
1082 				    suser(p->p_ucred, &p->p_acflag)) {
1083 					error = EACCES;
1084 					break;
1085 				}
1086 				inp->inp_seclevel[SL_ESP_TRANS] = optval;
1087 				break;
1088 
1089 			case IP_ESP_NETWORK_LEVEL:
1090 			        if (optval < ipsec_esp_network_default_level &&
1091 				    suser(p->p_ucred, &p->p_acflag)) {
1092 					error = EACCES;
1093 					break;
1094 				}
1095 				inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1096 				break;
1097 			case IP_IPCOMP_LEVEL:
1098 			        if (optval < ipsec_ipcomp_default_level &&
1099 				    suser(p->p_ucred, &p->p_acflag)) {
1100 				        error = EACCES;
1101 					break;
1102 				}
1103 				inp->inp_seclevel[SL_IPCOMP] = optval;
1104 				break;
1105 			}
1106 			if (!error)
1107 				inp->inp_secrequire = get_sa_require(inp);
1108 #endif
1109 			break;
1110 
1111 		case IP_IPSEC_REMOTE_CRED:
1112 		case IP_IPSEC_REMOTE_AUTH:
1113 			/* Can't set the remote credential or key */
1114 			error = EOPNOTSUPP;
1115 			break;
1116 
1117 		case IP_IPSEC_LOCAL_ID:
1118 		case IP_IPSEC_REMOTE_ID:
1119 		case IP_IPSEC_LOCAL_CRED:
1120 		case IP_IPSEC_LOCAL_AUTH:
1121 #ifndef IPSEC
1122 			error = EOPNOTSUPP;
1123 #else
1124 			if (m->m_len < 2) {
1125 				error = EINVAL;
1126 				break;
1127 			}
1128 
1129 			m_copydata(m, 0, 2, (caddr_t) &opt16val);
1130 
1131 			/* If the type is 0, then we cleanup and return */
1132 			if (opt16val == 0) {
1133 				switch (optname) {
1134 				case IP_IPSEC_LOCAL_ID:
1135 					if (inp->inp_ipsec_localid != NULL)
1136 						ipsp_reffree(inp->inp_ipsec_localid);
1137 					inp->inp_ipsec_localid = NULL;
1138 					break;
1139 
1140 				case IP_IPSEC_REMOTE_ID:
1141 					if (inp->inp_ipsec_remoteid != NULL)
1142 						ipsp_reffree(inp->inp_ipsec_remoteid);
1143 					inp->inp_ipsec_remoteid = NULL;
1144 					break;
1145 
1146 				case IP_IPSEC_LOCAL_CRED:
1147 					if (inp->inp_ipsec_localcred != NULL)
1148 						ipsp_reffree(inp->inp_ipsec_localcred);
1149 					inp->inp_ipsec_localcred = NULL;
1150 					break;
1151 
1152 				case IP_IPSEC_LOCAL_AUTH:
1153 					if (inp->inp_ipsec_localauth != NULL)
1154 						ipsp_reffree(inp->inp_ipsec_localauth);
1155 					inp->inp_ipsec_localauth = NULL;
1156 					break;
1157 				}
1158 
1159 				error = 0;
1160 				break;
1161 			}
1162 
1163 			/* Can't have an empty payload */
1164 			if (m->m_len == 2) {
1165 				error = EINVAL;
1166 				break;
1167 			}
1168 
1169 			MALLOC(ipr, struct ipsec_ref *,
1170 			       sizeof(struct ipsec_ref) + m->m_len - 2,
1171 			       M_CREDENTIALS, M_NOWAIT);
1172 			if (ipr == NULL) {
1173 				error = ENOBUFS;
1174 				break;
1175 			}
1176 			ipr->ref_count = 1;
1177 			ipr->ref_malloctype = M_CREDENTIALS;
1178 			ipr->ref_len = m->m_len - 2;
1179 			ipr->ref_type = opt16val;
1180 			m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1));
1181 
1182 			switch (optname) {
1183 			case IP_IPSEC_LOCAL_ID:
1184 				/* Check valid types and NUL-termination */
1185 				if (ipr->ref_type < IPSP_IDENTITY_PREFIX ||
1186 				    ipr->ref_type > IPSP_IDENTITY_CONNECTION ||
1187 				    ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1188 					FREE(ipr, M_CREDENTIALS);
1189 					error = EINVAL;
1190 				} else {
1191 					if (inp->inp_ipsec_localid != NULL)
1192 						ipsp_reffree(inp->inp_ipsec_localid);
1193 					inp->inp_ipsec_localid = ipr;
1194 				}
1195 				break;
1196 			case IP_IPSEC_REMOTE_ID:
1197 				/* Check valid types and NUL-termination */
1198 				if (ipr->ref_type < IPSP_IDENTITY_PREFIX ||
1199 				    ipr->ref_type > IPSP_IDENTITY_CONNECTION ||
1200 				    ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1201 					FREE(ipr, M_CREDENTIALS);
1202 					error = EINVAL;
1203 				} else {
1204 					if (inp->inp_ipsec_remoteid != NULL)
1205 						ipsp_reffree(inp->inp_ipsec_remoteid);
1206 					inp->inp_ipsec_remoteid = ipr;
1207 				}
1208 				break;
1209 			case IP_IPSEC_LOCAL_CRED:
1210 				if (ipr->ref_type < IPSP_CRED_KEYNOTE ||
1211 				    ipr->ref_type > IPSP_CRED_X509) {
1212 					FREE(ipr, M_CREDENTIALS);
1213 					error = EINVAL;
1214 				} else {
1215 					if (inp->inp_ipsec_localcred != NULL)
1216 						ipsp_reffree(inp->inp_ipsec_localcred);
1217 					inp->inp_ipsec_localcred = ipr;
1218 				}
1219 				break;
1220 			case IP_IPSEC_LOCAL_AUTH:
1221 				if (ipr->ref_type < IPSP_AUTH_PASSPHRASE ||
1222 				    ipr->ref_type > IPSP_AUTH_RSA) {
1223 					FREE(ipr, M_CREDENTIALS);
1224 					error = EINVAL;
1225 				} else {
1226 					if (inp->inp_ipsec_localauth != NULL)
1227 						ipsp_reffree(inp->inp_ipsec_localauth);
1228 					inp->inp_ipsec_localauth = ipr;
1229 				}
1230 				break;
1231 			}
1232 
1233 			/* Unlink cached output TDB to force a re-search */
1234 			if (inp->inp_tdb_out) {
1235 				int s = spltdb();
1236 				TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1237 				    inp, inp_tdb_out_next);
1238 				splx(s);
1239 			}
1240 
1241 			if (inp->inp_tdb_in) {
1242 				int s = spltdb();
1243 				TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1244 				    inp, inp_tdb_in_next);
1245 				splx(s);
1246 			}
1247 #endif
1248 			break;
1249 		default:
1250 			error = ENOPROTOOPT;
1251 			break;
1252 		}
1253 		if (m)
1254 			(void)m_free(m);
1255 		break;
1256 
1257 	case PRCO_GETOPT:
1258 		switch (optname) {
1259 		case IP_OPTIONS:
1260 		case IP_RETOPTS:
1261 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1262 			if (inp->inp_options) {
1263 				m->m_len = inp->inp_options->m_len;
1264 				bcopy(mtod(inp->inp_options, caddr_t),
1265 				    mtod(m, caddr_t), (unsigned)m->m_len);
1266 			} else
1267 				m->m_len = 0;
1268 			break;
1269 
1270 		case IP_TOS:
1271 		case IP_TTL:
1272 		case IP_RECVOPTS:
1273 		case IP_RECVRETOPTS:
1274 		case IP_RECVDSTADDR:
1275 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1276 			m->m_len = sizeof(int);
1277 			switch (optname) {
1278 
1279 			case IP_TOS:
1280 				optval = inp->inp_ip.ip_tos;
1281 				break;
1282 
1283 			case IP_TTL:
1284 				optval = inp->inp_ip.ip_ttl;
1285 				break;
1286 
1287 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1288 
1289 			case IP_RECVOPTS:
1290 				optval = OPTBIT(INP_RECVOPTS);
1291 				break;
1292 
1293 			case IP_RECVRETOPTS:
1294 				optval = OPTBIT(INP_RECVRETOPTS);
1295 				break;
1296 
1297 			case IP_RECVDSTADDR:
1298 				optval = OPTBIT(INP_RECVDSTADDR);
1299 				break;
1300 			}
1301 			*mtod(m, int *) = optval;
1302 			break;
1303 
1304 		case IP_MULTICAST_IF:
1305 		case IP_MULTICAST_TTL:
1306 		case IP_MULTICAST_LOOP:
1307 		case IP_ADD_MEMBERSHIP:
1308 		case IP_DROP_MEMBERSHIP:
1309 			error = ip_getmoptions(optname, inp->inp_moptions, mp);
1310 			break;
1311 
1312 		case IP_PORTRANGE:
1313 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1314 			m->m_len = sizeof(int);
1315 
1316 			if (inp->inp_flags & INP_HIGHPORT)
1317 				optval = IP_PORTRANGE_HIGH;
1318 			else if (inp->inp_flags & INP_LOWPORT)
1319 				optval = IP_PORTRANGE_LOW;
1320 			else
1321 				optval = 0;
1322 
1323 			*mtod(m, int *) = optval;
1324 			break;
1325 
1326 		case IP_AUTH_LEVEL:
1327 		case IP_ESP_TRANS_LEVEL:
1328 		case IP_ESP_NETWORK_LEVEL:
1329 		case IP_IPCOMP_LEVEL:
1330 #ifndef IPSEC
1331 			m->m_len = sizeof(int);
1332 			*mtod(m, int *) = IPSEC_LEVEL_NONE;
1333 #else
1334 			m->m_len = sizeof(int);
1335 			switch (optname) {
1336 			case IP_AUTH_LEVEL:
1337 				optval = inp->inp_seclevel[SL_AUTH];
1338 				break;
1339 
1340 			case IP_ESP_TRANS_LEVEL:
1341 				optval = inp->inp_seclevel[SL_ESP_TRANS];
1342 				break;
1343 
1344 			case IP_ESP_NETWORK_LEVEL:
1345 				optval = inp->inp_seclevel[SL_ESP_NETWORK];
1346 				break;
1347 			case IP_IPCOMP_LEVEL:
1348 			        optval = inp->inp_seclevel[SL_IPCOMP];
1349 				break;
1350 			}
1351 			*mtod(m, int *) = optval;
1352 #endif
1353 			break;
1354 		case IP_IPSEC_LOCAL_ID:
1355 		case IP_IPSEC_REMOTE_ID:
1356 		case IP_IPSEC_LOCAL_CRED:
1357 		case IP_IPSEC_REMOTE_CRED:
1358 		case IP_IPSEC_LOCAL_AUTH:
1359 		case IP_IPSEC_REMOTE_AUTH:
1360 #ifndef IPSEC
1361 			error = EOPNOTSUPP;
1362 #else
1363 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1364 			m->m_len = sizeof(u_int16_t);
1365 			switch (optname) {
1366 			case IP_IPSEC_LOCAL_ID:
1367 				ipr = inp->inp_ipsec_localid;
1368 				opt16val = IPSP_IDENTITY_NONE;
1369 				break;
1370 			case IP_IPSEC_REMOTE_ID:
1371 				ipr = inp->inp_ipsec_remoteid;
1372 				opt16val = IPSP_IDENTITY_NONE;
1373 				break;
1374 			case IP_IPSEC_LOCAL_CRED:
1375 				ipr = inp->inp_ipsec_localcred;
1376 				opt16val = IPSP_CRED_NONE;
1377 				break;
1378 			case IP_IPSEC_REMOTE_CRED:
1379 				ipr = inp->inp_ipsec_remotecred;
1380 				opt16val = IPSP_CRED_NONE;
1381 				break;
1382 			case IP_IPSEC_LOCAL_AUTH:
1383 				ipr = inp->inp_ipsec_localauth;
1384 				break;
1385 			case IP_IPSEC_REMOTE_AUTH:
1386 				ipr = inp->inp_ipsec_remoteauth;
1387 				break;
1388 			}
1389 			if (ipr == NULL)
1390 				*mtod(m, u_int16_t *) = opt16val;
1391 			else {
1392 				m->m_len += ipr->ref_len;
1393 				*mtod(m, u_int16_t *) = ipr->ref_type;
1394 				m_copyback(m, sizeof(u_int16_t), ipr->ref_len,
1395 					   (caddr_t)(ipr + 1));
1396 			}
1397 #endif
1398 			break;
1399 		default:
1400 			error = ENOPROTOOPT;
1401 			break;
1402 		}
1403 		break;
1404 	}
1405 	return (error);
1406 }
1407 
1408 /*
1409  * Set up IP options in pcb for insertion in output packets.
1410  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1411  * with destination address if source routed.
1412  */
1413 int
1414 #ifdef notyet
1415 ip_pcbopts(optname, pcbopt, m)
1416 	int optname;
1417 #else
1418 ip_pcbopts(pcbopt, m)
1419 #endif
1420 	struct mbuf **pcbopt;
1421 	register struct mbuf *m;
1422 {
1423 	register int cnt, optlen;
1424 	register u_char *cp;
1425 	u_char opt;
1426 
1427 	/* turn off any old options */
1428 	if (*pcbopt)
1429 		(void)m_free(*pcbopt);
1430 	*pcbopt = 0;
1431 	if (m == (struct mbuf *)0 || m->m_len == 0) {
1432 		/*
1433 		 * Only turning off any previous options.
1434 		 */
1435 		if (m)
1436 			(void)m_free(m);
1437 		return (0);
1438 	}
1439 
1440 #ifndef	vax
1441 	if (m->m_len % sizeof(int32_t))
1442 		goto bad;
1443 #endif
1444 	/*
1445 	 * IP first-hop destination address will be stored before
1446 	 * actual options; move other options back
1447 	 * and clear it when none present.
1448 	 */
1449 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1450 		goto bad;
1451 	cnt = m->m_len;
1452 	m->m_len += sizeof(struct in_addr);
1453 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1454 	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1455 	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1456 
1457 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1458 		opt = cp[IPOPT_OPTVAL];
1459 		if (opt == IPOPT_EOL)
1460 			break;
1461 		if (opt == IPOPT_NOP)
1462 			optlen = 1;
1463 		else {
1464 			if (cnt < IPOPT_OLEN + sizeof(*cp))
1465 				goto bad;
1466 			optlen = cp[IPOPT_OLEN];
1467 			if (optlen < IPOPT_OLEN  + sizeof(*cp) || optlen > cnt)
1468 				goto bad;
1469 		}
1470 		switch (opt) {
1471 
1472 		default:
1473 			break;
1474 
1475 		case IPOPT_LSRR:
1476 		case IPOPT_SSRR:
1477 			/*
1478 			 * user process specifies route as:
1479 			 *	->A->B->C->D
1480 			 * D must be our final destination (but we can't
1481 			 * check that since we may not have connected yet).
1482 			 * A is first hop destination, which doesn't appear in
1483 			 * actual IP option, but is stored before the options.
1484 			 */
1485 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1486 				goto bad;
1487 			m->m_len -= sizeof(struct in_addr);
1488 			cnt -= sizeof(struct in_addr);
1489 			optlen -= sizeof(struct in_addr);
1490 			cp[IPOPT_OLEN] = optlen;
1491 			/*
1492 			 * Move first hop before start of options.
1493 			 */
1494 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1495 			    sizeof(struct in_addr));
1496 			/*
1497 			 * Then copy rest of options back
1498 			 * to close up the deleted entry.
1499 			 */
1500 			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1501 			    sizeof(struct in_addr)),
1502 			    (caddr_t)&cp[IPOPT_OFFSET+1],
1503 			    (unsigned)cnt + sizeof(struct in_addr));
1504 			break;
1505 		}
1506 	}
1507 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1508 		goto bad;
1509 	*pcbopt = m;
1510 	return (0);
1511 
1512 bad:
1513 	(void)m_free(m);
1514 	return (EINVAL);
1515 }
1516 
1517 /*
1518  * Set the IP multicast options in response to user setsockopt().
1519  */
1520 int
1521 ip_setmoptions(optname, imop, m)
1522 	int optname;
1523 	struct ip_moptions **imop;
1524 	struct mbuf *m;
1525 {
1526 	register int error = 0;
1527 	u_char loop;
1528 	register int i;
1529 	struct in_addr addr;
1530 	register struct ip_mreq *mreq;
1531 	register struct ifnet *ifp;
1532 	register struct ip_moptions *imo = *imop;
1533 	struct route ro;
1534 	register struct sockaddr_in *dst;
1535 
1536 	if (imo == NULL) {
1537 		/*
1538 		 * No multicast option buffer attached to the pcb;
1539 		 * allocate one and initialize to default values.
1540 		 */
1541 		imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1542 		    M_WAITOK);
1543 
1544 		*imop = imo;
1545 		imo->imo_multicast_ifp = NULL;
1546 		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1547 		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1548 		imo->imo_num_memberships = 0;
1549 	}
1550 
1551 	switch (optname) {
1552 
1553 	case IP_MULTICAST_IF:
1554 		/*
1555 		 * Select the interface for outgoing multicast packets.
1556 		 */
1557 		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1558 			error = EINVAL;
1559 			break;
1560 		}
1561 		addr = *(mtod(m, struct in_addr *));
1562 		/*
1563 		 * INADDR_ANY is used to remove a previous selection.
1564 		 * When no interface is selected, a default one is
1565 		 * chosen every time a multicast packet is sent.
1566 		 */
1567 		if (addr.s_addr == INADDR_ANY) {
1568 			imo->imo_multicast_ifp = NULL;
1569 			break;
1570 		}
1571 		/*
1572 		 * The selected interface is identified by its local
1573 		 * IP address.  Find the interface and confirm that
1574 		 * it supports multicasting.
1575 		 */
1576 		INADDR_TO_IFP(addr, ifp);
1577 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1578 			error = EADDRNOTAVAIL;
1579 			break;
1580 		}
1581 		imo->imo_multicast_ifp = ifp;
1582 		break;
1583 
1584 	case IP_MULTICAST_TTL:
1585 		/*
1586 		 * Set the IP time-to-live for outgoing multicast packets.
1587 		 */
1588 		if (m == NULL || m->m_len != 1) {
1589 			error = EINVAL;
1590 			break;
1591 		}
1592 		imo->imo_multicast_ttl = *(mtod(m, u_char *));
1593 		break;
1594 
1595 	case IP_MULTICAST_LOOP:
1596 		/*
1597 		 * Set the loopback flag for outgoing multicast packets.
1598 		 * Must be zero or one.
1599 		 */
1600 		if (m == NULL || m->m_len != 1 ||
1601 		   (loop = *(mtod(m, u_char *))) > 1) {
1602 			error = EINVAL;
1603 			break;
1604 		}
1605 		imo->imo_multicast_loop = loop;
1606 		break;
1607 
1608 	case IP_ADD_MEMBERSHIP:
1609 		/*
1610 		 * Add a multicast group membership.
1611 		 * Group must be a valid IP multicast address.
1612 		 */
1613 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1614 			error = EINVAL;
1615 			break;
1616 		}
1617 		mreq = mtod(m, struct ip_mreq *);
1618 		if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1619 			error = EINVAL;
1620 			break;
1621 		}
1622 		/*
1623 		 * If no interface address was provided, use the interface of
1624 		 * the route to the given multicast address.
1625 		 */
1626 		if (mreq->imr_interface.s_addr == INADDR_ANY) {
1627 			ro.ro_rt = NULL;
1628 			dst = satosin(&ro.ro_dst);
1629 			dst->sin_len = sizeof(*dst);
1630 			dst->sin_family = AF_INET;
1631 			dst->sin_addr = mreq->imr_multiaddr;
1632 			rtalloc(&ro);
1633 			if (ro.ro_rt == NULL) {
1634 				error = EADDRNOTAVAIL;
1635 				break;
1636 			}
1637 			ifp = ro.ro_rt->rt_ifp;
1638 			rtfree(ro.ro_rt);
1639 		} else {
1640 			INADDR_TO_IFP(mreq->imr_interface, ifp);
1641 		}
1642 		/*
1643 		 * See if we found an interface, and confirm that it
1644 		 * supports multicast.
1645 		 */
1646 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1647 			error = EADDRNOTAVAIL;
1648 			break;
1649 		}
1650 		/*
1651 		 * See if the membership already exists or if all the
1652 		 * membership slots are full.
1653 		 */
1654 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1655 			if (imo->imo_membership[i]->inm_ifp == ifp &&
1656 			    imo->imo_membership[i]->inm_addr.s_addr
1657 						== mreq->imr_multiaddr.s_addr)
1658 				break;
1659 		}
1660 		if (i < imo->imo_num_memberships) {
1661 			error = EADDRINUSE;
1662 			break;
1663 		}
1664 		if (i == IP_MAX_MEMBERSHIPS) {
1665 			error = ETOOMANYREFS;
1666 			break;
1667 		}
1668 		/*
1669 		 * Everything looks good; add a new record to the multicast
1670 		 * address list for the given interface.
1671 		 */
1672 		if ((imo->imo_membership[i] =
1673 		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1674 			error = ENOBUFS;
1675 			break;
1676 		}
1677 		++imo->imo_num_memberships;
1678 		break;
1679 
1680 	case IP_DROP_MEMBERSHIP:
1681 		/*
1682 		 * Drop a multicast group membership.
1683 		 * Group must be a valid IP multicast address.
1684 		 */
1685 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1686 			error = EINVAL;
1687 			break;
1688 		}
1689 		mreq = mtod(m, struct ip_mreq *);
1690 		if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1691 			error = EINVAL;
1692 			break;
1693 		}
1694 		/*
1695 		 * If an interface address was specified, get a pointer
1696 		 * to its ifnet structure.
1697 		 */
1698 		if (mreq->imr_interface.s_addr == INADDR_ANY)
1699 			ifp = NULL;
1700 		else {
1701 			INADDR_TO_IFP(mreq->imr_interface, ifp);
1702 			if (ifp == NULL) {
1703 				error = EADDRNOTAVAIL;
1704 				break;
1705 			}
1706 		}
1707 		/*
1708 		 * Find the membership in the membership array.
1709 		 */
1710 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1711 			if ((ifp == NULL ||
1712 			     imo->imo_membership[i]->inm_ifp == ifp) &&
1713 			     imo->imo_membership[i]->inm_addr.s_addr ==
1714 			     mreq->imr_multiaddr.s_addr)
1715 				break;
1716 		}
1717 		if (i == imo->imo_num_memberships) {
1718 			error = EADDRNOTAVAIL;
1719 			break;
1720 		}
1721 		/*
1722 		 * Give up the multicast address record to which the
1723 		 * membership points.
1724 		 */
1725 		in_delmulti(imo->imo_membership[i]);
1726 		/*
1727 		 * Remove the gap in the membership array.
1728 		 */
1729 		for (++i; i < imo->imo_num_memberships; ++i)
1730 			imo->imo_membership[i-1] = imo->imo_membership[i];
1731 		--imo->imo_num_memberships;
1732 		break;
1733 
1734 	default:
1735 		error = EOPNOTSUPP;
1736 		break;
1737 	}
1738 
1739 	/*
1740 	 * If all options have default values, no need to keep the mbuf.
1741 	 */
1742 	if (imo->imo_multicast_ifp == NULL &&
1743 	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1744 	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1745 	    imo->imo_num_memberships == 0) {
1746 		free(*imop, M_IPMOPTS);
1747 		*imop = NULL;
1748 	}
1749 
1750 	return (error);
1751 }
1752 
1753 /*
1754  * Return the IP multicast options in response to user getsockopt().
1755  */
1756 int
1757 ip_getmoptions(optname, imo, mp)
1758 	int optname;
1759 	register struct ip_moptions *imo;
1760 	register struct mbuf **mp;
1761 {
1762 	u_char *ttl;
1763 	u_char *loop;
1764 	struct in_addr *addr;
1765 	struct in_ifaddr *ia;
1766 
1767 	*mp = m_get(M_WAIT, MT_SOOPTS);
1768 
1769 	switch (optname) {
1770 
1771 	case IP_MULTICAST_IF:
1772 		addr = mtod(*mp, struct in_addr *);
1773 		(*mp)->m_len = sizeof(struct in_addr);
1774 		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1775 			addr->s_addr = INADDR_ANY;
1776 		else {
1777 			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1778 			addr->s_addr = (ia == NULL) ? INADDR_ANY
1779 					: ia->ia_addr.sin_addr.s_addr;
1780 		}
1781 		return (0);
1782 
1783 	case IP_MULTICAST_TTL:
1784 		ttl = mtod(*mp, u_char *);
1785 		(*mp)->m_len = 1;
1786 		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1787 				     : imo->imo_multicast_ttl;
1788 		return (0);
1789 
1790 	case IP_MULTICAST_LOOP:
1791 		loop = mtod(*mp, u_char *);
1792 		(*mp)->m_len = 1;
1793 		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1794 				      : imo->imo_multicast_loop;
1795 		return (0);
1796 
1797 	default:
1798 		return (EOPNOTSUPP);
1799 	}
1800 }
1801 
1802 /*
1803  * Discard the IP multicast options.
1804  */
1805 void
1806 ip_freemoptions(imo)
1807 	register struct ip_moptions *imo;
1808 {
1809 	register int i;
1810 
1811 	if (imo != NULL) {
1812 		for (i = 0; i < imo->imo_num_memberships; ++i)
1813 			in_delmulti(imo->imo_membership[i]);
1814 		free(imo, M_IPMOPTS);
1815 	}
1816 }
1817 
1818 /*
1819  * Routine called from ip_output() to loop back a copy of an IP multicast
1820  * packet to the input queue of a specified interface.  Note that this
1821  * calls the output routine of the loopback "driver", but with an interface
1822  * pointer that might NOT be &loif -- easier than replicating that code here.
1823  */
1824 static void
1825 ip_mloopback(ifp, m, dst)
1826 	struct ifnet *ifp;
1827 	register struct mbuf *m;
1828 	register struct sockaddr_in *dst;
1829 {
1830 	register struct ip *ip;
1831 	struct mbuf *copym;
1832 
1833 	copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
1834 	if (copym != NULL) {
1835 		/*
1836 		 * We don't bother to fragment if the IP length is greater
1837 		 * than the interface's MTU.  Can this possibly matter?
1838 		 */
1839 		ip = mtod(copym, struct ip *);
1840 		ip->ip_len = htons((u_int16_t)ip->ip_len);
1841 		ip->ip_off = htons((u_int16_t)ip->ip_off);
1842 		ip->ip_sum = 0;
1843 		ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1844 		(void) looutput(ifp, copym, sintosa(dst), NULL);
1845 	}
1846 }
1847 
1848 /*
1849  * Process a delayed payload checksum calculation.
1850  */
1851 void
1852 in_delayed_cksum(struct mbuf *m)
1853 {
1854 	struct ip *ip;
1855 	u_int16_t csum, offset;
1856 
1857 	ip = mtod(m, struct ip *);
1858 	offset = ip->ip_hl << 2;
1859 	csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1860 	if (csum == 0 && ip->ip_p == IPPROTO_UDP)
1861 		csum = 0xffff;
1862 
1863 	switch (ip->ip_p) {
1864 	case IPPROTO_TCP:
1865 		offset += offsetof(struct tcphdr, th_sum);
1866 		break;
1867 
1868 	case IPPROTO_UDP:
1869 		offset += offsetof(struct udphdr, uh_sum);
1870 		break;
1871 
1872 	default:
1873 		return;
1874 	}
1875 
1876 	if ((offset + sizeof(u_int16_t)) > m->m_len)
1877 		m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
1878 	else
1879 		*(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
1880 }
1881