xref: /openbsd-src/sys/netinet/ip_output.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: ip_output.c,v 1.134 2001/07/17 20:34:50 provos Exp $	*/
2 /*	$NetBSD: ip_output.c,v 1.28 1996/02/13 23:43:07 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
37  */
38 
39 #include "pf.h"
40 
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/mbuf.h>
44 #include <sys/protosw.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/proc.h>
48 #include <sys/kernel.h>
49 
50 #include <net/if.h>
51 #include <net/if_enc.h>
52 #include <net/route.h>
53 
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60 #include <netinet/ip_icmp.h>
61 #include <netinet/tcp.h>
62 #include <netinet/udp.h>
63 #include <netinet/tcp_timer.h>
64 #include <netinet/tcp_var.h>
65 #include <netinet/udp_var.h>
66 
67 #if NPF > 0
68 #include <net/pfvar.h>
69 #endif
70 
71 #ifdef vax
72 #include <machine/mtpr.h>
73 #endif
74 
75 #ifdef IPSEC
76 #ifdef ENCDEBUG
77 #define DPRINTF(x)    do { if (encdebug) printf x ; } while (0)
78 #else
79 #define DPRINTF(x)
80 #endif
81 
82 extern u_int8_t get_sa_require  __P((struct inpcb *));
83 
84 extern int ipsec_auth_default_level;
85 extern int ipsec_esp_trans_default_level;
86 extern int ipsec_esp_network_default_level;
87 extern int ipsec_ipcomp_default_level;
88 #endif /* IPSEC */
89 
90 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
91 static void ip_mloopback
92 	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
93 
94 /*
95  * IP output.  The packet in mbuf chain m contains a skeletal IP
96  * header (with len, off, ttl, proto, tos, src, dst).
97  * The mbuf chain containing the packet will be freed.
98  * The mbuf opt, if present, will not be freed.
99  */
100 int
101 #if __STDC__
102 ip_output(struct mbuf *m0, ...)
103 #else
104 ip_output(m0, va_alist)
105 	struct mbuf *m0;
106 	va_dcl
107 #endif
108 {
109 	register struct ip *ip, *mhip;
110 	register struct ifnet *ifp;
111 	struct mbuf *m = m0;
112 	register int hlen = sizeof (struct ip);
113 	int len, off, error = 0;
114 	struct route iproute;
115 	struct sockaddr_in *dst;
116 	struct in_ifaddr *ia;
117 	struct mbuf *opt;
118 	struct route *ro;
119 	int flags;
120 	struct ip_moptions *imo;
121 	va_list ap;
122 	u_int8_t sproto = 0, donerouting = 0;
123 #ifdef IPSEC
124 	u_int32_t icmp_mtu = 0;
125 	union sockaddr_union sdst;
126 	u_int32_t sspi;
127 	struct m_tag *mtag;
128 	struct tdb_ident *tdbi;
129 
130 	struct inpcb *inp;
131 	struct tdb *tdb;
132 	int s;
133 #endif /* IPSEC */
134 
135 	va_start(ap, m0);
136 	opt = va_arg(ap, struct mbuf *);
137 	ro = va_arg(ap, struct route *);
138 	flags = va_arg(ap, int);
139 	imo = va_arg(ap, struct ip_moptions *);
140 #ifdef IPSEC
141 	inp = va_arg(ap, struct inpcb *);
142 	if (inp && (inp->inp_flags & INP_IPV6) != 0)
143 		panic("ip_output: IPv6 pcb is passed");
144 #endif /* IPSEC */
145 	va_end(ap);
146 
147 #ifdef	DIAGNOSTIC
148 	if ((m->m_flags & M_PKTHDR) == 0)
149 		panic("ip_output no HDR");
150 #endif
151 	if (opt) {
152 		m = ip_insertoptions(m, opt, &len);
153 		hlen = len;
154 	}
155 
156 	ip = mtod(m, struct ip *);
157 
158 	/*
159 	 * Fill in IP header.
160 	 */
161 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
162 		ip->ip_v = IPVERSION;
163 		ip->ip_off &= IP_DF;
164 		ip->ip_id = htons(ip_randomid());
165 		ip->ip_hl = hlen >> 2;
166 		ipstat.ips_localout++;
167 	} else {
168 		hlen = ip->ip_hl << 2;
169 	}
170 
171 	/*
172 	 * If we're missing the IP source address, do a route lookup. We'll
173 	 * remember this result, in case we don't need to do any IPsec
174 	 * processing on the packet. We need the source address so we can
175 	 * do an SPD lookup in IPsec; for most packets, the source address
176 	 * is set at a higher level protocol. ICMPs and other packets
177 	 * though (e.g., traceroute) have a source address of zeroes.
178 	 */
179 	if (ip->ip_src.s_addr == INADDR_ANY) {
180 	        donerouting = 1;
181 
182 	        if (ro == 0) {
183 		        ro = &iproute;
184 			bzero((caddr_t)ro, sizeof (*ro));
185 		}
186 
187 		dst = satosin(&ro->ro_dst);
188 
189 		/*
190 		 * If there is a cached route, check that it is to the same
191 		 * destination and is still up.  If not, free it and try again.
192 		 */
193 		if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
194 				  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
195 		        RTFREE(ro->ro_rt);
196 			ro->ro_rt = (struct rtentry *)0;
197 		}
198 
199 		if (ro->ro_rt == 0) {
200 		        dst->sin_family = AF_INET;
201 			dst->sin_len = sizeof(*dst);
202 			dst->sin_addr = ip->ip_dst;
203 		}
204 
205 		/*
206 		 * If routing to interface only, short-circuit routing lookup.
207 		 */
208 		if (flags & IP_ROUTETOIF) {
209 		        if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
210 			    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
211 			    ipstat.ips_noroute++;
212 			    error = ENETUNREACH;
213 			    goto bad;
214 			}
215 
216 			ifp = ia->ia_ifp;
217 			ip->ip_ttl = 1;
218 		} else {
219 		        if (ro->ro_rt == 0)
220 			        rtalloc(ro);
221 
222 			if (ro->ro_rt == 0) {
223 			        ipstat.ips_noroute++;
224 				error = EHOSTUNREACH;
225 				goto bad;
226 			}
227 
228 			ia = ifatoia(ro->ro_rt->rt_ifa);
229 			ifp = ro->ro_rt->rt_ifp;
230 			ro->ro_rt->rt_use++;
231 
232 			if (ro->ro_rt->rt_flags & RTF_GATEWAY)
233 			        dst = satosin(ro->ro_rt->rt_gateway);
234 		}
235 
236 		/* Set the source IP address */
237                 if (!IN_MULTICAST(ip->ip_dst.s_addr))
238 		        ip->ip_src = ia->ia_addr.sin_addr;
239 	}
240 
241 #ifdef IPSEC
242 	/*
243 	 * splnet is chosen over spltdb because we are not allowed to
244 	 * lower the level, and udp_output calls us in splnet().
245 	 */
246 	s = splnet();
247 
248 	/* Do we have any pending SAs to apply ? */
249 	mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
250 	if (mtag != NULL) {
251 #ifdef DIAGNOSTIC
252 		if (mtag->m_tag_len != sizeof (struct tdb_ident))
253 			panic("ip_output: tag of length %d (should be %d",
254 			    mtag->m_tag_len, sizeof (struct tdb_ident));
255 #endif
256 		tdbi = (struct tdb_ident *)(mtag + 1);
257 		tdb = gettdb(tdbi->spi, &tdbi->dst, tdbi->proto);
258 		if (tdb == NULL)
259 			error = -EINVAL;
260 		m_tag_delete(m, mtag);
261 	}
262 	else
263 		tdb = ipsp_spd_lookup(m, AF_INET, hlen, &error,
264 		    IPSP_DIRECTION_OUT, NULL, inp);
265 
266 	if (tdb == NULL) {
267 	        splx(s);
268 
269 		if (error == 0) {
270 		        /*
271 			 * No IPsec processing required, we'll just send the
272 			 * packet out.
273 			 */
274 		        sproto = 0;
275 
276 			/* Fall through to routing/multicast handling */
277 		} else {
278 		        /*
279 			 * -EINVAL is used to indicate that the packet should
280 			 * be silently dropped, typically because we've asked
281 			 * key management for an SA.
282 			 */
283 		        if (error == -EINVAL) /* Should silently drop packet */
284 			  error = 0;
285 
286 			m_freem(m);
287 			goto done;
288 		}
289 	} else {
290 		/*
291 		 * If the socket has set the bypass flags and SA
292 		 * destination matches the IP destination, skip
293 		 * IPsec. This allows IKE packets to travel through
294 		 * IPsec tunnels.
295 		 */
296 		if ((inp != NULL) &&
297 		    (inp->inp_seclevel[SL_AUTH] == IPSEC_LEVEL_BYPASS) &&
298 		    (inp->inp_seclevel[SL_ESP_TRANS] == IPSEC_LEVEL_BYPASS) &&
299 		    (inp->inp_seclevel[SL_ESP_NETWORK] == IPSEC_LEVEL_BYPASS)
300 		    && (inp->inp_seclevel[SL_IPCOMP] == IPSEC_LEVEL_BYPASS)
301 		    && (sdst.sa.sa_family == AF_INET) &&
302 		    (sdst.sin.sin_addr.s_addr == ip->ip_dst.s_addr)) {
303 			splx(s);
304 			sproto = 0; /* mark as no-IPsec-needed */
305 			goto done_spd;
306 		}
307 
308 		/* Loop detection */
309 		for (mtag = m_tag_first(m); mtag != NULL;
310 		    mtag = m_tag_next(m, mtag)) {
311 			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
312 			    mtag->m_tag_id !=
313 			    PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
314 				continue;
315 			tdbi = (struct tdb_ident *)(mtag + 1);
316 			if (tdbi->spi == tdb->tdb_spi &&
317 			    tdbi->proto == tdb->tdb_sproto &&
318 			    !bcmp(&tdbi->dst, &tdb->tdb_dst,
319 			    sizeof(union sockaddr_union))) {
320 				splx(s);
321 				sproto = 0; /* mark as no-IPsec-needed */
322 				goto done_spd;
323 			}
324 		}
325 
326 	        /* We need to do IPsec */
327 	        bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst));
328 		sspi = tdb->tdb_spi;
329 		sproto = tdb->tdb_sproto;
330 		splx(s);
331 
332 		/*
333 		 * If it needs TCP/UDP hardware-checksumming, do the
334 		 * computation now.
335 		 */
336 		if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
337 			in_delayed_cksum(m);
338 			m->m_pkthdr.csum &=
339 			    ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
340 		}
341 
342 		/* If it's not a multicast packet, try to fast-path */
343 		if (!IN_MULTICAST(ip->ip_dst.s_addr)) {
344 			goto sendit;
345 		}
346 	}
347 
348 	/* Fall through to the routing/multicast handling code */
349  done_spd:
350 #endif /* IPSEC */
351 
352 	if (donerouting == 0) {
353 	        if (ro == 0) {
354 		        ro = &iproute;
355 			bzero((caddr_t)ro, sizeof (*ro));
356 		}
357 
358 		dst = satosin(&ro->ro_dst);
359 
360 		/*
361 		 * If there is a cached route, check that it is to the same
362 		 * destination and is still up.  If not, free it and try again.
363 		 */
364 		if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
365 				  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
366 		        RTFREE(ro->ro_rt);
367 			ro->ro_rt = (struct rtentry *)0;
368 		}
369 
370 		if (ro->ro_rt == 0) {
371 		        dst->sin_family = AF_INET;
372 			dst->sin_len = sizeof(*dst);
373 			dst->sin_addr = ip->ip_dst;
374 		}
375 
376 		/*
377 		 * If routing to interface only, short-circuit routing lookup.
378 		 */
379 		if (flags & IP_ROUTETOIF) {
380 		        if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
381 			    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
382 			    ipstat.ips_noroute++;
383 			    error = ENETUNREACH;
384 			    goto bad;
385 			}
386 
387 			ifp = ia->ia_ifp;
388 			ip->ip_ttl = 1;
389 		} else {
390 		        if (ro->ro_rt == 0)
391 			        rtalloc(ro);
392 
393 			if (ro->ro_rt == 0) {
394 			        ipstat.ips_noroute++;
395 				error = EHOSTUNREACH;
396 				goto bad;
397 			}
398 
399 			ia = ifatoia(ro->ro_rt->rt_ifa);
400 			ifp = ro->ro_rt->rt_ifp;
401 			ro->ro_rt->rt_use++;
402 
403 			if (ro->ro_rt->rt_flags & RTF_GATEWAY)
404 			        dst = satosin(ro->ro_rt->rt_gateway);
405 		}
406 
407 		/* Set the source IP address */
408 		if (ip->ip_src.s_addr == INADDR_ANY)
409 			ip->ip_src = ia->ia_addr.sin_addr;
410 	}
411 
412 	if (IN_MULTICAST(ip->ip_dst.s_addr) ||
413 	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) {
414 		struct in_multi *inm;
415 
416 		m->m_flags |= (ip->ip_dst.s_addr == INADDR_BROADCAST) ?
417 			M_BCAST : M_MCAST;
418 
419 		/*
420 		 * IP destination address is multicast.  Make sure "dst"
421 		 * still points to the address in "ro".  (It may have been
422 		 * changed to point to a gateway address, above.)
423 		 */
424 		dst = satosin(&ro->ro_dst);
425 
426 		/*
427 		 * See if the caller provided any multicast options
428 		 */
429 		if (imo != NULL) {
430 			ip->ip_ttl = imo->imo_multicast_ttl;
431 			if (imo->imo_multicast_ifp != NULL)
432 				ifp = imo->imo_multicast_ifp;
433 		} else
434 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
435 
436 		/*
437 		 * Confirm that the outgoing interface supports multicast,
438 		 * but only if the packet actually is going out on that
439 		 * interface (i.e., no IPsec is applied).
440 		 */
441 		if ((((m->m_flags & M_MCAST) &&
442 		      (ifp->if_flags & IFF_MULTICAST) == 0) ||
443 		     ((m->m_flags & M_BCAST) &&
444 		      (ifp->if_flags & IFF_BROADCAST) == 0)) && (sproto == 0))  {
445 			ipstat.ips_noroute++;
446 			error = ENETUNREACH;
447 			goto bad;
448 		}
449 
450 		/*
451 		 * If source address not specified yet, use address
452 		 * of outgoing interface.
453 		 */
454 		if (ip->ip_src.s_addr == INADDR_ANY) {
455 			register struct in_ifaddr *ia;
456 
457 			for (ia = in_ifaddr.tqh_first;
458 			     ia;
459 			     ia = ia->ia_list.tqe_next)
460 				if (ia->ia_ifp == ifp) {
461 					ip->ip_src = ia->ia_addr.sin_addr;
462 					break;
463 				}
464 		}
465 
466 		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
467 		if (inm != NULL &&
468 		   (imo == NULL || imo->imo_multicast_loop)) {
469 			/*
470 			 * If we belong to the destination multicast group
471 			 * on the outgoing interface, and the caller did not
472 			 * forbid loopback, loop back a copy.
473 			 */
474 			ip_mloopback(ifp, m, dst);
475 		}
476 #ifdef MROUTING
477 		else {
478 			/*
479 			 * If we are acting as a multicast router, perform
480 			 * multicast forwarding as if the packet had just
481 			 * arrived on the interface to which we are about
482 			 * to send.  The multicast forwarding function
483 			 * recursively calls this function, using the
484 			 * IP_FORWARDING flag to prevent infinite recursion.
485 			 *
486 			 * Multicasts that are looped back by ip_mloopback(),
487 			 * above, will be forwarded by the ip_input() routine,
488 			 * if necessary.
489 			 */
490 			extern struct socket *ip_mrouter;
491 
492 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
493 				if (ip_mforward(m, ifp) != 0) {
494 					m_freem(m);
495 					goto done;
496 				}
497 			}
498 		}
499 #endif
500 		/*
501 		 * Multicasts with a time-to-live of zero may be looped-
502 		 * back, above, but must not be transmitted on a network.
503 		 * Also, multicasts addressed to the loopback interface
504 		 * are not sent -- the above call to ip_mloopback() will
505 		 * loop back a copy if this host actually belongs to the
506 		 * destination group on the loopback interface.
507 		 */
508 		if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
509 			m_freem(m);
510 			goto done;
511 		}
512 
513 		goto sendit;
514 	}
515 
516 	/*
517 	 * Look for broadcast address and and verify user is allowed to send
518 	 * such a packet; if the packet is going in an IPsec tunnel, skip
519 	 * this check.
520 	 */
521 	if ((sproto == 0) && (in_broadcast(dst->sin_addr, ifp))) {
522 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
523 			error = EADDRNOTAVAIL;
524 			goto bad;
525 		}
526 		if ((flags & IP_ALLOWBROADCAST) == 0) {
527 			error = EACCES;
528 			goto bad;
529 		}
530 
531 		/* Don't allow broadcast messages to be fragmented */
532 		if ((u_int16_t)ip->ip_len > ifp->if_mtu) {
533 			error = EMSGSIZE;
534 			goto bad;
535 		}
536 		m->m_flags |= M_BCAST;
537 	} else
538 		m->m_flags &= ~M_BCAST;
539 
540 sendit:
541         /*
542          * If we're doing Path MTU discovery, we need to set DF unless
543          * the route's MTU is locked.
544 	 */
545 	if ((flags & IP_MTUDISC) && ro && ro->ro_rt &&
546 	    (ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0)
547 		ip->ip_off |= IP_DF;
548 
549 #ifdef IPSEC
550 	/*
551 	 * Check if the packet needs encapsulation.
552 	 */
553 	if (sproto != 0) {
554 	        s = splnet();
555 
556 		/*
557 		 * Packet filter
558 		 */
559 #if NPF > 0
560 
561 		if (pf_test(PF_OUT, &encif[0].sc_if, &m) != PF_PASS) {
562 			error = EHOSTUNREACH;
563 			splx(s);
564 			m_freem(m);
565 			goto done;
566 		}
567 		ip = mtod(m, struct ip *);
568 		hlen = ip->ip_hl << 2;
569 #endif
570 
571 		tdb = gettdb(sspi, &sdst, sproto);
572 		if (tdb == NULL) {
573 			error = EHOSTUNREACH;
574 			splx(s);
575 			m_freem(m);
576 			goto done;
577 		}
578 
579 		/* Latch to PCB */
580 		if (inp)
581 		        tdb_add_inp(tdb, inp, 0);
582 
583 		/* Check if we are allowed to fragment */
584 		if ((ip->ip_off & IP_DF) && tdb->tdb_mtu &&
585 		    (u_int16_t)ip->ip_len > tdb->tdb_mtu &&
586 		    tdb->tdb_mtutimeout > time.tv_sec) {
587 			struct rtentry *rt;
588 
589 			icmp_mtu = tdb->tdb_mtu;
590 			splx(s);
591 
592 			/* Find a host route to store the mtu in */
593 			if (ro != NULL)
594 				rt = ro->ro_rt;
595 			if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
596 				struct sockaddr_in dst = {
597 					sizeof(struct sockaddr_in), AF_INET};
598 				dst.sin_addr = ip->ip_dst;
599 				rt = icmp_mtudisc_clone((struct sockaddr *)&dst);
600 			}
601 			if (rt != NULL) {
602 				rt->rt_rmx.rmx_mtu = icmp_mtu;
603 				if (ro && ro->ro_rt != NULL) {
604 					RTFREE(ro->ro_rt);
605 					ro->ro_rt = (struct rtentry *) 0;
606 					rtalloc(ro);
607 				}
608 			}
609 			error = EMSGSIZE;
610 			goto bad;
611 		}
612 
613 		/* Massage the IP header for use by the IPsec code */
614 		ip->ip_len = htons((u_short) ip->ip_len);
615 		ip->ip_off = htons((u_short) ip->ip_off);
616 
617 		/*
618 		 * Clear these -- they'll be set in the recursive invocation
619 		 * as needed.
620 		 */
621 		m->m_flags &= ~(M_MCAST | M_BCAST);
622 
623 		/* Callee frees mbuf */
624 		error = ipsp_process_packet(m, tdb, AF_INET, 0);
625 		splx(s);
626 		return error;  /* Nothing more to be done */
627 	}
628 
629 	/*
630 	 * If deferred crypto processing is needed, check that the
631 	 * interface supports it.
632 	 */
633 	if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED, NULL))
634 	    != NULL && (ifp->if_capabilities & IFCAP_IPSEC) == 0) {
635 		/* Notify IPsec to do its own crypto. */
636 		ipsp_skipcrypto_unmark((struct tdb_ident *)(mtag + 1));
637 		m_freem(m);
638 		error = EHOSTUNREACH;
639 		goto done;
640 	}
641 #endif /* IPSEC */
642 
643 	/* Catch routing changes wrt. hardware checksumming for TCP or UDP. */
644 	if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT) {
645 		if (!(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
646 		    ifp->if_bridge != NULL) {
647 			in_delayed_cksum(m);
648 			m->m_pkthdr.csum &= ~M_TCPV4_CSUM_OUT; /* Clear */
649 		}
650 	} else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT) {
651 		if (!(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
652 		    ifp->if_bridge != NULL) {
653 			in_delayed_cksum(m);
654 			m->m_pkthdr.csum &= ~M_UDPV4_CSUM_OUT; /* Clear */
655 		}
656 	}
657 
658 	/*
659 	 * Packet filter
660 	 */
661 #if NPF > 0
662 	if (pf_test(PF_OUT, ifp, &m) != PF_PASS) {
663 		error = EHOSTUNREACH;
664 		m_freem(m);
665 		goto done;
666 	}
667 	ip = mtod(m, struct ip *);
668 	hlen = ip->ip_hl << 2;
669 #endif
670 
671 	/*
672 	 * If small enough for interface, can just send directly.
673 	 */
674 	if ((u_int16_t)ip->ip_len <= ifp->if_mtu) {
675 		ip->ip_len = htons((u_int16_t)ip->ip_len);
676 		ip->ip_off = htons((u_int16_t)ip->ip_off);
677 		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
678 		    ifp->if_bridge == NULL) {
679 			m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
680 			ipstat.ips_outhwcsum++;
681 		} else {
682 			ip->ip_sum = 0;
683 			ip->ip_sum = in_cksum(m, hlen);
684 		}
685 		/* Update relevant hardware checksum stats for TCP/UDP */
686 		if (m->m_pkthdr.csum & M_TCPV4_CSUM_OUT)
687 			tcpstat.tcps_outhwcsum++;
688 		else if (m->m_pkthdr.csum & M_UDPV4_CSUM_OUT)
689 			udpstat.udps_outhwcsum++;
690 		error = (*ifp->if_output)(ifp, m, sintosa(dst), ro->ro_rt);
691 		goto done;
692 	}
693 
694 	/*
695 	 * Too large for interface; fragment if possible.
696 	 * Must be able to put at least 8 bytes per fragment.
697 	 */
698 	if (ip->ip_off & IP_DF) {
699 #ifdef IPSEC
700 		icmp_mtu = ifp->if_mtu;
701 #endif
702 		error = EMSGSIZE;
703 		/*
704 		 * This case can happen if the user changed the MTU
705 		 * of an interface after enabling IP on it.  Because
706 		 * most netifs don't keep track of routes pointing to
707 		 * them, there is no way for one to update all its
708 		 * routes when the MTU is changed.
709 		 */
710 		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
711 		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
712 		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
713 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
714 		}
715 		ipstat.ips_cantfrag++;
716 		goto bad;
717 	}
718 	len = (ifp->if_mtu - hlen) &~ 7;
719 	if (len < 8) {
720 		error = EMSGSIZE;
721 		goto bad;
722 	}
723 
724 	/*
725 	 * If we are doing fragmentation, we can't defer TCP/UDP
726 	 * checksumming; compute the checksum and clear the flag.
727 	 */
728 	if (m->m_pkthdr.csum & (M_TCPV4_CSUM_OUT | M_UDPV4_CSUM_OUT)) {
729 		in_delayed_cksum(m);
730 		m->m_pkthdr.csum &= ~(M_UDPV4_CSUM_OUT | M_TCPV4_CSUM_OUT);
731 	}
732 
733     {
734 	int mhlen, firstlen = len;
735 	struct mbuf **mnext = &m->m_nextpkt;
736 
737 	/*
738 	 * Loop through length of segment after first fragment,
739 	 * make new header and copy data of each part and link onto chain.
740 	 */
741 	m0 = m;
742 	mhlen = sizeof (struct ip);
743 	for (off = hlen + len; off < (u_int16_t)ip->ip_len; off += len) {
744 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
745 		if (m == 0) {
746 			error = ENOBUFS;
747 			ipstat.ips_odropped++;
748 			goto sendorfree;
749 		}
750 		*mnext = m;
751 		mnext = &m->m_nextpkt;
752 		m->m_data += max_linkhdr;
753 		mhip = mtod(m, struct ip *);
754 		*mhip = *ip;
755 		/* we must inherit MCAST and BCAST flags */
756 		m->m_flags |= m0->m_flags & (M_MCAST|M_BCAST);
757 		if (hlen > sizeof (struct ip)) {
758 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
759 			mhip->ip_hl = mhlen >> 2;
760 		}
761 		m->m_len = mhlen;
762 		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
763 		if (ip->ip_off & IP_MF)
764 			mhip->ip_off |= IP_MF;
765 		if (off + len >= (u_int16_t)ip->ip_len)
766 			len = (u_int16_t)ip->ip_len - off;
767 		else
768 			mhip->ip_off |= IP_MF;
769 		mhip->ip_len = htons((u_int16_t)(len + mhlen));
770 		m->m_next = m_copy(m0, off, len);
771 		if (m->m_next == 0) {
772 			error = ENOBUFS;	/* ??? */
773 			ipstat.ips_odropped++;
774 			goto sendorfree;
775 		}
776 		m->m_pkthdr.len = mhlen + len;
777 		m->m_pkthdr.rcvif = (struct ifnet *)0;
778 		mhip->ip_off = htons((u_int16_t)mhip->ip_off);
779 		if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
780 		    ifp->if_bridge == NULL) {
781 			m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
782 			ipstat.ips_outhwcsum++;
783 		} else {
784 			mhip->ip_sum = 0;
785 			mhip->ip_sum = in_cksum(m, mhlen);
786 		}
787 		ipstat.ips_ofragments++;
788 	}
789 	/*
790 	 * Update first fragment by trimming what's been copied out
791 	 * and updating header, then send each fragment (in order).
792 	 */
793 	m = m0;
794 	m_adj(m, hlen + firstlen - (u_int16_t)ip->ip_len);
795 	m->m_pkthdr.len = hlen + firstlen;
796 	ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
797 	ip->ip_off = htons((u_int16_t)(ip->ip_off | IP_MF));
798 	if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
799 	    ifp->if_bridge == NULL) {
800 		m->m_pkthdr.csum |= M_IPV4_CSUM_OUT;
801 		ipstat.ips_outhwcsum++;
802 	} else {
803 		ip->ip_sum = 0;
804 		ip->ip_sum = in_cksum(m, hlen);
805 	}
806 sendorfree:
807 	for (m = m0; m; m = m0) {
808 		m0 = m->m_nextpkt;
809 		m->m_nextpkt = 0;
810 		if (error == 0)
811 			error = (*ifp->if_output)(ifp, m, sintosa(dst),
812 			    ro->ro_rt);
813 		else
814 			m_freem(m);
815 	}
816 
817 	if (error == 0)
818 		ipstat.ips_fragmented++;
819     }
820 done:
821 	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
822 		RTFREE(ro->ro_rt);
823 	return (error);
824 bad:
825 #ifdef IPSEC
826 	if (error == EMSGSIZE && icmp_mtu != 0)
827 		ipsec_adjust_mtu(m, icmp_mtu);
828 #endif
829 	m_freem(m0);
830 	goto done;
831 }
832 
833 /*
834  * Insert IP options into preformed packet.
835  * Adjust IP destination as required for IP source routing,
836  * as indicated by a non-zero in_addr at the start of the options.
837  */
838 static struct mbuf *
839 ip_insertoptions(m, opt, phlen)
840 	register struct mbuf *m;
841 	struct mbuf *opt;
842 	int *phlen;
843 {
844 	register struct ipoption *p = mtod(opt, struct ipoption *);
845 	struct mbuf *n;
846 	register struct ip *ip = mtod(m, struct ip *);
847 	unsigned optlen;
848 
849 	optlen = opt->m_len - sizeof(p->ipopt_dst);
850 	if (optlen + (u_int16_t)ip->ip_len > IP_MAXPACKET)
851 		return (m);		/* XXX should fail */
852 	if (p->ipopt_dst.s_addr)
853 		ip->ip_dst = p->ipopt_dst;
854 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
855 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
856 		if (n == 0)
857 			return (m);
858 		M_MOVE_HDR(n, m);
859 		n->m_pkthdr.len += optlen;
860 		m->m_len -= sizeof(struct ip);
861 		m->m_data += sizeof(struct ip);
862 		n->m_next = m;
863 		m = n;
864 		m->m_len = optlen + sizeof(struct ip);
865 		m->m_data += max_linkhdr;
866 		bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
867 	} else {
868 		m->m_data -= optlen;
869 		m->m_len += optlen;
870 		m->m_pkthdr.len += optlen;
871 		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
872 	}
873 	ip = mtod(m, struct ip *);
874 	bcopy((caddr_t)p->ipopt_list, (caddr_t)(ip + 1), (unsigned)optlen);
875 	*phlen = sizeof(struct ip) + optlen;
876 	ip->ip_len += optlen;
877 	return (m);
878 }
879 
880 /*
881  * Copy options from ip to jp,
882  * omitting those not copied during fragmentation.
883  */
884 int
885 ip_optcopy(ip, jp)
886 	struct ip *ip, *jp;
887 {
888 	register u_char *cp, *dp;
889 	int opt, optlen, cnt;
890 
891 	cp = (u_char *)(ip + 1);
892 	dp = (u_char *)(jp + 1);
893 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
894 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
895 		opt = cp[0];
896 		if (opt == IPOPT_EOL)
897 			break;
898 		if (opt == IPOPT_NOP) {
899 			/* Preserve for IP mcast tunnel's LSRR alignment. */
900 			*dp++ = IPOPT_NOP;
901 			optlen = 1;
902 			continue;
903 		}
904 #ifdef DIAGNOSTIC
905 		if (cnt < IPOPT_OLEN + sizeof(*cp))
906 			panic("malformed IPv4 option passed to ip_optcopy");
907 #endif
908 		optlen = cp[IPOPT_OLEN];
909 #ifdef DIAGNOSTIC
910 		if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
911 			panic("malformed IPv4 option passed to ip_optcopy");
912 #endif
913 		/* bogus lengths should have been caught by ip_dooptions */
914 		if (optlen > cnt)
915 			optlen = cnt;
916 		if (IPOPT_COPIED(opt)) {
917 			bcopy((caddr_t)cp, (caddr_t)dp, (unsigned)optlen);
918 			dp += optlen;
919 		}
920 	}
921 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
922 		*dp++ = IPOPT_EOL;
923 	return (optlen);
924 }
925 
926 /*
927  * IP socket option processing.
928  */
929 int
930 ip_ctloutput(op, so, level, optname, mp)
931 	int op;
932 	struct socket *so;
933 	int level, optname;
934 	struct mbuf **mp;
935 {
936 	register struct inpcb *inp = sotoinpcb(so);
937 	register struct mbuf *m = *mp;
938 	register int optval = 0;
939 #ifdef IPSEC
940 	struct proc *p = curproc; /* XXX */
941 	struct ipsec_ref *ipr;
942 	u_int16_t opt16val;
943 #endif
944 	int error = 0;
945 
946 	if (level != IPPROTO_IP) {
947 		error = EINVAL;
948 		if (op == PRCO_SETOPT && *mp)
949 			(void) m_free(*mp);
950 	} else switch (op) {
951 	case PRCO_SETOPT:
952 		switch (optname) {
953 		case IP_OPTIONS:
954 #ifdef notyet
955 		case IP_RETOPTS:
956 			return (ip_pcbopts(optname, &inp->inp_options, m));
957 #else
958 			return (ip_pcbopts(&inp->inp_options, m));
959 #endif
960 
961 		case IP_TOS:
962 		case IP_TTL:
963 		case IP_RECVOPTS:
964 		case IP_RECVRETOPTS:
965 		case IP_RECVDSTADDR:
966 			if (m == NULL || m->m_len != sizeof(int))
967 				error = EINVAL;
968 			else {
969 				optval = *mtod(m, int *);
970 				switch (optname) {
971 
972 				case IP_TOS:
973 					inp->inp_ip.ip_tos = optval;
974 					break;
975 
976 				case IP_TTL:
977 					inp->inp_ip.ip_ttl = optval;
978 					break;
979 #define	OPTSET(bit) \
980 	if (optval) \
981 		inp->inp_flags |= bit; \
982 	else \
983 		inp->inp_flags &= ~bit;
984 
985 				case IP_RECVOPTS:
986 					OPTSET(INP_RECVOPTS);
987 					break;
988 
989 				case IP_RECVRETOPTS:
990 					OPTSET(INP_RECVRETOPTS);
991 					break;
992 
993 				case IP_RECVDSTADDR:
994 					OPTSET(INP_RECVDSTADDR);
995 					break;
996 				}
997 			}
998 			break;
999 #undef OPTSET
1000 
1001 		case IP_MULTICAST_IF:
1002 		case IP_MULTICAST_TTL:
1003 		case IP_MULTICAST_LOOP:
1004 		case IP_ADD_MEMBERSHIP:
1005 		case IP_DROP_MEMBERSHIP:
1006 			error = ip_setmoptions(optname, &inp->inp_moptions, m);
1007 			break;
1008 
1009 		case IP_PORTRANGE:
1010 			if (m == 0 || m->m_len != sizeof(int))
1011 				error = EINVAL;
1012 			else {
1013 				optval = *mtod(m, int *);
1014 
1015 				switch (optval) {
1016 
1017 				case IP_PORTRANGE_DEFAULT:
1018 					inp->inp_flags &= ~(INP_LOWPORT);
1019 					inp->inp_flags &= ~(INP_HIGHPORT);
1020 					break;
1021 
1022 				case IP_PORTRANGE_HIGH:
1023 					inp->inp_flags &= ~(INP_LOWPORT);
1024 					inp->inp_flags |= INP_HIGHPORT;
1025 					break;
1026 
1027 				case IP_PORTRANGE_LOW:
1028 					inp->inp_flags &= ~(INP_HIGHPORT);
1029 					inp->inp_flags |= INP_LOWPORT;
1030 					break;
1031 
1032 				default:
1033 
1034 					error = EINVAL;
1035 					break;
1036 				}
1037 			}
1038 			break;
1039 		case IP_AUTH_LEVEL:
1040 		case IP_ESP_TRANS_LEVEL:
1041 		case IP_ESP_NETWORK_LEVEL:
1042 #ifndef IPSEC
1043 			error = EOPNOTSUPP;
1044 #else
1045 			if (m == 0 || m->m_len != sizeof(int)) {
1046 				error = EINVAL;
1047 				break;
1048 			}
1049 			optval = *mtod(m, int *);
1050 
1051 			if (optval < IPSEC_LEVEL_BYPASS ||
1052 			    optval > IPSEC_LEVEL_UNIQUE) {
1053 				error = EINVAL;
1054 				break;
1055 			}
1056 
1057 			/* Unlink cached output TDB to force a re-search */
1058 			if (inp->inp_tdb_out) {
1059 				int s = spltdb();
1060 				TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1061 				    inp, inp_tdb_out_next);
1062 				splx(s);
1063 			}
1064 
1065 			if (inp->inp_tdb_in) {
1066 				int s = spltdb();
1067 				TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1068 				    inp, inp_tdb_in_next);
1069 				splx(s);
1070 			}
1071 
1072 			switch (optname) {
1073 			case IP_AUTH_LEVEL:
1074 			        if (optval < ipsec_auth_default_level &&
1075 				    suser(p->p_ucred, &p->p_acflag)) {
1076 					error = EACCES;
1077 					break;
1078 				}
1079 				inp->inp_seclevel[SL_AUTH] = optval;
1080 				break;
1081 
1082 			case IP_ESP_TRANS_LEVEL:
1083 			        if (optval < ipsec_esp_trans_default_level &&
1084 				    suser(p->p_ucred, &p->p_acflag)) {
1085 					error = EACCES;
1086 					break;
1087 				}
1088 				inp->inp_seclevel[SL_ESP_TRANS] = optval;
1089 				break;
1090 
1091 			case IP_ESP_NETWORK_LEVEL:
1092 			        if (optval < ipsec_esp_network_default_level &&
1093 				    suser(p->p_ucred, &p->p_acflag)) {
1094 					error = EACCES;
1095 					break;
1096 				}
1097 				inp->inp_seclevel[SL_ESP_NETWORK] = optval;
1098 				break;
1099 			case IP_IPCOMP_LEVEL:
1100 			        if (optval < ipsec_ipcomp_default_level &&
1101 				    suser(p->p_ucred, &p->p_acflag)) {
1102 				        error = EACCES;
1103 					break;
1104 				}
1105 				inp->inp_seclevel[SL_IPCOMP] = optval;
1106 				break;
1107 			}
1108 			if (!error)
1109 				inp->inp_secrequire = get_sa_require(inp);
1110 #endif
1111 			break;
1112 
1113 		case IP_IPSEC_REMOTE_CRED:
1114 		case IP_IPSEC_REMOTE_AUTH:
1115 			/* Can't set the remote credential or key */
1116 			error = EOPNOTSUPP;
1117 			break;
1118 
1119 		case IP_IPSEC_LOCAL_ID:
1120 		case IP_IPSEC_REMOTE_ID:
1121 		case IP_IPSEC_LOCAL_CRED:
1122 		case IP_IPSEC_LOCAL_AUTH:
1123 #ifndef IPSEC
1124 			error = EOPNOTSUPP;
1125 #else
1126 			if (m->m_len < 2) {
1127 				error = EINVAL;
1128 				break;
1129 			}
1130 
1131 			m_copydata(m, 0, 2, (caddr_t) &opt16val);
1132 
1133 			/* If the type is 0, then we cleanup and return */
1134 			if (opt16val == 0) {
1135 				switch (optname) {
1136 				case IP_IPSEC_LOCAL_ID:
1137 					if (inp->inp_ipsec_localid != NULL)
1138 						ipsp_reffree(inp->inp_ipsec_localid);
1139 					inp->inp_ipsec_localid = NULL;
1140 					break;
1141 
1142 				case IP_IPSEC_REMOTE_ID:
1143 					if (inp->inp_ipsec_remoteid != NULL)
1144 						ipsp_reffree(inp->inp_ipsec_remoteid);
1145 					inp->inp_ipsec_remoteid = NULL;
1146 					break;
1147 
1148 				case IP_IPSEC_LOCAL_CRED:
1149 					if (inp->inp_ipsec_localcred != NULL)
1150 						ipsp_reffree(inp->inp_ipsec_localcred);
1151 					inp->inp_ipsec_localcred = NULL;
1152 					break;
1153 
1154 				case IP_IPSEC_LOCAL_AUTH:
1155 					if (inp->inp_ipsec_localauth != NULL)
1156 						ipsp_reffree(inp->inp_ipsec_localauth);
1157 					inp->inp_ipsec_localauth = NULL;
1158 					break;
1159 				}
1160 
1161 				error = 0;
1162 				break;
1163 			}
1164 
1165 			/* Can't have an empty payload */
1166 			if (m->m_len == 2) {
1167 				error = EINVAL;
1168 				break;
1169 			}
1170 
1171 			MALLOC(ipr, struct ipsec_ref *,
1172 			       sizeof(struct ipsec_ref) + m->m_len - 2,
1173 			       M_CREDENTIALS, M_NOWAIT);
1174 			if (ipr == NULL) {
1175 				error = ENOBUFS;
1176 				break;
1177 			}
1178 			ipr->ref_count = 1;
1179 			ipr->ref_malloctype = M_CREDENTIALS;
1180 			ipr->ref_len = m->m_len - 2;
1181 			ipr->ref_type = opt16val;
1182 			m_copydata(m, 2, m->m_len - 2, (caddr_t)(ipr + 1));
1183 
1184 			switch (optname) {
1185 			case IP_IPSEC_LOCAL_ID:
1186 				/* Check valid types and NUL-termination */
1187 				if (ipr->ref_type < IPSP_IDENTITY_PREFIX
1188 				    || ipr->ref_type > IPSP_IDENTITY_CONNECTION
1189 				    || ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1190 					FREE(ipr, M_CREDENTIALS);
1191 					error = EINVAL;
1192 				} else {
1193 					if (inp->inp_ipsec_localid != NULL)
1194 						ipsp_reffree(inp->inp_ipsec_localid);
1195 					inp->inp_ipsec_localid = ipr;
1196 				}
1197 				break;
1198 			case IP_IPSEC_REMOTE_ID:
1199 				/* Check valid types and NUL-termination */
1200 				if (ipr->ref_type < IPSP_IDENTITY_PREFIX
1201 				    || ipr->ref_type > IPSP_IDENTITY_CONNECTION
1202 				    || ((char *)(ipr + 1))[ipr->ref_len - 1]) {
1203 					FREE(ipr, M_CREDENTIALS);
1204 					error = EINVAL;
1205 				} else {
1206 					if (inp->inp_ipsec_remoteid != NULL)
1207 						ipsp_reffree(inp->inp_ipsec_remoteid);
1208 					inp->inp_ipsec_remoteid = ipr;
1209 				}
1210 				break;
1211 			case IP_IPSEC_LOCAL_CRED:
1212 				if (ipr->ref_type < IPSP_CRED_KEYNOTE ||
1213 				    ipr->ref_type > IPSP_CRED_X509) {
1214 					FREE(ipr, M_CREDENTIALS);
1215 					error = EINVAL;
1216 				} else {
1217 					if (inp->inp_ipsec_localcred != NULL)
1218 						ipsp_reffree(inp->inp_ipsec_localcred);
1219 					inp->inp_ipsec_localcred = ipr;
1220 				}
1221 				break;
1222 			case IP_IPSEC_LOCAL_AUTH:
1223 				if (ipr->ref_type < IPSP_AUTH_PASSPHRASE ||
1224 				    ipr->ref_type > IPSP_AUTH_RSA) {
1225 					FREE(ipr, M_CREDENTIALS);
1226 					error = EINVAL;
1227 				} else {
1228 					if (inp->inp_ipsec_localauth != NULL)
1229 						ipsp_reffree(inp->inp_ipsec_localauth);
1230 					inp->inp_ipsec_localauth = ipr;
1231 				}
1232 				break;
1233 			}
1234 
1235 			/* Unlink cached output TDB to force a re-search */
1236 			if (inp->inp_tdb_out) {
1237 				int s = spltdb();
1238 				TAILQ_REMOVE(&inp->inp_tdb_out->tdb_inp_out,
1239 				    inp, inp_tdb_out_next);
1240 				splx(s);
1241 			}
1242 
1243 			if (inp->inp_tdb_in) {
1244 				int s = spltdb();
1245 				TAILQ_REMOVE(&inp->inp_tdb_in->tdb_inp_in,
1246 				    inp, inp_tdb_in_next);
1247 				splx(s);
1248 			}
1249 #endif
1250 			break;
1251 		default:
1252 			error = ENOPROTOOPT;
1253 			break;
1254 		}
1255 		if (m)
1256 			(void)m_free(m);
1257 		break;
1258 
1259 	case PRCO_GETOPT:
1260 		switch (optname) {
1261 		case IP_OPTIONS:
1262 		case IP_RETOPTS:
1263 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1264 			if (inp->inp_options) {
1265 				m->m_len = inp->inp_options->m_len;
1266 				bcopy(mtod(inp->inp_options, caddr_t),
1267 				    mtod(m, caddr_t), (unsigned)m->m_len);
1268 			} else
1269 				m->m_len = 0;
1270 			break;
1271 
1272 		case IP_TOS:
1273 		case IP_TTL:
1274 		case IP_RECVOPTS:
1275 		case IP_RECVRETOPTS:
1276 		case IP_RECVDSTADDR:
1277 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1278 			m->m_len = sizeof(int);
1279 			switch (optname) {
1280 
1281 			case IP_TOS:
1282 				optval = inp->inp_ip.ip_tos;
1283 				break;
1284 
1285 			case IP_TTL:
1286 				optval = inp->inp_ip.ip_ttl;
1287 				break;
1288 
1289 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1290 
1291 			case IP_RECVOPTS:
1292 				optval = OPTBIT(INP_RECVOPTS);
1293 				break;
1294 
1295 			case IP_RECVRETOPTS:
1296 				optval = OPTBIT(INP_RECVRETOPTS);
1297 				break;
1298 
1299 			case IP_RECVDSTADDR:
1300 				optval = OPTBIT(INP_RECVDSTADDR);
1301 				break;
1302 			}
1303 			*mtod(m, int *) = optval;
1304 			break;
1305 
1306 		case IP_MULTICAST_IF:
1307 		case IP_MULTICAST_TTL:
1308 		case IP_MULTICAST_LOOP:
1309 		case IP_ADD_MEMBERSHIP:
1310 		case IP_DROP_MEMBERSHIP:
1311 			error = ip_getmoptions(optname, inp->inp_moptions, mp);
1312 			break;
1313 
1314 		case IP_PORTRANGE:
1315 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1316 			m->m_len = sizeof(int);
1317 
1318 			if (inp->inp_flags & INP_HIGHPORT)
1319 				optval = IP_PORTRANGE_HIGH;
1320 			else if (inp->inp_flags & INP_LOWPORT)
1321 				optval = IP_PORTRANGE_LOW;
1322 			else
1323 				optval = 0;
1324 
1325 			*mtod(m, int *) = optval;
1326 			break;
1327 
1328 		case IP_AUTH_LEVEL:
1329 		case IP_ESP_TRANS_LEVEL:
1330 		case IP_ESP_NETWORK_LEVEL:
1331 		case IP_IPCOMP_LEVEL:
1332 #ifndef IPSEC
1333 			m->m_len = sizeof(int);
1334 			*mtod(m, int *) = IPSEC_LEVEL_NONE;
1335 #else
1336 			m->m_len = sizeof(int);
1337 			switch (optname) {
1338 			case IP_AUTH_LEVEL:
1339 				optval = inp->inp_seclevel[SL_AUTH];
1340 				break;
1341 
1342 			case IP_ESP_TRANS_LEVEL:
1343 				optval = inp->inp_seclevel[SL_ESP_TRANS];
1344 				break;
1345 
1346 			case IP_ESP_NETWORK_LEVEL:
1347 				optval = inp->inp_seclevel[SL_ESP_NETWORK];
1348 				break;
1349 			case IP_IPCOMP_LEVEL:
1350 			        optval = inp->inp_seclevel[SL_IPCOMP];
1351 				break;
1352 			}
1353 			*mtod(m, int *) = optval;
1354 #endif
1355 			break;
1356 		case IP_IPSEC_LOCAL_ID:
1357 		case IP_IPSEC_REMOTE_ID:
1358 		case IP_IPSEC_LOCAL_CRED:
1359 		case IP_IPSEC_REMOTE_CRED:
1360 		case IP_IPSEC_LOCAL_AUTH:
1361 		case IP_IPSEC_REMOTE_AUTH:
1362 #ifndef IPSEC
1363 			error = EOPNOTSUPP;
1364 #else
1365 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
1366 			m->m_len = sizeof(u_int16_t);
1367 			switch (optname) {
1368 			case IP_IPSEC_LOCAL_ID:
1369 				ipr = inp->inp_ipsec_localid;
1370 				opt16val = IPSP_IDENTITY_NONE;
1371 				break;
1372 			case IP_IPSEC_REMOTE_ID:
1373 				ipr = inp->inp_ipsec_remoteid;
1374 				opt16val = IPSP_IDENTITY_NONE;
1375 				break;
1376 			case IP_IPSEC_LOCAL_CRED:
1377 				ipr = inp->inp_ipsec_localcred;
1378 				opt16val = IPSP_CRED_NONE;
1379 				break;
1380 			case IP_IPSEC_REMOTE_CRED:
1381 				ipr = inp->inp_ipsec_remotecred;
1382 				opt16val = IPSP_CRED_NONE;
1383 				break;
1384 			case IP_IPSEC_LOCAL_AUTH:
1385 				ipr = inp->inp_ipsec_localauth;
1386 				break;
1387 			case IP_IPSEC_REMOTE_AUTH:
1388 				ipr = inp->inp_ipsec_remoteauth;
1389 				break;
1390 			}
1391 			if (ipr == NULL)
1392 				*mtod(m, u_int16_t *) = opt16val;
1393 			else {
1394 				m->m_len += ipr->ref_len;
1395 				*mtod(m, u_int16_t *) = ipr->ref_type;
1396 				m_copyback(m, sizeof(u_int16_t), ipr->ref_len,
1397 					   (caddr_t)(ipr + 1));
1398 			}
1399 #endif
1400 			break;
1401 		default:
1402 			error = ENOPROTOOPT;
1403 			break;
1404 		}
1405 		break;
1406 	}
1407 	return (error);
1408 }
1409 
1410 /*
1411  * Set up IP options in pcb for insertion in output packets.
1412  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1413  * with destination address if source routed.
1414  */
1415 int
1416 #ifdef notyet
1417 ip_pcbopts(optname, pcbopt, m)
1418 	int optname;
1419 #else
1420 ip_pcbopts(pcbopt, m)
1421 #endif
1422 	struct mbuf **pcbopt;
1423 	register struct mbuf *m;
1424 {
1425 	register int cnt, optlen;
1426 	register u_char *cp;
1427 	u_char opt;
1428 
1429 	/* turn off any old options */
1430 	if (*pcbopt)
1431 		(void)m_free(*pcbopt);
1432 	*pcbopt = 0;
1433 	if (m == (struct mbuf *)0 || m->m_len == 0) {
1434 		/*
1435 		 * Only turning off any previous options.
1436 		 */
1437 		if (m)
1438 			(void)m_free(m);
1439 		return (0);
1440 	}
1441 
1442 #ifndef	vax
1443 	if (m->m_len % sizeof(int32_t))
1444 		goto bad;
1445 #endif
1446 	/*
1447 	 * IP first-hop destination address will be stored before
1448 	 * actual options; move other options back
1449 	 * and clear it when none present.
1450 	 */
1451 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1452 		goto bad;
1453 	cnt = m->m_len;
1454 	m->m_len += sizeof(struct in_addr);
1455 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1456 	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1457 	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1458 
1459 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1460 		opt = cp[IPOPT_OPTVAL];
1461 		if (opt == IPOPT_EOL)
1462 			break;
1463 		if (opt == IPOPT_NOP)
1464 			optlen = 1;
1465 		else {
1466 			if (cnt < IPOPT_OLEN + sizeof(*cp))
1467 				goto bad;
1468 			optlen = cp[IPOPT_OLEN];
1469 			if (optlen < IPOPT_OLEN  + sizeof(*cp) || optlen > cnt)
1470 				goto bad;
1471 		}
1472 		switch (opt) {
1473 
1474 		default:
1475 			break;
1476 
1477 		case IPOPT_LSRR:
1478 		case IPOPT_SSRR:
1479 			/*
1480 			 * user process specifies route as:
1481 			 *	->A->B->C->D
1482 			 * D must be our final destination (but we can't
1483 			 * check that since we may not have connected yet).
1484 			 * A is first hop destination, which doesn't appear in
1485 			 * actual IP option, but is stored before the options.
1486 			 */
1487 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1488 				goto bad;
1489 			m->m_len -= sizeof(struct in_addr);
1490 			cnt -= sizeof(struct in_addr);
1491 			optlen -= sizeof(struct in_addr);
1492 			cp[IPOPT_OLEN] = optlen;
1493 			/*
1494 			 * Move first hop before start of options.
1495 			 */
1496 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1497 			    sizeof(struct in_addr));
1498 			/*
1499 			 * Then copy rest of options back
1500 			 * to close up the deleted entry.
1501 			 */
1502 			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1503 			    sizeof(struct in_addr)),
1504 			    (caddr_t)&cp[IPOPT_OFFSET+1],
1505 			    (unsigned)cnt + sizeof(struct in_addr));
1506 			break;
1507 		}
1508 	}
1509 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1510 		goto bad;
1511 	*pcbopt = m;
1512 	return (0);
1513 
1514 bad:
1515 	(void)m_free(m);
1516 	return (EINVAL);
1517 }
1518 
1519 /*
1520  * Set the IP multicast options in response to user setsockopt().
1521  */
1522 int
1523 ip_setmoptions(optname, imop, m)
1524 	int optname;
1525 	struct ip_moptions **imop;
1526 	struct mbuf *m;
1527 {
1528 	register int error = 0;
1529 	u_char loop;
1530 	register int i;
1531 	struct in_addr addr;
1532 	register struct ip_mreq *mreq;
1533 	register struct ifnet *ifp;
1534 	register struct ip_moptions *imo = *imop;
1535 	struct route ro;
1536 	register struct sockaddr_in *dst;
1537 
1538 	if (imo == NULL) {
1539 		/*
1540 		 * No multicast option buffer attached to the pcb;
1541 		 * allocate one and initialize to default values.
1542 		 */
1543 		imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
1544 		    M_WAITOK);
1545 
1546 		*imop = imo;
1547 		imo->imo_multicast_ifp = NULL;
1548 		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1549 		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1550 		imo->imo_num_memberships = 0;
1551 	}
1552 
1553 	switch (optname) {
1554 
1555 	case IP_MULTICAST_IF:
1556 		/*
1557 		 * Select the interface for outgoing multicast packets.
1558 		 */
1559 		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
1560 			error = EINVAL;
1561 			break;
1562 		}
1563 		addr = *(mtod(m, struct in_addr *));
1564 		/*
1565 		 * INADDR_ANY is used to remove a previous selection.
1566 		 * When no interface is selected, a default one is
1567 		 * chosen every time a multicast packet is sent.
1568 		 */
1569 		if (addr.s_addr == INADDR_ANY) {
1570 			imo->imo_multicast_ifp = NULL;
1571 			break;
1572 		}
1573 		/*
1574 		 * The selected interface is identified by its local
1575 		 * IP address.  Find the interface and confirm that
1576 		 * it supports multicasting.
1577 		 */
1578 		INADDR_TO_IFP(addr, ifp);
1579 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1580 			error = EADDRNOTAVAIL;
1581 			break;
1582 		}
1583 		imo->imo_multicast_ifp = ifp;
1584 		break;
1585 
1586 	case IP_MULTICAST_TTL:
1587 		/*
1588 		 * Set the IP time-to-live for outgoing multicast packets.
1589 		 */
1590 		if (m == NULL || m->m_len != 1) {
1591 			error = EINVAL;
1592 			break;
1593 		}
1594 		imo->imo_multicast_ttl = *(mtod(m, u_char *));
1595 		break;
1596 
1597 	case IP_MULTICAST_LOOP:
1598 		/*
1599 		 * Set the loopback flag for outgoing multicast packets.
1600 		 * Must be zero or one.
1601 		 */
1602 		if (m == NULL || m->m_len != 1 ||
1603 		   (loop = *(mtod(m, u_char *))) > 1) {
1604 			error = EINVAL;
1605 			break;
1606 		}
1607 		imo->imo_multicast_loop = loop;
1608 		break;
1609 
1610 	case IP_ADD_MEMBERSHIP:
1611 		/*
1612 		 * Add a multicast group membership.
1613 		 * Group must be a valid IP multicast address.
1614 		 */
1615 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1616 			error = EINVAL;
1617 			break;
1618 		}
1619 		mreq = mtod(m, struct ip_mreq *);
1620 		if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1621 			error = EINVAL;
1622 			break;
1623 		}
1624 		/*
1625 		 * If no interface address was provided, use the interface of
1626 		 * the route to the given multicast address.
1627 		 */
1628 		if (mreq->imr_interface.s_addr == INADDR_ANY) {
1629 			ro.ro_rt = NULL;
1630 			dst = satosin(&ro.ro_dst);
1631 			dst->sin_len = sizeof(*dst);
1632 			dst->sin_family = AF_INET;
1633 			dst->sin_addr = mreq->imr_multiaddr;
1634 			rtalloc(&ro);
1635 			if (ro.ro_rt == NULL) {
1636 				error = EADDRNOTAVAIL;
1637 				break;
1638 			}
1639 			ifp = ro.ro_rt->rt_ifp;
1640 			rtfree(ro.ro_rt);
1641 		} else {
1642 			INADDR_TO_IFP(mreq->imr_interface, ifp);
1643 		}
1644 		/*
1645 		 * See if we found an interface, and confirm that it
1646 		 * supports multicast.
1647 		 */
1648 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1649 			error = EADDRNOTAVAIL;
1650 			break;
1651 		}
1652 		/*
1653 		 * See if the membership already exists or if all the
1654 		 * membership slots are full.
1655 		 */
1656 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1657 			if (imo->imo_membership[i]->inm_ifp == ifp &&
1658 			    imo->imo_membership[i]->inm_addr.s_addr
1659 						== mreq->imr_multiaddr.s_addr)
1660 				break;
1661 		}
1662 		if (i < imo->imo_num_memberships) {
1663 			error = EADDRINUSE;
1664 			break;
1665 		}
1666 		if (i == IP_MAX_MEMBERSHIPS) {
1667 			error = ETOOMANYREFS;
1668 			break;
1669 		}
1670 		/*
1671 		 * Everything looks good; add a new record to the multicast
1672 		 * address list for the given interface.
1673 		 */
1674 		if ((imo->imo_membership[i] =
1675 		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1676 			error = ENOBUFS;
1677 			break;
1678 		}
1679 		++imo->imo_num_memberships;
1680 		break;
1681 
1682 	case IP_DROP_MEMBERSHIP:
1683 		/*
1684 		 * Drop a multicast group membership.
1685 		 * Group must be a valid IP multicast address.
1686 		 */
1687 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1688 			error = EINVAL;
1689 			break;
1690 		}
1691 		mreq = mtod(m, struct ip_mreq *);
1692 		if (!IN_MULTICAST(mreq->imr_multiaddr.s_addr)) {
1693 			error = EINVAL;
1694 			break;
1695 		}
1696 		/*
1697 		 * If an interface address was specified, get a pointer
1698 		 * to its ifnet structure.
1699 		 */
1700 		if (mreq->imr_interface.s_addr == INADDR_ANY)
1701 			ifp = NULL;
1702 		else {
1703 			INADDR_TO_IFP(mreq->imr_interface, ifp);
1704 			if (ifp == NULL) {
1705 				error = EADDRNOTAVAIL;
1706 				break;
1707 			}
1708 		}
1709 		/*
1710 		 * Find the membership in the membership array.
1711 		 */
1712 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1713 			if ((ifp == NULL ||
1714 			     imo->imo_membership[i]->inm_ifp == ifp) &&
1715 			     imo->imo_membership[i]->inm_addr.s_addr ==
1716 			     mreq->imr_multiaddr.s_addr)
1717 				break;
1718 		}
1719 		if (i == imo->imo_num_memberships) {
1720 			error = EADDRNOTAVAIL;
1721 			break;
1722 		}
1723 		/*
1724 		 * Give up the multicast address record to which the
1725 		 * membership points.
1726 		 */
1727 		in_delmulti(imo->imo_membership[i]);
1728 		/*
1729 		 * Remove the gap in the membership array.
1730 		 */
1731 		for (++i; i < imo->imo_num_memberships; ++i)
1732 			imo->imo_membership[i-1] = imo->imo_membership[i];
1733 		--imo->imo_num_memberships;
1734 		break;
1735 
1736 	default:
1737 		error = EOPNOTSUPP;
1738 		break;
1739 	}
1740 
1741 	/*
1742 	 * If all options have default values, no need to keep the mbuf.
1743 	 */
1744 	if (imo->imo_multicast_ifp == NULL &&
1745 	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1746 	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1747 	    imo->imo_num_memberships == 0) {
1748 		free(*imop, M_IPMOPTS);
1749 		*imop = NULL;
1750 	}
1751 
1752 	return (error);
1753 }
1754 
1755 /*
1756  * Return the IP multicast options in response to user getsockopt().
1757  */
1758 int
1759 ip_getmoptions(optname, imo, mp)
1760 	int optname;
1761 	register struct ip_moptions *imo;
1762 	register struct mbuf **mp;
1763 {
1764 	u_char *ttl;
1765 	u_char *loop;
1766 	struct in_addr *addr;
1767 	struct in_ifaddr *ia;
1768 
1769 	*mp = m_get(M_WAIT, MT_SOOPTS);
1770 
1771 	switch (optname) {
1772 
1773 	case IP_MULTICAST_IF:
1774 		addr = mtod(*mp, struct in_addr *);
1775 		(*mp)->m_len = sizeof(struct in_addr);
1776 		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1777 			addr->s_addr = INADDR_ANY;
1778 		else {
1779 			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1780 			addr->s_addr = (ia == NULL) ? INADDR_ANY
1781 					: ia->ia_addr.sin_addr.s_addr;
1782 		}
1783 		return (0);
1784 
1785 	case IP_MULTICAST_TTL:
1786 		ttl = mtod(*mp, u_char *);
1787 		(*mp)->m_len = 1;
1788 		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1789 				     : imo->imo_multicast_ttl;
1790 		return (0);
1791 
1792 	case IP_MULTICAST_LOOP:
1793 		loop = mtod(*mp, u_char *);
1794 		(*mp)->m_len = 1;
1795 		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1796 				      : imo->imo_multicast_loop;
1797 		return (0);
1798 
1799 	default:
1800 		return (EOPNOTSUPP);
1801 	}
1802 }
1803 
1804 /*
1805  * Discard the IP multicast options.
1806  */
1807 void
1808 ip_freemoptions(imo)
1809 	register struct ip_moptions *imo;
1810 {
1811 	register int i;
1812 
1813 	if (imo != NULL) {
1814 		for (i = 0; i < imo->imo_num_memberships; ++i)
1815 			in_delmulti(imo->imo_membership[i]);
1816 		free(imo, M_IPMOPTS);
1817 	}
1818 }
1819 
1820 /*
1821  * Routine called from ip_output() to loop back a copy of an IP multicast
1822  * packet to the input queue of a specified interface.  Note that this
1823  * calls the output routine of the loopback "driver", but with an interface
1824  * pointer that might NOT be &loif -- easier than replicating that code here.
1825  */
1826 static void
1827 ip_mloopback(ifp, m, dst)
1828 	struct ifnet *ifp;
1829 	register struct mbuf *m;
1830 	register struct sockaddr_in *dst;
1831 {
1832 	register struct ip *ip;
1833 	struct mbuf *copym;
1834 
1835 	copym = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
1836 	if (copym != NULL) {
1837 		/*
1838 		 * We don't bother to fragment if the IP length is greater
1839 		 * than the interface's MTU.  Can this possibly matter?
1840 		 */
1841 		ip = mtod(copym, struct ip *);
1842 		ip->ip_len = htons((u_int16_t)ip->ip_len);
1843 		ip->ip_off = htons((u_int16_t)ip->ip_off);
1844 		ip->ip_sum = 0;
1845 		ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1846 		(void) looutput(ifp, copym, sintosa(dst), NULL);
1847 	}
1848 }
1849 
1850 /*
1851  * Process a delayed payload checksum calculation.
1852  */
1853 void
1854 in_delayed_cksum(struct mbuf *m)
1855 {
1856 	struct ip *ip;
1857 	u_int16_t csum, offset;
1858 
1859 	ip = mtod(m, struct ip *);
1860 	offset = ip->ip_hl << 2;
1861 	csum = in4_cksum(m, 0, offset, m->m_pkthdr.len - offset);
1862 	if (csum == 0 && ip->ip_p == IPPROTO_UDP)
1863 		csum = 0xffff;
1864 
1865 	switch (ip->ip_p) {
1866 	case IPPROTO_TCP:
1867 		offset += offsetof(struct tcphdr, th_sum);
1868 		break;
1869 
1870 	case IPPROTO_UDP:
1871 		offset += offsetof(struct udphdr, uh_sum);
1872 		break;
1873 
1874 	default:
1875 		return;
1876 	}
1877 
1878 	if ((offset + sizeof(u_int16_t)) > m->m_len)
1879 		m_copyback(m, offset, sizeof(csum), (caddr_t) &csum);
1880 	else
1881 		*(u_int16_t *)(mtod(m, caddr_t) + offset) = csum;
1882 }
1883